/*
 * EliasDB
 *
 * Copyright 2016 Matthias Ladkau. All rights reserved.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
package parser

import (
	"bytes"
	"fmt"

	"devt.de/krotik/common/stringutil"
)

// AST Nodes
// =========

/*
ASTNode models a node in the AST
*/
type ASTNode struct {
	Name     string     // Name of the node
	Token    *LexToken  // Lexer token of this ASTNode
	Children []*ASTNode // Child nodes
	Runtime  Runtime    // Runtime component for this ASTNode

	binding        int                                                             // Binding power of this node
	nullDenotation func(p *parser, self *ASTNode) (*ASTNode, error)                // Configure token as beginning node
	leftDenotation func(p *parser, self *ASTNode, left *ASTNode) (*ASTNode, error) // Configure token as left node
}

/*
ASTFromPlain creates an AST from a plain AST.
A plain AST is a nested map structure like this:

	{
		name     : <name of node>
		value    : <value of node>
		children : [ <child nodes> ]
	}
*/
func ASTFromPlain(plainAST map[string]interface{}) (*ASTNode, error) {
	var astChildren []*ASTNode

	name, ok := plainAST["name"]
	if !ok {
		return nil, fmt.Errorf("Found plain ast node without a name: %v", plainAST)
	}

	value, ok := plainAST["value"]
	if !ok {
		return nil, fmt.Errorf("Found plain ast node without a value: %v", plainAST)
	}

	// Create children

	if children, ok := plainAST["children"]; ok {

		if ic, ok := children.([]interface{}); ok {

			// Do a list conversion if necessary - this is needed when the
			// plain AST was decoded from JSON into map[string]interface{}

			childrenList := make([]map[string]interface{}, len(ic))
			for i := range ic {
				childrenList[i] = ic[i].(map[string]interface{})
			}

			children = childrenList
		}

		for _, child := range children.([]map[string]interface{}) {

			astChild, err := ASTFromPlain(child)
			if err != nil {
				return nil, err
			}

			astChildren = append(astChildren, astChild)
		}
	}

	return &ASTNode{fmt.Sprint(name), &LexToken{TokenGeneral, 0,
		fmt.Sprint(value), 0, 0}, astChildren, nil, 0, nil, nil}, nil
}
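/*
examplePlainAST is a minimal illustrative sketch and not part of the original
API. It shows the plain AST map shape described above (for instance the result
of deserializing JSON) being converted back into an *ASTNode via ASTFromPlain;
calling Plain on the result yields an equivalent nested map again. The node
names and values used here are arbitrary placeholders.
*/
func examplePlainAST() {
	plain := map[string]interface{}{
		"name":  "value",
		"value": "42",
		"children": []interface{}{
			map[string]interface{}{
				"name":  "value",
				"value": "43",
			},
		},
	}

	// Convert the plain structure into an AST node tree
	node, err := ASTFromPlain(plain)
	if err == nil {

		// Prints a map equivalent to the input structure
		fmt.Println(node.Plain())
	}
}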
/*
Create a new instance of this ASTNode which is connected to a concrete lexer token.
*/
func (n *ASTNode) instance(p *parser, t *LexToken) *ASTNode {

	ret := &ASTNode{n.Name, t, make([]*ASTNode, 0, 2), nil, n.binding, n.nullDenotation, n.leftDenotation}

	if p.rp != nil {
		ret.Runtime = p.rp.Runtime(ret)
	}

	return ret
}

/*
Plain returns this ASTNode and all its children as a plain AST. A plain AST
only contains map objects, lists and primitive types which can be serialized
with JSON.
*/
func (n *ASTNode) Plain() map[string]interface{} {
	ret := make(map[string]interface{})

	ret["name"] = n.Name

	lenChildren := len(n.Children)

	if lenChildren > 0 {
		children := make([]map[string]interface{}, lenChildren)

		for i, child := range n.Children {
			children[i] = child.Plain()
		}

		ret["children"] = children
	}

	// The value is what the lexer found in the source

	ret["value"] = n.Token.Val

	return ret
}

/*
String returns a string representation of this AST node.
*/
func (n *ASTNode) String() string {
	var buf bytes.Buffer
	n.levelString(0, &buf)
	return buf.String()
}

/*
levelString function to recursively print the tree.
*/
func (n *ASTNode) levelString(indent int, buf *bytes.Buffer) {

	// Print current level

	buf.WriteString(stringutil.GenerateRollingString(" ", indent*2))

	if n.Name == NodeVALUE || (n.Name == NodeSHOWTERM && n.Token.Val != "@") {
		buf.WriteString(fmt.Sprintf(n.Name+": %v", n.Token))
	} else {
		buf.WriteString(n.Name)
	}

	buf.WriteString("\n")

	// Print children

	for _, child := range n.Children {
		child.levelString(indent+1, buf)
	}
}

/*
Map of AST nodes corresponding to lexer tokens
*/
var astNodeMap map[LexTokenID]*ASTNode

/*
TokenSHOWTERM is an extra token which is generated by the parser
to group show terms
*/
const TokenSHOWTERM = LexTokenID(-1)

func init() {
	astNodeMap = map[LexTokenID]*ASTNode{
		TokenEOF:           {NodeEOF, nil, nil, nil, 0, ndTerm, nil},
		TokenVALUE:         {NodeVALUE, nil, nil, nil, 0, ndTerm, nil},
		TokenNODEKIND:      {NodeVALUE, nil, nil, nil, 0, ndTerm, nil},
		TokenTRUE:          {NodeTRUE, nil, nil, nil, 0, ndTerm, nil},
		TokenFALSE:         {NodeFALSE, nil, nil, nil, 0, ndTerm, nil},
		TokenNULL:          {NodeNULL, nil, nil, nil, 0, ndTerm, nil},
		TokenAT:            {NodeFUNC, nil, nil, nil, 0, ndFunc, nil},
		TokenORDERING:      {NodeORDERING, nil, nil, nil, 0, ndWithFunc, nil},
		TokenFILTERING:     {NodeFILTERING, nil, nil, nil, 0, ndWithFunc, nil},
		TokenNULLTRAVERSAL: {NodeNULLTRAVERSAL, nil, nil, nil, 0, ndWithFunc, nil},

		// Special tokens - always handled in a denotation function

		TokenCOMMA:  {NodeCOMMA, nil, nil, nil, 0, nil, nil},
		TokenGROUP:  {NodeGROUP, nil, nil, nil, 0, nil, nil},
		TokenEND:    {NodeEND, nil, nil, nil, 0, nil, nil},
		TokenAS:     {NodeAS, nil, nil, nil, 0, nil, nil},
		TokenFORMAT: {NodeFORMAT, nil, nil, nil, 0, nil, nil},

		// Keywords

		TokenGET:    {NodeGET, nil, nil, nil, 0, ndGet, nil},
		TokenLOOKUP: {NodeLOOKUP, nil, nil, nil, 0, ndLookup, nil},
		TokenFROM:   {NodeFROM, nil, nil, nil, 0, ndFrom, nil},
		TokenWHERE:  {NodeWHERE, nil, nil, nil, 0, ndPrefix, nil},

		TokenUNIQUE:      {NodeUNIQUE, nil, nil, nil, 0, ndPrefix, nil},
		TokenUNIQUECOUNT: {NodeUNIQUECOUNT, nil, nil, nil, 0, ndPrefix, nil},
		TokenISNOTNULL:   {NodeISNOTNULL, nil, nil, nil, 0, ndPrefix, nil},
		TokenASCENDING:   {NodeASCENDING, nil, nil, nil, 0, ndPrefix, nil},
		TokenDESCENDING:  {NodeDESCENDING, nil, nil, nil, 0, ndPrefix, nil},

		TokenTRAVERSE: {NodeTRAVERSE, nil, nil, nil, 0, ndTraverse, nil},
		TokenPRIMARY:  {NodePRIMARY, nil, nil, nil, 0, ndPrefix, nil},
		TokenSHOW:     {NodeSHOW, nil, nil, nil, 0, ndShow, nil},
		TokenSHOWTERM: {NodeSHOWTERM, nil, nil, nil, 0, ndShow, nil},
		TokenWITH:     {NodeWITH, nil, nil, nil, 0, ndWith, nil},
		TokenLIST:     {NodeLIST, nil, nil, nil, 0, nil, nil},

		// Boolean operations

		TokenNOT: {NodeNOT, nil, nil, nil, 20, ndPrefix, nil},

		TokenOR:  {NodeOR, nil, nil, nil, 30, nil, ldInfix},
		TokenAND: {NodeAND, nil, nil, nil, 40, nil, ldInfix},

		TokenGEQ: {NodeGEQ, nil, nil, nil, 60, nil, ldInfix},
		TokenLEQ: {NodeLEQ, nil, nil, nil, 60, nil, ldInfix},
		TokenNEQ: {NodeNEQ, nil, nil, nil, 60, nil, ldInfix},
		TokenEQ:  {NodeEQ, nil, nil, nil, 60, nil, ldInfix},
		TokenGT:  {NodeGT, nil, nil, nil, 60, nil, ldInfix},
		TokenLT:  {NodeLT, nil, nil, nil, 60, nil, ldInfix},

		TokenLIKE:        {NodeLIKE, nil, nil, nil, 60, nil, ldInfix},
		TokenIN:          {NodeIN, nil, nil, nil, 60, nil, ldInfix},
		TokenCONTAINS:    {NodeCONTAINS, nil, nil, nil, 60, nil, ldInfix},
		TokenBEGINSWITH:  {NodeBEGINSWITH, nil, nil, nil, 60, nil, ldInfix},
		TokenENDSWITH:    {NodeENDSWITH, nil, nil, nil, 60, nil, ldInfix},
		TokenCONTAINSNOT: {NodeCONTAINSNOT, nil, nil, nil, 60, nil, ldInfix},
		TokenNOTIN:       {NodeNOTIN, nil, nil, nil, 60, nil, ldInfix},

		// Simple arithmetic expressions

		TokenPLUS:   {NodePLUS, nil, nil, nil, 110, ndPrefix, ldInfix},
		TokenMINUS:  {NodeMINUS, nil, nil, nil, 110, ndPrefix, ldInfix},
		TokenTIMES:  {NodeTIMES, nil, nil, nil, 120, nil, ldInfix},
		TokenDIV:    {NodeDIV, nil, nil, nil, 120, nil, ldInfix},
		TokenMODINT: {NodeMODINT, nil, nil, nil, 120, nil, ldInfix},
		TokenDIVINT: {NodeDIVINT, nil, nil, nil, 120, nil, ldInfix},

		// Brackets

		TokenLPAREN: {NodeLPAREN, nil, nil, nil, 150, ndInner, nil},
		TokenRPAREN: {NodeRPAREN, nil, nil, nil, 0, nil, nil},
		TokenLBRACK: {NodeLBRACK, nil, nil, nil, 150, ndList, nil},
		TokenRBRACK: {NodeRBRACK, nil, nil, nil, 0, nil, nil},
	}
}

// Parser
// ======

/*
Parser data structure
*/
type parser struct {
	name   string          // Name to identify the input
	node   *ASTNode        // Current ast node
	tokens chan LexToken   // Channel which contains lex tokens
	rp     RuntimeProvider // Runtime provider which creates runtime components
}

/*
Parse parses a given input string and returns an AST.
*/
func Parse(name string, input string) (*ASTNode, error) {
	return ParseWithRuntime(name, input, nil)
}

/*
ParseWithRuntime parses a given input string and returns an AST decorated with
runtime components.
*/
func ParseWithRuntime(name string, input string, rp RuntimeProvider) (*ASTNode, error) {
	p := &parser{name, nil, Lex(name, input), rp}

	node, err := p.next()
	if err != nil {
		return nil, err
	}

	p.node = node

	return p.run(0)
}
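// Note: run below and the binding powers in astNodeMap form a Pratt-style
// top-down operator precedence parser. An infix operator consumes its right
// operand by calling run with its own binding power (see ldInfix), and run
// keeps extending the expression only while the next token binds more
// strongly. As an illustrative example, in "a + b * c" the PLUS node
// (binding 110) recurses with right binding 110; TIMES (binding 120) still
// binds inside that call, so "b * c" becomes the right child of PLUS.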
/*
run models the main parser function.
*/
func (p *parser) run(rightBinding int) (*ASTNode, error) {
	var err error

	n := p.node

	p.node, err = p.next()
	if err != nil {
		return nil, err
	}

	// Start with the null denotation of this statement / expression

	if n.nullDenotation == nil {
		return nil, p.newParserError(ErrImpossibleNullDenotation,
			n.Token.String(), *n.Token)
	}

	left, err := n.nullDenotation(p, n)
	if err != nil {
		return nil, err
	}

	// Collect left denotations as long as the left binding power is greater
	// than the initial right one

	for rightBinding < p.node.binding {
		var nleft *ASTNode

		n = p.node

		p.node, err = p.next()
		if err != nil {
			return nil, err
		}

		if n.leftDenotation == nil {
			return nil, p.newParserError(ErrImpossibleLeftDenotation,
				n.Token.String(), *n.Token)
		}

		// Get the next left denotation

		nleft, err = n.leftDenotation(p, n, left)
		left = nleft

		if err != nil {
			return nil, err
		}
	}

	return left, nil
}

/*
next retrieves the next lexer token.
*/
func (p *parser) next() (*ASTNode, error) {
	token, more := <-p.tokens

	if !more {

		// Unexpected end of input - the associated token is an empty error token

		return nil, p.newParserError(ErrUnexpectedEnd, "", token)

	} else if token.ID == TokenError {

		// There was a lexer error; wrap it in a parser error

		return nil, p.newParserError(ErrLexicalError, token.Val, token)

	} else if node, ok := astNodeMap[token.ID]; ok {

		return node.instance(p, &token), nil
	}

	return nil, p.newParserError(ErrUnknownToken, fmt.Sprintf("id:%v (%v)", token.ID, token), token)
}

// Standard null denotation functions
// ==================================

/*
ndTerm is used for terminals.
*/
func ndTerm(p *parser, self *ASTNode) (*ASTNode, error) {
	return self, nil
}

/*
ndInner returns the inner expression of an enclosed block and discards the
block token. This method is used for brackets.
*/
func ndInner(p *parser, self *ASTNode) (*ASTNode, error) {

	// Get the inner expression

	exp, err := p.run(0)
	if err != nil {
		return nil, err
	}

	// We return here the inner expression - discarding the bracket tokens

	return exp, skipToken(p, TokenRPAREN)
}

/*
ndPrefix is used for prefix operators.
*/
func ndPrefix(p *parser, self *ASTNode) (*ASTNode, error) {

	// Make sure a prefix will only prefix the next item

	val, err := p.run(self.binding + 20)
	if err != nil {
		return nil, err
	}

	self.Children = append(self.Children, val)

	return self, nil
}

// Null denotation functions for specific expressions
// ==================================================

/*
ndGet is used to parse get expressions.
*/
func ndGet(p *parser, self *ASTNode) (*ASTNode, error) {

	// Must specify a node kind

	if err := acceptChild(p, self, TokenNODEKIND); err != nil {
		return nil, err
	}

	// Parse the rest and add it as children

	for p.node.Token.ID != TokenEOF {

		exp, err := p.run(0)
		if err != nil {
			return nil, err
		}

		self.Children = append(self.Children, exp)
	}

	return self, nil
}

/*
ndLookup is used to parse lookup expressions.
*/
func ndLookup(p *parser, self *ASTNode) (*ASTNode, error) {

	// Must specify a node kind

	if err := acceptChild(p, self, TokenNODEKIND); err != nil {
		return nil, err
	}

	// Must have at least one node key

	if err := acceptChild(p, self, TokenVALUE); err != nil {
		return nil, err
	}

	// Read all commas and accept further values as additional node keys

	for skipToken(p, TokenCOMMA) == nil {
		if err := acceptChild(p, self, TokenVALUE); err != nil {
			return nil, err
		}
	}

	// Parse the rest and add it as children

	for p.node.Token.ID != TokenEOF {

		exp, err := p.run(0)
		if err != nil {
			return nil, err
		}

		self.Children = append(self.Children, exp)
	}

	return self, nil
}
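// For orientation (the concrete query fragments are assumptions inferred from
// the token names, not taken from the original file): ndGet handles queries of
// the shape "get <node kind> ...", while ndLookup expects a node kind followed
// by one or more comma-separated node keys, roughly "lookup <node kind>
// <key>, <key> ...". Both then parse any remaining clauses (where, traverse,
// show, with, ...) as further children of the root node.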
/*
ndFrom is used to parse from group ... expressions.
*/
func ndFrom(p *parser, self *ASTNode) (*ASTNode, error) {

	// Must be followed by a group keyword

	if err := acceptChild(p, self, TokenGROUP); err != nil {
		return nil, err
	}

	// Must have a group name

	return self, acceptChild(p, self.Children[0], TokenVALUE)
}

/*
ndTraverse is used to parse traverse expressions.
*/
func ndTraverse(p *parser, self *ASTNode) (*ASTNode, error) {

	// Must be followed by traversal spec

	if err := acceptChild(p, self, TokenVALUE); err != nil {
		return nil, err
	}

	// Parse the rest and add it as children - must end with "end" if
	// further clauses are given

	for p.node.Token.ID != TokenEOF && p.node.Token.ID != TokenEND {

		exp, err := p.run(0)
		if err != nil {
			return nil, err
		}

		self.Children = append(self.Children, exp)
	}

	if p.node.Token.ID == TokenEND {
		skipToken(p, TokenEND)
	}

	return self, nil
}

/*
ndFunc is used to parse functions.
*/
func ndFunc(p *parser, self *ASTNode) (*ASTNode, error) {

	// Must specify a name

	if err := acceptChild(p, self, TokenVALUE); err != nil {
		return nil, err
	}

	// Must have an opening bracket

	if err := skipToken(p, TokenLPAREN); err != nil {
		return nil, err
	}

	// Read in the first attribute

	if p.node.Token.ID == TokenVALUE {

		// Next call cannot fail since we just checked for it. Value is optional.

		acceptChild(p, self, TokenVALUE)

		// Read all commas and accept further values as parameters until the end

		for skipToken(p, TokenCOMMA) == nil {
			if err := acceptChild(p, self, TokenVALUE); err != nil {
				return nil, err
			}
		}
	}

	// Must have a closing bracket

	return self, skipToken(p, TokenRPAREN)
}

/*
ndShow is used to parse a show clause.
*/
func ndShow(p *parser, self *ASTNode) (*ASTNode, error) {

	acceptShowTerm := func() error {
		st := astNodeMap[TokenSHOWTERM].instance(p, p.node.Token)

		if p.node.Token.ID == TokenAT {

			// Parse a function

			exp, err := p.run(0)
			if err != nil {
				return err
			}

			st.Children = append(st.Children, exp)

		} else {

			// Skip the value token from which we just created an AST node

			skipToken(p, TokenVALUE)
		}

		// Parse an "as" definition if given

		if p.node.Token.ID == TokenAS {
			current := p.node

			acceptChild(p, st, TokenAS)

			if err := acceptChild(p, current, TokenVALUE); err != nil {
				return err
			}
		}

		// Parse a "format" definition if given

		if p.node.Token.ID == TokenFORMAT {
			current := p.node

			acceptChild(p, st, TokenFORMAT)

			if err := acceptChild(p, current, TokenVALUE); err != nil {
				return err
			}
		}

		self.Children = append(self.Children, st)

		return nil
	}

	// Read in the first node attribute

	if p.node.Token.ID == TokenVALUE || p.node.Token.ID == TokenAT {

		if err := acceptShowTerm(); err != nil {
			return nil, err
		}

		// Read further show entries

		for skipToken(p, TokenCOMMA) == nil {
			if err := acceptShowTerm(); err != nil {
				return nil, err
			}
		}
	}

	return self, nil
}

/*
ndWith is used to parse a with clause.
*/
func ndWith(p *parser, self *ASTNode) (*ASTNode, error) {

	// Parse the rest and add it as children

	for p.node.Token.ID != TokenEOF {

		exp, err := p.run(0)
		if err != nil {
			return nil, err
		}

		self.Children = append(self.Children, exp)

		if p.node.Token.ID == TokenCOMMA {
			skipToken(p, TokenCOMMA)
		}
	}

	return self, nil
}
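// For orientation (the concrete fragments are assumptions inferred from the
// token names, not taken from the original file): a show term is either an
// attribute value or an "@" function call, optionally followed by
// "as <name>" and "format <spec>", e.g. "show name as Name, @count(kind)".
// A with clause is a comma-separated list of directives such as
// "ordering(...)" or "filtering(...)", which are parsed by ndWithFunc below.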
/*
ndWithFunc is used to parse directives in with clauses.
*/
func ndWithFunc(p *parser, self *ASTNode) (*ASTNode, error) {

	// Must have an opening bracket

	if err := skipToken(p, TokenLPAREN); err != nil {
		return nil, err
	}

	for p.node.Token.ID != TokenRPAREN {

		// Parse all the expressions inside the directives

		exp, err := p.run(0)
		if err != nil {
			return nil, err
		}

		self.Children = append(self.Children, exp)

		if p.node.Token.ID == TokenCOMMA {
			skipToken(p, TokenCOMMA)
		}
	}

	// Must have a closing bracket

	return self, skipToken(p, TokenRPAREN)
}

/*
ndList is used to collect elements of a list.
*/
func ndList(p *parser, self *ASTNode) (*ASTNode, error) {

	// Create a list token

	st := astNodeMap[TokenLIST].instance(p, self.Token)

	// Get the inner expression

	for p.node.Token.ID != TokenRBRACK {

		// Parse all the expressions inside the list

		exp, err := p.run(0)
		if err != nil {
			return nil, err
		}

		st.Children = append(st.Children, exp)

		if p.node.Token.ID == TokenCOMMA {
			skipToken(p, TokenCOMMA)
		}
	}

	// Must have a closing bracket

	return st, skipToken(p, TokenRBRACK)
}

// Standard left denotation functions
// ==================================

/*
ldInfix is used for infix operators.
*/
func ldInfix(p *parser, self *ASTNode, left *ASTNode) (*ASTNode, error) {

	right, err := p.run(self.binding)
	if err != nil {
		return nil, err
	}

	self.Children = append(self.Children, left)
	self.Children = append(self.Children, right)

	return self, nil
}

// Helper functions
// ================

/*
skipToken skips over a given token.
*/
func skipToken(p *parser, ids ...LexTokenID) error {
	var err error

	canSkip := func(id LexTokenID) bool {
		for _, i := range ids {
			if i == id {
				return true
			}
		}
		return false
	}

	if !canSkip(p.node.Token.ID) {

		if p.node.Token.ID == TokenEOF {
			return p.newParserError(ErrUnexpectedEnd, "", *p.node.Token)
		}

		return p.newParserError(ErrUnexpectedToken, p.node.Token.Val, *p.node.Token)
	}

	// This should never return an error unless we skip over EOF or complex tokens
	// like values

	p.node, err = p.next()

	return err
}

/*
acceptChild accepts the current token as a child.
*/
func acceptChild(p *parser, self *ASTNode, id LexTokenID) error {
	var err error

	current := p.node

	p.node, err = p.next()
	if err != nil {
		return err
	}

	if current.Token.ID == id {
		self.Children = append(self.Children, current)
		return nil
	}

	return p.newParserError(ErrUnexpectedToken, current.Token.Val, *current.Token)
}
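/*
exampleParse is a minimal usage sketch and not part of the original API. It
shows how a query string is turned into an AST via Parse; the query text
itself is an assumption based on the keywords in astNodeMap, not taken from
the original file.
*/
func exampleParse() {

	// Parse a simple query without runtime components (rp is nil)

	ast, err := Parse("example query", `get Person where name = "John" show name, age`)
	if err != nil {
		fmt.Println("Parse error:", err)
		return
	}

	// Print the indented tree produced by ASTNode.String()

	fmt.Println(ast)
}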