/*
 * EliasDB
 *
 * Copyright 2016 Matthias Ladkau. All rights reserved.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
package parser

import (
	"bytes"
	"fmt"

	"devt.de/krotik/common/stringutil"
)

// AST Nodes
// =========

/*
ASTNode models a node in the AST
*/
type ASTNode struct {
	Name     string     // Name of the node
	Token    *LexToken  // Lexer token of this ASTNode
	Children []*ASTNode // Child nodes
	Runtime  Runtime    // Runtime component for this ASTNode

	binding        int                                                             // Binding power of this node
	nullDenotation func(p *parser, self *ASTNode) (*ASTNode, error)                // Configure token as beginning node
	leftDenotation func(p *parser, self *ASTNode, left *ASTNode) (*ASTNode, error) // Configure token as left node
}

/*
ASTFromPlain creates an AST from a plain AST.
A plain AST is a nested map structure like this:

	{
		name     : <name of node>
		value    : <value of node>
		children : [ <child nodes> ]
	}
*/
func ASTFromPlain(plainAST map[string]interface{}) (*ASTNode, error) {
	var astChildren []*ASTNode

	name, ok := plainAST["name"]
	if !ok {
		return nil, fmt.Errorf("Found plain ast node without a name: %v", plainAST)
	}

	value, ok := plainAST["value"]
	if !ok {
		return nil, fmt.Errorf("Found plain ast node without a value: %v", plainAST)
	}

	// Create children

	if children, ok := plainAST["children"]; ok {

		if ic, ok := children.([]interface{}); ok {

			// Do a list conversion if necessary - this is needed when the
			// plain AST was decoded from JSON into map[string]interface{}

			childrenList := make([]map[string]interface{}, len(ic))
			for i := range ic {
				childrenList[i] = ic[i].(map[string]interface{})
			}

			children = childrenList
		}

		for _, child := range children.([]map[string]interface{}) {

			astChild, err := ASTFromPlain(child)
			if err != nil {
				return nil, err
			}

			astChildren = append(astChildren, astChild)
		}
	}

	return &ASTNode{fmt.Sprint(name), &LexToken{TokenGeneral, 0,
		fmt.Sprint(value), 0, 0}, astChildren, nil, 0, nil, nil}, nil
}
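/*
examplePlainAST is a minimal illustrative sketch and not part of the original
API. It shows the plain AST map shape described above (for instance the result
of deserializing JSON) being converted back into an *ASTNode via ASTFromPlain;
calling Plain on the result yields an equivalent nested map again. The node
names and values used here are arbitrary placeholders.
*/
func examplePlainAST() {
	plain := map[string]interface{}{
		"name":  "value",
		"value": "42",
		"children": []interface{}{
			map[string]interface{}{
				"name":  "value",
				"value": "43",
			},
		},
	}

	// Convert the plain structure into an AST node tree
	node, err := ASTFromPlain(plain)
	if err == nil {

		// Prints a map equivalent to the input structure
		fmt.Println(node.Plain())
	}
}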
/*
Create a new instance of this ASTNode which is connected to a concrete lexer token.
*/
func (n *ASTNode) instance(p *parser, t *LexToken) *ASTNode {

	ret := &ASTNode{n.Name, t, make([]*ASTNode, 0, 2), nil, n.binding, n.nullDenotation, n.leftDenotation}

	if p.rp != nil {
		ret.Runtime = p.rp.Runtime(ret)
	}

	return ret
}

/*
Plain returns this ASTNode and all its children as a plain AST. A plain AST
only contains map objects, lists and primitive types which can be serialized
with JSON.
*/
func (n *ASTNode) Plain() map[string]interface{} {
	ret := make(map[string]interface{})

	ret["name"] = n.Name

	lenChildren := len(n.Children)

	if lenChildren > 0 {
		children := make([]map[string]interface{}, lenChildren)

		for i, child := range n.Children {
			children[i] = child.Plain()
		}

		ret["children"] = children
	}

	// The value is what the lexer found in the source

	ret["value"] = n.Token.Val

	return ret
}

/*
String returns a string representation of this AST node.
*/
func (n *ASTNode) String() string {
	var buf bytes.Buffer
	n.levelString(0, &buf)
	return buf.String()
}

/*
levelString function to recursively print the tree.
*/
func (n *ASTNode) levelString(indent int, buf *bytes.Buffer) {

	// Print current level

	buf.WriteString(stringutil.GenerateRollingString(" ", indent*2))

	if n.Name == NodeVALUE || (n.Name == NodeSHOWTERM && n.Token.Val != "@") {
		buf.WriteString(fmt.Sprintf(n.Name+": %v", n.Token))
	} else {
		buf.WriteString(n.Name)
	}

	buf.WriteString("\n")

	// Print children

	for _, child := range n.Children {
		child.levelString(indent+1, buf)
	}
}

/*
Map of AST nodes corresponding to lexer tokens
*/
var astNodeMap map[LexTokenID]*ASTNode

/*
TokenSHOWTERM is an extra token which is generated by the parser
to group show terms
*/
const TokenSHOWTERM = LexTokenID(-1)

func init() {
	astNodeMap = map[LexTokenID]*ASTNode{
		TokenEOF:           {NodeEOF, nil, nil, nil, 0, ndTerm, nil},
		TokenVALUE:         {NodeVALUE, nil, nil, nil, 0, ndTerm, nil},
		TokenNODEKIND:      {NodeVALUE, nil, nil, nil, 0, ndTerm, nil},
		TokenTRUE:          {NodeTRUE, nil, nil, nil, 0, ndTerm, nil},
		TokenFALSE:         {NodeFALSE, nil, nil, nil, 0, ndTerm, nil},
		TokenNULL:          {NodeNULL, nil, nil, nil, 0, ndTerm, nil},
		TokenAT:            {NodeFUNC, nil, nil, nil, 0, ndFunc, nil},
		TokenORDERING:      {NodeORDERING, nil, nil, nil, 0, ndWithFunc, nil},
		TokenFILTERING:     {NodeFILTERING, nil, nil, nil, 0, ndWithFunc, nil},
		TokenNULLTRAVERSAL: {NodeNULLTRAVERSAL, nil, nil, nil, 0, ndWithFunc, nil},

		// Special tokens - always handled in a denotation function

		TokenCOMMA:  {NodeCOMMA, nil, nil, nil, 0, nil, nil},
		TokenGROUP:  {NodeGROUP, nil, nil, nil, 0, nil, nil},
		TokenEND:    {NodeEND, nil, nil, nil, 0, nil, nil},
		TokenAS:     {NodeAS, nil, nil, nil, 0, nil, nil},
		TokenFORMAT: {NodeFORMAT, nil, nil, nil, 0, nil, nil},

		// Keywords

		TokenGET:    {NodeGET, nil, nil, nil, 0, ndGet, nil},
		TokenLOOKUP: {NodeLOOKUP, nil, nil, nil, 0, ndLookup, nil},
		TokenFROM:   {NodeFROM, nil, nil, nil, 0, ndFrom, nil},
		TokenWHERE:  {NodeWHERE, nil, nil, nil, 0, ndPrefix, nil},

		TokenUNIQUE:      {NodeUNIQUE, nil, nil, nil, 0, ndPrefix, nil},
		TokenUNIQUECOUNT: {NodeUNIQUECOUNT, nil, nil, nil, 0, ndPrefix, nil},
		TokenISNOTNULL:   {NodeISNOTNULL, nil, nil, nil, 0, ndPrefix, nil},
		TokenASCENDING:   {NodeASCENDING, nil, nil, nil, 0, ndPrefix, nil},
		TokenDESCENDING:  {NodeDESCENDING, nil, nil, nil, 0, ndPrefix, nil},

		TokenTRAVERSE: {NodeTRAVERSE, nil, nil, nil, 0, ndTraverse, nil},
		TokenPRIMARY:  {NodePRIMARY, nil, nil, nil, 0, ndPrefix, nil},
		TokenSHOW:     {NodeSHOW, nil, nil, nil, 0, ndShow, nil},
		TokenSHOWTERM: {NodeSHOWTERM, nil, nil, nil, 0, ndShow, nil},
		TokenWITH:     {NodeWITH, nil, nil, nil, 0, ndWith, nil},
		TokenLIST:     {NodeLIST, nil, nil, nil, 0, nil, nil},

		// Boolean operations

		TokenNOT: {NodeNOT, nil, nil, nil, 20, ndPrefix, nil},

		TokenOR:  {NodeOR, nil, nil, nil, 30, nil, ldInfix},
		TokenAND: {NodeAND, nil, nil, nil, 40, nil, ldInfix},

		TokenGEQ: {NodeGEQ, nil, nil, nil, 60, nil, ldInfix},
		TokenLEQ: {NodeLEQ, nil, nil, nil, 60, nil, ldInfix},
		TokenNEQ: {NodeNEQ, nil, nil, nil, 60, nil, ldInfix},
		TokenEQ:  {NodeEQ, nil, nil, nil, 60, nil, ldInfix},
		TokenGT:  {NodeGT, nil, nil, nil, 60, nil, ldInfix},
		TokenLT:  {NodeLT, nil, nil, nil, 60, nil, ldInfix},

		TokenLIKE:        {NodeLIKE, nil, nil, nil, 60, nil, ldInfix},
		TokenIN:          {NodeIN, nil, nil, nil, 60, nil, ldInfix},
		TokenCONTAINS:    {NodeCONTAINS, nil, nil, nil, 60, nil, ldInfix},
		TokenBEGINSWITH:  {NodeBEGINSWITH, nil, nil, nil, 60, nil, ldInfix},
		TokenENDSWITH:    {NodeENDSWITH, nil, nil, nil, 60, nil, ldInfix},
		TokenCONTAINSNOT: {NodeCONTAINSNOT, nil, nil, nil, 60, nil, ldInfix},
		TokenNOTIN:       {NodeNOTIN, nil, nil, nil, 60, nil, ldInfix},

		// Simple arithmetic expressions

		TokenPLUS:   {NodePLUS, nil, nil, nil, 110, ndPrefix, ldInfix},
		TokenMINUS:  {NodeMINUS, nil, nil, nil, 110, ndPrefix, ldInfix},
		TokenTIMES:  {NodeTIMES, nil, nil, nil, 120, nil, ldInfix},
		TokenDIV:    {NodeDIV, nil, nil, nil, 120, nil, ldInfix},
		TokenMODINT: {NodeMODINT, nil, nil, nil, 120, nil, ldInfix},
		TokenDIVINT: {NodeDIVINT, nil, nil, nil, 120, nil, ldInfix},

		// Brackets

		TokenLPAREN: {NodeLPAREN, nil, nil, nil, 150, ndInner, nil},
		TokenRPAREN: {NodeRPAREN, nil, nil, nil, 0, nil, nil},
		TokenLBRACK: {NodeLBRACK, nil, nil, nil, 150, ndList, nil},
		TokenRBRACK: {NodeRBRACK, nil, nil, nil, 0, nil, nil},
	}
}

// Parser
// ======

/*
Parser data structure
*/
type parser struct {
	name   string          // Name to identify the input
	node   *ASTNode        // Current ast node
	tokens chan LexToken   // Channel which contains lex tokens
	rp     RuntimeProvider // Runtime provider which creates runtime components
}

/*
Parse parses a given input string and returns an AST.
*/
func Parse(name string, input string) (*ASTNode, error) {
	return ParseWithRuntime(name, input, nil)
}

/*
ParseWithRuntime parses a given input string and returns an AST decorated with
runtime components.
*/
func ParseWithRuntime(name string, input string, rp RuntimeProvider) (*ASTNode, error) {
	p := &parser{name, nil, Lex(name, input), rp}

	node, err := p.next()
	if err != nil {
		return nil, err
	}

	p.node = node

	return p.run(0)
}
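// Note: run below and the binding powers in astNodeMap form a Pratt-style
// top-down operator precedence parser. An infix operator consumes its right
// operand by calling run with its own binding power (see ldInfix), and run
// keeps extending the expression only while the next token binds more
// strongly. As an illustrative example, in "a + b * c" the PLUS node
// (binding 110) recurses with right binding 110; TIMES (binding 120) still
// binds inside that call, so "b * c" becomes the right child of PLUS.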
/*
run models the main parser function.
*/
func (p *parser) run(rightBinding int) (*ASTNode, error) {
	var err error

	n := p.node

	p.node, err = p.next()
	if err != nil {
		return nil, err
	}

	// Start with the null denotation of this statement / expression

	if n.nullDenotation == nil {
		return nil, p.newParserError(ErrImpossibleNullDenotation,
			n.Token.String(), *n.Token)
	}

	left, err := n.nullDenotation(p, n)
	if err != nil {
		return nil, err
	}

	// Collect left denotations as long as the left binding power is greater
	// than the initial right one

	for rightBinding < p.node.binding {
		var nleft *ASTNode

		n = p.node

		p.node, err = p.next()
		if err != nil {
			return nil, err
		}

		if n.leftDenotation == nil {
			return nil, p.newParserError(ErrImpossibleLeftDenotation,
				n.Token.String(), *n.Token)
		}

		// Get the next left denotation

		nleft, err = n.leftDenotation(p, n, left)
		left = nleft

		if err != nil {
			return nil, err
		}
	}

	return left, nil
}

/*
next retrieves the next lexer token.
*/
func (p *parser) next() (*ASTNode, error) {
	token, more := <-p.tokens

	if !more {

		// Unexpected end of input - the associated token is an empty error token

		return nil, p.newParserError(ErrUnexpectedEnd, "", token)

	} else if token.ID == TokenError {

		// There was a lexer error; wrap it in a parser error

		return nil, p.newParserError(ErrLexicalError, token.Val, token)

	} else if node, ok := astNodeMap[token.ID]; ok {

		return node.instance(p, &token), nil
	}

	return nil, p.newParserError(ErrUnknownToken, fmt.Sprintf("id:%v (%v)", token.ID, token), token)
}

// Standard null denotation functions
// ==================================

/*
ndTerm is used for terminals.
*/
func ndTerm(p *parser, self *ASTNode) (*ASTNode, error) {
	return self, nil
}

/*
ndInner returns the inner expression of an enclosed block and discards the
block token. This method is used for brackets.
*/
func ndInner(p *parser, self *ASTNode) (*ASTNode, error) {

	// Get the inner expression

	exp, err := p.run(0)
	if err != nil {
		return nil, err
	}

	// We return here the inner expression - discarding the bracket tokens

	return exp, skipToken(p, TokenRPAREN)
}

/*
ndPrefix is used for prefix operators.
*/
func ndPrefix(p *parser, self *ASTNode) (*ASTNode, error) {

	// Make sure a prefix will only prefix the next item

	val, err := p.run(self.binding + 20)
	if err != nil {
		return nil, err
	}

	self.Children = append(self.Children, val)

	return self, nil
}

// Null denotation functions for specific expressions
// ==================================================

/*
ndGet is used to parse get expressions.
*/
func ndGet(p *parser, self *ASTNode) (*ASTNode, error) {

	// Must specify a node kind

	if err := acceptChild(p, self, TokenNODEKIND); err != nil {
		return nil, err
	}

	// Parse the rest and add it as children

	for p.node.Token.ID != TokenEOF {

		exp, err := p.run(0)
		if err != nil {
			return nil, err
		}

		self.Children = append(self.Children, exp)
	}

	return self, nil
}

/*
ndLookup is used to parse lookup expressions.
*/
func ndLookup(p *parser, self *ASTNode) (*ASTNode, error) {

	// Must specify a node kind

	if err := acceptChild(p, self, TokenNODEKIND); err != nil {
		return nil, err
	}

	// Must have at least one node key

	if err := acceptChild(p, self, TokenVALUE); err != nil {
		return nil, err
	}

	// Read all commas and accept further values as additional node keys

	for skipToken(p, TokenCOMMA) == nil {
		if err := acceptChild(p, self, TokenVALUE); err != nil {
			return nil, err
		}
	}

	// Parse the rest and add it as children

	for p.node.Token.ID != TokenEOF {

		exp, err := p.run(0)
		if err != nil {
			return nil, err
		}

		self.Children = append(self.Children, exp)
	}

	return self, nil
}
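// For orientation (the concrete query fragments are assumptions inferred from
// the token names, not taken from the original file): ndGet handles queries of
// the shape "get <node kind> ...", while ndLookup expects a node kind followed
// by one or more comma-separated node keys, roughly "lookup <node kind>
// <key>, <key> ...". Both then parse any remaining clauses (where, traverse,
// show, with, ...) as further children of the root node.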
/*
ndFrom is used to parse from group ... expressions.
*/
func ndFrom(p *parser, self *ASTNode) (*ASTNode, error) {

	// Must be followed by a group keyword

	if err := acceptChild(p, self, TokenGROUP); err != nil {
		return nil, err
	}

	// Must have a group name

	return self, acceptChild(p, self.Children[0], TokenVALUE)
}

/*
ndTraverse is used to parse traverse expressions.
*/
func ndTraverse(p *parser, self *ASTNode) (*ASTNode, error) {

	// Must be followed by traversal spec

	if err := acceptChild(p, self, TokenVALUE); err != nil {
		return nil, err
	}

	// Parse the rest and add it as children - must end with "end" if
	// further clauses are given

	for p.node.Token.ID != TokenEOF && p.node.Token.ID != TokenEND {

		exp, err := p.run(0)
		if err != nil {
			return nil, err
		}

		self.Children = append(self.Children, exp)
	}

	if p.node.Token.ID == TokenEND {
		skipToken(p, TokenEND)
	}

	return self, nil
}

/*
ndFunc is used to parse functions.
*/
func ndFunc(p *parser, self *ASTNode) (*ASTNode, error) {

	// Must specify a name

	if err := acceptChild(p, self, TokenVALUE); err != nil {
		return nil, err
	}

	// Must have an opening bracket

	if err := skipToken(p, TokenLPAREN); err != nil {
		return nil, err
	}

	// Read in the first attribute

	if p.node.Token.ID == TokenVALUE {

		// Next call cannot fail since we just checked for it. Value is optional.

		acceptChild(p, self, TokenVALUE)

		// Read all commas and accept further values as parameters until the end

		for skipToken(p, TokenCOMMA) == nil {
			if err := acceptChild(p, self, TokenVALUE); err != nil {
				return nil, err
			}
		}
	}

	// Must have a closing bracket

	return self, skipToken(p, TokenRPAREN)
}

/*
ndShow is used to parse a show clause.
*/
func ndShow(p *parser, self *ASTNode) (*ASTNode, error) {

	acceptShowTerm := func() error {
		st := astNodeMap[TokenSHOWTERM].instance(p, p.node.Token)

		if p.node.Token.ID == TokenAT {

			// Parse a function

			exp, err := p.run(0)
			if err != nil {
				return err
			}

			st.Children = append(st.Children, exp)

		} else {

			// Skip the value token from which we just created an AST node

			skipToken(p, TokenVALUE)
		}

		// Parse an "as" definition if given

		if p.node.Token.ID == TokenAS {
			current := p.node

			acceptChild(p, st, TokenAS)

			if err := acceptChild(p, current, TokenVALUE); err != nil {
				return err
			}
		}

		// Parse a "format" definition if given

		if p.node.Token.ID == TokenFORMAT {
			current := p.node

			acceptChild(p, st, TokenFORMAT)

			if err := acceptChild(p, current, TokenVALUE); err != nil {
				return err
			}
		}

		self.Children = append(self.Children, st)

		return nil
	}

	// Read in the first node attribute

	if p.node.Token.ID == TokenVALUE || p.node.Token.ID == TokenAT {

		if err := acceptShowTerm(); err != nil {
			return nil, err
		}

		// Read further show entries

		for skipToken(p, TokenCOMMA) == nil {
			if err := acceptShowTerm(); err != nil {
				return nil, err
			}
		}
	}

	return self, nil
}

/*
ndWith is used to parse a with clause.
*/
func ndWith(p *parser, self *ASTNode) (*ASTNode, error) {

	// Parse the rest and add it as children

	for p.node.Token.ID != TokenEOF {

		exp, err := p.run(0)
		if err != nil {
			return nil, err
		}

		self.Children = append(self.Children, exp)

		if p.node.Token.ID == TokenCOMMA {
			skipToken(p, TokenCOMMA)
		}
	}

	return self, nil
}
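// For orientation (the concrete fragments are assumptions inferred from the
// token names, not taken from the original file): a show term is either an
// attribute value or an "@" function call, optionally followed by
// "as <name>" and "format <spec>", e.g. "show name as Name, @count(kind)".
// A with clause is a comma-separated list of directives such as
// "ordering(...)" or "filtering(...)", which are parsed by ndWithFunc below.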
/*
ndWithFunc is used to parse directives in with clauses.
*/
func ndWithFunc(p *parser, self *ASTNode) (*ASTNode, error) {

	// Must have an opening bracket

	if err := skipToken(p, TokenLPAREN); err != nil {
		return nil, err
	}

	for p.node.Token.ID != TokenRPAREN {

		// Parse all the expressions inside the directives

		exp, err := p.run(0)
		if err != nil {
			return nil, err
		}

		self.Children = append(self.Children, exp)

		if p.node.Token.ID == TokenCOMMA {
			skipToken(p, TokenCOMMA)
		}
	}

	// Must have a closing bracket

	return self, skipToken(p, TokenRPAREN)
}

/*
ndList is used to collect elements of a list.
*/
func ndList(p *parser, self *ASTNode) (*ASTNode, error) {

	// Create a list token

	st := astNodeMap[TokenLIST].instance(p, self.Token)

	// Get the inner expression

	for p.node.Token.ID != TokenRBRACK {

		// Parse all the expressions inside the list

		exp, err := p.run(0)
		if err != nil {
			return nil, err
		}

		st.Children = append(st.Children, exp)

		if p.node.Token.ID == TokenCOMMA {
			skipToken(p, TokenCOMMA)
		}
	}

	// Must have a closing bracket

	return st, skipToken(p, TokenRBRACK)
}

// Standard left denotation functions
// ==================================

/*
ldInfix is used for infix operators.
*/
func ldInfix(p *parser, self *ASTNode, left *ASTNode) (*ASTNode, error) {

	right, err := p.run(self.binding)
	if err != nil {
		return nil, err
	}

	self.Children = append(self.Children, left)
	self.Children = append(self.Children, right)

	return self, nil
}

// Helper functions
// ================

/*
skipToken skips over a given token.
*/
func skipToken(p *parser, ids ...LexTokenID) error {
	var err error

	canSkip := func(id LexTokenID) bool {
		for _, i := range ids {
			if i == id {
				return true
			}
		}
		return false
	}

	if !canSkip(p.node.Token.ID) {

		if p.node.Token.ID == TokenEOF {
			return p.newParserError(ErrUnexpectedEnd, "", *p.node.Token)
		}

		return p.newParserError(ErrUnexpectedToken, p.node.Token.Val, *p.node.Token)
	}

	// This should never return an error unless we skip over EOF or complex tokens
	// like values

	p.node, err = p.next()

	return err
}

/*
acceptChild accepts the current token as a child.
*/
func acceptChild(p *parser, self *ASTNode, id LexTokenID) error {
	var err error

	current := p.node

	p.node, err = p.next()
	if err != nil {
		return err
	}

	if current.Token.ID == id {
		self.Children = append(self.Children, current)
		return nil
	}

	return p.newParserError(ErrUnexpectedToken, current.Token.Val, *current.Token)
}
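/*
exampleParse is a minimal usage sketch and not part of the original API. It
shows how a query string is turned into an AST via Parse; the query text
itself is an assumption based on the keywords in astNodeMap, not taken from
the original file.
*/
func exampleParse() {

	// Parse a simple query without runtime components (rp is nil)

	ast, err := Parse("example query", `get Person where name = "John" show name, age`)
	if err != nil {
		fmt.Println("Parse error:", err)
		return
	}

	// Print the indented tree produced by ASTNode.String()

	fmt.Println(ast)
}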