123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807 |
- /*
- * EliasDB
- *
- * Copyright 2016 Matthias Ladkau. All rights reserved.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/.
- */
- package parser
- import (
- "bytes"
- "fmt"
- "devt.de/krotik/common/stringutil"
- )
- // AST Nodes
- // =========
- /*
- ASTNode models a node in the AST
- */
- type ASTNode struct {
- Name string // Name of the node
- Token *LexToken // Lexer token of this ASTNode
- Children []*ASTNode // Child nodes
- Runtime Runtime // Runtime component for this ASTNode
- binding int // Binding power of this node
- nullDenotation func(p *parser, self *ASTNode) (*ASTNode, error) // Configure token as beginning node
- leftDenotation func(p *parser, self *ASTNode, left *ASTNode) (*ASTNode, error) // Configure token as left node
- }
- /*
- ASTFromPlain creates an AST from a plain AST.
- A plain AST is a nested map structure like this:
- {
- name : <name of node>
- value : <value of node>
- children : [ <child nodes> ]
- }
- */
- func ASTFromPlain(plainAST map[string]interface{}) (*ASTNode, error) {
- var astChildren []*ASTNode
- name, ok := plainAST["name"]
- if !ok {
- return nil, fmt.Errorf("Found plain ast node without a name: %v", plainAST)
- }
- value, ok := plainAST["value"]
- if !ok {
- return nil, fmt.Errorf("Found plain ast node without a value: %v", plainAST)
- }
- // Create children
- if children, ok := plainAST["children"]; ok {
- if ic, ok := children.([]interface{}); ok {
- // Do a list conversion if necessary - this is necessary when we parse
- // JSON with map[string]interface{} this
- childrenList := make([]map[string]interface{}, len(ic))
- for i := range ic {
- childrenList[i] = ic[i].(map[string]interface{})
- }
- children = childrenList
- }
- for _, child := range children.([]map[string]interface{}) {
- astChild, err := ASTFromPlain(child)
- if err != nil {
- return nil, err
- }
- astChildren = append(astChildren, astChild)
- }
- }
- return &ASTNode{fmt.Sprint(name), &LexToken{TokenGeneral, 0,
- fmt.Sprint(value), 0, 0}, astChildren, nil, 0, nil, nil}, nil
- }
- /*
- Create a new instance of this ASTNode which is connected to a concrete lexer token.
- */
- func (n *ASTNode) instance(p *parser, t *LexToken) *ASTNode {
- ret := &ASTNode{n.Name, t, make([]*ASTNode, 0, 2), nil, n.binding, n.nullDenotation, n.leftDenotation}
- if p.rp != nil {
- ret.Runtime = p.rp.Runtime(ret)
- }
- return ret
- }
- /*
- Plain returns this ASTNode and all its children as plain AST. A plain AST
- only contains map objects, lists and primitive types which can be serialized
- with JSON.
- */
- func (n *ASTNode) Plain() map[string]interface{} {
- ret := make(map[string]interface{})
- ret["name"] = n.Name
- lenChildren := len(n.Children)
- if lenChildren > 0 {
- children := make([]map[string]interface{}, lenChildren)
- for i, child := range n.Children {
- children[i] = child.Plain()
- }
- ret["children"] = children
- }
- // The value is what the lexer found in the source
- ret["value"] = n.Token.Val
- return ret
- }
- /*
- String returns a string representation of this token.
- */
- func (n *ASTNode) String() string {
- var buf bytes.Buffer
- n.levelString(0, &buf)
- return buf.String()
- }
- /*
- levelString function to recursively print the tree.
- */
- func (n *ASTNode) levelString(indent int, buf *bytes.Buffer) {
- // Print current level
- buf.WriteString(stringutil.GenerateRollingString(" ", indent*2))
- if n.Name == NodeVALUE || (n.Name == NodeSHOWTERM && n.Token.Val != "@") {
- buf.WriteString(fmt.Sprintf(n.Name+": %v", n.Token))
- } else {
- buf.WriteString(n.Name)
- }
- buf.WriteString("\n")
- // Print children
- for _, child := range n.Children {
- child.levelString(indent+1, buf)
- }
- }
- /*
- Map of AST nodes corresponding to lexer tokens
- */
- var astNodeMap map[LexTokenID]*ASTNode
- /*
- TokenSHOWTERM is an extra token which is generated by the parser
- to group show terms
- */
- const TokenSHOWTERM = LexTokenID(-1)
- func init() {
- astNodeMap = map[LexTokenID]*ASTNode{
- TokenEOF: {NodeEOF, nil, nil, nil, 0, ndTerm, nil},
- TokenVALUE: {NodeVALUE, nil, nil, nil, 0, ndTerm, nil},
- TokenNODEKIND: {NodeVALUE, nil, nil, nil, 0, ndTerm, nil},
- TokenTRUE: {NodeTRUE, nil, nil, nil, 0, ndTerm, nil},
- TokenFALSE: {NodeFALSE, nil, nil, nil, 0, ndTerm, nil},
- TokenNULL: {NodeNULL, nil, nil, nil, 0, ndTerm, nil},
- TokenAT: {NodeFUNC, nil, nil, nil, 0, ndFunc, nil},
- TokenORDERING: {NodeORDERING, nil, nil, nil, 0, ndWithFunc, nil},
- TokenFILTERING: {NodeFILTERING, nil, nil, nil, 0, ndWithFunc, nil},
- TokenNULLTRAVERSAL: {NodeNULLTRAVERSAL, nil, nil, nil, 0, ndWithFunc, nil},
- // Special tokens - always handled in a denotation function
- TokenCOMMA: {NodeCOMMA, nil, nil, nil, 0, nil, nil},
- TokenGROUP: {NodeGROUP, nil, nil, nil, 0, nil, nil},
- TokenEND: {NodeEND, nil, nil, nil, 0, nil, nil},
- TokenAS: {NodeAS, nil, nil, nil, 0, nil, nil},
- TokenFORMAT: {NodeFORMAT, nil, nil, nil, 0, nil, nil},
- // Keywords
- TokenGET: {NodeGET, nil, nil, nil, 0, ndGet, nil},
- TokenLOOKUP: {NodeLOOKUP, nil, nil, nil, 0, ndLookup, nil},
- TokenFROM: {NodeFROM, nil, nil, nil, 0, ndFrom, nil},
- TokenWHERE: {NodeWHERE, nil, nil, nil, 0, ndPrefix, nil},
- TokenUNIQUE: {NodeUNIQUE, nil, nil, nil, 0, ndPrefix, nil},
- TokenUNIQUECOUNT: {NodeUNIQUECOUNT, nil, nil, nil, 0, ndPrefix, nil},
- TokenISNOTNULL: {NodeISNOTNULL, nil, nil, nil, 0, ndPrefix, nil},
- TokenASCENDING: {NodeASCENDING, nil, nil, nil, 0, ndPrefix, nil},
- TokenDESCENDING: {NodeDESCENDING, nil, nil, nil, 0, ndPrefix, nil},
- TokenTRAVERSE: {NodeTRAVERSE, nil, nil, nil, 0, ndTraverse, nil},
- TokenPRIMARY: {NodePRIMARY, nil, nil, nil, 0, ndPrefix, nil},
- TokenSHOW: {NodeSHOW, nil, nil, nil, 0, ndShow, nil},
- TokenSHOWTERM: {NodeSHOWTERM, nil, nil, nil, 0, ndShow, nil},
- TokenWITH: {NodeWITH, nil, nil, nil, 0, ndWith, nil},
- TokenLIST: {NodeLIST, nil, nil, nil, 0, nil, nil},
- // Boolean operations
- TokenNOT: {NodeNOT, nil, nil, nil, 20, ndPrefix, nil},
- TokenOR: {NodeOR, nil, nil, nil, 30, nil, ldInfix},
- TokenAND: {NodeAND, nil, nil, nil, 40, nil, ldInfix},
- TokenGEQ: {NodeGEQ, nil, nil, nil, 60, nil, ldInfix},
- TokenLEQ: {NodeLEQ, nil, nil, nil, 60, nil, ldInfix},
- TokenNEQ: {NodeNEQ, nil, nil, nil, 60, nil, ldInfix},
- TokenEQ: {NodeEQ, nil, nil, nil, 60, nil, ldInfix},
- TokenGT: {NodeGT, nil, nil, nil, 60, nil, ldInfix},
- TokenLT: {NodeLT, nil, nil, nil, 60, nil, ldInfix},
- TokenLIKE: {NodeLIKE, nil, nil, nil, 60, nil, ldInfix},
- TokenIN: {NodeIN, nil, nil, nil, 60, nil, ldInfix},
- TokenCONTAINS: {NodeCONTAINS, nil, nil, nil, 60, nil, ldInfix},
- TokenBEGINSWITH: {NodeBEGINSWITH, nil, nil, nil, 60, nil, ldInfix},
- TokenENDSWITH: {NodeENDSWITH, nil, nil, nil, 60, nil, ldInfix},
- TokenCONTAINSNOT: {NodeCONTAINSNOT, nil, nil, nil, 60, nil, ldInfix},
- TokenNOTIN: {NodeNOTIN, nil, nil, nil, 60, nil, ldInfix},
- // Simple arithmetic expressions
- TokenPLUS: {NodePLUS, nil, nil, nil, 110, ndPrefix, ldInfix},
- TokenMINUS: {NodeMINUS, nil, nil, nil, 110, ndPrefix, ldInfix},
- TokenTIMES: {NodeTIMES, nil, nil, nil, 120, nil, ldInfix},
- TokenDIV: {NodeDIV, nil, nil, nil, 120, nil, ldInfix},
- TokenMODINT: {NodeMODINT, nil, nil, nil, 120, nil, ldInfix},
- TokenDIVINT: {NodeDIVINT, nil, nil, nil, 120, nil, ldInfix},
- // Brackets
- TokenLPAREN: {NodeLPAREN, nil, nil, nil, 150, ndInner, nil},
- TokenRPAREN: {NodeRPAREN, nil, nil, nil, 0, nil, nil},
- TokenLBRACK: {NodeLBRACK, nil, nil, nil, 150, ndList, nil},
- TokenRBRACK: {NodeRBRACK, nil, nil, nil, 0, nil, nil},
- }
- }
- // Parser
- // ======
- /*
- Parser data structure
- */
- type parser struct {
- name string // Name to identify the input
- node *ASTNode // Current ast node
- tokens chan LexToken // Channel which contains lex tokens
- rp RuntimeProvider // Runtime provider which creates runtime components
- }
- /*
- Parse parses a given input string and returns an AST.
- */
- func Parse(name string, input string) (*ASTNode, error) {
- return ParseWithRuntime(name, input, nil)
- }
- /*
- ParseWithRuntime parses a given input string and returns an AST decorated with
- runtime components.
- */
- func ParseWithRuntime(name string, input string, rp RuntimeProvider) (*ASTNode, error) {
- p := &parser{name, nil, Lex(name, input), rp}
- node, err := p.next()
- if err != nil {
- return nil, err
- }
- p.node = node
- return p.run(0)
- }
- /*
- run models the main parser function.
- */
- func (p *parser) run(rightBinding int) (*ASTNode, error) {
- var err error
- n := p.node
- p.node, err = p.next()
- if err != nil {
- return nil, err
- }
- // Start with the null denotation of this statement / expression
- if n.nullDenotation == nil {
- return nil, p.newParserError(ErrImpossibleNullDenotation,
- n.Token.String(), *n.Token)
- }
- left, err := n.nullDenotation(p, n)
- if err != nil {
- return nil, err
- }
- // Collect left denotations as long as the left binding power is greater
- // than the initial right one
- for rightBinding < p.node.binding {
- var nleft *ASTNode
- n = p.node
- p.node, err = p.next()
- if err != nil {
- return nil, err
- }
- if n.leftDenotation == nil {
- return nil, p.newParserError(ErrImpossibleLeftDenotation,
- n.Token.String(), *n.Token)
- }
- // Get the next left denotation
- nleft, err = n.leftDenotation(p, n, left)
- left = nleft
- if err != nil {
- return nil, err
- }
- }
- return left, nil
- }
- /*
- next retrieves the next lexer token.
- */
- func (p *parser) next() (*ASTNode, error) {
- token, more := <-p.tokens
- if !more {
- // Unexpected end of input - the associated token is an empty error token
- return nil, p.newParserError(ErrUnexpectedEnd, "", token)
- } else if token.ID == TokenError {
- // There was a lexer error wrap it in a parser error
- return nil, p.newParserError(ErrLexicalError, token.Val, token)
- } else if node, ok := astNodeMap[token.ID]; ok {
- return node.instance(p, &token), nil
- }
- return nil, p.newParserError(ErrUnknownToken, fmt.Sprintf("id:%v (%v)", token.ID, token), token)
- }
- // Standard null denotation functions
- // ==================================
- /*
- ndTerm is used for terminals.
- */
- func ndTerm(p *parser, self *ASTNode) (*ASTNode, error) {
- return self, nil
- }
- /*
- ndInner returns the inner expression of an enclosed block and discard the
- block token. This method is used for brackets.
- */
- func ndInner(p *parser, self *ASTNode) (*ASTNode, error) {
- // Get the inner expression
- exp, err := p.run(0)
- if err != nil {
- return nil, err
- }
- // We return here the inner expression - discarding the bracket tokens
- return exp, skipToken(p, TokenRPAREN)
- }
- /*
- ndPrefix is used for prefix operators.
- */
- func ndPrefix(p *parser, self *ASTNode) (*ASTNode, error) {
- // Make sure a prefix will only prefix the next item
- val, err := p.run(self.binding + 20)
- if err != nil {
- return nil, err
- }
- self.Children = append(self.Children, val)
- return self, nil
- }
- // Null denotation functions for specific expressions
- // ==================================================
- /*
- ndGet is used to parse lookup expressions.
- */
- func ndGet(p *parser, self *ASTNode) (*ASTNode, error) {
- // Must specify a node kind
- if err := acceptChild(p, self, TokenNODEKIND); err != nil {
- return nil, err
- }
- // Parse the rest and add it as children
- for p.node.Token.ID != TokenEOF {
- exp, err := p.run(0)
- if err != nil {
- return nil, err
- }
- self.Children = append(self.Children, exp)
- }
- return self, nil
- }
- /*
- ndLookup is used to parse lookup expressions.
- */
- func ndLookup(p *parser, self *ASTNode) (*ASTNode, error) {
- // Must specify a node kind
- if err := acceptChild(p, self, TokenNODEKIND); err != nil {
- return nil, err
- }
- // Must have at least on node key
- if err := acceptChild(p, self, TokenVALUE); err != nil {
- return nil, err
- }
- // Read all commas and accept further values as additional node keys
- for skipToken(p, TokenCOMMA) == nil {
- if err := acceptChild(p, self, TokenVALUE); err != nil {
- return nil, err
- }
- }
- // Parse the rest and add it as children
- for p.node.Token.ID != TokenEOF {
- exp, err := p.run(0)
- if err != nil {
- return nil, err
- }
- self.Children = append(self.Children, exp)
- }
- return self, nil
- }
- /*
- ndFrom is used to parse from group ... expressions.
- */
- func ndFrom(p *parser, self *ASTNode) (*ASTNode, error) {
- // Must be followed by a group keyword
- if err := acceptChild(p, self, TokenGROUP); err != nil {
- return nil, err
- }
- // Must have a group name
- return self, acceptChild(p, self.Children[0], TokenVALUE)
- }
- /*
- ndTraverse is used to parse traverse expressions.
- */
- func ndTraverse(p *parser, self *ASTNode) (*ASTNode, error) {
- // Must be followed by traversal spec
- if err := acceptChild(p, self, TokenVALUE); err != nil {
- return nil, err
- }
- // Parse the rest and add it as children - must end with "end" if
- // further clauses are given
- for p.node.Token.ID != TokenEOF && p.node.Token.ID != TokenEND {
- exp, err := p.run(0)
- if err != nil {
- return nil, err
- }
- self.Children = append(self.Children, exp)
- }
- if p.node.Token.ID == TokenEND {
- skipToken(p, TokenEND)
- }
- return self, nil
- }
- /*
- ndFunc is used to parse functions.
- */
- func ndFunc(p *parser, self *ASTNode) (*ASTNode, error) {
- // Must specify a name
- if err := acceptChild(p, self, TokenVALUE); err != nil {
- return nil, err
- }
- // Must have an opening bracket
- if err := skipToken(p, TokenLPAREN); err != nil {
- return nil, err
- }
- // Read in the first attribute
- if p.node.Token.ID == TokenVALUE {
- // Next call cannot fail since we just checked for it. Value is optional.
- acceptChild(p, self, TokenVALUE)
- // Read all commas and accept further values as parameters until the end
- for skipToken(p, TokenCOMMA) == nil {
- if err := acceptChild(p, self, TokenVALUE); err != nil {
- return nil, err
- }
- }
- }
- // Must have a closing bracket
- return self, skipToken(p, TokenRPAREN)
- }
- /*
- ndShow is used to parse a show clauses.
- */
- func ndShow(p *parser, self *ASTNode) (*ASTNode, error) {
- acceptShowTerm := func() error {
- st := astNodeMap[TokenSHOWTERM].instance(p, p.node.Token)
- if p.node.Token.ID == TokenAT {
- // Parse a function
- exp, err := p.run(0)
- if err != nil {
- return err
- }
- st.Children = append(st.Children, exp)
- } else {
- // Skip the value token from which we just created an AST node
- skipToken(p, TokenVALUE)
- }
- // Parse an "as" definition if given
- if p.node.Token.ID == TokenAS {
- current := p.node
- acceptChild(p, st, TokenAS)
- if err := acceptChild(p, current, TokenVALUE); err != nil {
- return err
- }
- }
- // Parse a "format" definition if given
- if p.node.Token.ID == TokenFORMAT {
- current := p.node
- acceptChild(p, st, TokenFORMAT)
- if err := acceptChild(p, current, TokenVALUE); err != nil {
- return err
- }
- }
- self.Children = append(self.Children, st)
- return nil
- }
- // Read in the first node attribute
- if p.node.Token.ID == TokenVALUE || p.node.Token.ID == TokenAT {
- if err := acceptShowTerm(); err != nil {
- return nil, err
- }
- // Read further show entries
- for skipToken(p, TokenCOMMA) == nil {
- if err := acceptShowTerm(); err != nil {
- return nil, err
- }
- }
- }
- return self, nil
- }
- /*
- ndWith is used to parse a with clauses.
- */
- func ndWith(p *parser, self *ASTNode) (*ASTNode, error) {
- // Parse the rest and add it as children
- for p.node.Token.ID != TokenEOF {
- exp, err := p.run(0)
- if err != nil {
- return nil, err
- }
- self.Children = append(self.Children, exp)
- if p.node.Token.ID == TokenCOMMA {
- skipToken(p, TokenCOMMA)
- }
- }
- return self, nil
- }
- /*
- ndWithFunc is used to parse directives in with clauses.
- */
- func ndWithFunc(p *parser, self *ASTNode) (*ASTNode, error) {
- // Must have an opening bracket
- if err := skipToken(p, TokenLPAREN); err != nil {
- return nil, err
- }
- for p.node.Token.ID != TokenRPAREN {
- // Parse all the expressions inside the directives
- exp, err := p.run(0)
- if err != nil {
- return nil, err
- }
- self.Children = append(self.Children, exp)
- if p.node.Token.ID == TokenCOMMA {
- skipToken(p, TokenCOMMA)
- }
- }
- // Must have a closing bracket
- return self, skipToken(p, TokenRPAREN)
- }
- /*
- ndList is used to collect elements of a list.
- */
- func ndList(p *parser, self *ASTNode) (*ASTNode, error) {
- // Create a list token
- st := astNodeMap[TokenLIST].instance(p, self.Token)
- // Get the inner expression
- for p.node.Token.ID != TokenRBRACK {
- // Parse all the expressions inside the directives
- exp, err := p.run(0)
- if err != nil {
- return nil, err
- }
- st.Children = append(st.Children, exp)
- if p.node.Token.ID == TokenCOMMA {
- skipToken(p, TokenCOMMA)
- }
- }
- // Must have a closing bracket
- return st, skipToken(p, TokenRBRACK)
- }
- // Standard left denotation functions
- // ==================================
- /*
- ldInfix is used for infix operators.
- */
- func ldInfix(p *parser, self *ASTNode, left *ASTNode) (*ASTNode, error) {
- right, err := p.run(self.binding)
- if err != nil {
- return nil, err
- }
- self.Children = append(self.Children, left)
- self.Children = append(self.Children, right)
- return self, nil
- }
- // Helper functions
- // ================
- /*
- skipToken skips over a given token.
- */
- func skipToken(p *parser, ids ...LexTokenID) error {
- var err error
- canSkip := func(id LexTokenID) bool {
- for _, i := range ids {
- if i == id {
- return true
- }
- }
- return false
- }
- if !canSkip(p.node.Token.ID) {
- if p.node.Token.ID == TokenEOF {
- return p.newParserError(ErrUnexpectedEnd, "", *p.node.Token)
- }
- return p.newParserError(ErrUnexpectedToken, p.node.Token.Val, *p.node.Token)
- }
- // This should never return an error unless we skip over EOF or complex tokens
- // like values
- p.node, err = p.next()
- return err
- }
- /*
- acceptChild accepts the current token as a child.
- */
- func acceptChild(p *parser, self *ASTNode, id LexTokenID) error {
- var err error
- current := p.node
- p.node, err = p.next()
- if err != nil {
- return err
- }
- if current.Token.ID == id {
- self.Children = append(self.Children, current)
- return nil
- }
- return p.newParserError(ErrUnexpectedToken, current.Token.Val, *current.Token)
- }
|