/* * Public Domain Software * * I (Matthias Ladkau) am the author of the source code in this file. * I have placed the source code in this file in the public domain. * * For further information see: http://creativecommons.org/publicdomain/zero/1.0/ */ /* Package parser contains a ECAL parser. Lexer for Source Text Lex() is a lexer function to convert a given search query into a list of tokens. Based on a talk by Rob Pike: Lexical Scanning in Go https://www.youtube.com/watch?v=HxaD_trXwRE The lexer's output is pushed into a channel which is consumed by the parser. This design enables the concurrent processing of the input text by lexer and parser. Parser Parse() is a parser which produces a parse tree from a given set of lexer tokens. Based on an article by Douglas Crockford: Top Down Operator Precedence http://crockford.com/javascript/tdop/tdop.html which is based on the ideas of Vaughan Pratt and his paper: Top Down Operator Precedence http://portal.acm.org/citation.cfm?id=512931 https://tdop.github.io/ ParseWithRuntime() parses a given input and decorates the resulting parse tree with runtime components which can be used to interpret the parsed query. */ package parser /* LexTokenID represents a unique lexer token ID */ type LexTokenID int /* Available lexer token types */ const ( TokenError LexTokenID = iota // Lexing error token with a message as val TokenEOF // End-of-file token TokenAny // Unspecified token (used when building an AST from a Go map structure) TokenCOMMENT // Comment TokenSTRING // String constant TokenNUMBER // Number constant TokenIDENTIFIER // Idendifier // Constructed tokens which are generated by the parser not the lexer TokenSTATEMENTS // A code block TokenLIST // List value TokenMAP // MAP value TokenGUARD // Guard expressions for conditional statements TOKENodeSYMBOLS // Used to separate symbols from other tokens in this list // Condition operators TokenGEQ TokenLEQ TokenNEQ TokenEQ TokenGT TokenLT // Grouping symbols TokenLPAREN TokenRPAREN TokenLBRACK TokenRBRACK TokenLBRACE TokenRBRACE // Separators TokenDOT TokenCOMMA TokenCOLON TokenSEMICOLON // Arithmetic operators TokenPLUS TokenMINUS TokenTIMES TokenDIV TokenDIVINT TokenMODINT // Assignment statement TokenASSIGN // Data structure access TokenACCESS // The colon '' has a context specific meaning and is checked by the parser TOKENodeKEYWORDS // Used to separate keywords from other tokens in this list // Sink definition TokenSINK TokenKINDMATCH TokenSCOPEMATCH TokenSTATEMATCH TokenPRIORITY TokenSUPPRESSES // Function definition TokenFUNC // Boolean operators TokenAND TokenOR TokenNOT // Condition operators TokenLIKE TokenIN TokenHASPREFIX TokenHASSUFFIX TokenNOTIN // Constant terminals TokenFALSE TokenTRUE TokenNULL // Conditional statements TokenIF TokenELIF TokenELSE // Loop statements TokenFOR TokenBREAK TokenCONTINUE ) /* Available parser AST node types */ const ( NodeEOF = "EOF" NodeVALUE = "value" // Simple value // Constructed tokens NodeSTATEMENTS = "statements" // List of statements NodeLIST = "list" // List value NodeMAP = "map" // Map value NodeGUARD = "guard" // Guard expressions for conditional statements // Map entries NodeMAPENTRY = "entry" // Map entry value // Boolean operators NodeOR = "or" NodeAND = "and" NodeNOT = "not" // Condition operators NodeLIKE = "like" NodeIN = "in" NodeBEGINSWITH = "beginswith" NodeENDSWITH = "endswith" NodeNOTIN = "notin" NodeGEQ = ">=" NodeLEQ = "<=" NodeNEQ = "!=" NodeEQ = "==" NodeGT = ">" NodeLT = "<" // Constants NodeTRUE = "true" NodeFALSE = "false" NodeNULL = "null" // Arithmetic operators NodePLUS = "plus" NodeMINUS = "minus" NodeTIMES = "times" NodeDIV = "div" NodeMODINT = "modint" NodeDIVINT = "divint" // Assignment statement NodeASSIGN = ":=" // Function call statement NodeFUNCCALL = "funccall" // Data structure access NodeACCESS = "access" // Sink definition NodeSINK = "sink" NodeKINDMATCH = "kindmatch" NodeSCOPEMATCH = "scopematch" NodeSTATEMATCH = "statematch" NodePRIORITY = "priority" NodeSUPPRESSES = "suppresses" // Block statements NodeCOND = "cond" NodeLOOP = "loop" // Single statements NodeBREAK = "break" NodeCONTINUE = "continue" )