/* * ECAL * * Copyright 2020 Matthias Ladkau. All rights reserved. * * This Source Code Form is subject to the terms of the MIT * License, If a copy of the MIT License was not distributed with this * file, You can obtain one at https://opensource.org/licenses/MIT. */ /* Package parser contains a ECAL parser. Lexer for Source Text Lex() is a lexer function to convert a given search query into a list of tokens. Based on a talk by Rob Pike: Lexical Scanning in Go https://www.youtube.com/watch?v=HxaD_trXwRE The lexer's output is pushed into a channel which is consumed by the parser. This design enables the concurrent processing of the input text by lexer and parser. Parser Parse() is a parser which produces a parse tree from a given set of lexer tokens. Based on an article by Douglas Crockford: Top Down Operator Precedence http://crockford.com/javascript/tdop/tdop.html which is based on the ideas of Vaughan Pratt and his paper: Top Down Operator Precedence http://portal.acm.org/citation.cfm?id=512931 https://tdop.github.io/ ParseWithRuntime() parses a given input and decorates the resulting parse tree with runtime components which can be used to interpret the parsed query. */ package parser /* LexTokenID represents a unique lexer token ID */ type LexTokenID int /* Available meta data types */ const ( MetaDataPreComment = "MetaDataPreComment" MetaDataPostComment = "MetaDataPostComment" MetaDataGeneral = "MetaDataGeneral" ) /* Available lexer token types */ const ( TokenError LexTokenID = iota // Lexing error token with a message as val TokenEOF // End-of-file token TokenANY // Unspecified token (used when building an AST from a Go map structure) TokenPRECOMMENT // Comment /* ... */ TokenPOSTCOMMENT // Comment # ... // Value tokens TokenSTRING // String constant TokenNUMBER // Number constant TokenIDENTIFIER // Idendifier // Constructed tokens which are generated by the parser not the lexer TokenSTATEMENTS // A code block TokenFUNCCALL // A function call TokenCOMPACCESS // Access to a composition structure TokenLIST // List value TokenMAP // MAP value TokenPARAMS // Function parameters TokenGUARD // Conditional statements TOKENodeSYMBOLS // Used to separate symbols from other tokens in this list // Condition operators TokenGEQ TokenLEQ TokenNEQ TokenEQ TokenGT TokenLT // Grouping symbols TokenLPAREN TokenRPAREN TokenLBRACK TokenRBRACK TokenLBRACE TokenRBRACE // Separators TokenDOT TokenCOMMA TokenSEMICOLON // Grouping TokenCOLON TokenEQUAL // Arithmetic operators TokenPLUS TokenMINUS TokenTIMES TokenDIV TokenDIVINT TokenMODINT // Assignment statement TokenASSIGN TokenLET TOKENodeKEYWORDS // Used to separate keywords from other tokens in this list // Import statement TokenIMPORT TokenAS // Sink definition TokenSINK TokenKINDMATCH TokenSCOPEMATCH TokenSTATEMATCH TokenPRIORITY TokenSUPPRESSES // Function definition TokenFUNC TokenRETURN // Boolean operators TokenAND TokenOR TokenNOT // Condition operators TokenLIKE TokenIN TokenHASPREFIX TokenHASSUFFIX TokenNOTIN // Constant terminals TokenFALSE TokenTRUE TokenNULL // Conditional statements TokenIF TokenELIF TokenELSE // Loop statements TokenFOR TokenBREAK TokenCONTINUE // Try block TokenTRY TokenEXCEPT TokenOTHERWISE TokenFINALLY // Mutex block TokenMUTEX TokenENDLIST ) /* IsValidTokenID check if a given token ID is valid. */ func IsValidTokenID(value int) bool { return value < int(TokenENDLIST) } /* Available parser AST node types */ const ( NodeEOF = "EOF" NodeSTRING = "string" // String constant NodeNUMBER = "number" // Number constant NodeIDENTIFIER = "identifier" // Idendifier // Constructed tokens NodeSTATEMENTS = "statements" // List of statements NodeFUNCCALL = "funccall" // Function call NodeCOMPACCESS = "compaccess" // Composition structure access NodeLIST = "list" // List value NodeMAP = "map" // Map value NodePARAMS = "params" // Function parameters NodeGUARD = "guard" // Guard expressions for conditional statements // Condition operators NodeGEQ = ">=" NodeLEQ = "<=" NodeNEQ = "!=" NodeEQ = "==" NodeGT = ">" NodeLT = "<" // Separators NodeKVP = "kvp" // Key-value pair NodePRESET = "preset" // Preset value // Arithmetic operators NodePLUS = "plus" NodeMINUS = "minus" NodeTIMES = "times" NodeDIV = "div" NodeMODINT = "modint" NodeDIVINT = "divint" // Assignment statement NodeASSIGN = ":=" NodeLET = "let" // Import statement NodeIMPORT = "import" // Sink definition NodeSINK = "sink" NodeKINDMATCH = "kindmatch" NodeSCOPEMATCH = "scopematch" NodeSTATEMATCH = "statematch" NodePRIORITY = "priority" NodeSUPPRESSES = "suppresses" // Function definition NodeFUNC = "function" NodeRETURN = "return" // Boolean operators NodeAND = "and" NodeOR = "or" NodeNOT = "not" // Condition operators NodeLIKE = "like" NodeIN = "in" NodeHASPREFIX = "hasprefix" NodeHASSUFFIX = "hassuffix" NodeNOTIN = "notin" // Constant terminals NodeTRUE = "true" NodeFALSE = "false" NodeNULL = "null" // Conditional statements NodeIF = "if" // Loop statements NodeLOOP = "loop" NodeBREAK = "break" NodeCONTINUE = "continue" // Try block NodeTRY = "try" NodeEXCEPT = "except" NodeAS = "as" NodeOTHERWISE = "otherwise" NodeFINALLY = "finally" // Mutex block NodeMUTEX = "mutex" )