123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285 |
- /*
- * Public Domain Software
- *
- * I (Matthias Ladkau) am the author of the source code in this file.
- * I have placed the source code in this file in the public domain.
- *
- * For further information see: http://creativecommons.org/publicdomain/zero/1.0/
- */
- /*
- Package parser contains an ECAL parser.
- Lexer for Source Text
- Lex() is a lexer function to convert a given search query into a list of tokens.
- Based on a talk by Rob Pike: Lexical Scanning in Go
- https://www.youtube.com/watch?v=HxaD_trXwRE
- The lexer's output is pushed into a channel which is consumed by the parser.
- This design enables the concurrent processing of the input text by lexer and
- parser.
- Parser
- Parse() is a parser which produces a parse tree from a given set of lexer tokens.
- Based on an article by Douglas Crockford: Top Down Operator Precedence
- http://crockford.com/javascript/tdop/tdop.html
- which is based on the ideas of Vaughan Pratt and his paper: Top Down Operator Precedence
- http://portal.acm.org/citation.cfm?id=512931
- https://tdop.github.io/
- ParseWithRuntime() parses a given input and decorates the resulting parse tree
- with runtime components which can be used to interpret the parsed query.
- */
- package parser
/*
LexTokenID is the integer type used for lexer token IDs.
*/
type LexTokenID int
/*
Meta data type names. The value of each constant is its own identifier
spelled out as a string.
*/
const (
	MetaDataPreComment  = "MetaDataPreComment"
	MetaDataPostComment = "MetaDataPostComment"
	MetaDataGeneral     = "MetaDataGeneral"
)
- /*
- Available lexer token types
- */
- const (
- TokenError LexTokenID = iota // Lexing error token with a message as val
- TokenEOF // End-of-file token
- TokenANY // Unspecified token (used when building an AST from a Go map structure)
- TokenPRECOMMENT // Comment /* ... */
- TokenPOSTCOMMENT // Comment # ...
- // Value tokens
- TokenSTRING // String constant
- TokenNUMBER // Number constant
- TokenIDENTIFIER // Idendifier
- // Constructed tokens which are generated by the parser not the lexer
- TokenSTATEMENTS // A code block
- TokenFUNCCALL // A function call
- TokenCOMPACCESS // Access to a composition structure
- TokenLIST // List value
- TokenMAP // MAP value
- TokenPARAMS // Function parameters
- TokenGUARD // Conditional statements
- TOKENodeSYMBOLS // Used to separate symbols from other tokens in this list
- // Condition operators
- TokenGEQ
- TokenLEQ
- TokenNEQ
- TokenEQ
- TokenGT
- TokenLT
- // Grouping symbols
- TokenLPAREN
- TokenRPAREN
- TokenLBRACK
- TokenRBRACK
- TokenLBRACE
- TokenRBRACE
- // Separators
- TokenDOT
- TokenCOMMA
- TokenSEMICOLON
- // Grouping
- TokenCOLON
- TokenEQUAL
- // Arithmetic operators
- TokenPLUS
- TokenMINUS
- TokenTIMES
- TokenDIV
- TokenDIVINT
- TokenMODINT
- // Assignment statement
- TokenASSIGN
- TOKENodeKEYWORDS // Used to separate keywords from other tokens in this list
- // Import statement
- TokenIMPORT
- TokenAS
- // Sink definition
- TokenSINK
- TokenKINDMATCH
- TokenSCOPEMATCH
- TokenSTATEMATCH
- TokenPRIORITY
- TokenSUPPRESSES
- // Function definition
- TokenFUNC
- TokenRETURN
- // Boolean operators
- TokenAND
- TokenOR
- TokenNOT
- // Condition operators
- TokenLIKE
- TokenIN
- TokenHASPREFIX
- TokenHASSUFFIX
- TokenNOTIN
- // Constant terminals
- TokenFALSE
- TokenTRUE
- TokenNULL
- // Conditional statements
- TokenIF
- TokenELIF
- TokenELSE
- // Loop statements
- TokenFOR
- TokenBREAK
- TokenCONTINUE
- TokenENDLIST
- )
- /*
- IsValidTokenID check if a given token ID is valid.
- */
- func IsValidTokenID(value int) bool {
- return value < int(TokenENDLIST)
- }
/*
Names of the AST node types which the parser can produce.
*/
const (
	NodeEOF = "EOF"

	// Value nodes

	NodeSTRING     = "string"     // String constant
	NodeNUMBER     = "number"     // Number constant
	NodeIDENTIFIER = "identifier" // Identifier

	// Constructed tokens

	NodeSTATEMENTS = "statements" // List of statements
	NodeFUNCCALL   = "funccall"   // Function call
	NodeCOMPACCESS = "compaccess" // Composition structure access
	NodeLIST       = "list"       // List value
	NodeMAP        = "map"        // Map value
	NodePARAMS     = "params"     // Function parameters
	NodeGUARD      = "guard"      // Guard expressions for conditional statements

	// Condition operators

	NodeGEQ = ">="
	NodeLEQ = "<="
	NodeNEQ = "!="
	NodeEQ  = "=="
	NodeGT  = ">"
	NodeLT  = "<"

	// Separators

	NodeKVP    = "kvp"    // Key-value pair
	NodePRESET = "preset" // Preset value

	// Arithmetic operators

	NodePLUS   = "plus"
	NodeMINUS  = "minus"
	NodeTIMES  = "times"
	NodeDIV    = "div"
	NodeMODINT = "modint"
	NodeDIVINT = "divint"

	// Assignment statement

	NodeASSIGN = ":="

	// Import statement

	NodeIMPORT = "import"

	// Sink definition

	NodeSINK       = "sink"
	NodeKINDMATCH  = "kindmatch"
	NodeSCOPEMATCH = "scopematch"
	NodeSTATEMATCH = "statematch"
	NodePRIORITY   = "priority"
	NodeSUPPRESSES = "suppresses"

	// Function definition

	NodeFUNC   = "function"
	NodeRETURN = "return"

	// Boolean operators

	NodeAND = "and"
	NodeOR  = "or"
	NodeNOT = "not"

	// Condition operators

	NodeLIKE      = "like"
	NodeIN        = "in"
	NodeHASPREFIX = "hasprefix"
	NodeHASSUFFIX = "hassuffix"
	NodeNOTIN     = "notin"

	// Constant terminals

	NodeTRUE  = "true"
	NodeFALSE = "false"
	NodeNULL  = "null"

	// Conditional statements

	NodeIF = "if"

	// Loop statements

	NodeLOOP     = "loop"
	NodeBREAK    = "break"
	NodeCONTINUE = "continue"
)
|