const.go 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. /*
  2. * ECAL
  3. *
  4. * Copyright 2020 Matthias Ladkau. All rights reserved.
  5. *
  6. * This Source Code Form is subject to the terms of the MIT
  7. * License, If a copy of the MIT License was not distributed with this
  8. * file, You can obtain one at https://opensource.org/licenses/MIT.
  9. */
  10. /*
  11. Package parser contains an ECAL parser.
  12. Lexer for Source Text
  13. Lex() is a lexer function to convert a given search query into a list of tokens.
  14. Based on a talk by Rob Pike: Lexical Scanning in Go
  15. https://www.youtube.com/watch?v=HxaD_trXwRE
  16. The lexer's output is pushed into a channel which is consumed by the parser.
  17. This design enables the concurrent processing of the input text by lexer and
  18. parser.
  19. Parser
  20. Parse() is a parser which produces a parse tree from a given set of lexer tokens.
  21. Based on an article by Douglas Crockford: Top Down Operator Precedence
  22. http://crockford.com/javascript/tdop/tdop.html
  23. which is based on the ideas of Vaughan Pratt and his paper: Top Down Operator Precedence
  24. http://portal.acm.org/citation.cfm?id=512931
  25. https://tdop.github.io/
  26. ParseWithRuntime() parses a given input and decorates the resulting parse tree
  27. with runtime components which can be used to interpret the parsed query.
  28. */
  29. package parser
  30. /*
  31. LexTokenID represents a unique lexer token ID
  32. */
  33. type LexTokenID int
  34. /*
  35. Available meta data types
  36. */
  37. const (
  38. MetaDataPreComment = "MetaDataPreComment"
  39. MetaDataPostComment = "MetaDataPostComment"
  40. MetaDataGeneral = "MetaDataGeneral"
  41. )
  42. /*
  43. Available lexer token types
  44. */
  45. const (
  46. TokenError LexTokenID = iota // Lexing error token with a message as val
  47. TokenEOF // End-of-file token
  48. TokenANY // Unspecified token (used when building an AST from a Go map structure)
  49. TokenPRECOMMENT // Comment /* ... */
  50. TokenPOSTCOMMENT // Comment # ...
  51. // Value tokens
  52. TokenSTRING // String constant
  53. TokenNUMBER // Number constant
  54. TokenIDENTIFIER // Idendifier
  55. // Constructed tokens which are generated by the parser not the lexer
  56. TokenSTATEMENTS // A code block
  57. TokenFUNCCALL // A function call
  58. TokenCOMPACCESS // Access to a composition structure
  59. TokenLIST // List value
  60. TokenMAP // MAP value
  61. TokenPARAMS // Function parameters
  62. TokenGUARD // Conditional statements
  63. TOKENodeSYMBOLS // Used to separate symbols from other tokens in this list
  64. // Condition operators
  65. TokenGEQ
  66. TokenLEQ
  67. TokenNEQ
  68. TokenEQ
  69. TokenGT
  70. TokenLT
  71. // Grouping symbols
  72. TokenLPAREN
  73. TokenRPAREN
  74. TokenLBRACK
  75. TokenRBRACK
  76. TokenLBRACE
  77. TokenRBRACE
  78. // Separators
  79. TokenDOT
  80. TokenCOMMA
  81. TokenSEMICOLON
  82. // Grouping
  83. TokenCOLON
  84. TokenEQUAL
  85. // Arithmetic operators
  86. TokenPLUS
  87. TokenMINUS
  88. TokenTIMES
  89. TokenDIV
  90. TokenDIVINT
  91. TokenMODINT
  92. // Assignment statement
  93. TokenASSIGN
  94. TokenLET
  95. TOKENodeKEYWORDS // Used to separate keywords from other tokens in this list
  96. // Import statement
  97. TokenIMPORT
  98. TokenAS
  99. // Sink definition
  100. TokenSINK
  101. TokenKINDMATCH
  102. TokenSCOPEMATCH
  103. TokenSTATEMATCH
  104. TokenPRIORITY
  105. TokenSUPPRESSES
  106. // Function definition
  107. TokenFUNC
  108. TokenRETURN
  109. // Boolean operators
  110. TokenAND
  111. TokenOR
  112. TokenNOT
  113. // Condition operators
  114. TokenLIKE
  115. TokenIN
  116. TokenHASPREFIX
  117. TokenHASSUFFIX
  118. TokenNOTIN
  119. // Constant terminals
  120. TokenFALSE
  121. TokenTRUE
  122. TokenNULL
  123. // Conditional statements
  124. TokenIF
  125. TokenELIF
  126. TokenELSE
  127. // Loop statements
  128. TokenFOR
  129. TokenBREAK
  130. TokenCONTINUE
  131. // Try block
  132. TokenTRY
  133. TokenEXCEPT
  134. TokenOTHERWISE
  135. TokenFINALLY
  136. // Mutex block
  137. TokenMUTEX
  138. TokenENDLIST
  139. )
  140. /*
  141. IsValidTokenID check if a given token ID is valid.
  142. */
  143. func IsValidTokenID(value int) bool {
  144. return value < int(TokenENDLIST)
  145. }
  146. /*
  147. Available parser AST node types
  148. */
  149. const (
  150. NodeEOF = "EOF"
  151. NodeSTRING = "string" // String constant
  152. NodeNUMBER = "number" // Number constant
  153. NodeIDENTIFIER = "identifier" // Idendifier
  154. // Constructed tokens
  155. NodeSTATEMENTS = "statements" // List of statements
  156. NodeFUNCCALL = "funccall" // Function call
  157. NodeCOMPACCESS = "compaccess" // Composition structure access
  158. NodeLIST = "list" // List value
  159. NodeMAP = "map" // Map value
  160. NodePARAMS = "params" // Function parameters
  161. NodeGUARD = "guard" // Guard expressions for conditional statements
  162. // Condition operators
  163. NodeGEQ = ">="
  164. NodeLEQ = "<="
  165. NodeNEQ = "!="
  166. NodeEQ = "=="
  167. NodeGT = ">"
  168. NodeLT = "<"
  169. // Separators
  170. NodeKVP = "kvp" // Key-value pair
  171. NodePRESET = "preset" // Preset value
  172. // Arithmetic operators
  173. NodePLUS = "plus"
  174. NodeMINUS = "minus"
  175. NodeTIMES = "times"
  176. NodeDIV = "div"
  177. NodeMODINT = "modint"
  178. NodeDIVINT = "divint"
  179. // Assignment statement
  180. NodeASSIGN = ":="
  181. NodeLET = "let"
  182. // Import statement
  183. NodeIMPORT = "import"
  184. // Sink definition
  185. NodeSINK = "sink"
  186. NodeKINDMATCH = "kindmatch"
  187. NodeSCOPEMATCH = "scopematch"
  188. NodeSTATEMATCH = "statematch"
  189. NodePRIORITY = "priority"
  190. NodeSUPPRESSES = "suppresses"
  191. // Function definition
  192. NodeFUNC = "function"
  193. NodeRETURN = "return"
  194. // Boolean operators
  195. NodeAND = "and"
  196. NodeOR = "or"
  197. NodeNOT = "not"
  198. // Condition operators
  199. NodeLIKE = "like"
  200. NodeIN = "in"
  201. NodeHASPREFIX = "hasprefix"
  202. NodeHASSUFFIX = "hassuffix"
  203. NodeNOTIN = "notin"
  204. // Constant terminals
  205. NodeTRUE = "true"
  206. NodeFALSE = "false"
  207. NodeNULL = "null"
  208. // Conditional statements
  209. NodeIF = "if"
  210. // Loop statements
  211. NodeLOOP = "loop"
  212. NodeBREAK = "break"
  213. NodeCONTINUE = "continue"
  214. // Try block
  215. NodeTRY = "try"
  216. NodeEXCEPT = "except"
  217. NodeAS = "as"
  218. NodeOTHERWISE = "otherwise"
  219. NodeFINALLY = "finally"
  220. // Mutex block
  221. NodeMUTEX = "mutex"
  222. )