/*
 * Public Domain Software
 *
 * I (Matthias Ladkau) am the author of the source code in this file.
 * I have placed the source code in this file in the public domain.
 *
 * For further information see: http://creativecommons.org/publicdomain/zero/1.0/
 */

package parser

import (
	"fmt"
)

/*
Map of AST nodes corresponding to lexer tokens. The map determines how a given
sequence of lexer tokens is organized into an AST.
*/
var astNodeMap map[LexTokenID]*ASTNode

func init() {
	astNodeMap = map[LexTokenID]*ASTNode{
		TokenEOF: {NodeEOF, nil, nil, nil, nil, 0, ndTerm, nil},

		// Value tokens

		TokenSTRING: {NodeSTRING, nil, nil, nil, nil, 0, ndTerm, nil},
		TokenNUMBER: {NodeNUMBER, nil, nil, nil, nil, 0, ndTerm, nil},
		TokenIDENTIFIER: {NodeIDENTIFIER, nil, nil, nil, nil, 0, ndIdentifier, nil},

		// Constructed tokens

		TokenSTATEMENTS: {NodeSTATEMENTS, nil, nil, nil, nil, 0, nil, nil},
		TokenFUNCCALL: {NodeFUNCCALL, nil, nil, nil, nil, 0, nil, nil},
		TokenCOMPACCESS: {NodeCOMPACCESS, nil, nil, nil, nil, 0, nil, nil},
		/*
			TokenLIST: {NodeLIST, nil, nil, nil, 0, nil, nil},
			TokenMAP: {NodeMAP, nil, nil, nil, 0, nil, nil},
			TokenGUARD: {NodeGUARD, nil, nil, nil, 0, nil, nil},
		*/

		// Condition operators

		TokenGEQ: {NodeGEQ, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenLEQ: {NodeLEQ, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenNEQ: {NodeNEQ, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenEQ: {NodeEQ, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenGT: {NodeGT, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenLT: {NodeLT, nil, nil, nil, nil, 60, nil, ldInfix},

		// Grouping symbols

		TokenLBRACK: {"", nil, nil, nil, nil, 150, ndInner, nil},
		TokenRBRACK: {"", nil, nil, nil, nil, 0, nil, nil},
		TokenLPAREN: {"", nil, nil, nil, nil, 150, ndInner, nil},
		TokenRPAREN: {"", nil, nil, nil, nil, 0, nil, nil},

		// Separators

		TokenDOT: {"", nil, nil, nil, nil, 0, nil, nil},
		TokenCOMMA: {"", nil, nil, nil, nil, 0, nil, nil},
		TokenSEMICOLON: {"", nil, nil, nil, nil, 0, nil, nil},

		// Arithmetic operators

		TokenPLUS: {NodePLUS, nil, nil, nil, nil, 110, ndPrefix, ldInfix},
		TokenMINUS: {NodeMINUS, nil, nil, nil, nil, 110, ndPrefix, ldInfix},
		TokenTIMES: {NodeTIMES, nil, nil, nil, nil, 120, nil, ldInfix},
		TokenDIV: {NodeDIV, nil, nil, nil, nil, 120, nil, ldInfix},
		TokenDIVINT: {NodeDIVINT, nil, nil, nil, nil, 120, nil, ldInfix},
		TokenMODINT: {NodeMODINT, nil, nil, nil, nil, 120, nil, ldInfix},

		// Assignment statement

		TokenASSIGN: {NodeASSIGN, nil, nil, nil, nil, 10, nil, ldInfix},

		// Import statement

		TokenIMPORT: {NodeIMPORT, nil, nil, nil, nil, 0, ndImport, nil},
		TokenAS: {"", nil, nil, nil, nil, 0, ndImport, nil},

		// Boolean operators

		TokenOR: {NodeOR, nil, nil, nil, nil, 30, nil, ldInfix},
		TokenAND: {NodeAND, nil, nil, nil, nil, 40, nil, ldInfix},
		TokenNOT: {NodeNOT, nil, nil, nil, nil, 20, ndPrefix, nil},

		// Condition operators

		TokenLIKE: {NodeLIKE, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenIN: {NodeIN, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenHASPREFIX: {NodeHASPREFIX, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenHASSUFFIX: {NodeHASSUFFIX, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenNOTIN: {NodeNOTIN, nil, nil, nil, nil, 60, nil, ldInfix},

		// Constant terminals

		TokenFALSE: {NodeFALSE, nil, nil, nil, nil, 0, ndTerm, nil},
		TokenTRUE: {NodeTRUE, nil, nil, nil, nil, 0, ndTerm, nil},
		TokenNULL: {NodeNULL, nil, nil, nil, nil, 0, ndTerm, nil},
	}
}
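
/*
Reading aid for the table above (field meanings inferred from their use in
this file): the first value of each entry is the AST node name, the sixth is
the binding power used by the run() loop below, the seventh is the null
denotation (terminal / prefix handler) and the eighth is the left denotation
(infix handler). A higher binding power binds tighter, so for an illustrative
input such as "1 + 2 * 3" the TIMES entry (120) wins over PLUS (110) and the
expression is parsed as 1 + (2 * 3); grouping symbols (150) bind tightest and
assignment (10) binds loosest.
*/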

// Parser
// ======

/*
Parser data structure
*/
type parser struct {
	name   string          // Name to identify the input
	node   *ASTNode        // Current AST node
	tokens *LABuffer       // Lookahead buffer connected to the channel of lexer tokens
	rp     RuntimeProvider // Runtime provider which creates runtime components
}

/*
Parse parses a given input string and returns an AST.
*/
func Parse(name string, input string) (*ASTNode, error) {
	return ParseWithRuntime(name, input, nil)
}

/*
ParseWithRuntime parses a given input string and returns an AST decorated with
runtime components.
*/
func ParseWithRuntime(name string, input string, rp RuntimeProvider) (*ASTNode, error) {

	// Create a new parser with a look-ahead buffer of 3

	p := &parser{name, nil, NewLABuffer(Lex(name, input), 3), rp}

	// Read and set initial AST node

	node, err := p.next()
	if err != nil {
		return nil, err
	}
	p.node = node

	n, err := p.run(0)

	if err == nil && hasMoreStatements(p, n) {

		st := astNodeMap[TokenSTATEMENTS].instance(p, nil)
		st.Children = append(st.Children, n)

		for err == nil && hasMoreStatements(p, n) {

			// Skip semicolons

			if p.node.Token.ID == TokenSEMICOLON {
				skipToken(p, TokenSEMICOLON)
			}

			n, err = p.run(0)
			st.Children = append(st.Children, n)
		}

		n = st
	}

	if err == nil && p.node != nil && p.node.Token.ID != TokenEOF {
		token := *p.node.Token
		err = p.newParserError(ErrUnexpectedEnd, fmt.Sprintf("extra token id:%v (%v)",
			token.ID, token), token)
	}

	return n, err
}
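
/*
Minimal usage sketch (the concrete statement syntax is defined by the lexer;
the assignment shown here is only illustrative):

	ast, err := parser.Parse("example", "a := 1 + 2 * 3")
	if err != nil {
		// Handle lexer or parser errors
	}
	_ = ast // Walk or otherwise use the returned *ASTNode

ParseWithRuntime works the same way but passes the given RuntimeProvider to
the parser so the created AST nodes can be decorated with runtime components.
*/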

/*
run is the main parser function. It implements a top-down operator precedence
(Pratt) parsing loop.
*/
func (p *parser) run(rightBinding int) (*ASTNode, error) {
	var err error

	n := p.node

	p.node, err = p.next()
	if err != nil {
		return nil, err
	}

	// Start with the null denotation of this statement / expression

	if n.nullDenotation == nil {
		return nil, p.newParserError(ErrImpossibleNullDenotation,
			n.Token.String(), *n.Token)
	}

	left, err := n.nullDenotation(p, n)
	if err != nil {
		return nil, err
	}

	// Collect left denotations as long as the left binding power is greater
	// than the initial right one

	for rightBinding < p.node.binding {
		var nleft *ASTNode

		n = p.node

		if n.leftDenotation == nil {

			if left.Token.Lline < n.Token.Lline {

				// If the impossible left denotation is on a new line
				// we might be parsing a new statement

				return left, nil
			}

			return nil, p.newParserError(ErrImpossibleLeftDenotation,
				n.Token.String(), *n.Token)
		}

		p.node, err = p.next()
		if err != nil {
			return nil, err
		}

		// Get the next left denotation

		nleft, err = n.leftDenotation(p, n, left)
		left = nleft

		if err != nil {
			return nil, err
		}
	}

	return left, nil
}
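
/*
Illustrative trace of run(0) for the input "1 + 2 * 3" (assuming the lexer
emits NUMBER and operator tokens for it): NUMBER(1) is consumed through its
null denotation ndTerm and becomes left; PLUS (binding 110) is greater than 0,
so its left denotation ldInfix runs and recurses with run(110); inside that
call NUMBER(2) becomes left and TIMES (binding 120) is greater than 110, so
TIMES(2, 3) is built first; the recursion then returns and PLUS receives 1 and
TIMES(2, 3) as children, i.e. the result is PLUS(1, TIMES(2, 3)).
*/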

/*
next retrieves the next lexer token.
*/
func (p *parser) next() (*ASTNode, error) {
	var preComments []MetaData
	var postComments []MetaData

	token, more := p.tokens.Next()

	// Skip over pre comment tokens

	for more && token.ID == TokenPRECOMMENT {
		preComments = append(preComments, NewLexTokenInstance(token))
		token, more = p.tokens.Next()
	}

	// Skip over post comment tokens

	for more && token.ID == TokenPOSTCOMMENT {
		postComments = append(postComments, NewLexTokenInstance(token))
		token, more = p.tokens.Next()
	}

	if !more {

		// Unexpected end of input - the associated token is an empty error token

		return nil, p.newParserError(ErrUnexpectedEnd, "", token)

	} else if token.ID == TokenError {

		// There was a lexer error; wrap it in a parser error

		return nil, p.newParserError(ErrLexicalError, token.Val, token)

	} else if node, ok := astNodeMap[token.ID]; ok {

		// We got a normal AST component

		ret := node.instance(p, &token)

		ret.Meta = append(ret.Meta, preComments...) // Attach pre comments to the next AST node
		if len(postComments) > 0 && p.node != nil {
			p.node.Meta = append(p.node.Meta, postComments...) // Attach post comments to the previous AST node
		}

		return ret, nil
	}

	return nil, p.newParserError(ErrUnknownToken, fmt.Sprintf("id:%v (%v)", token.ID, token), token)
}

// Standard null denotation functions
// ==================================

/*
ndTerm is used for terminals.
*/
func ndTerm(p *parser, self *ASTNode) (*ASTNode, error) {
	return self, nil
}

/*
ndInner returns the inner expression of an enclosed block and discards the
block tokens. This method is used for brackets.
*/
func ndInner(p *parser, self *ASTNode) (*ASTNode, error) {

	// Get the inner expression

	exp, err := p.run(0)
	if err != nil {
		return nil, err
	}

	// Return the inner expression here, discarding the bracket tokens

	return exp, skipToken(p, TokenRPAREN)
}

/*
ndPrefix is used for prefix operators.
*/
func ndPrefix(p *parser, self *ASTNode) (*ASTNode, error) {

	// Make sure a prefix will only prefix the next item

	val, err := p.run(self.binding + 20)
	if err != nil {
		return nil, err
	}

	self.Children = append(self.Children, val)

	return self, nil
}
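
/*
The binding offset of 20 in ndPrefix makes a prefix operator claim only its
immediate operand: with the binding powers above, an illustrative input such
as "-1 * 2" is parsed as TIMES(MINUS(1), 2), because run(110 + 20) refuses to
consume the TIMES token (its binding 120 is not greater than 130).
*/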

// Null denotation functions for specific expressions
// ==================================================

/*
ndImport is used to parse imports.
*/
func ndImport(p *parser, self *ASTNode) (*ASTNode, error) {

	// Must specify a file path

	err := acceptChild(p, self, TokenSTRING)

	if err == nil {

		// Must specify AS

		if err = skipToken(p, TokenAS); err == nil {

			// Must specify an identifier

			err = acceptChild(p, self, TokenIDENTIFIER)
		}
	}

	return self, err
}
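
/*
Sketch of the accepted import shape (the keyword spellings depend on the
lexer and are only illustrative): a string literal for the file path, the AS
keyword and an identifier for the alias, e.g.

	import "somefile" as somename
*/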

/*
ndIdentifier is used to parse identifiers and function calls.
*/
func ndIdentifier(p *parser, self *ASTNode) (*ASTNode, error) {
	var parseMore, parseSegment, parseFuncCall, parseCompositionAccess func(parent *ASTNode) error

	parseMore = func(current *ASTNode) error {
		var err error

		if p.node.Token.ID == TokenDOT {
			err = parseSegment(current)
		} else if p.node.Token.ID == TokenLPAREN {
			err = parseFuncCall(current)
		} else if p.node.Token.ID == TokenLBRACK {
			err = parseCompositionAccess(current)
		}

		return err
	}

	parseSegment = func(current *ASTNode) error {
		var err error
		var next *ASTNode

		if err = skipToken(p, TokenDOT); err == nil {
			next = p.node
			if err = acceptChild(p, current, TokenIDENTIFIER); err == nil {
				err = parseMore(next)
			}
		}

		return err
	}

	parseFuncCall = func(current *ASTNode) error {
		var exp *ASTNode

		err := skipToken(p, TokenLPAREN)

		fc := astNodeMap[TokenFUNCCALL].instance(p, nil)
		current.Children = append(current.Children, fc)

		// Read in parameters

		for err == nil && p.node.Token.ID != TokenRPAREN {

			// Parse all expressions inside the parentheses; assign to the
			// outer err so the loop stops on a parse error

			exp, err = p.run(0)
			if err == nil {
				fc.Children = append(fc.Children, exp)

				if p.node.Token.ID == TokenCOMMA {
					err = skipToken(p, TokenCOMMA)
				}
			}
		}

		if err == nil {
			err = skipToken(p, TokenRPAREN)

			if err == nil {
				err = parseMore(current)
			}
		}

		return err
	}

	parseCompositionAccess = func(current *ASTNode) error {
		err := skipToken(p, TokenLBRACK)

		ca := astNodeMap[TokenCOMPACCESS].instance(p, nil)
		current.Children = append(current.Children, ca)

		// Parse the expression inside the brackets

		exp, err := p.run(0)
		if err == nil {
			ca.Children = append(ca.Children, exp)

			if err = skipToken(p, TokenRBRACK); err == nil {
				err = parseMore(current)
			}
		}

		return err
	}

	return self, parseMore(self)
}
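
/*
Sketch of the shapes ndIdentifier accepts (based on the token checks above,
with illustrative spellings): plain identifiers such as "a", dotted access
chains such as "a.b.c", function calls such as "f(x, y)" and composition
access such as "m[1]", in any combination, e.g. "a.f(x)[1].b".
*/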

// Standard left denotation functions
// ==================================

/*
ldInfix is used for infix operators.
*/
func ldInfix(p *parser, self *ASTNode, left *ASTNode) (*ASTNode, error) {

	right, err := p.run(self.binding)
	if err != nil {
		return nil, err
	}

	self.Children = append(self.Children, left)
	self.Children = append(self.Children, right)

	return self, nil
}
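
/*
Because ldInfix recurses with the operator's own binding power and the loop in
run() only continues while the next binding is strictly greater, operators of
equal binding power associate to the left: an illustrative "1 - 2 - 3" becomes
MINUS(MINUS(1, 2), 3).
*/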

// Helper functions
// ================

/*
hasMoreStatements returns true if there are more statements to parse.
*/
func hasMoreStatements(p *parser, currentNode *ASTNode) bool {
	nextNode := p.node

	if nextNode == nil || nextNode.Token.ID == TokenEOF {
		return false
	} else if nextNode.Token.ID == TokenSEMICOLON {
		return true
	}

	return currentNode != nil && currentNode.Token.Lline < nextNode.Token.Lline
}
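
/*
In effect (following the checks above) a further statement is expected when
the next token is a semicolon or when it starts on a later line than the
current statement, so two statements written on separate lines need no
semicolon between them; EOF or an exhausted token buffer ends statement
parsing.
*/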

/*
skipToken skips over a given token.
*/
func skipToken(p *parser, ids ...LexTokenID) error {
	var err error

	canSkip := func(id LexTokenID) bool {
		for _, i := range ids {
			if i == id {
				return true
			}
		}
		return false
	}

	if !canSkip(p.node.Token.ID) {
		if p.node.Token.ID == TokenEOF {
			return p.newParserError(ErrUnexpectedEnd, "", *p.node.Token)
		}
		return p.newParserError(ErrUnexpectedToken, p.node.Token.Val, *p.node.Token)
	}

	// This should never return an error unless we skip over EOF or complex tokens
	// like values

	p.node, err = p.next()

	return err
}

/*
acceptChild accepts the current token as a child.
*/
func acceptChild(p *parser, self *ASTNode, id LexTokenID) error {
	var err error

	current := p.node

	p.node, err = p.next()
	if err != nil {
		return err
	}

	if current.Token.ID == id {
		self.Children = append(self.Children, current)
		return nil
	}

	return p.newParserError(ErrUnexpectedToken, current.Token.Val, *current.Token)
}