/*
 * Public Domain Software
 *
 * I (Matthias Ladkau) am the author of the source code in this file.
 * I have placed the source code in this file in the public domain.
 *
 * For further information see: http://creativecommons.org/publicdomain/zero/1.0/
 */
  9. package parser
  10. import (
  11. "fmt"
  12. )
  13. /*
  14. Map of AST nodes corresponding to lexer tokens. The map determines how a given
  15. sequence of lexer tokens are organized into an AST.
  16. */
  17. var astNodeMap map[LexTokenID]*ASTNode
  18. func init() {
  19. astNodeMap = map[LexTokenID]*ASTNode{
  20. TokenEOF: {NodeEOF, nil, nil, nil, nil, 0, ndTerm, nil},
  21. // Value tokens
  22. TokenSTRING: {NodeSTRING, nil, nil, nil, nil, 0, ndTerm, nil},
  23. TokenNUMBER: {NodeNUMBER, nil, nil, nil, nil, 0, ndTerm, nil},
  24. TokenIDENTIFIER: {NodeIDENTIFIER, nil, nil, nil, nil, 0, ndIdentifier, nil},
  25. // Constructed tokens
  26. TokenSTATEMENTS: {NodeSTATEMENTS, nil, nil, nil, nil, 0, nil, nil},
  27. TokenFUNCCALL: {NodeFUNCCALL, nil, nil, nil, nil, 0, nil, nil},
  28. TokenCOMPACCESS: {NodeCOMPACCESS, nil, nil, nil, nil, 0, nil, nil},
  29. TokenLIST: {NodeLIST, nil, nil, nil, nil, 0, nil, nil},
  30. TokenMAP: {NodeMAP, nil, nil, nil, nil, 0, nil, nil},
  31. TokenPARAMS: {NodePARAMS, nil, nil, nil, nil, 0, nil, nil},
  32. /*
  33. TokenGUARD: {NodeGUARD, nil, nil, nil, 0, nil, nil},
  34. */
  35. // Condition operators
  36. TokenGEQ: {NodeGEQ, nil, nil, nil, nil, 60, nil, ldInfix},
  37. TokenLEQ: {NodeLEQ, nil, nil, nil, nil, 60, nil, ldInfix},
  38. TokenNEQ: {NodeNEQ, nil, nil, nil, nil, 60, nil, ldInfix},
  39. TokenEQ: {NodeEQ, nil, nil, nil, nil, 60, nil, ldInfix},
  40. TokenGT: {NodeGT, nil, nil, nil, nil, 60, nil, ldInfix},
  41. TokenLT: {NodeLT, nil, nil, nil, nil, 60, nil, ldInfix},
  42. // Grouping symbols
  43. TokenLPAREN: {"", nil, nil, nil, nil, 150, ndInner, nil},
  44. TokenRPAREN: {"", nil, nil, nil, nil, 0, nil, nil},
  45. TokenLBRACK: {"", nil, nil, nil, nil, 150, ndList, nil},
  46. TokenRBRACK: {"", nil, nil, nil, nil, 0, nil, nil},
  47. TokenLBRACE: {"", nil, nil, nil, nil, 150, ndMap, nil},
  48. TokenRBRACE: {"", nil, nil, nil, nil, 0, nil, nil},
  49. // Separators
  50. TokenDOT: {"", nil, nil, nil, nil, 0, nil, nil},
  51. TokenCOMMA: {"", nil, nil, nil, nil, 0, nil, nil},
  52. TokenSEMICOLON: {"", nil, nil, nil, nil, 0, nil, nil},
  53. // Grouping
  54. TokenCOLON: {NodeKVP, nil, nil, nil, nil, 60, nil, ldInfix},
  55. TokenEQUAL: {NodePRESET, nil, nil, nil, nil, 60, nil, ldInfix},
  56. // Arithmetic operators
  57. TokenPLUS: {NodePLUS, nil, nil, nil, nil, 110, ndPrefix, ldInfix},
  58. TokenMINUS: {NodeMINUS, nil, nil, nil, nil, 110, ndPrefix, ldInfix},
  59. TokenTIMES: {NodeTIMES, nil, nil, nil, nil, 120, nil, ldInfix},
  60. TokenDIV: {NodeDIV, nil, nil, nil, nil, 120, nil, ldInfix},
  61. TokenDIVINT: {NodeDIVINT, nil, nil, nil, nil, 120, nil, ldInfix},
  62. TokenMODINT: {NodeMODINT, nil, nil, nil, nil, 120, nil, ldInfix},
  63. // Assignment statement
  64. TokenASSIGN: {NodeASSIGN, nil, nil, nil, nil, 10, nil, ldInfix},
  65. // Import statement
  66. TokenIMPORT: {NodeIMPORT, nil, nil, nil, nil, 0, ndImport, nil},
  67. TokenAS: {"", nil, nil, nil, nil, 0, ndImport, nil},
  68. /*
  69. // Sink definition
  70. TokenSINK
  71. TokenKINDMATCH
  72. TokenSCOPEMATCH
  73. TokenSTATEMATCH
  74. TokenPRIORITY
  75. TokenSUPPRESSES
  76. */
  77. // Function definition
  78. TokenFUNC: {NodeFUNC, nil, nil, nil, nil, 0, ndFunc, nil},
  79. TokenRETURN: {NodeRETURN, nil, nil, nil, nil, 0, ndReturn, nil},
  80. // Boolean operators
  81. TokenAND: {NodeAND, nil, nil, nil, nil, 40, nil, ldInfix},
  82. TokenOR: {NodeOR, nil, nil, nil, nil, 30, nil, ldInfix},
  83. TokenNOT: {NodeNOT, nil, nil, nil, nil, 20, ndPrefix, nil},
  84. // Condition operators
  85. TokenLIKE: {NodeLIKE, nil, nil, nil, nil, 60, nil, ldInfix},
  86. TokenIN: {NodeIN, nil, nil, nil, nil, 60, nil, ldInfix},
  87. TokenHASPREFIX: {NodeHASPREFIX, nil, nil, nil, nil, 60, nil, ldInfix},
  88. TokenHASSUFFIX: {NodeHASSUFFIX, nil, nil, nil, nil, 60, nil, ldInfix},
  89. TokenNOTIN: {NodeNOTIN, nil, nil, nil, nil, 60, nil, ldInfix},
  90. // Constant terminals
  91. TokenFALSE: {NodeFALSE, nil, nil, nil, nil, 0, ndTerm, nil},
  92. TokenTRUE: {NodeTRUE, nil, nil, nil, nil, 0, ndTerm, nil},
  93. TokenNULL: {NodeNULL, nil, nil, nil, nil, 0, ndTerm, nil},
  94. /*
  95. // Conditional statements
  96. TokenIF
  97. TokenELIF
  98. TokenELSE
  99. // Loop statements
  100. TokenFOR
  101. TokenBREAK
  102. TokenCONTINUE
  103. */
  104. }
  105. }
  106. // Parser
  107. // ======
  108. /*
  109. Parser data structure
  110. */
  111. type parser struct {
  112. name string // Name to identify the input
  113. node *ASTNode // Current ast node
  114. tokens *LABuffer // Buffer which is connected to the channel which contains lex tokens
  115. rp RuntimeProvider // Runtime provider which creates runtime components
  116. }
  117. /*
  118. Parse parses a given input string and returns an AST.
  119. */
  120. func Parse(name string, input string) (*ASTNode, error) {
  121. return ParseWithRuntime(name, input, nil)
  122. }
  123. /*
  124. ParseWithRuntime parses a given input string and returns an AST decorated with
  125. runtime components.
  126. */
  127. func ParseWithRuntime(name string, input string, rp RuntimeProvider) (*ASTNode, error) {
  128. // Create a new parser with a look-ahead buffer of 3
  129. p := &parser{name, nil, NewLABuffer(Lex(name, input), 3), rp}
  130. // Read and set initial AST node
  131. node, err := p.next()
  132. if err != nil {
  133. return nil, err
  134. }
  135. p.node = node
  136. n, err := p.run(0)
  137. if err == nil && hasMoreStatements(p, n) {
  138. st := astNodeMap[TokenSTATEMENTS].instance(p, nil)
  139. st.Children = append(st.Children, n)
  140. for err == nil && hasMoreStatements(p, n) {
  141. // Skip semicolons
  142. if p.node.Token.ID == TokenSEMICOLON {
  143. skipToken(p, TokenSEMICOLON)
  144. }
  145. n, err = p.run(0)
  146. st.Children = append(st.Children, n)
  147. }
  148. n = st
  149. }
  150. if err == nil && p.node != nil && p.node.Token.ID != TokenEOF {
  151. token := *p.node.Token
  152. err = p.newParserError(ErrUnexpectedEnd, fmt.Sprintf("extra token id:%v (%v)",
  153. token.ID, token), token)
  154. }
  155. return n, err
  156. }
  157. /*
  158. run models the main parser function.
  159. */
  160. func (p *parser) run(rightBinding int) (*ASTNode, error) {
  161. var err error
  162. n := p.node
  163. p.node, err = p.next()
  164. if err != nil {
  165. return nil, err
  166. }
  167. // Start with the null denotation of this statement / expression
  168. if n.nullDenotation == nil {
  169. return nil, p.newParserError(ErrImpossibleNullDenotation,
  170. n.Token.String(), *n.Token)
  171. }
  172. left, err := n.nullDenotation(p, n)
  173. if err != nil {
  174. return nil, err
  175. }
  176. // Collect left denotations as long as the left binding power is greater
  177. // than the initial right one
  178. for rightBinding < p.node.binding {
  179. var nleft *ASTNode
  180. n = p.node
  181. if n.leftDenotation == nil {
  182. if left.Token.Lline < n.Token.Lline {
  183. // If the impossible left denotation is on a new line
  184. // we might be parsing a new statement
  185. return left, nil
  186. }
  187. return nil, p.newParserError(ErrImpossibleLeftDenotation,
  188. n.Token.String(), *n.Token)
  189. }
  190. p.node, err = p.next()
  191. if err != nil {
  192. return nil, err
  193. }
  194. // Get the next left denotation
  195. nleft, err = n.leftDenotation(p, n, left)
  196. left = nleft
  197. if err != nil {
  198. return nil, err
  199. }
  200. }
  201. return left, nil
  202. }
  203. /*
  204. next retrieves the next lexer token.
  205. */
  206. func (p *parser) next() (*ASTNode, error) {
  207. var preComments []MetaData
  208. var postComments []MetaData
  209. token, more := p.tokens.Next()
  210. // Skip over pre comment token
  211. for more && token.ID == TokenPRECOMMENT {
  212. preComments = append(preComments, NewLexTokenInstance(token))
  213. token, more = p.tokens.Next()
  214. }
  215. // Skip over post comment token
  216. for more && token.ID == TokenPOSTCOMMENT {
  217. postComments = append(postComments, NewLexTokenInstance(token))
  218. token, more = p.tokens.Next()
  219. }
  220. if !more {
  221. // Unexpected end of input - the associated token is an empty error token
  222. return nil, p.newParserError(ErrUnexpectedEnd, "", token)
  223. } else if token.ID == TokenError {
  224. // There was a lexer error wrap it in a parser error
  225. return nil, p.newParserError(ErrLexicalError, token.Val, token)
  226. } else if node, ok := astNodeMap[token.ID]; ok {
  227. // We got a normal AST component
  228. ret := node.instance(p, &token)
  229. ret.Meta = append(ret.Meta, preComments...) // Attach pre comments to the next AST node
  230. if len(postComments) > 0 && p.node != nil {
  231. p.node.Meta = append(p.node.Meta, postComments...) // Attach post comments to the previous AST node
  232. }
  233. return ret, nil
  234. }
  235. return nil, p.newParserError(ErrUnknownToken, fmt.Sprintf("id:%v (%v)", token.ID, token), token)
  236. }
  237. // Standard null denotation functions
  238. // ==================================
  239. /*
  240. ndTerm is used for terminals.
  241. */
  242. func ndTerm(p *parser, self *ASTNode) (*ASTNode, error) {
  243. return self, nil
  244. }
  245. /*
  246. ndInner returns the inner expression of an enclosed block and discard the
  247. block token. This method is used for brackets.
  248. */
  249. func ndInner(p *parser, self *ASTNode) (*ASTNode, error) {
  250. // Get the inner expression
  251. exp, err := p.run(0)
  252. if err != nil {
  253. return nil, err
  254. }
  255. // We return here the inner expression - discarding the bracket tokens
  256. return exp, skipToken(p, TokenRPAREN)
  257. }
  258. /*
  259. ndPrefix is used for prefix operators.
  260. */
  261. func ndPrefix(p *parser, self *ASTNode) (*ASTNode, error) {
  262. // Make sure a prefix will only prefix the next item
  263. val, err := p.run(self.binding + 20)
  264. if err != nil {
  265. return nil, err
  266. }
  267. self.Children = append(self.Children, val)
  268. return self, nil
  269. }
  270. // Null denotation functions for specific expressions
  271. // ==================================================
  272. /*
  273. ndImport is used to parse imports.
  274. */
  275. func ndImport(p *parser, self *ASTNode) (*ASTNode, error) {
  276. // Must specify a file path
  277. err := acceptChild(p, self, TokenSTRING)
  278. if err == nil {
  279. // Must specify AS
  280. if err = skipToken(p, TokenAS); err == nil {
  281. // Must specify an identifier
  282. err = acceptChild(p, self, TokenIDENTIFIER)
  283. }
  284. }
  285. return self, err
  286. }
  287. /*
  288. ndFunc is used to parse function definitions.
  289. */
  290. func ndFunc(p *parser, self *ASTNode) (*ASTNode, error) {
  291. // Must specify a function name
  292. err := acceptChild(p, self, TokenIDENTIFIER)
  293. // Read in parameters
  294. if err == nil {
  295. err = skipToken(p, TokenLPAREN)
  296. params := astNodeMap[TokenPARAMS].instance(p, nil)
  297. self.Children = append(self.Children, params)
  298. for err == nil && p.node.Token.ID != TokenRPAREN {
  299. // Parse all the expressions inside
  300. exp, err := p.run(0)
  301. if err == nil {
  302. params.Children = append(params.Children, exp)
  303. if p.node.Token.ID == TokenCOMMA {
  304. err = skipToken(p, TokenCOMMA)
  305. }
  306. }
  307. }
  308. if err == nil {
  309. err = skipToken(p, TokenRPAREN)
  310. }
  311. }
  312. if err == nil {
  313. // Parse the body
  314. self, err = parseInnerStatements(p, self)
  315. }
  316. return self, err
  317. }
  318. /*
  319. ndReturn is used to parse return statements.
  320. */
  321. func ndReturn(p *parser, self *ASTNode) (*ASTNode, error) {
  322. var err error
  323. if self.Token.Lline == p.node.Token.Lline {
  324. var val *ASTNode
  325. // Consume the next expression only if it is on the same line
  326. val, err = p.run(0)
  327. if err == nil {
  328. self.Children = append(self.Children, val)
  329. }
  330. }
  331. return self, err
  332. }
  333. /*
  334. ndIdentifier is to parse identifiers and function calls.
  335. */
  336. func ndIdentifier(p *parser, self *ASTNode) (*ASTNode, error) {
  337. var parseMore, parseSegment, parseFuncCall, parseCompositionAccess func(parent *ASTNode) error
  338. parseMore = func(current *ASTNode) error {
  339. var err error
  340. if p.node.Token.ID == TokenDOT {
  341. err = parseSegment(current)
  342. } else if p.node.Token.ID == TokenLPAREN {
  343. err = parseFuncCall(current)
  344. } else if p.node.Token.ID == TokenLBRACK {
  345. err = parseCompositionAccess(current)
  346. }
  347. return err
  348. }
  349. parseSegment = func(current *ASTNode) error {
  350. var err error
  351. var next *ASTNode
  352. if err = skipToken(p, TokenDOT); err == nil {
  353. next = p.node
  354. if err = acceptChild(p, current, TokenIDENTIFIER); err == nil {
  355. err = parseMore(next)
  356. }
  357. }
  358. return err
  359. }
  360. parseFuncCall = func(current *ASTNode) error {
  361. err := skipToken(p, TokenLPAREN)
  362. fc := astNodeMap[TokenFUNCCALL].instance(p, nil)
  363. current.Children = append(current.Children, fc)
  364. // Read in parameters
  365. for err == nil && p.node.Token.ID != TokenRPAREN {
  366. // Parse all the expressions inside the directives
  367. exp, err := p.run(0)
  368. if err == nil {
  369. fc.Children = append(fc.Children, exp)
  370. if p.node.Token.ID == TokenCOMMA {
  371. err = skipToken(p, TokenCOMMA)
  372. }
  373. }
  374. }
  375. if err == nil {
  376. err = skipToken(p, TokenRPAREN)
  377. if err == nil {
  378. err = parseMore(current)
  379. }
  380. }
  381. return err
  382. }
  383. parseCompositionAccess = func(current *ASTNode) error {
  384. err := skipToken(p, TokenLBRACK)
  385. ca := astNodeMap[TokenCOMPACCESS].instance(p, nil)
  386. current.Children = append(current.Children, ca)
  387. // Parse all the expressions inside the directives
  388. exp, err := p.run(0)
  389. if err == nil {
  390. ca.Children = append(ca.Children, exp)
  391. if err = skipToken(p, TokenRBRACK); err == nil {
  392. err = parseMore(current)
  393. }
  394. }
  395. return err
  396. }
  397. return self, parseMore(self)
  398. }
  399. /*
  400. ndList is used to collect elements of a list.
  401. */
  402. func ndList(p *parser, self *ASTNode) (*ASTNode, error) {
  403. // Create a list token
  404. st := astNodeMap[TokenLIST].instance(p, self.Token)
  405. // Get the inner expression
  406. for p.node.Token.ID != TokenRBRACK {
  407. // Parse all the expressions inside
  408. exp, err := p.run(0)
  409. if err != nil {
  410. return nil, err
  411. }
  412. st.Children = append(st.Children, exp)
  413. if p.node.Token.ID == TokenCOMMA {
  414. skipToken(p, TokenCOMMA)
  415. }
  416. }
  417. // Must have a closing bracket
  418. return st, skipToken(p, TokenRBRACK)
  419. }
  420. /*
  421. ndMap is used to collect elements of a map.
  422. */
  423. func ndMap(p *parser, self *ASTNode) (*ASTNode, error) {
  424. // Create a map token
  425. st := astNodeMap[TokenMAP].instance(p, self.Token)
  426. // Get the inner expression
  427. for p.node.Token.ID != TokenRBRACE {
  428. // Parse all the expressions inside
  429. exp, err := p.run(0)
  430. if err != nil {
  431. return nil, err
  432. }
  433. st.Children = append(st.Children, exp)
  434. if p.node.Token.ID == TokenCOMMA {
  435. if err := skipToken(p, TokenCOMMA); err != nil {
  436. return nil, err
  437. }
  438. }
  439. }
  440. // Must have a closing brace
  441. return st, skipToken(p, TokenRBRACE)
  442. }
  443. // Standard left denotation functions
  444. // ==================================
  445. /*
  446. ldInfix is used for infix operators.
  447. */
  448. func ldInfix(p *parser, self *ASTNode, left *ASTNode) (*ASTNode, error) {
  449. right, err := p.run(self.binding)
  450. if err != nil {
  451. return nil, err
  452. }
  453. self.Children = append(self.Children, left)
  454. self.Children = append(self.Children, right)
  455. return self, nil
  456. }
  457. // Helper functions
  458. // ================
  459. /*
  460. hasMoreStatements returns true if there are more statements to parse.
  461. */
  462. func hasMoreStatements(p *parser, currentNode *ASTNode) bool {
  463. nextNode := p.node
  464. if nextNode == nil || nextNode.Token.ID == TokenEOF {
  465. return false
  466. } else if nextNode.Token.ID == TokenSEMICOLON {
  467. return true
  468. }
  469. return currentNode != nil && currentNode.Token.Lline < nextNode.Token.Lline
  470. }
  471. /*
  472. skipToken skips over a given token.
  473. */
  474. func skipToken(p *parser, ids ...LexTokenID) error {
  475. var err error
  476. canSkip := func(id LexTokenID) bool {
  477. for _, i := range ids {
  478. if i == id {
  479. return true
  480. }
  481. }
  482. return false
  483. }
  484. if !canSkip(p.node.Token.ID) {
  485. if p.node.Token.ID == TokenEOF {
  486. return p.newParserError(ErrUnexpectedEnd, "", *p.node.Token)
  487. }
  488. return p.newParserError(ErrUnexpectedToken, p.node.Token.Val, *p.node.Token)
  489. }
  490. // This should never return an error unless we skip over EOF or complex tokens
  491. // like values
  492. p.node, err = p.next()
  493. return err
  494. }
  495. /*
  496. acceptChild accepts the current token as a child.
  497. */
  498. func acceptChild(p *parser, self *ASTNode, id LexTokenID) error {
  499. var err error
  500. current := p.node
  501. p.node, err = p.next()
  502. if err != nil {
  503. return err
  504. }
  505. if current.Token.ID == id {
  506. self.Children = append(self.Children, current)
  507. return nil
  508. }
  509. return p.newParserError(ErrUnexpectedToken, current.Token.Val, *current.Token)
  510. }
  511. /*
  512. parseInnerStatements collects the inner statements of a block statement. It
  513. is assumed that a block statement starts with a left brace '{' and ends with
  514. a right brace '}'.
  515. */
  516. func parseInnerStatements(p *parser, self *ASTNode) (*ASTNode, error) {
  517. // Must start with an opening brace
  518. if err := skipToken(p, TokenLBRACE); err != nil {
  519. return nil, err
  520. }
  521. // Always create a statements node
  522. st := astNodeMap[TokenSTATEMENTS].instance(p, nil)
  523. self.Children = append(self.Children, st)
  524. // Check if there are actually children
  525. if p.node != nil && p.node.Token.ID != TokenRBRACE {
  526. n, err := p.run(0)
  527. if p.node != nil && p.node.Token.ID != TokenEOF {
  528. st.Children = append(st.Children, n)
  529. for hasMoreStatements(p, n) {
  530. if p.node.Token.ID == TokenSEMICOLON {
  531. skipToken(p, TokenSEMICOLON)
  532. } else if p.node.Token.ID == TokenRBRACE {
  533. break
  534. }
  535. n, err = p.run(0)
  536. st.Children = append(st.Children, n)
  537. }
  538. }
  539. if err != nil {
  540. return nil, err
  541. }
  542. }
  543. // Must end with a closing brace
  544. return self, skipToken(p, TokenRBRACE)
  545. }