parser.go

/*
 * Public Domain Software
 *
 * I (Matthias Ladkau) am the author of the source code in this file.
 * I have placed the source code in this file in the public domain.
 *
 * For further information see: http://creativecommons.org/publicdomain/zero/1.0/
 */

package parser

import (
    "fmt"
)

/*
Map of AST nodes corresponding to lexer tokens. The map determines how a given
sequence of lexer tokens is organized into an AST.
*/
var astNodeMap map[LexTokenID]*ASTNode
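
// The last three fields of each *ASTNode literal below are the binding power,
// the null denotation (used when the token starts an expression) and the left
// denotation (used when the token follows a left operand) - the usual layout
// of a top-down operator precedence (Pratt) parser. The leading nil fields
// appear to be per-instance data which instance() fills in once a concrete
// lexer token is turned into an AST node.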

func init() {
    astNodeMap = map[LexTokenID]*ASTNode{
        TokenEOF: {NodeEOF, nil, nil, nil, nil, 0, ndTerm, nil},

        // Value tokens

        TokenSTRING:     {NodeSTRING, nil, nil, nil, nil, 0, ndTerm, nil},
        TokenNUMBER:     {NodeNUMBER, nil, nil, nil, nil, 0, ndTerm, nil},
        TokenIDENTIFIER: {NodeIDENTIFIER, nil, nil, nil, nil, 0, ndIdentifier, nil},

        // Constructed tokens

        TokenSTATEMENTS: {NodeSTATEMENTS, nil, nil, nil, nil, 0, nil, nil},
        TokenFUNCCALL:   {NodeFUNCCALL, nil, nil, nil, nil, 0, nil, nil},
        /*
            TokenLIST:  {NodeLIST, nil, nil, nil, 0, nil, nil},
            TokenMAP:   {NodeMAP, nil, nil, nil, 0, nil, nil},
            TokenGUARD: {NodeGUARD, nil, nil, nil, 0, nil, nil},
        */

        // Condition operators

        TokenGEQ: {NodeGEQ, nil, nil, nil, nil, 60, nil, ldInfix},
        TokenLEQ: {NodeLEQ, nil, nil, nil, nil, 60, nil, ldInfix},
        TokenNEQ: {NodeNEQ, nil, nil, nil, nil, 60, nil, ldInfix},
        TokenEQ:  {NodeEQ, nil, nil, nil, nil, 60, nil, ldInfix},
        TokenGT:  {NodeGT, nil, nil, nil, nil, 60, nil, ldInfix},
        TokenLT:  {NodeLT, nil, nil, nil, nil, 60, nil, ldInfix},

        // Grouping symbols

        TokenLPAREN: {"", nil, nil, nil, nil, 150, ndInner, nil},
        TokenRPAREN: {"", nil, nil, nil, nil, 0, nil, nil},

        // Separators

        TokenDOT:       {"", nil, nil, nil, nil, 0, nil, nil},
        TokenCOMMA:     {"", nil, nil, nil, nil, 0, nil, nil},
        TokenSEMICOLON: {"", nil, nil, nil, nil, 0, nil, nil},

        // Arithmetic operators

        TokenPLUS:   {NodePLUS, nil, nil, nil, nil, 110, ndPrefix, ldInfix},
        TokenMINUS:  {NodeMINUS, nil, nil, nil, nil, 110, ndPrefix, ldInfix},
        TokenTIMES:  {NodeTIMES, nil, nil, nil, nil, 120, nil, ldInfix},
        TokenDIV:    {NodeDIV, nil, nil, nil, nil, 120, nil, ldInfix},
        TokenDIVINT: {NodeDIVINT, nil, nil, nil, nil, 120, nil, ldInfix},
        TokenMODINT: {NodeMODINT, nil, nil, nil, nil, 120, nil, ldInfix},

        // Assignment statement

        TokenASSIGN: {NodeASSIGN, nil, nil, nil, nil, 10, nil, ldInfix},

        // Import statement

        TokenIMPORT: {NodeIMPORT, nil, nil, nil, nil, 0, ndImport, nil},
        TokenAS:     {"", nil, nil, nil, nil, 0, ndImport, nil},

        // Boolean operators

        TokenOR:  {NodeOR, nil, nil, nil, nil, 30, nil, ldInfix},
        TokenAND: {NodeAND, nil, nil, nil, nil, 40, nil, ldInfix},
        TokenNOT: {NodeNOT, nil, nil, nil, nil, 20, ndPrefix, nil},

        // Condition operators

        TokenLIKE:      {NodeLIKE, nil, nil, nil, nil, 60, nil, ldInfix},
        TokenIN:        {NodeIN, nil, nil, nil, nil, 60, nil, ldInfix},
        TokenHASPREFIX: {NodeHASPREFIX, nil, nil, nil, nil, 60, nil, ldInfix},
        TokenHASSUFFIX: {NodeHASSUFFIX, nil, nil, nil, nil, 60, nil, ldInfix},
        TokenNOTIN:     {NodeNOTIN, nil, nil, nil, nil, 60, nil, ldInfix},

        // Constant terminals

        TokenFALSE: {NodeFALSE, nil, nil, nil, nil, 0, ndTerm, nil},
        TokenTRUE:  {NodeTRUE, nil, nil, nil, nil, 0, ndTerm, nil},
        TokenNULL:  {NodeNULL, nil, nil, nil, nil, 0, ndTerm, nil},
    }
}
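
// The binding powers in the table define the operator precedence: assignment
// (10) binds weakest, followed by the boolean operators (20-40), the
// condition operators (60) and the arithmetic operators (110 and 120).
// Parenthesised expressions are handled separately through ndInner.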

// Parser
// ======

/*
Parser data structure
*/
type parser struct {
    name   string          // Name to identify the input
    node   *ASTNode        // Current AST node
    tokens *LABuffer       // Buffer connected to the channel of lexer tokens
    rp     RuntimeProvider // Runtime provider which creates runtime components
}

/*
Parse parses a given input string and returns an AST.
*/
func Parse(name string, input string) (*ASTNode, error) {
    return ParseWithRuntime(name, input, nil)
}

/*
ParseWithRuntime parses a given input string and returns an AST decorated with
runtime components.
*/
func ParseWithRuntime(name string, input string, rp RuntimeProvider) (*ASTNode, error) {

    // Create a new parser with a look-ahead buffer of 3

    p := &parser{name, nil, NewLABuffer(Lex(name, input), 3), rp}

    // Read and set initial AST node

    node, err := p.next()
    if err != nil {
        return nil, err
    }
    p.node = node

    n, err := p.run(0)

    if err == nil && hasMoreStatements(p, n) {

        st := astNodeMap[TokenSTATEMENTS].instance(p, nil)
        st.Children = append(st.Children, n)

        for err == nil && hasMoreStatements(p, n) {

            // Skip semicolons

            if p.node.Token.ID == TokenSEMICOLON {
                skipToken(p, TokenSEMICOLON)
            }

            n, err = p.run(0)
            st.Children = append(st.Children, n)
        }

        n = st
    }

    if err == nil && p.node != nil && p.node.Token.ID != TokenEOF {
        token := *p.node.Token

        err = p.newParserError(ErrUnexpectedEnd, fmt.Sprintf("extra token id:%v (%v)",
            token.ID, token), token)
    }

    return n, err
}
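
// A minimal usage sketch (the input string is an assumed example; any
// construct accepted by the lexer is handled the same way):
//
//    ast, err := Parse("console input", "a := 1 + 2 * 3")
//    if err != nil {
//        // handle the lexer or parser error
//    }
//    // ast is the root *ASTNode; if the input contains several statements
//    // the root is a statements node with one child per statement.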

/*
run models the main parser function.
*/
func (p *parser) run(rightBinding int) (*ASTNode, error) {
    var err error

    n := p.node

    p.node, err = p.next()
    if err != nil {
        return nil, err
    }

    // Start with the null denotation of this statement / expression

    if n.nullDenotation == nil {
        return nil, p.newParserError(ErrImpossibleNullDenotation,
            n.Token.String(), *n.Token)
    }

    left, err := n.nullDenotation(p, n)
    if err != nil {
        return nil, err
    }

    // Collect left denotations as long as the left binding power is greater
    // than the initial right one

    for rightBinding < p.node.binding {
        var nleft *ASTNode

        n = p.node

        if n.leftDenotation == nil {

            if left.Token.Lline < n.Token.Lline {

                // If the impossible left denotation is on a new line
                // we might be parsing a new statement

                return left, nil
            }

            return nil, p.newParserError(ErrImpossibleLeftDenotation,
                n.Token.String(), *n.Token)
        }

        p.node, err = p.next()
        if err != nil {
            return nil, err
        }

        // Get the next left denotation

        nleft, err = n.leftDenotation(p, n, left)
        left = nleft

        if err != nil {
            return nil, err
        }
    }

    return left, nil
}
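
// How the loop above resolves precedence, using "1 + 2 * 3" as an assumed
// example input: run(0) parses 1 via its null denotation, then sees PLUS
// (binding 110 > 0) and calls its left denotation, which in turn calls
// run(110). That inner call parses 2, sees TIMES (binding 120 > 110) and lets
// TIMES take 2 as its left operand, so the result groups as 1 + (2 * 3).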

/*
next retrieves the next lexer token.
*/
func (p *parser) next() (*ASTNode, error) {
    var preComments []MetaData
    var postComments []MetaData

    token, more := p.tokens.Next()

    // Skip over pre comment tokens

    for more && token.ID == TokenPRECOMMENT {
        preComments = append(preComments, NewLexTokenInstance(token))
        token, more = p.tokens.Next()
    }

    // Skip over post comment tokens

    for more && token.ID == TokenPOSTCOMMENT {
        postComments = append(postComments, NewLexTokenInstance(token))
        token, more = p.tokens.Next()
    }

    if !more {

        // Unexpected end of input - the associated token is an empty error token

        return nil, p.newParserError(ErrUnexpectedEnd, "", token)

    } else if token.ID == TokenError {

        // There was a lexer error - wrap it in a parser error

        return nil, p.newParserError(ErrLexicalError, token.Val, token)

    } else if node, ok := astNodeMap[token.ID]; ok {

        // We got a normal AST component

        ret := node.instance(p, &token)

        ret.Meta = append(ret.Meta, preComments...) // Attach pre comments to the next AST node
        if len(postComments) > 0 && p.node != nil {
            p.node.Meta = append(p.node.Meta, postComments...) // Attach post comments to the previous AST node
        }

        return ret, nil
    }

    return nil, p.newParserError(ErrUnknownToken, fmt.Sprintf("id:%v (%v)", token.ID, token), token)
}

// Standard null denotation functions
// ==================================

/*
ndTerm is used for terminals.
*/
func ndTerm(p *parser, self *ASTNode) (*ASTNode, error) {
    return self, nil
}

/*
ndInner returns the inner expression of an enclosed block and discards the
block token. This method is used for brackets.
*/
func ndInner(p *parser, self *ASTNode) (*ASTNode, error) {

    // Get the inner expression

    exp, err := p.run(0)
    if err != nil {
        return nil, err
    }

    // Return the inner expression - discarding the bracket tokens

    return exp, skipToken(p, TokenRPAREN)
}

/*
ndPrefix is used for prefix operators.
*/
func ndPrefix(p *parser, self *ASTNode) (*ASTNode, error) {

    // Make sure a prefix will only prefix the next item

    val, err := p.run(self.binding + 20)
    if err != nil {
        return nil, err
    }

    self.Children = append(self.Children, val)

    return self, nil
}
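
// The operand above is parsed with a right binding power 20 higher than the
// operator's own binding. For the arithmetic MINUS (binding 110) this exceeds
// every infix binding in astNodeMap, so an assumed input like "-a * b" groups
// as (-a) * b. For NOT (binding 20) the operand parse still spans
// comparisons, so "not a > b" reads as not (a > b).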

// Null denotation functions for specific expressions
// ==================================================

/*
ndImport is used to parse imports.
*/
func ndImport(p *parser, self *ASTNode) (*ASTNode, error) {

    // Must specify a file path

    err := acceptChild(p, self, TokenSTRING)

    if err == nil {

        // Must specify AS

        if err = skipToken(p, TokenAS); err == nil {

            // Must specify an identifier

            err = acceptChild(p, self, TokenIDENTIFIER)
        }
    }

    return self, err
}
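
// The accepted shape is therefore an import keyword followed by a string with
// the file path, the AS keyword and an identifier - e.g. the assumed
// statement import "foo/bar" as foobar - producing an import node whose two
// children are the path string and the identifier.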

/*
ndIdentifier is used to parse identifiers and function calls.
*/
func ndIdentifier(p *parser, self *ASTNode) (*ASTNode, error) {
    var parseMore, parseSegment, parseFuncCall func(parent *ASTNode) error

    parseMore = func(current *ASTNode) error {
        var err error

        if p.node.Token.ID == TokenDOT {
            err = parseSegment(current)
        } else if p.node.Token.ID == TokenLPAREN {
            err = parseFuncCall(current)
        }

        return err
    }

    parseSegment = func(current *ASTNode) error {
        var err error
        var next *ASTNode

        if err = skipToken(p, TokenDOT); err == nil {
            next = p.node
            if err = acceptChild(p, current, TokenIDENTIFIER); err == nil {
                err = parseMore(next)
            }
        }

        return err
    }

    parseFuncCall = func(current *ASTNode) error {
        var exp *ASTNode

        err := skipToken(p, TokenLPAREN)

        fc := astNodeMap[TokenFUNCCALL].instance(p, nil)
        current.Children = append(current.Children, fc)

        // Read in parameters

        for err == nil && p.node.Token.ID != TokenRPAREN {

            // Parse all the expressions in the parameter list

            exp, err = p.run(0)
            if err == nil {
                fc.Children = append(fc.Children, exp)

                if p.node.Token.ID == TokenCOMMA {
                    err = skipToken(p, TokenCOMMA)
                }
            }
        }

        if err == nil {
            err = skipToken(p, TokenRPAREN)

            if err == nil {
                err = parseMore(current)
            }
        }

        return err
    }

    return self, parseMore(self)
}
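
// For an assumed chain like a.b(1, 2).c the identifier a becomes the root of
// the subtree, b is added as a child of a, the arguments 1 and 2 become
// children of a funccall node attached to b, and c in turn becomes another
// child of b following the call.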

// Standard left denotation functions
// ==================================

/*
ldInfix is used for infix operators.
*/
func ldInfix(p *parser, self *ASTNode, left *ASTNode) (*ASTNode, error) {

    right, err := p.run(self.binding)
    if err != nil {
        return nil, err
    }

    self.Children = append(self.Children, left)
    self.Children = append(self.Children, right)

    return self, nil
}
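
// Because the right-hand side is parsed with the operator's own binding power
// and run only continues while that value is strictly smaller than the next
// operator's binding, operators of equal precedence associate to the left:
// an assumed input "a - b - c" groups as (a - b) - c.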

// Helper functions
// ================

/*
hasMoreStatements returns true if there are more statements to parse.
*/
func hasMoreStatements(p *parser, currentNode *ASTNode) bool {
    nextNode := p.node

    if nextNode == nil || nextNode.Token.ID == TokenEOF {
        return false
    } else if nextNode.Token.ID == TokenSEMICOLON {
        return true
    }

    return currentNode != nil && currentNode.Token.Lline < nextNode.Token.Lline
}
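
// In other words, a following statement is recognised either by an explicit
// semicolon or by the next token starting on a later line than the current
// statement, so the assumed inputs "a := 1; b := 2" and the same two
// assignments on separate lines both yield a statements node with two
// children.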

/*
skipToken skips over a given token.
*/
func skipToken(p *parser, ids ...LexTokenID) error {
    var err error

    canSkip := func(id LexTokenID) bool {
        for _, i := range ids {
            if i == id {
                return true
            }
        }
        return false
    }

    if !canSkip(p.node.Token.ID) {
        if p.node.Token.ID == TokenEOF {
            return p.newParserError(ErrUnexpectedEnd, "", *p.node.Token)
        }
        return p.newParserError(ErrUnexpectedToken, p.node.Token.Val, *p.node.Token)
    }

    // This should never return an error unless we skip over EOF or complex
    // tokens like values

    p.node, err = p.next()

    return err
}

/*
acceptChild accepts the current token as a child.
*/
func acceptChild(p *parser, self *ASTNode, id LexTokenID) error {
    var err error

    current := p.node

    p.node, err = p.next()
    if err != nil {
        return err
    }

    if current.Token.ID == id {
        self.Children = append(self.Children, current)
        return nil
    }

    return p.newParserError(ErrUnexpectedToken, current.Token.Val, *current.Token)
}