// parser.go
  1. /*
  2. * Public Domain Software
  3. *
  4. * I (Matthias Ladkau) am the author of the source code in this file.
  5. * I have placed the source code in this file in the public domain.
  6. *
  7. * For further information see: http://creativecommons.org/publicdomain/zero/1.0/
  8. */
  9. package parser
  10. import (
  11. "fmt"
  12. )
/*
Map of AST nodes corresponding to lexer tokens. The map determines how a given
sequence of lexer tokens are organized into an AST. It is populated once in
init below and only read afterwards.
*/
var astNodeMap map[LexTokenID]*ASTNode
// init populates astNodeMap with one template AST node per lexer token.
// The binding power (6th value) and the null/left denotation functions
// (7th/8th values) drive the operator precedence loop in run.
func init() {
	astNodeMap = map[LexTokenID]*ASTNode{
		// NOTE(review): values are positional ASTNode fields - they look like
		// name, token, meta, children, runtime, binding power, null denotation,
		// left denotation; confirm against the ASTNode declaration.
		TokenEOF: {NodeEOF, nil, nil, nil, nil, 0, ndTerm, nil},

		// Value tokens

		TokenSTRING:     {NodeSTRING, nil, nil, nil, nil, 0, ndTerm, nil},
		TokenNUMBER:     {NodeNUMBER, nil, nil, nil, nil, 0, ndTerm, nil},
		TokenIDENTIFIER: {NodeIDENTIFIER, nil, nil, nil, nil, 0, ndTerm, nil},

		// Constructed tokens

		TokenSTATEMENTS: {NodeSTATEMENTS, nil, nil, nil, nil, 0, nil, nil},
		TokenSEMICOLON:  {"", nil, nil, nil, nil, 0, nil, nil},

		/*
			NOTE(review): these disabled entries list one value fewer than the
			live entries above - add the missing field before re-enabling:

			TokenLIST: {NodeLIST, nil, nil, nil, 0, nil, nil},
			TokenMAP: {NodeMAP, nil, nil, nil, 0, nil, nil},
			TokenGUARD: {NodeGUARD, nil, nil, nil, 0, nil, nil},
		*/

		// Grouping symbols - LPAREN binds very tightly and produces no node
		// of its own (ndInner returns the inner expression)

		TokenLPAREN: {"", nil, nil, nil, nil, 150, ndInner, nil},
		TokenRPAREN: {"", nil, nil, nil, nil, 0, nil, nil},

		// Separators

		TokenCOMMA: {"", nil, nil, nil, nil, 0, nil, nil},

		// Assignment statement - lowest operator binding power

		TokenASSIGN: {NodeASSIGN, nil, nil, nil, nil, 10, nil, ldInfix},

		// Simple arithmetic expressions - PLUS and MINUS double as
		// prefix (sign) operators via ndPrefix

		TokenPLUS:   {NodePLUS, nil, nil, nil, nil, 110, ndPrefix, ldInfix},
		TokenMINUS:  {NodeMINUS, nil, nil, nil, nil, 110, ndPrefix, ldInfix},
		TokenTIMES:  {NodeTIMES, nil, nil, nil, nil, 120, nil, ldInfix},
		TokenDIV:    {NodeDIV, nil, nil, nil, nil, 120, nil, ldInfix},
		TokenDIVINT: {NodeDIVINT, nil, nil, nil, nil, 120, nil, ldInfix},
		TokenMODINT: {NodeMODINT, nil, nil, nil, nil, 120, nil, ldInfix},

		// Boolean operators - bind more loosely than the condition
		// operators below (NOT < OR < AND < conditions)

		TokenOR:  {NodeOR, nil, nil, nil, nil, 30, nil, ldInfix},
		TokenAND: {NodeAND, nil, nil, nil, nil, 40, nil, ldInfix},
		TokenNOT: {NodeNOT, nil, nil, nil, nil, 20, ndPrefix, nil},

		// Condition operators - all share the same binding power

		TokenLIKE:      {NodeLIKE, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenIN:        {NodeIN, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenHASPREFIX: {NodeHASPREFIX, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenHASSUFFIX: {NodeHASSUFFIX, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenNOTIN:     {NodeNOTIN, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenGEQ:       {NodeGEQ, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenLEQ:       {NodeLEQ, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenNEQ:       {NodeNEQ, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenEQ:        {NodeEQ, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenGT:        {NodeGT, nil, nil, nil, nil, 60, nil, ldInfix},
		TokenLT:        {NodeLT, nil, nil, nil, nil, 60, nil, ldInfix},

		// Constants - terminals like the value tokens above

		TokenFALSE: {NodeFALSE, nil, nil, nil, nil, 0, ndTerm, nil},
		TokenTRUE:  {NodeTRUE, nil, nil, nil, nil, 0, ndTerm, nil},
		TokenNULL:  {NodeNULL, nil, nil, nil, nil, 0, ndTerm, nil},
	}
}
  69. // Parser
  70. // ======
/*
parser data structure. It holds the current look-ahead node and the token
buffer; all parsing functions advance p.node as a side effect via p.next.
*/
type parser struct {
	name   string          // Name to identify the input
	node   *ASTNode        // Current ast node (one-token look-ahead)
	tokens *LABuffer       // Buffer which is connected to the channel which contains lex tokens
	rp     RuntimeProvider // Runtime provider which creates runtime components (may be nil)
}
  80. /*
  81. Parse parses a given input string and returns an AST.
  82. */
  83. func Parse(name string, input string) (*ASTNode, error) {
  84. return ParseWithRuntime(name, input, nil)
  85. }
/*
ParseWithRuntime parses a given input string and returns an AST decorated with
runtime components. Multiple statements (separated by semicolons or newlines)
are collected under a single STATEMENTS node.
*/
func ParseWithRuntime(name string, input string, rp RuntimeProvider) (*ASTNode, error) {

	// Create a new parser with a look-ahead buffer of 3
	p := &parser{name, nil, NewLABuffer(Lex(name, input), 3), rp}

	// Read and set initial AST node
	node, err := p.next()

	if err != nil {
		return nil, err
	}

	p.node = node

	// Parse the first statement / expression
	n, err := p.run(0)

	if err == nil && hasMoreStatements(p, n) {

		// More input follows - wrap all statements in a STATEMENTS node
		st := astNodeMap[TokenSTATEMENTS].instance(p, nil)
		st.Children = append(st.Children, n)

		for err == nil && hasMoreStatements(p, n) {

			// Skip semicolons
			// (error return deliberately ignored: the ID was just checked,
			// so skipToken can only fail on a lexer/EOF follow-up error
			// which the next p.run call surfaces anyway)
			if p.node.Token.ID == TokenSEMICOLON {
				skipToken(p, TokenSEMICOLON)
			}

			n, err = p.run(0)
			st.Children = append(st.Children, n)
		}

		n = st
	}

	// All input must have been consumed - leftover tokens are an error
	// NOTE(review): this reuses the ErrUnexpectedEnd sentinel for "extra
	// token"; confirm callers do not rely on a more specific error code.
	if err == nil && p.node != nil && p.node.Token.ID != TokenEOF {
		token := *p.node.Token
		err = p.newParserError(ErrUnexpectedEnd, fmt.Sprintf("extra token id:%v (%v)",
			token.ID, token), token)
	}

	return n, err
}
/*
run models the main parser function. It implements a top-down operator
precedence loop: rightBinding is the binding power of the operator to the
left of the expression being parsed; operators with a higher binding power
to the right are folded into the current left-hand side.
*/
func (p *parser) run(rightBinding int) (*ASTNode, error) {
	var err error

	// Take the current look-ahead node and advance past it
	n := p.node

	p.node, err = p.next()
	if err != nil {
		return nil, err
	}

	// Start with the null denotation of this statement / expression
	// (how the token behaves with nothing to its left)
	if n.nullDenotation == nil {
		return nil, p.newParserError(ErrImpossibleNullDenotation,
			n.Token.String(), *n.Token)
	}

	left, err := n.nullDenotation(p, n)
	if err != nil {
		return nil, err
	}

	// Collect left denotations as long as the left binding power is greater
	// than the initial right one
	for rightBinding < p.node.binding {
		var nleft *ASTNode

		n = p.node

		if n.leftDenotation == nil {

			if left.Token.Lline < n.Token.Lline {

				// If the impossible left denotation is on a new line
				// we might be parsing a new statement

				return left, nil
			}

			return nil, p.newParserError(ErrImpossibleLeftDenotation,
				n.Token.String(), *n.Token)
		}

		// Advance past the operator token before running its denotation
		p.node, err = p.next()
		if err != nil {
			return nil, err
		}

		// Get the next left denotation (folds 'left' into the operator node)
		nleft, err = n.leftDenotation(p, n, left)

		left = nleft

		if err != nil {
			return nil, err
		}
	}

	return left, nil
}
/*
next retrieves the next lexer token and wraps it in an AST node. Comment
tokens are skipped: pre comments are attached as meta data to the returned
(following) node, post comments to the previous node (p.node).
*/
func (p *parser) next() (*ASTNode, error) {
	var preComments []*LexToken
	var postComments []*LexToken

	token, more := p.tokens.Next()

	// Skip over pre comment token
	for more && token.ID == TokenPRECOMMENT {
		preComments = append(preComments, NewLexTokenInstance(token))
		token, more = p.tokens.Next()
	}

	// Skip over post comment token
	// NOTE(review): a pre comment appearing after a post comment would not
	// be skipped by either loop - presumably the lexer never emits that
	// interleaving; confirm.
	for more && token.ID == TokenPOSTCOMMENT {
		postComments = append(postComments, NewLexTokenInstance(token))
		token, more = p.tokens.Next()
	}

	if !more {

		// Unexpected end of input - the associated token is an empty error token

		return nil, p.newParserError(ErrUnexpectedEnd, "", token)

	} else if token.ID == TokenError {

		// There was a lexer error wrap it in a parser error

		return nil, p.newParserError(ErrLexicalError, token.Val, token)

	} else if node, ok := astNodeMap[token.ID]; ok {

		// We got a normal AST component

		ret := node.instance(p, &token)

		ret.Meta = append(ret.Meta, preComments...) // Attach pre comments to the next AST node
		if len(postComments) > 0 && p.node != nil {
			p.node.Meta = append(p.node.Meta, postComments...) // Attach post comments to the previous AST node
		}

		return ret, nil
	}

	return nil, p.newParserError(ErrUnknownToken, fmt.Sprintf("id:%v (%v)", token.ID, token), token)
}
  200. // Standard null denotation functions
  201. // ==================================
  202. /*
  203. ndTerm is used for terminals.
  204. */
  205. func ndTerm(p *parser, self *ASTNode) (*ASTNode, error) {
  206. return self, nil
  207. }
  208. /*
  209. ndInner returns the inner expression of an enclosed block and discard the
  210. block token. This method is used for brackets.
  211. */
  212. func ndInner(p *parser, self *ASTNode) (*ASTNode, error) {
  213. // Get the inner expression
  214. exp, err := p.run(0)
  215. if err != nil {
  216. return nil, err
  217. }
  218. // We return here the inner expression - discarding the bracket tokens
  219. return exp, skipToken(p, TokenRPAREN)
  220. }
  221. /*
  222. ndPrefix is used for prefix operators.
  223. */
  224. func ndPrefix(p *parser, self *ASTNode) (*ASTNode, error) {
  225. // Make sure a prefix will only prefix the next item
  226. val, err := p.run(self.binding + 20)
  227. if err != nil {
  228. return nil, err
  229. }
  230. self.Children = append(self.Children, val)
  231. return self, nil
  232. }
  233. // Standard left denotation functions
  234. // ==================================
  235. /*
  236. ldInfix is used for infix operators.
  237. */
  238. func ldInfix(p *parser, self *ASTNode, left *ASTNode) (*ASTNode, error) {
  239. right, err := p.run(self.binding)
  240. if err != nil {
  241. return nil, err
  242. }
  243. self.Children = append(self.Children, left)
  244. self.Children = append(self.Children, right)
  245. return self, nil
  246. }
  247. // Helper functions
  248. // ================
  249. /*
  250. hasMoreStatements returns true if there are more statements to parse.
  251. */
  252. func hasMoreStatements(p *parser, currentNode *ASTNode) bool {
  253. nextNode := p.node
  254. if nextNode == nil || nextNode.Token.ID == TokenEOF {
  255. return false
  256. } else if nextNode.Token.ID == TokenSEMICOLON {
  257. return true
  258. }
  259. return currentNode != nil && currentNode.Token.Lline < nextNode.Token.Lline
  260. }
  261. /*
  262. skipToken skips over a given token.
  263. */
  264. func skipToken(p *parser, ids ...LexTokenID) error {
  265. var err error
  266. canSkip := func(id LexTokenID) bool {
  267. for _, i := range ids {
  268. if i == id {
  269. return true
  270. }
  271. }
  272. return false
  273. }
  274. if !canSkip(p.node.Token.ID) {
  275. if p.node.Token.ID == TokenEOF {
  276. return p.newParserError(ErrUnexpectedEnd, "", *p.node.Token)
  277. }
  278. return p.newParserError(ErrUnexpectedToken, p.node.Token.Val, *p.node.Token)
  279. }
  280. // This should never return an error unless we skip over EOF or complex tokens
  281. // like values
  282. p.node, err = p.next()
  283. return err
  284. }
  285. /*
  286. acceptChild accepts the current token as a child.
  287. */
  288. func acceptChild(p *parser, self *ASTNode, id LexTokenID) error {
  289. var err error
  290. current := p.node
  291. p.node, err = p.next()
  292. if err != nil {
  293. return err
  294. }
  295. if current.Token.ID == id {
  296. self.Children = append(self.Children, current)
  297. return nil
  298. }
  299. return p.newParserError(ErrUnexpectedToken, current.Token.Val, *current.Token)
  300. }