parser.go

/*
 * Public Domain Software
 *
 * I (Matthias Ladkau) am the author of the source code in this file.
 * I have placed the source code in this file in the public domain.
 *
 * For further information see: http://creativecommons.org/publicdomain/zero/1.0/
 */

package parser

import (
	"fmt"
)

/*
Map of AST nodes corresponding to lexer tokens
*/
var astNodeMap map[LexTokenID]*ASTNode

func init() {
	astNodeMap = map[LexTokenID]*ASTNode{
		TokenEOF: {NodeEOF, nil, nil, nil, 0, ndTerm, nil},

		// Value tokens
		TokenCOMMENT:    {NodeCOMMENT, nil, nil, nil, 0, ndTerm, nil},
		TokenSTRING:     {NodeSTRING, nil, nil, nil, 0, ndTerm, nil},
		TokenNUMBER:     {NodeNUMBER, nil, nil, nil, 0, ndTerm, nil},
		TokenIDENTIFIER: {NodeIDENTIFIER, nil, nil, nil, 0, ndTerm, nil},

		// Constructed tokens
		TokenSTATEMENTS: {NodeSTATEMENTS, nil, nil, nil, 0, nil, nil},
		TokenSEMICOLON:  {"", nil, nil, nil, 0, nil, nil},
		/*
			TokenLIST:  {NodeLIST, nil, nil, nil, 0, nil, nil},
			TokenMAP:   {NodeMAP, nil, nil, nil, 0, nil, nil},
			TokenGUARD: {NodeGUARD, nil, nil, nil, 0, nil, nil},
		*/

		// Grouping symbols
		TokenLPAREN: {"", nil, nil, nil, 150, ndInner, nil},
		TokenRPAREN: {"", nil, nil, nil, 0, nil, nil},

		// Separators
		TokenCOMMA: {"", nil, nil, nil, 0, nil, nil},

		// Assignment statement
		TokenASSIGN: {NodeASSIGN, nil, nil, nil, 10, nil, ldInfix},

		// Simple arithmetic expressions
		TokenPLUS:   {NodePLUS, nil, nil, nil, 110, ndPrefix, ldInfix},
		TokenMINUS:  {NodeMINUS, nil, nil, nil, 110, ndPrefix, ldInfix},
		TokenTIMES:  {NodeTIMES, nil, nil, nil, 120, nil, ldInfix},
		TokenDIV:    {NodeDIV, nil, nil, nil, 120, nil, ldInfix},
		TokenDIVINT: {NodeDIVINT, nil, nil, nil, 120, nil, ldInfix},
		TokenMODINT: {NodeMODINT, nil, nil, nil, 120, nil, ldInfix},

		// Boolean operators
		TokenOR:  {NodeOR, nil, nil, nil, 30, nil, ldInfix},
		TokenAND: {NodeAND, nil, nil, nil, 40, nil, ldInfix},
		TokenNOT: {NodeNOT, nil, nil, nil, 20, ndPrefix, nil},

		// Condition operators
		TokenLIKE:      {NodeLIKE, nil, nil, nil, 60, nil, ldInfix},
		TokenIN:        {NodeIN, nil, nil, nil, 60, nil, ldInfix},
		TokenHASPREFIX: {NodeHASPREFIX, nil, nil, nil, 60, nil, ldInfix},
		TokenHASSUFFIX: {NodeHASSUFFIX, nil, nil, nil, 60, nil, ldInfix},
		TokenNOTIN:     {NodeNOTIN, nil, nil, nil, 60, nil, ldInfix},
		TokenGEQ:       {NodeGEQ, nil, nil, nil, 60, nil, ldInfix},
		TokenLEQ:       {NodeLEQ, nil, nil, nil, 60, nil, ldInfix},
		TokenNEQ:       {NodeNEQ, nil, nil, nil, 60, nil, ldInfix},
		TokenEQ:        {NodeEQ, nil, nil, nil, 60, nil, ldInfix},
		TokenGT:        {NodeGT, nil, nil, nil, 60, nil, ldInfix},
		TokenLT:        {NodeLT, nil, nil, nil, 60, nil, ldInfix},

		// Constants
		TokenFALSE: {NodeFALSE, nil, nil, nil, 0, ndTerm, nil},
		TokenTRUE:  {NodeTRUE, nil, nil, nil, 0, ndTerm, nil},
		TokenNULL:  {NodeNULL, nil, nil, nil, 0, ndTerm, nil},
	}
}
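
// Reading the table above: each entry is a positional ASTNode literal whose
// last three values are the binding power, the null denotation (how the token
// starts an expression) and the left denotation (how it continues one); the
// exact field names are an assumption based on how the values are used in
// this file. The binding power drives operator precedence in run() below; for
// example, TokenTIMES (120) binds tighter than TokenPLUS (110), so an input
// such as
//
//	1 + 2 * 3
//
// is built as a NodePLUS root whose right child is the NodeTIMES subtree.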

// Parser
// ======

/*
Parser data structure
*/
type parser struct {
	name   string          // Name to identify the input
	node   *ASTNode        // Current AST node
	tokens *LABuffer       // Buffer connected to the channel of lexer tokens
	rp     RuntimeProvider // Runtime provider which creates runtime components
}

/*
Parse parses a given input string and returns an AST.
*/
func Parse(name string, input string) (*ASTNode, error) {
	return ParseWithRuntime(name, input, nil)
}
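
// A minimal usage sketch (the input string and the comments are only an
// illustration; error handling depends on the caller):
//
//	ast, err := Parse("example", "a > 1 and b < 2")
//	if err != nil {
//		// handle lexer or parser error
//	}
//	// ast is the root *ASTNode of the parsed expression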

/*
ParseWithRuntime parses a given input string and returns an AST decorated with
runtime components.
*/
func ParseWithRuntime(name string, input string, rp RuntimeProvider) (*ASTNode, error) {

	// Create a new parser with a look-ahead buffer of 3

	p := &parser{name, nil, NewLABuffer(Lex(name, input), 3), rp}

	// Read and set initial AST node

	node, err := p.next()
	if err != nil {
		return nil, err
	}
	p.node = node

	n, err := p.run(0)

	if err == nil && hasMoreStatements(p, n) {

		st := astNodeMap[TokenSTATEMENTS].instance(p, nil)
		st.Children = append(st.Children, n)

		for err == nil && hasMoreStatements(p, n) {

			// Skip semicolons
			if p.node.Token.ID == TokenSEMICOLON {
				skipToken(p, TokenSEMICOLON)
			}

			n, err = p.run(0)
			st.Children = append(st.Children, n)
		}

		n = st
	}

	if err == nil && p.node != nil && p.node.Token.ID != TokenEOF {
		token := *p.node.Token
		err = p.newParserError(ErrUnexpectedEnd, fmt.Sprintf("extra token id:%v (%v)",
			token.ID, token), token)
	}

	return n, err
}
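
// When the input contains several statements, the individual trees are
// collected under a single statements root as described above. A sketch of
// the resulting shape (node names come from the map at the top of this file):
//
//	ast, _ := Parse("example", "a := 1; b := a + 2")
//	// ast.Name == NodeSTATEMENTS
//	// ast.Children[0] -> the assignment "a := 1"
//	// ast.Children[1] -> the assignment "b := a + 2"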

/*
run models the main parser function.
*/
func (p *parser) run(rightBinding int) (*ASTNode, error) {
	var err error

	n := p.node

	p.node, err = p.next()
	if err != nil {
		return nil, err
	}

	// Start with the null denotation of this statement / expression

	if n.nullDenotation == nil {
		return nil, p.newParserError(ErrImpossibleNullDenotation,
			n.Token.String(), *n.Token)
	}

	left, err := n.nullDenotation(p, n)
	if err != nil {
		return nil, err
	}

	// Collect left denotations as long as the left binding power is greater
	// than the initial right one

	for rightBinding < p.node.binding {
		var nleft *ASTNode

		n = p.node

		if n.leftDenotation == nil {

			if left.Token.Lline < n.Token.Lline {

				// If the impossible left denotation is on a new line
				// we might be parsing a new statement

				return left, nil
			}

			return nil, p.newParserError(ErrImpossibleLeftDenotation,
				n.Token.String(), *n.Token)
		}

		p.node, err = p.next()
		if err != nil {
			return nil, err
		}

		// Get the next left denotation

		nleft, err = n.leftDenotation(p, n, left)
		left = nleft

		if err != nil {
			return nil, err
		}
	}

	return left, nil
}
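
// run implements a Pratt-style precedence climb: the null denotation builds an
// initial left-hand side, and the loop keeps attaching left denotations while
// the upcoming token binds more tightly than rightBinding. A rough trace for
// "1 + 2 * 3" using the binding powers from astNodeMap (a sketch, not actual
// output):
//
//	run(0)
//	  ndTerm(1)              -> left = 1
//	  '+' binds 110 > 0      -> ldInfix('+') calls run(110)
//	    ndTerm(2)            -> left = 2
//	    '*' binds 120 > 110  -> ldInfix('*') calls run(120), which returns 3
//	  result: 1 + (2 * 3)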

/*
next retrieves the next lexer token.
*/
func (p *parser) next() (*ASTNode, error) {
	token, more := p.tokens.Next()

	if !more {

		// Unexpected end of input - the associated token is an empty error token

		return nil, p.newParserError(ErrUnexpectedEnd, "", token)

	} else if token.ID == TokenError {

		// There was a lexer error; wrap it in a parser error

		return nil, p.newParserError(ErrLexicalError, token.Val, token)

	} else if node, ok := astNodeMap[token.ID]; ok {

		// We got a normal AST component

		return node.instance(p, &token), nil
	}

	return nil, p.newParserError(ErrUnknownToken, fmt.Sprintf("id:%v (%v)", token.ID, token), token)
}

// Standard null denotation functions
// ==================================

/*
ndTerm is used for terminals.
*/
func ndTerm(p *parser, self *ASTNode) (*ASTNode, error) {
	return self, nil
}

/*
ndInner returns the inner expression of an enclosed block and discards the
block token. This method is used for brackets.
*/
func ndInner(p *parser, self *ASTNode) (*ASTNode, error) {

	// Get the inner expression

	exp, err := p.run(0)
	if err != nil {
		return nil, err
	}

	// Return the inner expression, discarding the bracket tokens

	return exp, skipToken(p, TokenRPAREN)
}

/*
ndPrefix is used for prefix operators.
*/
func ndPrefix(p *parser, self *ASTNode) (*ASTNode, error) {

	// Make sure a prefix will only prefix the next item

	val, err := p.run(self.binding + 20)
	if err != nil {
		return nil, err
	}

	self.Children = append(self.Children, val)

	return self, nil
}
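
// The right binding power self.binding + 20 makes a prefix operator consume
// only the operand that immediately follows it. With the values from
// astNodeMap, a unary minus (binding 110) parses its operand at 130, so '*'
// (binding 120) cannot bind into the prefix (a sketch):
//
//	-a * b   parses as   (-a) * b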

// Standard left denotation functions
// ==================================

/*
ldInfix is used for infix operators.
*/
func ldInfix(p *parser, self *ASTNode, left *ASTNode) (*ASTNode, error) {

	right, err := p.run(self.binding)
	if err != nil {
		return nil, err
	}

	self.Children = append(self.Children, left)
	self.Children = append(self.Children, right)

	return self, nil
}
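
// Because ldInfix parses the right-hand side with the operator's own binding
// power, operators of equal precedence associate to the left (a sketch):
//
//	a - b - c   parses as   (a - b) - c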

// Helper functions
// ================

/*
hasMoreStatements returns true if there are more statements to parse.
*/
func hasMoreStatements(p *parser, currentNode *ASTNode) bool {
	nextNode := p.node

	if nextNode == nil || nextNode.Token.ID == TokenEOF {
		return false
	} else if nextNode.Token.ID == TokenSEMICOLON {
		return true
	}

	return currentNode != nil && currentNode.Token.Lline < nextNode.Token.Lline
}
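
// In practice a new statement therefore starts either after an explicit
// semicolon or when the next token begins on a later line than the current
// statement, so (a sketch)
//
//	a := 1
//	b := 2
//
// parses as two statements even without a ';'.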

/*
skipToken skips over a given token.
*/
func skipToken(p *parser, ids ...LexTokenID) error {
	var err error

	canSkip := func(id LexTokenID) bool {
		for _, i := range ids {
			if i == id {
				return true
			}
		}
		return false
	}

	if !canSkip(p.node.Token.ID) {

		if p.node.Token.ID == TokenEOF {
			return p.newParserError(ErrUnexpectedEnd, "", *p.node.Token)
		}

		return p.newParserError(ErrUnexpectedToken, p.node.Token.Val, *p.node.Token)
	}

	// This should never return an error unless we skip over EOF or complex tokens
	// like values

	p.node, err = p.next()
	return err
}

/*
acceptChild accepts the current token as a child.
*/
func acceptChild(p *parser, self *ASTNode, id LexTokenID) error {
	var err error

	current := p.node

	p.node, err = p.next()
	if err != nil {
		return err
	}

	if current.Token.ID == id {
		self.Children = append(self.Children, current)
		return nil
	}

	return p.newParserError(ErrUnexpectedToken, current.Token.Val, *current.Token)
}