Browse Source

feat: Adding metadata storage to AST for ECAL

Matthias Ladkau 3 years ago
parent
commit
3e863f63c7

+ 5 - 5
lang/ecal/parser/const.go

@@ -53,10 +53,11 @@ const (
 	TokenEOF                     // End-of-file token
 	TokenANY                     // Unspecified token (used when building an AST from a Go map structure)
 
-	TokenCOMMENT    // Comment
-	TokenSTRING     // String constant
-	TokenNUMBER     // Number constant
-	TokenIDENTIFIER // Idendifier
+	TokenPRECOMMENT  // Comment /* ... */
+	TokenPOSTCOMMENT // Comment # ...
+	TokenSTRING      // String constant
+	TokenNUMBER      // Number constant
+	TokenIDENTIFIER  // Identifier
 
 	// Constructed tokens which are generated by the parser not the lexer
 
@@ -174,7 +175,6 @@ Available parser AST node types
 const (
 	NodeEOF = "EOF"
 
-	NodeCOMMENT    = "comment"    // Comment
 	NodeSTRING     = "string"     // String constant
 	NodeNUMBER     = "number"     // Number constant
 	NodeIDENTIFIER = "identifier" // Idendifier

+ 18 - 9
lang/ecal/parser/helper.go

@@ -25,10 +25,11 @@ import (
 ASTNode models a node in the AST
 */
 type ASTNode struct {
-	Name     string     // Name of the node
-	Token    *LexToken  // Lexer token of this ASTNode
-	Children []*ASTNode // Child nodes
-	Runtime  Runtime    // Runtime component for this ASTNode
+	Name     string      // Name of the node
+	Token    *LexToken   // Lexer token of this ASTNode
+	Meta     []*LexToken // Metadata for this ASTNode (e.g. comments)
+	Children []*ASTNode  // Child nodes
+	Runtime  Runtime     // Runtime component for this ASTNode
 
 	binding        int                                                             // Binding power of this node
 	nullDenotation func(p *parser, self *ASTNode) (*ASTNode, error)                // Configure token as beginning node
@@ -40,7 +41,7 @@ Create a new instance of this ASTNode which is connected to a concrete lexer tok
 */
 func (n *ASTNode) instance(p *parser, t *LexToken) *ASTNode {
 
-	ret := &ASTNode{n.Name, t, make([]*ASTNode, 0, 2), nil, n.binding, n.nullDenotation, n.leftDenotation}
+	ret := &ASTNode{n.Name, t, nil, make([]*ASTNode, 0, 2), nil, n.binding, n.nullDenotation, n.leftDenotation}
 
 	if p.rp != nil {
 		ret.Runtime = p.rp.Runtime(ret)
@@ -121,9 +122,7 @@ func (n *ASTNode) levelString(indent int, buf *bytes.Buffer, printChildren int)
 
 	buf.WriteString(stringutil.GenerateRollingString(" ", indent*2))
 
-	if n.Name == NodeCOMMENT {
-		buf.WriteString(fmt.Sprintf("%v: %20v", n.Name, n.Token.Val))
-	} else if n.Name == NodeSTRING {
+	if n.Name == NodeSTRING {
 		buf.WriteString(fmt.Sprintf("%v: '%v'", n.Name, n.Token.Val))
 	} else if n.Name == NodeNUMBER {
 		buf.WriteString(fmt.Sprintf("%v: %v", n.Name, n.Token.Val))
@@ -133,6 +132,16 @@ func (n *ASTNode) levelString(indent int, buf *bytes.Buffer, printChildren int)
 		buf.WriteString(n.Name)
 	}
 
+	if len(n.Meta) > 0 {
+		buf.WriteString(" # ")
+		for i, c := range n.Meta {
+			buf.WriteString(c.Val)
+			if i < len(n.Meta)-1 {
+				buf.WriteString(" ")
+			}
+		}
+	}
+
 	buf.WriteString("\n")
 
 	if printChildren == -1 || printChildren > 0 {
@@ -280,7 +289,7 @@ func ASTFromJSONObject(jsonAST map[string]interface{}) (*ASTNode, error) {
 		linepos,            // Lpos
 	}
 
-	return &ASTNode{fmt.Sprint(name), token, astChildren, nil, 0, nil, nil}, nil
+	return &ASTNode{fmt.Sprint(name), token, nil, astChildren, nil, 0, nil, nil}, nil
 }
 
 // Look ahead buffer

+ 5 - 5
lang/ecal/parser/helper_test.go

@@ -34,7 +34,7 @@ Pos is different 2 vs 1
 Val is different 1 vs 2
 Lpos is different 3 vs 2
 {
-  "ID": 5,
+  "ID": 6,
   "Pos": 2,
   "Val": "1",
   "Identifier": false,
@@ -43,7 +43,7 @@ Lpos is different 3 vs 2
 }
 vs
 {
-  "ID": 5,
+  "ID": 6,
   "Pos": 1,
   "Val": "2",
   "Identifier": false,
@@ -76,11 +76,11 @@ number: 2
 
 Name is different number vs identifier
 Token is different:
-ID is different 5 vs 6
+ID is different 6 vs 7
 Val is different 1 vs a
 Identifier is different false vs true
 {
-  "ID": 5,
+  "ID": 6,
   "Pos": 1,
   "Val": "1",
   "Identifier": false,
@@ -89,7 +89,7 @@ Identifier is different false vs true
 }
 vs
 {
-  "ID": 6,
+  "ID": 7,
   "Pos": 1,
   "Val": "a",
   "Identifier": true,

+ 21 - 4
lang/ecal/parser/lexer.go

@@ -35,6 +35,20 @@ type LexToken struct {
 	Lpos       int        // Position in the input line this token appears
 }
 
+/*
+NewLexTokenInstance creates a new LexToken object instance from the given LexToken values.
+*/
+func NewLexTokenInstance(t LexToken) *LexToken {
+	return &LexToken{
+		t.ID,
+		t.Pos,
+		t.Val,
+		t.Identifier,
+		t.Lline,
+		t.Lpos,
+	}
+}
+
 /*
 Equal checks if this LexToken equals another LexToken. Returns also a message describing
 what is the found difference.
@@ -112,8 +126,11 @@ func (t LexToken) String() string {
 	case t.ID == TokenError:
 		return fmt.Sprintf("Error: %s (%s)", t.Val, t.PosString())
 
-	case t.ID == TokenCOMMENT:
-		return fmt.Sprintf("c:'%s'", t.Val)
+	case t.ID == TokenPRECOMMENT:
+		return fmt.Sprintf("/* %s */", t.Val)
+
+	case t.ID == TokenPOSTCOMMENT:
+		return fmt.Sprintf("# %s", t.Val)
 
 	case t.ID > TOKENodeSYMBOLS && t.ID < TOKENodeKEYWORDS:
 		return fmt.Sprintf("%s", strings.ToUpper(t.Val))
@@ -667,7 +684,7 @@ func lexComment(l *lexer) lexFunc {
 			r = l.next(0)
 		}
 
-		l.emitTokenAndValue(TokenCOMMENT, l.input[l.start:l.pos-1], false)
+		l.emitTokenAndValue(TokenPOSTCOMMENT, l.input[l.start:l.pos], false)
 
 		if r == RuneEOF {
 			return nil
@@ -700,7 +717,7 @@ func lexComment(l *lexer) lexFunc {
 			}
 		}
 
-		l.emitTokenAndValue(TokenCOMMENT, l.input[l.start:l.pos-1], false)
+		l.emitTokenAndValue(TokenPRECOMMENT, l.input[l.start:l.pos-1], false)
 
 		// Consume final /
 

+ 8 - 8
lang/ecal/parser/lexer_test.go

@@ -62,14 +62,14 @@ func TestNextItem(t *testing.T) {
 func TestEquals(t *testing.T) {
 	l := LexToList("mytest", "not\n test")
 
-	if ok, msg := l[0].Equals(l[1], false); ok || msg != `ID is different 46 vs 6
+	if ok, msg := l[0].Equals(l[1], false); ok || msg != `ID is different 47 vs 7
 Pos is different 0 vs 5
 Val is different not vs test
 Identifier is different false vs true
 Lline is different 1 vs 2
 Lpos is different 1 vs 2
 {
-  "ID": 46,
+  "ID": 47,
   "Pos": 0,
   "Val": "not",
   "Identifier": false,
@@ -78,7 +78,7 @@ Lpos is different 1 vs 2
 }
 vs
 {
-  "ID": 6,
+  "ID": 7,
   "Pos": 5,
   "Val": "test",
   "Identifier": true,
@@ -248,9 +248,9 @@ func TestCommentLexing(t *testing.T) {
 	input := `name /* foo
 		bar
 	x*/ 'b/* - */la' /*test*/`
-	if res := LexToList("mytest", input); fmt.Sprint(res) != `["name" c:' foo
+	if res := LexToList("mytest", input); fmt.Sprint(res) != `["name" /*  foo
 		bar
-	x' "b/* - */la" c:'test' EOF]` {
+	x */ "b/* - */la" /* test */ EOF]` {
 		t.Error("Unexpected lexer result:", res)
 		return
 	}
@@ -265,17 +265,17 @@ func TestCommentLexing(t *testing.T) {
 	input = `foo
    1+ 2 # Some comment
 bar`
-	if res := LexToList("mytest", input); fmt.Sprint(res) != `["foo" v:"1" + v:"2" c:' Some comment' "bar" EOF]` {
+	if res := LexToList("mytest", input); fmt.Sprint(res) != `["foo" v:"1" + v:"2" #  Some comment
+ "bar" EOF]` {
 		t.Error("Unexpected lexer result:", res)
 		return
 	}
 
 	input = `1+ 2 # Some comment`
-	if res := LexToList("mytest", input); fmt.Sprint(res) != `[v:"1" + v:"2" c:' Some commen' EOF]` {
+	if res := LexToList("mytest", input); fmt.Sprint(res) != `[v:"1" + v:"2" #  Some comment EOF]` {
 		t.Error("Unexpected lexer result:", res)
 		return
 	}
-
 }
 
 func TestSinkLexing(t *testing.T) {

+ 59 - 36
lang/ecal/parser/parser.go

@@ -14,25 +14,25 @@ import (
 )
 
 /*
-Map of AST nodes corresponding to lexer tokens
+Map of AST nodes corresponding to lexer tokens. The map determines how a given
+sequence of lexer tokens is organized into an AST.
 */
 var astNodeMap map[LexTokenID]*ASTNode
 
 func init() {
 	astNodeMap = map[LexTokenID]*ASTNode{
-		TokenEOF: {NodeEOF, nil, nil, nil, 0, ndTerm, nil},
+		TokenEOF: {NodeEOF, nil, nil, nil, nil, 0, ndTerm, nil},
 
 		// Value tokens
 
-		TokenCOMMENT:    {NodeCOMMENT, nil, nil, nil, 0, ndTerm, nil},
-		TokenSTRING:     {NodeSTRING, nil, nil, nil, 0, ndTerm, nil},
-		TokenNUMBER:     {NodeNUMBER, nil, nil, nil, 0, ndTerm, nil},
-		TokenIDENTIFIER: {NodeIDENTIFIER, nil, nil, nil, 0, ndTerm, nil},
+		TokenSTRING:     {NodeSTRING, nil, nil, nil, nil, 0, ndTerm, nil},
+		TokenNUMBER:     {NodeNUMBER, nil, nil, nil, nil, 0, ndTerm, nil},
+		TokenIDENTIFIER: {NodeIDENTIFIER, nil, nil, nil, nil, 0, ndTerm, nil},
 
 		// Constructed tokens
 
-		TokenSTATEMENTS: {NodeSTATEMENTS, nil, nil, nil, 0, nil, nil},
-		TokenSEMICOLON:  {"", nil, nil, nil, 0, nil, nil},
+		TokenSTATEMENTS: {NodeSTATEMENTS, nil, nil, nil, nil, 0, nil, nil},
+		TokenSEMICOLON:  {"", nil, nil, nil, nil, 0, nil, nil},
 		/*
 			TokenLIST:       {NodeLIST, nil, nil, nil, 0, nil, nil},
 			TokenMAP:        {NodeMAP, nil, nil, nil, 0, nil, nil},
@@ -41,52 +41,52 @@ func init() {
 
 		// Grouping symbols
 
-		TokenLPAREN: {"", nil, nil, nil, 150, ndInner, nil},
-		TokenRPAREN: {"", nil, nil, nil, 0, nil, nil},
+		TokenLPAREN: {"", nil, nil, nil, nil, 150, ndInner, nil},
+		TokenRPAREN: {"", nil, nil, nil, nil, 0, nil, nil},
 
 		// Separators
 
-		TokenCOMMA: {"", nil, nil, nil, 0, nil, nil},
+		TokenCOMMA: {"", nil, nil, nil, nil, 0, nil, nil},
 
 		// Assignment statement
 
-		TokenASSIGN: {NodeASSIGN, nil, nil, nil, 10, nil, ldInfix},
+		TokenASSIGN: {NodeASSIGN, nil, nil, nil, nil, 10, nil, ldInfix},
 
 		// Simple arithmetic expressions
 
-		TokenPLUS:   {NodePLUS, nil, nil, nil, 110, ndPrefix, ldInfix},
-		TokenMINUS:  {NodeMINUS, nil, nil, nil, 110, ndPrefix, ldInfix},
-		TokenTIMES:  {NodeTIMES, nil, nil, nil, 120, nil, ldInfix},
-		TokenDIV:    {NodeDIV, nil, nil, nil, 120, nil, ldInfix},
-		TokenDIVINT: {NodeDIVINT, nil, nil, nil, 120, nil, ldInfix},
-		TokenMODINT: {NodeMODINT, nil, nil, nil, 120, nil, ldInfix},
+		TokenPLUS:   {NodePLUS, nil, nil, nil, nil, 110, ndPrefix, ldInfix},
+		TokenMINUS:  {NodeMINUS, nil, nil, nil, nil, 110, ndPrefix, ldInfix},
+		TokenTIMES:  {NodeTIMES, nil, nil, nil, nil, 120, nil, ldInfix},
+		TokenDIV:    {NodeDIV, nil, nil, nil, nil, 120, nil, ldInfix},
+		TokenDIVINT: {NodeDIVINT, nil, nil, nil, nil, 120, nil, ldInfix},
+		TokenMODINT: {NodeMODINT, nil, nil, nil, nil, 120, nil, ldInfix},
 
 		// Boolean operators
 
-		TokenOR:  {NodeOR, nil, nil, nil, 30, nil, ldInfix},
-		TokenAND: {NodeAND, nil, nil, nil, 40, nil, ldInfix},
-		TokenNOT: {NodeNOT, nil, nil, nil, 20, ndPrefix, nil},
+		TokenOR:  {NodeOR, nil, nil, nil, nil, 30, nil, ldInfix},
+		TokenAND: {NodeAND, nil, nil, nil, nil, 40, nil, ldInfix},
+		TokenNOT: {NodeNOT, nil, nil, nil, nil, 20, ndPrefix, nil},
 
 		// Condition operators
 
-		TokenLIKE:      {NodeLIKE, nil, nil, nil, 60, nil, ldInfix},
-		TokenIN:        {NodeIN, nil, nil, nil, 60, nil, ldInfix},
-		TokenHASPREFIX: {NodeHASPREFIX, nil, nil, nil, 60, nil, ldInfix},
-		TokenHASSUFFIX: {NodeHASSUFFIX, nil, nil, nil, 60, nil, ldInfix},
-		TokenNOTIN:     {NodeNOTIN, nil, nil, nil, 60, nil, ldInfix},
+		TokenLIKE:      {NodeLIKE, nil, nil, nil, nil, 60, nil, ldInfix},
+		TokenIN:        {NodeIN, nil, nil, nil, nil, 60, nil, ldInfix},
+		TokenHASPREFIX: {NodeHASPREFIX, nil, nil, nil, nil, 60, nil, ldInfix},
+		TokenHASSUFFIX: {NodeHASSUFFIX, nil, nil, nil, nil, 60, nil, ldInfix},
+		TokenNOTIN:     {NodeNOTIN, nil, nil, nil, nil, 60, nil, ldInfix},
 
-		TokenGEQ: {NodeGEQ, nil, nil, nil, 60, nil, ldInfix},
-		TokenLEQ: {NodeLEQ, nil, nil, nil, 60, nil, ldInfix},
-		TokenNEQ: {NodeNEQ, nil, nil, nil, 60, nil, ldInfix},
-		TokenEQ:  {NodeEQ, nil, nil, nil, 60, nil, ldInfix},
-		TokenGT:  {NodeGT, nil, nil, nil, 60, nil, ldInfix},
-		TokenLT:  {NodeLT, nil, nil, nil, 60, nil, ldInfix},
+		TokenGEQ: {NodeGEQ, nil, nil, nil, nil, 60, nil, ldInfix},
+		TokenLEQ: {NodeLEQ, nil, nil, nil, nil, 60, nil, ldInfix},
+		TokenNEQ: {NodeNEQ, nil, nil, nil, nil, 60, nil, ldInfix},
+		TokenEQ:  {NodeEQ, nil, nil, nil, nil, 60, nil, ldInfix},
+		TokenGT:  {NodeGT, nil, nil, nil, nil, 60, nil, ldInfix},
+		TokenLT:  {NodeLT, nil, nil, nil, nil, 60, nil, ldInfix},
 
 		// Constants
 
-		TokenFALSE: {NodeFALSE, nil, nil, nil, 0, ndTerm, nil},
-		TokenTRUE:  {NodeTRUE, nil, nil, nil, 0, ndTerm, nil},
-		TokenNULL:  {NodeNULL, nil, nil, nil, 0, ndTerm, nil},
+		TokenFALSE: {NodeFALSE, nil, nil, nil, nil, 0, ndTerm, nil},
+		TokenTRUE:  {NodeTRUE, nil, nil, nil, nil, 0, ndTerm, nil},
+		TokenNULL:  {NodeNULL, nil, nil, nil, nil, 0, ndTerm, nil},
 	}
 }
 
@@ -232,9 +232,25 @@ func (p *parser) run(rightBinding int) (*ASTNode, error) {
 next retrieves the next lexer token.
 */
 func (p *parser) next() (*ASTNode, error) {
+	var preComments []*LexToken
+	var postComments []*LexToken
 
 	token, more := p.tokens.Next()
 
+	// Skip over pre comment tokens
+
+	for more && token.ID == TokenPRECOMMENT {
+		preComments = append(preComments, NewLexTokenInstance(token))
+		token, more = p.tokens.Next()
+	}
+
+	// Skip over post comment tokens
+
+	for more && token.ID == TokenPOSTCOMMENT {
+		postComments = append(postComments, NewLexTokenInstance(token))
+		token, more = p.tokens.Next()
+	}
+
 	if !more {
 
 		// Unexpected end of input - the associated token is an empty error token
@@ -251,7 +267,14 @@ func (p *parser) next() (*ASTNode, error) {
 
 		// We got a normal AST component
 
-		return node.instance(p, &token), nil
+		ret := node.instance(p, &token)
+
+		ret.Meta = append(ret.Meta, preComments...) // Attach pre comments to the next AST node
+		if len(postComments) > 0 && p.node != nil {
+			p.node.Meta = append(p.node.Meta, postComments...) // Attach post comments to the previous AST node
+		}
+
+		return ret, nil
 	}
 
 	return nil, p.newParserError(ErrUnknownToken, fmt.Sprintf("id:%v (%v)", token.ID, token), token)

+ 15 - 11
lang/ecal/parser/parser_main_test.go

@@ -16,17 +16,21 @@ import (
 
 func TestCommentParsing(t *testing.T) {
 
-	// TODO: Comment parsing
-
-	//	input := `/* This
-	//	is  a comment */ a := 1 + 1 # foo bar`
-	/*
-		if _, err := UnitTestParse("mytest", input); err.Error() !=
-			"Parse error in mytest: Lexical error (invalid syntax while parsing string) (Line:1 Pos:1)" {
-			t.Error(err)
-			return
-		}
-	*/
+	// Comment parsing without statements
+
+	input := `/* This is  a comment*/ a := 1 + 1 # foo bar`
+	expectedOutput := `
+:=
+  identifier: a #  This is  a comment
+  plus
+    number: 1
+    number: 1 #  foo bar
+`[1:]
+
+	if res, err := UnitTestParse("mytest", input); err != nil || fmt.Sprint(res) != expectedOutput {
+		t.Error("Unexpected parser output:\n", res, "expected was:\n", expectedOutput, "Error:", err)
+		return
+	}
 }
 
 func TestSimpleExpressionParsing(t *testing.T) {

+ 3 - 1
lang/ecal/parser/prettyprinter.go

@@ -31,7 +31,6 @@ var bracketPrecedenceMap map[string]bool
 func init() {
 	prettyPrinterMap = map[string]*template.Template{
 
-		NodeCOMMENT:    template.Must(template.New(NodeTRUE).Parse("true")),
 		NodeSTRING:     template.Must(template.New(NodeTRUE).Parse("{{.qval}}")),
 		NodeNUMBER:     template.Must(template.New(NodeTRUE).Parse("{{.val}}")),
 		NodeIDENTIFIER: template.Must(template.New(NodeTRUE).Parse("{{.val}}")),
@@ -46,6 +45,9 @@ func init() {
 
 			NodeASSIGN = ":="
 		*/
+
+		NodeASSIGN + "_2": template.Must(template.New(NodeMINUS).Parse("{{.c1}} := {{.c2}}")),
+
 		// Arithmetic operators
 
 		NodePLUS + "_1":   template.Must(template.New(NodeMINUS).Parse("+{{.c1}}")),