/*
 * Public Domain Software
 *
 * I (Matthias Ladkau) am the author of the source code in this file.
 * I have placed the source code in this file in the public domain.
 *
 * For further information see: http://creativecommons.org/publicdomain/zero/1.0/
 */

/*
Package parser contains an ECAL parser.

Lexer for Source Text

Lex() is a lexer function to convert a given search query into a list of tokens.

Based on a talk by Rob Pike: Lexical Scanning in Go

https://www.youtube.com/watch?v=HxaD_trXwRE

The lexer's output is pushed into a channel which is consumed by the parser.
This design enables the concurrent processing of the input text by lexer and
parser.

Parser

Parse() is a parser which produces a parse tree from a given set of lexer tokens.

Based on an article by Douglas Crockford: Top Down Operator Precedence

http://crockford.com/javascript/tdop/tdop.html

which is based on the ideas of Vaughan Pratt and his paper: Top Down Operator Precedence

http://portal.acm.org/citation.cfm?id=512931
https://tdop.github.io/

ParseWithRuntime() parses a given input and decorates the resulting parse tree
with runtime components which can be used to interpret the parsed query.
*/
package parser

/*
LexTokenID represents a unique lexer token ID. It is an int-based type whose
available values are declared in the token constant block of this file.
*/
type LexTokenID int

/*
Available lexer token types.

The IDs are assigned consecutively via iota, starting at 0 with TokenError.
Inserting, removing or reordering entries therefore changes the numeric value
of every token declared after that point.

The list is divided into sections by two marker constants: TOKENodeSYMBOLS
precedes the symbol tokens and TOKENodeKEYWORDS precedes the keyword tokens.
*/
const (
	TokenError LexTokenID = iota // Lexing error token with a message as val
	TokenEOF                     // End-of-file token
	TokenAny                     // Unspecified token (used when building an AST from a Go map structure)

	TokenCOMMENT    // Comment
	TokenSTRING     // String constant
	TokenNUMBER     // Number constant
	TokenIDENTIFIER // Identifier

	// Constructed tokens which are generated by the parser, not the lexer

	TokenSTATEMENTS // A code block
	TokenLIST       // List value
	TokenMAP        // Map value
	TokenGUARD      // Guard expressions for conditional statements

	TOKENodeSYMBOLS // Used to separate symbols from other tokens in this list

	// Condition operators (symbols)

	TokenGEQ
	TokenLEQ
	TokenNEQ
	TokenEQ
	TokenGT
	TokenLT

	// Grouping symbols

	TokenLPAREN
	TokenRPAREN
	TokenLBRACK
	TokenRBRACK
	TokenLBRACE
	TokenRBRACE

	// Separators

	TokenDOT
	TokenCOMMA
	TokenCOLON
	TokenSEMICOLON

	// Arithmetic operators

	TokenPLUS
	TokenMINUS
	TokenTIMES
	TokenDIV
	TokenDIVINT
	TokenMODINT

	// Assignment statement

	TokenASSIGN

	// Data structure access

	TokenACCESS

	// The colon ':' has a context specific meaning and is checked by the parser

	TOKENodeKEYWORDS // Used to separate keywords from other tokens in this list

	// Sink definition

	TokenSINK
	TokenKINDMATCH
	TokenSCOPEMATCH
	TokenSTATEMATCH
	TokenPRIORITY
	TokenSUPPRESSES

	// Function definition

	TokenFUNC

	// Boolean operators

	TokenAND
	TokenOR
	TokenNOT

	// Condition operators (keywords)

	TokenLIKE
	TokenIN
	TokenHASPREFIX
	TokenHASSUFFIX
	TokenNOTIN

	// Constant terminals

	TokenFALSE
	TokenTRUE
	TokenNULL

	// Conditional statements

	TokenIF
	TokenELIF
	TokenELSE

	// Loop statements

	TokenFOR
	TokenBREAK
	TokenCONTINUE
)

/*
Available parser AST node types.

Each constant is an untyped string constant holding the name of one kind of
AST node.

NOTE(review): most names mirror a Token* constant declared above; the string
values presumably identify the node built for the corresponding token -
confirm against the parser implementation.
*/
const (
	NodeEOF = "EOF"

	NodeVALUE = "value" // Simple value

	// Constructed tokens

	NodeSTATEMENTS = "statements" // List of statements
	NodeLIST       = "list"       // List value
	NodeMAP        = "map"        // Map value
	NodeGUARD      = "guard"      // Guard expressions for conditional statements

	// Map entries

	NodeMAPENTRY = "entry" // Map entry value

	// Boolean operators

	NodeOR  = "or"
	NodeAND = "and"
	NodeNOT = "not"

	// Condition operators
	//
	// NOTE(review): NodeBEGINSWITH and NodeENDSWITH have no identically named
	// token; they look like the counterparts of TokenHASPREFIX and
	// TokenHASSUFFIX - confirm.

	NodeLIKE       = "like"
	NodeIN         = "in"
	NodeBEGINSWITH = "beginswith"
	NodeENDSWITH   = "endswith"
	NodeNOTIN      = "notin"

	NodeGEQ = ">="
	NodeLEQ = "<="
	NodeNEQ = "!="
	NodeEQ  = "=="
	NodeGT  = ">"
	NodeLT  = "<"

	// Constants

	NodeTRUE  = "true"
	NodeFALSE = "false"
	NodeNULL  = "null"

	// Arithmetic operators

	NodePLUS   = "plus"
	NodeMINUS  = "minus"
	NodeTIMES  = "times"
	NodeDIV    = "div"
	NodeMODINT = "modint"
	NodeDIVINT = "divint"

	// Assignment statement

	NodeASSIGN = ":="

	// Function call statement

	NodeFUNCCALL = "funccall"

	// Data structure access

	NodeACCESS = "access"

	// Sink definition

	NodeSINK       = "sink"
	NodeKINDMATCH  = "kindmatch"
	NodeSCOPEMATCH = "scopematch"
	NodeSTATEMATCH = "statematch"
	NodePRIORITY   = "priority"
	NodeSUPPRESSES = "suppresses"

	// Block statements

	NodeCOND = "cond"
	NodeLOOP = "loop"

	// Single statements

	NodeBREAK    = "break"
	NodeCONTINUE = "continue"
)