451 lines
		
	
	
		
			9.1 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			451 lines
		
	
	
		
			9.1 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
package govaluate
 | 
						|
 | 
						|
import (
 | 
						|
	"bytes"
 | 
						|
	"errors"
 | 
						|
	"fmt"
 | 
						|
	"regexp"
 | 
						|
	"strconv"
 | 
						|
	"time"
 | 
						|
	"unicode"
 | 
						|
)
 | 
						|
 | 
						|
func parseTokens(expression string, functions map[string]ExpressionFunction) ([]ExpressionToken, error) {
 | 
						|
 | 
						|
	var ret []ExpressionToken
 | 
						|
	var token ExpressionToken
 | 
						|
	var stream *lexerStream
 | 
						|
	var state lexerState
 | 
						|
	var err error
 | 
						|
	var found bool
 | 
						|
 | 
						|
	stream = newLexerStream(expression)
 | 
						|
	state = validLexerStates[0]
 | 
						|
 | 
						|
	for stream.canRead() {
 | 
						|
 | 
						|
		token, err, found = readToken(stream, state, functions)
 | 
						|
 | 
						|
		if err != nil {
 | 
						|
			return ret, err
 | 
						|
		}
 | 
						|
 | 
						|
		if !found {
 | 
						|
			break
 | 
						|
		}
 | 
						|
 | 
						|
		state, err = getLexerStateForToken(token.Kind)
 | 
						|
		if err != nil {
 | 
						|
			return ret, err
 | 
						|
		}
 | 
						|
 | 
						|
		// append this valid token
 | 
						|
		ret = append(ret, token)
 | 
						|
	}
 | 
						|
 | 
						|
	err = checkBalance(ret)
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
 | 
						|
	return ret, nil
 | 
						|
}
 | 
						|
 | 
						|
func readToken(stream *lexerStream, state lexerState, functions map[string]ExpressionFunction) (ExpressionToken, error, bool) {
 | 
						|
 | 
						|
	var function ExpressionFunction
 | 
						|
	var ret ExpressionToken
 | 
						|
	var tokenValue interface{}
 | 
						|
	var tokenTime time.Time
 | 
						|
	var tokenString string
 | 
						|
	var kind TokenKind
 | 
						|
	var character rune
 | 
						|
	var found bool
 | 
						|
	var completed bool
 | 
						|
	var err error
 | 
						|
 | 
						|
	// numeric is 0-9, or .
 | 
						|
	// string starts with '
 | 
						|
	// variable is alphanumeric, always starts with a letter
 | 
						|
	// bracket always means variable
 | 
						|
	// symbols are anything non-alphanumeric
 | 
						|
	// all others read into a buffer until they reach the end of the stream
 | 
						|
	for stream.canRead() {
 | 
						|
 | 
						|
		character = stream.readCharacter()
 | 
						|
 | 
						|
		if unicode.IsSpace(character) {
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		kind = UNKNOWN
 | 
						|
 | 
						|
		// numeric constant
 | 
						|
		if isNumeric(character) {
 | 
						|
 | 
						|
			tokenString = readTokenUntilFalse(stream, isNumeric)
 | 
						|
			tokenValue, err = strconv.ParseFloat(tokenString, 64)
 | 
						|
 | 
						|
			if err != nil {
 | 
						|
				errorMsg := fmt.Sprintf("Unable to parse numeric value '%v' to float64\n", tokenString)
 | 
						|
				return ExpressionToken{}, errors.New(errorMsg), false
 | 
						|
			}
 | 
						|
			kind = NUMERIC
 | 
						|
			break
 | 
						|
		}
 | 
						|
 | 
						|
		// comma, separator
 | 
						|
		if character == ',' {
 | 
						|
 | 
						|
			tokenValue = ","
 | 
						|
			kind = SEPARATOR
 | 
						|
			break
 | 
						|
		}
 | 
						|
 | 
						|
		// escaped variable
 | 
						|
		if character == '[' {
 | 
						|
 | 
						|
			tokenValue, completed = readUntilFalse(stream, true, false, true, isNotClosingBracket)
 | 
						|
			kind = VARIABLE
 | 
						|
 | 
						|
			if !completed {
 | 
						|
				return ExpressionToken{}, errors.New("Unclosed parameter bracket"), false
 | 
						|
			}
 | 
						|
 | 
						|
			// above method normally rewinds us to the closing bracket, which we want to skip.
 | 
						|
			stream.rewind(-1)
 | 
						|
			break
 | 
						|
		}
 | 
						|
 | 
						|
		// regular variable - or function?
 | 
						|
		if unicode.IsLetter(character) {
 | 
						|
 | 
						|
			tokenString = readTokenUntilFalse(stream, isVariableName)
 | 
						|
 | 
						|
			tokenValue = tokenString
 | 
						|
			kind = VARIABLE
 | 
						|
 | 
						|
			// boolean?
 | 
						|
			if tokenValue == "true" {
 | 
						|
 | 
						|
				kind = BOOLEAN
 | 
						|
				tokenValue = true
 | 
						|
			} else {
 | 
						|
 | 
						|
				if tokenValue == "false" {
 | 
						|
 | 
						|
					kind = BOOLEAN
 | 
						|
					tokenValue = false
 | 
						|
				}
 | 
						|
			}
 | 
						|
 | 
						|
			// textual operator?
 | 
						|
			if tokenValue == "in" || tokenValue == "IN" {
 | 
						|
 | 
						|
				// force lower case for consistency
 | 
						|
				tokenValue = "in"
 | 
						|
				kind = COMPARATOR
 | 
						|
			}
 | 
						|
 | 
						|
			// function?
 | 
						|
			function, found = functions[tokenString]
 | 
						|
			if found {
 | 
						|
				kind = FUNCTION
 | 
						|
				tokenValue = function
 | 
						|
			}
 | 
						|
			break
 | 
						|
		}
 | 
						|
 | 
						|
		if !isNotQuote(character) {
 | 
						|
			tokenValue, completed = readUntilFalse(stream, true, false, true, isNotQuote)
 | 
						|
 | 
						|
			if !completed {
 | 
						|
				return ExpressionToken{}, errors.New("Unclosed string literal"), false
 | 
						|
			}
 | 
						|
 | 
						|
			// advance the stream one position, since reading until false assumes the terminator is a real token
 | 
						|
			stream.rewind(-1)
 | 
						|
 | 
						|
			// check to see if this can be parsed as a time.
 | 
						|
			tokenTime, found = tryParseTime(tokenValue.(string))
 | 
						|
			if found {
 | 
						|
				kind = TIME
 | 
						|
				tokenValue = tokenTime
 | 
						|
			} else {
 | 
						|
				kind = STRING
 | 
						|
			}
 | 
						|
			break
 | 
						|
		}
 | 
						|
 | 
						|
		if character == '(' {
 | 
						|
			tokenValue = character
 | 
						|
			kind = CLAUSE
 | 
						|
			break
 | 
						|
		}
 | 
						|
 | 
						|
		if character == ')' {
 | 
						|
			tokenValue = character
 | 
						|
			kind = CLAUSE_CLOSE
 | 
						|
			break
 | 
						|
		}
 | 
						|
 | 
						|
		// must be a known symbol
 | 
						|
		tokenString = readTokenUntilFalse(stream, isNotAlphanumeric)
 | 
						|
		tokenValue = tokenString
 | 
						|
 | 
						|
		// quick hack for the case where "-" can mean "prefixed negation" or "minus", which are used
 | 
						|
		// very differently.
 | 
						|
		if state.canTransitionTo(PREFIX) {
 | 
						|
			_, found = prefixSymbols[tokenString]
 | 
						|
			if found {
 | 
						|
 | 
						|
				kind = PREFIX
 | 
						|
				break
 | 
						|
			}
 | 
						|
		}
 | 
						|
		_, found = modifierSymbols[tokenString]
 | 
						|
		if found {
 | 
						|
 | 
						|
			kind = MODIFIER
 | 
						|
			break
 | 
						|
		}
 | 
						|
 | 
						|
		_, found = logicalSymbols[tokenString]
 | 
						|
		if found {
 | 
						|
 | 
						|
			kind = LOGICALOP
 | 
						|
			break
 | 
						|
		}
 | 
						|
 | 
						|
		_, found = comparatorSymbols[tokenString]
 | 
						|
		if found {
 | 
						|
 | 
						|
			kind = COMPARATOR
 | 
						|
			break
 | 
						|
		}
 | 
						|
 | 
						|
		_, found = ternarySymbols[tokenString]
 | 
						|
		if found {
 | 
						|
 | 
						|
			kind = TERNARY
 | 
						|
			break
 | 
						|
		}
 | 
						|
 | 
						|
		errorMessage := fmt.Sprintf("Invalid token: '%s'", tokenString)
 | 
						|
		return ret, errors.New(errorMessage), false
 | 
						|
	}
 | 
						|
 | 
						|
	ret.Kind = kind
 | 
						|
	ret.Value = tokenValue
 | 
						|
 | 
						|
	return ret, nil, (kind != UNKNOWN)
 | 
						|
}
 | 
						|
 | 
						|
func readTokenUntilFalse(stream *lexerStream, condition func(rune) bool) string {
 | 
						|
 | 
						|
	var ret string
 | 
						|
 | 
						|
	stream.rewind(1)
 | 
						|
	ret, _ = readUntilFalse(stream, false, true, true, condition)
 | 
						|
	return ret
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
	Returns the string that was read until the given [condition] was false, or whitespace was broken.
 | 
						|
	Returns false if the stream ended before whitespace was broken or condition was met.
 | 
						|
*/
 | 
						|
func readUntilFalse(stream *lexerStream, includeWhitespace bool, breakWhitespace bool, allowEscaping bool, condition func(rune) bool) (string, bool) {
 | 
						|
 | 
						|
	var tokenBuffer bytes.Buffer
 | 
						|
	var character rune
 | 
						|
	var conditioned bool
 | 
						|
 | 
						|
	conditioned = false
 | 
						|
 | 
						|
	for stream.canRead() {
 | 
						|
 | 
						|
		character = stream.readCharacter()
 | 
						|
 | 
						|
		// Use backslashes to escape anything
 | 
						|
		if allowEscaping && character == '\\' {
 | 
						|
 | 
						|
			character = stream.readCharacter()
 | 
						|
			tokenBuffer.WriteString(string(character))
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		if unicode.IsSpace(character) {
 | 
						|
 | 
						|
			if breakWhitespace && tokenBuffer.Len() > 0 {
 | 
						|
				conditioned = true
 | 
						|
				break
 | 
						|
			}
 | 
						|
			if !includeWhitespace {
 | 
						|
				continue
 | 
						|
			}
 | 
						|
		}
 | 
						|
 | 
						|
		if condition(character) {
 | 
						|
			tokenBuffer.WriteString(string(character))
 | 
						|
		} else {
 | 
						|
			conditioned = true
 | 
						|
			stream.rewind(1)
 | 
						|
			break
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	return tokenBuffer.String(), conditioned
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
	Checks to see if any optimizations can be performed on the given [tokens], which form a complete, valid expression.
 | 
						|
	The returns slice will represent the optimized (or unmodified) list of tokens to use.
 | 
						|
*/
 | 
						|
func optimizeTokens(tokens []ExpressionToken) ([]ExpressionToken, error) {
 | 
						|
 | 
						|
	var token ExpressionToken
 | 
						|
	var symbol OperatorSymbol
 | 
						|
	var err error
 | 
						|
	var index int
 | 
						|
 | 
						|
	for index, token = range tokens {
 | 
						|
 | 
						|
		// if we find a regex operator, and the right-hand value is a constant, precompile and replace with a pattern.
 | 
						|
		if token.Kind != COMPARATOR {
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		symbol = comparatorSymbols[token.Value.(string)]
 | 
						|
		if symbol != REQ && symbol != NREQ {
 | 
						|
			continue
 | 
						|
		}
 | 
						|
 | 
						|
		index++
 | 
						|
		token = tokens[index]
 | 
						|
		if token.Kind == STRING {
 | 
						|
 | 
						|
			token.Kind = PATTERN
 | 
						|
			token.Value, err = regexp.Compile(token.Value.(string))
 | 
						|
 | 
						|
			if err != nil {
 | 
						|
				return tokens, err
 | 
						|
			}
 | 
						|
 | 
						|
			tokens[index] = token
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return tokens, nil
 | 
						|
}
 | 
						|
 | 
						|
/*
 | 
						|
	Checks the balance of tokens which have multiple parts, such as parenthesis.
 | 
						|
*/
 | 
						|
func checkBalance(tokens []ExpressionToken) error {
 | 
						|
 | 
						|
	var stream *tokenStream
 | 
						|
	var token ExpressionToken
 | 
						|
	var parens int
 | 
						|
 | 
						|
	stream = newTokenStream(tokens)
 | 
						|
 | 
						|
	for stream.hasNext() {
 | 
						|
 | 
						|
		token = stream.next()
 | 
						|
		if token.Kind == CLAUSE {
 | 
						|
			parens++
 | 
						|
			continue
 | 
						|
		}
 | 
						|
		if token.Kind == CLAUSE_CLOSE {
 | 
						|
			parens--
 | 
						|
			continue
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	if parens != 0 {
 | 
						|
		return errors.New("Unbalanced parenthesis")
 | 
						|
	}
 | 
						|
	return nil
 | 
						|
}
 | 
						|
 | 
						|
/*
	Returns true if the given [character] can be part of a numeric constant;
	that is, if it is a decimal point or a unicode digit.
*/
func isNumeric(character rune) bool {

	if character == '.' {
		return true
	}
	return unicode.IsDigit(character)
}
 | 
						|
 | 
						|
/*
	Returns true unless the given [character] is a single or double quote.
*/
func isNotQuote(character rune) bool {

	switch character {
	case '\'', '"':
		return false
	}
	return true
}
 | 
						|
 | 
						|
func isNotAlphanumeric(character rune) bool {
 | 
						|
 | 
						|
	return !(unicode.IsDigit(character) ||
 | 
						|
		unicode.IsLetter(character) ||
 | 
						|
		character == '(' ||
 | 
						|
		character == ')' ||
 | 
						|
		!isNotQuote(character))
 | 
						|
}
 | 
						|
 | 
						|
/*
	Returns true if the given [character] can be part of an unbracketed variable name;
	that is, if it is an underscore, a letter, or a digit.
*/
func isVariableName(character rune) bool {

	if character == '_' {
		return true
	}
	return unicode.IsLetter(character) || unicode.IsDigit(character)
}
 | 
						|
 | 
						|
/*
	Returns true unless the given [character] is the bracket which ends an escaped variable name.
*/
func isNotClosingBracket(character rune) bool {

	const closingBracket = ']'

	return character != closingBracket
}
 | 
						|
 | 
						|
/*
 | 
						|
	Attempts to parse the [candidate] as a Time.
 | 
						|
	Tries a series of standardized date formats, returns the Time if one applies,
 | 
						|
	otherwise returns false through the second return.
 | 
						|
*/
 | 
						|
func tryParseTime(candidate string) (time.Time, bool) {
 | 
						|
 | 
						|
	var ret time.Time
 | 
						|
	var found bool
 | 
						|
 | 
						|
	timeFormats := [...]string{
 | 
						|
		time.ANSIC,
 | 
						|
		time.UnixDate,
 | 
						|
		time.RubyDate,
 | 
						|
		time.Kitchen,
 | 
						|
		time.RFC3339,
 | 
						|
		time.RFC3339Nano,
 | 
						|
		"2006-01-02",                         // RFC 3339
 | 
						|
		"2006-01-02 15:04",                   // RFC 3339 with minutes
 | 
						|
		"2006-01-02 15:04:05",                // RFC 3339 with seconds
 | 
						|
		"2006-01-02 15:04:05-07:00",          // RFC 3339 with seconds and timezone
 | 
						|
		"2006-01-02T15Z0700",                 // ISO8601 with hour
 | 
						|
		"2006-01-02T15:04Z0700",              // ISO8601 with minutes
 | 
						|
		"2006-01-02T15:04:05Z0700",           // ISO8601 with seconds
 | 
						|
		"2006-01-02T15:04:05.999999999Z0700", // ISO8601 with nanoseconds
 | 
						|
	}
 | 
						|
 | 
						|
	for _, format := range timeFormats {
 | 
						|
 | 
						|
		ret, found = tryParseExactTime(candidate, format)
 | 
						|
		if found {
 | 
						|
			return ret, true
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	return time.Now(), false
 | 
						|
}
 | 
						|
 | 
						|
/*
	Attempts to parse the [candidate] as a Time using exactly the given [format] layout.
	Anything the layout leaves unspecified about the time zone is interpreted in the local zone.
	Returns the parsed Time and true on success, otherwise the zero Time and false.
*/
func tryParseExactTime(candidate string, format string) (time.Time, bool) {

	parsed, err := time.ParseInLocation(format, candidate, time.Local)
	if err != nil {
		// the zero Time avoids a clock read for every layout which doesn't match.
		return time.Time{}, false
	}

	return parsed, true
}
 |