using System;
using System.Collections.Generic;
using System.Linq;

namespace NTERA.Interpreter.Compiler
{
    /// <summary>
    /// Turns the token stream produced by a <see cref="Lexer"/> into a flat list of
    /// <see cref="ExecutionNode"/> trees, one per source line (assignment, CASE, or statement).
    /// Lines that fail to parse produce an "error" node and a <see cref="ParserError"/>.
    /// </summary>
    public class Parser
    {
        protected Lexer Lexer { get; }

        protected FunctionDefinition SelfDefinition { get; }

        // NOTE(review): element type inferred from usage (x.Name, x.Parameters.Length) - confirm.
        protected IList<FunctionDefinition> FunctionDefinitions { get; }

        protected VariableDictionary GlobalVariables { get; }

        protected VariableDictionary LocalVariables { get; }

        protected IList<string> StringStatements { get; }

        // NOTE(review): shape inferred from usage in GetVariable - table name -> rows -> string cells; confirm.
        protected IDictionary<string, IList<IList<string>>> CsvDefinitions { get; }

        protected IEnumerator<Token> Enumerator { get; }

        protected bool hasPeeked = false;
        protected Token peekedToken = Token.Unknown;

        /// <summary>
        /// Returns the next token. With <paramref name="peek"/> set, the token is remembered so the
        /// following call (peeking or not) returns the same token without advancing the enumerator.
        /// </summary>
        /// <param name="peek">True to look at the next token without consuming it.</param>
        /// <returns>The next (or previously peeked) token.</returns>
        protected Token GetNextToken(bool peek = false)
        {
            if (peek && hasPeeked)
                return peekedToken;

            // A pending peek already advanced the enumerator; only move when nothing is pending.
            if (!hasPeeked)
                Enumerator.MoveNext();

            peekedToken = Enumerator.Current;
            hasPeeked = peek;

            return Enumerator.Current;
        }

        /// <summary>
        /// Position of the lexer's current token, offset by the position of the function being parsed.
        /// </summary>
        protected Marker CurrentPosition => new Marker(
            Lexer.TokenMarker.Pointer + SelfDefinition.Position.Pointer,
            Lexer.TokenMarker.Line + SelfDefinition.Position.Line - 1,
            Lexer.TokenMarker.Column);

        /// <summary>
        /// Creates a parser over <paramref name="input"/> and stores the lookup tables used to
        /// classify identifiers while parsing.
        /// </summary>
        public Parser(string input,
            FunctionDefinition selfDefinition,
            IList<FunctionDefinition> functionDefinitions,
            VariableDictionary globalVariables,
            VariableDictionary localVariables,
            IList<string> stringStatements,
            IDictionary<string, IList<IList<string>>> csvDefinitions)
        {
            Lexer = new Lexer(input);
            Enumerator = Lexer.GetEnumerator();

            SelfDefinition = selfDefinition;
            FunctionDefinitions = functionDefinitions;
            GlobalVariables = globalVariables;
            LocalVariables = localVariables;
            StringStatements = stringStatements;
            CsvDefinitions = csvDefinitions;
        }

        /// <summary>
        /// Parses the whole input, line by line.
        /// </summary>
        /// <param name="errors">Receives one entry per line that failed to parse.</param>
        /// <returns>
        /// The parsed nodes in source order. Each failed line contributes an "error" node carrying
        /// the error message and symbol position in its metadata.
        /// </returns>
        public IEnumerable<ExecutionNode> Parse(out List<ParserError> errors)
        {
            errors = new List<ParserError>();
            List<ExecutionNode> nodes = new List<ExecutionNode>();

            using (Enumerator)
            {
                // NOTE(review): ParseLine runs once before the first MoveNext(); this relies on the
                // enumerator exposing a harmless Current at that point - confirm against Lexer.
                do
                {
                    var node = ParseLine(out var error);

                    if (error != null)
                    {
                        errors.Add(error);

                        nodes.Add(new ExecutionNode
                        {
                            Type = "error",
                            Metadata =
                            {
                                ["message"] = error.ErrorMessage,
                                ["symbol"] = error.SymbolMarker.ToString()
                            },
                            Symbol = error.SymbolMarker
                        });

                        // Resynchronize to the end of the failing line. The current token is checked
                        // BEFORE advancing: if the error was raised on the line terminator itself,
                        // advancing first would silently discard the whole following line.
                        while (Enumerator.Current != Token.NewLine
                               && Enumerator.Current != Token.EOF
                               && Enumerator.MoveNext())
                        {
                        }
                    }
                    else if (node != null)
                    {
                        nodes.Add(node);
                    }

                    hasPeeked = false;
                } while (Enumerator.MoveNext());
            }

            return nodes;
        }

        /// <summary>
        /// Parses the line starting at the enumerator's current token.
        /// </summary>
        /// <param name="error">Set when the line is invalid; otherwise null.</param>
        /// <returns>
        /// The node for the line, or null for blank lines, lines starting with a Function or Sharp
        /// token (which are skipped), and lines that produced an error.
        /// </returns>
        protected ExecutionNode ParseLine(out ParserError error)
        {
            error = null;

            switch (Enumerator.Current)
            {
                case Token.Identifer:
                    if (GlobalVariables.ContainsKey(Lexer.Identifer) || LocalVariables.ContainsKey(Lexer.Identifer))
                        return ParseAssignment(out error);

                    if (Lexer.Identifer == "CASE")
                        return ParseCase(out error);

                    // Anything else is treated as a statement.
                    return ParseStatement(out error);

                case Token.Function:
                case Token.Sharp:
                    // Skip the remainder of the line.
                    while (Enumerator.MoveNext()
                           && Enumerator.Current != Token.NewLine
                           && Enumerator.Current != Token.EOF)
                    {
                    }

                    return null;

                case Token.NewLine:
                case Token.EOF:
                    return null;

                default:
                    error = new ParserError($"Unexpected token: {Enumerator.Current}", CurrentPosition);
                    return null;
            }
        }

        /// <summary>
        /// Parses an assignment to a known variable: plain (=), compound (an arithmetic token
        /// followed by =), increment, or decrement.
        /// </summary>
        private ExecutionNode ParseAssignment(out ParserError error)
        {
            string variableName = Lexer.Identifer;
            bool isGlobal = GlobalVariables.ContainsKey(variableName);

            var node = new ExecutionNode
            {
                Type = "assignment",
                Symbol = CurrentPosition
            };

            var variable = GetVariable(out error);
            if (error != null)
                return null;

            if (GetNextToken() != Token.Equal
                && Enumerator.Current != Token.Increment
                && Enumerator.Current != Token.Decrement
                && !Enumerator.Current.IsArithmetic())
            {
                error = new ParserError($"Unexpected token, expecting assignment: {Enumerator.Current}", CurrentPosition);
                return null;
            }

            ExecutionNode value;

            if (Enumerator.Current == Token.Increment)
            {
                value = OperateNodes(variable, CreateConstant(1), Token.Plus);
            }
            else if (Enumerator.Current == Token.Decrement)
            {
                value = OperateNodes(variable, CreateConstant(1), Token.Minus);
            }
            else if (Enumerator.Current != Token.Equal)
            {
                // Compound assignment: the arithmetic token must be followed by '='.
                Token arithmeticToken = Enumerator.Current;

                if (GetNextToken() != Token.Equal)
                {
                    error = new ParserError($"Unexpected token, expecting assignment: {Enumerator.Current}", CurrentPosition);
                    return null;
                }

                ExecutionNode newValue = Expression(out error);
                value = OperateNodes(variable, newValue, arithmeticToken);
            }
            else
            {
                // The declared type of the target decides how the right-hand side is parsed.
                var type = isGlobal ? GlobalVariables[variableName].Type : LocalVariables[variableName].Type;

                value = type == ValueType.String
                    ? StringExpression(out error)
                    : Expression(out error);
            }

            if (error != null)
                return null;

            node.SubNodes = new[]
            {
                variable,
                new ExecutionNode
                {
                    Type = "value",
                    SubNodes = new[] { value }
                }
            };

            return node;
        }

        /// <summary>
        /// Parses a CASE line: either a single value, or a "value TO value" range.
        /// </summary>
        private ExecutionNode ParseCase(out ParserError error)
        {
            var node = new ExecutionNode
            {
                Type = "case",
                Symbol = CurrentPosition
            };

            var value = Expression(out error);
            if (error != null)
                return null;

            if (Enumerator.Current == Token.NewLine || Enumerator.Current == Token.EOF)
            {
                node.Metadata["casetype"] = "value";
                node.SubNodes = new[] { value };
                return node;
            }

            // NOTE(review): Expression() reports unknown identifiers as errors and does not stop on
            // an identifier token, so this "TO" branch appears unreachable from the code visible
            // here - confirm how "TO" is lexed.
            if (Enumerator.Current == Token.Identifer && Lexer.Identifer == "TO")
            {
                var value2 = Expression(out error);
                if (error != null)
                    return null;

                node.Metadata["casetype"] = "to";
                node.SubNodes = new[] { value, value2 };
                return node;
            }

            error = new ParserError($"Unexpected token: {Enumerator.Current}", CurrentPosition);
            return null;
        }

        /// <summary>
        /// Parses a statement: a name followed by either a single string expression (for names
        /// listed in <see cref="StringStatements"/>) or a comma-separated list of expressions.
        /// </summary>
        private ExecutionNode ParseStatement(out ParserError error)
        {
            error = null;

            string statementName = Lexer.Identifer;

            var node = new ExecutionNode
            {
                Type = "statement",
                Metadata =
                {
                    ["name"] = statementName
                },
                Symbol = CurrentPosition
            };

            List<ExecutionNode> parameters = new List<ExecutionNode>();

            if (StringStatements.Contains(statementName))
            {
                var value = StringExpression(out error);
                if (error != null)
                    return null;

                if (value != null)
                    parameters.Add(value);

                node.SubNodes = parameters.ToArray();
                return node;
            }

            // Repeated peeks return the same token, so one peek is enough for all checks below.
            Token next = GetNextToken(true);

            if (next == Token.NewLine || next == Token.EOF)
                return node;

            // A name followed by ':' or '=' looks like an assignment to something never declared.
            if (next == Token.Colon || next == Token.Equal)
            {
                error = new ParserError($"Undeclared variable: {statementName}", node.Symbol);
                return null;
            }

            while (Enumerator.Current != Token.NewLine && Enumerator.Current != Token.EOF)
            {
                parameters.Add(Expression(out error));

                if (error != null)
                {
                    error = new ParserError($"{error.ErrorMessage} (statement [{statementName}])", error.SymbolMarker);
                    return null;
                }

                if (Enumerator.Current != Token.Comma
                    && Enumerator.Current != Token.NewLine
                    && Enumerator.Current != Token.EOF)
                {
                    error = new ParserError($"Unexpected token: {Enumerator.Current}", CurrentPosition);
                    return null;
                }
            }

            node.SubNodes = parameters.ToArray();
            return node;
        }

        /// <summary>
        /// Builds a "variable" node for the current identifier, consuming any ':'-separated indices.
        /// An index may be a parenthesized expression, a literal value, a CSV alias, another
        /// variable, or a function call.
        /// </summary>
        /// <param name="error">Set when an index is invalid; otherwise null.</param>
        /// <returns>The variable node (with an "index" sub-node when indices are present), or null on error.</returns>
        protected ExecutionNode GetVariable(out ParserError error)
        {
            string variableName = Lexer.Identifer;

            var node = new ExecutionNode
            {
                Type = "variable",
                Metadata =
                {
                    ["name"] = variableName
                },
                Symbol = CurrentPosition
            };

            List<ExecutionNode> indices = new List<ExecutionNode>();

            error = null;

            while (GetNextToken(true) == Token.Colon)
            {
                GetNextToken(); // consume the peeked ':'
                var token = GetNextToken();

                if (token == Token.LParen)
                {
                    indices.Add(Expression(out error));
                    if (error != null)
                        return null;

                    if (Enumerator.Current != Token.RParen)
                    {
                        error = new ParserError("Invalid expression - Expected right bracket", CurrentPosition);
                        return null;
                    }
                }
                else if (token == Token.Value)
                {
                    indices.Add(CreateConstant(Lexer.Value));
                }
                else if (token == Token.Identifer)
                {
                    // Pick the CSV table whose key contains the variable name, preferring an exact
                    // (case-insensitive) match over a partial one.
                    IList<IList<string>> csvTable = CsvDefinitions
                        .Where(x => x.Key.IndexOf(variableName, StringComparison.OrdinalIgnoreCase) >= 0)
                        .OrderBy(x => x.Key.Equals(variableName, StringComparison.OrdinalIgnoreCase) ? 1 : 2)
                        .FirstOrDefault().Value;

                    IList<string> alias = csvTable?.FirstOrDefault(x => x.Count > 1 && x[1] == Lexer.Identifer);

                    if (alias != null)
                    {
                        // Report a malformed CSV index as a parse error instead of letting
                        // int.Parse throw out of the parser.
                        if (!int.TryParse(alias[0], out int aliasIndex))
                        {
                            error = new ParserError($"Invalid CSV index for alias '{Lexer.Identifer}': '{alias[0]}'", CurrentPosition);
                            return null;
                        }

                        indices.Add(CreateConstant(aliasIndex));
                        continue;
                    }

                    if (GlobalVariables.ContainsKey(Lexer.Identifer) || LocalVariables.ContainsKey(Lexer.Identifer))
                    {
                        var subNode = new ExecutionNode
                        {
                            Type = "variable",
                            Metadata =
                            {
                                ["name"] = Lexer.Identifer
                            },
                            Symbol = CurrentPosition
                        };

                        indices.Add(subNode);
                        continue;
                    }

                    if (FunctionDefinitions.Any(x => x.Name == Lexer.Identifer))
                    {
                        indices.Add(Expression(out error));
                        if (error != null)
                            return null;

                        continue;
                    }

                    error = new ParserError($"Unknown identifier: {Lexer.Identifer}", CurrentPosition);
                    return null;
                }
            }

            if (indices.Count > 0)
            {
                ExecutionNode indexNode = new ExecutionNode
                {
                    Type = "index",
                    SubNodes = indices.ToArray()
                };

                node.SubNodes = new[] { indexNode };
            }

            return node;
        }

        /// <summary>
        /// Parses a call to the function named by the current identifier, including its
        /// parenthesized, comma-separated argument list.
        /// </summary>
        /// <param name="error">
        /// Set on malformed syntax, or when no known definition with that name accepts at least
        /// the supplied number of arguments.
        /// </param>
        /// <returns>A "call" node, or null on error.</returns>
        protected ExecutionNode GetFunction(out ParserError error)
        {
            error = null;
            Token token;

            Marker symbolMarker = CurrentPosition;
            List<ExecutionNode> parameters = new List<ExecutionNode>();
            string functionName = Lexer.Identifer;

            if (GetNextToken() != Token.LParen)
            {
                error = new ParserError($"Unexpected token: {Enumerator.Current}", CurrentPosition);
                return null;
            }

            // An argument can only start with an identifier, a value, or a unary operator.
            while ((token = GetNextToken(true)) == Token.Identifer
                   || token == Token.Value
                   || token.IsUnary())
            {
                parameters.Add(Expression(out error));
                if (error != null)
                    return null;

                if (Enumerator.Current != Token.Comma && Enumerator.Current != Token.RParen)
                {
                    error = new ParserError($"Unexpected token: {Enumerator.Current}", CurrentPosition);
                    return null;
                }

                if (Enumerator.Current == Token.RParen)
                    break;
            }

            if (Enumerator.Current != Token.RParen)
            {
                error = new ParserError($"Unexpected token: {Enumerator.Current}", CurrentPosition);
                return null;
            }

            var functionDefinition = FunctionDefinitions.FirstOrDefault(
                x => x.Name == functionName && x.Parameters.Length >= parameters.Count);

            if (functionDefinition == null)
            {
                error = new ParserError($"No matching method with same amount of parameters: {functionName} ({parameters.Count})", CurrentPosition);
                return null;
            }

            return CallMethod(functionName, symbolMarker, parameters.ToArray());
        }

        // Binding strength of each operator; a higher number binds tighter.
        private static readonly Dictionary<Token, int> OrderOfOps = new Dictionary<Token, int>
        {
            { Token.Or, 0 }, { Token.And, 0 }, { Token.Not, 0 },
            { Token.Equal, 1 }, { Token.NotEqual, 1 },
            { Token.Less, 1 }, { Token.More, 1 },
            { Token.LessEqual, 1 }, { Token.MoreEqual, 1 },
            { Token.Plus, 2 }, { Token.Minus, 2 },
            { Token.Asterisk, 3 }, { Token.Slash, 3 }, { Token.Modulo, 3 },
            { Token.Caret, 4 }
        };

        /// <summary>
        /// Parses an arithmetic/logical expression using an operator stack and an operand stack.
        /// Parsing stops at NewLine, EOF, Comma, Colon, Format, CloseBracket, a right parenthesis,
        /// or - when <paramref name="useModulo"/> is false - at a Modulo token.
        /// </summary>
        /// <param name="error">Set when the expression is invalid; otherwise null.</param>
        /// <param name="useModulo">False to treat the Modulo token as a terminator rather than an operator.</param>
        /// <returns>The root node of the expression, or null on error.</returns>
        protected ExecutionNode Expression(out ParserError error, bool useModulo = true)
        {
            error = null;
            var operators = new Stack<Token>();
            var operands = new Stack<ExecutionNode>();
            Token token;

            // Pops one operator and folds it with one (unary) or two (binary) operands.
            void ProcessOperation(out ParserError localError)
            {
                localError = null;

                Token op = operators.Pop();

                if (op.IsUnary() && operands.Count >= 1)
                {
                    var operand = operands.Pop();

                    operands.Push(new ExecutionNode
                    {
                        Type = "operation",
                        Metadata =
                        {
                            ["type"] = GetOperationName(op),
                            ["unary"] = "true"
                        },
                        SubNodes = new[] { operand }
                    });
                }
                else if (operands.Count >= 2)
                {
                    ExecutionNode right = operands.Pop();
                    ExecutionNode left = operands.Pop();

                    operands.Push(new ExecutionNode
                    {
                        Type = "operation",
                        Metadata =
                        {
                            ["type"] = GetOperationName(op),
                            ["unary"] = "false"
                        },
                        SubNodes = new[] { left, right }
                    });
                }
                else
                {
                    localError = new ParserError("Invalid expression - not enough operands", CurrentPosition);
                }
            }

            // Applies any unary operators sitting on top of the operator stack.
            void AttemptUnaryConversion(out ParserError localError)
            {
                localError = null;

                while (operators.Count > 0 && operators.Peek().IsUnary())
                {
                    ProcessOperation(out localError);

                    if (localError != null)
                        return;
                }
            }

            while ((token = GetNextToken()) != Token.NewLine
                   && token != Token.EOF
                   && token != Token.Comma
                   && token != Token.Colon
                   && token != Token.Format
                   && token != Token.CloseBracket
                   && (useModulo || token != Token.Modulo))
            {
                if (token == Token.Value)
                {
                    operands.Push(CreateConstant(Lexer.Value));

                    // NOTE(review): pending unary operators are folded immediately only after a
                    // constant; variables, calls and parenthesized operands wait for the
                    // precedence loop - confirm this is intended.
                    AttemptUnaryConversion(out error);
                    if (error != null)
                        return null;
                }
                else if (token == Token.Identifer)
                {
                    if (GlobalVariables.ContainsKey(Lexer.Identifer) || LocalVariables.ContainsKey(Lexer.Identifer))
                    {
                        operands.Push(GetVariable(out error));
                        if (error != null)
                            return null;
                    }
                    else if (FunctionDefinitions.Any(x => x.Name == Lexer.Identifer))
                    {
                        operands.Push(GetFunction(out error));
                        if (error != null)
                            return null;
                    }
                    else
                    {
                        error = new ParserError($"Unknown identifier: {Lexer.Identifer}", CurrentPosition);
                        return null;
                    }
                }
                else if (token.IsArithmetic())
                {
                    if (token.IsUnary())
                    {
                        operators.Push(token);
                        continue;
                    }

                    // A binary operator needs a left-hand operand.
                    if (operands.Count == 0)
                    {
                        error = new ParserError($"Invalid unary operator: {token}", CurrentPosition);
                        return null;
                    }

                    // Fold everything on the stack that binds at least as tightly as this operator.
                    while (operators.Count > 0 && OrderOfOps[token] <= OrderOfOps[operators.Peek()])
                    {
                        ProcessOperation(out error);
                        if (error != null)
                            return null;
                    }

                    operators.Push(token);
                }
                else if (token == Token.LParen)
                {
                    operands.Push(Expression(out var localError));
                    if (localError != null)
                    {
                        error = localError;
                        return null;
                    }
                }
                else if (token == Token.RParen)
                {
                    break;
                }
                else
                {
                    error = new ParserError($"Unexpected token: {token}", CurrentPosition);
                    return null;
                }
            }

            while (operators.Count > 0)
            {
                ProcessOperation(out error);
                if (error != null)
                    return null;
            }

            // An empty expression (e.g. "()" or a trailing comma) leaves nothing to return;
            // report it instead of letting Stack.Pop() throw InvalidOperationException.
            if (operands.Count == 0)
            {
                error = new ParserError("Invalid expression - expected a value", CurrentPosition);
                return null;
            }

            return operands.Pop();
        }

        /// <summary>
        /// Parses a string expression: a run of literal values and format sections, joined
        /// left to right with "add" operations. Format sections become calls to "_FORMAT".
        /// The lexer is switched to string mode for the duration and back to
        /// <c>LexerType.Both</c> on normal completion.
        /// </summary>
        /// <param name="error">Set when an embedded expression is invalid; otherwise null.</param>
        /// <returns>The combined node, or null when there was no content or an error occurred.</returns>
        protected ExecutionNode StringExpression(out ParserError error)
        {
            error = null;
            ExecutionNode value = null;

            Lexer.Type = LexerType.String;

            while (Enumerator.MoveNext()
                   && (Enumerator.Current == Token.Value
                       || Enumerator.Current == Token.Format
                       || Enumerator.Current == Token.OpenBracket))
            {
                if (Enumerator.Current == Token.Value)
                {
                    value = value == null
                        ? CreateConstant(Lexer.Value)
                        : OperateNodes(value, CreateConstant(Lexer.Value), Token.Plus);
                }
                else
                {
                    List<ExecutionNode> formatParams = new List<ExecutionNode>();
                    Marker symbolMarker = CurrentPosition;

                    // For bracket sections the value is passed as useModulo, so Modulo acts as an operator there.
                    bool isSpecialFormat = Enumerator.Current == Token.OpenBracket;

                    do
                    {
                        Lexer.Type = LexerType.Both;

                        var tempValue = Expression(out error, isSpecialFormat);
                        if (error != null)
                            return null;

                        formatParams.Add(tempValue);
                    } while (Enumerator.Current == Token.Comma);

                    var formattedValue = CallMethod("_FORMAT", symbolMarker, formatParams.ToArray());

                    value = value == null
                        ? formattedValue
                        : OperateNodes(value, formattedValue, Token.Plus);

                    Lexer.Type = LexerType.String;
                }
            }

            Lexer.Type = LexerType.Both;

            return value;
        }

        // Friendly names for the basic arithmetic operators; others fall back to the token name.
        private static readonly Dictionary<Token, string> OperationNames = new Dictionary<Token, string>
        {
            [Token.Plus] = "add",
            [Token.Asterisk] = "multiply",
            [Token.Minus] = "subtract",
            [Token.Slash] = "divide",
        };

        /// <summary>
        /// Returns the operation name stored in node metadata for <paramref name="token"/>.
        /// </summary>
        private static string GetOperationName(Token token)
        {
            return OperationNames.TryGetValue(token, out string result)
                ? result
                : token.ToString();
        }

        /// <summary>
        /// Creates a "constant" node holding the type and string form of <paramref name="value"/>,
        /// positioned at the current token.
        /// </summary>
        private ExecutionNode CreateConstant(Value value)
        {
            return new ExecutionNode
            {
                Type = "constant",
                Metadata =
                {
                    ["type"] = value.Type.ToString(),
                    ["value"] = value.ToString()
                },
                Symbol = CurrentPosition
            };
        }

        /// <summary>
        /// Creates a binary "operation" node applying <paramref name="token"/> to the two operands.
        /// </summary>
        private static ExecutionNode OperateNodes(ExecutionNode left, ExecutionNode right, Token token)
        {
            return new ExecutionNode
            {
                Type = "operation",
                Metadata =
                {
                    ["type"] = GetOperationName(token)
                },
                SubNodes = new[]
                {
                    left,
                    right
                }
            };
        }

        /// <summary>
        /// Creates a "call" node targeting <paramref name="methodName"/>, with the arguments
        /// wrapped in a single "parameters" sub-node.
        /// </summary>
        private static ExecutionNode CallMethod(string methodName, Marker symbolMarker, params ExecutionNode[] parameters)
        {
            return new ExecutionNode
            {
                Type = "call",
                Metadata =
                {
                    ["target"] = methodName
                },
                Symbol = symbolMarker,
                SubNodes = new[]
                {
                    new ExecutionNode
                    {
                        Type = "parameters",
                        SubNodes = parameters.ToArray()
                    }
                }
            };
        }
    }
}