using System;
using System.Collections;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace NTERA.Interpreter.Compiler
{
    /// <summary>
    /// Turns a source string into a stream of <see cref="Token"/>s and evaluates simple
    /// expressions over that stream.
    /// </summary>
    /// <remarks>
    /// A single token enumerator is created in the constructor and immediately advanced to the
    /// first token. <see cref="GetEnumerator"/> returns that same instance every time, and
    /// <see cref="Expression"/> reads from it as well, so all consumers share one position.
    /// While a token is current, <see cref="TokenMarker"/>, <see cref="Identifer"/> and
    /// <see cref="Value"/> carry its position and payload.
    /// </remarks>
    public class Lexer : IEnumerable<Token>
    {
        private readonly string source;
        private Marker sourceMarker;
        private char currentChar;
        private readonly IEnumerator<Token> currentEnumerator;

        private LexerType _type;

        /// <summary>
        /// Lexing mode. Assigning it re-runs <see cref="InitTokenDictionaries"/> so that the
        /// single-character token table matches the new mode.
        /// </summary>
        public LexerType Type
        {
            get => _type;
            internal set
            {
                _type = value;
                InitTokenDictionaries();
            }
        }

        /// <summary>Marker captured just before the most recent token was determined.</summary>
        public Marker TokenMarker { get; set; }

        /// <summary>Text of the most recent identifier/keyword candidate.</summary>
        public string Identifer { get; set; }

        /// <summary>Payload of the most recent <c>Token.Value</c>.</summary>
        public Value Value { get; set; }

        /// <summary>
        /// Creates a lexer over <paramref name="input"/> and advances it to the first token.
        /// </summary>
        /// <param name="input">Source text to tokenize.</param>
        /// <param name="type">Lexing mode; defaults to <c>LexerType.Both</c>.</param>
        public Lexer(string input, LexerType type = LexerType.Both)
        {
            Type = type;
            source = input;

            // NOTE(review): presumably (pointer, line, column) - confirm against Marker's constructor.
            sourceMarker = new Marker(-1, 1, 0);

            currentEnumerator = GetTokens();
            currentEnumerator.MoveNext();
        }

        /// <summary>Repositions the read cursor to <paramref name="marker"/>.</summary>
        public void GoTo(Marker marker)
        {
            sourceMarker = marker;
        }

        /// <summary>
        /// Reads the next character of the source and stores it in <c>currentChar</c>.
        /// </summary>
        /// <param name="peek">
        /// When true the cursor is not advanced. <c>currentChar</c> is still overwritten with
        /// the peeked character.
        /// </param>
        /// <returns>
        /// The next character, or <c>'\0'</c> at end of input. At end of input the pointer is
        /// parked at <c>source.Length</c>, also when peeking.
        /// </returns>
        private char GetNextChar(bool peek = false)
        {
            if (sourceMarker.Pointer + 1 >= source.Length)
            {
                sourceMarker.Pointer = source.Length;
                return currentChar = (char)0;
            }

            if (peek)
                return currentChar = source[sourceMarker.Pointer + 1];

            sourceMarker.Column++;
            sourceMarker.Pointer++;

            if ((currentChar = source[sourceMarker.Pointer]) == '\n')
            {
                sourceMarker.Column = 0;
                sourceMarker.Line++;
            }

            return currentChar;
        }

        // Keyword tables are shared by all lexers and built on first use.
        private static Dictionary<string, Token> TokenDictionary;
        private static Dictionary<string, Token> TokenLineDictionary;

        // Points at one of the two static character tables below, depending on Type.
        private Dictionary<char, Token> TokenCharDictionary;

        private static Dictionary<char, Token> BothModeTokens;
        private static Dictionary<char, Token> StringModeTokens;

        /// <summary>
        /// Builds the shared keyword and character tables on first use and selects the
        /// character table that matches the current <see cref="Type"/>.
        /// </summary>
        private void InitTokenDictionaries()
        {
            if (TokenDictionary == null || TokenLineDictionary == null)
            {
                TokenDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);
                TokenLineDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);

                foreach (Token token in Enum.GetValues(typeof(Token)))
                {
                    // NOTE(review): the reviewed text shows this call without explicit type
                    // arguments. If Utility.GetEnumAttributes is generic over the attribute
                    // type, the keyword attribute type must be supplied here - confirm
                    // against Utility.
                    foreach (var attribute in Utility.GetEnumAttributes(token))
                    {
                        if (attribute.IsLineKeyword)
                            TokenLineDictionary[attribute.Keyword] = token;
                        else
                            TokenDictionary[attribute.Keyword] = token;
                    }
                }
            }

            if (BothModeTokens == null || StringModeTokens == null)
            {
                BothModeTokens = new Dictionary<char, Token>();
                StringModeTokens = new Dictionary<char, Token>();

                foreach (Token token in Enum.GetValues(typeof(Token)))
                {
                    // NOTE(review): same as above - the character attribute type argument is
                    // not visible in the reviewed text; confirm against Utility.
                    foreach (var attribute in Utility.GetEnumAttributes(token))
                    {
                        // Every character token is valid in "both" mode; only those flagged
                        // with LexerType.String are also valid in string mode.
                        if ((attribute.LexerContext & LexerType.String) > 0)
                            StringModeTokens[attribute.Character] = token;

                        BothModeTokens[attribute.Character] = token;
                    }
                }
            }

            TokenCharDictionary = Type == LexerType.String
                ? StringModeTokens
                : BothModeTokens;
        }

        // "<digits>p<digits>" literal. Anchored so that identifiers which merely contain
        // such a sequence are not mistaken for numbers.
        private static readonly Regex PowRegex = new Regex(@"^(\d+)p(\d+)$");

        /// <summary>True for any whitespace character except line feed.</summary>
        private static bool IsWhitespace(char c)
        {
            return char.IsWhiteSpace(c) && c != '\n';
        }

        /// <summary>True for line feed, carriage return and the end-of-input sentinel.</summary>
        private static bool IsEndOfLine(char c)
        {
            return c == '\n' || c == '\r' || c == '\0';
        }

        /// <summary>
        /// Bounds-checked ordinal test for <paramref name="text"/> occurring in the source at
        /// <paramref name="index"/>. Returns false instead of throwing when fewer characters
        /// remain than <paramref name="text"/> is long.
        /// </summary>
        private bool SourceMatchesAt(int index, string text)
        {
            return index >= 0
                   && index + text.Length <= source.Length
                   && string.CompareOrdinal(source, index, text, 0, text.Length) == 0;
        }

        /// <summary>
        /// Checks whether the operator character <paramref name="c"/> is immediately followed
        /// by a second copy of itself and, when <paramref name="c"/> was actually consumed,
        /// consumes that second copy as well.
        /// </summary>
        /// <param name="c">The operator character being classified.</param>
        /// <param name="peeked">
        /// True when <paramref name="c"/> was only peeked, i.e. it sits one position past the
        /// cursor. In that case nothing is consumed so the operator can be lexed normally
        /// once the cursor reaches it.
        /// </param>
        private bool TryConsumeRepeat(char c, bool peeked)
        {
            // c is at Pointer when consumed and at Pointer + 1 when peeked.
            int following = sourceMarker.Pointer + (peeked ? 2 : 1);

            // An operator that is the last character of the source is a single operator.
            if (following >= source.Length || source[following] != c)
                return false;

            if (!peeked)
                GetNextChar();

            return true;
        }

        /// <summary>
        /// Reads the remainder of a double-quoted literal (the opening quote is the current
        /// character) and returns its unescaped content. Recognised escapes are
        /// <c>\n</c>, <c>\t</c>, <c>\\</c> and <c>\"</c>; any other escaped character is dropped.
        /// </summary>
        /// <exception cref="ParserException">The source ends before the closing quote.</exception>
        private string ReadStringLiteral()
        {
            StringBuilder literal = new StringBuilder();

            while (GetNextChar() != '"')
            {
                if (currentChar == '\\')
                {
                    switch (char.ToLowerInvariant(GetNextChar()))
                    {
                        case 'n': literal.Append('\n'); break;
                        case 't': literal.Append('\t'); break;
                        case '\\': literal.Append('\\'); break;
                        case '"': literal.Append('"'); break;
                    }
                }
                else if (currentChar == '\0')
                {
                    throw new ParserException("Unexpected end of file", TokenMarker);
                }
                else
                {
                    literal.Append(currentChar);
                }
            }

            return literal.ToString();
        }

        /// <summary>
        /// Classifies <paramref name="c"/> as a token, consuming further characters where the
        /// token is longer than one character.
        /// </summary>
        /// <param name="c">Character to classify.</param>
        /// <param name="peeked">
        /// True when <paramref name="c"/> came from a peek. Only the doubled-operator check
        /// uses this. The comment, skip-block and string-literal branches advance the cursor
        /// regardless, and <see cref="GetTokens"/> relies on that for the comment case.
        /// </param>
        /// <returns>The token, or <c>Token.Unknown</c> when <paramref name="c"/> starts free text.</returns>
        private Token DetermineToken(char c, bool peeked = false)
        {
            if (TokenCharDictionary.TryGetValue(c, out Token charToken))
                return charToken;

            switch (c)
            {
                case ';':
                    // A semicolon starts a comment that runs to the end of the line.
                    while (currentChar != '\n')
                    {
                        if (currentChar == '\0')
                            return Token.EOF;

                        GetNextChar();
                    }

                    return Token.NewLine;

                case '[':
                    const string SkipStart = "[SKIPSTART]";
                    const string SkipEnd = "[SKIPEND]";

                    // Only a marker in the first column opens a skipped region.
                    if (sourceMarker.Column > 1 || !SourceMatchesAt(sourceMarker.Pointer, SkipStart))
                        return Token.Unknown;

                    while (GetNextChar() != '\0')
                    {
                        if (currentChar == '[' && SourceMatchesAt(sourceMarker.Pointer, SkipEnd))
                        {
                            // Discard the rest of the [SKIPEND] line.
                            while (true)
                            {
                                switch (GetNextChar())
                                {
                                    case '\n': return Token.NewLine;
                                    case '\0': return Token.EOF;
                                }
                            }
                        }
                    }

                    return Token.EOF;

                case '%':
                    return Type == LexerType.String ? Token.Format : Token.Modulo;

                case '<':
                    if (!Type.HasFlag(LexerType.Real))
                        break;

                    if (GetNextChar(true) == '>')
                    {
                        GetNextChar();
                        return Token.NotEqual;
                    }
                    else if (GetNextChar(true) == '=')
                    {
                        GetNextChar();
                        return Token.LessEqual;
                    }
                    else
                    {
                        return Token.Less;
                    }

                case '>':
                    if (!Type.HasFlag(LexerType.Real))
                        break;

                    if (GetNextChar(true) == '=')
                    {
                        GetNextChar();
                        return Token.MoreEqual;
                    }
                    else
                    {
                        return Token.More;
                    }

                case '+':
                    if (Type == LexerType.String)
                        return Token.Unknown;

                    return TryConsumeRepeat(c, peeked) ? Token.Increment : Token.Plus;

                case '-':
                    if (Type == LexerType.String)
                        return Token.Unknown;

                    return TryConsumeRepeat(c, peeked) ? Token.Decrement : Token.Minus;

                case '=':
                    if (Type == LexerType.String)
                        return Token.Unknown;

                    // "=" and "==" are the same token.
                    TryConsumeRepeat(c, peeked);
                    return Token.Equal;

                case '&':
                    // "&" and "&&" are the same token.
                    TryConsumeRepeat(c, peeked);
                    return Token.And;

                case '|':
                    // "|" and "||" are the same token.
                    TryConsumeRepeat(c, peeked);
                    return Token.Or;

                case '"':
                    Value = new Value(ReadStringLiteral());
                    return Token.Value;

                case (char)0:
                    return Token.EOF;
            }

            return Token.Unknown;
        }

        /// <summary>
        /// Iterator that produces the token stream. Character tokens are yielded directly;
        /// any other run of characters is collected and classified, in this order, as a
        /// floating-point literal, a <c>0x</c> hex literal, a <c>NpM</c> literal (value
        /// <c>N &lt;&lt; M</c>), a keyword, free text (string mode), a line keyword followed
        /// by the raw rest of the line, or an identifier.
        /// </summary>
        private IEnumerator<Token> GetTokens()
        {
            sourceMarker = new Marker(-1, 1, 0);

            while (true)
            {
                // Skip whitespace (except in string mode, where it is significant) and any '\r'.
                while (IsWhitespace(GetNextChar()) && Type != LexerType.String || currentChar == '\r')
                {
                }

                TokenMarker = sourceMarker;

                Token token = DetermineToken(currentChar);

                if (token == Token.EOF)
                {
                    yield return Token.EOF;
                    yield break;
                }

                if (token != Token.Unknown)
                {
                    yield return token;
                    continue;
                }

                // Collect free text up to the next character that is itself a token,
                // a '\r', or (outside string mode) whitespace.
                StringBuilder bodyBuilder = new StringBuilder(currentChar.ToString());

                while (DetermineToken(GetNextChar(true), true) == Token.Unknown
                       && (!IsWhitespace(GetNextChar(true)) || Type == LexerType.String)
                       && GetNextChar(true) != '\r')
                {
                    bodyBuilder.Append(GetNextChar());
                }

                string result = bodyBuilder.ToString();

                if (double.TryParse(result, NumberStyles.Float, CultureInfo.InvariantCulture, out var real))
                {
                    Value = real;
                    yield return Token.Value;
                    continue;
                }

                // Strip only the leading prefix; a second "0x" inside the text is not a valid digit.
                if (result.StartsWith("0x", StringComparison.Ordinal)
                    && int.TryParse(result.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out int hexResult))
                {
                    Value = hexResult;
                    yield return Token.Value;
                    continue;
                }

                // TryParse keeps an over-long digit run from throwing; such text falls
                // through to the keyword/identifier handling below.
                Match powMatch = PowRegex.Match(result);
                if (powMatch.Success
                    && int.TryParse(powMatch.Groups[1].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int a)
                    && int.TryParse(powMatch.Groups[2].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int b))
                {
                    Value = a << b;
                    yield return Token.Value;
                    continue;
                }

                Identifer = result;

                if (TokenDictionary.TryGetValue(Identifer, out token))
                {
                    yield return token;
                    continue;
                }

                if (Type == LexerType.String)
                {
                    // Free text in string mode is a value; one leading whitespace character is dropped.
                    Value = char.IsWhiteSpace(Identifer[0])
                        ? Identifer.Substring(1)
                        : Identifer;

                    yield return Token.Value;
                    continue;
                }

                if (TokenLineDictionary.TryGetValue(Identifer, out token))
                {
                    // A line keyword takes the raw remainder of the line as its argument.
                    bodyBuilder = new StringBuilder();

                    while (!IsEndOfLine(GetNextChar(true)))
                        bodyBuilder.Append(GetNextChar());

                    yield return token;

                    string strValue = bodyBuilder.ToString();

                    if (strValue.Length > 0 && char.IsWhiteSpace(strValue[0]))
                        strValue = strValue.Substring(1);

                    Value = new Value(strValue);
                    yield return Token.Value;

                    yield return currentChar == '\0'
                        ? Token.EOF
                        : Token.NewLine;

                    continue;
                }

                yield return Token.Identifer;

                // currentChar is the character that ended the identifier: either a peeked
                // '\n', or the '\n' a trailing comment was consumed up to.
                if (currentChar == '\n')
                    yield return Token.NewLine;
            }
        }

        /// <summary>Returns the lexer's single shared token enumerator (not a fresh one).</summary>
        public IEnumerator<Token> GetEnumerator()
        {
            return currentEnumerator;
        }

        IEnumerator IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }

        // Binary operator precedence; a higher number binds tighter.
        private static readonly Dictionary<Token, int> OrderOfOps = new Dictionary<Token, int>
        {
            { Token.Or, 0 }, { Token.And, 0 },
            { Token.Equal, 1 }, { Token.NotEqual, 1 },
            { Token.Less, 1 }, { Token.More, 1 },
            { Token.LessEqual, 1 }, { Token.MoreEqual, 1 },
            { Token.Plus, 2 }, { Token.Minus, 2 },
            { Token.Asterisk, 3 }, { Token.Slash, 3 },
            { Token.Caret, 4 }
        };

        /// <summary>
        /// Evaluates the expression that starts at the current token, using a value stack and
        /// an operator stack ordered by <c>OrderOfOps</c>. Stops at the first token that cannot
        /// continue the expression and leaves the enumerator on it.
        /// </summary>
        /// <returns>
        /// In string mode, the concatenation of all values left on the stack in the order they
        /// were pushed; otherwise the single value on top of the stack.
        /// </returns>
        /// <exception cref="ParserException">
        /// An identifier is met outside string mode, a parenthesised sub-expression is not
        /// closed, or the expression is empty outside string mode.
        /// </exception>
        public Value Expression()
        {
            Stack<Value> stack = new Stack<Value>();
            Stack<Token> operators = new Stack<Token>();

            // Applies a binary operator to the two topmost values.
            void Operation(Token token)
            {
                Value b = stack.Pop();
                Value a = stack.Pop();
                Value result = a.Operate(b, token);
                stack.Push(result);
            }

            int i = 0;
            while (true)
            {
                if (currentEnumerator.Current == Token.Value)
                {
                    stack.Push(Value);
                }
                else if (currentEnumerator.Current == Token.Identifer)
                {
                    if (Type == LexerType.String)
                        stack.Push(Identifer);
                    else
                        throw new ParserException("Undeclared variable " + Identifer, TokenMarker);
                }
                else if (currentEnumerator.Current == Token.LParen)
                {
                    currentEnumerator.MoveNext();
                    stack.Push(Expression());

                    // The nested call stops on the token it could not use; that must be the ')'.
                    if (currentEnumerator.Current != Token.RParen)
                        throw new ParserException($"Was expecting [RParen] got [{currentEnumerator.Current}]", TokenMarker);
                }
                else if (Type.HasFlag(LexerType.Real)
                         && currentEnumerator.Current.IsArithmetic()
                         && currentEnumerator.Current.IsUnary()
                         && (i == 0))
                {
                    // A unary operator is only recognised as the first token of the
                    // expression; it is evaluated as "0 <op> operand".
                    stack.Push(0);
                    operators.Push(currentEnumerator.Current);
                }
                else if ((Type == LexerType.String && currentEnumerator.Current.IsStringOp())
                         || (Type.HasFlag(LexerType.Real) && currentEnumerator.Current.IsArithmetic()))
                {
                    // Reduce pending operators that bind at least as tightly before pushing this one.
                    while (operators.Count > 0 && OrderOfOps[currentEnumerator.Current] <= OrderOfOps[operators.Peek()])
                        Operation(operators.Pop());

                    operators.Push(currentEnumerator.Current);
                }
                else
                {
                    if (i == 0)
                    {
                        if (Type == LexerType.String)
                            stack.Push("");
                        else
                            throw new ParserException("Empty expression", TokenMarker);
                    }

                    break;
                }

                i++;
                currentEnumerator.MoveNext();
            }

            while (operators.Count > 0)
                Operation(operators.Pop());

            // A Stack enumerates from the top down, so prepending each later element
            // rebuilds the text in push order.
            return Type == LexerType.String
                ? stack.Aggregate((a, b) => b.String + a.String)
                : stack.Pop();
        }
    }
}