123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517 |
- using System;
- using System.Collections;
- using System.Collections.Generic;
- using System.Globalization;
- using System.Linq;
- using System.Text;
- using System.Text.RegularExpressions;
- namespace NTERA.Interpreter.Compiler
- {
- public class Lexer : IEnumerable<Token>
- {
/// <summary>Complete text being tokenized.</summary>
private readonly string source;

/// <summary>Current read position inside <see cref="source"/>.</summary>
private Marker sourceMarker;

/// <summary>
/// Character most recently returned by <c>GetNextChar</c>; note that peeking
/// also overwrites it.
/// </summary>
private char currentChar;

/// <summary>
/// The single token iterator created in the constructor; it is shared by
/// <c>GetEnumerator</c> and <c>Expression</c>.
/// </summary>
private readonly IEnumerator<Token> currentEnumerator;

/// <summary>Backing field of <see cref="Type"/>.</summary>
private LexerType _type;

/// <summary>
/// Lexing mode. Every assignment re-runs <c>InitTokenDictionaries</c> so the
/// character-token table matches the newly selected mode.
/// </summary>
public LexerType Type
{
    get
    {
        return _type;
    }
    internal set
    {
        _type = value;
        InitTokenDictionaries();
    }
}

/// <summary>Read position recorded by the token iterator just before it classifies a token.</summary>
public Marker TokenMarker { get; set; }

/// <summary>Text of the most recent run that was not recognized as a numeric literal.</summary>
public string Identifer { get; set; }

/// <summary>Payload of the most recent <c>Token.Value</c> (number or string).</summary>
public Value Value { get; set; }
/// <summary>
/// Creates a lexer over <paramref name="input"/> and immediately advances the shared
/// token iterator to the first token, so the first token is already lexed when the
/// constructor returns.
/// </summary>
/// <param name="input">Text to tokenize; must not be null.</param>
/// <param name="type">Lexing mode; defaults to <c>LexerType.Both</c>.</param>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
/// <remarks>
/// Because the first token is read here, lexing errors raised while reading it
/// (for example an unterminated string literal) propagate out of the constructor.
/// </remarks>
public Lexer(string input, LexerType type = LexerType.Both)
{
    // Fail fast with a precise exception instead of a NullReferenceException
    // thrown later from inside the iterator when it reads source.Length.
    if (input == null)
        throw new ArgumentNullException(nameof(input));

    source = input;
    Type = type;
    sourceMarker = new Marker(-1, 1, 0);
    currentEnumerator = GetTokens();
    // Prime the iterator so Current is valid for Expression().
    currentEnumerator.MoveNext();
}
/// <summary>
/// Replaces the read position with <paramref name="marker"/>. Only the position is
/// changed; <c>currentChar</c> and the token iterator's current token are not refreshed.
/// </summary>
/// <param name="marker">Position to continue reading from.</param>
public void GoTo(Marker marker) => sourceMarker = marker;
/// <summary>
/// Reads the character following the current read position and stores it in
/// <c>currentChar</c>.
/// </summary>
/// <param name="peek">
/// When true the read position is left where it is; <c>currentChar</c> is still
/// overwritten with the peeked character.
/// </param>
/// <returns>
/// The character read, or <c>'\0'</c> when no characters remain. In the latter case
/// the pointer is set to <c>source.Length</c>, even when peeking.
/// </returns>
char GetNextChar(bool peek = false)
{
    int upcoming = sourceMarker.Pointer + 1;

    // Past the end: clamp the pointer and report NUL.
    if (upcoming >= source.Length)
    {
        sourceMarker.Pointer = source.Length;
        currentChar = (char)0;
        return currentChar;
    }

    if (peek)
    {
        currentChar = source[upcoming];
        return currentChar;
    }

    sourceMarker.Column++;
    sourceMarker.Pointer = upcoming;
    currentChar = source[upcoming];

    // A line feed starts a new line: reset the column and bump the line counter.
    if (currentChar == '\n')
    {
        sourceMarker.Column = 0;
        sourceMarker.Line++;
    }

    return currentChar;
}
/// <summary>Keyword text to token, compared case-insensitively. Shared by all lexers.</summary>
protected static Dictionary<string, Token> TokenDictionary;

/// <summary>Single-character tokens active for this instance's current <c>Type</c>.</summary>
protected Dictionary<char, Token> TokenCharDictionary;

/// <summary>Every single-character token, regardless of its declared lexer context.</summary>
protected static Dictionary<char, Token> BothModeTokens;

/// <summary>Single-character tokens whose declared lexer context includes <c>LexerType.String</c>.</summary>
protected static Dictionary<char, Token> StringModeTokens;

// volatile: read outside the lock by the fast path in EnsureStaticTokenTables.
private static volatile bool _initialized;
private static readonly object _initializedLock = new object();

/// <summary>
/// Makes sure the shared token tables exist and points <c>TokenCharDictionary</c>
/// at the table matching the current <c>Type</c>.
/// </summary>
private void InitTokenDictionaries()
{
    EnsureStaticTokenTables();

    // Per-instance selection must happen on every call (it depends on Type),
    // so it is deliberately kept out of the one-time static initialization.
    TokenCharDictionary = Type == LexerType.String ? StringModeTokens : BothModeTokens;
}

/// <summary>
/// Builds the static keyword and character tables once. The original never set
/// <c>_initialized</c>, so every call took the lock; the flag is now set after the
/// tables have been fully built and published.
/// </summary>
private static void EnsureStaticTokenTables()
{
    if (_initialized)
        return;

    lock (_initializedLock)
    {
        if (_initialized)
            return;

        if (TokenDictionary == null)
        {
            // Fill a local first so a half-built table is never visible through the static field.
            var keywords = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);
            foreach (Token token in Enum.GetValues(typeof(Token)))
            {
                foreach (var attribute in Utility.GetEnumAttributes<Token, LexerKeywordAttribute>(token))
                {
                    keywords[attribute.Keyword] = token;
                }
            }
            TokenDictionary = keywords;
        }

        if (BothModeTokens == null || StringModeTokens == null)
        {
            var bothMode = new Dictionary<char, Token>();
            var stringMode = new Dictionary<char, Token>();
            foreach (Token token in Enum.GetValues(typeof(Token)))
            {
                foreach (var attribute in Utility.GetEnumAttributes<Token, LexerCharacterAttribute>(token))
                {
                    if ((attribute.LexerContext & LexerType.String) > 0)
                        stringMode[attribute.Character] = token;
                    bothMode[attribute.Character] = token;
                }
            }
            BothModeTokens = bothMode;
            StringModeTokens = stringMode;
        }

        _initialized = true;
    }
}
// Matches the "<mantissa>p<shift>" integer literal (e.g. "1p3"), which the token
// iterator evaluates as mantissa << shift.
// - Anchored, so text that merely contains such a sequence (e.g. "shop1p2b") is no
//   longer mistaken for a number. Surrounding whitespace is tolerated, mirroring the
//   NumberStyles.Float parse applied to ordinary numeric literals.
// - [0-9] instead of \d: \d also matches non-ASCII digits, which int parsing rejects.
private static readonly Regex PowRegex = new Regex(@"^\s*([0-9]+)p([0-9]+)\s*$");
/// <summary>
/// Tells whether <paramref name="c"/> is whitespace that can be skipped inside a line.
/// A line feed is never reported as whitespace.
/// </summary>
private static bool IsWhitespace(char c)
{
    if (c == '\n')
        return false;

    return char.IsWhiteSpace(c);
}
/// <summary>
/// Tells whether <paramref name="c"/> ends a line: line feed, carriage return,
/// or the NUL character.
/// </summary>
private static bool IsEndOfLine(char c)
{
    switch (c)
    {
        case '\n':
        case '\r':
        case '\0':
            return true;
        default:
            return false;
    }
}
/// <summary>
/// Classifies one character as a token, consuming whatever additional input belongs
/// to that token (second operator character, comment text, string literal body,
/// or a [SKIPSTART]...[SKIPEND] region).
/// </summary>
/// <param name="peek">
/// Passed to <c>GetNextChar</c> when <paramref name="useCurrent"/> is false, so the
/// character is inspected without advancing the read position.
/// </param>
/// <param name="useCurrent">
/// When true, <c>currentChar</c> is classified and no character is read first.
/// </param>
/// <returns>
/// The recognized token; <c>Token.Unknown</c> when the character does not start a
/// known token; <c>Token.EOF</c> when the input is exhausted.
/// </returns>
/// <exception cref="ParserException">A string literal is not closed before the end of input.</exception>
private Token DetermineToken(bool peek, bool useCurrent)
{
    const string SkipStart = "[SKIPSTART]";
    const string SkipEnd = "[SKIPEND]";

    char c = useCurrent ? currentChar : GetNextChar(peek);
    if (TokenCharDictionary.TryGetValue(c, out Token charToken))
        return charToken;

    // Ordinal, bounds-safe "does the source contain `text` at the read pointer?".
    // The previous Substring-based check threw ArgumentOutOfRangeException whenever
    // fewer than text.Length characters remained after a '['.
    bool MatchesAtPointer(string text)
    {
        int start = sourceMarker.Pointer;
        return start >= 0
               && start + text.Length <= source.Length
               && string.CompareOrdinal(source, start, text, 0, text.Length) == 0;
    }

    switch (c)
    {
        case ';': //semicolon is comment
            // Discard the rest of the line.
            while (currentChar != '\n')
            {
                if (currentChar == '\0')
                    return Token.EOF;
                GetNextChar();
            }
            return Token.NewLine;
        case '[':
            // Only a [SKIPSTART] marker at the very start of a line opens a skipped region.
            if (sourceMarker.Column > 1 || !MatchesAtPointer(SkipStart))
                return Token.Unknown;
            while (GetNextChar() != '\0')
            {
                if (currentChar == '[' && MatchesAtPointer(SkipEnd))
                {
                    // Drop the remainder of the [SKIPEND] line as well.
                    while (true)
                    {
                        switch (GetNextChar())
                        {
                            case '\n':
                                return Token.NewLine;
                            case '\0':
                                return Token.EOF;
                        }
                    }
                }
            }
            return Token.EOF;
        case '%':
            return Type == LexerType.String ? Token.Format : Token.Modulo;
        case '<':
            if (!Type.HasFlag(LexerType.Real))
                break;
            if (GetNextChar(true) == '>')
            {
                GetNextChar();
                return Token.NotEqual;
            }
            else if (GetNextChar(true) == '=')
            {
                GetNextChar();
                return Token.LessEqual;
            }
            else
                return Token.Less;
        case '>':
            if (!Type.HasFlag(LexerType.Real))
                break;
            if (GetNextChar(true) == '=')
            {
                GetNextChar();
                return Token.MoreEqual;
            }
            else
                return Token.More;
        // NOTE(review): for '+', '-', '=', '&' and '|' a peek-mode call consumes the
        // operator character(s). The token iterator only uses a peek-mode result to
        // decide where an identifier ends, so an operator written directly after an
        // identifier (no whitespace) appears to be dropped - confirm whether this is intended.
        case '+':
            if (Type == LexerType.String)
                return Token.Unknown;
            if (peek)
                GetNextChar();
            if (GetNextChar(true) == '+')
            {
                GetNextChar();
                return Token.Increment;
            }
            else
                return Token.Plus;
        case '-':
            if (Type == LexerType.String)
                return Token.Unknown;
            if (peek)
                GetNextChar();
            if (GetNextChar(true) == '-')
            {
                GetNextChar();
                return Token.Decrement;
            }
            else
                return Token.Minus;
        case '=':
            if (Type == LexerType.String)
                return Token.Unknown;
            if (peek)
                GetNextChar();
            // "=" and "==" both produce Token.Equal.
            if (GetNextChar(true) == '=')
                GetNextChar();
            return Token.Equal;
        case '&':
            if (peek)
                GetNextChar();
            // "&" and "&&" both produce Token.And.
            if (GetNextChar(true) == '&')
                GetNextChar();
            return Token.And;
        case '|':
            if (peek)
                GetNextChar();
            // "|" and "||" both produce Token.Or.
            if (GetNextChar(true) == '|')
                GetNextChar();
            return Token.Or;
        case '@':
            if (Type == LexerType.String)
                return Token.Unknown;
            // @"..." is lexed exactly like a plain string literal.
            if (GetNextChar(true) == '"')
            {
                GetNextChar();
                goto case '"';
            }
            return Token.AtSymbol;
        case '"':
            // StringBuilder instead of repeated string concatenation.
            StringBuilder literal = new StringBuilder();
            while (GetNextChar() != '"')
            {
                if (currentChar == '\\')
                {
                    // Recognized escapes: \n \t \\ \" (letter case ignored).
                    // Any other escaped character is dropped together with the backslash.
                    switch (char.ToLowerInvariant(GetNextChar()))
                    {
                        case 'n':
                            literal.Append('\n');
                            break;
                        case 't':
                            literal.Append('\t');
                            break;
                        case '\\':
                            literal.Append('\\');
                            break;
                        case '"':
                            literal.Append('"');
                            break;
                    }
                }
                else if (currentChar == '\0')
                    throw new ParserException("Unexpected end of file");
                else
                {
                    literal.Append(currentChar);
                }
            }
            Value = new Value(literal.ToString());
            return Token.Value;
        case (char)0:
            return Token.EOF;
    }
    return Token.Unknown;
}
/// <summary>
/// Lazily tokenizes <c>source</c> from the beginning. Before yielding a token the
/// iterator updates <c>TokenMarker</c> and, depending on the token, <c>Value</c>
/// (literals) or <c>Identifer</c> (names and keywords).
/// </summary>
/// <returns>An iterator whose last element is <c>Token.EOF</c>.</returns>
private IEnumerator<Token> GetTokens()
{
    sourceMarker = new Marker(-1, 1, 0);
    while (true)
    {
        // Skip leading whitespace (kept in String mode) and carriage returns.
        while (IsWhitespace(GetNextChar()) && Type != LexerType.String || currentChar == '\r')
        {
        }

        TokenMarker = sourceMarker;
        Token token = DetermineToken(false, true);
        if (token == Token.EOF)
        {
            yield return Token.EOF;
            yield break;
        }
        if (token != Token.Unknown)
        {
            yield return token;
            continue;
        }

        // Not a token by itself: accumulate characters until the next one would start
        // a token, or (outside String mode) is whitespace, or is a carriage return.
        StringBuilder bodyBuilder = new StringBuilder(currentChar.ToString());
        while (DetermineToken(true, false) == Token.Unknown
               && (!IsWhitespace(GetNextChar(true)) || Type == LexerType.String)
               && GetNextChar(true) != '\r')
        {
            bodyBuilder.Append(GetNextChar());
        }
        string result = bodyBuilder.ToString();

        // Decimal / floating-point literal.
        if (double.TryParse(result, NumberStyles.Float, CultureInfo.InvariantCulture, out var real))
        {
            Value = real;
            yield return Token.Value;
            continue;
        }

        // Hexadecimal literal. Only the leading "0x" is removed (Replace stripped every
        // occurrence), and parsing no longer depends on the current culture.
        if (result.StartsWith("0x", StringComparison.Ordinal)
            && int.TryParse(result.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out int hexResult))
        {
            Value = hexResult;
            yield return Token.Value;
            continue;
        }

        // "<mantissa>p<shift>" literal, evaluated as mantissa << shift. TryParse keeps
        // oversized or otherwise unparsable digit runs from throwing; such text falls
        // through and is treated like any other identifier.
        Match powMatch = PowRegex.Match(result);
        if (powMatch.Success
            && int.TryParse(powMatch.Groups[1].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int mantissa)
            && int.TryParse(powMatch.Groups[2].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int shift))
        {
            Value = mantissa << shift;
            yield return Token.Value;
            continue;
        }

        Identifer = result;
        if (TokenDictionary.TryGetValue(Identifer, out token))
        {
            yield return token;
            continue;
        }

        if (Type == LexerType.String)
        {
            // In String mode unrecognized text is a literal; one leading whitespace
            // character is dropped.
            Value = char.IsWhiteSpace(Identifer[0])
                ? Identifer.Substring(1)
                : Identifer;
            yield return Token.Value;
            continue;
        }

        yield return Token.Identifer;
        // currentChar may hold a line feed seen while probing for the end of the identifier.
        if (currentChar == '\n')
            yield return Token.NewLine;
    }
}
/// <summary>
/// Returns the lexer's single shared token iterator. It is the same instance on every
/// call and has already been advanced once by the constructor.
/// </summary>
public IEnumerator<Token> GetEnumerator() => currentEnumerator;
/// <summary>Non-generic enumeration; forwards to the generic <c>GetEnumerator</c>.</summary>
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
/// <summary>
/// Binary operator precedence used by <c>Expression</c>; a larger number binds tighter.
/// Operators with equal precedence are applied left to right.
/// </summary>
private static readonly Dictionary<Token, int> OrderOfOps = new Dictionary<Token, int>
{
    // Logical
    [Token.Or] = 0,
    [Token.And] = 0,

    // Comparison
    [Token.Equal] = 1,
    [Token.NotEqual] = 1,
    [Token.Less] = 1,
    [Token.More] = 1,
    [Token.LessEqual] = 1,
    [Token.MoreEqual] = 1,

    // Additive
    [Token.Plus] = 2,
    [Token.Minus] = 2,

    // Multiplicative. Modulo is emitted by the lexer for '%' outside String mode but
    // previously had no entry here, so looking it up would throw KeyNotFoundException.
    [Token.Asterisk] = 3,
    [Token.Slash] = 3,
    [Token.Modulo] = 3,

    [Token.Caret] = 4
};
-
/// <summary>
/// Evaluates the expression that starts at the iterator's current token, using an
/// operand stack and an operator stack ordered by <c>OrderOfOps</c>. Evaluation stops
/// at the first token that cannot be part of the expression, which is left as the
/// current token.
/// </summary>
/// <returns>
/// In String mode, the concatenation of all remaining operands in source order;
/// otherwise the value left on top of the operand stack.
/// </returns>
/// <exception cref="ParserException">
/// The expression is empty (non-String mode), references an identifier (non-String mode),
/// has an unclosed parenthesis, uses an operator without a known precedence, or an
/// operator lacks an operand.
/// </exception>
public Value Expression()
{
    Stack<Value> stack = new Stack<Value>();
    Stack<Token> operators = new Stack<Token>();

    // Applies a binary operator to the two topmost operands.
    void Operation(Token token)
    {
        // Report malformed input as a parser error rather than letting
        // Stack.Pop throw InvalidOperationException.
        if (stack.Count < 2)
            throw new ParserException($"Missing operand for [{token}]", TokenMarker);

        Value b = stack.Pop();
        Value a = stack.Pop();
        stack.Push(a.Operate(b, token));
    }

    // Precedence lookup that fails with a parser error instead of KeyNotFoundException.
    int Precedence(Token op)
    {
        if (!OrderOfOps.TryGetValue(op, out int level))
            throw new ParserException($"Unsupported operator [{op}]", TokenMarker);
        return level;
    }

    int consumed = 0; // number of tokens consumed by this (sub)expression
    while (true)
    {
        Token current = currentEnumerator.Current;

        if (current == Token.Value)
        {
            stack.Push(Value);
        }
        else if (current == Token.Identifer)
        {
            if (Type == LexerType.String)
                stack.Push(Identifer);
            else
                throw new ParserException("Undeclared variable " + Identifer, TokenMarker);
        }
        else if (current == Token.LParen)
        {
            // Evaluate the parenthesized sub-expression recursively.
            currentEnumerator.MoveNext();
            stack.Push(Expression());
            if (currentEnumerator.Current != Token.RParen)
                throw new ParserException($"Was expecting [RParen] got [{currentEnumerator.Current}]", TokenMarker);
        }
        else if (Type.HasFlag(LexerType.Real) && current.IsArithmetic()
                 && current.IsUnary() && (consumed == 0)) // || previousToken == Token.LParen))
        {
            // Leading unary operator: evaluate as "0 <op> operand".
            stack.Push(0);
            operators.Push(current);
        }
        else if (Type == LexerType.String && current.IsStringOp()
                 || Type.HasFlag(LexerType.Real) && current.IsArithmetic())
        {
            // Apply pending operators that bind at least as tightly as the new one.
            int incoming = Precedence(current);
            while (operators.Count > 0 && incoming <= Precedence(operators.Peek()))
                Operation(operators.Pop());
            operators.Push(current);
        }
        else
        {
            if (consumed == 0)
            {
                if (Type == LexerType.String)
                    stack.Push("");
                else
                    throw new ParserException("Empty expression", TokenMarker);
            }
            break;
        }

        consumed++;
        currentEnumerator.MoveNext();
    }

    while (operators.Count > 0)
        Operation(operators.Pop());

    // A Stack enumerates newest-first, so prepending each element restores source order.
    return Type == LexerType.String
        ? stack.Aggregate((a, b) => b.String + a.String)
        : stack.Pop();
}
- }
- }
|