using System;
using System.Collections;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;

namespace NTERA.Engine.Compiler
{
    /// <summary>
    /// Character-level tokenizer. Walks a source string and yields <see cref="Token"/> values,
    /// publishing the details of the most recent token through <see cref="TokenMarker"/>,
    /// <see cref="Identifier"/> and <see cref="Value"/>.
    /// </summary>
    public class Lexer : IEnumerable<Token>
    {
        // Markers that open and close a block of source the lexer skips entirely.
        private const string SkipStart = "[SKIPSTART]";
        private const string SkipEnd = "[SKIPEND]";

        private readonly string source;
        private Marker sourceMarker;

        /// <summary>The character returned by the most recent <see cref="GetNextChar"/> call (read or peek).</summary>
        public char CurrentChar;

        // The one enumerator instance handed out by GetEnumerator().
        private readonly IEnumerator<Token> currentEnumerator;

        /// <summary>Copy of the source marker taken at the start of the token currently being produced.</summary>
        public Marker TokenMarker { get; set; }

        /// <summary>Text of the most recent word that was not recognised as a numeric literal (keywords included).</summary>
        public string Identifier { get; set; }

        /// <summary>Literal value assigned just before <c>Token.Value</c> is yielded.</summary>
        public Value Value { get; set; }

        static Lexer()
        {
            InitTokenDictionaries();
        }

        /// <summary>
        /// Creates a lexer over <paramref name="input"/> and advances the token enumerator once.
        /// </summary>
        /// <param name="input">Source text to tokenize.</param>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
        public Lexer(string input)
        {
            source = input ?? throw new ArgumentNullException(nameof(input));

            sourceMarker = new Marker(-1, 1, 0);
            currentEnumerator = GetTokens();
            currentEnumerator.MoveNext();
        }

        /// <summary>Repositions the lexer at <paramref name="marker"/>.</summary>
        /// <param name="marker">Marker that replaces the current source marker.</param>
        public void GoTo(Marker marker)
        {
            sourceMarker = marker;
        }

        /// <summary>Whether the most recent <see cref="GetNextChar"/> call was a peek.</summary>
        public bool IsPeeking { get; protected set; }

        /// <summary>
        /// Switched on by a '{' at the start of a line and off by a '}' at the start of a line.
        /// While set, '\n' is treated as ordinary whitespace.
        /// </summary>
        public bool SingleLineMode { get; protected set; }

        /// <summary>
        /// Returns the character following the current pointer and stores it in <see cref="CurrentChar"/>.
        /// </summary>
        /// <param name="peek">
        /// When <c>false</c> the pointer, column and line are advanced; when <c>true</c> the position is left alone.
        /// </param>
        /// <returns>
        /// The next character, or '\0' when there is none. In the latter case the pointer is pinned to the
        /// source length, even for a peek.
        /// </returns>
        public char GetNextChar(bool peek = false)
        {
            IsPeeking = peek;

            if (sourceMarker.Pointer + 1 >= source.Length)
            {
                sourceMarker.Pointer = source.Length;
                return CurrentChar = (char)0;
            }

            if (peek)
                return CurrentChar = source[sourceMarker.Pointer + 1];

            sourceMarker.Column++;
            sourceMarker.Pointer++;

            if ((CurrentChar = source[sourceMarker.Pointer]) == '\n')
            {
                sourceMarker.Column = 0;
                sourceMarker.Line++;
            }

            return CurrentChar;
        }

        /// <summary>Keyword text to token, compared with invariant-culture case-insensitivity.</summary>
        protected static Dictionary<string, Token> TokenDictionary;

        /// <summary>Single character to token.</summary>
        protected static Dictionary<char, Token> TokenCharDictionary;

        private static bool _initialized = false;
        private static readonly object _initializedLock = new object();

        /// <summary>
        /// Builds <see cref="TokenDictionary"/> and <see cref="TokenCharDictionary"/> from the attributes
        /// attached to the members of <see cref="Token"/>. Does nothing once it has completed.
        /// </summary>
        private static void InitTokenDictionaries()
        {
            if (_initialized)
                return;

            lock (_initializedLock)
            {
                if (_initialized)
                    return;

                if (TokenDictionary == null)
                {
                    TokenDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);

                    foreach (Token token in Enum.GetValues(typeof(Token)))
                    {
                        // NOTE(review): this call is written identically in both loops, yet the results expose
                        // Keyword here and Character below - presumably explicit generic type arguments that
                        // select the attribute type are missing. Confirm against Utility.GetEnumAttributes.
                        foreach (var attribute in Utility.GetEnumAttributes(token))
                        {
                            TokenDictionary[attribute.Keyword] = token;
                        }
                    }
                }

                if (TokenCharDictionary == null)
                {
                    TokenCharDictionary = new Dictionary<char, Token>();

                    foreach (Token token in Enum.GetValues(typeof(Token)))
                    {
                        // NOTE(review): see the note on the keyword loop above.
                        foreach (var attribute in Utility.GetEnumAttributes(token))
                        {
                            TokenCharDictionary[attribute.Character] = token;
                        }
                    }
                }

                // Record completion so the early-outs above can actually trigger.
                _initialized = true;
            }
        }

        // Matches "<digits>p<digits>" anywhere inside a word; the literal's value is left << right.
        private static readonly Regex PowRegex = new Regex(@"(\d+)p(\d+)");

        /// <summary>Whitespace test in which '\n' only counts while single-line mode is active.</summary>
        private static bool IsWhitespace(char c, bool singleLineMode)
        {
            return char.IsWhiteSpace(c) && (c != '\n' || singleLineMode);
        }

        /// <summary>True for '\n', '\r' and '\0'.</summary>
        private static bool IsEndOfLine(char c)
        {
            return c == '\n' || c == '\r' || c == '\0';
        }

        /// <summary>
        /// True when <paramref name="text"/> occurs in the source starting exactly at the current pointer.
        /// Returns false, rather than throwing, when fewer characters remain than <paramref name="text"/> has.
        /// </summary>
        private bool SourceMatchesAtPointer(string text)
        {
            int start = sourceMarker.Pointer;

            return start >= 0
                   && start + text.Length <= source.Length
                   && string.CompareOrdinal(source, start, text, 0, text.Length) == 0;
        }

        /// <summary>
        /// Returns the character that follows an operator character. If neither <paramref name="peek"/> nor
        /// <see cref="IsPeeking"/> is set, this peeks via <see cref="GetNextChar"/>. Otherwise it reads two
        /// positions past the pointer directly, yielding '\0' when that position lies outside the source.
        /// </summary>
        private char LookAheadAfterOperator(bool peek)
        {
            if (!peek && !IsPeeking)
                return GetNextChar(true);

            int index = sourceMarker.Pointer + 2;
            return index >= 0 && index < source.Length ? source[index] : '\0';
        }

        /// <summary>
        /// Consumes characters until <see cref="CurrentChar"/> is '\n' (returns <c>Token.NewLine</c>)
        /// or '\0' (returns <c>Token.EOF</c>).
        /// </summary>
        private Token SkipRestOfLine()
        {
            while (CurrentChar != '\n')
            {
                if (CurrentChar == '\0')
                    return Token.EOF;

                GetNextChar();
            }

            return Token.NewLine;
        }

        /// <summary>
        /// Classifies one character as a token, consuming further characters for comments, skip blocks and
        /// multi-character operators.
        /// </summary>
        /// <param name="peek">Passed to <see cref="GetNextChar"/> when a character has to be fetched.</param>
        /// <param name="useCurrent">When true, classifies <see cref="CurrentChar"/> instead of fetching a character.</param>
        /// <returns>The recognised token, or <c>Token.Unknown</c> when the character starts or continues a word.</returns>
        private Token DetermineToken(bool peek, bool useCurrent)
        {
            char c = useCurrent ? CurrentChar : GetNextChar(peek);
            char adv;

            if (TokenCharDictionary.TryGetValue(c, out Token charToken))
                return charToken;

            switch (c)
            {
                case ';': //semicolon is comment
                    return SkipRestOfLine();

                case '[':
                    // Only a marker in the first column opens a skip block.
                    if (sourceMarker.Column > 1 || !SourceMatchesAtPointer(SkipStart))
                        return Token.Unknown;

                    while (GetNextChar() != '\0')
                    {
                        if (CurrentChar == '[' && SourceMatchesAtPointer(SkipEnd))
                        {
                            // Discard the remainder of the closing marker's line.
                            while (true)
                            {
                                switch (GetNextChar())
                                {
                                    case '\n':
                                        return Token.NewLine;
                                    case '\0':
                                        return Token.EOF;
                                }
                            }
                        }
                    }

                    return Token.EOF;

                case '{':
                    if (sourceMarker.Pointer == 0 || source[sourceMarker.Pointer - 1] == '\n')
                    {
                        SingleLineMode = true;

                        if (IsPeeking)
                            GetNextChar();

                        return SkipRestOfLine();
                    }

                    return Token.OpenBracket;

                case '}':
                    if (sourceMarker.Pointer == 0 || source[sourceMarker.Pointer - 1] == '\n')
                    {
                        SingleLineMode = false;

                        if (IsPeeking)
                            GetNextChar();

                        return SkipRestOfLine();
                    }

                    return Token.CloseBracket;

                case '<':
                    if (GetNextChar(true) == '>')
                    {
                        GetNextChar();
                        return Token.NotEqual;
                    }
                    else if (GetNextChar(true) == '=')
                    {
                        GetNextChar();
                        return Token.LessEqual;
                    }
                    else
                        return Token.Less;

                case '>':
                    if (GetNextChar(true) == '=')
                    {
                        GetNextChar();
                        return Token.MoreEqual;
                    }
                    else
                        return Token.More;

                case '+':
                    adv = LookAheadAfterOperator(peek);

                    if (adv == '=')
                    {
                        GetNextChar();
                        return Token.Append;
                    }
                    else if (adv == '+')
                    {
                        GetNextChar();
                        return Token.Increment;
                    }

                    return Token.Plus;

                case '-':
                    if (peek)
                        GetNextChar();

                    if (GetNextChar(true) == '-')
                    {
                        GetNextChar();
                        return Token.Decrement;
                    }
                    else
                        return Token.Minus;

                case '=':
                    adv = LookAheadAfterOperator(peek);

                    // "==" is consumed as a whole but still reported as Token.Equal.
                    if (adv == '=')
                        GetNextChar();

                    return Token.Equal;

                case '&':
                    if (peek)
                        GetNextChar();

                    // "&&" is consumed as a whole but still reported as Token.And.
                    if (GetNextChar(true) == '&')
                        GetNextChar();

                    return Token.And;

                case '\\':
                    if (peek)
                        GetNextChar();

                    if (GetNextChar(true) == '@')
                    {
                        GetNextChar();
                        return Token.TernaryEscape;
                    }

                    return Token.Unknown;

                case '|':
                    if (peek)
                        GetNextChar();

                    // "||" is consumed as a whole but still reported as Token.Or.
                    if (GetNextChar(true) == '|')
                        GetNextChar();

                    return Token.Or;

                case (char)0:
                    return Token.EOF;
            }

            return Token.Unknown;
        }

        /// <summary>
        /// Token producer behind <see cref="GetEnumerator"/>. Restarts from the beginning of the source and
        /// yields tokens until <c>Token.EOF</c> has been yielded.
        /// </summary>
        private IEnumerator<Token> GetTokens()
        {
            sourceMarker = new Marker(-1, 1, 0);

            while (true)
            {
                // Skip whitespace; the first non-whitespace character ends up in CurrentChar.
                while (IsWhitespace(GetNextChar(), SingleLineMode))
                {
                }

                TokenMarker = sourceMarker;

                Token token = DetermineToken(false, true);

                if (token == Token.EOF)
                {
                    yield return Token.EOF;
                    yield break;
                }

                if (token != Token.Unknown)
                {
                    yield return token;
                    continue;
                }

                // Accumulate a word: keep appending while the upcoming character is neither a token nor whitespace.
                StringBuilder bodyBuilder = new StringBuilder(CurrentChar.ToString());

                while (DetermineToken(true, false) == Token.Unknown
                       && !IsWhitespace(GetNextChar(true), SingleLineMode))
                {
                    bodyBuilder.Append(GetNextChar());
                }

                string result = bodyBuilder.ToString();

                if (double.TryParse(result, NumberStyles.Float, CultureInfo.InvariantCulture, out var real))
                {
                    Value = real;
                    yield return Token.Value;

                    if (CurrentChar == '\n')
                        yield return Token.NewLine;

                    continue;
                }

                // Hex literal: strip only the leading "0x" and parse independently of the user's culture.
                if (result.StartsWith("0x", StringComparison.Ordinal)
                    && int.TryParse(result.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out int hexResult))
                {
                    Value = hexResult;
                    yield return Token.Value;

                    if (CurrentChar == '\n')
                        yield return Token.NewLine;

                    continue;
                }

                // "NpM" literal evaluates to N << M. Operands that do not fit an int (or are not plain ASCII
                // digits) make the word fall through to identifier handling instead of throwing.
                Match powMatch = PowRegex.Match(result);

                if (powMatch.Success
                    && int.TryParse(powMatch.Groups[1].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int a)
                    && int.TryParse(powMatch.Groups[2].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int b))
                {
                    Value = a << b;
                    yield return Token.Value;
                    continue;
                }

                Identifier = result;

                if (TokenDictionary.TryGetValue(Identifier, out token))
                {
                    yield return token;
                    continue;
                }

                yield return Token.Identifer;

                if (CurrentChar == '\n')
                    yield return Token.NewLine;
            }
        }

        /// <summary>
        /// Returns the enumerator created by the constructor. Every call returns that same instance, which the
        /// constructor has already advanced once.
        /// </summary>
        public IEnumerator<Token> GetEnumerator()
        {
            return currentEnumerator;
        }

        IEnumerator IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }
    }
}