using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;

namespace NTERA.Interpreter
{
    /// <summary>
    /// Turns a source string into a lazy sequence of <see cref="Token"/> values.
    /// While enumerating, the lexer exposes details of the token just produced through
    /// <see cref="TokenMarker"/>, <see cref="Identifer"/> and <see cref="Value"/>.
    /// </summary>
    public class Lexer
    {
        private readonly string source;
        private Marker sourceMarker;
        private char currentChar;

        private LexerType _type;

        /// <summary>
        /// The lexing mode. Assigning it rebuilds the token lookup tables, because the
        /// single-character table is filtered by the active mode.
        /// </summary>
        public LexerType Type
        {
            get => _type;
            internal set
            {
                _type = value;
                InitTokenDictionaries();
            }
        }

        /// <summary>Source position recorded at the start of the most recent token.</summary>
        public Marker TokenMarker { get; set; }

        /// <summary>Text of the most recently scanned identifier or keyword.</summary>
        public string Identifer { get; set; }

        /// <summary>Payload of the most recent <c>Token.Value</c> token.</summary>
        public Value Value { get; set; }

        /// <summary>Creates a lexer over <paramref name="input"/>.</summary>
        /// <param name="input">The text to tokenize.</param>
        /// <param name="type">The lexing mode to start in.</param>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        public Lexer(string input, LexerType type = LexerType.Both)
        {
            // Fail fast instead of surfacing a NullReferenceException on the first read.
            source = input ?? throw new ArgumentNullException(nameof(input));
            Type = type;

            // NOTE(review): presumably (pointer, line, column), i.e. positioned just before
            // the first character - confirm against Marker's constructor.
            sourceMarker = new Marker(-1, 1, 0);
        }

        /// <summary>Moves the read position to a previously captured marker.</summary>
        public void GoTo(Marker marker)
        {
            sourceMarker = marker;
        }

        /// <summary>
        /// Reads the next character, or looks at it without consuming it when
        /// <paramref name="peek"/> is true. Returns <c>'\0'</c> once the input is exhausted.
        /// </summary>
        /// <remarks>
        /// <c>currentChar</c> is updated even when peeking; the tokenizer relies on that.
        /// </remarks>
        private char GetNextChar(bool peek = false)
        {
            if (sourceMarker.Pointer + 1 >= source.Length)
            {
                sourceMarker.Pointer = source.Length;
                return currentChar = (char)0;
            }

            if (peek)
            {
                return currentChar = source[sourceMarker.Pointer + 1];
            }

            sourceMarker.Column++;
            sourceMarker.Pointer++;

            if ((currentChar = source[sourceMarker.Pointer]) == '\n')
            {
                sourceMarker.Column = 1;
                sourceMarker.Line++;
            }

            return currentChar;
        }

        // Keyword text -> token (case-insensitive).
        private Dictionary<string, Token> TokenDictionary;

        // Keyword text -> token for keywords flagged as line keywords (case-insensitive).
        private Dictionary<string, Token> TokenLineDictionary;

        // Single character -> token, limited to entries valid for the current Type.
        private Dictionary<char, Token> TokenCharDictionary;

        /// <summary>Rebuilds the three lookup tables from the attributes declared on <see cref="Token"/>.</summary>
        private void InitTokenDictionaries()
        {
            TokenDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);
            TokenLineDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);
            TokenCharDictionary = new Dictionary<char, Token>();

            foreach (Token token in Enum.GetValues(typeof(Token)))
            {
                // NOTE(review): both GetEnumAttributes calls below are identical yet are used as
                // two different attribute shapes (Keyword/IsLineKeyword vs. Character/LexerContext).
                // The attribute type arguments are not visible in this file - confirm the calls
                // resolve to the intended attribute types.
                foreach (var attribute in Utility.GetEnumAttributes(token))
                {
                    if (attribute.IsLineKeyword)
                    {
                        TokenLineDictionary[attribute.Keyword] = token;
                    }
                    else
                    {
                        TokenDictionary[attribute.Keyword] = token;
                    }
                }

                foreach (var attribute in Utility.GetEnumAttributes(token))
                {
                    if ((attribute.LexerContext & Type) > 0)
                    {
                        TokenCharDictionary[attribute.Character] = token;
                    }
                }
            }
        }

        private static bool IsWhitespace(char c)
        {
            return char.IsWhiteSpace(c) && c != '\n';
        }

        private static bool IsEndOfLine(char c)
        {
            return c == '\n' || c == '\r' || c == '\0';
        }

        private static bool IsEscape(char c)
        {
            return c == '%' || c == '{';
        }

        /// <summary>
        /// True when <paramref name="next"/> may be appended to the identifier/number body
        /// currently being scanned.
        /// </summary>
        private bool IsBodyChar(char next)
        {
            return !TokenCharDictionary.ContainsKey(next)
                && !IsEndOfLine(next)
                && (!IsWhitespace(next) || Type == LexerType.String)
                && (!IsEscape(next) || Type != LexerType.String);
        }

        /// <summary>
        /// Consumes the rest of a ';' comment, including the terminating newline when there is one.
        /// </summary>
        private void SkipComment()
        {
            // Also stop at end of input: a comment on the last line has no '\n' to find.
            while (currentChar != '\n' && currentChar != '\0')
            {
                GetNextChar();
            }
        }

        /// <summary>
        /// Consumes characters up to and including <paramref name="terminator"/> and returns the
        /// text before it. End of input is treated as an implicit terminator.
        /// </summary>
        private string ReadUntil(char terminator)
        {
            var text = new StringBuilder();

            while (GetNextChar() != terminator && currentChar != '\0')
            {
                text.Append(currentChar);
            }

            return text.ToString();
        }

        /// <summary>
        /// Reads the body of a double-quoted literal (the opening quote is already consumed),
        /// translating the escapes \n, \t, \\ and \". Any other escaped character is dropped.
        /// End of input is treated as an implicit closing quote.
        /// </summary>
        private string ReadStringLiteral()
        {
            var text = new StringBuilder();

            while (GetNextChar() != '"' && currentChar != '\0')
            {
                if (currentChar != '\\')
                {
                    text.Append(currentChar);
                    continue;
                }

                switch (char.ToLowerInvariant(GetNextChar()))
                {
                    case 'n':
                        text.Append('\n');
                        break;
                    case 't':
                        text.Append('\t');
                        break;
                    case '\\':
                        text.Append('\\');
                        break;
                    case '"':
                        text.Append('"');
                        break;
                }
            }

            return text.ToString();
        }

        /// <summary>
        /// Lazily tokenizes the source. The sequence ends after <c>Token.EOF</c> is produced by
        /// reaching the end of the input.
        /// </summary>
        /// <returns>The tokens in source order.</returns>
        public IEnumerable<Token> GetTokens()
        {
            while (true)
            {
                // Always reads at least one character; keeps skipping whitespace unless the
                // lexer is in String mode.
                while (IsWhitespace(GetNextChar()) && Type != LexerType.String)
                {
                }

                TokenMarker = sourceMarker;

                if (TokenCharDictionary.TryGetValue(currentChar, out Token charToken))
                {
                    yield return charToken;
                    continue;
                }

                switch (currentChar)
                {
                    case ';': // semicolon starts a comment
                        SkipComment();
                        continue;

                    case '<':
                        // Outside Real mode '<' is ordinary text and falls through to the body scan.
                        if (!Type.HasFlag(LexerType.Real))
                        {
                            break;
                        }

                        if (GetNextChar(true) == '>')
                        {
                            GetNextChar();
                            yield return Token.NotEqual;
                        }
                        else if (GetNextChar(true) == '=')
                        {
                            GetNextChar();
                            yield return Token.LessEqual;
                        }
                        else
                        {
                            yield return Token.Less;
                        }

                        continue;

                    case '>':
                        if (!Type.HasFlag(LexerType.Real))
                        {
                            break;
                        }

                        if (GetNextChar(true) == '=')
                        {
                            GetNextChar();
                            yield return Token.MoreEqual;
                        }
                        else
                        {
                            yield return Token.More;
                        }

                        continue;

                    case '+':
                        if (GetNextChar(true) == '=')
                        {
                            GetNextChar();
                            yield return Token.Append;
                        }
                        else
                        {
                            yield return Token.Plus;
                        }

                        continue;

                    case '%':
                        // The delimiters are kept in the value text.
                        Value = $"%{ReadUntil('%')}%";
                        yield return Token.Value;
                        continue;

                    case '"':
                        Value = new Value(ReadStringLiteral());
                        yield return Token.Value;
                        continue;

                    case (char)0:
                        yield return Token.EOF;
                        yield break;
                }

                // Anything else: gather a run of body characters, then classify it.
                var bodyBuilder = new StringBuilder(currentChar.ToString());

                while (IsBodyChar(GetNextChar(true)))
                {
                    bodyBuilder.Append(GetNextChar());
                }

                string result = bodyBuilder.ToString();

                if (double.TryParse(result, NumberStyles.Float, CultureInfo.InvariantCulture, out var real))
                {
                    Value = real;
                    yield return Token.Value;
                    continue;
                }

                // Strip only the leading "0x"; parse culture-independently like the decimal path.
                if (result.StartsWith("0x", StringComparison.Ordinal)
                    && int.TryParse(result.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out int hexResult))
                {
                    Value = hexResult;
                    yield return Token.Value;
                    continue;
                }

                Identifer = result;

                if (TokenDictionary.TryGetValue(Identifer, out Token token))
                {
                    yield return token;
                    continue;
                }

                if (TokenLineDictionary.TryGetValue(Identifer, out token))
                {
                    // A line keyword takes the remainder of the line as its raw argument.
                    // The end-of-line character itself is only peeked, not consumed.
                    var lineBuilder = new StringBuilder();

                    while (!IsEndOfLine(GetNextChar(true)))
                    {
                        lineBuilder.Append(GetNextChar());
                    }

                    yield return token;

                    // Drop the single character separating keyword and argument; the remainder
                    // may be empty when the keyword is directly followed by the end of the line.
                    string rest = lineBuilder.ToString();
                    Value = new Value(rest.Length > 0 ? rest.Substring(1) : rest);
                    yield return Token.Value;

                    yield return currentChar == '\0' ? Token.EOF : Token.NewLine;
                    continue;
                }

                yield return Token.Identifer;
            }
        }
    }
}