using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;

namespace NTERA.Interpreter
{
    /// <summary>
    /// Character-level tokenizer: walks the source string one character at a time and
    /// produces a stream of <see cref="Token"/> values. The text of the most recent
    /// identifier is exposed through <see cref="Identifer"/>, the most recent literal
    /// through <see cref="Value"/>, and the position recorded at the start of the
    /// current token through <see cref="TokenMarker"/>.
    /// </summary>
    public class Lexer
    {
        /// <summary>Sentinel returned by <see cref="GetChar"/> once the source is exhausted.</summary>
        private const char EndOfSource = (char)0;

        private readonly string source;
        private Marker sourceMarker;
        private char lastChar;

        // Keyword lookup tables filled by InitTokenDictionaries. Key/value types are
        // the ones the lookups in GetTokens require (string or char key, Token value).
        private readonly Dictionary<string, Token> TokenDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);
        private readonly Dictionary<string, Token> TokenLineDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);
        private readonly Dictionary<char, Token> TokenCharDictionary = new Dictionary<char, Token>();

        /// <summary>Marker recorded at the first character of the token currently being produced.</summary>
        public Marker TokenMarker { get; set; }

        /// <summary>Text of the most recently scanned identifier or keyword.</summary>
        public string Identifer { get; set; }

        /// <summary>Most recently scanned literal (number, string, or line-keyword body).</summary>
        public Value Value { get; set; }

        /// <summary>Creates a lexer over <paramref name="input"/> and builds the keyword tables.</summary>
        /// <param name="input">Complete source text to tokenize.</param>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        public Lexer(string input)
        {
            // Fail at construction instead of with a NullReferenceException inside GetChar.
            source = input ?? throw new ArgumentNullException(nameof(input));

            // Pointer starts at -1 because GetChar advances before it reads.
            sourceMarker = new Marker(-1, 1, 0);
            InitTokenDictionaries();
        }

        /// <summary>True once the read pointer has moved past the last character of the source.</summary>
        private bool AtEndOfSource
        {
            get { return sourceMarker.Pointer >= source.Length; }
        }

        /// <summary>Replaces the current read position with <paramref name="marker"/>.</summary>
        /// <param name="marker">Position that subsequent reads continue from.</param>
        public void GoTo(Marker marker)
        {
            sourceMarker = marker;
        }

        /// <summary>
        /// Advances one character, stores it in <c>lastChar</c> and returns it.
        /// Returns <see cref="EndOfSource"/> when the pointer is past the end of the source.
        /// </summary>
        private char GetChar()
        {
            sourceMarker.Column++;
            sourceMarker.Pointer++;

            if (AtEndOfSource)
            {
                return lastChar = EndOfSource;
            }

            lastChar = source[sourceMarker.Pointer];
            if (lastChar == '\n')
            {
                sourceMarker.Column = 1;
                sourceMarker.Line++;
            }

            return lastChar;
        }

        /// <summary>
        /// Fills the keyword, line-keyword and single-character lookup tables from the
        /// attributes attached to each <see cref="Token"/> enum member.
        /// </summary>
        private void InitTokenDictionaries()
        {
            foreach (Token token in Enum.GetValues(typeof(Token)))
            {
                // NOTE(review): both GetEnumAttributes calls read identically in this view;
                // presumably explicit generic type arguments (keyword attribute vs. character
                // attribute) belong here -- confirm against Utility.GetEnumAttributes.
                foreach (var attribute in Utility.GetEnumAttributes(token))
                {
                    if (attribute.IsLineKeyword)
                    {
                        TokenLineDictionary[attribute.Keyword] = token;
                    }
                    else
                    {
                        TokenDictionary[attribute.Keyword] = token;
                    }
                }

                foreach (var attribute in Utility.GetEnumAttributes(token))
                {
                    TokenCharDictionary[attribute.Character] = token;
                }
            }
        }

        /// <summary>
        /// Lazily tokenizes the source. Enumeration ends after <c>Token.EOF</c> is yielded.
        /// </summary>
        /// <returns>The tokens in source order.</returns>
        /// <exception cref="FormatException">
        /// A numeric literal cannot be parsed, or a string literal has no closing quote.
        /// </exception>
        public IEnumerable<Token> GetTokens()
        {
            while (true)
            {
                GetChar();
                while (lastChar == ' ' || lastChar == '\t' || lastChar == '\r')
                {
                    GetChar();
                }

                TokenMarker = sourceMarker;

                if (char.IsLetter(lastChar))
                {
                    var name = new StringBuilder();
                    name.Append(lastChar);
                    while (char.IsLetterOrDigit(GetChar()) || lastChar == '_')
                    {
                        name.Append(lastChar);
                    }
                    Identifer = name.ToString();

                    if (TokenDictionary.TryGetValue(Identifer, out Token token))
                    {
                        // NOTE(review): the lookahead character that ended the keyword is not
                        // pushed back here, so it is never tokenized. Left as-is because the
                        // position of the push-back relative to the yield also affects GoTo.
                        yield return token;
                        continue;
                    }

                    if (TokenLineDictionary.TryGetValue(Identifer, out token))
                    {
                        foreach (Token t in ReturnAsLine(token))
                        {
                            yield return t;
                        }
                        continue;
                    }

                    if (string.Equals(Identifer, "REM", StringComparison.OrdinalIgnoreCase))
                    {
                        // REM comment: discard the remainder of the line.
                        SkipToEndOfLine();
                        continue;
                    }

                    yield return Token.Identifer;

                    // Push the lookahead character back so the next iteration re-reads it.
                    // This deliberately happens after the yield, as in the original code.
                    // NOTE(review): only Pointer is rewound; when the lookahead was '\n',
                    // GetChar increments Line a second time on the re-read.
                    sourceMarker.Pointer--;
                    continue;
                }

                if (char.IsDigit(lastChar))
                {
                    var digits = new StringBuilder();
                    do
                    {
                        digits.Append(lastChar);
                    }
                    while (char.IsDigit(GetChar()) || lastChar == '.');

                    if (!double.TryParse(digits.ToString(), NumberStyles.Float, CultureInfo.InvariantCulture, out var real))
                    {
                        throw new FormatException("ERROR while parsing number");
                    }

                    Value = new Value(real);
                    yield return Token.Value;

                    // Same push-back as for identifiers (same Line caveat applies).
                    sourceMarker.Pointer--;
                    continue;
                }

                if (TokenCharDictionary.TryGetValue(lastChar, out Token charToken))
                {
                    yield return charToken;
                    continue;
                }

                switch (lastChar)
                {
                    case '\'':
                        // Apostrophe comment: discard the remainder of the line.
                        SkipToEndOfLine();
                        continue;

                    case '<':
                        GetChar();
                        if (lastChar == '>')
                        {
                            yield return Token.NotEqual;
                        }
                        else if (lastChar == '=')
                        {
                            yield return Token.LessEqual;
                        }
                        else
                        {
                            // NOTE(review): the character read after '<' is not pushed back.
                            yield return Token.Less;
                        }
                        continue;

                    case '>':
                        GetChar();
                        if (lastChar == '=')
                        {
                            yield return Token.MoreEqual;
                        }
                        else
                        {
                            // NOTE(review): the character read after '>' is not pushed back.
                            yield return Token.More;
                        }
                        continue;

                    case '"':
                        Value = new Value(ReadStringLiteral());
                        yield return Token.Value;
                        continue;

                    case EndOfSource:
                        yield return Token.EOF;
                        yield break;

                    default:
                        yield return Token.Unkown;
                        continue;
                }
            }
        }

        /// <summary>
        /// Consumes the rest of the current line as the body of a line keyword, then yields
        /// <paramref name="token"/>, <c>Token.Value</c> (with <see cref="Value"/> set to the
        /// body, trailing whitespace trimmed) and <c>Token.NewLine</c>.
        /// </summary>
        /// <param name="token">The line keyword that introduced the body.</param>
        /// <returns>The three tokens described above.</returns>
        public IEnumerable<Token> ReturnAsLine(Token token)
        {
            var bodyBuilder = new StringBuilder();

            // The character that ended the keyword (lastChar on entry) is not part of the
            // body. Stop at end of source as well as at '\n' so that a final line without
            // a newline terminates instead of appending NUL characters forever.
            while (lastChar != '\n' && !AtEndOfSource)
            {
                GetChar();
                if (!AtEndOfSource)
                {
                    bodyBuilder.Append(lastChar);
                }
            }

            yield return token;

            // Value is assigned only after the keyword token has been handed out.
            Value = new Value(bodyBuilder.ToString().TrimEnd());
            yield return Token.Value;

            yield return Token.NewLine;
        }

        /// <summary>Advances until the last character read is '\n' or the source is exhausted.</summary>
        private void SkipToEndOfLine()
        {
            while (lastChar != '\n' && !AtEndOfSource)
            {
                GetChar();
            }
        }

        /// <summary>
        /// Reads a string literal body; the opening quote has already been consumed.
        /// Recognizes the escapes \n, \t, \\ and \" (the escape letter is matched
        /// case-insensitively); any other escaped character is dropped.
        /// </summary>
        /// <returns>The unescaped text between the quotes.</returns>
        /// <exception cref="FormatException">The source ends before the closing quote.</exception>
        private string ReadStringLiteral()
        {
            var text = new StringBuilder();

            while (GetChar() != '"')
            {
                if (AtEndOfSource)
                {
                    throw new FormatException("ERROR while parsing string: missing closing quote");
                }

                if (lastChar != '\\')
                {
                    text.Append(lastChar);
                    continue;
                }

                switch (char.ToLower(GetChar()))
                {
                    case 'n':
                        text.Append('\n');
                        break;
                    case 't':
                        text.Append('\t');
                        break;
                    case '\\':
                        text.Append('\\');
                        break;
                    case '"':
                        text.Append('"');
                        break;
                }
            }

            return text.ToString();
        }
    }
}