123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260 |
- using System;
- using System.Collections.Generic;
- using System.Globalization;
- using System.Text;
namespace NTERA.Interpreter
{
    /// <summary>
    /// Turns a source string into a lazily produced sequence of <see cref="Token"/> values.
    /// While tokens are being enumerated, <see cref="TokenMarker"/>, <see cref="Identifer"/>
    /// and <see cref="Value"/> describe the token that was yielded most recently.
    /// </summary>
    public class Lexer
    {
        private readonly string source;
        private Marker sourceMarker;
        private char currentChar;
        private LexerType _type;

        /// <summary>
        /// Lexing mode. Assigning it rebuilds the keyword and character lookup tables,
        /// because the single-character table depends on the active mode.
        /// </summary>
        public LexerType Type
        {
            get => _type;
            internal set
            {
                _type = value;
                InitTokenDictionaries();
            }
        }

        /// <summary>Source marker captured after leading whitespace was skipped for the current token.</summary>
        public Marker TokenMarker { get; set; }

        /// <summary>Raw text of the most recently scanned word that did not parse as a number.</summary>
        public string Identifer { get; set; }

        /// <summary>Payload that accompanies the most recently yielded <c>Token.Value</c>.</summary>
        public Value Value { get; set; }

        /// <summary>Creates a lexer over <paramref name="input"/>.</summary>
        /// <param name="input">Text to tokenize.</param>
        /// <param name="type">Lexing mode; selects which single-character tokens are recognised.</param>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
        public Lexer(string input, LexerType type = LexerType.Both)
        {
            // Fail here rather than with a NullReferenceException on the first read.
            source = input ?? throw new ArgumentNullException(nameof(input));
            Type = type;
            // NOTE(review): argument meaning is defined by Marker, which is not visible here.
            sourceMarker = new Marker(-1, 1, 0);
        }

        /// <summary>Repositions the lexer so that reading resumes from <paramref name="marker"/>.</summary>
        /// <param name="marker">Marker that replaces the current read position.</param>
        public void GoTo(Marker marker)
        {
            sourceMarker = marker;
        }

        /// <summary>True once a read has run past the last character of the source.</summary>
        private bool AtEndOfSource => sourceMarker.Pointer >= source.Length;

        /// <summary>
        /// Reads the next character, or looks at it without advancing when <paramref name="peek"/> is set.
        /// </summary>
        /// <param name="peek">When <c>true</c> the read position is left untouched.</param>
        /// <returns>The character read, or <c>'\0'</c> when no characters remain.</returns>
        /// <remarks>
        /// The result is always stored in <c>currentChar</c>, even when peeking; the scanning code
        /// relies on that. When no characters remain the pointer is clamped to the source length,
        /// in peek mode as well.
        /// </remarks>
        private char GetNextChar(bool peek = false)
        {
            if (sourceMarker.Pointer + 1 >= source.Length)
            {
                sourceMarker.Pointer = source.Length;
                return currentChar = (char)0;
            }

            if (peek)
                return currentChar = source[sourceMarker.Pointer + 1];

            sourceMarker.Column++;
            sourceMarker.Pointer++;

            if ((currentChar = source[sourceMarker.Pointer]) == '\n')
            {
                // A newline starts a fresh line in the marker.
                sourceMarker.Column = 1;
                sourceMarker.Line++;
            }

            return currentChar;
        }

        /// <summary>
        /// Consumes the next character only if it equals <paramref name="expected"/>.
        /// </summary>
        /// <returns><c>true</c> when the character matched and was consumed.</returns>
        private bool TryConsume(char expected)
        {
            if (GetNextChar(true) != expected)
                return false;

            GetNextChar();
            return true;
        }

        private Dictionary<string, Token> TokenDictionary;
        private Dictionary<string, Token> TokenLineDictionary;
        private Dictionary<char, Token> TokenCharDictionary;

        /// <summary>
        /// Rebuilds the three lookup tables from the attributes attached to the <see cref="Token"/> members.
        /// Keywords go into the regular or the line-keyword table (both case-insensitive);
        /// single characters are registered only when their context overlaps <see cref="Type"/>.
        /// </summary>
        private void InitTokenDictionaries()
        {
            TokenDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);
            TokenLineDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);
            TokenCharDictionary = new Dictionary<char, Token>();

            foreach (Token token in Enum.GetValues(typeof(Token)))
            {
                foreach (var attribute in Utility.GetEnumAttributes<Token, LexerKeywordAttribute>(token))
                {
                    if (attribute.IsLineKeyword)
                        TokenLineDictionary[attribute.Keyword] = token;
                    else
                        TokenDictionary[attribute.Keyword] = token;
                }

                foreach (var attribute in Utility.GetEnumAttributes<Token, LexerCharacterAttribute>(token))
                {
                    if ((attribute.LexerContext & Type) > 0)
                        TokenCharDictionary[attribute.Character] = token;
                }
            }
        }

        /// <summary>Whitespace other than the line feed, which is significant to the lexer.</summary>
        private static bool IsWhitespace(char c)
        {
            return char.IsWhiteSpace(c) && c != '\n';
        }

        /// <summary>Line feed, carriage return, or the <c>'\0'</c> end-of-input sentinel.</summary>
        private static bool IsEndOfLine(char c)
        {
            return c == '\n' || c == '\r' || c == '\0';
        }

        /// <summary>Characters that interrupt plain text when lexing in string mode.</summary>
        private static bool IsEscape(char c)
        {
            return c == '%' || c == '{';
        }

        /// <summary>
        /// Decides whether <paramref name="next"/> ends the word currently being collected:
        /// a registered single-character token, an end of line, whitespace (outside string mode)
        /// or an escape character (in string mode only).
        /// </summary>
        private bool EndsWord(char next)
        {
            return TokenCharDictionary.ContainsKey(next)
                   || IsEndOfLine(next)
                   || (IsWhitespace(next) && Type != LexerType.String)
                   || (IsEscape(next) && Type == LexerType.String);
        }

        /// <summary>
        /// Reads the text between the current <c>%</c> and the closing <c>%</c>.
        /// Stops at the end of the source if the closing delimiter is missing.
        /// </summary>
        private string ReadPercentBody()
        {
            var body = new StringBuilder();

            while (GetNextChar() != '%' && !AtEndOfSource)
                body.Append(currentChar);

            return body.ToString();
        }

        /// <summary>
        /// Reads a double-quoted literal whose opening quote was just consumed.
        /// Recognises <c>\n</c>, <c>\t</c>, <c>\\</c> and <c>\"</c> (escape letter is case-insensitive);
        /// any other escaped character is dropped. Stops at the end of the source if the
        /// closing quote is missing.
        /// </summary>
        private string ReadStringLiteral()
        {
            var text = new StringBuilder();

            while (GetNextChar() != '"' && !AtEndOfSource)
            {
                if (currentChar != '\\')
                {
                    text.Append(currentChar);
                    continue;
                }

                switch (char.ToLowerInvariant(GetNextChar()))
                {
                    case 'n': text.Append('\n'); break;
                    case 't': text.Append('\t'); break;
                    case '\\': text.Append('\\'); break;
                    case '"': text.Append('"'); break;
                }
            }

            return text.ToString();
        }

        /// <summary>
        /// Lazily tokenizes the source. The sequence ends after <c>Token.EOF</c> has been yielded
        /// for the end-of-input sentinel.
        /// </summary>
        /// <returns>The tokens in source order.</returns>
        public IEnumerable<Token> GetTokens()
        {
            while (true)
            {
                // Always reads one character; further whitespace is skipped except in string mode.
                while (IsWhitespace(GetNextChar()) && Type != LexerType.String) { }

                TokenMarker = sourceMarker;

                if (TokenCharDictionary.TryGetValue(currentChar, out Token charToken))
                {
                    yield return charToken;
                    continue;
                }

                switch (currentChar)
                {
                    case ';':
                        // Comment: discard through the line feed (which is consumed as well),
                        // or through the end of the source when the last line has no line feed.
                        while (currentChar != '\n' && !AtEndOfSource)
                            GetNextChar();
                        continue;

                    case '<':
                        if (!Type.HasFlag(LexerType.Real))
                            break;

                        if (TryConsume('>'))
                            yield return Token.NotEqual;
                        else if (TryConsume('='))
                            yield return Token.LessEqual;
                        else
                            yield return Token.Less;
                        continue;

                    case '>':
                        if (!Type.HasFlag(LexerType.Real))
                            break;

                        if (TryConsume('='))
                            yield return Token.MoreEqual;
                        else
                            yield return Token.More;
                        continue;

                    case '+':
                        if (TryConsume('='))
                            yield return Token.Append;
                        else
                            yield return Token.Plus;
                        continue;

                    case '%':
                        // The delimiters are kept as part of the value.
                        Value = $"%{ReadPercentBody()}%";
                        yield return Token.Value;
                        continue;

                    case '"':
                        Value = new Value(ReadStringLiteral());
                        yield return Token.Value;
                        continue;

                    case '\0':
                        yield return Token.EOF;
                        yield break;
                }

                // Anything else is a word: collect characters until a terminator is next.
                var word = new StringBuilder(currentChar.ToString());
                while (!EndsWord(GetNextChar(true)))
                    word.Append(GetNextChar());

                string result = word.ToString();

                if (double.TryParse(result, NumberStyles.Float, CultureInfo.InvariantCulture, out var real))
                {
                    Value = real;
                    yield return Token.Value;
                    continue;
                }

                // Only the leading "0x" is a prefix; everything after it must be hex digits.
                if (result.StartsWith("0x", StringComparison.Ordinal)
                    && int.TryParse(result.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out int hexResult))
                {
                    Value = hexResult;
                    yield return Token.Value;
                    continue;
                }

                Identifer = result;

                if (TokenDictionary.TryGetValue(Identifer, out Token token))
                {
                    yield return token;
                    continue;
                }

                if (TokenLineDictionary.TryGetValue(Identifer, out token))
                {
                    // A line keyword takes the remainder of the line verbatim as its argument.
                    var rest = new StringBuilder();
                    while (!IsEndOfLine(GetNextChar(true)))
                        rest.Append(GetNextChar());

                    yield return token;

                    // Drop the single character separating keyword and argument; when the keyword
                    // is the last thing on the line there is nothing to drop.
                    Value = new Value(rest.Length > 0 ? rest.ToString(1, rest.Length - 1) : string.Empty);
                    yield return Token.Value;

                    // currentChar holds the peeked terminator that ended the loop above.
                    yield return currentChar == '\0' ? Token.EOF : Token.NewLine;
                    continue;
                }

                yield return Token.Identifer;
            }
        }
    }
}
|