123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218 |
- using System;
- using System.Collections.Generic;
- using System.Text;
- namespace NTERA.Interpreter
- {
/// <summary>
/// Turns source text into a stream of <see cref="Token"/> values.
/// Keyword and single-character tokens are collected from the
/// <see cref="LexerKeywordAttribute"/> and <see cref="LexerCharacterAttribute"/>
/// annotations found on the members of the <see cref="Token"/> enum.
/// </summary>
public class Lexer
{
    /// <summary>Character <see cref="GetChar"/> reports once the input is exhausted.</summary>
    private const char EndOfInput = (char)0;

    private readonly string source;
    private Marker sourceMarker;
    private char lastChar;

    // Keywords that map directly to a token.
    private readonly Dictionary<string, Token> TokenDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);

    // Keywords flagged IsLineKeyword: the rest of the source line becomes the token's value.
    private readonly Dictionary<string, Token> TokenLineDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);

    // Tokens that consist of exactly one character.
    private readonly Dictionary<char, Token> TokenCharDictionary = new Dictionary<char, Token>();

    /// <summary>Position of the first character of the token most recently produced by <see cref="GetTokens"/>.</summary>
    public Marker TokenMarker { get; set; }

    /// <summary>Text of the most recently scanned word (keyword or identifier).</summary>
    public string Identifer { get; set; }

    /// <summary>Payload of the most recent <c>Token.Value</c> (number, string literal or line body).</summary>
    public Value Value { get; set; }

    /// <summary>Creates a lexer over <paramref name="input"/>.</summary>
    /// <param name="input">Source text to tokenize.</param>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
    public Lexer(string input)
    {
        // Fail here instead of with a NullReferenceException on the first read.
        source = input ?? throw new ArgumentNullException(nameof(input));

        // The pointer starts at -1 because GetChar advances before it reads
        // (arguments are presumably pointer, line, column).
        sourceMarker = new Marker(-1, 1, 0);
        InitTokenDictionaries();
    }

    /// <summary>
    /// Moves the read position to <paramref name="marker"/>. Because
    /// <see cref="GetChar"/> advances before reading, the next character read
    /// is the one that follows <c>marker.Pointer</c>.
    /// </summary>
    /// <param name="marker">Position to continue reading from.</param>
    public void GoTo(Marker marker)
    {
        sourceMarker = marker;
    }

    /// <summary>True once the read position has moved past the last character of the source.</summary>
    private bool AtEndOfInput
    {
        get { return sourceMarker.Pointer >= source.Length; }
    }

    /// <summary>
    /// Advances one character, updates line/column bookkeeping and stores the
    /// character in <c>lastChar</c>.
    /// </summary>
    /// <returns>The character read, or <see cref="EndOfInput"/> past the end of the source.</returns>
    private char GetChar()
    {
        sourceMarker.Column++;
        sourceMarker.Pointer++;

        if (AtEndOfInput)
            return lastChar = EndOfInput;

        lastChar = source[sourceMarker.Pointer];
        if (lastChar == '\n')
        {
            sourceMarker.Column = 1;
            sourceMarker.Line++;
        }

        return lastChar;
    }

    /// <summary>
    /// Returns the character <see cref="GetChar"/> would read next without
    /// consuming it, so scanners never have to un-read a lookahead character.
    /// </summary>
    private char PeekChar()
    {
        int next = sourceMarker.Pointer + 1;
        return next >= 0 && next < source.Length ? source[next] : EndOfInput;
    }

    private static bool IsIdentifierPart(char c)
    {
        return char.IsLetterOrDigit(c) || c == '_';
    }

    /// <summary>
    /// Fills the keyword, line-keyword and character lookup tables from the
    /// attributes declared on every <see cref="Token"/> member. A keyword or
    /// character declared more than once resolves to the last token seen.
    /// </summary>
    private void InitTokenDictionaries()
    {
        foreach (Token token in Enum.GetValues(typeof(Token)))
        {
            foreach (var keyword in Utility.GetEnumAttributes<Token, LexerKeywordAttribute>(token))
            {
                if (keyword.IsLineKeyword)
                    TokenLineDictionary[keyword.Keyword] = token;
                else
                    TokenDictionary[keyword.Keyword] = token;
            }

            foreach (var character in Utility.GetEnumAttributes<Token, LexerCharacterAttribute>(token))
            {
                TokenCharDictionary[character.Character] = token;
            }
        }
    }

    /// <summary>
    /// Lazily tokenizes the source. Before each token is yielded,
    /// <see cref="TokenMarker"/> is set to its first character;
    /// <see cref="Identifer"/> and <see cref="Value"/> carry the text or
    /// payload where applicable. The sequence ends after <c>Token.EOF</c>.
    /// </summary>
    /// <returns>The tokens of the source, in order.</returns>
    /// <exception cref="FormatException">
    /// A numeric literal cannot be parsed, or a string literal is not closed
    /// before the end of the input.
    /// </exception>
    public IEnumerable<Token> GetTokens()
    {
        while (true)
        {
            GetChar();
            while (lastChar == ' ' || lastChar == '\t' || lastChar == '\r')
                GetChar();

            TokenMarker = sourceMarker;

            if (char.IsLetter(lastChar))
            {
                // Peek rather than consume, so the character that ends the word
                // is still unread for every kind of word (keywords included).
                var word = new StringBuilder();
                word.Append(lastChar);
                while (IsIdentifierPart(PeekChar()))
                    word.Append(GetChar());
                Identifer = word.ToString();

                if (TokenDictionary.TryGetValue(Identifer, out Token token))
                {
                    yield return token;
                    continue;
                }

                if (TokenLineDictionary.TryGetValue(Identifer, out token))
                {
                    // Exactly one separator character after a line keyword is
                    // not part of the body; ReturnAsLine expects it consumed.
                    GetChar();
                    foreach (Token lineToken in ReturnAsLine(token))
                        yield return lineToken;
                    continue;
                }

                if (string.Equals(Identifer, "REM", StringComparison.OrdinalIgnoreCase))
                {
                    SkipToEndOfLine();
                    continue;
                }

                yield return Token.Identifer;
                continue;
            }

            if (char.IsDigit(lastChar))
            {
                var digits = new StringBuilder();
                digits.Append(lastChar);
                while (char.IsDigit(PeekChar()) || PeekChar() == '.')
                    digits.Append(GetChar());

                string literal = digits.ToString();
                if (!double.TryParse(literal, System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture, out var real))
                    throw new FormatException($"ERROR while parsing number '{literal}' at line {TokenMarker.Line}, column {TokenMarker.Column}");

                Value = new Value(real);
                yield return Token.Value;
                continue;
            }

            if (TokenCharDictionary.TryGetValue(lastChar, out Token charToken))
            {
                yield return charToken;
                continue;
            }

            switch (lastChar)
            {
                case '\'':
                    SkipToEndOfLine();
                    continue;

                case '<':
                    if (PeekChar() == '>')
                    {
                        GetChar();
                        yield return Token.NotEqual;
                    }
                    else if (PeekChar() == '=')
                    {
                        GetChar();
                        yield return Token.LessEqual;
                    }
                    else
                    {
                        yield return Token.Less;
                    }
                    continue;

                case '>':
                    if (PeekChar() == '=')
                    {
                        GetChar();
                        yield return Token.MoreEqual;
                    }
                    else
                    {
                        yield return Token.More;
                    }
                    continue;

                case '"':
                    Value = new Value(ReadStringLiteral());
                    yield return Token.Value;
                    continue;

                case EndOfInput:
                    yield return Token.EOF;
                    yield break;

                default:
                    yield return Token.Unkown;
                    continue;
            }
        }
    }

    /// <summary>
    /// Consumes characters up to and including the next line feed, stopping
    /// early at the end of the input so a comment on the last line terminates.
    /// </summary>
    private void SkipToEndOfLine()
    {
        while (lastChar != '\n' && !AtEndOfInput)
            GetChar();
    }

    /// <summary>
    /// Reads the remainder of a string literal whose opening quote has just
    /// been consumed. Recognizes the escapes \n, \t, \\ and \" (escape letter
    /// is case-insensitive); any other escaped character is dropped.
    /// </summary>
    /// <exception cref="FormatException">The input ends before the closing quote.</exception>
    private string ReadStringLiteral()
    {
        var text = new StringBuilder();
        while (GetChar() != '"')
        {
            if (AtEndOfInput)
                throw new FormatException($"Unterminated string literal starting at line {TokenMarker.Line}, column {TokenMarker.Column}");

            if (lastChar != '\\')
            {
                text.Append(lastChar);
                continue;
            }

            switch (char.ToLowerInvariant(GetChar()))
            {
                case 'n': text.Append('\n'); break;
                case 't': text.Append('\t'); break;
                case '\\': text.Append('\\'); break;
                case '"': text.Append('"'); break;
            }
        }

        return text.ToString();
    }

    /// <summary>
    /// Reads the rest of the current line as the body of a line keyword and
    /// yields <paramref name="token"/>, then <c>Token.Value</c> (with
    /// <see cref="Value"/> set to the body, trailing whitespace removed), then
    /// <c>Token.NewLine</c>. Reading starts after the most recently read
    /// character and stops at the line feed or at the end of the input.
    /// </summary>
    /// <param name="token">The line keyword token to emit first.</param>
    public IEnumerable<Token> ReturnAsLine(Token token)
    {
        StringBuilder bodyBuilder = new StringBuilder();
        while (lastChar != '\n' && !AtEndOfInput)
        {
            char next = GetChar();

            // The end-of-input sentinel is not part of the line.
            if (!AtEndOfInput)
                bodyBuilder.Append(next);
        }

        yield return token;

        Value = new Value(bodyBuilder.ToString().TrimEnd());
        yield return Token.Value;

        yield return Token.NewLine;
    }
}
- }
|