// Lexer.cs (7.0 KB)
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Text;
  4. namespace NTERA.Interpreter
  5. {
  6. public class Lexer
  7. {
  8. private readonly string source;
  9. private Marker sourceMarker;
  10. private char lastChar;
  11. public Marker TokenMarker { get; set; }
  12. public string Identifer { get; set; }
  13. public Value Value { get; set; }
  14. public Lexer(string input)
  15. {
  16. source = input;
  17. sourceMarker = new Marker(-1, 1, 0);
  18. InitTokenDictionaries();
  19. }
  20. public void GoTo(Marker marker)
  21. {
  22. sourceMarker = marker;
  23. }
  24. char GetChar()
  25. {
  26. sourceMarker.Column++;
  27. sourceMarker.Pointer++;
  28. if (sourceMarker.Pointer >= source.Length)
  29. return lastChar = (char)0;
  30. if ((lastChar = source[sourceMarker.Pointer]) == '\n')
  31. {
  32. sourceMarker.Column = 1;
  33. sourceMarker.Line++;
  34. }
  35. return lastChar;
  36. }
  37. private readonly Dictionary<string, Token> TokenDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);
  38. private readonly Dictionary<string, Token> TokenLineDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);
  39. private readonly Dictionary<char, Token> TokenCharDictionary = new Dictionary<char, Token>();
  40. private void InitTokenDictionaries()
  41. {
  42. foreach (Token token in Enum.GetValues(typeof(Token)))
  43. {
  44. foreach (var attribute in Utility.GetEnumAttributes<Token, LexerKeywordAttribute>(token))
  45. {
  46. if (attribute.IsLineKeyword)
  47. TokenLineDictionary[attribute.Keyword] = token;
  48. else
  49. TokenDictionary[attribute.Keyword] = token;
  50. }
  51. foreach (var attribute in Utility.GetEnumAttributes<Token, LexerCharacterAttribute>(token))
  52. {
  53. TokenCharDictionary[attribute.Character] = token;
  54. }
  55. }
  56. }
  57. public IEnumerable<Token> GetTokens()
  58. {
  59. while (true)
  60. {
  61. GetChar();
  62. while (lastChar == ' ' || lastChar == '\t' || lastChar == '\r')
  63. GetChar();
  64. TokenMarker = sourceMarker;
  65. if (char.IsLetter(lastChar))
  66. {
  67. Identifer = lastChar.ToString();
  68. while (char.IsLetterOrDigit(GetChar()) || lastChar == '_')
  69. Identifer += lastChar;
  70. if (TokenDictionary.TryGetValue(Identifer, out Token token))
  71. {
  72. yield return token;
  73. continue;
  74. }
  75. if (TokenLineDictionary.TryGetValue(Identifer, out token))
  76. {
  77. foreach (Token t in ReturnAsLine(token))
  78. yield return t;
  79. continue;
  80. }
  81. switch (Identifer.ToUpper())
  82. {
  83. case "REM":
  84. while (lastChar != '\n')
  85. GetChar();
  86. continue;
  87. default:
  88. {
  89. yield return Token.Identifer;
  90. sourceMarker.Pointer--;
  91. continue;
  92. }
  93. }
  94. }
  95. if (char.IsDigit(lastChar))
  96. {
  97. string num = "";
  98. do
  99. {
  100. num += lastChar;
  101. }
  102. while (char.IsDigit(GetChar()) || lastChar == '.');
  103. if (!double.TryParse(num, System.Globalization.NumberStyles.Float, System.Globalization.CultureInfo.InvariantCulture, out var real))
  104. throw new Exception("ERROR while parsing number");
  105. Value = new Value(real);
  106. yield return Token.Value;
  107. sourceMarker.Pointer--;
  108. continue;
  109. }
  110. if (TokenCharDictionary.TryGetValue(lastChar, out Token charToken))
  111. {
  112. yield return charToken;
  113. continue;
  114. }
  115. switch (lastChar)
  116. {
  117. case '\'':
  118. while (lastChar != '\n')
  119. GetChar();
  120. continue;
  121. case '<':
  122. GetChar();
  123. if (lastChar == '>')
  124. yield return Token.NotEqual;
  125. else if (lastChar == '=')
  126. yield return Token.LessEqual;
  127. else
  128. yield return Token.Less;
  129. continue;
  130. case '>':
  131. GetChar();
  132. if (lastChar == '=')
  133. yield return Token.MoreEqual;
  134. else
  135. yield return Token.More;
  136. continue;
  137. case '"':
  138. string str = "";
  139. while (GetChar() != '"')
  140. {
  141. if (lastChar == '\\')
  142. {
  143. switch (char.ToLower(GetChar()))
  144. {
  145. case 'n': str += '\n'; break;
  146. case 't': str += '\t'; break;
  147. case '\\': str += '\\'; break;
  148. case '"': str += '"'; break;
  149. }
  150. }
  151. else
  152. {
  153. str += lastChar;
  154. }
  155. }
  156. Value = new Value(str);
  157. yield return Token.Value;
  158. continue;
  159. case (char)0:
  160. yield return Token.EOF;
  161. break;
  162. default:
  163. yield return Token.Unkown;
  164. continue;
  165. }
  166. break;
  167. }
  168. }
  169. public IEnumerable<Token> ReturnAsLine(Token token)
  170. {
  171. StringBuilder bodyBuilder = new StringBuilder();
  172. while (lastChar != '\n')
  173. bodyBuilder.Append(GetChar());
  174. yield return token;
  175. Value = new Value(bodyBuilder.ToString().TrimEnd());
  176. yield return Token.Value;
  177. yield return Token.NewLine;
  178. }
  179. }
  180. }