Lexer.cs 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Globalization;
  4. using System.Text;
  5. namespace NTERA.Interpreter
  6. {
  7. public class Lexer
  8. {
  /// <summary>Complete text being tokenized.</summary>
  private readonly string source;

  /// <summary>Current read position inside <see cref="source"/>.</summary>
  private Marker sourceMarker;

  /// <summary>
  /// Character most recently produced by <c>GetNextChar</c>; note that peeking updates it too.
  /// </summary>
  private char currentChar;

  /// <summary>Backing store for <see cref="Type"/>.</summary>
  private LexerType _type;

  /// <summary>
  /// Lexing mode. Every assignment rebuilds the token lookup tables by calling
  /// <c>InitTokenDictionaries</c>, because the single-character table is filtered by this mode.
  /// </summary>
  public LexerType Type
  {
      get { return _type; }
      internal set
      {
          _type = value;
          InitTokenDictionaries();
      }
  }

  /// <summary>
  /// Copy of the source position that <c>GetTokens</c> records once it has skipped the
  /// whitespace in front of a token.
  /// </summary>
  public Marker TokenMarker { get; set; }

  /// <summary>
  /// Text of the last word <c>GetTokens</c> looked up as a keyword or identifier.
  /// (The spelling of the member name is part of the public surface and is kept as is.)
  /// </summary>
  public string Identifer { get; set; }

  /// <summary>Payload that accompanies the most recently produced <c>Token.Value</c>.</summary>
  public Value Value { get; set; }
  /// <summary>
  /// Creates a lexer over <paramref name="input"/>.
  /// </summary>
  /// <param name="input">Source text to tokenize. Must not be null.</param>
  /// <param name="type">
  /// Lexing mode; assigning it to <see cref="Type"/> also builds the token lookup tables.
  /// </param>
  /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
  public Lexer(string input, LexerType type = LexerType.Both)
  {
      // Fail fast: a null source would otherwise only surface later as a
      // NullReferenceException on the first character read.
      source = input ?? throw new ArgumentNullException(nameof(input));

      Type = type;

      // GetNextChar always reads at Pointer + 1, so the scan has to start one step before
      // index 0. NOTE(review): argument order of Marker is presumably (pointer, line, column)
      // - confirm against the Marker declaration.
      sourceMarker = new Marker(-1, 1, 0);
  }
  /// <summary>
  /// Repositions the lexer: subsequent reads continue from <paramref name="marker"/>.
  /// </summary>
  /// <param name="marker">Position to resume scanning from.</param>
  public void GoTo(Marker marker) => sourceMarker = marker;
  /// <summary>
  /// Reads the character that follows the current position.
  /// </summary>
  /// <param name="peek">
  /// When true the position is left where it is; <c>currentChar</c> is still overwritten
  /// with the character that was looked at.
  /// </param>
  /// <returns>
  /// The character read, or <c>'\0'</c> when no characters remain. In that case the pointer
  /// is parked at <c>source.Length</c>, even when only peeking.
  /// </returns>
  char GetNextChar(bool peek = false)
  {
      int nextIndex = sourceMarker.Pointer + 1;

      // Past the last character: report NUL and pin the pointer to the end.
      if (nextIndex >= source.Length)
      {
          sourceMarker.Pointer = source.Length;
          currentChar = (char)0;
          return currentChar;
      }

      if (peek)
      {
          currentChar = source[nextIndex];
          return currentChar;
      }

      // Consume: advance the column and the pointer, then load the character.
      sourceMarker.Column++;
      sourceMarker.Pointer = nextIndex;
      currentChar = source[nextIndex];

      if (currentChar == '\n')
      {
          // A line feed starts a new line and resets the column counter.
          sourceMarker.Column = 1;
          sourceMarker.Line++;
      }

      return currentChar;
  }
  /// <summary>Ordinary keywords, looked up by their text (case-insensitive).</summary>
  private Dictionary<string, Token> TokenDictionary;

  /// <summary>
  /// Keywords flagged <c>IsLineKeyword</c>, looked up by their text (case-insensitive).
  /// </summary>
  private Dictionary<string, Token> TokenLineDictionary;

  /// <summary>Single-character tokens that are active for the current <see cref="Type"/>.</summary>
  private Dictionary<char, Token> TokenCharDictionary;

  /// <summary>
  /// Rebuilds the three lookup tables from the attributes attached to the members of the
  /// <c>Token</c> enum. Keyword attributes are always registered; character attributes are
  /// registered only when their lexer context overlaps the current <see cref="Type"/>.
  /// </summary>
  private void InitTokenDictionaries()
  {
      TokenDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);
      TokenLineDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);
      TokenCharDictionary = new Dictionary<char, Token>();

      Array allTokens = Enum.GetValues(typeof(Token));

      foreach (Token token in allTokens)
      {
          foreach (var keyword in Utility.GetEnumAttributes<Token, LexerKeywordAttribute>(token))
          {
              // Line keywords live in their own table; everything else is an ordinary keyword.
              Dictionary<string, Token> table = keyword.IsLineKeyword
                  ? TokenLineDictionary
                  : TokenDictionary;

              table[keyword.Keyword] = token;
          }

          foreach (var character in Utility.GetEnumAttributes<Token, LexerCharacterAttribute>(token))
          {
              bool activeInThisMode = (character.LexerContext & Type) > 0;

              if (activeInThisMode)
              {
                  TokenCharDictionary[character.Character] = token;
              }
          }
      }
  }
  /// <summary>
  /// Tells whether <paramref name="c"/> is whitespace other than a line feed.
  /// </summary>
  /// <param name="c">Character to classify.</param>
  /// <returns>
  /// True for any character <see cref="char.IsWhiteSpace(char)"/> accepts, except <c>'\n'</c>.
  /// </returns>
  private static bool IsWhitespace(char c) => c != '\n' && char.IsWhiteSpace(c);
  /// <summary>
  /// Tells whether <paramref name="c"/> terminates a line: line feed, carriage return,
  /// or the NUL character that <c>GetNextChar</c> returns at end of input.
  /// </summary>
  /// <param name="c">Character to classify.</param>
  /// <returns>True for <c>'\n'</c>, <c>'\r'</c> and <c>'\0'</c>; false otherwise.</returns>
  private static bool IsEndOfLine(char c)
  {
      switch (c)
      {
          case '\n':
          case '\r':
          case '\0':
              return true;
          default:
              return false;
      }
  }
  /// <summary>
  /// Tells whether <paramref name="c"/> is one of the two characters (<c>%</c> or <c>{</c>)
  /// that end a run of plain text when the lexer is in string mode.
  /// </summary>
  /// <param name="c">Character to classify.</param>
  /// <returns>True for <c>'%'</c> and <c>'{'</c>; false otherwise.</returns>
  private static bool IsEscape(char c) => c == '{' || c == '%';
  /// <summary>
  /// Lazily scans the source and yields its tokens, finishing with <c>Token.EOF</c>.
  /// </summary>
  /// <remarks>
  /// Side effects while enumerating: <see cref="TokenMarker"/> is set at the start of every
  /// token, <see cref="Value"/> is set before each <c>Token.Value</c>, and
  /// <see cref="Identifer"/> is set for every word that is looked up as a keyword or identifier.
  /// A comment, percent literal or quoted string that is still open when the input ends is
  /// closed implicitly at end of input rather than scanned forever.
  /// </remarks>
  /// <returns>A deferred sequence of tokens; scanning happens as the sequence is enumerated.</returns>
  public IEnumerable<Token> GetTokens()
  {
      while (true)
      {
          // Skip leading whitespace. In pure string mode whitespace is content, so exactly
          // one character is read and kept.
          while (IsWhitespace(GetNextChar()) && Type != LexerType.String) { }
          TokenMarker = sourceMarker;

          // Single-character tokens enabled for the current mode take priority.
          if (TokenCharDictionary.TryGetValue(currentChar, out Token charToken))
          {
              yield return charToken;
              continue;
          }

          switch (currentChar)
          {
              case ';': // semicolon starts a comment that runs to the end of the line
                  // Also stop on '\0': a comment on the last line has no trailing '\n',
                  // and GetNextChar returns '\0' forever once the input is exhausted.
                  while (currentChar != '\n' && currentChar != '\0')
                  {
                      GetNextChar();
                  }
                  continue;

              case '<':
                  // Outside real (expression) mode '<' is ordinary text: fall out of the
                  // switch and let the word scanner below handle it.
                  if (!Type.HasFlag(LexerType.Real))
                      break;

                  if (GetNextChar(true) == '>')
                  {
                      GetNextChar();
                      yield return Token.NotEqual;
                  }
                  else if (GetNextChar(true) == '=')
                  {
                      GetNextChar();
                      yield return Token.LessEqual;
                  }
                  else
                  {
                      yield return Token.Less;
                  }
                  continue;

              case '>':
                  if (!Type.HasFlag(LexerType.Real))
                      break;

                  if (GetNextChar(true) == '=')
                  {
                      GetNextChar();
                      yield return Token.MoreEqual;
                  }
                  else
                  {
                      yield return Token.More;
                  }
                  continue;

              case '+':
                  if (GetNextChar(true) == '=')
                  {
                      GetNextChar();
                      yield return Token.Append;
                  }
                  else
                  {
                      yield return Token.Plus;
                  }
                  continue;

              case '%':
                  // %...% literal; the delimiters are kept in the resulting value.
                  StringBuilder builder = new StringBuilder();

                  // The '\0' check ends an unterminated literal at end of input.
                  while (GetNextChar() != '%' && currentChar != '\0')
                  {
                      builder.Append(currentChar);
                  }

                  Value = $"%{builder}%";
                  yield return Token.Value;
                  continue;

              case '"':
                  // Quoted string with \n, \t, \\ and \" escapes (escape letter is
                  // case-insensitive). Any other escaped character is dropped.
                  StringBuilder literal = new StringBuilder();

                  // The '\0' check ends an unterminated string at end of input.
                  while (GetNextChar() != '"' && currentChar != '\0')
                  {
                      if (currentChar == '\\')
                      {
                          switch (char.ToLowerInvariant(GetNextChar()))
                          {
                              case 'n': literal.Append('\n'); break;
                              case 't': literal.Append('\t'); break;
                              case '\\': literal.Append('\\'); break;
                              case '"': literal.Append('"'); break;
                          }
                      }
                      else
                      {
                          literal.Append(currentChar);
                      }
                  }

                  Value = new Value(literal.ToString());
                  yield return Token.Value;
                  continue;

              case (char)0:
                  yield return Token.EOF;
                  yield break;
          }

          // Anything else starts a word: extend it until a single-character token, an end
          // of line, or - depending on the mode - whitespace (normal modes) or an escape
          // character (string mode) comes next.
          StringBuilder bodyBuilder = new StringBuilder(currentChar.ToString());
          while (true)
          {
              char upcoming = GetNextChar(true);

              if (TokenCharDictionary.ContainsKey(upcoming) || IsEndOfLine(upcoming))
                  break;

              bool endsWord = Type == LexerType.String ? IsEscape(upcoming) : IsWhitespace(upcoming);
              if (endsWord)
                  break;

              bodyBuilder.Append(GetNextChar());
          }

          string result = bodyBuilder.ToString();

          // Decimal / floating-point literal.
          if (double.TryParse(result, NumberStyles.Float, CultureInfo.InvariantCulture, out var real))
          {
              Value = real;
              yield return Token.Value;
              continue;
          }

          // Hexadecimal literal: strip only the leading "0x" and parse culture-independently,
          // because source text must lex the same way on every machine.
          if (result.StartsWith("0x", StringComparison.Ordinal)
              && int.TryParse(result.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out int hexResult))
          {
              Value = hexResult;
              yield return Token.Value;
              continue;
          }

          Identifer = result;

          if (TokenDictionary.TryGetValue(Identifer, out Token token))
          {
              yield return token;
              continue;
          }

          if (TokenLineDictionary.TryGetValue(Identifer, out token))
          {
              // A line keyword takes the rest of the line verbatim as its argument.
              bodyBuilder = new StringBuilder();
              while (!IsEndOfLine(GetNextChar(true)))
              {
                  bodyBuilder.Append(GetNextChar());
              }

              yield return token;

              // Drop the single separator character that follows the keyword. When the
              // keyword is the last thing on the line there is nothing to drop; the guard
              // avoids the ArgumentOutOfRangeException Substring(1) throws on "".
              string argument = bodyBuilder.ToString();
              Value = new Value(argument.Length > 0 ? argument.Substring(1) : argument);
              yield return Token.Value;

              // currentChar holds the terminator that the peek above looked at (peeking
              // updates currentChar), which tells end of input apart from end of line.
              yield return currentChar == '\0' ? Token.EOF : Token.NewLine;
              continue;
          }

          yield return Token.Identifer;
      }
  }
  216. }
  217. }