// Lexer.cs (8.7 KB)
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Globalization;
  4. using System.Text;
  5. namespace NTERA.Interpreter
  6. {
  /// <summary>
  /// Character-driven lexer. It walks a source string one character at a time and
  /// lazily produces <see cref="Token"/> values through <see cref="GetTokens"/>.
  /// While a token is being reported, <see cref="TokenMarker"/>, <see cref="Identifer"/>
  /// and <see cref="Value"/> carry the data that belongs to it.
  /// </summary>
  public class Lexer
  {
      // Text being tokenized. Never null: a null constructor argument is stored as "".
      private readonly string source;

      // Current read position; Pointer indexes the character most recently consumed.
      private Marker sourceMarker;

      // Last character handed out by GetNextChar. Peeked characters are stored here too.
      private char currentChar;

      private LexerType _type;

      // Keyword -> token (case-insensitive, invariant culture).
      private Dictionary<string, Token> TokenDictionary;

      // Keyword -> token for "line keywords": the remainder of the line becomes their value.
      private Dictionary<string, Token> TokenLineDictionary;

      // Single character -> token, restricted to the characters valid for the current Type.
      private Dictionary<char, Token> TokenCharDictionary;

      /// <summary>
      /// Lexing mode. Assigning it rebuilds the token lookup tables, because the
      /// single-character table is filtered by this value.
      /// </summary>
      public LexerType Type
      {
          get => _type;
          internal set
          {
              _type = value;
              InitTokenDictionaries();
          }
      }

      /// <summary>Position of the lexer at the moment the current token started.</summary>
      public Marker TokenMarker { get; set; }

      /// <summary>Text of the most recent identifier or keyword that was read.</summary>
      public string Identifer { get; set; }

      /// <summary>Payload of the most recent <see cref="Token.Value"/> token.</summary>
      public Value Value { get; set; }

      /// <summary>
      /// Creates a lexer over <paramref name="input"/>.
      /// </summary>
      /// <param name="input">Source text; <c>null</c> is treated as an empty string.</param>
      /// <param name="type">Lexing mode used to select the single-character tokens.</param>
      public Lexer(string input, LexerType type = LexerType.Both)
      {
          Type = type;

          // Previously a null input was stored as-is and only failed later with a
          // NullReferenceException inside GetNextChar; an empty source simply yields EOF.
          source = input ?? string.Empty;

          // Start one position before the first character so the first read lands on index 0.
          // NOTE(review): the arguments are presumably (pointer, line, column) - confirm against Marker.
          sourceMarker = new Marker(-1, 1, 0);
      }

      /// <summary>Moves the read position to <paramref name="marker"/>.</summary>
      /// <param name="marker">Position to continue lexing from.</param>
      public void GoTo(Marker marker)
      {
          sourceMarker = marker;
      }

      /// <summary>
      /// Reads the character after the current position.
      /// </summary>
      /// <param name="peek">
      /// When <c>true</c> the position is not advanced. <c>currentChar</c> is still
      /// overwritten with the peeked character, and callers rely on that.
      /// </param>
      /// <returns>The next character, or <c>'\0'</c> once the end of the source is reached.</returns>
      private char GetNextChar(bool peek = false)
      {
          if (sourceMarker.Pointer + 1 >= source.Length)
          {
              // End of input: park the pointer at the end (even when only peeking).
              sourceMarker.Pointer = source.Length;
              return currentChar = (char)0;
          }

          if (peek)
          {
              return currentChar = source[sourceMarker.Pointer + 1];
          }

          sourceMarker.Column++;
          sourceMarker.Pointer++;
          currentChar = source[sourceMarker.Pointer];

          if (currentChar == '\n')
          {
              // NOTE(review): resetting to 1 (not 0) makes the first character of every
              // following line report column 2, while the very first character of the
              // source reports column 1 - confirm whether that offset is intended.
              sourceMarker.Column = 1;
              sourceMarker.Line++;
          }

          return currentChar;
      }

      /// <summary>
      /// Rebuilds the keyword, line-keyword and single-character lookup tables from the
      /// <c>LexerKeywordAttribute</c> / <c>LexerCharacterAttribute</c> entries that
      /// <c>Utility.GetEnumAttributes</c> reports for each <see cref="Token"/> value.
      /// </summary>
      private void InitTokenDictionaries()
      {
          TokenDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);
          TokenLineDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);
          TokenCharDictionary = new Dictionary<char, Token>();

          foreach (Token token in Enum.GetValues(typeof(Token)))
          {
              foreach (var keyword in Utility.GetEnumAttributes<Token, LexerKeywordAttribute>(token))
              {
                  Dictionary<string, Token> target = keyword.IsLineKeyword ? TokenLineDictionary : TokenDictionary;
                  target[keyword.Keyword] = token;
              }

              foreach (var character in Utility.GetEnumAttributes<Token, LexerCharacterAttribute>(token))
              {
                  // Only register characters whose context overlaps the active lexer type.
                  if ((character.LexerContext & Type) > 0)
                  {
                      TokenCharDictionary[character.Character] = token;
                  }
              }
          }
      }

      /// <summary>True for any whitespace character except the line feed.</summary>
      private static bool IsWhitespace(char c)
      {
          return char.IsWhiteSpace(c) && c != '\n';
      }

      /// <summary>True for line feed, carriage return and the end-of-input sentinel.</summary>
      private static bool IsEndOfLine(char c)
      {
          return c == '\n' || c == '\r' || c == '\0';
      }

      /// <summary>True for the characters that interrupt plain text in string mode.</summary>
      private static bool IsEscape(char c)
      {
          return c == '%' || c == '{';
      }

      /// <summary>
      /// Classifies the token that starts with <paramref name="c"/>.
      /// </summary>
      /// <param name="c">
      /// Character to classify. Both call sites pass the value that is currently stored
      /// in <c>currentChar</c>.
      /// </param>
      /// <returns>
      /// The recognized token, or <see cref="Token.Unknown"/> when the character has to
      /// be treated as part of a number / identifier.
      /// </returns>
      /// <remarks>
      /// This method is not a pure predicate: the comment, <c>%...%</c> and string
      /// literal cases consume input and may overwrite <see cref="Value"/>.
      /// </remarks>
      private Token DetermineToken(char c)
      {
          if (TokenCharDictionary.TryGetValue(c, out Token charToken))
          {
              return charToken;
          }

          switch (c)
          {
              case ';':
              {
                  // A semicolon starts a comment that runs to the end of the line.
                  // Also stop at end of input: GetNextChar keeps returning '\0' there,
                  // so a comment on the last line without '\n' used to loop forever.
                  while (currentChar != '\n' && currentChar != '\0')
                  {
                      GetNextChar();
                  }
                  return Token.NewLine;
              }

              case '<':
              {
                  if (!Type.HasFlag(LexerType.Real))
                  {
                      break;
                  }

                  char afterLess = GetNextChar(true);
                  if (afterLess == '>')
                  {
                      GetNextChar();
                      return Token.NotEqual;
                  }
                  if (afterLess == '=')
                  {
                      GetNextChar();
                      return Token.LessEqual;
                  }
                  return Token.Less;
              }

              case '>':
              {
                  if (!Type.HasFlag(LexerType.Real))
                  {
                      break;
                  }

                  if (GetNextChar(true) == '=')
                  {
                      GetNextChar();
                      return Token.MoreEqual;
                  }
                  return Token.More;
              }

              case '+':
              {
                  if (GetNextChar(true) == '=')
                  {
                      GetNextChar();
                      return Token.Append;
                  }
                  return Token.Plus;
              }

              case '%':
              {
                  // Collect everything up to the closing '%' and keep the delimiters in the value.
                  // The '\0' check ends an unterminated section at end of input instead of hanging.
                  StringBuilder section = new StringBuilder();
                  while (GetNextChar() != '%' && currentChar != '\0')
                  {
                      section.Append(currentChar);
                  }
                  Value = $"%{section}%";
                  return Token.Value;
              }

              case '"':
              {
                  // String literal with \n, \t, \\ and \" escapes (escape letter is case-insensitive).
                  // The '\0' check ends an unterminated literal at end of input instead of hanging.
                  StringBuilder literal = new StringBuilder();
                  while (GetNextChar() != '"' && currentChar != '\0')
                  {
                      if (currentChar != '\\')
                      {
                          literal.Append(currentChar);
                          continue;
                      }

                      switch (char.ToLowerInvariant(GetNextChar()))
                      {
                          case 'n': literal.Append('\n'); break;
                          case 't': literal.Append('\t'); break;
                          case '\\': literal.Append('\\'); break;
                          case '"': literal.Append('"'); break;
                          // Any other escaped character is dropped.
                      }
                  }
                  Value = new Value(literal.ToString());
                  return Token.Value;
              }

              case (char)0:
                  return Token.EOF;
          }

          return Token.Unknown;
      }

      /// <summary>
      /// Lazily tokenizes the source from the current position.
      /// </summary>
      /// <returns>
      /// A sequence of tokens that ends with <see cref="Token.EOF"/>. Because the
      /// sequence is lazy, <see cref="TokenMarker"/>, <see cref="Identifer"/> and
      /// <see cref="Value"/> describe the token most recently yielded.
      /// </returns>
      public IEnumerable<Token> GetTokens()
      {
          while (true)
          {
              // Skip leading whitespace (kept in string mode) and always skip carriage returns.
              while (IsWhitespace(GetNextChar()) && Type != LexerType.String || currentChar == '\r') { }

              TokenMarker = sourceMarker;

              Token token = DetermineToken(currentChar);
              if (token == Token.EOF)
              {
                  yield return Token.EOF;
                  yield break;
              }
              if (token != Token.Unknown)
              {
                  yield return token;
                  continue;
              }

              // Not a symbol: accumulate a word (number, keyword or identifier).
              StringBuilder bodyBuilder = new StringBuilder(currentChar.ToString());
              while (true)
              {
                  char upcoming = GetNextChar(true);
                  bool stringMode = Type == LexerType.String;

                  if (stringMode && IsEscape(upcoming))
                  {
                      break;
                  }

                  // NOTE(review): DetermineToken is used here as a "does a token start
                  // at the peeked character" test, but for ';', '%' and '"' (when they
                  // are not in the character table) it consumes input and the token it
                  // returns is discarded. Left unchanged because callers may depend on
                  // the resulting token stream - confirm and replace with a
                  // side-effect-free check.
                  if (DetermineToken(upcoming) != Token.Unknown)
                  {
                      break;
                  }

                  if (!stringMode && IsWhitespace(upcoming))
                  {
                      break;
                  }
                  if (upcoming == '\r')
                  {
                      break;
                  }

                  bodyBuilder.Append(GetNextChar());
              }

              string result = bodyBuilder.ToString();

              if (double.TryParse(result, NumberStyles.Float, CultureInfo.InvariantCulture, out var real))
              {
                  Value = real;
                  yield return Token.Value;
                  continue;
              }

              // Hex literal: strip only the leading "0x" (Replace removed every occurrence)
              // and parse culture-independently, like the decimal branch above.
              if (result.StartsWith("0x", StringComparison.Ordinal)
                  && int.TryParse(result.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out int hexResult))
              {
                  Value = hexResult;
                  yield return Token.Value;
                  continue;
              }

              Identifer = result;

              if (TokenDictionary.TryGetValue(Identifer, out token))
              {
                  yield return token;
                  continue;
              }

              // In string mode whitespace is part of the word; drop one leading
              // whitespace character before the line-keyword lookup.
              if (Type == LexerType.String && char.IsWhiteSpace(Identifer[0]))
              {
                  Identifer = Identifer.Substring(1);
              }

              if (TokenLineDictionary.TryGetValue(Identifer, out token))
              {
                  // A line keyword takes the rest of the line verbatim as its value.
                  // The line terminator is only peeked, not consumed, so the next
                  // iteration of the outer loop reads it again.
                  bodyBuilder = new StringBuilder();
                  while (!IsEndOfLine(GetNextChar(true)))
                  {
                      bodyBuilder.Append(GetNextChar());
                  }

                  yield return token;

                  string strValue = bodyBuilder.ToString();

                  // Drop the single separator between the keyword and its argument.
                  if (strValue.Length > 0 && char.IsWhiteSpace(strValue[0]))
                  {
                      strValue = strValue.Substring(1);
                  }

                  Value = new Value(strValue);
                  yield return Token.Value;

                  yield return currentChar == '\0' ? Token.EOF : Token.NewLine;
                  continue;
              }

              yield return Token.Identifer;
          }
      }
  }
  228. }