Lexer.cs 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405
  1. using System;
  2. using System.Collections;
  3. using System.Collections.Generic;
  4. using System.Globalization;
  5. using System.Text;
  6. using System.Text.RegularExpressions;
  7. namespace NTERA.Engine.Compiler
  8. {
  /// <summary>
  /// Character-level lexer that turns a source string into a stream of <see cref="Token"/> values.
  /// The token stream is produced lazily by a single iterator created in the constructor;
  /// side-channel results of the most recent token are exposed through
  /// <see cref="TokenMarker"/>, <see cref="Identifier"/> and <see cref="Value"/>.
  /// </summary>
  public class Lexer : IEnumerable<Token>
  {
      // Markers that open and close a skipped region of the source.
      private const string SkipStart = "[SKIPSTART]";
      private const string SkipEnd = "[SKIPEND]";

      private readonly string source;

      // Current read position; NOTE(review): Marker is assumed to be a value type with
      // (pointer, line, column) constructor arguments - confirm against its declaration.
      private Marker sourceMarker;

      /// <summary>The character most recently returned by <see cref="GetNextChar"/> (including peeks).</summary>
      public char CurrentChar;

      private readonly IEnumerator<Token> currentEnumerator;

      /// <summary>Source position recorded just before the current token was classified.</summary>
      public Marker TokenMarker { get; set; }

      /// <summary>Text of the most recent word that was not recognised as a numeric literal.</summary>
      public string Identifier { get; set; }

      /// <summary>Value of the most recent numeric literal.</summary>
      public Value Value { get; set; }

      static Lexer()
      {
          InitTokenDictionaries();
      }

      /// <summary>
      /// Creates a lexer over <paramref name="input"/> and advances the token iterator once,
      /// so the first token has already been produced when the constructor returns.
      /// </summary>
      /// <param name="input">The source text to tokenise.</param>
      /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
      public Lexer(string input)
      {
          // Fail with a descriptive exception instead of a NullReferenceException raised
          // from inside the iterator on the first MoveNext().
          source = input ?? throw new ArgumentNullException(nameof(input));
          sourceMarker = new Marker(-1, 1, 0);

          currentEnumerator = GetTokens();
          currentEnumerator.MoveNext();
      }

      /// <summary>Moves the read position to <paramref name="marker"/>.</summary>
      /// <param name="marker">The position to continue reading from.</param>
      public void GoTo(Marker marker)
      {
          sourceMarker = marker;
      }

      /// <summary>True when the last call to <see cref="GetNextChar"/> was a peek.</summary>
      public bool IsPeeking { get; protected set; }

      /// <summary>
      /// While true, '\n' is treated as ordinary whitespace. Toggled by '{' / '}' found at the start of a line.
      /// </summary>
      public bool SingleLineMode { get; protected set; }

      /// <summary>
      /// Reads the next character of the source.
      /// </summary>
      /// <param name="peek">
      /// When true the read position is not advanced; <see cref="CurrentChar"/> is still
      /// overwritten with the peeked character.
      /// </param>
      /// <returns>The next character, or '\0' when the end of the source has been reached.</returns>
      public char GetNextChar(bool peek = false)
      {
          IsPeeking = peek;

          if (sourceMarker.Pointer + 1 >= source.Length)
          {
              // Past the end: park the pointer at the end and report '\0'.
              sourceMarker.Pointer = source.Length;
              return CurrentChar = (char)0;
          }

          if (peek)
              return CurrentChar = source[sourceMarker.Pointer + 1];

          sourceMarker.Column++;
          sourceMarker.Pointer++;

          if ((CurrentChar = source[sourceMarker.Pointer]) == '\n')
          {
              sourceMarker.Column = 0;
              sourceMarker.Line++;
          }

          return CurrentChar;
      }

      /// <summary>Keyword text to token, built from <c>LexerKeywordAttribute</c> on <see cref="Token"/> members.</summary>
      protected static Dictionary<string, Token> TokenDictionary;

      /// <summary>Single character to token, built from <c>LexerCharacterAttribute</c> on <see cref="Token"/> members.</summary>
      protected static Dictionary<char, Token> TokenCharDictionary;

      private static volatile bool _initialized = false;
      private static readonly object _initializedLock = new object();

      /// <summary>
      /// Populates <see cref="TokenDictionary"/> and <see cref="TokenCharDictionary"/> once.
      /// Each table is only built if it is still null, and is assigned only after it has been fully populated.
      /// </summary>
      private static void InitTokenDictionaries()
      {
          if (_initialized)
              return;

          lock (_initializedLock)
          {
              if (_initialized)
                  return;

              if (TokenDictionary == null)
              {
                  var keywords = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);

                  foreach (Token token in Enum.GetValues(typeof(Token)))
                  {
                      foreach (var attribute in Utility.GetEnumAttributes<Token, LexerKeywordAttribute>(token))
                      {
                          keywords[attribute.Keyword] = token;
                      }
                  }

                  TokenDictionary = keywords;
              }

              if (TokenCharDictionary == null)
              {
                  var characters = new Dictionary<char, Token>();

                  foreach (Token token in Enum.GetValues(typeof(Token)))
                  {
                      foreach (var attribute in Utility.GetEnumAttributes<Token, LexerCharacterAttribute>(token))
                      {
                          characters[attribute.Character] = token;
                      }
                  }

                  TokenCharDictionary = characters;
              }

              // Previously the flag was never set, which made both guards above dead code.
              _initialized = true;
          }
      }

      // Matches "<digits>p<digits>" anywhere in a word; used for the shift-literal form below.
      private static readonly Regex PowRegex = new Regex(@"(\d+)p(\d+)");

      /// <summary>
      /// Returns true for whitespace that should be skipped between tokens.
      /// '\n' only counts as whitespace when <paramref name="singleLineMode"/> is true.
      /// </summary>
      private static bool IsWhitespace(char c, bool singleLineMode)
      {
          return char.IsWhiteSpace(c) && (c != '\n' || singleLineMode);
      }

      /// <summary>Returns true for '\n', '\r' and the '\0' end-of-source sentinel.</summary>
      private static bool IsEndOfLine(char c)
      {
          return c == '\n' || c == '\r' || c == '\0';
      }

      /// <summary>
      /// Ordinal, bounds-safe test for <paramref name="text"/> occurring in the source at <paramref name="index"/>.
      /// Returns false (rather than throwing) when fewer than <c>text.Length</c> characters remain.
      /// </summary>
      private bool SourceMatchesAt(int index, string text)
      {
          return index >= 0
              && index <= source.Length - text.Length
              && string.CompareOrdinal(source, index, text, 0, text.Length) == 0;
      }

      /// <summary>
      /// Classifies one character as a token, consuming further characters where a token spans several.
      /// </summary>
      /// <param name="peek">Passed to <see cref="GetNextChar"/> when the character has to be fetched.</param>
      /// <param name="useCurrent">When true, <see cref="CurrentChar"/> is classified and nothing is fetched.</param>
      /// <returns>
      /// The recognised token, or <c>Token.Unknown</c> when the character does not start a
      /// token known to this method (the caller then treats it as part of a word).
      /// </returns>
      private Token DetermineToken(bool peek, bool useCurrent)
      {
          char c = useCurrent ? CurrentChar : GetNextChar(peek);

          // Attribute-declared single-character tokens take precedence over the cases below.
          if (TokenCharDictionary.TryGetValue(c, out Token charToken))
              return charToken;

          switch (c)
          {
              case ';': // semicolon starts a comment: discard up to the end of the line
                  while (CurrentChar != '\n')
                  {
                      if (CurrentChar == '\0')
                          return Token.EOF;

                      GetNextChar();
                  }

                  return Token.NewLine;

              case '[':
                  // Only "[SKIPSTART]" at the start of a line opens a skipped region.
                  // SourceMatchesAt replaces an unchecked Substring that threw when the
                  // remaining source was shorter than the marker.
                  if (sourceMarker.Column > 1
                      || !SourceMatchesAt(sourceMarker.Pointer, SkipStart))
                      return Token.Unknown;

                  while (GetNextChar() != '\0')
                  {
                      if (CurrentChar == '[' && SourceMatchesAt(sourceMarker.Pointer, SkipEnd))
                      {
                          // Discard the remainder of the "[SKIPEND]" line.
                          while (true)
                          {
                              switch (GetNextChar())
                              {
                                  case '\n':
                                      return Token.NewLine;
                                  case '\0':
                                      return Token.EOF;
                              }
                          }
                      }
                  }

                  return Token.EOF;

              case '{':
                  // At the start of a line '{' switches single-line mode on and the rest of the line is discarded.
                  if (sourceMarker.Pointer == 0 || source[sourceMarker.Pointer - 1] == '\n')
                  {
                      SingleLineMode = true;

                      if (IsPeeking)
                          GetNextChar();

                      while (CurrentChar != '\n')
                      {
                          if (CurrentChar == '\0')
                              return Token.EOF;

                          GetNextChar();
                      }

                      return Token.NewLine;
                  }

                  return Token.OpenBracket;

              case '}':
                  // At the start of a line '}' switches single-line mode off and the rest of the line is discarded.
                  if (sourceMarker.Pointer == 0 || source[sourceMarker.Pointer - 1] == '\n')
                  {
                      SingleLineMode = false;

                      if (IsPeeking)
                          GetNextChar();

                      while (CurrentChar != '\n')
                      {
                          if (CurrentChar == '\0')
                              return Token.EOF;

                          GetNextChar();
                      }

                      return Token.NewLine;
                  }

                  return Token.CloseBracket;

              case '<':
                  if (GetNextChar(true) == '>')
                  {
                      GetNextChar();
                      return Token.NotEqual;
                  }
                  else if (GetNextChar(true) == '=')
                  {
                      GetNextChar();
                      return Token.LessEqual;
                  }
                  else
                      return Token.Less;

              case '>':
                  if (GetNextChar(true) == '=')
                  {
                      GetNextChar();
                      return Token.MoreEqual;
                  }
                  else
                      return Token.More;

              case '+':
                  if (peek)
                      GetNextChar();

                  if (GetNextChar(true) == '+')
                  {
                      GetNextChar();
                      return Token.Increment;
                  }
                  else
                      return Token.Plus;

              case '-':
                  if (peek)
                      GetNextChar();

                  if (GetNextChar(true) == '-')
                  {
                      GetNextChar();
                      return Token.Decrement;
                  }
                  else
                      return Token.Minus;

              case '=':
              {
                  // "=" and "==" both produce Token.Equal; a second '=' is simply consumed.
                  char adv;

                  if (!peek && !IsPeeking)
                  {
                      adv = GetNextChar(true);
                  }
                  else
                  {
                      // The '=' itself was only peeked, so the character after it sits two
                      // positions ahead of the pointer. Read it with a bounds check: the
                      // unchecked indexer threw when '=' was the last character of the source.
                      int lookahead = sourceMarker.Pointer + 2;
                      adv = lookahead < source.Length ? source[lookahead] : '\0';
                  }

                  if (adv == '=')
                      GetNextChar();

                  return Token.Equal;
              }

              case '&':
                  // "&" and "&&" both produce Token.And.
                  if (peek)
                      GetNextChar();

                  if (GetNextChar(true) == '&')
                      GetNextChar();

                  return Token.And;

              case '\\':
                  if (peek)
                      GetNextChar();

                  if (GetNextChar(true) == '@')
                  {
                      GetNextChar();
                      return Token.TernaryEscape;
                  }

                  return Token.Unknown;

              case '|':
                  // "|" and "||" both produce Token.Or.
                  if (peek)
                      GetNextChar();

                  if (GetNextChar(true) == '|')
                      GetNextChar();

                  return Token.Or;

              case (char)0:
                  return Token.EOF;
          }

          return Token.Unknown;
      }

      /// <summary>
      /// Iterator that produces the token stream. For each token it skips whitespace, records
      /// <see cref="TokenMarker"/>, and either yields a token recognised by
      /// <see cref="DetermineToken"/> or accumulates a word and classifies it as a numeric
      /// literal, a keyword, or an identifier. Ends after yielding <c>Token.EOF</c>.
      /// </summary>
      private IEnumerator<Token> GetTokens()
      {
          sourceMarker = new Marker(-1, 1, 0);

          while (true)
          {
              // Skip whitespace; the first non-whitespace character is left in CurrentChar.
              while (IsWhitespace(GetNextChar(), SingleLineMode))
              {
              }

              TokenMarker = sourceMarker;

              Token token = DetermineToken(false, true);

              if (token == Token.EOF)
              {
                  yield return Token.EOF;
                  yield break;
              }

              if (token != Token.Unknown)
              {
                  yield return token;
                  continue;
              }

              // Accumulate a word: keep consuming while the next character neither starts a
              // known token nor is whitespace.
              StringBuilder bodyBuilder = new StringBuilder(CurrentChar.ToString());

              while (DetermineToken(true, false) == Token.Unknown
                     && !IsWhitespace(GetNextChar(true), SingleLineMode))
              {
                  bodyBuilder.Append(GetNextChar());
              }

              string result = bodyBuilder.ToString();

              if (double.TryParse(result, NumberStyles.Float, CultureInfo.InvariantCulture, out var real))
              {
                  Value = real;
                  yield return Token.Value;

                  if (CurrentChar == '\n')
                      yield return Token.NewLine;

                  continue;
              }

              // Hex literal: match the prefix ordinally, strip only the prefix (not every
              // "0x" in the word), and parse culture-independently like the branch above.
              if (result.StartsWith("0x", StringComparison.Ordinal)
                  && int.TryParse(result.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out int hexResult))
              {
                  Value = hexResult;
                  yield return Token.Value;

                  if (CurrentChar == '\n')
                      yield return Token.NewLine;

                  continue;
              }

              // "<a>p<b>" literal, evaluated as a << b. TryParse keeps digit runs that do not
              // fit an int from throwing out of the iterator; such words fall through to
              // identifier handling instead. Unlike the branches above, no NewLine is emitted here.
              Match powMatch = PowRegex.Match(result);

              if (powMatch.Success
                  && int.TryParse(powMatch.Groups[1].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int mantissa)
                  && int.TryParse(powMatch.Groups[2].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int shift))
              {
                  Value = mantissa << shift;
                  yield return Token.Value;
                  continue;
              }

              Identifier = result;

              if (TokenDictionary.TryGetValue(Identifier, out token))
              {
                  yield return token;
                  continue;
              }

              yield return Token.Identifer;

              if (CurrentChar == '\n')
                  yield return Token.NewLine;
          }
      }

      /// <summary>
      /// Returns the single enumerator created by the constructor. Every call returns the same
      /// instance, which has already been advanced once, so enumeration does not restart.
      /// </summary>
      public IEnumerator<Token> GetEnumerator()
      {
          return currentEnumerator;
      }

      IEnumerator IEnumerable.GetEnumerator()
      {
          return GetEnumerator();
      }
  }
  313. }