// Lexer.cs
  1. using System;
  2. using System.Collections;
  3. using System.Collections.Generic;
  4. using System.Globalization;
  5. using System.Text;
  6. using System.Text.RegularExpressions;
  7. namespace NTERA.Engine.Compiler
  8. {
  9. public class Lexer : IEnumerable<Token>
  10. {
  // ---- private state -------------------------------------------------

  /// <summary>The complete text being tokenized; assigned once in the constructor.</summary>
  private readonly string source;

  /// <summary>The single token iterator created in the constructor and handed out by <see cref="GetEnumerator"/>.</summary>
  private readonly IEnumerator<Token> currentEnumerator;

  /// <summary>Current read position (pointer, line, column) inside <see cref="source"/>; moved by <see cref="GetNextChar"/> and <see cref="GoTo"/>.</summary>
  private Marker sourceMarker;

  // ---- publicly visible lexer state ----------------------------------

  /// <summary>The character most recently returned by <see cref="GetNextChar"/> (consumed or merely peeked); <c>'\0'</c> once the end of input is reached.</summary>
  public char CurrentChar;

  /// <summary>Position recorded by the token iterator right before it classifies the next token.</summary>
  public Marker TokenMarker { get; set; }

  /// <summary>Raw text of the most recent word that was not recognized as a numeric literal (set for keywords as well as plain identifiers).</summary>
  public string Identifier { get; set; }

  /// <summary>Literal value belonging to the most recently produced <c>Token.Value</c>.</summary>
  public Value Value { get; set; }
  /// <summary>
  /// Type initializer: fills the shared keyword and single-character token tables
  /// before any <see cref="Lexer"/> instance is used.
  /// </summary>
  static Lexer() => InitTokenDictionaries();
  /// <summary>
  /// Creates a lexer over <paramref name="input"/> and immediately advances the
  /// internal token iterator once, so the first token is already available as the
  /// enumerator's <c>Current</c> when the constructor returns.
  /// </summary>
  /// <param name="input">The source text to tokenize. Must not be null.</param>
  /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
  public Lexer(string input)
  {
      // Fail here with a clear message instead of a NullReferenceException
      // thrown from inside the iterator during the MoveNext() call below.
      source = input ?? throw new ArgumentNullException(nameof(input));

      // Start one position before the first character (pointer -1); GetTokens()
      // resets the marker to the same value when the iterator first runs.
      // NOTE(review): argument order assumed to be (pointer, line, column) - confirm against Marker.
      sourceMarker = new Marker(-1, 1, 0);

      currentEnumerator = GetTokens();
      currentEnumerator.MoveNext();
  }
  /// <summary>
  /// Moves the read position to <paramref name="marker"/>. Only the position is
  /// replaced; <see cref="CurrentChar"/> and the token iterator are left untouched.
  /// </summary>
  /// <param name="marker">The position to continue reading from.</param>
  public void GoTo(Marker marker) => sourceMarker = marker;
  /// <summary>
  /// True when the most recent <see cref="GetNextChar"/> call was a peek
  /// (it is overwritten with the <c>peek</c> argument on every call).
  /// </summary>
  public bool IsPeeking
  {
      get;
      protected set;
  }

  /// <summary>
  /// Switched on when a line starting with '{' is lexed and off again when a line
  /// starting with '}' is lexed. While it is on, line feeds are treated as ordinary
  /// whitespace by the token iterator.
  /// </summary>
  public bool SingleLineMode
  {
      get;
      protected set;
  }
  /// <summary>
  /// Reads the character following the current position and stores it in
  /// <see cref="CurrentChar"/>.
  /// </summary>
  /// <param name="peek">
  /// When true the position is not advanced (except at end of input, see below);
  /// when false the pointer and column move forward by one and a line feed
  /// additionally resets the column and bumps the line counter.
  /// </param>
  /// <returns>
  /// The character read, or <c>'\0'</c> when there is nothing left. At end of
  /// input the pointer is parked at <c>source.Length</c>, even for a peek.
  /// </returns>
  public char GetNextChar(bool peek = false)
  {
      IsPeeking = peek;

      int nextIndex = sourceMarker.Pointer + 1;

      // Nothing left to read: park the pointer past the end and report NUL.
      if (nextIndex >= source.Length)
      {
          sourceMarker.Pointer = source.Length;
          CurrentChar = (char)0;
          return CurrentChar;
      }

      // Peeking only exposes the character; the position stays where it is.
      if (peek)
      {
          CurrentChar = source[nextIndex];
          return CurrentChar;
      }

      // Consume the character.
      sourceMarker.Column++;
      sourceMarker.Pointer = nextIndex;
      CurrentChar = source[nextIndex];

      if (CurrentChar == '\n')
      {
          sourceMarker.Column = 0;
          sourceMarker.Line++;
      }

      return CurrentChar;
  }
  /// <summary>Keyword text (case-insensitive, invariant culture) to token lookup, filled from <c>LexerKeywordAttribute</c>.</summary>
  protected static Dictionary<string, Token> TokenDictionary;

  /// <summary>Single character to token lookup, filled from <c>LexerCharacterAttribute</c>.</summary>
  protected static Dictionary<char, Token> TokenCharDictionary;

  // Set once both tables are published. Volatile so the unlocked fast-path
  // check below observes the write made inside the lock.
  private static volatile bool _initialized;
  private static readonly object _initializedLock = new object();

  /// <summary>
  /// Populates <see cref="TokenDictionary"/> and <see cref="TokenCharDictionary"/>
  /// from the attributes declared on the <c>Token</c> enum members. Safe to call
  /// repeatedly; after the first successful run it returns immediately.
  /// </summary>
  private static void InitTokenDictionaries()
  {
      if (_initialized)
          return;

      lock (_initializedLock)
      {
          if (_initialized)
              return;

          if (TokenDictionary == null)
          {
              // Build into a local and publish afterwards so the static field
              // never exposes a half-filled table.
              var keywords = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);

              foreach (Token token in Enum.GetValues(typeof(Token)))
              {
                  foreach (var attribute in Utility.GetEnumAttributes<Token, LexerKeywordAttribute>(token))
                  {
                      // A keyword declared on several tokens resolves to the last one enumerated.
                      keywords[attribute.Keyword] = token;
                  }
              }

              TokenDictionary = keywords;
          }

          if (TokenCharDictionary == null)
          {
              var characters = new Dictionary<char, Token>();

              foreach (Token token in Enum.GetValues(typeof(Token)))
              {
                  foreach (var attribute in Utility.GetEnumAttributes<Token, LexerCharacterAttribute>(token))
                  {
                      characters[attribute.Character] = token;
                  }
              }

              TokenCharDictionary = characters;
          }

          // Previously the flag was never set, so both guards above were dead code.
          _initialized = true;
      }
  }
  /// <summary>
  /// Matches the <c>NpM</c> literal form (digits, a lowercase 'p', digits). The token
  /// iterator evaluates a match as <c>N &lt;&lt; M</c>. The pattern is not anchored, so it
  /// also matches when the form appears inside a longer word.
  /// </summary>
  private static readonly Regex PowRegex = new Regex(@"(\d+)p(\d+)");
  /// <summary>
  /// Tells whether <paramref name="c"/> should be skipped as whitespace.
  /// A line feed only counts as whitespace while <paramref name="singleLineMode"/> is on.
  /// </summary>
  /// <param name="c">The character to test.</param>
  /// <param name="singleLineMode">Whether line feeds are currently skippable.</param>
  /// <returns>True when the character is skippable whitespace.</returns>
  private static bool IsWhitespace(char c, bool singleLineMode)
  {
      if (!char.IsWhiteSpace(c))
      {
          return false;
      }

      return singleLineMode || c != '\n';
  }
  /// <summary>
  /// Tells whether <paramref name="c"/> terminates a line: line feed, carriage
  /// return, or the NUL character used as the end-of-input sentinel.
  /// </summary>
  /// <param name="c">The character to test.</param>
  /// <returns>True for <c>'\n'</c>, <c>'\r'</c> and <c>'\0'</c>; false otherwise.</returns>
  private static bool IsEndOfLine(char c)
  {
      switch (c)
      {
          case '\n':
          case '\r':
          case '\0':
              return true;
          default:
              return false;
      }
  }
  /// <summary>
  /// Classifies a single character (plus any look-ahead it needs) as a token.
  /// </summary>
  /// <param name="peek">
  /// Forwarded to <see cref="GetNextChar"/> when <paramref name="useCurrent"/> is false.
  /// Note that several branches still advance the read position even when peeking
  /// (comments, skip blocks, line-leading braces and the '-', '&amp;', '\', '|' cases).
  /// </param>
  /// <param name="useCurrent">
  /// When true, <see cref="CurrentChar"/> is classified as-is and no character is read first.
  /// </param>
  /// <returns>
  /// The token found in <see cref="TokenCharDictionary"/> if the character is registered
  /// there; otherwise the token recognized by the hand-written cases below;
  /// <c>Token.Unknown</c> when the character starts none of them (i.e. it belongs to a
  /// word or number).
  /// </returns>
  private Token DetermineToken(bool peek, bool useCurrent)
  {
      char c = useCurrent ? CurrentChar : GetNextChar(peek);
      char adv;

      // Attribute-registered single-character tokens take precedence over everything below.
      if (TokenCharDictionary.TryGetValue(c, out Token charToken))
          return charToken;

      switch (c)
      {
          case ';': // a semicolon starts a comment: discard the rest of the line
              while (CurrentChar != '\n')
              {
                  if (CurrentChar == '\0')
                      return Token.EOF;

                  GetNextChar();
              }

              return Token.NewLine;

          case '[':
              const string SkipStart = "[SKIPSTART]";
              const string SkipEnd = "[SKIPEND]";

              // Only a [SKIPSTART] found at the read position in column 0 or 1
              // opens a skip block; any other '[' is left to the caller.
              if (sourceMarker.Column > 1 || !SourceMatchesAt(sourceMarker.Pointer, SkipStart))
                  return Token.Unknown;

              while (GetNextChar() != '\0')
              {
                  // Bounds-checked comparison: a '[' within the last few characters of an
                  // unterminated skip block used to make Substring throw here.
                  if (CurrentChar == '[' && SourceMatchesAt(sourceMarker.Pointer, SkipEnd))
                  {
                      // Discard the remainder of the [SKIPEND] line.
                      while (true)
                      {
                          switch (GetNextChar())
                          {
                              case '\n':
                                  return Token.NewLine;
                              case '\0':
                                  return Token.EOF;
                          }
                      }
                  }
              }

              return Token.EOF;

          case '{':
              // Pointer at index 0 or directly after a line feed: the brace switches
              // single-line mode on and the rest of its line is discarded.
              if (sourceMarker.Pointer == 0 || source[sourceMarker.Pointer - 1] == '\n')
              {
                  SingleLineMode = true;

                  // The last read was a peek: advance before scanning to the end of the line.
                  if (IsPeeking)
                      GetNextChar();

                  while (CurrentChar != '\n')
                  {
                      if (CurrentChar == '\0')
                          return Token.EOF;

                      GetNextChar();
                  }

                  return Token.NewLine;
              }

              return Token.OpenBracket;

          case '}':
              // Mirror of the '{' case: switches single-line mode off again.
              if (sourceMarker.Pointer == 0 || source[sourceMarker.Pointer - 1] == '\n')
              {
                  SingleLineMode = false;

                  if (IsPeeking)
                      GetNextChar();

                  while (CurrentChar != '\n')
                  {
                      if (CurrentChar == '\0')
                          return Token.EOF;

                      GetNextChar();
                  }

                  return Token.NewLine;
              }

              return Token.CloseBracket;

          case '<':
              // Repeated peeks return the same character, so one look-ahead is enough.
              adv = GetNextChar(true);

              if (adv == '>')
              {
                  GetNextChar();
                  return Token.NotEqual;
              }

              if (adv == '<')
              {
                  GetNextChar();
                  return Token.ShiftLeft;
              }

              if (adv == '=')
              {
                  GetNextChar();
                  return Token.LessEqual;
              }

              return Token.Less;

          case '>':
              adv = GetNextChar(true);

              if (adv == '=')
              {
                  GetNextChar();
                  return Token.MoreEqual;
              }

              if (adv == '>')
              {
                  GetNextChar();
                  return Token.ShiftRight;
              }

              return Token.More;

          case '+':
              // When '+' itself was only peeked, the character after it sits two
              // positions ahead of the pointer. Read it with a bounds check: a
              // trailing '+' used to index past the end of the source here.
              adv = !peek && !IsPeeking
                  ? GetNextChar(true)
                  : PeekSourceChar(2);

              if (adv == '=')
              {
                  GetNextChar();
                  return Token.Append;
              }
              else if (adv == '+')
              {
                  GetNextChar();
                  return Token.Increment;
              }

              return Token.Plus;

          case '-':
              if (peek)
                  GetNextChar();

              if (GetNextChar(true) == '-')
              {
                  GetNextChar();
                  return Token.Decrement;
              }
              else
                  return Token.Minus;

          case '=':
              // Same look-ahead rule (and the same bounds fix) as the '+' case.
              adv = !peek && !IsPeeking
                  ? GetNextChar(true)
                  : PeekSourceChar(2);

              // "==" is accepted as a synonym for "=".
              if (adv == '=')
                  GetNextChar();

              return Token.Equal;

          case '&':
              if (peek)
                  GetNextChar();

              // "&&" is accepted as a synonym for "&".
              if (GetNextChar(true) == '&')
                  GetNextChar();

              return Token.And;

          case '\\':
              if (peek)
                  GetNextChar();

              if (GetNextChar(true) == '@')
              {
                  GetNextChar();
                  return Token.TernaryEscape;
              }

              return Token.Unknown;

          case '|':
              if (peek)
                  GetNextChar();

              // "||" is accepted as a synonym for "|".
              if (GetNextChar(true) == '|')
                  GetNextChar();

              return Token.Or;

          case (char)0:
              return Token.EOF;
      }

      return Token.Unknown;
  }

  /// <summary>Returns the source character <paramref name="offset"/> positions from the pointer, or '\0' when that index lies outside the source.</summary>
  private char PeekSourceChar(int offset)
  {
      int index = sourceMarker.Pointer + offset;
      return index >= 0 && index < source.Length ? source[index] : '\0';
  }

  /// <summary>Ordinal test for <paramref name="text"/> occurring in the source at <paramref name="index"/>; false when it would not fit.</summary>
  private bool SourceMatchesAt(int index, string text)
  {
      return index >= 0
          && index + text.Length <= source.Length
          && string.CompareOrdinal(source, index, text, 0, text.Length) == 0;
  }
  /// <summary>
  /// The token iterator behind this lexer. Each step skips whitespace, records the
  /// start position in <see cref="TokenMarker"/>, and yields either an operator or
  /// punctuation token from <see cref="DetermineToken"/>, or a word classified as:
  /// a floating-point or integer literal, a <c>0x</c> hexadecimal literal, an <c>NpM</c>
  /// shift literal (all yielded as <c>Token.Value</c> with <see cref="Value"/> set), a
  /// keyword from <see cref="TokenDictionary"/>, or <c>Token.Identifer</c> (with
  /// <see cref="Identifier"/> set for the last two).
  /// </summary>
  /// <returns>An iterator that ends after yielding <c>Token.EOF</c>.</returns>
  private IEnumerator<Token> GetTokens()
  {
      sourceMarker = new Marker(-1, 1, 0);

      while (true)
      {
          // Skip whitespace; line feeds are skipped only while SingleLineMode is on.
          while (IsWhitespace(GetNextChar(), SingleLineMode))
          {
          }

          TokenMarker = sourceMarker;

          Token token = DetermineToken(false, true);

          if (token == Token.EOF)
          {
              yield return Token.EOF;
              yield break;
          }

          if (token != Token.Unknown)
          {
              yield return token;
              continue;
          }

          // Not an operator: collect a word up to the next token start or whitespace.
          StringBuilder bodyBuilder = new StringBuilder(CurrentChar.ToString());

          while (DetermineToken(true, false) == Token.Unknown
                 && !IsWhitespace(GetNextChar(true), SingleLineMode))
          {
              bodyBuilder.Append(GetNextChar());
          }

          string result = bodyBuilder.ToString();

          if (double.TryParse(result, NumberStyles.Float, CultureInfo.InvariantCulture, out var real))
          {
              Value = real;
              yield return Token.Value;

              if (CurrentChar == '\n')
                  yield return Token.NewLine;

              continue;
          }

          // Hex literal: match the prefix ordinally and strip only the prefix
          // (Replace removed every "0x" in the word), parsing culture-independently.
          if (result.StartsWith("0x", StringComparison.Ordinal)
              && int.TryParse(result.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out int hexResult))
          {
              Value = hexResult;
              yield return Token.Value;

              if (CurrentChar == '\n')
                  yield return Token.NewLine;

              continue;
          }

          // NpM literal. \d also matches non-ASCII digits and the groups may not fit
          // in an int, so parse defensively; a word that cannot be evaluated falls
          // through to identifier handling instead of throwing out of the iterator.
          Match powMatch = PowRegex.Match(result);
          if (powMatch.Success
              && int.TryParse(powMatch.Groups[1].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int a)
              && int.TryParse(powMatch.Groups[2].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int b))
          {
              Value = a << b;
              yield return Token.Value;
              continue;
          }

          Identifier = result;

          if (TokenDictionary.TryGetValue(Identifier, out token))
          {
              yield return token;
              continue;
          }

          yield return Token.Identifer;

          if (CurrentChar == '\n')
              yield return Token.NewLine;
      }
  }
  /// <summary>
  /// Returns the lexer's one and only token enumerator. Every call hands back the
  /// same instance, which the constructor has already advanced to the first token.
  /// </summary>
  /// <returns>The shared token enumerator.</returns>
  public IEnumerator<Token> GetEnumerator() => currentEnumerator;
  /// <summary>Non-generic counterpart; forwards to the generic <see cref="GetEnumerator"/>.</summary>
  IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
  329. }
  330. }