Lexer.cs 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. using System;
  2. using System.Collections;
  3. using System.Collections.Generic;
  4. using System.Globalization;
  5. using System.Text;
  6. using System.Text.RegularExpressions;
  7. namespace NTERA.Engine.Compiler
  8. {
  /// <summary>
  /// Turns script source text into a sequence of <see cref="Token"/> values.
  /// </summary>
  /// <remarks>
  /// The lexer keeps a single cursor (<c>sourceMarker</c>) into the source string.
  /// After a token is produced, <see cref="TokenMarker"/>, <see cref="Identifier"/>
  /// and <see cref="Value"/> describe it. One enumerator is created per instance and
  /// shared by every call to <see cref="GetEnumerator"/>.
  /// </remarks>
  public class Lexer : IEnumerable<Token>
  {
      // Markers that open and close a block of source the lexer skips entirely.
      private const string SkipStart = "[SKIPSTART]";
      private const string SkipEnd = "[SKIPEND]";

      // "<digits>p<digits>" literal, evaluated as (left << right).
      // Anchored so that an identifier merely containing such a run is not treated as a number.
      private static readonly Regex PowRegex = new Regex(@"^(\d+)p(\d+)$");

      /// <summary>Keyword text to token lookup, built from <c>LexerKeywordAttribute</c> on <see cref="Token"/> members.</summary>
      protected static Dictionary<string, Token> TokenDictionary;

      /// <summary>Single character to token lookup, built from <c>LexerCharacterAttribute</c> on <see cref="Token"/> members.</summary>
      protected static Dictionary<char, Token> TokenCharDictionary;

      private static bool _initialized = false;
      private static readonly object _initializedLock = new object();

      private readonly string source;
      private readonly IEnumerator<Token> currentEnumerator;
      private Marker sourceMarker;

      /// <summary>The character most recently returned by <see cref="GetNextChar"/> (peeked or consumed).</summary>
      public char CurrentChar;

      static Lexer()
      {
          InitTokenDictionaries();
      }

      /// <summary>
      /// Creates a lexer over <paramref name="input"/> and advances the shared enumerator
      /// to the first token.
      /// </summary>
      /// <param name="input">Source text to tokenize.</param>
      /// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
      public Lexer(string input)
      {
          // Fail at the call site instead of with a NullReferenceException inside GetNextChar.
          source = input ?? throw new ArgumentNullException(nameof(input));
          sourceMarker = new Marker(-1, 1, 0);

          currentEnumerator = GetTokens();
          currentEnumerator.MoveNext();
      }

      /// <summary>Marker captured just before the current token was determined.</summary>
      public Marker TokenMarker { get; set; }

      /// <summary>Text of the most recent identifier or keyword candidate.</summary>
      public string Identifier { get; set; }

      /// <summary>Value of the most recent numeric literal.</summary>
      public Value Value { get; set; }

      /// <summary><c>true</c> when the last <see cref="GetNextChar"/> call was a peek.</summary>
      public bool IsPeeking { get; protected set; }

      /// <summary>
      /// While <c>true</c>, a line feed is treated as ordinary whitespace.
      /// Switched on by a <c>{</c> at the start of a line and off by a <c>}</c> at the start of a line.
      /// </summary>
      public bool SingleLineMode { get; protected set; }

      /// <summary>Moves the source cursor to <paramref name="marker"/>.</summary>
      /// <param name="marker">Position to continue reading from.</param>
      public void GoTo(Marker marker)
      {
          sourceMarker = marker;
      }

      /// <summary>
      /// Reads the character after the cursor, optionally without consuming it.
      /// </summary>
      /// <param name="peek">When <c>true</c> the cursor is not advanced.</param>
      /// <returns>
      /// The next character, or <c>'\0'</c> when the end of the source has been reached.
      /// The same value is stored in <see cref="CurrentChar"/>.
      /// </returns>
      public char GetNextChar(bool peek = false)
      {
          IsPeeking = peek;

          if (sourceMarker.Pointer + 1 >= source.Length)
          {
              // Park the cursor at the end; this happens for peeks as well.
              sourceMarker.Pointer = source.Length;
              return CurrentChar = (char)0;
          }

          if (peek)
              return CurrentChar = source[sourceMarker.Pointer + 1];

          sourceMarker.Column++;
          sourceMarker.Pointer++;

          if ((CurrentChar = source[sourceMarker.Pointer]) == '\n')
          {
              sourceMarker.Column = 0;
              sourceMarker.Line++;
          }

          return CurrentChar;
      }

      /// <summary>
      /// Builds the keyword and character lookup tables from the attributes on <see cref="Token"/>.
      /// Only the first call does any work.
      /// </summary>
      private static void InitTokenDictionaries()
      {
          if (_initialized)
              return;

          lock (_initializedLock)
          {
              if (_initialized)
                  return;

              if (TokenDictionary == null)
              {
                  TokenDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);

                  foreach (Token token in Enum.GetValues(typeof(Token)))
                  {
                      foreach (var attribute in Utility.GetEnumAttributes<Token, LexerKeywordAttribute>(token))
                      {
                          TokenDictionary[attribute.Keyword] = token;
                      }
                  }
              }

              if (TokenCharDictionary == null)
              {
                  TokenCharDictionary = new Dictionary<char, Token>();

                  foreach (Token token in Enum.GetValues(typeof(Token)))
                  {
                      foreach (var attribute in Utility.GetEnumAttributes<Token, LexerCharacterAttribute>(token))
                      {
                          TokenCharDictionary[attribute.Character] = token;
                      }
                  }
              }

              // Previously never set, which made the guard above ineffective.
              _initialized = true;
          }
      }

      /// <summary>
      /// Returns <c>true</c> for whitespace the lexer should skip. A line feed only
      /// counts when <paramref name="singleLineMode"/> is <c>true</c>.
      /// </summary>
      private static bool IsWhitespace(char c, bool singleLineMode)
      {
          return char.IsWhiteSpace(c) && (c != '\n' || singleLineMode);
      }

      /// <summary>Returns <c>true</c> for line feed, carriage return and the end-of-source character.</summary>
      private static bool IsEndOfLine(char c)
      {
          return c == '\n' || c == '\r' || c == '\0';
      }

      /// <summary>Returns the source character at <paramref name="index"/>, or <c>'\0'</c> when the index is out of range.</summary>
      private char CharAtOrNul(int index)
      {
          return index >= 0 && index < source.Length ? source[index] : '\0';
      }

      /// <summary>
      /// Ordinal test for <paramref name="text"/> occurring in the source at <paramref name="index"/>.
      /// Returns <c>false</c> instead of throwing when too few characters remain.
      /// </summary>
      private bool SourceHasAt(int index, string text)
      {
          return index >= 0
                 && index + text.Length <= source.Length
                 && string.CompareOrdinal(source, index, text, 0, text.Length) == 0;
      }

      /// <summary>
      /// Consumes characters until <see cref="CurrentChar"/> is a line feed or the end of the source.
      /// </summary>
      /// <returns><c>Token.NewLine</c> when a line feed was reached, <c>Token.EOF</c> otherwise.</returns>
      private Token SkipRestOfLine()
      {
          while (CurrentChar != '\n')
          {
              if (CurrentChar == '\0')
                  return Token.EOF;

              GetNextChar();
          }

          return Token.NewLine;
      }

      /// <summary>
      /// Classifies the character at, or immediately after, the cursor.
      /// </summary>
      /// <param name="peek">Passed to <see cref="GetNextChar"/> when <paramref name="useCurrent"/> is <c>false</c>.</param>
      /// <param name="useCurrent">When <c>true</c>, classify <see cref="CurrentChar"/> instead of reading a new character.</param>
      /// <returns>The recognised token, or <c>Token.Unknown</c> when the character starts no operator or comment.</returns>
      /// <remarks>
      /// NOTE(review): several branches consume input even when <paramref name="peek"/> is <c>true</c>,
      /// and the bracket, brace and skip-marker checks inspect the cursor position rather than the
      /// peeked position. That behaviour is kept as is; confirm against the parser before changing it.
      /// </remarks>
      private Token DetermineToken(bool peek, bool useCurrent)
      {
          char c = useCurrent ? CurrentChar : GetNextChar(peek);
          char adv;

          if (TokenCharDictionary.TryGetValue(c, out Token charToken))
              return charToken;

          switch (c)
          {
              case ';': //semicolon is comment
                  return SkipRestOfLine();

              case '[':
                  // Only a "[SKIPSTART]" in the first column opens a skipped region.
                  if (sourceMarker.Column > 1 || !SourceHasAt(sourceMarker.Pointer, SkipStart))
                      return Token.Unknown;

                  while (GetNextChar() != '\0')
                  {
                      if (CurrentChar == '[' && SourceHasAt(sourceMarker.Pointer, SkipEnd))
                      {
                          // Drop the remainder of the "[SKIPEND]" line.
                          while (true)
                          {
                              switch (GetNextChar())
                              {
                                  case '\n':
                                      return Token.NewLine;
                                  case '\0':
                                      return Token.EOF;
                              }
                          }
                      }
                  }

                  return Token.EOF;

              case '{':
                  if (sourceMarker.Pointer == 0 || source[sourceMarker.Pointer - 1] == '\n')
                  {
                      SingleLineMode = true;

                      if (IsPeeking)
                          GetNextChar();

                      return SkipRestOfLine();
                  }

                  return Token.OpenBracket;

              case '}':
                  if (sourceMarker.Pointer == 0 || source[sourceMarker.Pointer - 1] == '\n')
                  {
                      SingleLineMode = false;

                      if (IsPeeking)
                          GetNextChar();

                      return SkipRestOfLine();
                  }

                  return Token.CloseBracket;

              case '<':
                  adv = GetNextChar(true);

                  if (adv == '>')
                  {
                      GetNextChar();
                      return Token.NotEqual;
                  }

                  if (adv == '=')
                  {
                      GetNextChar();
                      return Token.LessEqual;
                  }

                  return Token.Less;

              case '>':
                  if (GetNextChar(true) == '=')
                  {
                      GetNextChar();
                      return Token.MoreEqual;
                  }

                  return Token.More;

              case '+':
                  // The look-ahead used to index the source unchecked and threw
                  // when '+' was the final character.
                  adv = !peek && !IsPeeking
                      ? GetNextChar(true)
                      : CharAtOrNul(sourceMarker.Pointer + 2);

                  if (adv == '=')
                  {
                      GetNextChar();
                      return Token.Append;
                  }

                  if (adv == '+')
                  {
                      GetNextChar();
                      return Token.Increment;
                  }

                  return Token.Plus;

              case '-':
                  if (peek)
                      GetNextChar();

                  if (GetNextChar(true) == '-')
                  {
                      GetNextChar();
                      return Token.Decrement;
                  }

                  return Token.Minus;

              case '=':
                  // Same bounds-safe look-ahead as '+'. "==" and "=" both yield Equal.
                  adv = !peek && !IsPeeking
                      ? GetNextChar(true)
                      : CharAtOrNul(sourceMarker.Pointer + 2);

                  if (adv == '=')
                      GetNextChar();

                  return Token.Equal;

              case '&':
                  if (peek)
                      GetNextChar();

                  // "&&" and "&" both yield And.
                  if (GetNextChar(true) == '&')
                      GetNextChar();

                  return Token.And;

              case '\\':
                  if (peek)
                      GetNextChar();

                  if (GetNextChar(true) == '@')
                  {
                      GetNextChar();
                      return Token.TernaryEscape;
                  }

                  return Token.Unknown;

              case '|':
                  if (peek)
                      GetNextChar();

                  // "||" and "|" both yield Or.
                  if (GetNextChar(true) == '|')
                      GetNextChar();

                  return Token.Or;

              case (char)0:
                  return Token.EOF;
          }

          return Token.Unknown;
      }

      /// <summary>
      /// Iterator that produces the token stream. It ends after yielding <c>Token.EOF</c>.
      /// </summary>
      private IEnumerator<Token> GetTokens()
      {
          sourceMarker = new Marker(-1, 1, 0);

          while (true)
          {
              while (IsWhitespace(GetNextChar(), SingleLineMode))
              {
              }

              TokenMarker = sourceMarker;

              Token token = DetermineToken(false, true);

              if (token == Token.EOF)
              {
                  yield return Token.EOF;
                  yield break;
              }

              if (token != Token.Unknown)
              {
                  yield return token;
                  continue;
              }

              // Not an operator: gather characters until an operator or whitespace follows.
              StringBuilder bodyBuilder = new StringBuilder(CurrentChar.ToString());

              while (DetermineToken(true, false) == Token.Unknown
                     && !IsWhitespace(GetNextChar(true), SingleLineMode))
              {
                  bodyBuilder.Append(GetNextChar());
              }

              string result = bodyBuilder.ToString();

              if (double.TryParse(result, NumberStyles.Float, CultureInfo.InvariantCulture, out var real))
              {
                  Value = real;
                  yield return Token.Value;

                  if (CurrentChar == '\n')
                      yield return Token.NewLine;

                  continue;
              }

              // Strip only the leading "0x" and parse culture-independently.
              if (result.StartsWith("0x", StringComparison.Ordinal)
                  && int.TryParse(result.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out int hexResult))
              {
                  Value = hexResult;
                  yield return Token.Value;

                  if (CurrentChar == '\n')
                      yield return Token.NewLine;

                  continue;
              }

              // TryParse: operands that do not fit an int fall through to the identifier path
              // instead of throwing out of the enumerator.
              Match powMatch = PowRegex.Match(result);
              if (powMatch.Success
                  && int.TryParse(powMatch.Groups[1].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int a)
                  && int.TryParse(powMatch.Groups[2].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int b))
              {
                  Value = a << b;
                  yield return Token.Value;
                  continue;
              }

              Identifier = result;

              if (TokenDictionary.TryGetValue(Identifier, out token))
              {
                  yield return token;
                  continue;
              }

              yield return Token.Identifer;

              if (CurrentChar == '\n')
                  yield return Token.NewLine;
          }
      }

      /// <summary>
      /// Returns the lexer's single shared enumerator, which the constructor has already
      /// advanced to the first token.
      /// </summary>
      public IEnumerator<Token> GetEnumerator()
      {
          return currentEnumerator;
      }

      IEnumerator IEnumerable.GetEnumerator()
      {
          return GetEnumerator();
      }
  }
  319. }