// Lexer.cs (8.7 KB)
  1. using System;
  2. using System.Collections;
  3. using System.Collections.Generic;
  4. using System.Globalization;
  5. using System.Text;
  6. using System.Text.RegularExpressions;
  7. namespace NTERA.Interpreter.Compiler
  8. {
  9. public class Lexer : IEnumerable<Token>
  10. {
  // ---- Publicly visible lexer state -------------------------------------

  /// <summary>
  /// Character most recently produced by <see cref="GetNextChar"/>.
  /// Peeking calls overwrite this field as well.
  /// </summary>
  public char CurrentChar;

  /// <summary>Read position captured by the tokenizer just before it classifies a token.</summary>
  public Marker TokenMarker { get; set; }

  /// <summary>Raw text of the last non-numeric word read by the tokenizer.</summary>
  public string Identifier { get; set; }

  /// <summary>Value of the last numeric literal read by the tokenizer.</summary>
  public Value Value { get; set; }

  // ---- Private state -----------------------------------------------------

  // Complete text being tokenized; assigned once in the constructor.
  private readonly string source;

  // Current read position inside `source` (pointer, line and column are updated by GetNextChar).
  private Marker sourceMarker;

  // The single token stream created in the constructor; GetEnumerator() and Expression() both use it.
  private readonly IEnumerator<Token> currentEnumerator;
  /// <summary>
  /// Type initializer: fills the shared keyword and character lookup tables
  /// before any <see cref="Lexer"/> instance is used.
  /// </summary>
  static Lexer() => InitTokenDictionaries();
  /// <summary>
  /// Creates a lexer over <paramref name="input"/> and immediately advances the
  /// token stream to its first token.
  /// </summary>
  /// <param name="input">Source text to tokenize. Must not be null.</param>
  /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
  public Lexer(string input)
  {
      // Fail fast: a null source would otherwise surface as a NullReferenceException
      // from inside the iterator on the MoveNext() call below.
      source = input ?? throw new ArgumentNullException(nameof(input));

      // Pointer starts at -1 so that the first GetNextChar() lands on index 0.
      // NOTE(review): argument order is presumably (pointer, line, column) - confirm against Marker.
      sourceMarker = new Marker(-1, 1, 0);

      currentEnumerator = GetTokens();

      // Prime the stream so that currentEnumerator.Current is valid right after construction.
      currentEnumerator.MoveNext();
  }
  /// <summary>
  /// Replaces the current read position with <paramref name="marker"/>.
  /// Only the position is changed; <see cref="CurrentChar"/> is left as it was.
  /// </summary>
  /// <param name="marker">Position that subsequent reads continue from.</param>
  public void GoTo(Marker marker) => sourceMarker = marker;
  /// <summary>True when the most recent <see cref="GetNextChar"/> call was made with <c>peek: true</c>.</summary>
  public bool IsPeeking { get; protected set; }

  /// <summary>
  /// Reads the character that follows the current read position.
  /// </summary>
  /// <param name="peek">
  /// When true the read position is left untouched; when false the position
  /// (pointer, column and, on a line feed, line) is advanced onto the character.
  /// </param>
  /// <returns>
  /// The character read, or <c>'\0'</c> when there is no further input.
  /// The same value is also stored in <see cref="CurrentChar"/>, even when peeking.
  /// </returns>
  public char GetNextChar(bool peek = false)
  {
      IsPeeking = peek;

      int next = sourceMarker.Pointer + 1;

      if (next >= source.Length)
      {
          // End of input. Only a consuming read may park the pointer at the end;
          // a peek must not move the read position.
          if (!peek)
              sourceMarker.Pointer = source.Length;

          return CurrentChar = (char)0;
      }

      if (peek)
          return CurrentChar = source[next];

      sourceMarker.Column++;
      sourceMarker.Pointer = next;
      CurrentChar = source[next];

      if (CurrentChar == '\n')
      {
          // The line feed itself starts the next line: reset the column and bump the line counter.
          sourceMarker.Column = 0;
          sourceMarker.Line++;
      }

      return CurrentChar;
  }
  /// <summary>Keyword text to token lookup (case-insensitive), built from <c>LexerKeywordAttribute</c> annotations.</summary>
  protected static Dictionary<string, Token> TokenDictionary;

  /// <summary>Single character to token lookup, built from <c>LexerCharacterAttribute</c> annotations.</summary>
  protected static Dictionary<char, Token> TokenCharDictionary;

  // Set once both tables are built. Volatile so the unlocked fast-path read below
  // observes the write made inside the lock.
  private static volatile bool _initialized;
  private static readonly object _initializedLock = new object();

  /// <summary>
  /// Builds <see cref="TokenDictionary"/> and <see cref="TokenCharDictionary"/> from the
  /// attributes attached to the members of the <c>Token</c> enum. Safe to call repeatedly;
  /// the tables are only built once.
  /// </summary>
  private static void InitTokenDictionaries()
  {
      if (_initialized)
          return;

      lock (_initializedLock)
      {
          if (_initialized)
              return;

          if (TokenDictionary == null)
          {
              // Build into a local and publish when complete so readers never see a half-filled table.
              var keywords = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);

              foreach (Token token in Enum.GetValues(typeof(Token)))
              {
                  foreach (var attribute in Utility.GetEnumAttributes<Token, LexerKeywordAttribute>(token))
                  {
                      keywords[attribute.Keyword] = token;
                  }
              }

              TokenDictionary = keywords;
          }

          if (TokenCharDictionary == null)
          {
              var characters = new Dictionary<char, Token>();

              foreach (Token token in Enum.GetValues(typeof(Token)))
              {
                  foreach (var attribute in Utility.GetEnumAttributes<Token, LexerCharacterAttribute>(token))
                  {
                      characters[attribute.Character] = token;
                  }
              }

              TokenCharDictionary = characters;
          }

          // Previously this flag was never set, which left the guards above without any effect.
          _initialized = true;
      }
  }
  // Matches a whole "<digits>p<digits>" literal (for example "1p3"); the tokenizer turns it
  // into the first number shifted left by the second. Anchored with \A ... \z so that words
  // which merely contain such a sequence (for example "abc1p3x") are not treated as numbers,
  // and restricted to ASCII digits because the captured groups are parsed as int.
  private static readonly Regex PowRegex = new Regex(@"\A([0-9]+)p([0-9]+)\z");
  /// <summary>
  /// Tells whether <paramref name="c"/> is blank space that separates tokens.
  /// A line feed is deliberately excluded even though it is Unicode whitespace.
  /// </summary>
  /// <param name="c">Character to test.</param>
  /// <returns>True for every whitespace character except <c>'\n'</c>.</returns>
  private static bool IsWhitespace(char c)
  {
      if (c == '\n')
          return false;

      return char.IsWhiteSpace(c);
  }
  /// <summary>
  /// Tells whether <paramref name="c"/> terminates a line: a line feed, a carriage
  /// return, or the NUL character used to signal end of input.
  /// </summary>
  /// <param name="c">Character to test.</param>
  /// <returns>True for <c>'\n'</c>, <c>'\r'</c> and <c>'\0'</c>; false otherwise.</returns>
  private static bool IsEndOfLine(char c)
  {
      switch (c)
      {
          case '\n':
          case '\r':
          case '\0':
              return true;

          default:
              return false;
      }
  }
  /// <summary>
  /// Classifies one character as a token, consuming further input where the token
  /// spans more than a single character (comments, skip blocks, two-character operators).
  /// </summary>
  /// <param name="peek">
  /// Forwarded to <see cref="GetNextChar"/> when <paramref name="useCurrent"/> is false.
  /// Some operator cases additionally consume the peeked character (see the note below).
  /// </param>
  /// <param name="useCurrent">When true, <see cref="CurrentChar"/> is classified and no character is read first.</param>
  /// <returns>
  /// The recognised token, or <c>Token.Unknown</c> when the character does not start
  /// any token known to this method.
  /// </returns>
  private Token DetermineToken(bool peek, bool useCurrent)
  {
      char c = useCurrent ? CurrentChar : GetNextChar(peek);

      // Single-character tokens declared through attributes take priority over the switch below.
      if (TokenCharDictionary.TryGetValue(c, out Token charToken))
          return charToken;

      switch (c)
      {
          case ';': //semicolon is comment
              // Discard the rest of the line; the comment ends at the line feed or at end of input.
              while (CurrentChar != '\n')
              {
                  if (CurrentChar == '\0')
                      return Token.EOF;

                  GetNextChar();
              }

              return Token.NewLine;

          case '[':
              const string SkipStart = "[SKIPSTART]";
              const string SkipEnd = "[SKIPEND]";

              // A skip block is only recognised when the marker text sits at the read position
              // and the column is at most 1. The comparison is bounds-checked: a '[' close to
              // the end of the input used to make Substring throw.
              if (sourceMarker.Column > 1
                  || !SourceHasAt(sourceMarker.Pointer, SkipStart))
                  return Token.Unknown;

              while (GetNextChar() != '\0')
              {
                  if (CurrentChar == '[' && SourceHasAt(sourceMarker.Pointer, SkipEnd))
                  {
                      // Swallow the remainder of the closing marker's line.
                      while (true)
                      {
                          switch (GetNextChar())
                          {
                              case '\n':
                                  return Token.NewLine;
                              case '\0':
                                  return Token.EOF;
                          }
                      }
                  }
              }

              // Input ended before the closing marker was found.
              return Token.EOF;

          case '<':
              if (GetNextChar(true) == '>')
              {
                  GetNextChar();
                  return Token.NotEqual;
              }
              else if (GetNextChar(true) == '=')
              {
                  GetNextChar();
                  return Token.LessEqual;
              }
              else
                  return Token.Less;

          case '>':
              if (GetNextChar(true) == '=')
              {
                  GetNextChar();
                  return Token.MoreEqual;
              }
              else
                  return Token.More;

          // NOTE(review): in the cases below a call made with peek == true first consumes the
          // peeked character, so such a call is not free of side effects - confirm that the
          // callers rely on this.
          case '+':
              if (peek)
                  GetNextChar();

              if (GetNextChar(true) == '+')
              {
                  GetNextChar();
                  return Token.Increment;
              }
              else
                  return Token.Plus;

          case '-':
              if (peek)
                  GetNextChar();

              if (GetNextChar(true) == '-')
              {
                  GetNextChar();
                  return Token.Decrement;
              }
              else
                  return Token.Minus;

          case '=':
              if (peek)
                  GetNextChar();

              // "=" and "==" both produce Token.Equal.
              if (GetNextChar(true) == '=')
                  GetNextChar();

              return Token.Equal;

          case '&':
              if (peek)
                  GetNextChar();

              // "&" and "&&" both produce Token.And.
              if (GetNextChar(true) == '&')
                  GetNextChar();

              return Token.And;

          case '\\':
              if (peek)
                  GetNextChar();

              if (GetNextChar(true) == '@')
              {
                  GetNextChar();
                  return Token.TernaryEscape;
              }

              return Token.Unknown;

          case '|':
              if (peek)
                  GetNextChar();

              // "|" and "||" both produce Token.Or.
              if (GetNextChar(true) == '|')
                  GetNextChar();

              return Token.Or;

          case (char)0:
              return Token.EOF;
      }

      return Token.Unknown;
  }

  // Ordinal, bounds-checked test for `text` occurring in `source` at `index`.
  // Returns false instead of throwing when the range does not fit inside the source.
  private bool SourceHasAt(int index, string text)
  {
      return index >= 0
          && index <= source.Length - text.Length
          && string.CompareOrdinal(source, index, text, 0, text.Length) == 0;
  }
  /// <summary>
  /// Lazily tokenizes the source text. Each yielded token may be accompanied by
  /// side data: <see cref="TokenMarker"/> is set before every token, <see cref="Value"/>
  /// for numeric literals and <see cref="Identifier"/> for keywords and identifiers.
  /// </summary>
  /// <returns>The token stream; it ends after a single <c>Token.EOF</c>.</returns>
  private IEnumerator<Token> GetTokens()
  {
      sourceMarker = new Marker(-1, 1, 0);

      while (true)
      {
          // Skip blanks. A line feed is not whitespace for IsWhitespace, so it stops this loop.
          while (IsWhitespace(GetNextChar()))
          {
          }

          TokenMarker = sourceMarker;

          Token token = DetermineToken(false, true);

          if (token == Token.EOF)
          {
              yield return Token.EOF;
              yield break;
          }

          if (token != Token.Unknown)
          {
              yield return token;
              continue;
          }

          // Not a symbol: collect a word up to the next blank or the next character
          // that DetermineToken recognises.
          StringBuilder bodyBuilder = new StringBuilder(CurrentChar.ToString());

          while (DetermineToken(true, false) == Token.Unknown
                 && !IsWhitespace(GetNextChar(true)))
          {
              bodyBuilder.Append(GetNextChar());
          }

          string result = bodyBuilder.ToString();

          // Decimal / floating point literal.
          if (double.TryParse(result, NumberStyles.Float, CultureInfo.InvariantCulture, out var real))
          {
              Value = real;
              yield return Token.Value;
              continue;
          }

          // Hexadecimal literal. Only the leading "0x" is stripped (Replace used to remove every
          // occurrence), and both the prefix test and the parse are culture-independent.
          if (result.StartsWith("0x", StringComparison.Ordinal)
              && int.TryParse(result.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out int hexResult))
          {
              Value = hexResult;
              yield return Token.Value;
              continue;
          }

          // "<a>p<b>" literal, evaluated as a << b. The match has to cover the whole word,
          // and digit runs that do not fit an int fall through to the identifier path
          // instead of throwing from int.Parse.
          // NOTE(review): C# masks the shift count of an int, so b >= 32 wraps around - confirm intended.
          Match powMatch = PowRegex.Match(result);

          if (powMatch.Success
              && powMatch.Length == result.Length
              && int.TryParse(powMatch.Groups[1].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int a)
              && int.TryParse(powMatch.Groups[2].Value, NumberStyles.None, CultureInfo.InvariantCulture, out int b))
          {
              Value = a << b;
              yield return Token.Value;
              continue;
          }

          Identifier = result;

          // Keyword lookup.
          if (TokenDictionary.TryGetValue(Identifier, out token))
          {
              yield return token;
              continue;
          }

          yield return Token.Identifer;

          // The look-ahead above leaves the character that ended the word in CurrentChar;
          // if that was a line feed, report it as its own token.
          if (CurrentChar == '\n')
              yield return Token.NewLine;
      }
  }
  /// <summary>
  /// Returns the lexer's single token stream. Every call hands back the same
  /// enumerator instance that was created in the constructor; no new pass over
  /// the source is started.
  /// </summary>
  public IEnumerator<Token> GetEnumerator() => currentEnumerator;
  /// <summary>Non-generic counterpart; forwards to the generic <see cref="GetEnumerator"/>.</summary>
  IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
  // Binding strength of the binary operators; a larger number binds tighter.
  private static readonly Dictionary<Token, int> OrderOfOps = new Dictionary<Token, int>
  {
      { Token.Or, 0 }, { Token.And, 0 },
      { Token.Equal, 1 }, { Token.NotEqual, 1 },
      { Token.Less, 1 }, { Token.More, 1 }, { Token.LessEqual, 1 }, { Token.MoreEqual, 1 },
      { Token.Plus, 2 }, { Token.Minus, 2 },
      { Token.Asterisk, 3 }, { Token.Slash, 3 },
      { Token.Caret, 4 }
  };

  /// <summary>
  /// Evaluates the constant expression that starts at the current token, using one
  /// stack for operands and one for pending operators. Parenthesised sub-expressions
  /// are evaluated recursively.
  /// </summary>
  /// <returns>The computed value. The token stream is left on the first token that is not part of the expression.</returns>
  /// <exception cref="ParserException">
  /// The expression is empty, contains an identifier, lacks a closing parenthesis,
  /// uses an operator without a known precedence next to another operator, or is
  /// missing an operand.
  /// </exception>
  public Value Expression()
  {
      Stack<Value> stack = new Stack<Value>();
      Stack<Token> operators = new Stack<Token>();

      // Applies a binary operator to the two topmost operands.
      void Operation(Token token)
      {
          // Report malformed input as a parser error instead of letting Stack.Pop throw.
          if (stack.Count < 2)
              throw new ParserException($"Operator [{token}] is missing an operand", TokenMarker);

          Value b = stack.Pop();
          Value a = stack.Pop();
          stack.Push(a.Operate(b, token));
      }

      // Looks up an operator's precedence; unknown operators become parser errors
      // rather than a KeyNotFoundException.
      int Precedence(Token token)
      {
          if (!OrderOfOps.TryGetValue(token, out int precedence))
              throw new ParserException($"Unsupported operator [{token}]", TokenMarker);

          return precedence;
      }

      int i = 0; // number of tokens consumed so far by this call

      while (true)
      {
          Token current = currentEnumerator.Current;

          if (current == Token.Value)
          {
              stack.Push(Value);
          }
          else if (current == Token.Identifer)
          {
              throw new ParserException("Undeclared variable " + Identifier, TokenMarker);
          }
          else if (current == Token.LParen)
          {
              currentEnumerator.MoveNext();
              stack.Push(Expression());

              // The nested call stops on the first token it cannot use, which has to be the closing parenthesis.
              if (currentEnumerator.Current != Token.RParen)
                  throw new ParserException($"Was expecting [RParen] got [{currentEnumerator.Current}]", TokenMarker);
          }
          else if (current.IsArithmetic() && current.IsUnary() && i == 0)
          {
              // Leading unary operator: evaluate it as "0 <op> operand".
              stack.Push(0);
              operators.Push(current);
          }
          else if (current.IsArithmetic())
          {
              // Reduce every pending operator that binds at least as tightly as the incoming one.
              while (operators.Count > 0 && Precedence(current) <= Precedence(operators.Peek()))
                  Operation(operators.Pop());

              operators.Push(current);
          }
          else
          {
              if (i == 0)
                  throw new ParserException("Empty expression", TokenMarker);

              break;
          }

          i++;
          currentEnumerator.MoveNext();
      }

      while (operators.Count > 0)
          Operation(operators.Pop());

      return stack.Pop();
  }
  338. }
  339. }