Lexer.cs 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
  1. using System;
  2. using System.Collections;
  3. using System.Collections.Generic;
  4. using System.Globalization;
  5. using System.Linq;
  6. using System.Text;
  7. namespace NTERA.Interpreter
  8. {
  9. public class Lexer : IEnumerable<Token>
  10. {
  /// <summary>Complete text being tokenized.</summary>
  private readonly string source;

  /// <summary>Read position inside <c>source</c>; advanced by <c>GetNextChar</c>.</summary>
  private Marker sourceMarker;

  /// <summary>Character most recently produced by <c>GetNextChar</c>, whether consumed or only peeked.</summary>
  private char currentChar;

  /// <summary>The single token stream created in the constructor and handed out by <c>GetEnumerator</c>.</summary>
  private IEnumerator<Token> currentEnumerator;

  /// <summary>Backing field for <c>Type</c>.</summary>
  private LexerType _type;

  /// <summary>
  /// Lexing mode. Assigning it rebuilds the keyword and character lookup tables,
  /// because the character table is filtered by this value.
  /// </summary>
  public LexerType Type
  {
      get { return _type; }
      internal set
      {
          _type = value;
          InitTokenDictionaries();
      }
  }

  /// <summary>Source position captured just before the current token was classified.</summary>
  public Marker TokenMarker { get; set; }

  /// <summary>Text of the most recent word that was not recognised as a number.</summary>
  public string Identifer { get; set; }

  /// <summary>Payload of the most recent <c>Token.Value</c>.</summary>
  public Value Value { get; set; }
  /// <summary>
  /// Creates a lexer over <paramref name="input"/> and advances to the first token,
  /// so the enumerator's <c>Current</c> is valid immediately after construction.
  /// </summary>
  /// <param name="input">Source text to tokenize.</param>
  /// <param name="type">Lexing mode; selects which single-character tokens are recognised.</param>
  /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
  public Lexer(string input, LexerType type = LexerType.Both)
  {
      // Fail with a clear exception instead of a NullReferenceException raised
      // later from inside the first MoveNext() call.
      if (input == null)
          throw new ArgumentNullException(nameof(input));

      Type = type;
      source = input;

      // Pointer -1 means "before the first character"; GetTokens resets this again when it starts.
      sourceMarker = new Marker(-1, 1, 0);

      currentEnumerator = GetTokens();
      currentEnumerator.MoveNext();
  }
  /// <summary>
  /// Repositions the reader at <paramref name="marker"/>. Only the source position is
  /// replaced; the current character and current token are left as they were.
  /// </summary>
  /// <param name="marker">Position to continue reading from.</param>
  public void GoTo(Marker marker) => sourceMarker = marker;
  /// <summary>
  /// Returns the character following the current position and stores it in <c>currentChar</c>.
  /// </summary>
  /// <param name="peek">
  /// When true the position is not advanced. <c>currentChar</c> is still overwritten with
  /// the peeked character, and at end of input the pointer is still clamped to the source length.
  /// </param>
  /// <returns>The next character, or <c>'\0'</c> when no characters remain.</returns>
  char GetNextChar(bool peek = false)
  {
      int nextIndex = sourceMarker.Pointer + 1;

      // End of input: park the pointer on the length and report NUL.
      if (nextIndex >= source.Length)
      {
          sourceMarker.Pointer = source.Length;
          currentChar = '\0';
          return currentChar;
      }

      if (peek)
      {
          currentChar = source[nextIndex];
          return currentChar;
      }

      sourceMarker.Column++;
      sourceMarker.Pointer = nextIndex;
      currentChar = source[nextIndex];

      // A line feed starts a new line; the column restarts at 1.
      if (currentChar == '\n')
      {
          sourceMarker.Column = 1;
          sourceMarker.Line++;
      }

      return currentChar;
  }
  /// <summary>Keyword text to token, for ordinary keywords (case-insensitive).</summary>
  private Dictionary<string, Token> TokenDictionary;

  /// <summary>Keyword text to token, for keywords flagged as line keywords (case-insensitive).</summary>
  private Dictionary<string, Token> TokenLineDictionary;

  /// <summary>Single character to token, limited to entries whose context overlaps the current <c>Type</c>.</summary>
  private Dictionary<char, Token> TokenCharDictionary;

  /// <summary>
  /// Rebuilds the three lookup tables from the <c>LexerKeywordAttribute</c> and
  /// <c>LexerCharacterAttribute</c> annotations found on the <c>Token</c> enum members.
  /// </summary>
  private void InitTokenDictionaries()
  {
      TokenDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);
      TokenLineDictionary = new Dictionary<string, Token>(StringComparer.InvariantCultureIgnoreCase);
      TokenCharDictionary = new Dictionary<char, Token>();

      foreach (Token candidate in Enum.GetValues(typeof(Token)))
      {
          foreach (var keywordInfo in Utility.GetEnumAttributes<Token, LexerKeywordAttribute>(candidate))
          {
              Dictionary<string, Token> target = keywordInfo.IsLineKeyword
                  ? TokenLineDictionary
                  : TokenDictionary;

              target[keywordInfo.Keyword] = candidate;
          }

          foreach (var charInfo in Utility.GetEnumAttributes<Token, LexerCharacterAttribute>(candidate))
          {
              // Register the character only when it applies to the active lexer mode.
              if ((charInfo.LexerContext & Type) > 0)
              {
                  TokenCharDictionary[charInfo.Character] = candidate;
              }
          }
      }
  }
  /// <summary>
  /// True for any Unicode white-space character except line feed, which is
  /// significant to the lexer. Carriage return still counts as white space here.
  /// </summary>
  private static bool IsWhitespace(char c) => c != '\n' && char.IsWhiteSpace(c);
  /// <summary>
  /// True when <paramref name="c"/> ends a line: line feed, carriage return,
  /// or the NUL character used to signal end of input.
  /// </summary>
  private static bool IsEndOfLine(char c)
  {
      switch (c)
      {
          case '\n':
          case '\r':
          case '\0':
              return true;
          default:
              return false;
      }
  }
  /// <summary>
  /// True when <paramref name="c"/> is one of the two escape openers, '%' or '{'.
  /// </summary>
  private static bool IsEscape(char c)
  {
      switch (c)
      {
          case '%':
          case '{':
              return true;
          default:
              return false;
      }
  }
  /// <summary>
  /// Classifies the character held in <c>currentChar</c> as a token, consuming any
  /// further characters that belong to it (comment text, a two-character operator,
  /// a <c>%...%</c> escape, or a quoted string).
  /// </summary>
  /// <param name="c">
  /// Not read by the body. Every caller passes the value that was just stored in
  /// <c>currentChar</c>, which is what is actually inspected.
  /// </param>
  /// <returns>
  /// The recognised token, or <c>Token.Unknown</c> when the character starts a
  /// word (number, keyword or identifier) that the caller must assemble.
  /// </returns>
  /// <exception cref="ParserException">
  /// A <c>%...%</c> escape or a quoted string is still open when the input ends.
  /// </exception>
  /// <remarks>
  /// NOTE(review): this method advances the reader, so calling it on a peeked
  /// character (as the word loop in GetTokens does) can consume input.
  /// </remarks>
  private Token DetermineToken(char c)
  {
      if (TokenCharDictionary.TryGetValue(currentChar, out Token charToken))
          return charToken;

      switch (currentChar)
      {
          case ';': // semicolon starts a comment that runs to the end of the line
              // Also stop at end of input: GetNextChar keeps returning '\0' there,
              // so a comment on an unterminated last line would otherwise spin forever.
              while (currentChar != '\n' && sourceMarker.Pointer < source.Length)
                  GetNextChar();
              return Token.NewLine;

          case '<':
              if (!Type.HasFlag(LexerType.Real))
                  break;
              if (GetNextChar(true) == '>')
              {
                  GetNextChar();
                  return Token.NotEqual;
              }
              else if (GetNextChar(true) == '=')
              {
                  GetNextChar();
                  return Token.LessEqual;
              }
              else
                  return Token.Less;

          case '>':
              if (!Type.HasFlag(LexerType.Real))
                  break;
              if (GetNextChar(true) == '=')
              {
                  GetNextChar();
                  return Token.MoreEqual;
              }
              else
                  return Token.More;

          case '+':
              if (GetNextChar(true) == '=')
              {
                  GetNextChar();
                  return Token.Append;
              }
              else
                  return Token.Plus;

          case '%':
          {
              StringBuilder escapeText = new StringBuilder();
              while (GetNextChar() != '%')
              {
                  // Pointer is clamped to source.Length once the input is exhausted.
                  if (sourceMarker.Pointer >= source.Length)
                      throw new ParserException("Unterminated % escape", TokenMarker);
                  escapeText.Append(currentChar);
              }
              // The value keeps its surrounding percent signs.
              Value = $"%{escapeText}%";
              return Token.Value;
          }

          case '"':
          {
              StringBuilder literal = new StringBuilder();
              while (GetNextChar() != '"')
              {
                  if (sourceMarker.Pointer >= source.Length)
                      throw new ParserException("Unterminated string literal", TokenMarker);

                  if (currentChar == '\\')
                  {
                      // Recognised escapes are matched case-insensitively;
                      // any other escaped character is dropped.
                      switch (char.ToLowerInvariant(GetNextChar()))
                      {
                          case 'n': literal.Append('\n'); break;
                          case 't': literal.Append('\t'); break;
                          case '\\': literal.Append('\\'); break;
                          case '"': literal.Append('"'); break;
                      }
                  }
                  else
                  {
                      literal.Append(currentChar);
                  }
              }
              Value = new Value(literal.ToString());
              return Token.Value;
          }

          case (char)0:
              return Token.EOF;
      }

      return Token.Unknown;
  }
  /// <summary>
  /// Lazily tokenizes <c>source</c> from the beginning. Before each token is yielded the
  /// matching <c>TokenMarker</c>, <c>Value</c> or <c>Identifer</c> property is updated.
  /// </summary>
  /// <returns>
  /// An enumerator that ends after yielding <c>Token.EOF</c>. A line keyword produces three
  /// tokens: the keyword, a <c>Token.Value</c> holding the rest of the line, and a
  /// <c>Token.NewLine</c> or <c>Token.EOF</c>.
  /// </returns>
  private IEnumerator<Token> GetTokens()
  {
      sourceMarker = new Marker(-1, 1, 0);

      while (true)
      {
          // Skip white space (unless lexing raw string content) and always skip carriage returns.
          while ((IsWhitespace(GetNextChar()) && Type != LexerType.String) || currentChar == '\r') { }

          TokenMarker = sourceMarker;

          Token token = DetermineToken(currentChar);

          if (token == Token.EOF)
          {
              yield return Token.EOF;
              yield break;
          }

          if (token != Token.Unknown)
          {
              yield return token;
              continue;
          }

          // Not a single-character token: gather a word until something else begins.
          StringBuilder bodyBuilder = new StringBuilder(currentChar.ToString());

          while ((!IsEscape(GetNextChar(true)) || Type != LexerType.String)
                 && DetermineToken(GetNextChar(true)) == Token.Unknown
                 && (!IsWhitespace(GetNextChar(true)) || Type == LexerType.String)
                 && GetNextChar(true) != '\r')
          {
              bodyBuilder.Append(GetNextChar());
          }

          string result = bodyBuilder.ToString();

          if (double.TryParse(result, NumberStyles.Float, CultureInfo.InvariantCulture, out var real))
          {
              Value = real;
              yield return Token.Value;
              continue;
          }

          // Hex literal: strip only the leading "0x" (Replace would also remove later
          // occurrences), and parse culture-independently like the decimal path above.
          if (result.StartsWith("0x", StringComparison.Ordinal)
              && int.TryParse(result.Substring(2), NumberStyles.HexNumber, CultureInfo.InvariantCulture, out int hexResult))
          {
              Value = hexResult;
              yield return Token.Value;
              continue;
          }

          Identifer = result;

          if (TokenDictionary.TryGetValue(Identifer, out token))
          {
              yield return token;
              continue;
          }

          // In string mode white space is not skipped, so a word may start with one white-space character.
          if (Type == LexerType.String && char.IsWhiteSpace(Identifer[0]))
              Identifer = Identifer.Substring(1);

          if (TokenLineDictionary.TryGetValue(Identifer, out token))
          {
              // A line keyword takes the rest of the line verbatim as its argument.
              bodyBuilder = new StringBuilder();

              while (!IsEndOfLine(GetNextChar(true)))
                  bodyBuilder.Append(GetNextChar());

              yield return token;

              string strValue = bodyBuilder.ToString();

              // Drop the single separator between the keyword and its argument.
              if (strValue.Length > 0 && char.IsWhiteSpace(strValue[0]))
                  strValue = strValue.Substring(1);

              Value = new Value(strValue);
              yield return Token.Value;

              // currentChar still holds the peeked terminator that stopped the loop above.
              yield return currentChar == '\0' ? Token.EOF : Token.NewLine;
              continue;
          }

          yield return Token.Identifer;
      }
  }
  /// <summary>
  /// Returns the lexer's one shared token enumerator, already positioned on the current
  /// token. Every call returns the same instance; enumeration does not restart.
  /// </summary>
  public IEnumerator<Token> GetEnumerator() => currentEnumerator;
  /// <summary>Non-generic enumeration entry point; forwards to the generic overload.</summary>
  IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
  /// <summary>
  /// Binding strength of each binary operator used by <c>Expression</c>;
  /// a larger number binds tighter.
  /// </summary>
  private static readonly Dictionary<Token, int> OrderOfOps = new Dictionary<Token, int>
  {
      // logical
      [Token.Or] = 0,
      [Token.And] = 0,

      // comparison
      [Token.Equal] = 1,
      [Token.NotEqual] = 1,
      [Token.Less] = 1,
      [Token.More] = 1,
      [Token.LessEqual] = 1,
      [Token.MoreEqual] = 1,

      // additive
      [Token.Plus] = 2,
      [Token.Minus] = 2,

      // multiplicative
      [Token.Asterisk] = 3,
      [Token.Slash] = 3,

      // power
      [Token.Caret] = 4
  };
  /// <summary>
  /// Evaluates the expression that starts at the current token, using a value stack and an
  /// operator stack ordered by <c>OrderOfOps</c>. Stops at the first token that cannot
  /// continue the expression and leaves the enumerator positioned on it.
  /// </summary>
  /// <param name="context">
  /// Interpreter supplying variables and functions. When null, identifiers cannot be
  /// resolved: they become literal text in string mode and raise an error otherwise.
  /// </param>
  /// <returns>
  /// In string mode, the concatenation of every value left on the stack; otherwise the
  /// single value on top of the stack.
  /// </returns>
  /// <exception cref="ParserException">
  /// An identifier is undeclared, a function name is not followed by '(', a ')' is
  /// missing, or the expression is empty outside string mode.
  /// </exception>
  public Value Expression(Interpreter context = null)
  {
      Stack<Value> stack = new Stack<Value>();
      Stack<Token> operators = new Stack<Token>();

      // Applies a binary operator to the two topmost values (right operand is on top).
      void Operation(Token token)
      {
          Value b = stack.Pop();
          Value a = stack.Pop();
          Value result = a.Operate(b, token);
          stack.Push(result);
      }

      // Number of tokens processed so far; 0 means "at the start of this expression".
      int i = 0;

      while (true)
      {
          if (currentEnumerator.Current == Token.Value)
          {
              stack.Push(Value);
          }
          else if (currentEnumerator.Current == Token.Identifer)
          {
              if (context != null)
              {
                  if (context.Variables.ContainsKey(Identifer))
                  {
                      string varName = Identifer;
                      int index = 0;

                      currentEnumerator.MoveNext();

                      // Optional "name:indexExpression" form.
                      if (currentEnumerator.Current == Token.Colon)
                      {
                          currentEnumerator.MoveNext();
                          index = (int)Expression(context).Real;
                      }

                      stack.Push(context.Variables[varName, index]);

                      // The token after the variable has already been fetched, so skip the shared MoveNext.
                      i++;
                      continue;
                  }

                  if (context.FunctionDictionary.ContainsKey(Identifer))
                  {
                      string name = Identifer;
                      List<Value> args = new List<Value>();

                      currentEnumerator.MoveNext();
                      if (currentEnumerator.Current != Token.LParen)
                          throw new ParserException($"Was expecting [LParen] got [{currentEnumerator.Current}]", TokenMarker);

                      // Comma-separated arguments up to the closing parenthesis.
                      while (currentEnumerator.MoveNext() && currentEnumerator.Current != Token.RParen)
                      {
                          args.Add(Expression(context));
                          if (currentEnumerator.Current != Token.Comma)
                              break;
                      }

                      stack.Push(context.FunctionDictionary[name](args));

                      // Step past the token that ended the argument list.
                      currentEnumerator.MoveNext();
                      i++;
                      continue;
                  }
              }

              if (Type == LexerType.String)
                  stack.Push(Identifer);
              else
                  throw new ParserException("Undeclared variable " + Identifer, TokenMarker);
          }
          else if (currentEnumerator.Current == Token.LParen)
          {
              currentEnumerator.MoveNext();

              // Pass the context down so variables and functions resolve inside parentheses too.
              stack.Push(Expression(context));

              if (currentEnumerator.Current != Token.RParen)
                  throw new ParserException($"Was expecting [RParen] got [{currentEnumerator.Current}]", TokenMarker);
          }
          else if (Type.HasFlag(LexerType.Real) && currentEnumerator.Current.IsArithmetic()
                   && currentEnumerator.Current.IsUnary() && (i == 0))
          {
              // A leading unary operator is evaluated as "0 <op> operand".
              stack.Push(0);
              operators.Push(currentEnumerator.Current);
          }
          else if (Type == LexerType.String && currentEnumerator.Current.IsStringOp()
                   || Type.HasFlag(LexerType.Real) && currentEnumerator.Current.IsArithmetic())
          {
              // Reduce pending operators that bind at least as tightly before pushing this one.
              while (operators.Count > 0 && OrderOfOps[currentEnumerator.Current] <= OrderOfOps[operators.Peek()])
                  Operation(operators.Pop());

              operators.Push(currentEnumerator.Current);
          }
          else
          {
              // Any other token ends the expression.
              if (i == 0)
              {
                  if (Type == LexerType.String)
                      stack.Push("");
                  else
                      throw new ParserException("Empty expression", TokenMarker);
              }

              break;
          }

          i++;
          currentEnumerator.MoveNext();
      }

      while (operators.Count > 0)
          Operation(operators.Pop());

      // A Stack enumerates newest-first, so prepending each later element restores source order.
      return Type == LexerType.String
          ? stack.Aggregate((a, b) => b.String + a.String)
          : stack.Pop();
  }
  350. }
  351. }