Parser.cs 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. namespace NTERA.Interpreter.Compiler
  5. {
  6. public class Parser
  7. {
  8. protected Lexer Lexer { get; }
  9. protected FunctionDefinition SelfDefinition { get; }
  10. protected IList<FunctionDefinition> FunctionDefinitions { get; }
  11. protected VariableDictionary GlobalVariables { get; }
  12. protected VariableDictionary LocalVariables { get; }
  13. protected IList<string> StringStatements { get; }
  14. protected IDictionary<string, IList<IList<string>>> CsvDefinitions { get; }
  15. protected IEnumerator<Token> Enumerator { get; }
  16. protected bool hasPeeked = false;
  17. protected Token peekedToken = Token.Unknown;
  18. protected Token GetNextToken(bool peek = false)
  19. {
  20. if (peek && hasPeeked)
  21. return peekedToken;
  22. if (!hasPeeked)
  23. Enumerator.MoveNext();
  24. peekedToken = Enumerator.Current;
  25. hasPeeked = peek;
  26. return Enumerator.Current;
  27. }
  28. protected Marker CurrentPosition => new Marker(Lexer.TokenMarker.Pointer + SelfDefinition.Position.Pointer,
  29. Lexer.TokenMarker.Line + SelfDefinition.Position.Line - 1,
  30. Lexer.TokenMarker.Column);
  31. public Parser(string input, FunctionDefinition selfDefinition, IList<FunctionDefinition> functionDefinitions, VariableDictionary globalVariables, VariableDictionary localVariables, IList<string> stringStatements, IDictionary<string, IList<IList<string>>> csvDefinitions)
  32. {
  33. Lexer = new Lexer(input);
  34. Enumerator = Lexer.GetEnumerator();
  35. SelfDefinition = selfDefinition;
  36. FunctionDefinitions = functionDefinitions;
  37. GlobalVariables = globalVariables;
  38. LocalVariables = localVariables;
  39. StringStatements = stringStatements;
  40. CsvDefinitions = csvDefinitions;
  41. }
  42. public IEnumerable<ExecutionNode> Parse(out List<ParserError> errors)
  43. {
  44. errors = new List<ParserError>();
  45. List<ExecutionNode> nodes = new List<ExecutionNode>();
  46. using (Enumerator)
  47. {
  48. do
  49. {
  50. var node = ParseLine(out var error);
  51. if (error != null)
  52. {
  53. errors.Add(error);
  54. nodes.Add(new ExecutionNode
  55. {
  56. Type = "error",
  57. Metadata =
  58. {
  59. ["message"] = error.ErrorMessage,
  60. ["symbol"] = error.SymbolMarker.ToString()
  61. },
  62. Symbol = error.SymbolMarker
  63. });
  64. //resynchronize to a new line
  65. while (Enumerator.MoveNext()
  66. && Enumerator.Current != Token.NewLine
  67. && Enumerator.Current != Token.EOF)
  68. {
  69. }
  70. }
  71. else if (node != null)
  72. {
  73. nodes.Add(node);
  74. }
  75. hasPeeked = false;
  76. } while (Enumerator.MoveNext());
  77. }
  78. return nodes;
  79. }
  80. protected ExecutionNode ParseLine(out ParserError error)
  81. {
  82. error = null;
  83. switch (Enumerator.Current)
  84. {
  85. case Token.Identifer:
  86. if (GlobalVariables.ContainsKey(Lexer.Identifer)
  87. || LocalVariables.ContainsKey(Lexer.Identifer))
  88. {
  89. string variableName = Lexer.Identifer;
  90. bool isGlobal = GlobalVariables.ContainsKey(variableName);
  91. var node = new ExecutionNode
  92. {
  93. Type = "assignment",
  94. Symbol = CurrentPosition
  95. };
  96. var variable = GetVariable(out error);
  97. if (error != null)
  98. return null;
  99. if (GetNextToken() != Token.Equal
  100. && Enumerator.Current != Token.Increment
  101. && Enumerator.Current != Token.Decrement
  102. && !Enumerator.Current.IsArithmetic())
  103. {
  104. error = new ParserError($"Unexpected token, expecting assignment: {Enumerator.Current}", CurrentPosition);
  105. return null;
  106. }
  107. ExecutionNode value = null;
  108. if (Enumerator.Current == Token.Increment)
  109. {
  110. value = OperateNodes(variable, CreateConstant(1), Token.Plus);
  111. }
  112. else if (Enumerator.Current == Token.Decrement)
  113. {
  114. value = OperateNodes(variable, CreateConstant(1), Token.Minus);
  115. }
  116. else if (Enumerator.Current != Token.Equal)
  117. {
  118. Token arithmeticToken = Enumerator.Current;
  119. if (GetNextToken() != Token.Equal)
  120. {
  121. error = new ParserError($"Unexpected token, expecting assignment: {Enumerator.Current}", CurrentPosition);
  122. return null;
  123. }
  124. ExecutionNode newValue = Expression(out error);
  125. value = OperateNodes(variable, newValue, arithmeticToken);
  126. }
  127. else
  128. {
  129. var type = isGlobal
  130. ? GlobalVariables[variableName].Type
  131. : LocalVariables[variableName].Type;
  132. value = type == ValueType.String
  133. ? StringExpression(out error)
  134. : Expression(out error);
  135. }
  136. if (error != null)
  137. return null;
  138. node.SubNodes = new[]
  139. {
  140. variable,
  141. new ExecutionNode
  142. {
  143. Type = "value",
  144. SubNodes = new[] { value }
  145. }
  146. };
  147. return node;
  148. }
  149. else if (Lexer.Identifer == "CASE")
  150. {
  151. var node = new ExecutionNode
  152. {
  153. Type = "case",
  154. Symbol = CurrentPosition
  155. };
  156. var value = Expression(out error);
  157. if (error != null)
  158. return null;
  159. if (Enumerator.Current == Token.NewLine
  160. || Enumerator.Current == Token.EOF)
  161. {
  162. node.Metadata["casetype"] = "value";
  163. node.SubNodes = new[] { value };
  164. return node;
  165. }
  166. if (Enumerator.Current == Token.Identifer)
  167. {
  168. if (Lexer.Identifer == "TO")
  169. {
  170. var value2 = Expression(out error);
  171. if (error != null)
  172. return null;
  173. node.Metadata["casetype"] = "to";
  174. node.SubNodes = new[] { value, value2 };
  175. return node;
  176. }
  177. }
  178. error = new ParserError($"Unexpected token: {Enumerator.Current}", CurrentPosition);
  179. return null;
  180. }
  181. else //treat as statement
  182. {
  183. string statementName = Lexer.Identifer;
  184. var node = new ExecutionNode
  185. {
  186. Type = "statement",
  187. Metadata =
  188. {
  189. ["name"] = statementName
  190. },
  191. Symbol = CurrentPosition
  192. };
  193. List<ExecutionNode> parameters = new List<ExecutionNode>();
  194. if (StringStatements.Contains(statementName))
  195. {
  196. var value = StringExpression(out error);
  197. if (error != null)
  198. return null;
  199. if (value != null)
  200. parameters.Add(value);
  201. node.SubNodes = parameters.ToArray();
  202. return node;
  203. }
  204. if (GetNextToken(true) == Token.NewLine
  205. || GetNextToken(true) == Token.EOF)
  206. {
  207. return node;
  208. }
  209. else if (GetNextToken(true) == Token.Colon
  210. || GetNextToken(true) == Token.Equal)
  211. {
  212. error = new ParserError($"Undeclared variable: {statementName}", node.Symbol);
  213. return null;
  214. }
  215. while (Enumerator.Current != Token.NewLine
  216. && Enumerator.Current != Token.EOF)
  217. {
  218. parameters.Add(Expression(out error));
  219. if (error != null)
  220. {
  221. error = new ParserError($"{error.ErrorMessage} (statement [{statementName}])", error.SymbolMarker);
  222. return null;
  223. }
  224. if (Enumerator.Current != Token.Comma
  225. && Enumerator.Current != Token.NewLine
  226. && Enumerator.Current != Token.EOF)
  227. {
  228. error = new ParserError($"Unexpected token: {Enumerator.Current}", CurrentPosition);
  229. return null;
  230. }
  231. }
  232. node.SubNodes = parameters.ToArray();
  233. return node;
  234. }
  235. case Token.Function:
  236. case Token.Sharp:
  237. while (Enumerator.MoveNext()
  238. && Enumerator.Current != Token.NewLine
  239. && Enumerator.Current != Token.EOF)
  240. {
  241. }
  242. return null;
  243. case Token.NewLine:
  244. case Token.EOF:
  245. return null;
  246. default:
  247. error = new ParserError($"Unexpected token: {Enumerator.Current}", CurrentPosition);
  248. return null;
  249. }
  250. }
  251. protected ExecutionNode GetVariable(out ParserError error)
  252. {
  253. string variableName = Lexer.Identifer;
  254. var node = new ExecutionNode
  255. {
  256. Type = "variable",
  257. Metadata =
  258. {
  259. ["name"] = variableName
  260. },
  261. Symbol = CurrentPosition
  262. };
  263. List<ExecutionNode> indices = new List<ExecutionNode>();
  264. error = null;
  265. while (GetNextToken(true) == Token.Colon)
  266. {
  267. GetNextToken();
  268. var token = GetNextToken();
  269. if (token == Token.LParen)
  270. {
  271. indices.Add(Expression(out error));
  272. if (error != null)
  273. return null;
  274. if (Enumerator.Current != Token.RParen)
  275. {
  276. error = new ParserError("Invalid expression - Expected right bracket", CurrentPosition);
  277. return null;
  278. }
  279. }
  280. else if (token == Token.Value)
  281. {
  282. indices.Add(CreateConstant(Lexer.Value));
  283. }
  284. else if (token == Token.Identifer)
  285. {
  286. IList<IList<string>> csvTable = CsvDefinitions
  287. .Where(x => x.Key.IndexOf(variableName, StringComparison.OrdinalIgnoreCase) >= 0)
  288. .OrderBy(x => x.Key.Equals(variableName, StringComparison.OrdinalIgnoreCase) ? 1 : 2)
  289. .FirstOrDefault().Value;
  290. IList<string> alias = csvTable?.FirstOrDefault(x => x.Count > 1 && x[1] == Lexer.Identifer);
  291. if (alias != null)
  292. {
  293. indices.Add(CreateConstant(int.Parse(alias[0])));
  294. continue;
  295. }
  296. if (GlobalVariables.ContainsKey(Lexer.Identifer)
  297. || LocalVariables.ContainsKey(Lexer.Identifer))
  298. {
  299. var subNode = new ExecutionNode
  300. {
  301. Type = "variable",
  302. Metadata =
  303. {
  304. ["name"] = Lexer.Identifer
  305. },
  306. Symbol = CurrentPosition
  307. };
  308. indices.Add(subNode);
  309. continue;
  310. }
  311. if (FunctionDefinitions.Any(x => x.Name == Lexer.Identifer))
  312. {
  313. indices.Add(Expression(out error));
  314. if (error != null)
  315. return null;
  316. continue;
  317. }
  318. error = new ParserError($"Unknown identifier: {Lexer.Identifer}", CurrentPosition);
  319. return null;
  320. }
  321. }
  322. if (indices.Count > 0)
  323. {
  324. ExecutionNode indexNode = new ExecutionNode
  325. {
  326. Type = "index",
  327. SubNodes = indices.ToArray()
  328. };
  329. node.SubNodes = new[] { indexNode };
  330. }
  331. return node;
  332. }
  333. protected ExecutionNode GetFunction(out ParserError error)
  334. {
  335. error = null;
  336. Token token;
  337. Marker symbolMarker = CurrentPosition;
  338. List<ExecutionNode> parameters = new List<ExecutionNode>();
  339. string functionName = Lexer.Identifer;
  340. if (GetNextToken() != Token.LParen)
  341. {
  342. error = new ParserError($"Unexpected token: {Enumerator.Current}", CurrentPosition);
  343. return null;
  344. }
  345. while ((token = GetNextToken(true)) == Token.Identifer
  346. || token == Token.Value
  347. || token.IsUnary())
  348. {
  349. parameters.Add(Expression(out error));
  350. if (error != null)
  351. return null;
  352. if (Enumerator.Current != Token.Comma
  353. && Enumerator.Current != Token.RParen)
  354. {
  355. error = new ParserError($"Unexpected token: {Enumerator.Current}", CurrentPosition);
  356. return null;
  357. }
  358. if (Enumerator.Current == Token.RParen)
  359. break;
  360. }
  361. if (Enumerator.Current != Token.RParen)
  362. {
  363. error = new ParserError($"Unexpected token: {Enumerator.Current}", CurrentPosition);
  364. return null;
  365. }
  366. var functionDefinition = FunctionDefinitions.FirstOrDefault(x => x.Name == functionName
  367. && x.Parameters.Length >= parameters.Count);
  368. if (functionDefinition == null)
  369. {
  370. error = new ParserError($"No matching method with same amount of parameters: {functionName} ({parameters.Count})", CurrentPosition);
  371. return null;
  372. }
  373. return CallMethod(functionName, symbolMarker, parameters.ToArray());
  374. }
  375. private static readonly Dictionary<Token, int> OrderOfOps = new Dictionary<Token, int>
  376. {
  377. { Token.Or, 0 }, { Token.And, 0 }, { Token.Not, 0 },
  378. { Token.Equal, 1 }, { Token.NotEqual, 1 },
  379. { Token.Less, 1 }, { Token.More, 1 }, { Token.LessEqual, 1 }, { Token.MoreEqual, 1 },
  380. { Token.Plus, 2 }, { Token.Minus, 2 },
  381. { Token.Asterisk, 3 }, { Token.Slash, 3 }, { Token.Modulo, 3 },
  382. { Token.Caret, 4 }
  383. };
  384. protected ExecutionNode Expression(out ParserError error, bool useModulo = true)
  385. {
  386. error = null;
  387. var operators = new Stack<Token>();
  388. var operands = new Stack<ExecutionNode>();
  389. Token token;
  390. void ProcessOperation(out ParserError localError)
  391. {
  392. localError = null;
  393. Token op = operators.Pop();
  394. if (op.IsUnary() && operands.Count >= 1)
  395. {
  396. var operand = operands.Pop();
  397. operands.Push(new ExecutionNode
  398. {
  399. Type = "operation",
  400. Metadata =
  401. {
  402. ["type"] = GetOperationName(op),
  403. ["unary"] = "true"
  404. },
  405. SubNodes = new[]
  406. {
  407. operand
  408. }
  409. });
  410. }
  411. else if (operands.Count >= 2)
  412. {
  413. ExecutionNode right = operands.Pop();
  414. ExecutionNode left = operands.Pop();
  415. operands.Push(new ExecutionNode
  416. {
  417. Type = "operation",
  418. Metadata =
  419. {
  420. ["type"] = GetOperationName(op),
  421. ["unary"] = "false"
  422. },
  423. SubNodes = new[]
  424. {
  425. left,
  426. right
  427. }
  428. });
  429. }
  430. else
  431. localError = new ParserError("Invalid expression - not enough operands", CurrentPosition);
  432. }
  433. void AttemptUnaryConversion(out ParserError localError)
  434. {
  435. localError = null;
  436. while (operators.Count > 0
  437. && operators.Peek().IsUnary())
  438. {
  439. ProcessOperation(out localError);
  440. if (localError != null)
  441. return;
  442. }
  443. }
  444. while ((token = GetNextToken()) != Token.NewLine
  445. && token != Token.EOF
  446. && token != Token.Comma
  447. && token != Token.Colon
  448. && token != Token.Format
  449. && token != Token.CloseBracket
  450. && (useModulo || token != Token.Modulo))
  451. {
  452. if (token == Token.Value)
  453. {
  454. operands.Push(CreateConstant(Lexer.Value));
  455. AttemptUnaryConversion(out error);
  456. if (error != null)
  457. return null;
  458. }
  459. else if (token == Token.Identifer)
  460. {
  461. if (GlobalVariables.ContainsKey(Lexer.Identifer)
  462. || LocalVariables.ContainsKey(Lexer.Identifer))
  463. {
  464. operands.Push(GetVariable(out error));
  465. if (error != null)
  466. return null;
  467. }
  468. else if (FunctionDefinitions.Any(x => x.Name == Lexer.Identifer))
  469. {
  470. operands.Push(GetFunction(out error));
  471. if (error != null)
  472. return null;
  473. }
  474. else
  475. {
  476. error = new ParserError($"Unknown identifier: {Lexer.Identifer}", CurrentPosition);
  477. return null;
  478. }
  479. }
  480. else if (token.IsArithmetic())
  481. {
  482. if (token.IsUnary())
  483. {
  484. operators.Push(token);
  485. continue;
  486. }
  487. if (!operands.Any() && !token.IsUnary())
  488. {
  489. error = new ParserError($"Invalid unary operator: {token}", CurrentPosition);
  490. return null;
  491. }
  492. while (operators.Any() && OrderOfOps[token] <= OrderOfOps[operators.Peek()])
  493. {
  494. ProcessOperation(out error);
  495. if (error != null)
  496. return null;
  497. }
  498. operators.Push(token);
  499. }
  500. else if (token == Token.LParen)
  501. {
  502. operands.Push(Expression(out var localError));
  503. if (localError != null)
  504. {
  505. error = localError;
  506. return null;
  507. }
  508. }
  509. else if (token == Token.RParen)
  510. {
  511. break;
  512. }
  513. else
  514. {
  515. error = new ParserError($"Unexpected token: {token}", CurrentPosition);
  516. return null;
  517. }
  518. }
  519. while (operators.Any())
  520. {
  521. ProcessOperation(out error);
  522. if (error != null)
  523. return null;
  524. }
  525. return operands.Pop();
  526. }
  527. protected ExecutionNode StringExpression(out ParserError error)
  528. {
  529. error = null;
  530. ExecutionNode value = null;
  531. Lexer.Type = LexerType.String;
  532. while (Enumerator.MoveNext()
  533. && (Enumerator.Current == Token.Value
  534. || Enumerator.Current == Token.Format
  535. || Enumerator.Current == Token.OpenBracket))
  536. {
  537. if (Enumerator.Current == Token.Value)
  538. {
  539. value = value == null
  540. ? CreateConstant(Lexer.Value)
  541. : OperateNodes(value, CreateConstant(Lexer.Value), Token.Plus);
  542. }
  543. else
  544. {
  545. List<ExecutionNode> formatParams = new List<ExecutionNode>();
  546. Marker symbolMarker = CurrentPosition;
  547. bool isSpecialFormat = Enumerator.Current == Token.OpenBracket;
  548. do
  549. {
  550. Lexer.Type = LexerType.Both;
  551. var tempValue = Expression(out error, isSpecialFormat);
  552. if (error != null)
  553. return null;
  554. formatParams.Add(tempValue);
  555. } while (Enumerator.Current == Token.Comma);
  556. var formattedValue = CallMethod("_FORMAT", symbolMarker, formatParams.ToArray());
  557. value = value == null
  558. ? formattedValue
  559. : OperateNodes(value, formattedValue, Token.Plus);
  560. Lexer.Type = LexerType.String;
  561. }
  562. }
  563. Lexer.Type = LexerType.Both;
  564. return value;
  565. }
  566. private static readonly Dictionary<Token, string> OperationNames = new Dictionary<Token, string>
  567. {
  568. [Token.Plus] = "add",
  569. [Token.Asterisk] = "multiply",
  570. [Token.Minus] = "subtract",
  571. [Token.Slash] = "divide",
  572. };
  573. private static string GetOperationName(Token token)
  574. {
  575. return OperationNames.TryGetValue(token, out string result)
  576. ? result
  577. : token.ToString();
  578. }
  579. private ExecutionNode CreateConstant(Value value)
  580. {
  581. return new ExecutionNode
  582. {
  583. Type = "constant",
  584. Metadata =
  585. {
  586. ["type"] = value.Type.ToString(),
  587. ["value"] = value.ToString()
  588. },
  589. Symbol = CurrentPosition
  590. };
  591. }
  592. private static ExecutionNode OperateNodes(ExecutionNode left, ExecutionNode right, Token token)
  593. {
  594. return new ExecutionNode
  595. {
  596. Type = "operation",
  597. Metadata =
  598. {
  599. ["type"] = GetOperationName(token)
  600. },
  601. SubNodes = new[]
  602. {
  603. left,
  604. right
  605. }
  606. };
  607. }
  608. private static ExecutionNode CallMethod(string methodName, Marker symbolMarker, params ExecutionNode[] parameters)
  609. {
  610. return new ExecutionNode
  611. {
  612. Type = "call",
  613. Metadata =
  614. {
  615. ["target"] = methodName
  616. },
  617. Symbol = symbolMarker,
  618. SubNodes = new[]
  619. {
  620. new ExecutionNode
  621. {
  622. Type = "parameters",
  623. SubNodes = parameters.ToArray()
  624. }
  625. }
  626. };
  627. }
  628. }
  629. }