// UnityTextParserBase.cs
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Globalization;
  4. using System.Linq;
  5. using System.Text;
  6. namespace XUnity.AutoTranslator.Plugin.Core.Parsing
  7. {
  8. public abstract class UnityTextParserBase
  9. {
  10. private static readonly HashSet<char> ValidTagNameChars = new HashSet<char>
  11. {
  12. 'a', 'b', 'c', 'd','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','x','y','z',
  13. 'A', 'B', 'C', 'D','E','F','G','H','I','j','K','L','M','N','O','P','Q','R','S','T','U','V','X','Y','Z'
  14. };
  15. private HashSet<string> _ignored = new HashSet<string>();
  16. public UnityTextParserBase()
  17. {
  18. }
  19. protected void AddIgnoredTag( string name )
  20. {
  21. _ignored.Add( name );
  22. }
  23. public ParserResult Parse( string input )
  24. {
  25. StringBuilder textSinceLastChange = new StringBuilder();
  26. StringBuilder template = new StringBuilder();
  27. Dictionary<string, string> args = new Dictionary<string, string>();
  28. bool ignoringCurrentTag = false;
  29. char arg = 'A';
  30. Stack<string> tags = new Stack<string>();
  31. var state = ParsingState.Text;
  32. for( int i = 0 ; i < input.Length ; i++ )
  33. {
  34. var c = input[ i ];
  35. if( c != '<' && c != '>' )
  36. {
  37. textSinceLastChange.Append( c );
  38. }
  39. var previousState = state;
  40. switch( previousState )
  41. {
  42. case ParsingState.Text:
  43. state = ParseText( input, ref i );
  44. break;
  45. case ParsingState.NamingStartTag:
  46. state = ParseNamingStartTag( input, ref i );
  47. break;
  48. case ParsingState.NamingEndTag:
  49. state = ParseNamingEndTag( input, ref i );
  50. break;
  51. case ParsingState.FinishingStartTag:
  52. state = ParseFinishingStartTag( input, ref i );
  53. break;
  54. case ParsingState.FinishingEndTag:
  55. state = ParseFinishingEndTag( input, ref i );
  56. break;
  57. default:
  58. break;
  59. }
  60. bool stateChanged = state != previousState;
  61. if( stateChanged )
  62. {
  63. // whenever the state changes, we want to add text, potentially
  64. string text;
  65. if( c == '<' || c == '>' )
  66. {
  67. text = textSinceLastChange.ToString();
  68. textSinceLastChange = new StringBuilder();
  69. }
  70. else
  71. {
  72. text = TakeAllButLast( textSinceLastChange );
  73. }
  74. switch( previousState )
  75. {
  76. case ParsingState.Text:
  77. {
  78. if( !string.IsNullOrEmpty( text ) )
  79. {
  80. var key = "{{" + arg + "}}";
  81. arg++;
  82. args.Add( key, text );
  83. template.Append( key );
  84. }
  85. }
  86. break;
  87. case ParsingState.NamingStartTag:
  88. {
  89. ignoringCurrentTag = _ignored.Contains( text );
  90. tags.Push( text );
  91. if( !ignoringCurrentTag )
  92. {
  93. template.Append( "<" + text );
  94. if( state != ParsingState.FinishingStartTag )
  95. {
  96. template.Append( ">" );
  97. }
  98. }
  99. }
  100. break;
  101. case ParsingState.FinishingStartTag:
  102. {
  103. if( !ignoringCurrentTag )
  104. {
  105. template.Append( text + ">" );
  106. }
  107. }
  108. break;
  109. case ParsingState.NamingEndTag:
  110. {
  111. if( !ignoringCurrentTag )
  112. {
  113. template.Append( "<" + text );
  114. }
  115. if( state != ParsingState.FinishingEndTag )
  116. {
  117. if( !ignoringCurrentTag )
  118. {
  119. template.Append( ">" );
  120. }
  121. var tag = tags.Pop();
  122. ignoringCurrentTag = tags.Count > 0 && _ignored.Contains( tags.Peek() );
  123. }
  124. }
  125. break;
  126. case ParsingState.FinishingEndTag:
  127. {
  128. if( !ignoringCurrentTag )
  129. {
  130. template.Append( text + ">" );
  131. }
  132. var tag = tags.Pop();
  133. ignoringCurrentTag = tags.Count > 0 && _ignored.Contains( tags.Peek() );
  134. }
  135. break;
  136. }
  137. }
  138. }
  139. if( state == ParsingState.Text )
  140. {
  141. var text = textSinceLastChange.ToString();
  142. if( !string.IsNullOrEmpty( text ) )
  143. {
  144. var key = "{{" + arg + "}}";
  145. arg++;
  146. args.Add( key, text );
  147. template.Append( key );
  148. }
  149. }
  150. // finally, lets merge some of the arguments together
  151. var templateString = template.ToString();
  152. int idx = -1;
  153. while( ( idx = templateString.IndexOf( "}}{{" ) ) != -1 )
  154. {
  155. var arg1 = templateString[ idx - 1 ];
  156. var arg2 = templateString[ idx + 4 ];
  157. var key1 = "{{" + arg1 + "}}";
  158. var key2 = "{{" + arg2 + "}}";
  159. var text1 = args[ key1 ];
  160. var text2 = args[ key2 ];
  161. var fullText = text1 + text2;
  162. var fullKey = key1 + key2;
  163. var newKey = "{{" + ( ++arg ) + "}}";
  164. args.Remove( key1 );
  165. args.Remove( key2 );
  166. args.Add( newKey, fullText );
  167. templateString = templateString.Replace( fullKey, newKey );
  168. }
  169. return new ParserResult( input, templateString, args );
  170. }
  171. private string TakeAllButLast( StringBuilder builder )
  172. {
  173. if( builder.Length > 0 )
  174. {
  175. var str = builder.ToString( 0, builder.Length - 1 );
  176. builder.Remove( 0, builder.Length - 1 );
  177. return str;
  178. }
  179. return string.Empty;
  180. }
  181. private ParsingState ParseText( string s, ref int i )
  182. {
  183. if( s[ i ] == '<' )
  184. {
  185. if( i + 1 < s.Length && s[ i + 1 ] == '/' )
  186. {
  187. return ParsingState.NamingEndTag;
  188. }
  189. else
  190. {
  191. return ParsingState.NamingStartTag;
  192. }
  193. }
  194. else
  195. {
  196. return ParsingState.Text;
  197. }
  198. }
  199. private ParsingState ParseNamingStartTag( string s, ref int i )
  200. {
  201. if( ValidTagNameChars.Contains( s[ i ] ) )
  202. {
  203. return ParsingState.NamingStartTag;
  204. }
  205. else if( s[ i ] == '>' )
  206. {
  207. // we need to determine if we are inside or outside a tag after this!
  208. return ParsingState.Text;
  209. }
  210. else
  211. {
  212. return ParsingState.FinishingStartTag;
  213. }
  214. }
  215. private ParsingState ParseNamingEndTag( string s, ref int i )
  216. {
  217. if( ValidTagNameChars.Contains( s[ i ] ) )
  218. {
  219. return ParsingState.NamingEndTag;
  220. }
  221. else if( s[ i ] == '>' )
  222. {
  223. // we need to determine if we are inside or outside a tag after this!
  224. return ParsingState.Text;
  225. }
  226. else
  227. {
  228. return ParsingState.FinishingEndTag;
  229. }
  230. }
  231. private ParsingState ParseFinishingStartTag( string s, ref int i )
  232. {
  233. if( s[ i ] == '>' )
  234. {
  235. return ParsingState.Text;
  236. }
  237. else
  238. {
  239. return ParsingState.FinishingStartTag;
  240. }
  241. }
  242. private ParsingState ParseFinishingEndTag( string s, ref int i )
  243. {
  244. if( s[ i ] == '>' )
  245. {
  246. return ParsingState.Text;
  247. }
  248. else
  249. {
  250. return ParsingState.FinishingEndTag;
  251. }
  252. }
  253. private enum ParsingState
  254. {
  255. Text,
  256. NamingStartTag,
  257. NamingEndTag,
  258. FinishingStartTag,
  259. FinishingEndTag
  260. }
  261. }
  262. }