LanguageHelper.cs 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Linq;
  4. using System.Text;
  5. namespace XUnity.AutoTranslator.Plugin.Core.Utilities
  6. {
  7. internal static class LanguageHelper
  8. {
  9. private static readonly Dictionary<string, Func<string, bool>> LanguageSymbolChecks = new Dictionary<string, Func<string, bool>>( StringComparer.OrdinalIgnoreCase )
  10. {
  11. { "ja", ContainsJapaneseSymbols },
  12. { "ru", ContainsRussianSymbols },
  13. { "zh-CN", ContainsChineseSymbols },
  14. { "zh-TW", ContainsChineseSymbols },
  15. { "ko", ContainsKoreanSymbols },
  16. { "en", ContainsStandardLatinSymbols },
  17. };
  18. private static readonly HashSet<string> WhitespaceLanguages = new HashSet<string>
  19. {
  20. "ru", "ko", "en"
  21. };
  22. public static bool IsFromLanguageSupported( string code )
  23. {
  24. return LanguageSymbolChecks.ContainsKey( code );
  25. }
  26. public static bool RequiresWhitespaceUponLineMerging( string code )
  27. {
  28. return WhitespaceLanguages.Contains( code );
  29. }
  30. public static Func<string, bool> GetSymbolCheck( string language )
  31. {
  32. if( LanguageSymbolChecks.TryGetValue( language, out Func<string, bool> check ) )
  33. {
  34. return check;
  35. }
  36. return text => true;
  37. }
  38. public static bool ContainsJapaneseSymbols( string text )
  39. {
  40. // Unicode Kanji Table:
  41. // http://www.rikai.com/library/kanjitables/kanji_codes.unicode.shtml
  42. foreach( var c in text )
  43. {
  44. if( ( c >= '\u3021' && c <= '\u3029' ) // kana-like symbols
  45. || ( c >= '\u3031' && c <= '\u3035' ) // kana-like symbols
  46. || ( c >= '\u3041' && c <= '\u3096' ) // hiragana
  47. || ( c >= '\u30a1' && c <= '\u30fa' ) // katakana
  48. || ( c >= '\uff66' && c <= '\uff9d' ) // half-width katakana
  49. || ( c >= '\u4e00' && c <= '\u9faf' ) // CJK unifed ideographs - Common and uncommon kanji
  50. || ( c >= '\u3400' && c <= '\u4dbf' ) // CJK unified ideographs Extension A - Rare kanji ( 3400 - 4dbf)
  51. || ( c >= '\uf900' && c <= '\ufaff' ) ) // CJK Compatibility Ideographs
  52. {
  53. return true;
  54. }
  55. }
  56. return false;
  57. }
  58. public static bool ContainsKoreanSymbols( string text )
  59. {
  60. foreach( var c in text )
  61. {
  62. if( ( c >= '\uac00' && c <= '\ud7af' ) ) // Hangul Syllables
  63. {
  64. return true;
  65. }
  66. }
  67. return false;
  68. }
  69. public static bool ContainsChineseSymbols( string text )
  70. {
  71. foreach( var c in text )
  72. {
  73. if( ( c >= '\u4e00' && c <= '\u9faf' )
  74. || ( c >= '\u3400' && c <= '\u4dbf' )
  75. || ( c >= '\uf900' && c <= '\ufaff' ) )
  76. {
  77. return true;
  78. }
  79. }
  80. return false;
  81. }
  82. public static bool ContainsRussianSymbols( string text )
  83. {
  84. foreach( var c in text )
  85. {
  86. if( ( c >= '\u0400' && c <= '\u04ff' )
  87. || ( c >= '\u0500' && c <= '\u052f' )
  88. || ( c >= '\u2de0' && c <= '\u2dff' )
  89. || ( c >= '\ua640' && c <= '\ua69f' )
  90. || ( c >= '\u1c80' && c <= '\u1c88' )
  91. || ( c >= '\ufe2e' && c <= '\ufe2f' )
  92. || ( c == '\u1d2b' || c == '\u1d78' ) )
  93. {
  94. return true;
  95. }
  96. }
  97. return false;
  98. }
  99. public static bool ContainsStandardLatinSymbols( string text )
  100. {
  101. foreach( var c in text )
  102. {
  103. if( ( c >= '\u0041' && c <= '\u005a' )
  104. || ( c >= '\u0061' && c <= '\u007a' ) )
  105. {
  106. return true;
  107. }
  108. }
  109. return false;
  110. }
  111. }
  112. }