diff --git a/JSMR.Infrastructure/Common/Languages/LanguageIdentifier.cs b/JSMR.Infrastructure/Common/Languages/LanguageIdentifier.cs
index ba8400b..ad1a611 100644
--- a/JSMR.Infrastructure/Common/Languages/LanguageIdentifier.cs
+++ b/JSMR.Infrastructure/Common/Languages/LanguageIdentifier.cs
@@ -26,6 +26,9 @@ public class LanguageIdentifier : ILanguageIdentifier
 
     public Language GetLanguage(string text)
     {
+        if (string.IsNullOrWhiteSpace(text))
+            return Language.Unknown;
+
         var rankedLanguages = _identifier.Identify(text).Where(x => _languages.Contains(x.Item1.Iso639_3));
         var identifiedLanguage = rankedLanguages.OrderBy(x => x.Item2).FirstOrDefault();
 
diff --git a/JSMR.Tests/Unit/LanguageIdentifierTests.cs b/JSMR.Tests/Unit/LanguageIdentifierTests.cs
new file mode 100644
index 0000000..f527a24
--- /dev/null
+++ b/JSMR.Tests/Unit/LanguageIdentifierTests.cs
@@ -0,0 +1,29 @@
+using JSMR.Domain.Enums;
+using JSMR.Infrastructure.Common.Languages;
+using Shouldly;
+
+namespace JSMR.Tests.Unit;
+
+/// <summary>
+/// Unit tests for <see cref="LanguageIdentifier.GetLanguage(string)"/>.
+/// </summary>
+public class LanguageIdentifierTests
+{
+    private readonly LanguageIdentifier _languageIdentifier = new();
+
+    /// <summary>
+    /// Checks that each sample text is identified as the expected language, and that
+    /// empty or whitespace-only input yields <see cref="Language.Unknown"/>.
+    /// </summary>
+    [Theory]
+    [InlineData("これは日本語のサンプル文章です。今日はいい天気ですね。", Language.Japanese)]
+    [InlineData("This is an English sample sentence used for language detection.", Language.English)]
+    [InlineData("這是一段中文(繁體)的範例文字,用於語言辨識。", Language.ChineseTraditional)]
+    [InlineData("이것은 한국어 예문으로, 언어 식별을 위한 문장입니다。", Language.Korean)]
+    [InlineData("", Language.Unknown)]
+    [InlineData("   ", Language.Unknown)]
+    public void Identify_Languages(string text, Language expectedLanguage)
+    {
+        _languageIdentifier.GetLanguage(text).ShouldBe(expectedLanguage);
+    }
+}
\ No newline at end of file