From 06f8c2ec9e5362d70c28269cbf87056c90cc94be Mon Sep 17 00:00:00 2001 From: Brian Bicknell Date: Mon, 3 Nov 2025 23:25:25 -0500 Subject: [PATCH] Added language identifier tests. --- .../Common/Languages/LanguageIdentifier.cs | 3 +++ JSMR.Tests/Unit/LanguageIdentifierTests.cs | 21 +++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 JSMR.Tests/Unit/LanguageIdentifierTests.cs diff --git a/JSMR.Infrastructure/Common/Languages/LanguageIdentifier.cs b/JSMR.Infrastructure/Common/Languages/LanguageIdentifier.cs index ba8400b..ad1a611 100644 --- a/JSMR.Infrastructure/Common/Languages/LanguageIdentifier.cs +++ b/JSMR.Infrastructure/Common/Languages/LanguageIdentifier.cs @@ -26,6 +26,9 @@ public class LanguageIdentifier : ILanguageIdentifier public Language GetLanguage(string text) { + if (string.IsNullOrWhiteSpace(text)) + return Language.Unknown; + var rankedLanguages = _identifier.Identify(text).Where(x => _languages.Contains(x.Item1.Iso639_3)); var identifiedLanguage = rankedLanguages.OrderBy(x => x.Item2).FirstOrDefault(); diff --git a/JSMR.Tests/Unit/LanguageIdentifierTests.cs b/JSMR.Tests/Unit/LanguageIdentifierTests.cs new file mode 100644 index 0000000..f527a24 --- /dev/null +++ b/JSMR.Tests/Unit/LanguageIdentifierTests.cs @@ -0,0 +1,21 @@ +using JSMR.Domain.Enums; +using JSMR.Infrastructure.Common.Languages; +using Shouldly; + +namespace JSMR.Tests.Unit; + +public class LanguageIdentifierTests +{ + private readonly LanguageIdentifier _languageIdentifier = new(); + + [Theory] + [InlineData("これは日本語のサンプル文章です。今日はいい天気ですね。", Language.Japanese)] + [InlineData("This is an English sample sentence used for language detection.", Language.English)] + [InlineData("這是一段中文(繁體)的範例文字,用於語言辨識。", Language.ChineseTraditional)] + [InlineData("이것은 한국어 예문으로, 언어 식별을 위한 문장입니다。", Language.Korean)] + [InlineData("", Language.Unknown)] + public void Identify_Languages(string text, Language expectedLanguage) + { + 
_languageIdentifier.GetLanguage(text).ShouldBe(expectedLanguage); + } +} \ No newline at end of file