diff --git a/SubRenamer.Tests/MatcherTests/FilenameNfkcTests.cs b/SubRenamer.Tests/MatcherTests/FilenameNfkcTests.cs new file mode 100644 index 0000000..d7bc46e --- /dev/null +++ b/SubRenamer.Tests/MatcherTests/FilenameNfkcTests.cs @@ -0,0 +1,33 @@ +using SubRenamer.Core; +using SubRenamer.Helper; + +namespace SubRenamer.Tests.MatcherTests; + +/// <summary> +/// Tests for filename normalization: decomposed character sequences must normalize to their composed form, and denormalizing must give back the original names. +/// </summary> +/// <remarks>NFKC stands for Unicode Normalization Form KC (Compatibility Composition). +/// https://unicode.org/reports/tr15/ +/// </remarks> +[TestFixture] +public class FilenameNfkcTests +{ + [Test] + public void Basic() + { + var normalizer = new MatcherFilenameNormalizer(); + List originalItems = [ + new("", "\u30CF\u309A", "\u30D5\u3099"), + ]; + + var normalizedItems = normalizer.Normalize(originalItems); + + Assert.That(normalizedItems, Is.EqualTo([ + new MatchItem("", "\u30D1", "\u30D6"), + ]), "Normalize"); + + Assert.That(normalizer.Denormalize(normalizedItems), Is.EqualTo(originalItems), "Denormalize"); + + normalizer.Clear(); + } +} diff --git a/SubRenamer.Tests/MatcherTests/TopLevelTests.cs b/SubRenamer.Tests/MatcherTests/TopLevelTests.cs index 8a8f8ba..54b9032 100644 --- a/SubRenamer.Tests/MatcherTests/TopLevelTests.cs +++ b/SubRenamer.Tests/MatcherTests/TopLevelTests.cs @@ -1,5 +1,6 @@ using System.Text.Json; using SubRenamer.Core; +using SubRenamer.Helper; namespace SubRenamer.Tests.MatcherTests; @@ -26,7 +27,8 @@ private static IEnumerable TestData [Test, TestCaseSource(nameof(TestData))] public void TestCasesFromJson(string name, List input, List expected) { - var actual = Matcher.Execute(input); + var normalizer = new MatcherFilenameNormalizer(); + var actual = Matcher.Execute(normalizer.Normalize(input)); var jsonOpts = new JsonSerializerOptions { WriteIndented = true, Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping }; TestContext.Progress.WriteLine("{1}\n\n \ud83c\udf1f Matcher Test Case: {0}\n\n{1}", name, new string('=', 50)); @@ -37,6 +39,7 @@ 
public void TestCasesFromJson(string name, List input, List +/// <summary>Handles normalization and denormalization of filenames in MatchItems to ensure consistent Unicode handling. +/// Uses NormalizationForm.FormKC for compatibility normalization with composition. Keeps normalized-to-raw lookup tables for video and subtitle names that Normalize fills and Denormalize reads, so Normalize must run on the same instance first. +/// </summary> +public class MatcherFilenameNormalizer +{ + private readonly Dictionary _normalizedToRawVideos = new(); + private readonly Dictionary _normalizedToRawSubtitles = new(); + + /// <summary> + /// Normalizes the filenames in a list of MatchItems using NormalizationForm.FormKC and records each non-empty raw name under its normalized form; a raw name whose normalized form was already recorded overwrites the earlier entry. + /// </summary> + /// <param name="matchItems">The list of MatchItems to normalize.</param> + /// <returns>A new list of MatchItems with normalized filenames (an empty list for empty input).</returns> + public List Normalize(IReadOnlyList matchItems) + { + if (matchItems.Count == 0) return []; + + var result = new List(matchItems.Count); + foreach (var item in matchItems) + { + var normalizedVideo = item.Video.Normalize(NormalizationForm.FormKC); + var normalizedSubtitle = item.Subtitle.Normalize(NormalizationForm.FormKC); + + if (!string.IsNullOrEmpty(item.Video)) + _normalizedToRawVideos[normalizedVideo] = item.Video; + if (!string.IsNullOrEmpty(item.Subtitle)) + _normalizedToRawSubtitles[normalizedSubtitle] = item.Subtitle; + + result.Add(new Core.MatchItem(item.Key, normalizedVideo, normalizedSubtitle)); + } + + return result; + } + + /// <summary> + /// Denormalizes the filenames in a list of MatchItems back to their original form, using the mappings recorded by Normalize; null or empty names become string.Empty. + /// </summary> + /// <param name="matchItems">The list of MatchItems to denormalize.</param> + /// <returns>A new list of MatchItems with original filenames.</returns> + /// <exception cref="KeyNotFoundException">Thrown when a normalized filename cannot be mapped back to its original form.</exception> + public List Denormalize(IReadOnlyList matchItems) + { + if (matchItems.Count == 0) return []; + + var result = new List(matchItems.Count); + foreach (var item in matchItems) + { + var originalVideo = !string.IsNullOrEmpty(item.Video) ? _normalizedToRawVideos[item.Video] : string.Empty; + var originalSubtitle = !string.IsNullOrEmpty(item.Subtitle) + ? 
_normalizedToRawSubtitles[item.Subtitle] + : string.Empty; + result.Add(new Core.MatchItem(item.Key, originalVideo, originalSubtitle)); + } + + return result; + } + + /// <summary> + /// Clears the internal mapping dictionaries. + /// </summary> + public void Clear() + { + _normalizedToRawVideos.Clear(); + _normalizedToRawSubtitles.Clear(); + } +} \ No newline at end of file diff --git a/SubRenamer/ViewModels/MainViewModel.cs b/SubRenamer/ViewModels/MainViewModel.cs index 903a8c3..847e1dd 100644 --- a/SubRenamer/ViewModels/MainViewModel.cs +++ b/SubRenamer/ViewModels/MainViewModel.cs @@ -174,8 +174,10 @@ partial void OnSubSyncEnabledChanged(bool value) [RelayCommand] private void PerformMatch() { + var filenameNormalizer = new MatcherFilenameNormalizer(); ShowRenameTasks = false; var inputItems = MatcherDataConverter.ConvertMatchItems(MatchList); + inputItems = filenameNormalizer.Normalize(inputItems); var m = Config.Get().MatchMode; var resultRaw = Matcher.Execute(inputItems, new MatcherOptions() { @@ -183,6 +185,8 @@ private void PerformMatch() VideoRegex = (m != MatchMode.Diff) ? (m == MatchMode.Manual ? Config.Get().ManualVideoRegex : Config.Get().VideoRegex) : null, SubtitleRegex = (m != MatchMode.Diff) ? (m == MatchMode.Manual ? Config.Get().ManualSubtitle : Config.Get().SubtitleRegex) : null, }); + resultRaw = filenameNormalizer.Denormalize(resultRaw); + filenameNormalizer.Clear(); var result = MatcherDataConverter.ConvertMatchItems(resultRaw); result.ForEach(UpdateMatchItemStatus); MatchList = new ObservableCollection(result);