diff --git a/src/SIL.Machine/Corpora/ParatextProjectSettings.cs b/src/SIL.Machine/Corpora/ParatextProjectSettings.cs index 286c6c27..ecbcfa0f 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectSettings.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectSettings.cs @@ -104,13 +104,13 @@ public string GetBookFileName(string bookId) return FileNamePrefix + bookPart + FileNameSuffix; } - public IEnumerable GetAllScriptureBookFileNames() + public IEnumerable GetAllScriptureBookIds() { BookSet scriptureBooks = Canon.ScriptureBooks; scriptureBooks.SelectAll(); foreach (string bookId in scriptureBooks.SelectedBookIds) { - yield return GetBookFileName(bookId); + yield return bookId; } } diff --git a/src/SIL.Machine/Corpora/ParatextProjectVersificationErrorDetectorBase.cs b/src/SIL.Machine/Corpora/ParatextProjectVersificationErrorDetectorBase.cs index 4faa7861..98660613 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectVersificationErrorDetectorBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectVersificationErrorDetectorBase.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.IO; using System.Text; +using SIL.Scripture; namespace SIL.Machine.Corpora { @@ -20,15 +21,21 @@ ParatextProjectSettings settings } public IReadOnlyList GetUsfmVersificationErrors( - UsfmVersificationErrorDetector handler = null + UsfmVersificationErrorDetector handler = null, + HashSet books = null ) { - handler = handler ?? new UsfmVersificationErrorDetector(_settings.Versification); - foreach (string fileName in _settings.GetAllScriptureBookFileNames()) + handler = handler ?? new UsfmVersificationErrorDetector(_settings); + foreach (string bookId in _settings.GetAllScriptureBookIds()) { + string fileName = _settings.GetBookFileName(bookId); + if (!_paratextProjectFileHandler.Exists(fileName)) continue; + if (books != null && !books.Contains(Canon.BookIdToNumber(bookId))) + continue; + string usfm; using (var reader = new StreamReader(_paratextProjectFileHandler.Open(fileName))) { diff --git a/src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs b/src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs index c908a94e..e7c1a0a2 100644 --- a/src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs +++ b/src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs @@ -30,6 +30,7 @@ public UsfmVersificationError( int expectedVerse, int actualChapter, int actualVerse, + string projectName, VerseRef? verseRef = null ) { @@ -39,8 +40,11 @@ public UsfmVersificationError( _actualChapter = actualChapter; _actualVerse = actualVerse; _verseRef = verseRef; + ProjectName = projectName; } + public string ProjectName { get; private set; } + public UsfmVersificationErrorType Type { get; private set; } // Returns true if there is an error @@ -100,14 +104,20 @@ public string ExpectedVerseRef { get { + if (Type == UsfmVersificationErrorType.ExtraVerse) + return ""; + // We do not want to throw an exception here, and the VerseRef constructor can throw // an exception with certain invalid verse data; use TryParse instead. - if (!VerseRef.TryParse($"{_bookNum} {_expectedChapter}:{_expectedVerse}", out VerseRef defaultVerseRef)) + if ( + !VerseRef.TryParse( + $"{Canon.BookNumberToId(_bookNum)} {_expectedChapter}:{_expectedVerse}", + out VerseRef defaultVerseRef + ) + ) { return DefaultVerse(_expectedChapter, _expectedVerse); } - if (Type == UsfmVersificationErrorType.ExtraVerse) - return ""; if ( Type == UsfmVersificationErrorType.MissingVerseSegment && VerseRef.TryParse( @@ -154,7 +164,12 @@ public string ActualVerseRef } else { - if (VerseRef.TryParse($"{_bookNum} {_actualChapter}:{_actualVerse}", out VerseRef actualVerseRef)) + if ( + VerseRef.TryParse( + $"{Canon.BookNumberToId(_bookNum)} {_actualChapter}:{_actualVerse}", + out VerseRef actualVerseRef + ) + ) { return actualVerseRef.ToString(); } @@ -172,15 +187,17 @@ private string DefaultVerse(int chapter, int verse) public class UsfmVersificationErrorDetector : UsfmParserHandlerBase { + private readonly string _projectName; private readonly ScrVers _versification; private int _currentBook; private int _currentChapter; private VerseRef _currentVerse; private readonly List _errors; - public UsfmVersificationErrorDetector(ScrVers versification) + public UsfmVersificationErrorDetector(ParatextProjectSettings settings) { - _versification = versification; + _projectName = settings.Name; + _versification = settings.Versification; _currentBook = 0; _currentChapter = 0; _currentVerse = new VerseRef(); @@ -198,7 +215,8 @@ public override void EndUsfm(UsfmParserState state) _versification.GetLastChapter(_currentBook), _versification.GetLastVerse(_currentBook, _versification.GetLastChapter(_currentBook)), _currentChapter, - _currentVerse.AllVerses().Last().VerseNum + _currentVerse.AllVerses().Last().VerseNum, + _projectName ); if (versificationError.CheckError()) _errors.Add(versificationError); @@ -227,7 +245,8 @@ string pubNumber _currentChapter, _versification.GetLastVerse(_currentBook, _currentChapter), _currentChapter, - _currentVerse.AllVerses().Last().VerseNum + _currentVerse.AllVerses().Last().VerseNum, + _projectName ); if (versificationError.CheckError()) _errors.Add(versificationError); @@ -254,6 +273,7 @@ string pubNumber _currentVerse.AllVerses().Last().VerseNum, _currentChapter, _currentVerse.AllVerses().Last().VerseNum, + _projectName, _currentVerse ); if (versificationError.CheckError()) diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs index 822af209..d4582db8 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs @@ -6,7 +6,7 @@ namespace SIL.Machine.Corpora; [TestFixture] -public class ParatextProjectQuoteConventionDetectorTests +public class ParatextProjectVersificationErrorDetectorTests { [Test] public void GetUsfmVersificationErrors_NoErrors() @@ -75,6 +75,8 @@ public void GetUsfmVersificationErrors_MissingVerse() IReadOnlyList errors = env.GetUsfmVersificationErrors(); Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerse)); + Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("3JN 1:15")); + Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:14")); } [Test] @@ -93,6 +95,8 @@ public void GetUsfmVersificationErrors_MissingChapter() IReadOnlyList errors = env.GetUsfmVersificationErrors(); Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingChapter)); + Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("3JN 1:15")); + Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 0:0")); } [Test] @@ -128,6 +132,8 @@ public void GetUsfmVersificationErrors_ExtraVerse() IReadOnlyList errors = env.GetUsfmVersificationErrors(); Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerse)); + Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("")); + Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:16")); } [Test] @@ -161,6 +167,8 @@ public void GetUsfmVersificationErrors_InvalidVerse() IReadOnlyList errors = env.GetUsfmVersificationErrors(); Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.InvalidVerseRange)); + Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("3JN 1:12-13")); + Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:13-12")); } [Test] @@ -196,6 +204,8 @@ public void GetUsfmVersificationErrors_ExtraVerseSegment() IReadOnlyList errors = env.GetUsfmVersificationErrors(); Assert.That(errors, Has.Count.EqualTo(2), JsonSerializer.Serialize(errors)); Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerseSegment)); + Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("3JN 1:14")); + Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:14a")); } [Test] @@ -233,6 +243,8 @@ public void GetUsfmVersificationErrors_MissingVerseSegment() IReadOnlyList errors = env.GetUsfmVersificationErrors(); Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerseSegment)); + Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("3JN 1:13a")); + Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:13")); } [Test] @@ -289,6 +301,8 @@ public void GetUsfmVersificationErrors_ExtraVerse_ExcludedInCustomVrs() IReadOnlyList errors = env.GetUsfmVersificationErrors(); Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerse)); + Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("")); + Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:13")); } [Test] @@ -341,6 +355,8 @@ public void GetUsfmVersificationErrors_MultipleBooks() IReadOnlyList errors = env.GetUsfmVersificationErrors(); Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerse)); + Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("2JN 1:13")); + Assert.That(errors[0].ActualVerseRef, Is.EqualTo("2JN 1:12")); } [Test] @@ -375,6 +391,10 @@ public void GetUsfmVersificationErrors_MultipleChapters() Assert.That(errors, Has.Count.EqualTo(2), JsonSerializer.Serialize(errors)); Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerse)); Assert.That(errors[1].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerse)); + Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("2JN 1:13")); + Assert.That(errors[0].ActualVerseRef, Is.EqualTo("2JN 1:12")); + Assert.That(errors[1].ExpectedVerseRef, Is.EqualTo("")); + Assert.That(errors[1].ActualVerseRef, Is.EqualTo("2JN 2:1")); } private class TestEnvironment(ParatextProjectSettings? settings = null, Dictionary? files = null)