Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/SIL.Machine/Corpora/ParatextProjectSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -104,13 +104,13 @@ public string GetBookFileName(string bookId)
return FileNamePrefix + bookPart + FileNameSuffix;
}

public IEnumerable<string> GetAllScriptureBookFileNames()
public IEnumerable<string> GetAllScriptureBookIds()
{
BookSet scriptureBooks = Canon.ScriptureBooks;
scriptureBooks.SelectAll();
foreach (string bookId in scriptureBooks.SelectedBookIds)
{
yield return GetBookFileName(bookId);
yield return bookId;
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Collections.Generic;
using System.IO;
using System.Text;
using SIL.Scripture;

namespace SIL.Machine.Corpora
{
Expand All @@ -20,15 +21,21 @@ ParatextProjectSettings settings
}

public IReadOnlyList<UsfmVersificationError> GetUsfmVersificationErrors(
UsfmVersificationErrorDetector handler = null
UsfmVersificationErrorDetector handler = null,
HashSet<int> books = null
)
{
handler = handler ?? new UsfmVersificationErrorDetector(_settings.Versification);
foreach (string fileName in _settings.GetAllScriptureBookFileNames())
handler = handler ?? new UsfmVersificationErrorDetector(_settings);
foreach (string bookId in _settings.GetAllScriptureBookIds())
{
string fileName = _settings.GetBookFileName(bookId);

if (!_paratextProjectFileHandler.Exists(fileName))
continue;

if (books != null && !books.Contains(Canon.BookIdToNumber(bookId)))
continue;

string usfm;
using (var reader = new StreamReader(_paratextProjectFileHandler.Open(fileName)))
{
Expand Down
36 changes: 28 additions & 8 deletions src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ public UsfmVersificationError(
int expectedVerse,
int actualChapter,
int actualVerse,
string projectName,
VerseRef? verseRef = null
)
{
Expand All @@ -39,8 +40,11 @@ public UsfmVersificationError(
_actualChapter = actualChapter;
_actualVerse = actualVerse;
_verseRef = verseRef;
ProjectName = projectName;
}

public string ProjectName { get; private set; }

public UsfmVersificationErrorType Type { get; private set; }

// Returns true if there is an error
Expand Down Expand Up @@ -100,14 +104,20 @@ public string ExpectedVerseRef
{
get
{
if (Type == UsfmVersificationErrorType.ExtraVerse)
return "";

// We do not want to throw an exception here, and the VerseRef constructor can throw
// an exception with certain invalid verse data; use TryParse instead.
if (!VerseRef.TryParse($"{_bookNum} {_expectedChapter}:{_expectedVerse}", out VerseRef defaultVerseRef))
if (
!VerseRef.TryParse(
$"{Canon.BookNumberToId(_bookNum)} {_expectedChapter}:{_expectedVerse}",
out VerseRef defaultVerseRef
)
)
{
return DefaultVerse(_expectedChapter, _expectedVerse);
}
if (Type == UsfmVersificationErrorType.ExtraVerse)
return "";
if (
Type == UsfmVersificationErrorType.MissingVerseSegment
&& VerseRef.TryParse(
Expand Down Expand Up @@ -154,7 +164,12 @@ public string ActualVerseRef
}
else
{
if (VerseRef.TryParse($"{_bookNum} {_actualChapter}:{_actualVerse}", out VerseRef actualVerseRef))
if (
VerseRef.TryParse(
$"{Canon.BookNumberToId(_bookNum)} {_actualChapter}:{_actualVerse}",
out VerseRef actualVerseRef
)
)
{
return actualVerseRef.ToString();
}
Expand All @@ -172,15 +187,17 @@ private string DefaultVerse(int chapter, int verse)

public class UsfmVersificationErrorDetector : UsfmParserHandlerBase
{
private readonly string _projectName;
private readonly ScrVers _versification;
private int _currentBook;
private int _currentChapter;
private VerseRef _currentVerse;
private readonly List<UsfmVersificationError> _errors;

public UsfmVersificationErrorDetector(ScrVers versification)
public UsfmVersificationErrorDetector(ParatextProjectSettings settings)
{
_versification = versification;
_projectName = settings.Name;
_versification = settings.Versification;
_currentBook = 0;
_currentChapter = 0;
_currentVerse = new VerseRef();
Expand All @@ -198,7 +215,8 @@ public override void EndUsfm(UsfmParserState state)
_versification.GetLastChapter(_currentBook),
_versification.GetLastVerse(_currentBook, _versification.GetLastChapter(_currentBook)),
_currentChapter,
_currentVerse.AllVerses().Last().VerseNum
_currentVerse.AllVerses().Last().VerseNum,
_projectName
);
if (versificationError.CheckError())
_errors.Add(versificationError);
Expand Down Expand Up @@ -227,7 +245,8 @@ string pubNumber
_currentChapter,
_versification.GetLastVerse(_currentBook, _currentChapter),
_currentChapter,
_currentVerse.AllVerses().Last().VerseNum
_currentVerse.AllVerses().Last().VerseNum,
_projectName
);
if (versificationError.CheckError())
_errors.Add(versificationError);
Expand All @@ -254,6 +273,7 @@ string pubNumber
_currentVerse.AllVerses().Last().VerseNum,
_currentChapter,
_currentVerse.AllVerses().Last().VerseNum,
_projectName,
_currentVerse
);
if (versificationError.CheckError())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
namespace SIL.Machine.Corpora;

[TestFixture]
public class ParatextProjectQuoteConventionDetectorTests
public class ParatextProjectVersificationErrorDetectorTests
{
[Test]
public void GetUsfmVersificationErrors_NoErrors()
Expand Down Expand Up @@ -75,6 +75,8 @@ public void GetUsfmVersificationErrors_MissingVerse()
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors));
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerse));
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("3JN 1:15"));
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:14"));
}

[Test]
Expand All @@ -93,6 +95,8 @@ public void GetUsfmVersificationErrors_MissingChapter()
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors));
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingChapter));
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("3JN 1:15"));
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 0:0"));
}

[Test]
Expand Down Expand Up @@ -128,6 +132,8 @@ public void GetUsfmVersificationErrors_ExtraVerse()
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors));
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerse));
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo(""));
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:16"));
}

[Test]
Expand Down Expand Up @@ -161,6 +167,8 @@ public void GetUsfmVersificationErrors_InvalidVerse()
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors));
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.InvalidVerseRange));
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("3JN 1:12-13"));
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:13-12"));
}

[Test]
Expand Down Expand Up @@ -196,6 +204,8 @@ public void GetUsfmVersificationErrors_ExtraVerseSegment()
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
Assert.That(errors, Has.Count.EqualTo(2), JsonSerializer.Serialize(errors));
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerseSegment));
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("3JN 1:14"));
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:14a"));
}

[Test]
Expand Down Expand Up @@ -233,6 +243,8 @@ public void GetUsfmVersificationErrors_MissingVerseSegment()
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors));
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerseSegment));
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("3JN 1:13a"));
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:13"));
}

[Test]
Expand Down Expand Up @@ -289,6 +301,8 @@ public void GetUsfmVersificationErrors_ExtraVerse_ExcludedInCustomVrs()
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors));
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerse));
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo(""));
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("3JN 1:13"));
}

[Test]
Expand Down Expand Up @@ -341,6 +355,8 @@ public void GetUsfmVersificationErrors_MultipleBooks()
IReadOnlyList<UsfmVersificationError> errors = env.GetUsfmVersificationErrors();
Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors));
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerse));
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("2JN 1:13"));
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("2JN 1:12"));
}

[Test]
Expand Down Expand Up @@ -375,6 +391,10 @@ public void GetUsfmVersificationErrors_MultipleChapters()
Assert.That(errors, Has.Count.EqualTo(2), JsonSerializer.Serialize(errors));
Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerse));
Assert.That(errors[1].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerse));
Assert.That(errors[0].ExpectedVerseRef, Is.EqualTo("2JN 1:13"));
Assert.That(errors[0].ActualVerseRef, Is.EqualTo("2JN 1:12"));
Assert.That(errors[1].ExpectedVerseRef, Is.EqualTo(""));
Assert.That(errors[1].ActualVerseRef, Is.EqualTo("2JN 2:1"));
}

private class TestEnvironment(ParatextProjectSettings? settings = null, Dictionary<string, string>? files = null)
Expand Down
Loading