Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 0b82cf0

Browse files
committed Jan 30, 2025
Preserve embeddings (fig, fm, notes and cross references)
Only translate ft text. Configure preserving / stripping embedded and style markers.
1 parent 3a9b17e commit 0b82cf0

10 files changed

+394
-135
lines changed
 

‎src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs

+10-2
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,9 @@ public string UpdateUsfm(
2323
string bookId,
2424
IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)> rows,
2525
string fullName = null,
26-
UpdateUsfmBehavior behavior = UpdateUsfmBehavior.PreferExisting
26+
UpdateUsfmTextBehavior textBehavior = UpdateUsfmTextBehavior.PreferExisting,
27+
UpdateUsfmIntraVerseMarkerBehavior embeddedBehavior = UpdateUsfmIntraVerseMarkerBehavior.Preserve,
28+
UpdateUsfmIntraVerseMarkerBehavior styleBehavior = UpdateUsfmIntraVerseMarkerBehavior.Strip
2729
)
2830
{
2931
string fileName = _settings.GetBookFileName(bookId);
@@ -36,7 +38,13 @@ public string UpdateUsfm(
3638
usfm = reader.ReadToEnd();
3739
}
3840

39-
var handler = new UpdateUsfmParserHandler(rows, fullName is null ? null : $"- {fullName}", behavior);
41+
var handler = new UpdateUsfmParserHandler(
42+
rows,
43+
fullName is null ? null : $"- {fullName}",
44+
textBehavior,
45+
embeddedBehavior,
46+
styleBehavior
47+
);
4048
try
4149
{
4250
UsfmParser.Parse(usfm, handler, _settings.Stylesheet, _settings.Versification);

‎src/SIL.Machine/Corpora/ScriptureRefUsfmParserHandlerBase.cs

+102-21
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
using System.Collections.Generic;
22
using System.Linq;
3+
using SIL.Extensions;
34
using SIL.Scripture;
45

56
namespace SIL.Machine.Corpora
@@ -9,7 +10,8 @@ public enum ScriptureTextType
910
None,
1011
NonVerse,
1112
Verse,
12-
Note
13+
Embedded,
14+
NoteText
1315
}
1416

1517
public abstract class ScriptureRefUsfmParserHandlerBase : UsfmParserHandlerBase
@@ -19,6 +21,9 @@ public abstract class ScriptureRefUsfmParserHandlerBase : UsfmParserHandlerBase
1921
private readonly Stack<ScriptureTextType> _curTextType;
2022
private bool _duplicateVerse = false;
2123

24+
private bool _inEmbedded;
25+
public bool InNoteText { get; private set; }
26+
2227
protected ScriptureRefUsfmParserHandlerBase()
2328
{
2429
_curElements = new Stack<ScriptureElement>();
@@ -59,7 +64,7 @@ string pubNumber
5964
// ignore duplicate verses
6065
_duplicateVerse = true;
6166
}
62-
else if (VerseRef.AreOverlappingVersesRanges(number, _curVerseRef.Verse))
67+
else if (VerseRef.AreOverlappingVersesRanges(verse1: number, verse2: _curVerseRef.Verse))
6368
{
6469
// merge overlapping verse ranges in to one range
6570
VerseRef verseRef = _curVerseRef.Clone();
@@ -153,20 +158,36 @@ public override void EndSidebar(UsfmParserState state, string marker, bool close
153158

154159
public override void StartNote(UsfmParserState state, string marker, string caller, string category)
155160
{
156-
if (CurrentTextType != ScriptureTextType.None && !_duplicateVerse)
161+
_inEmbedded = true;
162+
StartEmbedded(state, marker, caller, category);
163+
}
164+
165+
public override void EndNote(UsfmParserState state, string marker, bool closed)
166+
{
167+
EndNoteText(state);
168+
EndEmbedded(state, marker, null, closed);
169+
_inEmbedded = false;
170+
}
171+
172+
public virtual void StartEmbedded(UsfmParserState state, string marker, string caller, string category)
173+
{
174+
if (_curVerseRef.IsDefault)
175+
UpdateVerseRef(state.VerseRef, marker);
176+
177+
if (!_duplicateVerse)
157178
{
158179
// if we hit a note in a verse paragraph and we aren't in a verse, then start a non-verse segment
159180
CheckConvertVerseParaToNonVerse(state);
160181
NextElement(marker);
161-
StartNoteText(state);
162182
}
163183
}
164184

165-
public override void EndNote(UsfmParserState state, string marker, bool closed)
166-
{
167-
if (CurrentTextType == ScriptureTextType.Note && !_duplicateVerse)
168-
EndNoteText(state);
169-
}
185+
public virtual void EndEmbedded(
186+
UsfmParserState state,
187+
string marker,
188+
IReadOnlyList<UsfmAttribute> attributes,
189+
bool closed
190+
) { }
170191

171192
public override void Text(UsfmParserState state, string text)
172193
{
@@ -187,9 +208,37 @@ public override void StartChar(
187208
IReadOnlyList<UsfmAttribute> attributes
188209
)
189210
{
211+
if (IsEmbeddedPart(markerWithoutPlus))
212+
EndNoteText(state);
213+
190214
// if we hit a character marker in a verse paragraph and we aren't in a verse, then start a non-verse
191215
// segment
192216
CheckConvertVerseParaToNonVerse(state);
217+
218+
if (IsEmbeddedCharacter(markerWithoutPlus))
219+
{
220+
_inEmbedded = true;
221+
StartEmbedded(state, markerWithoutPlus, null, null);
222+
}
223+
224+
if (IsNoteText(markerWithoutPlus))
225+
{
226+
StartNoteText(state);
227+
}
228+
}
229+
230+
public override void EndChar(
231+
UsfmParserState state,
232+
string marker,
233+
IReadOnlyList<UsfmAttribute> attributes,
234+
bool closed
235+
)
236+
{
237+
if (IsEmbeddedCharacter(marker))
238+
{
239+
EndEmbedded(state, marker, attributes, closed);
240+
_inEmbedded = false;
241+
}
193242
}
194243

195244
protected virtual void StartVerseText(UsfmParserState state, IReadOnlyList<ScriptureRef> scriptureRefs) { }
@@ -200,8 +249,25 @@ protected virtual void StartNonVerseText(UsfmParserState state, ScriptureRef scr
200249

201250
protected virtual void EndNonVerseText(UsfmParserState state, ScriptureRef scriptureRef) { }
202251

252+
public virtual void StartNoteText(UsfmParserState state)
253+
{
254+
InNoteText = true;
255+
_curTextType.Push(ScriptureTextType.NoteText);
256+
StartNoteText(state, CreateNonVerseRef());
257+
}
258+
203259
protected virtual void StartNoteText(UsfmParserState state, ScriptureRef scriptureRef) { }
204260

261+
public virtual void EndNoteText(UsfmParserState state)
262+
{
263+
if (_curTextType.Count > 0 && _curTextType.Peek() == ScriptureTextType.NoteText)
264+
{
265+
EndNoteText(state, CreateNonVerseRef());
266+
_curTextType.Pop();
267+
InNoteText = false;
268+
}
269+
}
270+
205271
protected virtual void EndNoteText(UsfmParserState state, ScriptureRef scriptureRef) { }
206272

207273
private void StartVerseText(UsfmParserState state)
@@ -227,22 +293,11 @@ private void StartNonVerseText(UsfmParserState state)
227293

228294
private void EndNonVerseText(UsfmParserState state)
229295
{
296+
EndEmbeddedElements();
230297
EndNonVerseText(state, CreateNonVerseRef());
231298
_curTextType.Pop();
232299
}
233300

234-
private void StartNoteText(UsfmParserState state)
235-
{
236-
_curTextType.Push(ScriptureTextType.Note);
237-
StartNoteText(state, CreateNonVerseRef());
238-
}
239-
240-
private void EndNoteText(UsfmParserState state)
241-
{
242-
EndNoteText(state, CreateNonVerseRef());
243-
_curTextType.Pop();
244-
}
245-
246301
private void UpdateVerseRef(VerseRef verseRef, string marker)
247302
{
248303
if (!VerseRef.AreOverlappingVersesRanges(verseRef, _curVerseRef))
@@ -270,6 +325,12 @@ private void EndParentElement()
270325
_curElements.Pop();
271326
}
272327

328+
private void EndEmbeddedElements()
329+
{
330+
if (_curElements.Count > 0 && IsEmbeddedCharacter(_curElements.Peek().Name))
331+
_curElements.Pop();
332+
}
333+
273334
private IReadOnlyList<ScriptureRef> CreateVerseRefs()
274335
{
275336
return _curVerseRef.HasMultiple
@@ -300,5 +361,25 @@ private void CheckConvertVerseParaToNonVerse(UsfmParserState state)
300361
StartNonVerseText(state);
301362
}
302363
}
364+
365+
public bool InEmbedded(string marker)
366+
{
367+
return _inEmbedded || IsEmbeddedCharacter(marker);
368+
}
369+
370+
private static bool IsNoteText(string marker)
371+
{
372+
return marker == "ft";
373+
}
374+
375+
public static bool IsEmbeddedPart(string marker)
376+
{
377+
return !(marker is null) && marker.Length > 0 && marker[0].IsOneOf('f', 'x', 'z');
378+
}
379+
380+
private static bool IsEmbeddedCharacter(string marker)
381+
{
382+
return marker.IsOneOf("f", "fe", "fig", "fm", "x");
383+
}
303384
}
304385
}

‎src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs

+92-47
Original file line number | Diff line number | Diff line change
@@ -4,40 +4,53 @@
44

55
namespace SIL.Machine.Corpora
66
{
7-
public enum UpdateUsfmBehavior
7+
public enum UpdateUsfmTextBehavior
88
{
99
PreferExisting,
1010
PreferNew,
1111
StripExisting
1212
}
1313

14+
public enum UpdateUsfmIntraVerseMarkerBehavior
15+
{
16+
Preserve,
17+
Strip,
18+
}
19+
1420
/***
1521
* This is a USFM parser handler that can be used to replace the existing text in a USFM file with the specified
1622
* text.
1723
*/
1824
public class UpdateUsfmParserHandler : ScriptureRefUsfmParserHandlerBase
1925
{
26+
private static readonly HashSet<string> UntranslatedParagraphTag = new HashSet<string> { "r", "rem", };
2027
private readonly IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)> _rows;
2128
private readonly List<UsfmToken> _tokens;
2229
private readonly List<UsfmToken> _newTokens;
2330
private readonly string _idText;
24-
private readonly UpdateUsfmBehavior _behavior;
31+
private readonly UpdateUsfmTextBehavior _textBehavior;
32+
private readonly UpdateUsfmIntraVerseMarkerBehavior _embeddedBehavior;
33+
private readonly UpdateUsfmIntraVerseMarkerBehavior _styleBehavior;
2534
private readonly Stack<bool> _replace;
2635
private int _rowIndex;
2736
private int _tokenIndex;
2837

2938
public UpdateUsfmParserHandler(
3039
IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)> rows = null,
3140
string idText = null,
32-
UpdateUsfmBehavior behavior = UpdateUsfmBehavior.PreferExisting
41+
UpdateUsfmTextBehavior textBehavior = UpdateUsfmTextBehavior.PreferExisting,
42+
UpdateUsfmIntraVerseMarkerBehavior embeddedBehavior = UpdateUsfmIntraVerseMarkerBehavior.Preserve,
43+
UpdateUsfmIntraVerseMarkerBehavior styleBehavior = UpdateUsfmIntraVerseMarkerBehavior.Strip
3344
)
3445
{
3546
_rows = rows ?? Array.Empty<(IReadOnlyList<ScriptureRef>, string)>();
3647
_tokens = new List<UsfmToken>();
3748
_newTokens = new List<UsfmToken>();
3849
_idText = idText;
3950
_replace = new Stack<bool>();
40-
_behavior = behavior;
51+
_textBehavior = textBehavior;
52+
_embeddedBehavior = embeddedBehavior;
53+
_styleBehavior = styleBehavior;
4154
}
4255

4356
public IReadOnlyList<UsfmToken> Tokens => _tokens;
@@ -176,30 +189,39 @@ bool closed
176189
)
177190
{
178191
// strip out char-style markers in verses that are being replaced
179-
if (closed && ReplaceWithNewTokens(state))
192+
if (ReplaceWithNewTokens(state, closed: closed))
180193
SkipTokens(state);
194+
else
195+
CollectTokens(state);
181196

182197
base.EndChar(state, marker, attributes, closed);
183198
}
184199

185-
public override void StartNote(UsfmParserState state, string marker, string caller, string category)
200+
public override void StartEmbedded(UsfmParserState state, string marker, string caller, string category)
186201
{
187202
// strip out notes in verses that are being replaced
188203
if (ReplaceWithNewTokens(state))
189204
SkipTokens(state);
190205
else
191206
CollectTokens(state);
192207

193-
base.StartNote(state, marker, caller, category);
208+
base.StartEmbedded(state, marker, caller, category);
194209
}
195210

196-
public override void EndNote(UsfmParserState state, string marker, bool closed)
211+
public override void EndEmbedded(
212+
UsfmParserState state,
213+
string marker,
214+
IReadOnlyList<UsfmAttribute> attributes,
215+
bool closed
216+
)
197217
{
198218
// strip out notes in verses that are being replaced
199-
if (closed && ReplaceWithNewTokens(state))
219+
if (ReplaceWithNewTokens(state, closed: closed))
200220
SkipTokens(state);
221+
else
222+
CollectTokens(state);
201223

202-
base.EndNote(state, marker, closed);
224+
base.EndEmbedded(state, marker, attributes, closed);
203225
}
204226

205227
public override void Ref(UsfmParserState state, string marker, string display, string target)
@@ -271,25 +293,7 @@ protected override void EndNonVerseText(UsfmParserState state, ScriptureRef scri
271293
protected override void StartNoteText(UsfmParserState state, ScriptureRef scriptureRef)
272294
{
273295
IReadOnlyList<string> rowTexts = AdvanceRows(new[] { scriptureRef });
274-
var newTokens = new List<UsfmToken>();
275-
if (rowTexts.Count > 0)
276-
{
277-
newTokens.Add(state.Token);
278-
newTokens.Add(new UsfmToken(UsfmTokenType.Character, "ft", null, "ft*"));
279-
for (int i = 0; i < rowTexts.Count; i++)
280-
{
281-
string text = rowTexts[i];
282-
if (i < rowTexts.Count - 1)
283-
text += " ";
284-
newTokens.Add(new UsfmToken(text));
285-
}
286-
newTokens.Add(new UsfmToken(UsfmTokenType.End, state.Token.EndMarker, null, null));
287-
PushNewTokens(newTokens);
288-
}
289-
else
290-
{
291-
PushTokensAsPrevious();
292-
}
296+
PushNewTokens(rowTexts.Select(t => new UsfmToken(t + " ")));
293297
}
294298

295299
protected override void EndNoteText(UsfmParserState state, ScriptureRef scriptureRef)
@@ -362,29 +366,58 @@ private void SkipTokens(UsfmParserState state)
362366
_tokenIndex = state.Index + 1 + state.SpecialTokenCount;
363367
}
364368

365-
private bool ReplaceWithNewTokens(UsfmParserState state)
369+
private bool ReplaceWithNewTokens(UsfmParserState state, bool closed = true)
366370
{
367-
bool newText = _replace.Count > 0 && _replace.Peek();
368-
int tokenEnd = state.Index + state.SpecialTokenCount;
369-
bool existingText = false;
370-
for (int index = _tokenIndex; index <= tokenEnd; index++)
371+
bool untranslatableParagraph =
372+
state.ParaTag?.Marker != null && IsUntranslatedParagraph(state.ParaTag.Marker);
373+
if (_textBehavior == UpdateUsfmTextBehavior.StripExisting)
371374
{
372-
if (state.Tokens[index].Type == UsfmTokenType.Text && state.Tokens[index].Text.Length > 0)
373-
{
374-
existingText = true;
375-
break;
376-
}
375+
if (untranslatableParagraph)
376+
ClearNewTokens();
377+
else
378+
AddNewTokens();
379+
return true;
377380
}
381+
382+
bool newText = _replace.Count > 0 && _replace.Peek();
383+
bool inEmbedded = InEmbedded(state.Token.Marker);
384+
bool isStyleTag = state.Token.Marker != null && !IsEmbeddedPart(state.Token.Marker);
385+
386+
bool existingText = state
387+
.Tokens.Skip(_tokenIndex)
388+
.Take(state.Index + 1 + state.SpecialTokenCount - _tokenIndex)
389+
.Any(t => t.Type == UsfmTokenType.Text && t.Text.Length > 0);
390+
378391
bool useNewTokens =
379-
_behavior == UpdateUsfmBehavior.StripExisting
380-
|| (newText && !existingText)
381-
|| (newText && _behavior == UpdateUsfmBehavior.PreferNew);
392+
!untranslatableParagraph
393+
&& newText
394+
&& (!existingText || _textBehavior == UpdateUsfmTextBehavior.PreferNew)
395+
&& (!inEmbedded || InNoteText);
382396

383397
if (useNewTokens)
384-
_tokens.AddRange(_newTokens);
398+
AddNewTokens();
385399

386-
_newTokens.Clear();
387-
return useNewTokens;
400+
if (untranslatableParagraph || (existingText && _textBehavior == UpdateUsfmTextBehavior.PreferExisting))
401+
ClearNewTokens();
402+
403+
// figure out when to skip the existing text
404+
bool withinNewText = _replace.Any(r => r);
405+
if (withinNewText && inEmbedded)
406+
{
407+
if (_embeddedBehavior == UpdateUsfmIntraVerseMarkerBehavior.Strip)
408+
return true;
409+
410+
if (!InNoteText)
411+
return false;
412+
}
413+
414+
bool skipTokens = useNewTokens && closed;
415+
416+
if (newText && isStyleTag)
417+
{
418+
skipTokens = _styleBehavior == UpdateUsfmIntraVerseMarkerBehavior.Strip;
419+
}
420+
return skipTokens;
388421
}
389422

390423
private void PushNewTokens(IEnumerable<UsfmToken> tokens)
@@ -393,14 +426,26 @@ private void PushNewTokens(IEnumerable<UsfmToken> tokens)
393426
_newTokens.AddRange(tokens);
394427
}
395428

396-
private void PushTokensAsPrevious()
429+
private void AddNewTokens()
397430
{
398-
_replace.Push(_replace.Peek());
431+
if (_newTokens.Count > 0)
432+
_tokens.AddRange(_newTokens);
433+
_newTokens.Clear();
434+
}
435+
436+
private void ClearNewTokens()
437+
{
438+
_newTokens.Clear();
399439
}
400440

401441
private void PopNewTokens()
402442
{
403443
_replace.Pop();
404444
}
445+
446+
public static bool IsUntranslatedParagraph(string tag)
447+
{
448+
return !(tag is null) && UntranslatedParagraphTag.Contains(tag);
449+
}
405450
}
406451
}

‎src/SIL.Machine/Corpora/UsfmTextBase.cs

+3
Original file line number | Diff line number | Diff line change
@@ -258,6 +258,9 @@ public override void Text(UsfmParserState state, string text)
258258
}
259259
else if (text.Length > 0 && (CurrentTextType != ScriptureTextType.Verse || state.IsVerseText))
260260
{
261+
if (InEmbedded(state.Token.Marker) && !InNoteText)
262+
return;
263+
261264
if (
262265
state.PrevToken?.Type == UsfmTokenType.End
263266
&& (rowText.Length == 0 || char.IsWhiteSpace(rowText[rowText.Length - 1]))

‎tests/SIL.Machine.Tests/Corpora/TestData/usfm/Tes/41MATTes.SFM

+8-8
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,19 @@
11
\id MAT - Test
2-
\f + \fr 1.0 \ft \f*
32
\h Matthew
43
\mt Matthew
5-
\ip An introduction to Matthew\fe + \ft This is an endnote.\fe*
4+
\ip An introduction to Matthew with an empty comment\fe + \ft \fe*
65
\p \rq MAT 1\rq* Here is another paragraph.
76
\p and with a \w keyword|a special concept\w* in it.
87
\p and a \weirdtaglookingthing that is not an actual tag.
98
\c 1
109
\s Chapter One
11-
\v 1 Chapter \pn one\+pro WON\+pro*\pn*, verse one.\f + \fr 1:1: \ft This is a footnote.\f*
10+
\v 1 Chapter \pn one\+pro WON\+pro*\pn*, verse \f + \fr 1:1: \ft This is a footnote for v1.\f*one.
1211
\li1
1312
\v 2 \bd C\bd*hapter one,
14-
\li2 verse\f + \fr 1:2: \ft This is a footnote.\f* two.
13+
\li2 verse\f + \fr 1:2: \ft This is a footnote for v2.\f* two.
1514
\v 3 Chapter one \w*,
1615
\li2 verse three.
17-
\v 4 Chapter one,
16+
\v 4 Chapter one,
1817
\li2 verse four,
1918
\v 5 Chapter one,
2019
\li2 verse \fig Figure 1|src="image1.png" size="col" ref="1:5"\fig* five.
@@ -27,7 +26,7 @@
2726
\s1 Chapter \it Two \it*
2827
\p
2928
\p
30-
\v 1 Chapter \add two\add*, verse \f + \fr 2:1: \ft This is a footnote.\f*one.
29+
\v 1 Chapter \add two\add*, verse \f + \fr 2:1: \ft This is a \bd footnote.\bd*\f*one.
3130
\v 2-3 Chapter two, // verse \fm ∆\fm*two.
3231
\esb
3332
\ms This is a sidebar
@@ -38,9 +37,9 @@
3837
\p
3938
\v 6 Chapter two, verse \w six|strong="12345" \w*.
4039
\p
41-
\v 6 Bad verse. \x - \xo abc\xt 123\x* and more content.
40+
\v 6 Bad verse. \x - \xo 2:3-4 \xt Cool Book 3:24 \xta The annotation \x* and more content.
4241
\p
43-
\v 5 Chapter two, verse five \rq (MAT 3:1)\rq*.
42+
\v 5 Chapter two, verse five\rq (MAT 3:1)\rq*.
4443
\v 7a Chapter two, verse seven A,
4544
\s Section header \ts-s\*
4645
\p
@@ -55,6 +54,7 @@
5554
\v 11-12
5655
\restore restore information
5756
\c 3
57+
\r (Mark 1:2-3; Luke 4:5-6)
5858
\cl PSALM 3
5959
\s1 Section 1
6060
\mt1 Major Title 1

‎tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs

+110-21
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,12 @@ public void GetUsfm_Verse_CharStyle()
1515

1616
string target = UpdateUsfm(rows);
1717
Assert.That(target, Contains.Substring("\\id MAT - Test\r\n"));
18-
Assert.That(target, Contains.Substring("\\v 1 First verse of the first chapter.\r\n"));
18+
Assert.That(
19+
target,
20+
Contains.Substring(
21+
"\\v 1 First verse of the first chapter. \\f + \\fr 1:1: \\ft This is a footnote for v1.\\f*\r\n\\li1\r\n\\v 2"
22+
)
23+
);
1924
}
2025

2126
[Test]
@@ -28,7 +33,7 @@ public void GetUsfm_IdText()
2833
[Test]
2934
public void GetUsfm_StripAllText()
3035
{
31-
string target = UpdateUsfm(behavior: UpdateUsfmBehavior.StripExisting);
36+
string target = UpdateUsfm(textBehavior: UpdateUsfmTextBehavior.StripExisting);
3237
Assert.That(target, Contains.Substring("\\id MAT\r\n"));
3338
Assert.That(target, Contains.Substring("\\v 1\r\n"));
3439
Assert.That(target, Contains.Substring("\\s\r\n"));
@@ -43,7 +48,7 @@ public void GetUsfm_PreferExisting()
4348
(ScrRef("MAT 1:6"), "Text 6"),
4449
(ScrRef("MAT 1:7"), "Text 7"),
4550
};
46-
string target = UpdateUsfm(rows, behavior: UpdateUsfmBehavior.PreferExisting);
51+
string target = UpdateUsfm(rows, textBehavior: UpdateUsfmTextBehavior.PreferExisting);
4752
Assert.That(target, Contains.Substring("\\id MAT - Test\r\n"));
4853
Assert.That(target, Contains.Substring("\\v 6 Verse 6 content.\r\n"));
4954
Assert.That(target, Contains.Substring("\\v 7 Text 7\r\n"));
@@ -57,26 +62,43 @@ public void GetUsfm_PreferRows()
5762
(ScrRef("MAT 1:6"), "Text 6"),
5863
(ScrRef("MAT 1:7"), "Text 7"),
5964
};
60-
string target = UpdateUsfm(rows, behavior: UpdateUsfmBehavior.PreferNew);
65+
string target = UpdateUsfm(rows, textBehavior: UpdateUsfmTextBehavior.PreferNew);
6166
Assert.That(target, Contains.Substring("\\id MAT - Test\r\n"));
6267
Assert.That(target, Contains.Substring("\\v 6 Text 6\r\n"));
6368
Assert.That(target, Contains.Substring("\\v 7 Text 7\r\n"));
6469
}
6570

6671
[Test]
67-
public void GetUsfm_Verse_SkipNote()
72+
public void GetUsfm_Verse_StripNote()
6873
{
6974
var rows = new List<(IReadOnlyList<ScriptureRef>, string)>
7075
{
7176
(ScrRef("MAT 2:1"), "First verse of the second chapter.")
7277
};
7378

74-
string target = UpdateUsfm(rows);
79+
string target = UpdateUsfm(rows, embeddedBehavior: UpdateUsfmIntraVerseMarkerBehavior.Strip);
7580
Assert.That(target, Contains.Substring("\\v 1 First verse of the second chapter.\r\n"));
7681
}
7782

7883
[Test]
79-
public void GetUsfm_Verse_ReplaceNote()
84+
public void GetUsfm_Verse_StripNotesWithUpdatedVerseText()
85+
{
86+
var rows = new List<(IReadOnlyList<ScriptureRef>, string)>
87+
{
88+
(ScrRef("MAT 1:1"), "First verse of the first chapter.")
89+
};
90+
91+
string target = UpdateUsfm(rows, embeddedBehavior: UpdateUsfmIntraVerseMarkerBehavior.Strip);
92+
Assert.That(target, Contains.Substring("\\id MAT - Test\r\n"));
93+
Assert.That(
94+
target,
95+
Contains.Substring("\\ip An introduction to Matthew with an empty comment\\fe + \\ft \\fe*")
96+
);
97+
Assert.That(target, Contains.Substring("\\v 1 First verse of the first chapter.\r\n\\li1\r\n\\v 2"));
98+
}
99+
100+
[Test]
101+
public void GetUsfm_Verse_ReplaceNoteKeepReference()
80102
{
81103
var rows = new List<(IReadOnlyList<ScriptureRef>, string)>
82104
{
@@ -87,7 +109,45 @@ public void GetUsfm_Verse_ReplaceNote()
87109
string target = UpdateUsfm(rows);
88110
Assert.That(
89111
target,
90-
Contains.Substring("\\v 1 First verse of the second chapter. \\f + \\ft This is a new footnote.\\f*\r\n")
112+
Contains.Substring(
113+
"\\v 1 First verse of the second chapter. \\f + \\fr 2:1: \\ft This is a new footnote. \\f*\r\n"
114+
)
115+
);
116+
}
117+
118+
[Test]
119+
public void GetUsfm_Verse_PreserveFiguresAndReferences()
120+
{
121+
var rows = new List<(IReadOnlyList<ScriptureRef>, string)>
122+
{
123+
// fig
124+
(ScrRef("MAT 1:5"), "Fifth verse of the first chapter."),
125+
(ScrRef("MAT 1:5/1:fig"), "figure text not updated"),
126+
// r
127+
(ScrRef("MAT 2:0/1:r"), "parallel reference not updated"),
128+
// rq
129+
(ScrRef("MAT 2:5/1:rq"), "quote reference not updated"),
130+
// xo
131+
(ScrRef("MAT 2:6/3:xo"), "Cross reference not update"),
132+
// xt
133+
(ScrRef("MAT 2:6/4:xt"), "cross reference - target reference not updated"),
134+
// xta
135+
(ScrRef("MAT 2:6/5:xta"), "cross reference annotation updated"),
136+
};
137+
138+
string target = UpdateUsfm(rows);
139+
Assert.That(
140+
target,
141+
Contains.Substring(
142+
"\\v 5 Fifth verse of the first chapter.\r\n\\li2 \\fig Figure 1|src=\"image1.png\" size=\"col\" ref=\"1:5\"\\fig*\r\n\\v 6"
143+
)
144+
);
145+
Assert.That(target, Contains.Substring("\\r (Mark 1:2-3; Luke 4:5-6)\r\n"));
146+
Assert.That(
147+
target,
148+
Contains.Substring(
149+
"\\v 6 Bad verse. \\x - \\xo 2:3-4 \\xt Cool Book 3:24 \\xta The annotation \\x* and more content.\r\n"
150+
)
91151
);
92152
}
93153

@@ -100,7 +160,12 @@ public void GetUsfm_Verse_RowVerseSegment()
100160
};
101161

102162
string target = UpdateUsfm(rows);
103-
Assert.That(target, Contains.Substring("\\v 1 First verse of the second chapter.\r\n"));
163+
Assert.That(
164+
target,
165+
Contains.Substring(
166+
"\\v 1 First verse of the second chapter. \\f + \\fr 2:1: \\ft This is a \\bd footnote.\\bd*\\f*\r\n"
167+
)
168+
);
104169
}
105170

106171
[Test]
@@ -124,7 +189,12 @@ public void GetUsfm_Verse_MultipleParas()
124189
};
125190

126191
string target = UpdateUsfm(rows);
127-
Assert.That(target, Contains.Substring("\\v 2 Second verse of the first chapter.\r\n\\li2\r\n"));
192+
Assert.That(
193+
target,
194+
Contains.Substring(
195+
"\\v 2 Second verse of the first chapter.\r\n\\li2 \\f + \\fr 1:2: \\ft This is a footnote for v2.\\f*"
196+
)
197+
);
128198
}
129199

130200
[Test]
@@ -200,7 +270,7 @@ public void GetUsfm_MergeVerseSegments()
200270
};
201271

202272
string target = UpdateUsfm(rows);
203-
Assert.That(target, Contains.Substring("\\v 2-3 Verse 2. Verse 2a. Verse 2b.\r\n"));
273+
Assert.That(target, Contains.Substring("\\v 2-3 Verse 2. Verse 2a. Verse 2b. \\fm ∆\\fm*\r\n"));
204274
}
205275

206276
[Test]
@@ -212,7 +282,7 @@ public void GetUsfm_Verse_OptBreak()
212282
(ScrRef("MAT 2:3"), "Third verse of the second chapter.")
213283
};
214284

215-
string target = UpdateUsfm(rows);
285+
string target = UpdateUsfm(rows, embeddedBehavior: UpdateUsfmIntraVerseMarkerBehavior.Strip);
216286
Assert.That(
217287
target,
218288
Contains.Substring("\\v 2-3 Second verse of the second chapter. Third verse of the second chapter.\r\n")
@@ -278,7 +348,12 @@ public void GetUsfm_NonVerse_Relaxed()
278348

279349
string target = UpdateUsfm(rows);
280350
Assert.That(target, Contains.Substring("\\s The first chapter.\r\n"));
281-
Assert.That(target, Contains.Substring("\\v 1 First verse of the first chapter.\r\n"));
351+
Assert.That(
352+
target,
353+
Contains.Substring(
354+
"\\v 1 First verse of the first chapter. \\f + \\fr 1:1: \\ft This is a footnote for v1.\\f*\r\n"
355+
)
356+
);
282357
Assert.That(
283358
target,
284359
Contains.Substring("\\tr \\tc1 The first cell of the table. \\tc2 The second cell of the table.\r\n")
@@ -294,7 +369,7 @@ public void GetUsfm_NonVerse_Sidebar()
294369
{
295370
var rows = new List<(IReadOnlyList<ScriptureRef>, string)>
296371
{
297-
(ScrRef("MAT 2:3/1:esb/1:ms"), "The first paragraph of the sidebar.")
372+
(ScrRef("MAT 2:3/2:esb/1:ms"), "The first paragraph of the sidebar.")
298373
};
299374

300375
string target = UpdateUsfm(rows);
@@ -326,7 +401,7 @@ public void GetUsfm_NonVerse_OptBreak()
326401
{
327402
var rows = new List<(IReadOnlyList<ScriptureRef>, string)>
328403
{
329-
(ScrRef("MAT 2:3/1:esb/2:p"), "The second paragraph of the sidebar.")
404+
(ScrRef("MAT 2:3/2:esb/2:p"), "The second paragraph of the sidebar.")
330405
};
331406

332407
string target = UpdateUsfm(rows);
@@ -345,6 +420,18 @@ public void GetUsfm_NonVerse_Milestone()
345420
Assert.That(target, Contains.Substring("\\s A new section header. \\ts-s\\*\r\n"));
346421
}
347422

423+
[Test]
424+
public void GetUsfm_NonVerse_KeepNote()
425+
{
426+
var rows = new List<(IReadOnlyList<ScriptureRef>, string)>
427+
{
428+
(ScrRef("MAT 1:0/3:ip"), "The introductory paragraph.")
429+
};
430+
431+
string target = UpdateUsfm(rows, embeddedBehavior: UpdateUsfmIntraVerseMarkerBehavior.Preserve);
432+
Assert.That(target, Contains.Substring("\\ip The introductory paragraph. \\fe + \\ft \\fe*\r\n"));
433+
}
434+
348435
[Test]
349436
public void GetUsfm_NonVerse_SkipNote()
350437
{
@@ -353,7 +440,7 @@ public void GetUsfm_NonVerse_SkipNote()
353440
(ScrRef("MAT 1:0/3:ip"), "The introductory paragraph.")
354441
};
355442

356-
string target = UpdateUsfm(rows);
443+
string target = UpdateUsfm(rows, embeddedBehavior: UpdateUsfmIntraVerseMarkerBehavior.Strip);
357444
Assert.That(target, Contains.Substring("\\ip The introductory paragraph.\r\n"));
358445
}
359446

@@ -369,7 +456,7 @@ public void GetUsfm_NonVerse_ReplaceNote()
369456
string target = UpdateUsfm(rows);
370457
Assert.That(
371458
target,
372-
Contains.Substring("\\ip The introductory paragraph. \\fe + \\ft This is a new endnote.\\fe*\r\n")
459+
Contains.Substring("\\ip The introductory paragraph. \\fe + \\ft This is a new endnote. \\fe*\r\n")
373460
);
374461
}
375462

@@ -426,7 +513,7 @@ public void GetUsfm_Verse_PretranslationsBeforeText()
426513
};
427514

428515
string target = UpdateUsfm(rows);
429-
Assert.That(target, Contains.Substring("\\ip The introductory paragraph.\r\n"));
516+
Assert.That(target, Contains.Substring("\\ip The introductory paragraph. \\fe + \\ft \\fe*\r\n"));
430517
}
431518

432519
private static ScriptureRef[] ScrRef(params string[] refs)
@@ -438,18 +525,20 @@ private static string UpdateUsfm(
438525
IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)>? rows = null,
439526
string? source = null,
440527
string? idText = null,
441-
UpdateUsfmBehavior behavior = UpdateUsfmBehavior.PreferNew
528+
UpdateUsfmTextBehavior textBehavior = UpdateUsfmTextBehavior.PreferNew,
529+
UpdateUsfmIntraVerseMarkerBehavior embeddedBehavior = UpdateUsfmIntraVerseMarkerBehavior.Preserve,
530+
UpdateUsfmIntraVerseMarkerBehavior styleBehavior = UpdateUsfmIntraVerseMarkerBehavior.Strip
442531
)
443532
{
444533
if (source is null)
445534
{
446535
var updater = new FileParatextProjectTextUpdater(CorporaTestHelpers.UsfmTestProjectPath);
447-
return updater.UpdateUsfm("MAT", rows, idText, behavior);
536+
return updater.UpdateUsfm("MAT", rows, idText, textBehavior, embeddedBehavior, styleBehavior);
448537
}
449538
else
450539
{
451540
source = source.Trim().ReplaceLineEndings("\r\n") + "\r\n";
452-
var updater = new UpdateUsfmParserHandler(rows, idText, behavior);
541+
var updater = new UpdateUsfmParserHandler(rows, idText, textBehavior, embeddedBehavior, styleBehavior);
453542
UsfmParser.Parse(source, updater);
454543
return updater.GetUsfm();
455544
}

‎tests/SIL.Machine.Tests/Corpora/UsfmFileTextTests.cs

+23-19
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ public void GetRows_NonEmptyText_AllText()
7373

7474
IText text = corpus["MAT"];
7575
TextRow[] rows = text.GetRows().ToArray();
76-
Assert.That(rows, Has.Length.EqualTo(50));
76+
Assert.That(rows, Has.Length.EqualTo(51));
7777

7878
Assert.That(rows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/1:h", corpus.Versification)));
7979
Assert.That(rows[0].Text, Is.EqualTo("Matthew"));
@@ -82,10 +82,10 @@ public void GetRows_NonEmptyText_AllText()
8282
Assert.That(rows[1].Text, Is.EqualTo("Matthew"));
8383

8484
Assert.That(rows[2].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/3:ip", corpus.Versification)));
85-
Assert.That(rows[2].Text, Is.EqualTo("An introduction to Matthew"));
85+
Assert.That(rows[2].Text, Is.EqualTo("An introduction to Matthew with an empty comment"));
8686

8787
Assert.That(rows[3].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/3:ip/1:fe", corpus.Versification)));
88-
Assert.That(rows[3].Text, Is.EqualTo("This is an endnote."));
88+
Assert.That(rows[3].Text, Is.EqualTo(""));
8989

9090
Assert.That(rows[4].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/4:p", corpus.Versification)));
9191
Assert.That(rows[4].Text, Is.EqualTo("MAT 1 Here is another paragraph."));
@@ -100,10 +100,10 @@ public void GetRows_NonEmptyText_AllText()
100100
Assert.That(rows[8].Text, Is.EqualTo("Chapter One"));
101101

102102
Assert.That(rows[10].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:1/1:f", corpus.Versification)));
103-
Assert.That(rows[10].Text, Is.EqualTo("1:1: This is a footnote."));
103+
Assert.That(rows[10].Text, Is.EqualTo("This is a footnote for v1."));
104104

105105
Assert.That(rows[12].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:2/1:f", corpus.Versification)));
106-
Assert.That(rows[12].Text, Is.EqualTo("1:2: This is a footnote."));
106+
Assert.That(rows[12].Text, Is.EqualTo("This is a footnote for v2."));
107107

108108
Assert.That(rows[19].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:0/1:tr/1:tc1", corpus.Versification)));
109109
Assert.That(rows[19].Text, Is.EqualTo("Row one, column one."));
@@ -124,12 +124,12 @@ public void GetRows_NonEmptyText_AllText()
124124
Assert.That(rows[24].Text, Is.Empty);
125125

126126
Assert.That(rows[26].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:1/1:f", corpus.Versification)));
127-
Assert.That(rows[26].Text, Is.EqualTo("2:1: This is a footnote."));
127+
Assert.That(rows[26].Text, Is.EqualTo("This is a footnote."));
128128

129-
Assert.That(rows[29].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:3/1:esb/1:ms", corpus.Versification)));
129+
Assert.That(rows[29].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:3/2:esb/1:ms", corpus.Versification)));
130130
Assert.That(rows[29].Text, Is.EqualTo("This is a sidebar"));
131131

132-
Assert.That(rows[30].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:3/1:esb/2:p", corpus.Versification)));
132+
Assert.That(rows[30].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:3/2:esb/2:p", corpus.Versification)));
133133
Assert.That(rows[30].Text, Is.EqualTo("Here is some sidebar content."));
134134

135135
Assert.That(rows[36].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:7a/1:s", corpus.Versification)));
@@ -149,7 +149,7 @@ public void GetRows_SentenceStart()
149149
Assert.That(rows, Has.Length.EqualTo(24));
150150

151151
Assert.That(rows[3].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:4", corpus.Versification)));
152-
Assert.That(rows[3].Text, Is.EqualTo("Chapter one,verse four,"));
152+
Assert.That(rows[3].Text, Is.EqualTo("Chapter one, verse four,"));
153153
Assert.That(rows[3].IsSentenceStart, Is.True);
154154

155155
Assert.That(rows[4].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:5", corpus.Versification)));
@@ -184,13 +184,15 @@ public void GetRows_IncludeMarkers()
184184
Assert.That(rows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:1", corpus.Versification)));
185185
Assert.That(
186186
rows[0].Text,
187-
Is.EqualTo("Chapter \\pn one\\+pro WON\\+pro*\\pn*, verse one.\\f + \\fr 1:1: \\ft This is a footnote.\\f*")
187+
Is.EqualTo(
188+
"Chapter \\pn one\\+pro WON\\+pro*\\pn*, verse \\f + \\fr 1:1: \\ft This is a footnote for v1.\\f*one."
189+
)
188190
);
189191

190192
Assert.That(rows[1].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:2", corpus.Versification)));
191193
Assert.That(
192194
rows[1].Text,
193-
Is.EqualTo("\\bd C\\bd*hapter one, \\li2 verse\\f + \\fr 1:2: \\ft This is a footnote.\\f* two.")
195+
Is.EqualTo("\\bd C\\bd*hapter one, \\li2 verse\\f + \\fr 1:2: \\ft This is a footnote for v2.\\f* two.")
194196
);
195197

196198
Assert.That(rows[4].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:5", corpus.Versification)));
@@ -204,7 +206,7 @@ public void GetRows_IncludeMarkers()
204206
Assert.That(rows[8].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:1", corpus.Versification)));
205207
Assert.That(
206208
rows[8].Text,
207-
Is.EqualTo("Chapter \\add two\\add*, verse \\f + \\fr 2:1: \\ft This is a footnote.\\f*one.")
209+
Is.EqualTo("Chapter \\add two\\add*, verse \\f + \\fr 2:1: \\ft This is a \\bd footnote.\\bd*\\f*one.")
208210
);
209211

210212
Assert.That(rows[9].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:2", corpus.Versification)));
@@ -229,7 +231,7 @@ public void GetRows_IncludeMarkers()
229231
Assert.That(rows[12].Text, Is.EqualTo("Chapter two, verse four."));
230232

231233
Assert.That(rows[13].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:5", corpus.Versification)));
232-
Assert.That(rows[13].Text, Is.EqualTo("Chapter two, verse five \\rq (MAT 3:1)\\rq*."));
234+
Assert.That(rows[13].Text, Is.EqualTo("Chapter two, verse five\\rq (MAT 3:1)\\rq*."));
233235

234236
Assert.That(rows[14].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:6", corpus.Versification)));
235237
Assert.That(rows[14].Text, Is.EqualTo("Chapter two, verse \\w six|strong=\"12345\" \\w*."));
@@ -254,21 +256,23 @@ public void GetRows_IncludeMarkers_AllText()
254256

255257
IText text = corpus["MAT"];
256258
TextRow[] rows = text.GetRows().ToArray();
257-
Assert.That(rows, Has.Length.EqualTo(46));
259+
Assert.That(rows, Has.Length.EqualTo(47));
258260

259261
Assert.That(rows[2].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/3:ip", corpus.Versification)));
260-
Assert.That(rows[2].Text, Is.EqualTo("An introduction to Matthew\\fe + \\ft This is an endnote.\\fe*"));
262+
Assert.That(rows[2].Text, Is.EqualTo("An introduction to Matthew with an empty comment\\fe + \\ft \\fe*"));
261263

262264
Assert.That(rows[8].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:1", corpus.Versification)));
263265
Assert.That(
264266
rows[8].Text,
265-
Is.EqualTo("Chapter \\pn one\\+pro WON\\+pro*\\pn*, verse one.\\f + \\fr 1:1: \\ft This is a footnote.\\f*")
267+
Is.EqualTo(
268+
"Chapter \\pn one\\+pro WON\\+pro*\\pn*, verse \\f + \\fr 1:1: \\ft This is a footnote for v1.\\f*one."
269+
)
266270
);
267271

268272
Assert.That(rows[9].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:2", corpus.Versification)));
269273
Assert.That(
270274
rows[9].Text,
271-
Is.EqualTo("\\bd C\\bd*hapter one, \\li2 verse\\f + \\fr 1:2: \\ft This is a footnote.\\f* two.")
275+
Is.EqualTo("\\bd C\\bd*hapter one, \\li2 verse\\f + \\fr 1:2: \\ft This is a footnote for v2.\\f* two.")
272276
);
273277

274278
Assert.That(rows[12].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:5", corpus.Versification)));
@@ -285,10 +289,10 @@ public void GetRows_IncludeMarkers_AllText()
285289
Assert.That(rows[22].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:1", corpus.Versification)));
286290
Assert.That(
287291
rows[22].Text,
288-
Is.EqualTo("Chapter \\add two\\add*, verse \\f + \\fr 2:1: \\ft This is a footnote.\\f*one.")
292+
Is.EqualTo("Chapter \\add two\\add*, verse \\f + \\fr 2:1: \\ft This is a \\bd footnote.\\bd*\\f*one.")
289293
);
290294

291-
Assert.That(rows[26].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:3/1:esb/2:p", corpus.Versification)));
295+
Assert.That(rows[26].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 2:3/2:esb/2:p", corpus.Versification)));
292296
Assert.That(rows[26].Text, Is.EqualTo("Here is some sidebar // content."));
293297
}
294298
}

‎tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs

+6-2
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,11 @@ string sfmFileName in Directory
4949
string bookId;
5050
if (!targetSettings.IsBookFileName(sfmFileName, out bookId))
5151
continue;
52-
string newUsfm = updater.UpdateUsfm(bookId, pretranslations, behavior: UpdateUsfmBehavior.StripExisting);
52+
string newUsfm = updater.UpdateUsfm(
53+
bookId,
54+
pretranslations,
55+
textBehavior: UpdateUsfmTextBehavior.StripExisting
56+
);
5357
Assert.That(newUsfm, Is.Not.Null);
5458
}
5559
}
@@ -150,7 +154,7 @@ async Task GetUsfmAsync(string projectPath)
150154
string newUsfm = updater.UpdateUsfm(
151155
bookId,
152156
pretranslations,
153-
behavior: UpdateUsfmBehavior.StripExisting
157+
textBehavior: UpdateUsfmTextBehavior.StripExisting
154158
);
155159
Assert.That(newUsfm, Is.Not.Null);
156160
}

‎tests/SIL.Machine.Tests/Corpora/UsfmMemoryTextTests.cs

+26-1
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,32 @@ public void GetRows_VersePara_BeginningNonVerseSegment()
148148
includeAllText: true
149149
);
150150

151-
Assert.That(rows, Has.Length.EqualTo(4), string.Join(",", rows.Select(tr => tr.Text)));
151+
Assert.That(rows, Has.Length.EqualTo(5), string.Join(",", rows.Select(tr => tr.Text)));
152+
Assert.That(rows[0].Text, Is.EqualTo(""));
153+
Assert.That(rows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/1:q1")));
154+
Assert.That(rows[1].Text, Is.EqualTo("World"));
155+
Assert.That(rows[1].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/1:q1/1:f")));
156+
}
157+
158+
[Test]
159+
public void GetRows_VersePara_CommentFirst()
160+
{
161+
TextRow[] rows = GetRows(
162+
@"\id MAT - Test
163+
\f \fr 119 \ft World \f*
164+
\ip This is a comment
165+
\c 1
166+
\v 1 First verse in line!?!
167+
\c 2
168+
",
169+
includeAllText: true
170+
);
171+
172+
Assert.That(rows[0].Text, Is.EqualTo("World"));
173+
Assert.That(rows[0].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/1:f")));
174+
Assert.That(rows[1].Text, Is.EqualTo("This is a comment"));
175+
Assert.That(rows[1].Ref, Is.EqualTo(ScriptureRef.Parse("MAT 1:0/2:ip")));
176+
Assert.That(rows, Has.Length.EqualTo(3), string.Join(",", rows.Select(tr => tr.Text)));
152177
}
153178

154179
[Test]

‎tests/SIL.Machine.Tests/Corpora/UsfmTokenizerTests.cs

+14-14
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,22 @@ public void Tokenize()
1919
Assert.That(tokens[0].LineNumber, Is.EqualTo(1));
2020
Assert.That(tokens[0].ColumnNumber, Is.EqualTo(1));
2121

22-
Assert.That(tokens[37].Type, Is.EqualTo(UsfmTokenType.Text));
23-
Assert.That(tokens[37].Text, Is.EqualTo("Chapter One "));
24-
Assert.That(tokens[37].LineNumber, Is.EqualTo(10));
25-
Assert.That(tokens[37].ColumnNumber, Is.EqualTo(4));
22+
Assert.That(tokens[30].Type, Is.EqualTo(UsfmTokenType.Text));
23+
Assert.That(tokens[30].Text, Is.EqualTo("Chapter One "));
24+
Assert.That(tokens[30].LineNumber, Is.EqualTo(9));
25+
Assert.That(tokens[30].ColumnNumber, Is.EqualTo(4));
2626

27-
Assert.That(tokens[38].Type, Is.EqualTo(UsfmTokenType.Verse));
28-
Assert.That(tokens[38].Marker, Is.EqualTo("v"));
29-
Assert.That(tokens[38].Data, Is.EqualTo("1"));
30-
Assert.That(tokens[38].LineNumber, Is.EqualTo(11));
31-
Assert.That(tokens[38].ColumnNumber, Is.EqualTo(1));
27+
Assert.That(tokens[31].Type, Is.EqualTo(UsfmTokenType.Verse));
28+
Assert.That(tokens[31].Marker, Is.EqualTo("v"));
29+
Assert.That(tokens[31].Data, Is.EqualTo("1"));
30+
Assert.That(tokens[31].LineNumber, Is.EqualTo(10));
31+
Assert.That(tokens[31].ColumnNumber, Is.EqualTo(1));
3232

33-
Assert.That(tokens[47].Type, Is.EqualTo(UsfmTokenType.Note));
34-
Assert.That(tokens[47].Marker, Is.EqualTo("f"));
35-
Assert.That(tokens[47].Data, Is.EqualTo("+"));
36-
Assert.That(tokens[47].LineNumber, Is.EqualTo(11));
37-
Assert.That(tokens[47].ColumnNumber, Is.EqualTo(52));
33+
Assert.That(tokens[40].Type, Is.EqualTo(UsfmTokenType.Note));
34+
Assert.That(tokens[40].Marker, Is.EqualTo("f"));
35+
Assert.That(tokens[40].Data, Is.EqualTo("+"));
36+
Assert.That(tokens[40].LineNumber, Is.EqualTo(10));
37+
Assert.That(tokens[40].ColumnNumber, Is.EqualTo(48));
3838
}
3939

4040
[Test]

0 commit comments

Comments
 (0)
Please sign in to comment.