1
1
using System . Collections . Generic ;
2
2
using System . Linq ;
3
+ using SIL . Extensions ;
3
4
using SIL . Scripture ;
4
5
5
6
namespace SIL . Machine . Corpora
@@ -9,7 +10,8 @@ public enum ScriptureTextType
9
10
None ,
10
11
NonVerse ,
11
12
Verse ,
12
- Note
13
+ Embedded ,
14
+ NoteText
13
15
}
14
16
15
17
public abstract class ScriptureRefUsfmParserHandlerBase : UsfmParserHandlerBase
@@ -19,6 +21,9 @@ public abstract class ScriptureRefUsfmParserHandlerBase : UsfmParserHandlerBase
19
21
private readonly Stack < ScriptureTextType > _curTextType ;
20
22
private bool _duplicateVerse = false ;
21
23
24
+ private bool _inEmbedded ;
25
+ public bool InNoteText { get ; private set ; }
26
+
22
27
protected ScriptureRefUsfmParserHandlerBase ( )
23
28
{
24
29
_curElements = new Stack < ScriptureElement > ( ) ;
@@ -59,7 +64,7 @@ string pubNumber
59
64
// ignore duplicate verses
60
65
_duplicateVerse = true ;
61
66
}
62
- else if ( VerseRef . AreOverlappingVersesRanges ( number , _curVerseRef . Verse ) )
67
+ else if ( VerseRef . AreOverlappingVersesRanges ( verse1 : number , verse2 : _curVerseRef . Verse ) )
63
68
{
64
69
// merge overlapping verse ranges into one range
65
70
VerseRef verseRef = _curVerseRef . Clone ( ) ;
@@ -153,20 +158,36 @@ public override void EndSidebar(UsfmParserState state, string marker, bool close
153
158
154
159
/// <summary>
/// Handles the start of a note: marks that an embedded element is open and forwards the
/// event to <see cref="StartEmbedded"/>.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="marker">The note marker.</param>
/// <param name="caller">The note caller, passed through unchanged.</param>
/// <param name="category">The note category, passed through unchanged.</param>
public override void StartNote(UsfmParserState state, string marker, string caller, string category)
{
    // the flag is raised first so it is already set while StartEmbedded runs
    _inEmbedded = true;
    StartEmbedded(state: state, marker: marker, caller: caller, category: category);
}
164
+
165
/// <summary>
/// Handles the end of a note: closes any open note text, notifies <see cref="EndEmbedded"/>
/// (with no attributes), and clears the embedded flag.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="marker">The note marker.</param>
/// <param name="closed">Passed through to <see cref="EndEmbedded"/> unchanged.</param>
public override void EndNote(UsfmParserState state, string marker, bool closed)
{
    // EndNoteText(state) is a no-op unless note text is the current text type
    EndNoteText(state);

    // notes carry no attribute list, so null is forwarded
    EndEmbedded(state, marker, attributes: null, closed: closed);

    _inEmbedded = false;
}
171
+
172
/// <summary>
/// Called when an embedded element (a note or an embedded character style) starts.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="marker">The marker of the embedded element.</param>
/// <param name="caller">The note caller; not used by this implementation.</param>
/// <param name="category">The note category; not used by this implementation.</param>
public virtual void StartEmbedded(UsfmParserState state, string marker, string caller, string category)
{
    // while the current verse reference is still the default, take it from the parser state
    if (_curVerseRef.IsDefault)
        UpdateVerseRef(state.VerseRef, marker);

    // nothing further is done while a duplicate verse is being skipped
    if (_duplicateVerse)
        return;

    // if we hit a note in a verse paragraph and we aren't in a verse, then start a non-verse segment
    CheckConvertVerseParaToNonVerse(state);
    NextElement(marker);
}
164
184
165
- public override void EndNote ( UsfmParserState state , string marker , bool closed )
166
- {
167
- if ( CurrentTextType == ScriptureTextType . Note && ! _duplicateVerse )
168
- EndNoteText ( state ) ;
169
- }
185
/// <summary>
/// Called when an embedded element ends. The base implementation does nothing; it is
/// invoked from <c>EndNote</c> (with <c>null</c> attributes) and from <c>EndChar</c>.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="marker">The marker of the embedded element.</param>
/// <param name="attributes">The attributes of the element; <c>null</c> when called for a note.</param>
/// <param name="closed">Passed through from the originating end event.</param>
public virtual void EndEmbedded(
    UsfmParserState state,
    string marker,
    IReadOnlyList<UsfmAttribute> attributes,
    bool closed
)
{
    // intentionally empty: an extension point for subclasses
}
170
191
171
192
public override void Text ( UsfmParserState state , string text )
172
193
{
@@ -187,9 +208,37 @@ public override void StartChar(
187
208
IReadOnlyList < UsfmAttribute > attributes
188
209
)
189
210
{
211
+ if ( IsEmbeddedPart ( markerWithoutPlus ) )
212
+ EndNoteText ( state ) ;
213
+
190
214
// if we hit a character marker in a verse paragraph and we aren't in a verse, then start a non-verse
191
215
// segment
192
216
CheckConvertVerseParaToNonVerse ( state ) ;
217
+
218
+ if ( IsEmbeddedCharacter ( markerWithoutPlus ) )
219
+ {
220
+ _inEmbedded = true ;
221
+ StartEmbedded ( state , markerWithoutPlus , null , null ) ;
222
+ }
223
+
224
+ if ( IsNoteText ( markerWithoutPlus ) )
225
+ {
226
+ StartNoteText ( state ) ;
227
+ }
228
+ }
229
+
230
/// <summary>
/// Handles the end of a character style. Only embedded character markers are acted on:
/// for those, <see cref="EndEmbedded"/> is notified and the embedded flag is cleared.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="marker">The character style marker.</param>
/// <param name="attributes">The attributes of the character style.</param>
/// <param name="closed">Passed through to <see cref="EndEmbedded"/> unchanged.</param>
public override void EndChar(
    UsfmParserState state,
    string marker,
    IReadOnlyList<UsfmAttribute> attributes,
    bool closed
)
{
    if (!IsEmbeddedCharacter(marker))
        return;

    EndEmbedded(state, marker, attributes, closed);

    // NOTE(review): the flag is cleared unconditionally; if an embedded character style can
    // end inside a still-open note, the flag would be reset early - confirm with the parser.
    _inEmbedded = false;
}
194
243
195
244
/// <summary>
/// Hook for the start of verse text. The base implementation does nothing.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="scriptureRefs">The scripture references supplied by the caller.</param>
protected virtual void StartVerseText(UsfmParserState state, IReadOnlyList<ScriptureRef> scriptureRefs)
{
    // intentionally empty: an extension point for subclasses
}
@@ -200,8 +249,25 @@ protected virtual void StartNonVerseText(UsfmParserState state, ScriptureRef scr
200
249
201
250
/// <summary>
/// Hook for the end of non-verse text. The base implementation does nothing. The private
/// <c>EndNonVerseText(UsfmParserState)</c> overload calls it before popping the current text type.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="scriptureRef">The non-verse reference created by the caller.</param>
protected virtual void EndNonVerseText(UsfmParserState state, ScriptureRef scriptureRef)
{
    // intentionally empty: an extension point for subclasses
}
202
251
252
/// <summary>
/// Begins a note-text segment: sets <see cref="InNoteText"/>, pushes
/// <see cref="ScriptureTextType.NoteText"/> as the current text type, and calls the
/// <c>StartNoteText(UsfmParserState, ScriptureRef)</c> hook with a freshly created non-verse reference.
/// </summary>
/// <param name="state">The current parser state.</param>
public virtual void StartNoteText(UsfmParserState state)
{
    InNoteText = true;
    _curTextType.Push(ScriptureTextType.NoteText);

    // the reference is created only after the text type has been pushed
    ScriptureRef noteRef = CreateNonVerseRef();
    StartNoteText(state, noteRef);
}
258
+
203
259
/// <summary>
/// Hook for the start of note text. The base implementation does nothing. The public
/// <c>StartNoteText(UsfmParserState)</c> overload calls it after pushing the note-text type.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="scriptureRef">The non-verse reference created by the caller.</param>
protected virtual void StartNoteText(UsfmParserState state, ScriptureRef scriptureRef)
{
    // intentionally empty: an extension point for subclasses
}
204
260
261
/// <summary>
/// Ends the current note-text segment, if there is one. Does nothing unless the top of the
/// text-type stack is <see cref="ScriptureTextType.NoteText"/>.
/// </summary>
/// <param name="state">The current parser state.</param>
public virtual void EndNoteText(UsfmParserState state)
{
    bool noteTextIsCurrent = _curTextType.Count > 0 && _curTextType.Peek() == ScriptureTextType.NoteText;
    if (!noteTextIsCurrent)
        return;

    // the hook runs while NoteText is still the current text type; it is popped afterwards
    EndNoteText(state, CreateNonVerseRef());
    _curTextType.Pop();
    InNoteText = false;
}
270
+
205
271
/// <summary>
/// Hook for the end of note text. The base implementation does nothing. The public
/// <c>EndNoteText(UsfmParserState)</c> overload calls it before popping the note-text type.
/// </summary>
/// <param name="state">The current parser state.</param>
/// <param name="scriptureRef">The non-verse reference created by the caller.</param>
protected virtual void EndNoteText(UsfmParserState state, ScriptureRef scriptureRef)
{
    // intentionally empty: an extension point for subclasses
}
206
272
207
273
private void StartVerseText ( UsfmParserState state )
@@ -227,22 +293,11 @@ private void StartNonVerseText(UsfmParserState state)
227
293
228
294
/// <summary>
/// Ends the current non-verse segment: drops a trailing embedded-character element, calls the
/// <c>EndNonVerseText(UsfmParserState, ScriptureRef)</c> hook, and pops the current text type.
/// </summary>
/// <param name="state">The current parser state.</param>
private void EndNonVerseText(UsfmParserState state)
{
    // embedded elements are removed first, so the reference below is created without them
    EndEmbeddedElements();

    ScriptureRef nonVerseRef = CreateNonVerseRef();
    EndNonVerseText(state, nonVerseRef);

    _curTextType.Pop();
}
233
300
234
- private void StartNoteText ( UsfmParserState state )
235
- {
236
- _curTextType . Push ( ScriptureTextType . Note ) ;
237
- StartNoteText ( state , CreateNonVerseRef ( ) ) ;
238
- }
239
-
240
- private void EndNoteText ( UsfmParserState state )
241
- {
242
- EndNoteText ( state , CreateNonVerseRef ( ) ) ;
243
- _curTextType . Pop ( ) ;
244
- }
245
-
246
301
private void UpdateVerseRef ( VerseRef verseRef , string marker )
247
302
{
248
303
if ( ! VerseRef . AreOverlappingVersesRanges ( verseRef , _curVerseRef ) )
@@ -270,6 +325,12 @@ private void EndParentElement()
270
325
_curElements . Pop ( ) ;
271
326
}
272
327
328
/// <summary>
/// Removes the top element from the element stack when it is an embedded character element.
/// At most one element is removed per call.
/// </summary>
private void EndEmbeddedElements()
{
    if (_curElements.Count == 0)
        return;

    if (IsEmbeddedCharacter(_curElements.Peek().Name))
        _curElements.Pop();
}
333
+
273
334
private IReadOnlyList < ScriptureRef > CreateVerseRefs ( )
274
335
{
275
336
return _curVerseRef . HasMultiple
@@ -300,5 +361,25 @@ private void CheckConvertVerseParaToNonVerse(UsfmParserState state)
300
361
StartNonVerseText ( state ) ;
301
362
}
302
363
}
364
+
365
/// <summary>
/// Reports whether the handler is inside an embedded element, or whether the given marker
/// is itself an embedded character marker.
/// </summary>
/// <param name="marker">The marker to test when no embedded element is currently open.</param>
/// <returns><c>true</c> if the embedded flag is set or <paramref name="marker"/> is an embedded character marker.</returns>
public bool InEmbedded(string marker)
{
    if (_inEmbedded)
        return true;

    return IsEmbeddedCharacter(marker);
}
369
+
370
/// <summary>
/// Determines whether the marker is the note-text marker <c>ft</c>.
/// </summary>
/// <param name="marker">The marker to test; may be <c>null</c>.</param>
/// <returns><c>true</c> only when <paramref name="marker"/> is exactly <c>"ft"</c>.</returns>
private static bool IsNoteText(string marker)
{
    // ordinal, case-sensitive comparison; a null marker yields false
    return string.Equals(marker, "ft");
}
374
+
375
/// <summary>
/// Determines whether the marker begins with one of the characters <c>f</c>, <c>x</c> or <c>z</c>.
/// </summary>
/// <param name="marker">The marker to test; may be <c>null</c> or empty.</param>
/// <returns><c>false</c> for a <c>null</c> or empty marker; otherwise whether its first character is <c>f</c>, <c>x</c> or <c>z</c>.</returns>
public static bool IsEmbeddedPart(string marker)
{
    if (string.IsNullOrEmpty(marker))
        return false;

    char firstChar = marker[0];
    return firstChar.IsOneOf('f', 'x', 'z');
}
379
+
380
/// <summary>
/// Determines whether the marker is one of the embedded markers
/// <c>f</c>, <c>fe</c>, <c>fig</c>, <c>fm</c> or <c>x</c>.
/// </summary>
/// <param name="marker">The marker to test.</param>
/// <returns><c>true</c> when <paramref name="marker"/> is one of the listed markers.</returns>
private static bool IsEmbeddedCharacter(string marker) =>
    marker.IsOneOf("f", "fe", "fig", "fm", "x");
303
384
}
304
385
}
0 commit comments