Skip to content

Commit c3187b1

Browse files
committed
[API] LT-8457: Sort References Properly
* Add IStTxtPara.ReferenceForSorting(ISegment, int) to compute a computer-sortable reference string * Add TsStringUtils.IsNullOrPlaceholder(ITsString toTest, string placeholder) Part of https://jira.sil.org/browse/LT-8457 +semver: minor
1 parent 656e804 commit c3187b1

File tree

8 files changed

+312
-30
lines changed

8 files changed

+312
-30
lines changed

LCM.sln.DotSettings

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
<s:String x:Key="/Default/PatternsAndTemplates/Todo/TodoPatterns/=C6562928DAAA5C419C0A4E5109498163/Pattern/@EntryValue">(?&lt;=\W|^)(?&lt;TAG&gt;REVIEW)(\W|$)(.*)</s:String>
99
<s:String x:Key="/Default/PatternsAndTemplates/Todo/TodoPatterns/=C6562928DAAA5C419C0A4E5109498163/TodoIconStyle/@EntryValue">Normal</s:String>
1010
<s:Boolean x:Key="/Default/UserDictionary/Words/=analyses/@EntryIndexedValue">True</s:Boolean>
11+
<s:Boolean x:Key="/Default/UserDictionary/Words/=bldr/@EntryIndexedValue">True</s:Boolean>
1112
<s:Boolean x:Key="/Default/UserDictionary/Words/=Charis/@EntryIndexedValue">True</s:Boolean>
1213
<s:Boolean x:Key="/Default/UserDictionary/Words/=Duolos/@EntryIndexedValue">True</s:Boolean>
1314
<s:Boolean x:Key="/Default/UserDictionary/Words/=flid/@EntryIndexedValue">True</s:Boolean>

src/SIL.LCModel.Core/Text/TsStringUtils.cs

+9-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2004-2020 SIL International
1+
// Copyright (c) 2004-2022 SIL International
22
// This software is licensed under the LGPL, version 2.1 or later
33
// (http://www.gnu.org/licenses/lgpl-2.1.html)
44

@@ -1856,6 +1856,14 @@ public static bool IsNullOrEmpty(ITsString testMe)
18561856
{
18571857
return testMe == null || testMe.Length <= 0;
18581858
}
1859+
1860+
/// <summary>
/// Determines whether a string carries no real content: it is null, empty, or its text is
/// exactly the given placeholder.
/// </summary>
/// <param name="testMe">The string to test; may be null.</param>
/// <param name="placeholder">The text that stands in for a missing value (e.g. ***).</param>
/// <returns>
/// True if the string is null, empty, or a placeholder (e.g. ***)
/// </returns>
public static bool IsNullOrPlaceholder(ITsString testMe, string placeholder)
{
	// The static, ordinal overload makes the comparison kind explicit (exact match only)
	// and never dereferences Text.
	return IsNullOrEmpty(testMe) || string.Equals(testMe.Text, placeholder, System.StringComparison.Ordinal);
}
18591867
}
18601868
#endregion
18611869

src/SIL.LCModel/DomainImpl/ScrTxtPara.cs

+75-7
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
1-
// Copyright (c) 2003-2018 SIL International
1+
// Copyright (c) 2003-2022 SIL International
22
// This software is licensed under the LGPL, version 2.1 or later
33
// (http://www.gnu.org/licenses/lgpl-2.1.html)
44

55
using System;
66
using System.Collections;
77
using System.Collections.Generic;
88
using System.Diagnostics;
9+
using System.Linq;
10+
using System.Text;
11+
using System.Text.RegularExpressions;
912
using Icu;
1013
using SIL.LCModel.Core.KernelInterfaces;
1114
using SIL.LCModel.Core.Scripture;
@@ -2117,25 +2120,90 @@ public override ITsString Reference(ISegment seg, int ich)
21172120
{
21182121
var stText = Owner as IStText;
21192122
if (stText == null)
2120-
return Cache.MakeUserTss("unknown"); // should never happen, I think?
2123+
return Cache.MakeUserTss(Strings.ksStars); // should never happen, I think?
21212124
if (stText.OwningFlid == ScrSectionTags.kflidContent)
21222125
{
21232126
// Body of Scripture. Figure a book/chapter/verse
21242127
IScrBook book = (IScrBook) stText.Owner.Owner;
21252128
string mainRef = ScriptureServices.FullScrRef(this, ich, book.BestUIAbbrev).Trim();
21262129
return Cache.MakeUserTss(mainRef + ScriptureServices.VerseSegLabel(this, SegmentsOS.IndexOf(seg)));
21272130
}
2128-
if (stText.OwningFlid == ScrSectionTags.kflidHeading)
2131+
//if (stText.OwningFlid == ScrSectionTags.kflidHeading)
2132+
//{
2133+
// // use the section title without qualifiers.
2134+
// return stText.Title.BestVernacularAnalysisAlternative;
2135+
//}
2136+
if (stText.OwningFlid == ScrBookTags.kflidTitle)
21292137
{
2130-
// use the section title without qualifiers.
21312138
return stText.Title.BestVernacularAnalysisAlternative;
21322139
}
2133-
if (stText.OwningFlid == ScrBookTags.kflidTitle)
2140+
return Cache.MakeUserTss(Strings.ksStars); // should never happen, I think?
2141+
}
2142+
2143+
/// <summary>Matches each run of digits in a Scripture reference (e.g. the chapter and the verse).</summary>
private static readonly Regex s_numbersInRef = new Regex(@"\d+", RegexOptions.Compiled);

/// <inheritdoc/>
/// <remarks>
/// Body text yields a key of the form "0 Scr 40_MAT 00001:00002a 0000000017": the Scripture prefix,
/// the book's canonical number and abbreviation, the chapter and verse each zero-padded to five digits,
/// any verse-segment letter, and finally the zero-padded character offset.
/// A book title yields "0 Scr 40_MAT 0 0000000005".
/// </remarks>
public override ITsString ReferenceForSorting(ISegment seg, int ich)
{
	if (!(Owner is IStText stText))
	{
		return Scripture.Name.NotFoundTss;
	}

	// Use a prefix to make scripture references sort together when mixed with other references
	// (Scripture is sorted canonically, but a comparer for mixed references would sort alphabetically)
	var bldr = new StringBuilder(RefForSortingPrefix);
	switch (stText.OwningFlid)
	{
		case ScrSectionTags.kflidContent:
			RefForSortAddBookInfo(bldr, (IScrBook) stText.Owner.Owner);

			// Append the numerical portion of the reference, including any letter indicating part of a verse.
			// Every number in it is zero-padded so that string comparison orders it numerically.
			var refSansBook = ScriptureServices.FullScrRef(this, ich, string.Empty).Trim();
			var paddedRef = s_numbersInRef.Replace(refSansBook, number => ZeroPadForStringComparison(number.Value));
			bldr.Append(" ").Append(paddedRef).Append(ScriptureServices.VerseSegLabel(this, SegmentsOS.IndexOf(seg)));
			break;
		case ScrBookTags.kflidTitle:
			RefForSortAddBookInfo(bldr, (IScrBook) stText.Owner);
			// The book title should sort before anything else in the book
			bldr.Append(" 0");
			break;
		default:
			return Cache.MakeUserTss(Strings.ksStars);
	}

	// Add ich so that two references in the same segment still have a defined order.
	bldr.Append(" ").Append(ZeroPadForStringComparison(ich));
	return Cache.MakeUserTss(bldr.ToString());
}
2181+
2182+
/// <summary>
/// Text that starts every sort key built by the ReferenceForSorting override in this class, so that
/// Scripture references sort together when mixed with other references.
/// </summary>
protected internal const string RefForSortingPrefix = "0 Scr ";
2183+
2184+
/// <summary>
/// Appends the part of a sort key that identifies the book: its canonical number, an underscore,
/// and its best UI abbreviation (e.g. "40_MAT").
/// </summary>
/// <param name="bldr">The sort key under construction; the book info is appended to it.</param>
/// <param name="book">The book whose number and abbreviation are appended.</param>
protected static void RefForSortAddBookInfo(StringBuilder bldr, IScrBook book)
{
	// The number puts books in canonical order; the abbreviation makes no difference for sorting,
	// but could make debugging easier.
	// NOTE(review): the number is appended without zero-padding, so under plain string comparison a
	// one-digit book number (e.g. 9) would order after a two-digit one (e.g. 40) - confirm how keys are compared.
	bldr.Append(book.CanonicalNum).Append("_").Append(book.BestUIAbbrev);
}
2191+
2192+
/// <summary>
/// Zero-pads a number that is already inside <paramref name="bldr"/> so that it is at least five
/// digits long. Numbers of five or more digits are left unchanged.
/// </summary>
/// <param name="bldr">The builder holding the number; zeroes are inserted into it in place.</param>
/// <param name="index">Index in <paramref name="bldr"/> of the number's first digit.</param>
/// <param name="cExistingDigits">How many digits the number currently has.</param>
protected internal static void ZeroPadForStringComparison(StringBuilder bldr, int index, int cExistingDigits)
{
	var cMissing = 5 - cExistingDigits;
	if (cMissing > 0)
	{
		// Insert all of the missing zeroes in front of the number at once.
		bldr.Insert(index, "0", cMissing);
	}
}
2199+
2200+
/// <summary>
/// Returns the given number (as text) left-padded with zeroes to at least five characters,
/// e.g. "9" becomes "00009". Longer input is returned unchanged.
/// </summary>
/// <param name="intInRef">The digits of one number taken from a reference; may be empty.</param>
/// <returns>The zero-padded text.</returns>
protected internal static string ZeroPadForStringComparison(string intInRef)
{
	// Same five-character width as the StringBuilder overload of this method.
	return intInRef.PadLeft(5, '0');
}
2206+
21392207
/// ------------------------------------------------------------------------------------
21402208
/// <summary>
21412209
/// Gets the footnote sequence.

src/SIL.LCModel/DomainImpl/StTxtPara.cs

+53-5
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ public IStTxtPara PreviousParagraph
5050
/// <summary>
5151
/// Return a Reference (e.g., Scripture reference, or text abbreviation/para #/sentence#) for the specified character
5252
/// position (in the whole paragraph), which is assumed to belong to the specified segment.
53-
/// (For now, ich is not actually used, but it may become important if we decide not to split segements for
53+
/// (For now, ich is not actually used, but it may become important if we decide not to split segments for
5454
/// verse numbers.)
5555
/// Overridden in ScrTxtPara to handle special cases for Scripture refs.
5656
/// </summary>
@@ -100,10 +100,6 @@ public virtual ITsString Reference(ISegment seg, int ich)
100100
if (bldr.Length > 0)
101101
bldr.Replace(bldr.Length, bldr.Length, " ", props);
102102

103-
// if Scripture.IsResponsibleFor(stText) we should try to get the verse number of the annotation.
104-
//if (stText.OwningFlid == (int)Text.TextTags.kflidContents)
105-
//{
106-
107103
// Insert paragraph number.
108104
int ipara = stText.ParagraphsOS.IndexOf(this) + 1;
109105
bldr.Replace(bldr.Length, bldr.Length, ipara.ToString(), props);
@@ -117,6 +113,58 @@ public virtual ITsString Reference(ISegment seg, int ich)
117113
return bldr.GetString();
118114
}
119115

116+
/// <inheritdoc/>
/// <remarks>
/// The key is the owning text's abbreviation (or, failing that, the text's title), a space, the
/// zero-padded one-based paragraph and segment numbers separated by a period, another space, and
/// the zero-padded character offset.
/// </remarks>
public virtual ITsString ReferenceForSorting(ISegment seg, int ich)
{
	var stText = Owner as IStText;
	if (stText == null)
	{
		return TsStringUtils.EmptyString(Cache.DefaultUserWs);
	}

	// True when the candidate name is null, empty, or just the "not found" placeholder.
	bool IsMissing(ITsString candidate) => TsStringUtils.IsNullOrPlaceholder(candidate, stText.Title.NotFoundTss.Text);

	// Prefer the abbreviation of the owning text; fall back to the title when it is unusable.
	ITsString tssName = null;
	if (stText.Owner is IText text)
	{
		tssName = text.Abbreviation.BestVernacularAnalysisAlternative;
	}
	if (IsMissing(tssName))
	{
		tssName = stText.Title.BestVernacularAnalysisAlternative;
	}

	// Make a TsTextProps specifying only the writing system (the one the name starts with).
	var wsActual = tssName.get_Properties(0).GetIntPropValues((int)FwTextPropType.ktptWs, out _);
	var propBldr = TsStringUtils.MakePropsBldr();
	propBldr.SetIntPropValues((int)FwTextPropType.ktptWs, (int)FwTextPropVar.ktpvDefault, wsActual);
	var props = propBldr.GetTextProps();

	var bldr = IsMissing(tssName) ? new TsStrBldr() : tssName.GetBldr();

	// Start with a space even if we don't have a title, so untitled texts sort to the top.
	bldr.Append(" ", props);

	// Insert paragraph and segment numbers (one-based).
	var paraNumber = stText.ParagraphsOS.IndexOf(this) + 1;
	var segNumber = SegmentsOS.IndexOf(seg) + 1;
	bldr.Append(ZeroPadForStringComparison(paraNumber), props)
		.Append(".", props)
		.Append(ZeroPadForStringComparison(segNumber), props);

	// Insert the offset so that two references in the same segment are sorted properly (LT-8457)
	bldr.Append(" ", props).Append(ZeroPadForStringComparison(ich), props);

	return bldr.GetString();
}
160+
161+
/// <summary>Pads the given int with zeroes to the max length of an int</summary>
/// <param name="i">The number to format.</param>
/// <returns>
/// The number as at least ten decimal digits, e.g. 3 becomes "0000000003". The result is formatted
/// with the invariant culture so that sort keys do not depend on the current culture.
/// </returns>
protected internal static string ZeroPadForStringComparison(int i)
{
	// because int.MaxValue.ToString().Length is 10
	return i.ToString("D10", System.Globalization.CultureInfo.InvariantCulture);
}
167+
120168
/// ------------------------------------------------------------------------------------
121169
/// <summary>
122170
/// Finds the ORC of the specified picture and deletes it from the paragraph and any

src/SIL.LCModel/InterfaceAdditions.cs

+16-6
Original file line numberDiff line numberDiff line change
@@ -3136,15 +3136,25 @@ IStTxtPara PreviousParagraph
31363136
List<IConstChartWordGroup> GetChartCellRefs();
31373137

31383138
/// ------------------------------------------------------------------------------------
3139-
/// <summary>
3140-
/// Return a Reference (e.g., Scripture reference, or text abbreviation/para #/sentence#) for the specified character
3141-
/// position (in the whole paragraph), which is assumed to belong to the specified segment.
3142-
/// (For now, ich is not actually used, but it may become important if we decide not to split segements for
3143-
/// verse numbers.)
3144-
/// </summary>
3139+
/// <summary>
3140+
/// Return a Reference (e.g., Scripture reference, or text abbreviation+para #+sentence #) for the specified character
3141+
/// position (in the whole paragraph), which is assumed to belong to the specified segment.
3142+
/// (For now, ich is not actually used, but it may become important if we decide not to split segments for
3143+
/// verse numbers.)
3144+
/// </summary>
31453145
/// ------------------------------------------------------------------------------------
31463146
ITsString Reference(ISegment seg, int ich);
31473147

3148+
/// ------------------------------------------------------------------------------------
3149+
/// <summary>
3150+
/// Return a Reference (e.g., Scripture reference, or text abbreviation+para #+sentence #) for the specified character
3151+
/// position (in the whole paragraph), which is assumed to belong to the specified segment.
3152+
/// To allow greater accuracy and precision in sorting, numbers are zero-padded to the length of <see cref="int.MaxValue"/> and ich
3153+
/// is included at the end.
3154+
/// </summary>
3155+
/// ------------------------------------------------------------------------------------
3156+
ITsString ReferenceForSorting(ISegment seg, int ich);
3157+
31483158
/// ------------------------------------------------------------------------------------
31493159
/// <summary>
31503160
/// Splits the paragraph at the specified character index.

tests/SIL.LCModel.Core.Tests/Text/TsStringUtilsTests.cs

+23-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2004-2021 SIL International
1+
// Copyright (c) 2004-2022 SIL International
22
// This software is licensed under the LGPL, version 2.1 or later
33
// (http://www.gnu.org/licenses/lgpl-2.1.html)
44

@@ -2110,6 +2110,28 @@ public void RemoveIllegalXmlChars()
21102110
Assert.That(TsStringUtils.RemoveIllegalXmlChars(outOfOrderSurrogates).Text, Is.EqualTo("\xd800\xdc00z"));
21112111
}
21122112

2113+
/// <summary>A null string counts both as "null or empty" and as "null or placeholder".</summary>
[Test]
public void IsNull_OrMissing_Null()
{
	var isNullOrEmpty = TsStringUtils.IsNullOrEmpty(null);
	Assert.That(isNullOrEmpty, Is.True, "null is null or empty");

	var isNullOrPlaceholder = TsStringUtils.IsNullOrPlaceholder(null, "***");
	Assert.That(isNullOrPlaceholder, Is.True, "null is null or placeholder");
}
2119+
2120+
/// <summary>Only an empty string is "null or empty"; a placeholder or real text is not.</summary>
[TestCase("", ExpectedResult = true)]
[TestCase("***", ExpectedResult = false)]
[TestCase("t", ExpectedResult = false)]
public bool IsNullOrEmpty(string actual)
{
	var tss = TsStringUtils.MakeString(actual, m_wsf.UserWs);
	return TsStringUtils.IsNullOrEmpty(tss);
}
2127+
2128+
/// <summary>An empty string and the placeholder itself both count; real text does not.</summary>
[TestCase("", ExpectedResult = true)]
[TestCase("***", ExpectedResult = true)]
[TestCase("t", ExpectedResult = false)]
public bool IsNullOrPlaceholder(string actual)
{
	var tss = TsStringUtils.MakeString(actual, m_wsf.UserWs);
	return TsStringUtils.IsNullOrPlaceholder(tss, "***");
}
21132135
#endregion
21142136
}
21152137
}

tests/SIL.LCModel.Tests/DomainImpl/ScrTxtParaTests.cs

+57-10
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright (c) 2003-2018 SIL International
1+
// Copyright (c) 2003-2022 SIL International
22
// This software is licensed under the LGPL, version 2.1 or later
33
// (http://www.gnu.org/licenses/lgpl-2.1.html)
44

@@ -549,23 +549,70 @@ public void Reference()
549549
AddDataToMatthew();
550550
var para1 = (IStTxtPara) m_book.SectionsOS[1].ContentOA.ParagraphsOS[0]; // Actually ScrTxtPara
551551
var seg = para1.SegmentsOS[1]; // first content ref, after the chapter and verse number stuff.
552-
Assert.That(para1.Reference(seg, seg.BeginOffset + 1).Text, Is.EqualTo("MAT 1:1"));
552+
var v1Seg1Idx = seg.BeginOffset + 1;
553+
Assert.That(para1.Reference(seg, v1Seg1Idx).Text, Is.EqualTo("MAT 1:1"));
554+
Assert.That(para1.ReferenceForSorting(seg, v1Seg1Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00001 {v1Seg1Idx:D10}"));
553555
AddRunToMockedPara(para1, "Verse two second sentence.", null);
554-
var v2seg1 = para1.SegmentsOS[3]; // first segment of two-sentence verse
555-
Assert.That(para1.Reference(v2seg1, v2seg1.BeginOffset + 1).Text, Is.EqualTo("MAT 1:2a"));
556-
var v2seg2 = para1.SegmentsOS[4]; // first segment of two-sentence verse
557-
Assert.That(para1.Reference(v2seg2, v2seg2.BeginOffset + 1).Text, Is.EqualTo("MAT 1:2b"));
556+
var v2Seg1 = para1.SegmentsOS[3]; // first segment of two-sentence verse
557+
var v2Seg1Idx = v2Seg1.BeginOffset + 1;
558+
Assert.That(para1.Reference(v2Seg1, v2Seg1Idx).Text, Is.EqualTo("MAT 1:2a"));
559+
Assert.That(para1.ReferenceForSorting(v2Seg1, v2Seg1Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00002a {v2Seg1Idx:D10}"));
560+
var v2Seg2 = para1.SegmentsOS[4]; // second segment of two-sentence verse
561+
var v2Seg2Idx = v2Seg2.BeginOffset + 1;
562+
Assert.That(para1.Reference(v2Seg2, v2Seg2Idx).Text, Is.EqualTo("MAT 1:2b"));
563+
Assert.That(para1.ReferenceForSorting(v2Seg2, v2Seg2Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00002b {v2Seg2Idx:D10}"));
558564
IStTxtPara para2 = AddParaToMockedSectionContent((IScrSection)para1.Owner.Owner, ScrStyleNames.NormalParagraph);
559565
AddRunToMockedPara(para2, "Verse 2 seg 3", null);
560-
var v2seg3 = para2.SegmentsOS[0]; // third segment of three-sentence verse split over two paragraphs.
561-
Assert.That(para2.Reference(v2seg3, v2seg3.BeginOffset + 1).Text, Is.EqualTo("MAT 1:2c"));
566+
var v2Seg3 = para2.SegmentsOS[0]; // third segment of three-sentence verse split over two paragraphs.
567+
var v2Seg3Idx = v2Seg3.BeginOffset + 1;
568+
Assert.That(para2.Reference(v2Seg3, v2Seg3Idx).Text, Is.EqualTo("MAT 1:2c"));
569+
Assert.That(para2.ReferenceForSorting(v2Seg3, v2Seg3Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00002c {v2Seg3Idx:D10}"));
562570
var newSection = AddSectionToMockedBook(m_book);
563571
IStTxtPara para3 = AddParaToMockedSectionContent(newSection, ScrStyleNames.NormalParagraph);
564572
AddRunToMockedPara(para3, "Verse 2 seg 4", null);
565-
var v2seg4 = para3.SegmentsOS[0]; // fourth segment of four-sentence verse split over two sections(!).
573+
var v2Seg4 = para3.SegmentsOS[0]; // fourth segment of four-sentence verse split over two sections(!).
574+
var v2Seg4Idx = v2Seg4.BeginOffset + 1;
566575
// JohnT: arguably this should give MAT 1:2d. The current implementation does not detect the
567576
// segments in the previous section.
568-
Assert.That(para3.Reference(v2seg4, v2seg4.BeginOffset + 1).Text, Is.EqualTo("MAT 1:2"));
577+
Assert.That(para3.Reference(v2Seg4, v2Seg4Idx).Text, Is.EqualTo("MAT 1:2"));
578+
Assert.That(para3.ReferenceForSorting(v2Seg4, v2Seg4Idx).Text, Is.EqualTo($"0 Scr 40_MAT 00001:00002 {v2Seg4Idx:D10}"));
579+
580+
var scrBook1Samuel = CreateBookData(9, "1 Samuel");
581+
//var scrBookSusanna = CreateBookData(75/*?*/, "Susanna");
582+
// TODO (Hasso) 2022.03: Enoch or some other >100 book
583+
}
584+
585+
/// <summary>
/// Checks the display reference and the sort key for a paragraph in the intro-paragraph style.
/// </summary>
[Test]
public void Reference_IntroPara()
{
	// add section and empty paragraph
	var introSection = AddSectionToMockedBook(m_book, true);
	var introPara = AddParaToMockedSectionContent(introSection, ScrStyleNames.IntroParagraph);
	var firstSeg = introPara.SegmentsOS[0];
	const int ich = 3;

	var reference = introPara.Reference(firstSeg, ich).Text;
	Assert.That(reference, Is.EqualTo("Matthew (TODO)"));

	var sortKey = introPara.ReferenceForSorting(firstSeg, ich).Text;
	Assert.That(sortKey, Is.EqualTo("0 Scr 40_MAT 00001:00000 0000000003"));
}
596+
597+
/// <summary>
/// Checks the display reference and the sort key for a paragraph of the book title.
/// </summary>
[Test]
public void Reference_BookTitle()
{
	// add title
	var bookTitle = AddTitleToMockedBook(m_book, "This is the title");
	var titlePara = (IStTxtPara)bookTitle.ParagraphsOS[0];
	var firstSeg = titlePara.SegmentsOS[0];
	const int ich = 5;

	var reference = titlePara.Reference(firstSeg, ich).Text;
	Assert.That(reference, Is.EqualTo("Matthew (Title)"));

	var sortKey = titlePara.ReferenceForSorting(firstSeg, ich).Text;
	Assert.That(sortKey, Is.EqualTo("0 Scr 40_MAT 0 0000000005"));
}
608+
609+
/// <summary>Numbers taken from a reference are zero-padded to five digits.</summary>
[TestCase("", ExpectedResult = "00000")]
[TestCase("9", ExpectedResult = "00009")]
[TestCase("176", ExpectedResult = "00176")]
[TestCase("31103", ExpectedResult = "31103")]
public string ZeroPadForStringComparison(string intInRef) =>
	ScrTxtPara.ZeroPadForStringComparison(intInRef);
570617
#endregion
571618

0 commit comments

Comments
 (0)