Skip to content

Commit 51868ce

Browse files
committed
[DUCK] 修复“去年上半年”的“上半年”结合错误
1 parent dda8328 commit 51868ce

File tree

10 files changed

+40
-29
lines changed

10 files changed

+40
-29
lines changed

duckling-fork-chinese/core/src/main/scala/com/xiaomi/duckling/dimension/time/Rules.scala

+13-13
Original file line number | Diff line number | Diff line change
@@ -276,7 +276,7 @@ trait Rules extends DimRules {
276276
name = "recent/last/next <duration>",
277277
pattern = List(RecentPattern.regex, or(isNotLatentDuration, isFuzzyNotLatentDuration).predicate),
278278
prod = tokens {
279-
case Token(_, GroupMatch(_ :: g1 :: g2 :: _)) :: Token(Duration, DurationData(v, g, _, fuzzy, _)) :: _ =>
279+
case Token(_, GroupMatch(_ :: g1 :: g2 :: _)) :: Token(Duration, DurationData(v, g, _, fuzzy, half, _)) :: _ =>
280280
val s = if (StringUtils.isBlank(g1)) g2 else g1
281281
// 月必须是x个月
282282
s match {
@@ -291,7 +291,7 @@ trait Rules extends DimRules {
291291
cycleN(notImmediate = false, Day, 30)
292292
}
293293
// = 1 已经在 this <cycle> 中定义过了
294-
else if (s == "" && (g == Day || v == 1)) None
294+
else if (s == "" && (g == Day || v == 1 || half)) None
295295
else {
296296
val td1 = cycleN(notImmediate = false, g, v)
297297
g match {
@@ -311,7 +311,7 @@ trait Rules extends DimRules {
311311
cycleN(notImmediate = false, Day, 7 * v).map(t => tt(t.at(Hint.Recent)))
312312
} else tt(cycleNth(g, 0))
313313
case "" | "" | "之前" | "往前" | "向前" | "过去" | "过去" =>
314-
if (s == "" && (g == Day || g == Year)) None
314+
if (s == "" && (g == Day || g == Year || half)) None
315315
else if (s == "过去" && fuzzy) None
316316
else if (v > 1) tt(cycleN(notImmediate = true, g, -v).at(Hint.Recent))
317317
else tt(cycleNth(g, -1).at(Hint.Recent))
@@ -327,7 +327,7 @@ trait Rules extends DimRules {
327327
name = "n <cycle> next/last 1: <duration> 之后",
328328
pattern = List(isNotLatentDuration.predicate, "((之|以)?(后|前))|过后".regex),
329329
prod = optTokens {
330-
case (options: Options, Token(Duration, DurationData(v, grain, false, _, _)) :: Token(_, GroupMatch(s :: _)) :: _) =>
330+
case (options: Options, Token(Duration, DurationData(v, grain, false, _, _, _)) :: Token(_, GroupMatch(s :: _)) :: _) =>
331331
val offset = if (s.endsWith("")) v else -v
332332
val roundGrain = if (options.timeOptions.inheritGrainOfDuration) grain else NoGrain
333333
tt(finalRule(cycleNth(grain, offset, roundGrain)))
@@ -341,7 +341,7 @@ trait Rules extends DimRules {
341341
name = "n <cycle> next/last: 过 <duration>",
342342
pattern = List("".regex, isNotLatentDuration.predicate),
343343
prod = optTokens {
344-
case (options: Options, _ :: Token(Duration, DurationData(v, grain, _, _, _)) :: _ ) =>
344+
case (options: Options, _ :: Token(Duration, DurationData(v, grain, _, _, _, _)) :: _ ) =>
345345
val roundGrain = if (options.timeOptions.inheritGrainOfDuration) grain else NoGrain
346346
tt(finalRule(cycleNth(grain, v, roundGrain)))
347347
}
@@ -354,7 +354,7 @@ trait Rules extends DimRules {
354354
name = "n <cycle> next/last 3:过 <duration> 之后",
355355
pattern = List("".regex, isNotLatentDuration.predicate, "之?(后|前)".regex),
356356
prod = optTokens {
357-
case (options: Options, _ :: Token(Duration, DurationData(v, grain, _, _, _)) :: _ ) =>
357+
case (options: Options, _ :: Token(Duration, DurationData(v, grain, _, _, _, _)) :: _ ) =>
358358
val roundGrain = if (options.timeOptions.inheritGrainOfDuration) grain else NoGrain
359359
tt(finalRule(cycleNth(grain, v, roundGrain)))
360360
}
@@ -367,7 +367,7 @@ trait Rules extends DimRules {
367367
name = "<duration> before/after <time>",
368368
pattern = List(isNotLatentDuration.predicate, "之?(前|后)的?".regex, isNotLatent.predicate),
369369
prod = tokens {
370-
case Token(Duration, DurationData(v, g, _, _, _)) :: Token(_, GroupMatch(_ :: s :: _)) ::
370+
case Token(Duration, DurationData(v, g, _, _, _, _)) :: Token(_, GroupMatch(_ :: s :: _)) ::
371371
Token(Time, td: TimeData) :: _ =>
372372
if (g > td.timeGrain) {
373373
val sign = if (s == "") -1 else 1
@@ -407,7 +407,7 @@ trait Rules extends DimRules {
407407
name = "<time> before/after 2",
408408
pattern = List(and(isADayOfMonth, isNotLatent).predicate, "".regex, isNotLatentDuration.predicate, "[之以]?([前后])".regex),
409409
prod = tokens {
410-
case Token(Time, td: TimeData) :: _ :: Token(Duration, DurationData(v, g, _, _, _)) :: Token(_, GroupMatch(_ :: d :: _)) :: _ =>
410+
case Token(Time, td: TimeData) :: _ :: Token(Duration, DurationData(v, g, _, _, _, _)) :: Token(_, GroupMatch(_ :: d :: _)) :: _ =>
411411
val dv = if (d == "") -v else v
412412
val dg = if (g <= Day) g else Day
413413

@@ -420,7 +420,7 @@ trait Rules extends DimRules {
420420
name = "<time> before/after 3",
421421
pattern = List(and(isADayOfMonth, isNotLatent).predicate, isNotLatentDuration.predicate, "[之以]?([前后])".regex),
422422
prod = tokens {
423-
case Token(Time, td: TimeData) :: Token(Duration, DurationData(v, g, _, _, _)) :: Token(_, GroupMatch(_ :: d :: _)) :: _ =>
423+
case Token(Time, td: TimeData) :: Token(Duration, DurationData(v, g, _, _, _, _)) :: Token(_, GroupMatch(_ :: d :: _)) :: _ =>
424424
val dv = if (d == "") -v else v
425425
val dg = if (g <= Day) g else Day
426426

@@ -433,7 +433,7 @@ trait Rules extends DimRules {
433433
name = "<time> after duration",
434434
pattern = List(isADayOfMonth.predicate, "(再?过|往后(数|推)|后面?的第?)".regex, isNotLatentDuration.predicate),
435435
prod = tokens {
436-
case Token(Time, td: TimeData) :: _ :: Token(Duration, DurationData(v, g, _, _, _)) :: _ =>
436+
case Token(Time, td: TimeData) :: _ :: Token(Duration, DurationData(v, g, _, _, _, _)) :: _ =>
437437
val dv = v
438438
val dg = if (g <= Day) g else Day
439439

@@ -446,7 +446,7 @@ trait Rules extends DimRules {
446446
name = "<time> before duration",
447447
pattern = List(isADayOfMonth.predicate, "(往前(数|推)|前面?的第?)".regex, isNotLatentDuration.predicate),
448448
prod = tokens {
449-
case Token(Time, td: TimeData) :: _ :: Token(Duration, DurationData(v, g, _, _, _)) :: _ =>
449+
case Token(Time, td: TimeData) :: _ :: Token(Duration, DurationData(v, g, _, _, _, _)) :: _ =>
450450
val dv = -v
451451
val dg = if (g <= Day) g else Day
452452

@@ -513,7 +513,7 @@ trait Rules extends DimRules {
513513
name = "in a <duration>",
514514
pattern = List(isNotLatentDuration.predicate, "[之以]?内".regex),
515515
prod = tokens {
516-
case Token(Duration, DurationData(value, grain, _, _, _)) :: _ =>
516+
case Token(Duration, DurationData(value, grain, _, _, _, _)) :: _ =>
517517
val (g, v) = grain match {
518518
case Month => (Day, value * 30)
519519
case Week => (Day, value * 7)
@@ -598,7 +598,7 @@ trait Rules extends DimRules {
598598
List(isHint(RecentNominal).predicate, "(往|向|之)?(前|后)的?".regex, isNotLatentDuration.predicate),
599599
prod = optTokens {
600600
case (options, Token(_, td1: TimeData) :: Token(_, GroupMatch(_ :: s :: _)) ::
601-
Token(_, DurationData(value, grain, latent, _, _)) :: _)
601+
Token(_, DurationData(value, grain, latent, _, _, _)) :: _)
602602
if options.timeOptions.sequence && td1.timeGrain == grain =>
603603
val sign = if (s == "") -1 else 1
604604
val td2 = cycleN(notImmediate = false, grain, sign * value)

duckling-fork-chinese/core/src/main/scala/com/xiaomi/duckling/dimension/time/date/Rules.scala

+1-1
Original file line number | Diff line number | Diff line change
@@ -257,7 +257,7 @@ trait Rules extends DimRules {
257257
name = "date - <ordinal> <quarter>",
258258
pattern = List("".regex, isAQuarterOfYear.predicate),
259259
prod = {
260-
case (options, _ :: Token(Duration, DurationData(value, _, _, _, _)) :: _) =>
260+
case (options, _ :: Token(Duration, DurationData(value, _, _, _, _, _)) :: _) =>
261261
for (td <- interval(Closed, month(3 * value - 2), month(3 * value), options.timeOptions.beforeEndOfInterval)) yield {
262262
Token(Date, td.copy(reset = (Grain.resetTo(Quarter), 0)))
263263
}

duckling-fork-chinese/core/src/main/scala/com/xiaomi/duckling/dimension/time/duration/Duration.scala

+2-1
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,8 @@ case object Duration extends Dimension with Rules {
4141
* @param latent 是否是潜在时间
4242
* @param fuzzy 是否是模糊时间(几天)
4343
*/
44-
case class DurationData(value: Int, grain: Grain, latent: Boolean = false, fuzzy: Boolean = false, override val schema: Option[String] = None)
44+
case class DurationData(value: Int, grain: Grain, latent: Boolean = false, fuzzy: Boolean = false, half: Boolean = false,
45+
override val schema: Option[String] = None)
4546
extends ResolvedValue
4647
with Resolvable {
4748

duckling-fork-chinese/core/src/main/scala/com/xiaomi/duckling/dimension/time/duration/Rules.scala

+3-3
Original file line number | Diff line number | Diff line change
@@ -149,7 +149,7 @@ trait Rules extends DimRules {
149149
pattern = List("半个?".regex, isDimension(TimeGrain).predicate),
150150
prod = tokens {
151151
case _ :: Token(TimeGrain, GrainData(grain, _, _)) :: _ =>
152-
for (d <- timesOneAndAHalf(grain, 0)) yield Token(Duration, d)
152+
for (d <- timesOneAndAHalf(grain, 0)) yield Token(Duration, d.copy(half = true))
153153
}
154154
)
155155

@@ -187,7 +187,7 @@ trait Rules extends DimRules {
187187
pattern =
188188
List(isDimension(Duration).predicate, isDimension(Duration).predicate),
189189
prod = tokens {
190-
case Token(_, b@DurationData(v, g, _, _, _)) :: Token(_, a@DurationData(_, dg, _, _, _)) :: _ if g > dg =>
190+
case Token(_, b@DurationData(v, g, _, _, _, _)) :: Token(_, a@DurationData(_, dg, _, _, _, _)) :: _ if g > dg =>
191191
// ❌ 两年三月
192192
// ✅ 一分三十秒
193193
if (!b.latent && g != Month && (!a.latent || dg != Hour) || (b.latent && g == Minute)) Token(Duration, b + a)
@@ -207,7 +207,7 @@ trait Rules extends DimRules {
207207
isNotLatentDuration.predicate
208208
),
209209
prod = tokens {
210-
case t1 :: Token(TimeGrain, GrainData(g, _, _)) :: _ :: Token(_, dd@DurationData(_, dg, _, _, _)) :: _
210+
case t1 :: Token(TimeGrain, GrainData(g, _, _)) :: _ :: Token(_, dd@DurationData(_, dg, _, _, _, _)) :: _
211211
if g > dg && g != Month =>
212212
for (i <- getIntValue(t1)) yield Token(Duration, DurationData(i.toInt, g, schema = durationSchema(i.toInt.toString, g)) + dd)
213213
}

duckling-fork-chinese/core/src/main/scala/com/xiaomi/duckling/dimension/time/duration/package.scala

+2-2
Original file line number | Diff line number | Diff line change
@@ -64,11 +64,11 @@ package object duration {
6464
}
6565

6666
def isNotLatentDuration: Predicate = {
67-
case Token(Duration, DurationData(_, _, latent, _, _)) => !latent
67+
case Token(Duration, DurationData(_, _, latent, _, _, _)) => !latent
6868
}
6969

7070
def isFuzzyNotLatentDuration: Predicate = {
71-
case Token(Duration, DurationData(_, _, _, fuzzy, _)) => fuzzy
71+
case Token(Duration, DurationData(_, _, _, fuzzy, _, _)) => fuzzy
7272
}
7373

7474
def isNotLatentGrain: Predicate = {

duckling-fork-chinese/core/src/main/scala/com/xiaomi/duckling/dimension/time/predicates.scala

+2-2
Original file line number | Diff line number | Diff line change
@@ -249,7 +249,7 @@ object predicates {
249249
}
250250

251251
val isAQuarterOfYear: Predicate = {
252-
case Token(Duration, DurationData(value, grain, _, _, _)) =>
252+
case Token(Duration, DurationData(value, grain, _, _, _, _)) =>
253253
grain == Quarter && value >= 1 && value <= 4
254254
}
255255

@@ -302,7 +302,7 @@ object predicates {
302302
}
303303

304304
val isDurationAmountGt1: Predicate = {
305-
case Token(Duration, DurationData(value, _, _, _, _)) => value > 1
305+
case Token(Duration, DurationData(value, _, _, _, _, _)) => value > 1
306306
}
307307

308308
val isGrainGeDay: Predicate = {

duckling-fork-chinese/learning/src/main/scala/com/xiaomi/duckling/dimension/time/Examples.scala

+8
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,14 @@ object Examples extends DimExamples {
126126
),
127127
List("上半年")
128128
),
129+
(
130+
localDateTimeInterval(
131+
LocalDateTime.of(2012, 1, 1, 0, 0, 0),
132+
LocalDateTime.of(2012, 7, 1, 0, 0, 0),
133+
Month
134+
),
135+
List("去年上半年")
136+
),
129137
(
130138
localDateTimeInterval(
131139
LocalDateTime.of(2014, 2, 4, 0, 0, 0),

duckling-fork-chinese/learning/src/main/scala/com/xiaomi/duckling/dimension/time/duration/Examples.scala

+5-4
Original file line number | Diff line number | Diff line change
@@ -36,15 +36,16 @@ object Examples extends DimExamples {
3636
(DurationData(3, Quarter, schema = Some("P3Q")), List("3个季度")),
3737
(DurationData(2, Year, schema = Some("P2Y")), List("两年", "2年")),
3838
(DurationData(2000, Year, schema = Some("P2000Y")), List("两千年")),
39-
(DurationData(30, Minute, schema = Some("PT0H30M")), List("半小时")),
39+
(DurationData(30, Minute, schema = Some("PT0H30M"), half = true), List("半小时")),
4040
(DurationData(30, Minute, schema = Some("PT30M")), List("0.5小时")),
4141
(DurationData(90, Minute, schema = Some("PT90M")), List("1.5小时", "1.50小时", "一点五小时", "一点五零小时")),
42-
(DurationData(30, Minute, schema = Some("PT30M")), List("30分钟", "半个钟头")),
43-
(DurationData(12, Hour, schema = Some("P0DT12H")), List("半天")),
42+
(DurationData(30, Minute, schema = Some("PT30M")), List("30分钟")),
43+
(DurationData(30, Minute, schema = Some("PT30M"), half = true), List("半个钟头")),
44+
(DurationData(12, Hour, schema = Some("P0DT12H"), half = true), List("半天")),
4445
(DurationData(90, Minute, schema = Some("PT1H30M")), List("一个半小时", "一小时30分钟", "一个小时30分钟")),
4546
(DurationData(2, Hour, schema = Some("PT2H")), List("两个小时整")),
4647
(DurationData(45, Day, schema = Some("P1M15D")), List("一个半月")),
47-
(DurationData(15, Day, schema = Some("P0M15D")), List("半个月")),
48+
(DurationData(15, Day, schema = Some("P0M15D"), half = true), List("半个月")),
4849
(DurationData(27, Month, schema = Some("P2Y3M")), List("两年零三个月","两年外加三个月", "两年加上三个月", "两年加三个月", "两年三个月")),
4950
(DurationData(31719604, Second, schema = Some("P1Y2DT3H4S")), List("1年两天3小时四秒"))
5051
)

duckling-fork-chinese/learning/src/main/scala/com/xiaomi/duckling/task/NaiveBayesDebug.scala

+3-2
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,6 @@ package com.xiaomi.duckling.task
1818

1919
import org.apache.commons.lang3.time.StopWatch
2020
import org.json4s.jackson.Serialization.write
21-
2221
import com.xiaomi.duckling.Api
2322
import com.xiaomi.duckling.Api.formatToken
2423
import com.xiaomi.duckling.JsonSerde._
@@ -27,6 +26,8 @@ import com.xiaomi.duckling.dimension.FullDimensions
2726
import com.xiaomi.duckling.ranking.Ranker
2827
import com.xiaomi.duckling.ranking.Testing.testContext
2928

29+
import java.time.ZonedDateTime
30+
3031
object NaiveBayesDebug {
3132
private val context = testContext // .copy(referenceTime = ZonedDateTime.now())
3233

@@ -49,7 +50,7 @@ object NaiveBayesDebug {
4950
val targets = FullDimensions.convert(dim.split(","))
5051
val options = Options(targets = targets, withLatent = false, full = true)
5152
options.rankOptions.setRanker(Some(Ranker.NaiveBayes))
52-
options.rankOptions.setWinnerOnly(true)
53+
options.rankOptions.setWinnerOnly(false)
5354
options.rankOptions.setCombinationRank(false)
5455
options.rankOptions.setRangeRankAhead(false)
5556
options.timeOptions.setResetTimeOfDay(false)

duckling-fork-chinese/server/src/main/scala/com/xiaomi/duckling/TokenVisualization.scala

+1-1
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ object TokenVisualization {
7676
case SimpleValue(instant) => instant.grain.name()
7777
case _ => ""
7878
}
79-
case DurationData(_, grain, _, _, _) => grain.name()
79+
case DurationData(_, grain, _, _, _, _) => grain.name()
8080
case _ => ""
8181
}
8282
val node = answer.token.node

0 commit comments

Comments
 (0)