Skip to content

Commit 8c899a7

Browse files
aitelint and aitelint authored
[JA DateTimeV2] Merged refinements (#2950)
* Merged refinements
* Fixed specs formatting
* Minor variable renames according to review

Co-authored-by: aitelint <Fabrizio.Sorba@telusinternational.com>
1 parent 5ebfa9b commit 8c899a7

23 files changed

Lines changed: 784 additions & 142 deletions

.NET/Microsoft.Recognizers.Definitions.Common/Japanese/DateTimeDefinitions.cs

Lines changed: 27 additions & 20 deletions
Large diffs are not rendered by default.

.NET/Microsoft.Recognizers.Text.DateTime/Chinese/Extractors/ChineseDurationExtractorConfiguration.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ public ChineseDurationExtractorConfiguration(IDateTimeOptionsConfiguration confi
5656

5757
public Dictionary<string, long> UnitValueMap { get; }
5858

59+
public Dictionary<Regex, Regex> AmbiguityDurationFiltersDict => null;
60+
5961
Regex ICJKDurationExtractorConfiguration.DurationUnitRegex => DurationUnitRegex;
6062

6163
Regex ICJKDurationExtractorConfiguration.DurationConnectorRegex => DurationConnectorRegex;

.NET/Microsoft.Recognizers.Text.DateTime/Constants.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ public static class Constants
5353

5454
// AmPm time representation for time parser
5555
public const string Comment_AmPm = "ampm";
56+
public const string Comment_Am = "am";
5657

5758
// Prefix early/late for time parser
5859
public const string Comment_Early = "early";
@@ -267,6 +268,7 @@ public static class Constants
267268
public const string TimexFuzzyDay = "XX";
268269
public const string DateTimexConnector = "-";
269270
public const string TimeTimexConnector = ":";
271+
public const string TimexSeparator = ",";
270272
public const string GeneralPeriodPrefix = "P";
271273
public const string TimeTimexPrefix = "T";
272274

.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKDurationExtractor.cs

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22
// Licensed under the MIT License.
33

44
using System.Collections.Generic;
5-
5+
using System.Linq;
6+
using System.Text.RegularExpressions;
67
using Microsoft.Recognizers.Text.Utilities;
78
using DateObject = System.DateTime;
89

@@ -54,9 +55,32 @@ public List<ExtractResult> Extract(string source, DateObject referenceTime)
5455
res = MergeMultipleDuration(source, res);
5556
}
5657

58+
res = FilterAmbiguity(res, source);
59+
5760
return res;
5861
}
5962

63+
/// <summary>
/// Drops extraction results that overlap a match of any configured ambiguity filter.
/// </summary>
/// <param name="extractResults">The duration extraction results to filter.</param>
/// <param name="text">The source text the results were extracted from.</param>
/// <returns>
/// The results that survive every applicable filter. The input list itself is returned
/// when no filter dictionary is configured or no filter key matches <paramref name="text"/>.
/// </returns>
private List<ExtractResult> FilterAmbiguity(List<ExtractResult> extractResults, string text)
{
    var filters = this.config.AmbiguityDurationFiltersDict;

    // A configuration may expose no filter dictionary at all; nothing to do then.
    if (filters == null)
    {
        return extractResults;
    }

    foreach (var filter in filters)
    {
        // Once every result has been removed, the remaining filters cannot change anything.
        if (extractResults.Count == 0)
        {
            break;
        }

        // The key regex decides whether this filter applies to the text at all.
        if (!filter.Key.IsMatch(text))
        {
            continue;
        }

        // Materialize the matches once so the overlap test below does not
        // re-enumerate the match collection for every extraction result.
        var ambiguousSpans = filter.Value.Matches(text).Cast<Match>().ToList();

        // Keep only the results whose [Start, Start + Length) span does not
        // intersect any span matched by the value regex.
        extractResults = extractResults
            .Where(er => !ambiguousSpans.Any(m => m.Index < er.Start + er.Length && m.Index + m.Length > er.Start))
            .ToList();
    }

    return extractResults;
}
83+
6084
private List<ExtractResult> MergeMultipleDuration(string text, List<ExtractResult> extractorResults)
6185
{
6286
if (extractorResults.Count <= 1)

.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/BaseCJKMergedDateTimeExtractor.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,17 @@ private void AddMod(List<ExtractResult> ers, string text)
129129
er.Metadata = AssignModMetadata(er.Metadata);
130130
}
131131

132+
match = this.config.UntilRegex.MatchBegin(afterStr, trim: true);
133+
134+
if (match.Success)
135+
{
136+
var modLength = match.Index + match.Length;
137+
er.Length += modLength;
138+
er.Text = text.Substring(er.Start ?? 0, er.Length ?? 0);
139+
140+
er.Metadata = AssignModMetadata(er.Metadata);
141+
}
142+
132143
match = this.config.SincePrefixRegex.MatchEnd(beforeStr, trim: true);
133144

134145
if (match.Success && AmbiguousRangeChecker(beforeStr, text, er))

.NET/Microsoft.Recognizers.Text.DateTime/Extractors/CJK/ICJKDurationExtractorConfiguration.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,5 +32,7 @@ public interface ICJKDurationExtractorConfiguration : IDateTimeOptionsConfigurat
3232

3333
Dictionary<string, long> UnitValueMap { get; }
3434

35+
Dictionary<Regex, Regex> AmbiguityDurationFiltersDict { get; }
36+
3537
}
3638
}

.NET/Microsoft.Recognizers.Text.DateTime/Japanese/Extractors/JapaneseDurationExtractorConfiguration.cs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
using System.Globalization;
77
using System.Linq;
88
using System.Text.RegularExpressions;
9-
109
using Microsoft.Recognizers.Definitions.Japanese;
10+
using Microsoft.Recognizers.Definitions.Utilities;
1111
using Microsoft.Recognizers.Text.NumberWithUnit;
1212
using Microsoft.Recognizers.Text.NumberWithUnit.Japanese;
1313

@@ -48,6 +48,8 @@ public JapaneseDurationExtractorConfiguration(IDateTimeOptionsConfiguration conf
4848

4949
UnitMap = DateTimeDefinitions.ParserConfigurationUnitMap.ToDictionary(k => k.Key, k => k.Value);
5050
UnitValueMap = DateTimeDefinitions.DurationUnitValueMap;
51+
AmbiguityDurationFiltersDict = DefinitionLoader.LoadAmbiguityFilters(DateTimeDefinitions.AmbiguityDurationFiltersDict);
52+
5153
}
5254

5355
public IExtractor InternalExtractor { get; }
@@ -56,6 +58,8 @@ public JapaneseDurationExtractorConfiguration(IDateTimeOptionsConfiguration conf
5658

5759
public Dictionary<string, long> UnitValueMap { get; }
5860

61+
public Dictionary<Regex, Regex> AmbiguityDurationFiltersDict { get; }
62+
5963
Regex ICJKDurationExtractorConfiguration.DurationUnitRegex => DurationUnitRegex;
6064

6165
Regex ICJKDurationExtractorConfiguration.DurationConnectorRegex => DurationConnectorRegex;

.NET/Microsoft.Recognizers.Text.DateTime/Korean/Extractors/KoreanDurationExtractorConfiguration.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ public KoreanDurationExtractorConfiguration(IDateTimeOptionsConfiguration config
5656

5757
public Dictionary<string, long> UnitValueMap { get; }
5858

59+
public Dictionary<Regex, Regex> AmbiguityDurationFiltersDict => null;
60+
5961
Regex ICJKDurationExtractorConfiguration.DurationUnitRegex => DurationUnitRegex;
6062

6163
Regex ICJKDurationExtractorConfiguration.DurationConnectorRegex => DurationConnectorRegex;

.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateParser.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,8 +318,9 @@ protected DateTimeResolutionResult ParseImplicitDate(string text, DateObject ref
318318
// handle "明日から3週間" (3 weeks from tomorrow)
319319
var durationResult = this.config.DurationExtractor.Extract(text, referenceDate);
320320
var unitMatch = this.config.DurationRelativeDurationUnitRegex.Match(text);
321+
var isWithin = this.config.DurationRelativeDurationUnitRegex.MatchEnd(text, trim: true).Groups[Constants.WithinGroupName].Success;
321322

322-
if (exactMatch.Success && unitMatch.Success && (durationResult.Count > 0) &&
323+
if ((exactMatch.Success || isWithin) && unitMatch.Success && (durationResult.Count > 0) &&
323324
string.IsNullOrEmpty(unitMatch.Groups["few"].Value))
324325
{
325326
var pr = this.config.DurationParser.Parse(durationResult[0], referenceDate);

.NET/Microsoft.Recognizers.Text.DateTime/Parsers/CJK/BaseCJKDateTimeParser.cs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,14 @@ private DateTimeResolutionResult MergeDateAndTime(string text, DateObject refere
158158
var pastDate = (DateObject)((DateTimeResolutionResult)pr1.Value).PastValue;
159159
var time = (DateObject)((DateTimeResolutionResult)pr2.Value).FutureValue;
160160

161+
// handle cases with time like 25時 which resolve to the next day
162+
var timexHour = TimexUtility.ParseHourFromTimeTimex(pr2.TimexStr);
163+
if (timexHour > Constants.DayHourCount)
164+
{
165+
futureDate = futureDate.AddDays(1);
166+
pastDate = pastDate.AddDays(1);
167+
}
168+
161169
var hour = time.Hour;
162170
var min = time.Minute;
163171
var sec = time.Second;
@@ -178,7 +186,6 @@ private DateTimeResolutionResult MergeDateAndTime(string text, DateObject refere
178186
timeStr = timeStr.Substring(0, timeStr.Length - 4);
179187
}
180188

181-
timeStr = "T" + hour.ToString("D2", CultureInfo.InvariantCulture) + timeStr.Substring(3);
182189
ret.Timex = pr1.TimexStr + timeStr;
183190

184191
var val = (DateTimeResolutionResult)pr2.Value;

0 commit comments

Comments
 (0)