Skip to content

Commit 18fe661

Browse files
MichaelMWW and Michael Wang (Centific Technologies Inc) authored
[EN DateTimeV2] Add support for time range with duration (#3174)
* Datetime for longer span - local draft commit * DateTimeForLongerSpan - Implement for from * DateTimeForLongerSpan - from datetime for duration local commit * DateTimeForLongerSpan - Parsing time period with duration initial commit * DateTimeForLongerSpan - Parsing time period with duration update test cases * DateTimeForLongerSpan - Add support for time range with duration - Update test case to trigger rebuild * DateTimeForLongerSpan - Add support for time range with duration - Update test case to trigger rebuild --------- Co-authored-by: Michael Wang (Centific Technologies Inc) <v-michwang@microsoft.com>
1 parent dde290b commit 18fe661

11 files changed

Lines changed: 697 additions & 3 deletions

File tree

.NET/Microsoft.Recognizers.Definitions.Common/English/DateTimeDefinitions.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,9 @@ public static class DateTimeDefinitions
316316
public static readonly string TasksModeSupressionRegexes = $@"({AmPmDescRegex}|{TasksModeSpecialDescRegex}|{TasksModeHolidayListSupression}|{DecadeRegex}|{DecadeWithCenturyRegex}|{QuarterRegex}|{QuarterRegexYearFront}|{AllHalfYearRegex}|{SeasonRegex})";
317317
public const string TasksModeNextPrefix = @"(?<next>next\s+)";
318318
public static readonly string TasksModeDurationToDatePatterns = $@"\b({TasksModeNextPrefix}((?<week>week)|(?<month>month)|(?<year>year)))\b";
319+
public static readonly string TimePeriodFromForRegex = $@"(from\s+)(?<time1>(({TimeRegex2}|{FirstTimeRegexInTimeRange})|({HourRegex}|{PeriodHourNumRegex})(\s*(?<leftDesc>{DescRegex}))?))\s*for\s+(.*?)\s+({DurationUnitRegex})(\s+(.*?)\s+({DurationUnitRegex}))?";
320+
public static readonly string TimePeriodForFromRegex = $@"for\s+(.*?)\s+({DurationUnitRegex})(\s+(.*?)\s+({DurationUnitRegex}))?\s+(from\s+)(?<time1>(({TimeRegex2}|{FirstTimeRegexInTimeRange})|({HourRegex}|{PeriodHourNumRegex})(\s*(?<leftDesc>{DescRegex}))?))";
321+
public static readonly string TimePeriodWithDurationRegex = $@"({TimePeriodFromForRegex}|{TimePeriodForFromRegex})";
319322
public static readonly Dictionary<string, string> UnitMap = new Dictionary<string, string>
320323
{
321324
{ @"decades", @"10Y" },

.NET/Microsoft.Recognizers.Text.DateTime/English/Extractors/EnglishTimePeriodExtractorConfiguration.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ public class EnglishTimePeriodExtractorConfiguration : BaseDateTimeOptionsConfig
6464
public static readonly Regex GeneralEndingRegex =
6565
new Regex(DateTimeDefinitions.GeneralEndingRegex, RegexFlags, RegexTimeOut);
6666

67+
public static readonly Regex TimePeriodWithDurationRegex =
68+
new Regex(DateTimeDefinitions.TimePeriodWithDurationRegex, RegexFlags, RegexTimeOut);
69+
6770
private const RegexOptions RegexFlags = RegexOptions.Singleline | RegexOptions.ExplicitCapture;
6871

6972
public EnglishTimePeriodExtractorConfiguration(IDateTimeOptionsConfiguration config)

.NET/Microsoft.Recognizers.Text.DateTime/English/Parsers/EnglishTimePeriodParserConfiguration.cs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ public EnglishTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration c
2727
TimeOfDayRegex = EnglishTimePeriodExtractorConfiguration.TimeOfDayRegex;
2828
GeneralEndingRegex = EnglishTimePeriodExtractorConfiguration.GeneralEndingRegex;
2929
TillRegex = EnglishTimePeriodExtractorConfiguration.TillRegex;
30+
TimePeriodWithDurationRegex = EnglishTimePeriodExtractorConfiguration.TimePeriodWithDurationRegex;
31+
DurationParser = config.DurationParser;
32+
DurationExtractor = config.DurationExtractor;
3033

3134
Numbers = config.Numbers;
3235
UtilityConfiguration = config.UtilityConfiguration;
@@ -40,6 +43,10 @@ public EnglishTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration c
4043

4144
public IDateTimeParser TimeZoneParser { get; }
4245

46+
public IDateTimeParser DurationParser { get; }
47+
48+
public IDateTimeExtractor DurationExtractor { get; }
49+
4350
public Regex SpecificTimeFromToRegex { get; }
4451

4552
public Regex SpecificTimeBetweenAndRegex { get; }
@@ -54,6 +61,8 @@ public EnglishTimePeriodParserConfiguration(ICommonDateTimeParserConfiguration c
5461

5562
public Regex TillRegex { get; }
5663

64+
public Regex TimePeriodWithDurationRegex { get; }
65+
5766
public IImmutableDictionary<string, int> Numbers { get; }
5867

5968
public IDateTimeUtilityConfiguration UtilityConfiguration { get; }

.NET/Microsoft.Recognizers.Text.DateTime/Extractors/BaseTimePeriodExtractor.cs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
using System.Collections.Generic;
66
using System.Linq;
77
using System.Text.RegularExpressions;
8-
8+
using Microsoft.Recognizers.Text.DateTime.English;
99
using Microsoft.Recognizers.Text.InternalCache;
1010
using Microsoft.Recognizers.Text.Utilities;
1111
using DateObject = System.DateTime;
@@ -55,6 +55,7 @@ private List<ExtractResult> ExtractImpl(string text, DateObject reference)
5555
{
5656
var tokens = new List<Token>();
5757
tokens.AddRange(MatchSimpleCases(text));
58+
tokens.AddRange(MatchTimePeriodWithDurationCases(text));
5859
tokens.AddRange(MergeTwoTimePoints(text, reference));
5960
tokens.AddRange(MatchTimeOfDay(text));
6061

@@ -153,6 +154,22 @@ private List<Token> MatchSimpleCases(string text)
153154
return ret;
154155
}
155156

157+
// Extracts time ranges written as a start time plus a duration, e.g.
// "from 6am for 3 hours" and "for 3 hours from 6 am".
// The pattern is only applied when the active configuration is the English one;
// for any other configuration (or for null/empty text) an empty list is returned.
// Returns one token (start index, end index) per match found in the text.
private List<Token> MatchTimePeriodWithDurationCases(string text)
{
    var ret = new List<Token>();

    if (string.IsNullOrEmpty(text) || !(this.config is EnglishTimePeriodExtractorConfiguration))
    {
        return ret;
    }

    // Matches (rather than Match) is used so that every occurrence in the text is reported,
    // not only the first one.
    foreach (Match match in EnglishTimePeriodExtractorConfiguration.TimePeriodWithDurationRegex.Matches(text))
    {
        ret.Add(new Token(match.Index, match.Index + match.Length));
    }

    return ret;
}
172+
156173
private bool StartsWithTimeZone(string afterText)
157174
{
158175
var startsWithTimeZone = false;

.NET/Microsoft.Recognizers.Text.DateTime/Parsers/BaseTimePeriodParser.cs

Lines changed: 138 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
using System;
55
using System.Collections.Generic;
66
using System.Globalization;
7-
7+
using Microsoft.Recognizers.Text.DateTime.English;
88
using Microsoft.Recognizers.Text.Utilities;
99

1010
using DateObject = System.DateTime;
@@ -120,6 +120,11 @@ private DateTimeResolutionResult InternalParse(string entityText, DateObject ref
120120
innerResult = ParseTimeOfDay(entityText, referenceTime);
121121
}
122122

123+
if (!innerResult.Success)
124+
{
125+
innerResult = ParseTimePeroidWithDuration(entityText, referenceTime);
126+
}
127+
123128
return innerResult;
124129
}
125130

@@ -669,6 +674,138 @@ private DateTimeResolutionResult ParseSpecificTimeCases(string text, DateObject
669674
return ret;
670675
}
671676

677+
// Parses time ranges written as a start time plus a duration, e.g.
// "from 6am for 3 hours" and "for 3 hours from 6am".
// Only handled when the configuration is EnglishTimePeriodParserConfiguration. Whenever the
// text cannot be resolved (no exact match, prefixed time, missing duration, missing or
// unreadable hour) a result with Success == false is returned instead of throwing.
// On success the result carries the "(begin,end,duration)" Timex, the begin/end tuple as both
// FutureValue and PastValue, and the parsed start time as the single sub-entity.
private DateTimeResolutionResult ParseTimePeroidWithDuration(string text, DateObject referenceTime)
{
    var ret = new DateTimeResolutionResult();

    var parserConfig = this.config as EnglishTimePeriodParserConfiguration;
    if (parserConfig == null)
    {
        return ret;
    }

    var match = parserConfig.TimePeriodWithDurationRegex.MatchExact(text, trim: true);

    // Cases like "half past seven" are not handled here.
    // Checked before the duration is extracted so no work is done for rejected input.
    if (!match.Success || match.Groups[Constants.PrefixGroupName].Success)
    {
        return ret;
    }

    var erDuration = parserConfig.DurationExtractor.Extract(text);
    if (erDuration is null || erDuration.Count == 0)
    {
        return ret;
    }

    // Only the first extracted duration is used.
    var prDuration = parserConfig.DurationParser.Parse(erDuration[0]);
    var durationResolution = prDuration?.Value as DateTimeResolutionResult;
    if (durationResolution == null)
    {
        // The duration could not be resolved, so the end of the range is unknown.
        return ret;
    }

    int year = referenceTime.Year, month = referenceTime.Month, day = referenceTime.Day;

    // Get time1: the hour comes from the first "hour" capture of the match.
    var hourGroup = match.Groups[Constants.HourGroupName];
    if (hourGroup.Captures.Count == 0)
    {
        return ret;
    }

    var hourStr = hourGroup.Captures[0].Value;

    // The hour is either a number word known to the configuration or a plain integer.
    if (!this.config.Numbers.TryGetValue(hourStr, out int beginHour) &&
        !int.TryParse(hourStr, NumberStyles.Integer, CultureInfo.InvariantCulture, out beginHour))
    {
        return ret;
    }

    var time1Group = match.Groups["time1"];
    var time1StartIndex = time1Group.Index;
    var time1EndIndex = time1StartIndex + time1Group.Length;

    // A case like "4" is different from "4:00" because the Timex differs ("T04" vs "T04:00"),
    // so minute and second stay at their "invalid" markers unless they were actually written.
    int beginMinute = Constants.InvalidMinute;
    int beginSecond = Constants.InvalidSecond;

    // Get beginMinute (if exists): the last minute capture that lies inside time1 wins.
    var minuteGroup = match.Groups[Constants.MinuteGroupName];
    for (int i = 0; i < minuteGroup.Captures.Count; i++)
    {
        var minuteCapture = minuteGroup.Captures[i];
        if (minuteCapture.Index >= time1StartIndex && minuteCapture.Index + minuteCapture.Length <= time1EndIndex)
        {
            beginMinute = int.Parse(minuteCapture.Value, CultureInfo.InvariantCulture);
        }
    }

    // Get beginSecond (if exists): the last second capture that lies inside time1 wins.
    var secondGroup = match.Groups[Constants.SecondGroupName];
    for (int i = 0; i < secondGroup.Captures.Count; i++)
    {
        var secondCapture = secondGroup.Captures[i];
        if (secondCapture.Index >= time1StartIndex && secondCapture.Index + secondCapture.Length <= time1EndIndex)
        {
            beginSecond = int.Parse(secondCapture.Value, CultureInfo.InvariantCulture);
        }
    }

    // Desc here means descriptions like "am / pm / o'clock".
    // Get leftDesc (if exists): prefer the explicit "leftDesc" group, otherwise fall back to the
    // first non-empty "desc" capture that lies inside time1.
    var leftDesc = match.Groups["leftDesc"].Value;
    var descGroup = match.Groups[Constants.DescGroupName];
    for (int i = 0; i < descGroup.Captures.Count && string.IsNullOrEmpty(leftDesc); i++)
    {
        var descCapture = descGroup.Captures[i];
        if (descCapture.Index >= time1StartIndex && descCapture.Index + descCapture.Length <= time1EndIndex)
        {
            leftDesc = descCapture.Value;
        }
    }

    var beginDateTime = DateObject.MinValue.SafeCreateFromValue(
        year,
        month,
        day,
        beginHour,
        beginMinute >= 0 ? beginMinute : 0,
        beginSecond >= 0 ? beginSecond : 0);

    var hasLeftAm = !string.IsNullOrEmpty(leftDesc) && leftDesc.StartsWith("a", StringComparison.Ordinal);
    var hasLeftPm = !string.IsNullOrEmpty(leftDesc) && leftDesc.StartsWith("p", StringComparison.Ordinal);

    // The start time has a description like "am" or "pm": shift it into the matching half of the day.
    if (hasLeftAm)
    {
        if (beginHour >= Constants.HalfDayHourCount)
        {
            beginDateTime = beginDateTime.AddHours(-Constants.HalfDayHourCount);
        }
    }
    else if (hasLeftPm)
    {
        if (beginHour < Constants.HalfDayHourCount)
        {
            beginDateTime = beginDateTime.AddHours(Constants.HalfDayHourCount);
        }
    }

    // The resolved duration value is added to the start time as a number of seconds.
    var endDateTime = beginDateTime.AddSeconds(Convert.ToInt32(durationResolution.FutureValue, CultureInfo.InvariantCulture));

    // The begin string keeps the "invalid" minute/second markers so that "6am" is not rendered
    // with minutes it never had; the end string always uses the computed end time.
    var beginStr = DateTimeFormatUtil.ShortTime(beginDateTime.Hour, beginMinute, beginSecond);
    var endStr = DateTimeFormatUtil.ShortTime(endDateTime.Hour, endDateTime.Minute, endDateTime.Second);

    ret.Success = true;

    ret.Timex = $"({beginStr},{endStr},{DateTimeFormatUtil.LuisTimeSpan(endDateTime - beginDateTime)})";

    ret.FutureValue = ret.PastValue = new Tuple<DateObject, DateObject>(
        beginDateTime,
        endDateTime);

    // Expose the start time (the time1 span of the input) as a parsed sub-entity.
    ret.SubDateTimeEntities = new List<object>();
    var er = new ExtractResult()
    {
        Start = time1StartIndex,
        Length = time1EndIndex - time1StartIndex,
        Text = text.Substring(time1StartIndex, time1EndIndex - time1StartIndex),
        Type = $"{Constants.SYS_DATETIME_TIME}",
    };

    var pr = this.config.TimeParser.Parse(er, referenceTime);
    ret.SubDateTimeEntities.Add(pr);

    return ret;
}
808+
672809
private DateTimeResolutionResult MergeTwoTimePoints(string text, DateObject referenceTime)
673810
{
674811
var ret = new DateTimeResolutionResult();

Patterns/English/English-DateTime.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -768,6 +768,15 @@ TasksModeNextPrefix: !simpleRegex
768768
TasksModeDurationToDatePatterns: !nestedRegex
769769
def: \b({TasksModeNextPrefix}((?<week>week)|(?<month>month)|(?<year>year)))\b
770770
references: [TasksModeNextPrefix]
771+
TimePeriodFromForRegex: !nestedRegex
772+
def: (from\s+)(?<time1>(({TimeRegex2}|{FirstTimeRegexInTimeRange})|({HourRegex}|{PeriodHourNumRegex})(\s*(?<leftDesc>{DescRegex}))?))\s*for\s+(.*?)\s+({DurationUnitRegex})(\s+(.*?)\s+({DurationUnitRegex}))?
773+
references: [ TimeRegex2, FirstTimeRegexInTimeRange, TimeRegexWithDotConnector, TillRegex, HourRegex, PeriodHourNumRegex, DescRegex, PmRegex, AmRegex, RangePrefixRegex, DurationUnitRegex ]
774+
TimePeriodForFromRegex: !nestedRegex
775+
def: for\s+(.*?)\s+({DurationUnitRegex})(\s+(.*?)\s+({DurationUnitRegex}))?\s+(from\s+)(?<time1>(({TimeRegex2}|{FirstTimeRegexInTimeRange})|({HourRegex}|{PeriodHourNumRegex})(\s*(?<leftDesc>{DescRegex}))?))
776+
references: [ TimeRegex2, FirstTimeRegexInTimeRange, TimeRegexWithDotConnector, TillRegex, HourRegex, PeriodHourNumRegex, DescRegex, PmRegex, AmRegex, RangePrefixRegex, DurationUnitRegex ]
777+
TimePeriodWithDurationRegex: !nestedRegex
778+
def: ({TimePeriodFromForRegex}|{TimePeriodForFromRegex})
779+
references: [ TimePeriodFromForRegex, TimePeriodForFromRegex ]
771780
UnitMap: !dictionary
772781
types: [ string, string ]
773782
entries:

0 commit comments

Comments
 (0)