Skip to content

Commit cb8f16d

Browse files
MichaelMWW and Michael Wang (Centific Technologies Inc) authored
Add support for French decades with century (#3153)
* Draft commit * Update regexes * Fix DecadeRegex not defined before use for typescript * Add testcase for "not able to recognize French in the 90s" * Resolved review comments and added DateTimeModel specs --------- Co-authored-by: Michael Wang (Centific Technologies Inc) <v-michwang@microsoft.com>
1 parent 1b88159 commit cb8f16d

6 files changed

Lines changed: 202 additions & 36 deletions

File tree

.NET/Microsoft.Recognizers.Definitions.Common/French/DateTimeDefinitions.cs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,9 @@ public static class DateTimeDefinitions
173173
public static readonly string PeriodTimeOfDayWithDateRegex = $@"\b(({TimeOfDayRegex}))\b";
174174
public const string LessThanRegex = @"^\b$";
175175
public const string MoreThanRegex = @"^\b$";
176-
public const string DurationUnitRegex = @"(?<unit>ann[eé]es?|ans?|mois|semaines?|jours?|heures?|hrs?|h|minutes?|mins?|secondes?|secs?|journ[eé]e)\b";
176+
// Written-out decade names. The captured "decade" text is used as a lookup key in
// WrittenDecades / SpecialDecadeCases, so it must equal one of their keys.
// "quatre-vingt" is captured without a plural "s" (the lookahead accepts an optional
// trailing "s" but leaves it outside the match, like the numeric "70s" form) so both
// "quatre-vingt" and "quatre-vingts" resolve to the "quatre-vingt" dictionary entry.
// Longer alternatives ("soixante-dix", "quatre-vingt-dix") stay ahead of their prefixes.
public const string DecadeRegex = @"(?<decade>(?:dix|vingt|trente|quarante|cinquante|soixante-dix|soixante|quatre-vingt-dix|quatre-vingt(?=s?\b)|deux\s+mille))";
177+
// Body of a decade expression, without the leading "années". Three alternatives:
//   1. digits: an optional one- or two-digit "century" followed by a "decade" of the form
//      <digit>0 ending at a word boundary (e.g. "70", "1970"), or <digit>0 directly
//      followed by "s" (the "s" is only looked ahead, not consumed);
//   2. a DecadeRegex word, optionally preceded by CenturyRegex, whitespace and an optional "et";
//   3. CenturyRegex, whitespace and an optional "et", followed by "dix" or "centaines".
// CenturyRegex is defined elsewhere in this class.
public static readonly string DecadeWithCenturyInnerRegex = $@"(((?<century>\d|1\d|2\d)?((?<decade>\d0)\b)|(?<decade>\d0)(?=s))|(({CenturyRegex}(\s+)(et\s+)?)?{DecadeRegex})|({CenturyRegex}(\s+)(et\s+)?(?<decade>dix|centaines)))";
178+
// Duration unit words, captured as "unit". The negative lookahead on "année(s)" stops it
// from matching as a unit when whitespace and a decade expression follow (e.g. "années 70").
public static readonly string DurationUnitRegex = $@"(?<unit>\bann[eé]es?(?!\s+{DecadeWithCenturyInnerRegex})\b|ans?|mois|semaines?|jours?|heures?|hrs?|h|minutes?|mins?|secondes?|secs?|journ[eé]e)\b";
177179
public const string SuffixAndRegex = @"(?<suffix>\s*(et)\s+(une?\s+)?(?<suffix_num>demi|quart))";
178180
public const string PeriodicRegex = @"\b(?<periodic>quotidien(ne)?|journellement|mensuel(le)?|jours?|hebdomadaire|bihebdomadaire|annuel(lement)?)\b";
179181
public static readonly string EachUnitRegex = $@"(?<each>(chaque|toutes les|tous les)(?<other>\s+autres)?\s*{DurationUnitRegex})";
@@ -255,8 +257,7 @@ public static class DateTimeDefinitions
255257
public const string NumberAsTimeRegex = @"^\b$";
256258
public const string TimeBeforeAfterRegex = @"^\b$";
257259
public const string DateNumberConnectorRegex = @"^\s*(?<connector>\s+[aà])\s*$";
258-
public const string DecadeRegex = @"^\b$";
259-
public const string DecadeWithCenturyRegex = @"^\b$";
260+
// Full decade expression: an optional "les", the word "années", whitespace, then the
// decade body from DecadeWithCenturyInnerRegex (e.g. "les années 1970").
public static readonly string DecadeWithCenturyRegex = $@"(les\s+)?(années)\s+{DecadeWithCenturyInnerRegex}";
260261
public const string RelativeDecadeRegex = @"^\b$";
261262
public static readonly string YearSuffix = $@"(,?(\s*à)?\s*({DateYearRegex}|{FullTextYearRegex}))";
262263
public const string SuffixAfterRegex = @"^\b$";
@@ -720,11 +721,19 @@ public static class DateTimeDefinitions
720721
public const string NightRegex = @"\b(minuit|nuit)\b";
721722
public static readonly Dictionary<string, int> WrittenDecades = new Dictionary<string, int>
722723
{
723-
{ @"", 0 }
724+
{ @"dix", 10 },
725+
{ @"vingt", 20 },
726+
{ @"trente", 30 },
727+
{ @"quarante", 40 },
728+
{ @"cinquante", 50 },
729+
{ @"soixante", 60 },
730+
{ @"soixante-dix", 70 },
731+
{ @"quatre-vingt", 80 },
732+
{ @"quatre-vingt-dix", 90 }
724733
};
725734
public static readonly Dictionary<string, int> SpecialDecadeCases = new Dictionary<string, int>
726735
{
727-
{ @"", 0 }
736+
{ @"deux mille", 2000 }
728737
};
729738
public const string DefaultLanguageFallback = @"DMY";
730739
public static readonly string[] DurationDateRestrictions = { };

.NET/Microsoft.Recognizers.Text.DateTime/French/Parsers/FrenchDatePeriodParserConfiguration.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ public FrenchDatePeriodParserConfiguration(ICommonDateTimeParserConfiguration co
104104
SeasonMap = config.SeasonMap;
105105
SpecialYearPrefixesMap = config.SpecialYearPrefixesMap;
106106
WrittenDecades = config.WrittenDecades;
107+
Numbers = config.Numbers;
107108
SpecialDecadeCases = config.SpecialDecadeCases;
108109
}
109110

Patterns/French/French-DateTime.yaml

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -399,8 +399,14 @@ LessThanRegex: !simpleRegex
399399
MoreThanRegex: !simpleRegex
400400
# TODO: modify below regex according to the counterpart in English
401401
def: ^\b$
402-
DurationUnitRegex: !simpleRegex
403-
def: (?<unit>ann[eé]es?|ans?|mois|semaines?|jours?|heures?|hrs?|h|minutes?|mins?|secondes?|secs?|journ[eé]e)\b
402+
DecadeRegex: !simpleRegex
403+
# The captured "decade" text is used as a key into WrittenDecades / SpecialDecadeCases.
# "quatre-vingt" is captured without a plural "s" (an optional trailing "s" is only looked
# ahead, like the numeric "70s" form) so both spellings resolve to the 'quatre-vingt' entry.
def: (?<decade>(?:dix|vingt|trente|quarante|cinquante|soixante-dix|soixante|quatre-vingt-dix|quatre-vingt(?=s?\b)|deux\s+mille))
404+
DecadeWithCenturyInnerRegex: !nestedRegex
405+
def: (((?<century>\d|1\d|2\d)?((?<decade>\d0)\b)|(?<decade>\d0)(?=s))|(({CenturyRegex}(\s+)(et\s+)?)?{DecadeRegex})|({CenturyRegex}(\s+)(et\s+)?(?<decade>dix|centaines)))
406+
references: [ CenturyRegex, DecadeRegex ]
407+
DurationUnitRegex: !nestedRegex
408+
def: (?<unit>\bann[eé]es?(?!\s+{DecadeWithCenturyInnerRegex})\b|ans?|mois|semaines?|jours?|heures?|hrs?|h|minutes?|mins?|secondes?|secs?|journ[eé]e)\b
409+
references: [ DecadeWithCenturyInnerRegex ]
404410
SuffixAndRegex: !simpleRegex
405411
def: (?<suffix>\s*(et)\s+(une?\s+)?(?<suffix_num>demi|quart))
406412
PeriodicRegex: !simpleRegex
@@ -592,12 +598,9 @@ TimeBeforeAfterRegex: !simpleRegex
592598
def: ^\b$
593599
DateNumberConnectorRegex: !simpleRegex
594600
def: ^\s*(?<connector>\s+[aà])\s*$
595-
DecadeRegex: !simpleRegex
596-
# TODO: modify below regex according to the counterpart in English
597-
def: ^\b$
598-
DecadeWithCenturyRegex: !simpleRegex
599-
# TODO: modify below regex according to the counterpart in English
600-
def: ^\b$
601+
DecadeWithCenturyRegex: !nestedRegex
602+
def: (les\s+)?(années)\s+{DecadeWithCenturyInnerRegex}
603+
references: [ DecadeWithCenturyInnerRegex ]
601604
RelativeDecadeRegex: !simpleRegex
602605
# TODO: modify below regex according to the counterpart in English
603606
def: ^\b$
@@ -1079,14 +1082,20 @@ NightRegex: !simpleRegex
10791082
def: \b(minuit|nuit)\b
10801083
WrittenDecades: !dictionary
10811084
types: [ string, int ]
1082-
# TODO: modify below dictionary according to the counterpart in English
10831085
entries:
1084-
'': 0
1086+
'dix': 10
1087+
'vingt': 20
1088+
'trente': 30
1089+
'quarante': 40
1090+
'cinquante': 50
1091+
'soixante': 60
1092+
'soixante-dix': 70
1093+
'quatre-vingt': 80
1094+
'quatre-vingt-dix': 90
10851095
SpecialDecadeCases: !dictionary
10861096
types: [ string, int ]
1087-
# TODO: modify below dictionary there're special cases for written decades
10881097
entries:
1089-
'': 0
1098+
'deux mille': 2000
10901099
DefaultLanguageFallback: DMY
10911100
DurationDateRestrictions: []
10921101
# Cases collected from mined data

Specs/DateTime/French/DatePeriodExtractor.json

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4678,7 +4678,7 @@
46784678
},
46794679
{
46804680
"Input": "Dans les années 1970",
4681-
"NotSupported": "dotnet, javascript, python, java",
4681+
"NotSupported": "javascript, python, java",
46824682
"Results": [
46834683
{
46844684
"Text": "les années 1970",
@@ -4690,7 +4690,7 @@
46904690
},
46914691
{
46924692
"Input": "Dans les années 2000, il est né.",
4693-
"NotSupported": "dotnet, javascript, python, java",
4693+
"NotSupported": "javascript, python, java",
46944694
"Results": [
46954695
{
46964696
"Text": "les années 2000",
@@ -4726,7 +4726,7 @@
47264726
},
47274727
{
47284728
"Input": "Dans les années 70",
4729-
"NotSupported": "dotnet, javascript, python, java",
4729+
"NotSupported": "javascript, python, java",
47304730
"Results": [
47314731
{
47324732
"Text": "les années 70",
@@ -4737,11 +4737,11 @@
47374737
]
47384738
},
47394739
{
4740-
"Input": "Dans les années 40",
4741-
"NotSupported": "dotnet, javascript, python, java",
4740+
"Input": "Dans les années 20",
4741+
"NotSupported": "javascript, python, java",
47424742
"Results": [
47434743
{
4744-
"Text": "les années 40",
4744+
"Text": "les années 20",
47454745
"Type": "daterange",
47464746
"Start": 5,
47474747
"Length": 13
@@ -4750,7 +4750,7 @@
47504750
},
47514751
{
47524752
"Input": "Dans les années soixante-dix",
4753-
"NotSupported": "dotnet, javascript, python, java",
4753+
"NotSupported": "javascript, python, java",
47544754
"Results": [
47554755
{
47564756
"Text": "les années soixante-dix",
@@ -4762,7 +4762,7 @@
47624762
},
47634763
{
47644764
"Input": "Dans les années dix-neuf soixante-dix",
4765-
"NotSupported": "dotnet, javascript, python, java",
4765+
"NotSupported": "javascript, python, java",
47664766
"Results": [
47674767
{
47684768
"Text": "les années dix-neuf soixante-dix",
@@ -4772,6 +4772,30 @@
47724772
}
47734773
]
47744774
},
4775+
{
4776+
"Input": "Dans les années mille quatre cent vingt",
4777+
"NotSupported": "javascript, python, java",
4778+
"Results": [
4779+
{
4780+
"Text": "les années mille quatre cent vingt",
4781+
"Type": "daterange",
4782+
"Start": 5,
4783+
"Length": 34
4784+
}
4785+
]
4786+
},
4787+
{
4788+
"Input": "Dans les années deux mille",
4789+
"NotSupported": "javascript, python, java",
4790+
"Results": [
4791+
{
4792+
"Text": "les années deux mille",
4793+
"Type": "daterange",
4794+
"Start": 5,
4795+
"Length": 21
4796+
}
4797+
]
4798+
},
47754799
{
47764800
"Input": "Dans les deux mille dix",
47774801
"NotSupported": "dotnet, javascript, python, java",
@@ -4786,7 +4810,7 @@
47864810
},
47874811
{
47884812
"Input": "Dans les années 2010",
4789-
"NotSupported": "dotnet, javascript, python, java",
4813+
"NotSupported": "javascript, python, java",
47904814
"Results": [
47914815
{
47924816
"Text": "les années 2010",
@@ -4810,7 +4834,7 @@
48104834
},
48114835
{
48124836
"Input": "Dans les années 2000",
4813-
"NotSupported": "dotnet, javascript, python, java",
4837+
"NotSupported": "javascript, python, java",
48144838
"Results": [
48154839
{
48164840
"Text": "les années 2000",

Specs/DateTime/French/DatePeriodParser.json

Lines changed: 78 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4034,7 +4034,7 @@
40344034
},
40354035
{
40364036
"Input": "Dans les années 2000, il est né.",
4037-
"NotSupported": "dotnet, javascript, python, java",
4037+
"NotSupported": "javascript, python, java",
40384038
"Results": [
40394039
{
40404040
"Text": "les années 2000",
@@ -4057,7 +4057,7 @@
40574057
},
40584058
{
40594059
"Input": "Dans les années 1970's",
4060-
"NotSupported": "dotnet, javascript, python, java",
4060+
"NotSupported": "javascript, python, java",
40614061
"Results": [
40624062
{
40634063
"Text": "les années 1970",
@@ -4080,7 +4080,7 @@
40804080
},
40814081
{
40824082
"Input": "Dans les années 70s",
4083-
"NotSupported": "dotnet, javascript, python, java",
4083+
"NotSupported": "javascript, python, java",
40844084
"Results": [
40854085
{
40864086
"Text": "les années 70",
@@ -4103,7 +4103,7 @@
41034103
},
41044104
{
41054105
"Input": "Dans les années 70's",
4106-
"NotSupported": "dotnet, javascript, python, java",
4106+
"NotSupported": "javascript, python, java",
41074107
"Results": [
41084108
{
41094109
"Text": "les années 70",
@@ -4149,7 +4149,7 @@
41494149
},
41504150
{
41514151
"Input": "Dans les années 40",
4152-
"NotSupported": "dotnet, javascript, python, java",
4152+
"NotSupported": "javascript, python, java",
41534153
"Results": [
41544154
{
41554155
"Text": "les années 40",
@@ -4172,7 +4172,7 @@
41724172
},
41734173
{
41744174
"Input": "Dans les années soixante-dix",
4175-
"NotSupported": "dotnet, javascript, python, java",
4175+
"NotSupported": "javascript, python, java",
41764176
"Results": [
41774177
{
41784178
"Text": "les années soixante-dix",
@@ -4193,9 +4193,78 @@
41934193
}
41944194
]
41954195
},
4196+
{
4197+
"Input": "Dans les années dix-neuf soixante-dix",
4198+
"NotSupported": "javascript, python, java",
4199+
"Results": [
4200+
{
4201+
"Text": "les années dix-neuf soixante-dix",
4202+
"Type": "daterange",
4203+
"Value": {
4204+
"Timex": "(1970-01-01,1980-01-01,P10Y)",
4205+
"FutureResolution": {
4206+
"startDate": "1970-01-01",
4207+
"endDate": "1980-01-01"
4208+
},
4209+
"PastResolution": {
4210+
"startDate": "1970-01-01",
4211+
"endDate": "1980-01-01"
4212+
}
4213+
},
4214+
"Start": 5,
4215+
"Length": 32
4216+
}
4217+
]
4218+
},
4219+
{
4220+
"Input": "Dans les années mille quatre cent vingt",
4221+
"NotSupported": "javascript, python, java",
4222+
"Results": [
4223+
{
4224+
"Text": "les années mille quatre cent vingt",
4225+
"Type": "daterange",
4226+
"Value": {
4227+
"Timex": "(1420-01-01,1430-01-01,P10Y)",
4228+
"FutureResolution": {
4229+
"startDate": "1420-01-01",
4230+
"endDate": "1430-01-01"
4231+
},
4232+
"PastResolution": {
4233+
"startDate": "1420-01-01",
4234+
"endDate": "1430-01-01"
4235+
}
4236+
},
4237+
"Start": 5,
4238+
"Length": 34
4239+
}
4240+
]
4241+
},
4242+
{
4243+
"Input": "Dans les années deux mille",
4244+
"NotSupported": "javascript, python, java",
4245+
"Results": [
4246+
{
4247+
"Text": "les années deux mille",
4248+
"Type": "daterange",
4249+
"Value": {
4250+
"Timex": "(2000-01-01,2010-01-01,P10Y)",
4251+
"FutureResolution": {
4252+
"startDate": "2000-01-01",
4253+
"endDate": "2010-01-01"
4254+
},
4255+
"PastResolution": {
4256+
"startDate": "2000-01-01",
4257+
"endDate": "2010-01-01"
4258+
}
4259+
},
4260+
"Start": 5,
4261+
"Length": 21
4262+
}
4263+
]
4264+
},
41964265
{
41974266
"Input": "Dans les années 1970",
4198-
"NotSupported": "dotnet, javascript, python, java",
4267+
"NotSupported": "javascript, python, java",
41994268
"Results": [
42004269
{
42014270
"Text": "les années 1970",
@@ -4241,7 +4310,7 @@
42414310
},
42424311
{
42434312
"Input": "Dans les années 2010",
4244-
"NotSupported": "dotnet, javascript, python, java",
4313+
"NotSupported": "javascript, python, java",
42454314
"Results": [
42464315
{
42474316
"Text": "les années 2010",
@@ -4287,7 +4356,7 @@
42874356
},
42884357
{
42894358
"Input": "Dans les années 2000",
4290-
"NotSupported": "dotnet, javascript, python, java",
4359+
"NotSupported": "javascript, python, java",
42914360
"Results": [
42924361
{
42934362
"Text": "les années 2000",

0 commit comments

Comments
 (0)