Skip to content

Commit 09aaf53

Browse files
authored
Merge pull request #296 from American-Institutes-for-Research/HEA-847/kg_gathered_aliases
Hea 847/kg gathered aliases
2 parents 83ad261 + b7a0540 commit 09aaf53

3 files changed

Lines changed: 158 additions & 3 deletions

File tree

pipelines/assets/livelihood_activity.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -910,7 +910,7 @@ def get_instances_from_dataframe(
910910
]
911911
if unrecognized_labels.empty:
912912
# Keep the same shape as the non-empty case (label, rows, datapoints, summary_datapoints)
913-
unrecognized_labels = pd.DataFrame(columns=["label", "rows", "datapoint_count", "in_summary"])
913+
unrecognized_labels = pd.DataFrame(columns=["label", "rows", "datapoints", "summary_datapoints"])
914914
else:
915915
# Boolean mask of which cells are used
916916
# Count datapoints per row

pipelines/assets/livelihood_activity_regexes.json

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,12 @@
133133
false,
134134
"income"
135135
],
136+
[
137+
"revenu total (?:cueillette|p[êe]che)",
138+
null,
139+
false,
140+
"income"
141+
],
136142
[
137143
"other use \\({unit_of_measure_pattern}\\)",
138144
null,
@@ -145,6 +151,12 @@
145151
false,
146152
"percentage_kcals"
147153
],
154+
[
155+
"(?:kcals? total|total kcals?) (?:cueillette|p[êe]che)(?: \\(%\\))?",
156+
null,
157+
false,
158+
"percentage_kcals"
159+
],
148160
[
149161
"kcals per (?P<unit_of_measure_id>kg|l)",
150162
null,
@@ -776,7 +788,19 @@
776788
"quantity_produced"
777789
],
778790
[
779-
"(?:wild foods?{separator_pattern} )?{product_pattern}{separator_pattern} \\(?{unit_of_measure_pattern} gathered\\)?",
791+
"aliment [0-9]+{separator_pattern}{product_pattern}{separator_pattern}{unit_of_measure_pattern} (?:cueillis?|collectés?|ramassés?|gathered)",
792+
null,
793+
true,
794+
"quantity_produced"
795+
],
796+
[
797+
"aliment [0-9]+{separator_pattern}{unit_of_measure_pattern} (?:cueillis?|collectés?|ramassés?|gathered) *[-:/]? *\\(?(?P<product_id>{label_pattern})?\\)?",
798+
null,
799+
true,
800+
"quantity_produced"
801+
],
802+
[
803+
"(?:(?:wild foods?|cueillette){separator_pattern} )?{product_pattern}{separator_pattern} \\(?{unit_of_measure_pattern} (?:gathered|cueillis?|collectés?|ramassés?)\\)?",
780804
null,
781805
true,
782806
"quantity_produced"
@@ -788,7 +812,7 @@
788812
"quantity_produced"
789813
],
790814
[
791-
"{product_pattern}{separator_pattern}\\(?{unit_of_measure_pattern} gathered\\)?",
815+
"{product_pattern}{separator_pattern}\\(?{unit_of_measure_pattern} (?:gathered|cueillis?|collectés?|ramassés?)\\)?",
792816
null,
793817
true,
794818
"quantity_produced"

pipelines_tests/test_assets/test_livelihood_activity_regexes.json

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1360,5 +1360,136 @@
13601360
"is_start": true,
13611361
"product_id": "eau pour le bétail",
13621362
"attribute": "name_of_local_measure"
1363+
},
1364+
"karité (kg cueillis)": {
1365+
"is_start": true,
1366+
"product_id": "karité",
1367+
"unit_of_measure_id": "kg",
1368+
"attribute": "quantity_produced"
1369+
},
1370+
"karité - kg cueillis": {
1371+
"is_start": true,
1372+
"product_id": "karité",
1373+
"unit_of_measure_id": "kg",
1374+
"attribute": "quantity_produced"
1375+
},
1376+
"karité: kg cueillis": {
1377+
"is_start": true,
1378+
"product_id": "karité",
1379+
"unit_of_measure_id": "kg",
1380+
"attribute": "quantity_produced"
1381+
},
1382+
"karité kg cueillis": {
1383+
"is_start": true,
1384+
"product_id": "karité",
1385+
"unit_of_measure_id": "kg",
1386+
"attribute": "quantity_produced"
1387+
},
1388+
"karité (kg collectés)": {
1389+
"is_start": true,
1390+
"product_id": "karité",
1391+
"unit_of_measure_id": "kg",
1392+
"attribute": "quantity_produced"
1393+
},
1394+
"karité - kg ramassé": {
1395+
"is_start": true,
1396+
"product_id": "karité",
1397+
"unit_of_measure_id": "kg",
1398+
"attribute": "quantity_produced"
1399+
},
1400+
"karité - litres cueillis": {
1401+
"is_start": true,
1402+
"product_id": "karité",
1403+
"unit_of_measure_id": "litres",
1404+
"attribute": "quantity_produced"
1405+
},
1406+
"cueillette: champignons (kg collectés)": {
1407+
"is_start": true,
1408+
"product_id": "champignons",
1409+
"unit_of_measure_id": "kg",
1410+
"attribute": "quantity_produced"
1411+
},
1412+
"cueillette: fumbwa (kg collectés)": {
1413+
"is_start": true,
1414+
"product_id": "fumbwa",
1415+
"unit_of_measure_id": "kg",
1416+
"attribute": "quantity_produced"
1417+
},
1418+
"cueillette: chenille (kg collectés)": {
1419+
"is_start": true,
1420+
"product_id": "chenille",
1421+
"unit_of_measure_id": "kg",
1422+
"attribute": "quantity_produced"
1423+
},
1424+
"arbre véritable - kg cueillis": {
1425+
"is_start": true,
1426+
"product_id": "arbre véritable",
1427+
"unit_of_measure_id": "kg",
1428+
"attribute": "quantity_produced"
1429+
},
1430+
"cactus rouge et local- kg cueillis": {
1431+
"is_start": true,
1432+
"product_id": "cactus rouge et local",
1433+
"unit_of_measure_id": "kg",
1434+
"attribute": "quantity_produced"
1435+
},
1436+
"aliment 1 - kg cueillis": {
1437+
"is_start": true,
1438+
"unit_of_measure_id": "kg",
1439+
"attribute": "quantity_produced"
1440+
},
1441+
"aliment 1 - kg cueillis karité": {
1442+
"is_start": true,
1443+
"product_id": "karité",
1444+
"unit_of_measure_id": "kg",
1445+
"attribute": "quantity_produced"
1446+
},
1447+
"aliment 1 - kg cueillis (fonio sauvage)": {
1448+
"is_start": true,
1449+
"product_id": "fonio sauvage",
1450+
"unit_of_measure_id": "kg",
1451+
"attribute": "quantity_produced"
1452+
},
1453+
"aliment 1 - kg cueillis nere": {
1454+
"is_start": true,
1455+
"product_id": "nere",
1456+
"unit_of_measure_id": "kg",
1457+
"attribute": "quantity_produced"
1458+
},
1459+
"aliment 1 : amande de karité - kg cueillis": {
1460+
"is_start": true,
1461+
"product_id": "amande de karité",
1462+
"unit_of_measure_id": "kg",
1463+
"attribute": "quantity_produced"
1464+
},
1465+
"aliment 1 - raisin sauvage kg cueillis": {
1466+
"is_start": true,
1467+
"product_id": "raisin sauvage",
1468+
"unit_of_measure_id": "kg",
1469+
"attribute": "quantity_produced"
1470+
},
1471+
"aliment 1 graines et poudre de néré - kg cueillis": {
1472+
"is_start": true,
1473+
"product_id": "graines et poudre de néré",
1474+
"unit_of_measure_id": "kg",
1475+
"attribute": "quantity_produced"
1476+
},
1477+
"aliment 1: grain de nere - kg cueillis": {
1478+
"is_start": true,
1479+
"product_id": "grain de nere",
1480+
"unit_of_measure_id": "kg",
1481+
"attribute": "quantity_produced"
1482+
},
1483+
"revenu total cueillette": {
1484+
"attribute": "income"
1485+
},
1486+
"kcal total cueillette (%)": {
1487+
"attribute": "percentage_kcals"
1488+
},
1489+
"revenu total pêche": {
1490+
"attribute": "income"
1491+
},
1492+
"total kcal pêche (%)": {
1493+
"attribute": "percentage_kcals"
13631494
}
13641495
}

0 commit comments

Comments
 (0)