Skip to content

Commit b955679

Browse files
committed
Add regexes to detect common wild foods Livelihood Activities - see HEA-572
1 parent f819089 commit b955679

3 files changed

Lines changed: 49 additions & 1 deletion

File tree

pipelines/assets/livelihood_activity.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,7 @@ def get_livelihood_activity_regexes() -> list:
179179
# Create regex patterns for metadata attributes to replace the placeholders in the regexes
180180
placeholder_patterns = {
181181
"label_pattern": r"[a-zà-ÿ][a-zà-ÿ',/ \.\>\-\(\)]+?",
182-
"product_pattern": r"(?P<product_id>[a-zà-ÿ][a-zà-ÿ',/ \.\>\-\(\)]+?)",
182+
"product_pattern": r"(?P<product_id>[a-zà-ÿ][a-zà-ÿ1-9',/ \.\>\-\(\)]+?)",
183183
"season_pattern": r"(?P<season>season [12]|saison [12]|[12][a-z] season||[12][a-zà-ÿ] saison|r[eé]colte principale|principale r[eé]colte|gu|deyr+?)", # NOQA: E501
184184
"additional_identifier_pattern": r"\(?(?P<additional_identifier>rainfed|irrigated|pluviale?|irriguée|submersion libre|submersion contrôlée|flottant)\)?",
185185
"unit_of_measure_pattern": r"(?P<unit_of_measure_id>[a-z]+)",

pipelines/assets/livelihood_activity_regexes.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -703,6 +703,24 @@
703703
true,
704704
"quantity_produced"
705705
],
706+
[
707+
"(?:wild foods?{separator_pattern} )?{product_pattern}{separator_pattern} \\(?{unit_of_measure_pattern} gathered\\)?",
708+
null,
709+
true,
710+
"quantity_produced"
711+
],
712+
[
713+
"(?:fish|fish \\(?dry\\)?|fish \\(?fresh\\)?){separator_pattern} {product_pattern}{separator_pattern} \\(?{unit_of_measure_pattern} gathered\\)?",
714+
null,
715+
true,
716+
"quantity_produced"
717+
],
718+
[
719+
"{product_pattern}{separator_pattern}\\(?{unit_of_measure_pattern} gathered\\)?",
720+
null,
721+
true,
722+
"quantity_produced"
723+
],
706724
[
707725
"{product_pattern} (?P<season>[1|2]è[m|r]e récolte){separator_pattern} {nbr_pattern} mois",
708726
null,

pipelines_tests/test_assets/test_livelihood_activity_regexes.json

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -829,5 +829,35 @@
829829
"attribute": "payment_per_time",
830830
"product_id": "grain",
831831
"unit_of_measure_id": "kg"
832+
},
833+
"wild food: avocado (kg gathered)": {
834+
"is_start": true,
835+
"product_id": "avocado",
836+
"unit_of_measure_id": "kg",
837+
"attribute": "quantity_produced"
838+
},
839+
"mangoes (kg gathered)": {
840+
"is_start": true,
841+
"product_id": "mangoes",
842+
"unit_of_measure_id": "kg",
843+
"attribute": "quantity_produced"
844+
},
845+
"okra - kg gathered": {
846+
"is_start": true,
847+
"product_id": "okra",
848+
"unit_of_measure_id": "kg",
849+
"attribute": "quantity_produced"
850+
},
851+
"Fish (dry) : Tilapia (dry/smoked) (kg gathered)": {
852+
"is_start": true,
853+
"product_id": "tilapia (dry/smoked)",
854+
"unit_of_measure_id": "kg",
855+
"attribute": "quantity_produced"
856+
},
857+
"Fish type 2 (dried) - kg gathered": {
858+
"is_start": true,
859+
"product_id": "fish type 2 (dried)",
860+
"unit_of_measure_id": "kg",
861+
"attribute": "quantity_produced"
832862
}
833863
}

0 commit comments

Comments
 (0)