Skip to content

Commit 6cde198

Browse files
committed
Better handling of summary activities - see HEA-807
1 parent 58cb52a commit 6cde198

1 file changed

Lines changed: 91 additions & 53 deletions

File tree

pipelines/assets/livelihood_activity.py

Lines changed: 91 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -744,8 +744,14 @@ def get_instances_from_dataframe(
744744
continue
745745

746746
# When we process the values for the LivelihoodActivity records, we need to know the actual attribute
747-
# that the values in this row are for
748-
activity_attribute = label_attributes["attribute"]
747+
# that the values in this row are for. Livelihood Summary rows are grouped by percentage_kcals, income and
748+
# expenditure, so we can keep the activity_attribute from the previous Livelihood Strategy if it hasn't
749+
# been set by the label_attributes.
750+
if (
751+
activity_type != ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY
752+
or label_attributes["attribute"]
753+
):
754+
activity_attribute = label_attributes["attribute"]
749755

750756
if label_attributes["is_start"]:
751757
# We are starting a new livelihood activity, so append the previous livelihood strategy
@@ -789,23 +795,6 @@ def get_instances_from_dataframe(
789795
"Found Livelihood Activities from row %s, but there is no Livelihood Strategy defined." % row
790796
)
791797

792-
# Copy the attribute from the previous livelihood strategy if this is a Livelihood Summary and the
793-
# attribute hasn't been set by the label_attributes.
794-
if (
795-
activity_type == ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY
796-
and not activity_attribute
797-
and previous_livelihood_strategy
798-
and previous_livelihood_activities_for_strategy
799-
):
800-
for attribute in ["income", "expenditure", "percentage_kcals"]:
801-
if attribute in previous_livelihood_activities_for_strategy[0]:
802-
activity_attribute = attribute
803-
break
804-
if not activity_attribute:
805-
raise ValueError(
806-
f"Could not determine attribute for Livelihood Summary strategy from row {row}"
807-
)
808-
809798
# Copy the product_id for MilkProduction and ButterProduction from the previous livelihood strategy
810799
# if necessary.
811800
if (
@@ -1071,15 +1060,45 @@ def get_instances_from_dataframe(
10711060

10721061
# Headings like CROP PRODUCTION: set the strategy type for subsequent rows.
10731062
# Some other labels imply specific strategy types, such as MilkProduction, MeatProduction or LivestockSales
1074-
if label_attributes["strategy_type"]:
1063+
# For Livelihood Summary activities, the strategy_type is always set from the label_attributes.
1064+
if (
1065+
label_attributes["strategy_type"]
1066+
or activity_type == ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY
1067+
):
10751068
strategy_type = label_attributes["strategy_type"]
1069+
1070+
# In the Summary section at the top of the Data worksheet, many of the labels are ambiguous but the
1071+
# rows are organized into percentage_kcals, income and expenditure sections. Therefore, we can set the
1072+
# Strategy Type based on the activity_attribute.
1073+
if strategy_type == "ReliefGift_or_Purchase":
1074+
if activity_attribute in ("percentage_kcals", "income"):
1075+
strategy_type = LivelihoodStrategyType.RELIEF_GIFT_OTHER
1076+
elif activity_attribute == "expenditure":
1077+
strategy_type = LivelihoodStrategyType.OTHER_PURCHASE
1078+
else:
1079+
errors.append(
1080+
"Invalid strategy_type %s for attribute %s from label '%s'"
1081+
% (strategy_type, activity_attribute, label)
1082+
)
1083+
activity_attribute = None
1084+
elif strategy_type == "CashIncome_or_Purchase":
1085+
if activity_attribute == "income":
1086+
strategy_type = LivelihoodStrategyType.OTHER_CASH_INCOME
1087+
elif activity_attribute == "expenditure":
1088+
strategy_type = LivelihoodStrategyType.OTHER_PURCHASE
1089+
else:
1090+
errors.append(
1091+
"Invalid strategy_type %s for attribute %s from label '%s'"
1092+
% (strategy_type, activity_attribute, label)
1093+
)
1094+
activity_attribute = None
1095+
10761096
# Get the valid field names so we can determine if the attribute is stored in LivelihoodActivity.extra
1077-
# LivestockProduction is an artificial, composite strategy type representing the sum of
1078-
# MilkProduction, ButterProduction and MeatProduction. It isn't stored in the database, and it only
1079-
# requires income, expenditure and kcals_consumed, so we use the base LivelihoodActivity model.
1097+
# Livelihood Summary activities only contain kcals, income and expenditure, and aren't stored in
1098+
# the database, so we can use the base LivelihoodActivity model.
10801099
model = (
10811100
LivelihoodActivity
1082-
if strategy_type == "LivestockProduction"
1101+
if activity_type == ActivityLabel.LivelihoodActivityType.LIVELIHOOD_SUMMARY
10831102
else class_from_name(f"baseline.models.{strategy_type}")
10841103
)
10851104
activity_field_names = [field.name for field in model._meta.concrete_fields]
@@ -1419,36 +1438,53 @@ def get_instances_from_dataframe(
14191438
"num_livelihood_strategies": len(livelihood_strategies),
14201439
"num_livelihood_activities": len(livelihood_activities),
14211440
"num_unrecognized_labels": len(unrecognized_labels),
1422-
"pct_rows_recognized": round(
1423-
(
1424-
1
1425-
- len(
1426-
df.iloc[num_header_rows:][
1427-
prepare_lookup(df.iloc[num_header_rows:]["A"]).isin(unrecognized_labels["label"])
1428-
]
1429-
)
1430-
/ len(df.iloc[num_header_rows:])
1431-
)
1432-
* 100
1433-
),
1434-
"pct_used_rows_recognized": round(
1435-
(
1436-
1
1437-
- len(
1438-
df.iloc[num_header_rows:][
1439-
prepare_lookup(df.iloc[num_header_rows:]["A"]).isin(
1440-
unrecognized_labels[unrecognized_labels["datapoints"] > 0]["label"]
1441-
)
1442-
]
1443-
)
1444-
/ len(df.iloc[num_header_rows:])
1445-
)
1446-
* 100
1447-
),
1448-
"preview": MetadataValue.md(f"```json\n{json.dumps(result, indent=4, ensure_ascii=False)}\n```"),
14491441
}
14501442
if not unrecognized_labels.empty:
14511443
metadata["unrecognized_labels"] = MetadataValue.md(unrecognized_labels.to_markdown(index=False))
1444+
metadata["pct_rows_recognized"] = round(
1445+
(
1446+
1
1447+
- len(
1448+
df.iloc[num_header_rows:][
1449+
prepare_lookup(df.iloc[num_header_rows:]["A"]).isin(unrecognized_labels["label"])
1450+
]
1451+
)
1452+
/ len(df.iloc[num_header_rows:])
1453+
)
1454+
* 100,
1455+
1,
1456+
)
1457+
metadata["pct_used_rows_recognized"] = round(
1458+
(
1459+
1
1460+
- len(
1461+
df.iloc[num_header_rows:][
1462+
prepare_lookup(df.iloc[num_header_rows:]["A"]).isin(
1463+
unrecognized_labels[unrecognized_labels["datapoints"] > 0]["label"]
1464+
)
1465+
]
1466+
)
1467+
/ len(df.iloc[num_header_rows:])
1468+
)
1469+
* 100,
1470+
1,
1471+
)
1472+
metadata["pct_used_summary_rows_recognized"] = round(
1473+
(
1474+
1
1475+
- len(
1476+
df.iloc[num_header_rows:][
1477+
prepare_lookup(df.iloc[num_header_rows:]["A"]).isin(
1478+
unrecognized_labels[unrecognized_labels["summary_datapoints"] > 0]["label"]
1479+
)
1480+
]
1481+
)
1482+
/ len(df.iloc[num_header_rows:])
1483+
)
1484+
* 100,
1485+
1,
1486+
)
1487+
metadata["preview"] = MetadataValue.md(f"```json\n{json.dumps(result, indent=4, ensure_ascii=False)}\n```")
14521488

14531489
if errors:
14541490
if config.strict:
@@ -1515,10 +1551,12 @@ def get_annotated_instances_from_dataframe(
15151551
# Annotate the output metadata with completeness information
15161552
# Get the summary dataframe, grouped by strategy_type
15171553
summary_df = pd.DataFrame(reported_summary_output.value["LivelihoodActivity"])
1518-
for col in ["income", "expenditure", "kcals_consumed"]:
1554+
for col in ["income", "expenditure", "percentage_kcals", "kcals_consumed"]:
15191555
summary_df[col] = pd.to_numeric(summary_df[col], errors="coerce").fillna(0)
15201556
summary_df = (
1521-
summary_df[["strategy_type", "income", "expenditure", "kcals_consumed"]].groupby("strategy_type").sum()
1557+
summary_df[["strategy_type", "income", "expenditure", "percentage_kcals", "kcals_consumed"]]
1558+
.groupby("strategy_type")
1559+
.sum()
15221560
)
15231561

15241562
# Add the recognized Livelihood Activities, also grouped by strategy_type

0 commit comments

Comments
 (0)