@@ -744,8 +744,14 @@ def get_instances_from_dataframe(
744744 continue
745745
746746 # When we process the values for the LivelihoodActivity records, we need to know the actual attribute
747- # that the values in this row are for
748- activity_attribute = label_attributes ["attribute" ]
747+ # that the values in this row are for. Livelihood Summary rows are grouped by percentage_kcals, income and
748+ # expenditure, so we can keep the activity_attribute from the previous Livelihood Strategy if it hasn't
749+ # been set by the label_attributes.
750+ if (
751+ activity_type != ActivityLabel .LivelihoodActivityType .LIVELIHOOD_SUMMARY
752+ or label_attributes ["attribute" ]
753+ ):
754+ activity_attribute = label_attributes ["attribute" ]
749755
750756 if label_attributes ["is_start" ]:
751757 # We are starting a new livelihood activity, so append the previous livelihood strategy
@@ -789,23 +795,6 @@ def get_instances_from_dataframe(
789795 "Found Livelihood Activities from row %s, but there is no Livelihood Strategy defined." % row
790796 )
791797
792- # Copy the attribute from the previous livelihood strategy if this is a Livelihood Summary and the
793- # attribute hasn't been set by the label_attributes.
794- if (
795- activity_type == ActivityLabel .LivelihoodActivityType .LIVELIHOOD_SUMMARY
796- and not activity_attribute
797- and previous_livelihood_strategy
798- and previous_livelihood_activities_for_strategy
799- ):
800- for attribute in ["income" , "expenditure" , "percentage_kcals" ]:
801- if attribute in previous_livelihood_activities_for_strategy [0 ]:
802- activity_attribute = attribute
803- break
804- if not activity_attribute :
805- raise ValueError (
806- f"Could not determine attribute for Livelihood Summary strategy from row { row } "
807- )
808-
809798 # Copy the product_id for MilkProduction and ButterProduction from the previous livelihood strategy
810799 # if necessary.
811800 if (
@@ -1071,15 +1060,45 @@ def get_instances_from_dataframe(
10711060
10721061 # Headings like CROP PRODUCTION: set the strategy type for subsequent rows.
10731062 # Some other labels imply specific strategy types, such as MilkProduction, MeatProduction or LivestockSales
1074- if label_attributes ["strategy_type" ]:
1063+ # For Livelihood Summary activities, the strategy_type is always set from the label_attributes.
1064+ if (
1065+ label_attributes ["strategy_type" ]
1066+ or activity_type == ActivityLabel .LivelihoodActivityType .LIVELIHOOD_SUMMARY
1067+ ):
10751068 strategy_type = label_attributes ["strategy_type" ]
1069+
1070+ # In the Summary section at the top of the Data worksheet, many of the labels are ambiguous but the
1071+ # rows are organized into percentage_kcals, income and expenditure sections. Therefore, we can set the
1072+ # Strategy Type based on the activity_attribute.
1073+ if strategy_type == "ReliefGift_or_Purchase" :
1074+ if activity_attribute in ("percentage_kcals" , "income" ):
1075+ strategy_type = LivelihoodStrategyType .RELIEF_GIFT_OTHER
1076+ elif activity_attribute == "expenditure" :
1077+ strategy_type = LivelihoodStrategyType .OTHER_PURCHASE
1078+ else :
1079+ errors .append (
1080+ "Invalid strategy_type %s for attribute %s from label '%s'"
1081+ % (strategy_type , activity_attribute , label )
1082+ )
1083+ activity_attribute = None
1084+ elif strategy_type == "CashIncome_or_Purchase" :
1085+ if activity_attribute == "income" :
1086+ strategy_type = LivelihoodStrategyType .OTHER_CASH_INCOME
1087+ elif activity_attribute == "expenditure" :
1088+ strategy_type = LivelihoodStrategyType .OTHER_PURCHASE
1089+ else :
1090+ errors .append (
1091+ "Invalid strategy_type %s for attribute %s from label '%s'"
1092+ % (strategy_type , activity_attribute , label )
1093+ )
1094+ activity_attribute = None
1095+
10761096 # Get the valid fields names so we can determine if the attribute is stored in LivelihoodActivity.extra
1077- # LivestockProduction is an artificial, composite strategy type representing the sum of
1078- # MilkProduction, ButterProduction and MeatProduction. It isn't stored in the database, and it only
1079- # requires income, expenditure and kcals_consumed, so we use the base LivelihoodActivity model.
1097+ # Livelihood Summary activities only contain kcals, income and expenditure, and aren't stored in
1098+ # the database, so can use the base LivelihoodActivity model.
10801099 model = (
10811100 LivelihoodActivity
1082- if strategy_type == "LivestockProduction"
1101+ if activity_type == ActivityLabel . LivelihoodActivityType . LIVELIHOOD_SUMMARY
10831102 else class_from_name (f"baseline.models.{ strategy_type } " )
10841103 )
10851104 activity_field_names = [field .name for field in model ._meta .concrete_fields ]
@@ -1419,36 +1438,53 @@ def get_instances_from_dataframe(
14191438 "num_livelihood_strategies" : len (livelihood_strategies ),
14201439 "num_livelihood_activities" : len (livelihood_activities ),
14211440 "num_unrecognized_labels" : len (unrecognized_labels ),
1422- "pct_rows_recognized" : round (
1423- (
1424- 1
1425- - len (
1426- df .iloc [num_header_rows :][
1427- prepare_lookup (df .iloc [num_header_rows :]["A" ]).isin (unrecognized_labels ["label" ])
1428- ]
1429- )
1430- / len (df .iloc [num_header_rows :])
1431- )
1432- * 100
1433- ),
1434- "pct_used_rows_recognized" : round (
1435- (
1436- 1
1437- - len (
1438- df .iloc [num_header_rows :][
1439- prepare_lookup (df .iloc [num_header_rows :]["A" ]).isin (
1440- unrecognized_labels [unrecognized_labels ["datapoints" ] > 0 ]["label" ]
1441- )
1442- ]
1443- )
1444- / len (df .iloc [num_header_rows :])
1445- )
1446- * 100
1447- ),
1448- "preview" : MetadataValue .md (f"```json\n { json .dumps (result , indent = 4 , ensure_ascii = False )} \n ```" ),
14491441 }
14501442 if not unrecognized_labels .empty :
14511443 metadata ["unrecognized_labels" ] = MetadataValue .md (unrecognized_labels .to_markdown (index = False ))
1444+ metadata ["pct_rows_recognized" ] = round (
1445+ (
1446+ 1
1447+ - len (
1448+ df .iloc [num_header_rows :][
1449+ prepare_lookup (df .iloc [num_header_rows :]["A" ]).isin (unrecognized_labels ["label" ])
1450+ ]
1451+ )
1452+ / len (df .iloc [num_header_rows :])
1453+ )
1454+ * 100 ,
1455+ 1 ,
1456+ )
1457+ metadata ["pct_used_rows_recognized" ] = round (
1458+ (
1459+ 1
1460+ - len (
1461+ df .iloc [num_header_rows :][
1462+ prepare_lookup (df .iloc [num_header_rows :]["A" ]).isin (
1463+ unrecognized_labels [unrecognized_labels ["datapoints" ] > 0 ]["label" ]
1464+ )
1465+ ]
1466+ )
1467+ / len (df .iloc [num_header_rows :])
1468+ )
1469+ * 100 ,
1470+ 1 ,
1471+ )
1472+ metadata ["pct_used_summary_rows_recognized" ] = round (
1473+ (
1474+ 1
1475+ - len (
1476+ df .iloc [num_header_rows :][
1477+ prepare_lookup (df .iloc [num_header_rows :]["A" ]).isin (
1478+ unrecognized_labels [unrecognized_labels ["summary_datapoints" ] > 0 ]["label" ]
1479+ )
1480+ ]
1481+ )
1482+ / len (df .iloc [num_header_rows :])
1483+ )
1484+ * 100 ,
1485+ 1 ,
1486+ )
1487+ metadata ["preview" ] = MetadataValue .md (f"```json\n { json .dumps (result , indent = 4 , ensure_ascii = False )} \n ```" )
14521488
14531489 if errors :
14541490 if config .strict :
@@ -1515,10 +1551,12 @@ def get_annotated_instances_from_dataframe(
15151551 # Annotate the output metadata with completeness information
15161552 # Get the summary dataframe, grouped by strategy_type
15171553 summary_df = pd .DataFrame (reported_summary_output .value ["LivelihoodActivity" ])
1518- for col in ["income" , "expenditure" , "kcals_consumed" ]:
1554+ for col in ["income" , "expenditure" , "percentage_kcals" , " kcals_consumed" ]:
15191555 summary_df [col ] = pd .to_numeric (summary_df [col ], errors = "coerce" ).fillna (0 )
15201556 summary_df = (
1521- summary_df [["strategy_type" , "income" , "expenditure" , "kcals_consumed" ]].groupby ("strategy_type" ).sum ()
1557+ summary_df [["strategy_type" , "income" , "expenditure" , "percentage_kcals" , "kcals_consumed" ]]
1558+ .groupby ("strategy_type" )
1559+ .sum ()
15221560 )
15231561
15241562 # Add the recognized Livelihood Activities, also grouped by strategy_type
0 commit comments