@@ -744,8 +744,14 @@ def get_instances_from_dataframe(
744744 continue
745745
746746 # When we process the values for the LivelihoodActivity records, we need to know the actual attribute
747- # that the values in this row are for
748- activity_attribute = label_attributes ["attribute" ]
747+ # that the values in this row are for. Livelihood Summary rows are grouped by percentage_kcals, income and
748+ # expenditure, so we can keep the activity_attribute from the previous Livelihood Strategy if it hasn't
749+ # been set by the label_attributes.
750+ if (
751+ activity_type != ActivityLabel .LivelihoodActivityType .LIVELIHOOD_SUMMARY
752+ or label_attributes ["attribute" ]
753+ ):
754+ activity_attribute = label_attributes ["attribute" ]
749755
750756 if label_attributes ["is_start" ]:
751757 # We are starting a new livelihood activity, so append the previous livelihood strategy
@@ -789,30 +795,14 @@ def get_instances_from_dataframe(
789795 "Found Livelihood Activities from row %s, but there is no Livelihood Strategy defined." % row
790796 )
791797
792- # Copy the attribute from the previous livelihood strategy if this is a Livelihood Summary and the
793- # attribute hasn't been set by the label_attributes.
794- if (
795- activity_type == ActivityLabel .LivelihoodActivityType .LIVELIHOOD_SUMMARY
796- and not activity_attribute
797- and previous_livelihood_strategy
798- and previous_livelihood_activities_for_strategy
799- ):
800- for attribute in ["income" , "expenditure" , "percentage_kcals" ]:
801- if attribute in previous_livelihood_activities_for_strategy [0 ]:
802- activity_attribute = attribute
803- break
804- if not activity_attribute :
805- raise ValueError (
806- f"Could not determine attribute for Livelihood Summary strategy from row { row } "
807- )
808-
809798 # Copy the product_id for MilkProduction and ButterProduction from the previous livelihood strategy
810799 # if necessary.
811800 if (
812801 livelihood_strategy ["strategy_type" ] in ["MilkProduction" , "ButterProduction" ]
813802 and ("product_id" not in livelihood_strategy or not livelihood_strategy ["product_id" ])
814803 and livelihood_strategy ["season" ] == season2_name
815804 and previous_livelihood_strategy
805+ and "product_id" in previous_livelihood_strategy
816806 and previous_livelihood_strategy ["product_id" ]
817807 ):
818808 livelihood_strategy ["attribute_rows" ]["product_id" ] = row
@@ -1071,15 +1061,45 @@ def get_instances_from_dataframe(
10711061
10721062 # Headings like CROP PRODUCTION: set the strategy type for subsequent rows.
10731063 # Some other labels imply specific strategy types, such as MilkProduction, MeatProduction or LivestockSales
1074- if label_attributes ["strategy_type" ]:
1064+ # For Livelihood Summary activities, the strategy_type is always set from the label_attributes.
1065+ if (
1066+ label_attributes ["strategy_type" ]
1067+ or activity_type == ActivityLabel .LivelihoodActivityType .LIVELIHOOD_SUMMARY
1068+ ):
10751069 strategy_type = label_attributes ["strategy_type" ]
1070+
1071+ # In the Summary section at the top of the Data worksheet, many of the labels are ambiguous but the
1072+ # rows are organized into percentage_kcals, income and expenditure sections. Therefore, we can set the
1073+ # Strategy Type based on the activity_attribute.
1074+ if strategy_type == "ReliefGift_or_Purchase" :
1075+ if activity_attribute in ("percentage_kcals" , "income" ):
1076+ strategy_type = LivelihoodStrategyType .RELIEF_GIFT_OTHER
1077+ elif activity_attribute == "expenditure" :
1078+ strategy_type = LivelihoodStrategyType .OTHER_PURCHASE
1079+ else :
1080+ errors .append (
1081+ "Invalid strategy_type %s for attribute %s from label '%s'"
1082+ % (strategy_type , activity_attribute , label )
1083+ )
1084+ activity_attribute = None
1085+ elif strategy_type == "CashIncome_or_Purchase" :
1086+ if activity_attribute == "income" :
1087+ strategy_type = LivelihoodStrategyType .OTHER_CASH_INCOME
1088+ elif activity_attribute == "expenditure" :
1089+ strategy_type = LivelihoodStrategyType .OTHER_PURCHASE
1090+ else :
1091+ errors .append (
1092+ "Invalid strategy_type %s for attribute %s from label '%s'"
1093+ % (strategy_type , activity_attribute , label )
1094+ )
1095+ activity_attribute = None
1096+
10761097 # Get the valid fields names so we can determine if the attribute is stored in LivelihoodActivity.extra
1077- # LivestockProduction is an artificial, composite strategy type representing the sum of
1078- # MilkProduction, ButterProduction and MeatProduction. It isn't stored in the database, and it only
1079- # requires income, expenditure and kcals_consumed, so we use the base LivelihoodActivity model.
1098+ # Livelihood Summary activities only contain kcals, income and expenditure, and aren't stored in
1099+ # the database, so we can use the base LivelihoodActivity model.
10801100 model = (
10811101 LivelihoodActivity
1082- if strategy_type == "LivestockProduction"
1102+ if activity_type == ActivityLabel . LivelihoodActivityType . LIVELIHOOD_SUMMARY
10831103 else class_from_name (f"baseline.models.{ strategy_type } " )
10841104 )
10851105 activity_field_names = [field .name for field in model ._meta .concrete_fields ]
@@ -1419,36 +1439,53 @@ def get_instances_from_dataframe(
14191439 "num_livelihood_strategies" : len (livelihood_strategies ),
14201440 "num_livelihood_activities" : len (livelihood_activities ),
14211441 "num_unrecognized_labels" : len (unrecognized_labels ),
1422- "pct_rows_recognized" : round (
1423- (
1424- 1
1425- - len (
1426- df .iloc [num_header_rows :][
1427- prepare_lookup (df .iloc [num_header_rows :]["A" ]).isin (unrecognized_labels ["label" ])
1428- ]
1429- )
1430- / len (df .iloc [num_header_rows :])
1431- )
1432- * 100
1433- ),
1434- "pct_used_rows_recognized" : round (
1435- (
1436- 1
1437- - len (
1438- df .iloc [num_header_rows :][
1439- prepare_lookup (df .iloc [num_header_rows :]["A" ]).isin (
1440- unrecognized_labels [unrecognized_labels ["datapoints" ] > 0 ]["label" ]
1441- )
1442- ]
1443- )
1444- / len (df .iloc [num_header_rows :])
1445- )
1446- * 100
1447- ),
1448- "preview" : MetadataValue .md (f"```json\n { json .dumps (result , indent = 4 , ensure_ascii = False )} \n ```" ),
14491442 }
14501443 if not unrecognized_labels .empty :
14511444 metadata ["unrecognized_labels" ] = MetadataValue .md (unrecognized_labels .to_markdown (index = False ))
1445+ metadata ["pct_rows_recognized" ] = round (
1446+ (
1447+ 1
1448+ - len (
1449+ df .iloc [num_header_rows :][
1450+ prepare_lookup (df .iloc [num_header_rows :]["A" ]).isin (unrecognized_labels ["label" ])
1451+ ]
1452+ )
1453+ / len (df .iloc [num_header_rows :])
1454+ )
1455+ * 100 ,
1456+ 1 ,
1457+ )
1458+ metadata ["pct_used_rows_recognized" ] = round (
1459+ (
1460+ 1
1461+ - len (
1462+ df .iloc [num_header_rows :][
1463+ prepare_lookup (df .iloc [num_header_rows :]["A" ]).isin (
1464+ unrecognized_labels [unrecognized_labels ["datapoints" ] > 0 ]["label" ]
1465+ )
1466+ ]
1467+ )
1468+ / len (df .iloc [num_header_rows :])
1469+ )
1470+ * 100 ,
1471+ 1 ,
1472+ )
1473+ metadata ["pct_used_summary_rows_recognized" ] = round (
1474+ (
1475+ 1
1476+ - len (
1477+ df .iloc [num_header_rows :][
1478+ prepare_lookup (df .iloc [num_header_rows :]["A" ]).isin (
1479+ unrecognized_labels [unrecognized_labels ["summary_datapoints" ] > 0 ]["label" ]
1480+ )
1481+ ]
1482+ )
1483+ / len (df .iloc [num_header_rows :])
1484+ )
1485+ * 100 ,
1486+ 1 ,
1487+ )
1488+ metadata ["preview" ] = MetadataValue .md (f"```json\n { json .dumps (result , indent = 4 , ensure_ascii = False )} \n ```" )
14521489
14531490 if errors :
14541491 if config .strict :
@@ -1515,10 +1552,12 @@ def get_annotated_instances_from_dataframe(
15151552 # Annotate the output metadata with completeness information
15161553 # Get the summary dataframe, grouped by strategy_type
15171554 summary_df = pd .DataFrame (reported_summary_output .value ["LivelihoodActivity" ])
1518- for col in ["income" , "expenditure" , "kcals_consumed" ]:
1555+ for col in ["income" , "expenditure" , "percentage_kcals" , " kcals_consumed" ]:
15191556 summary_df [col ] = pd .to_numeric (summary_df [col ], errors = "coerce" ).fillna (0 )
15201557 summary_df = (
1521- summary_df [["strategy_type" , "income" , "expenditure" , "kcals_consumed" ]].groupby ("strategy_type" ).sum ()
1558+ summary_df [["strategy_type" , "income" , "expenditure" , "percentage_kcals" , "kcals_consumed" ]]
1559+ .groupby ("strategy_type" )
1560+ .sum ()
15221561 )
15231562
15241563 # Add the recognized Livelihood Activities, also grouped by strategy_type
0 commit comments