|
91 | 91 | ActivityLabel, |
92 | 92 | LivelihoodActivityScenario, |
93 | 93 | LivelihoodStrategyType, |
| 94 | + WealthGroupCategory, |
94 | 95 | ) |
95 | 96 |
|
96 | 97 | # Indexes of header rows in the Data3 dataframe (wealth_group_category, district, village, household size) |
@@ -2002,110 +2003,158 @@ def get_annotated_instances_from_dataframe( |
2002 | 2003 | ) |
2003 | 2004 |
|
2004 | 2005 | # Annotate the output metadata with completeness information |
2005 | | - # Get the summary dataframe, grouped by strategy_type |
| 2006 | + # Get the summary dataframe, filtered to the BaselineLivelihoodActivities and grouped by strategy_type and wealth_group_category |
2006 | 2007 | summary_df = pd.DataFrame(reported_summary_output.value["LivelihoodActivity"]) |
| 2008 | + summary_df = summary_df[ |
| 2009 | + (summary_df["scenario"] == LivelihoodActivityScenario.BASELINE) # Baseline activities only |
| 2010 | + & summary_df["wealth_group"].apply(lambda x: x[3] == "") # Baseline-level activities, not community ones. |
| 2011 | + ] |
2007 | 2012 | for col in ["income", "expenditure", "percentage_kcals", "kcals_consumed"]: |
2008 | 2013 | summary_df[col] = pd.to_numeric(summary_df[col], errors="coerce").fillna(0) |
| 2014 | + summary_df["wealth_group_category"] = summary_df["wealth_group"].apply(lambda x: x[2]) |
2009 | 2015 | summary_df = ( |
2010 | | - summary_df[["strategy_type", "income", "expenditure", "percentage_kcals", "kcals_consumed"]] |
2011 | | - .groupby("strategy_type") |
| 2016 | + summary_df[ |
| 2017 | + [ |
| 2018 | + "strategy_type", |
| 2019 | + "wealth_group_category", |
| 2020 | + "income", |
| 2021 | + "expenditure", |
| 2022 | + "percentage_kcals", |
| 2023 | + "kcals_consumed", |
| 2024 | + ] |
| 2025 | + ] |
| 2026 | + .groupby(["strategy_type", "wealth_group_category"]) |
2012 | 2027 | .sum() |
2013 | 2028 | ) |
2014 | 2029 |
|
2015 | | - # Add the recognized Livelihood Activities, also grouped by strategy_type |
| 2030 | + # Add the recognized Livelihood Activities, also filtered to the BaselineLivelihoodActivities and grouped by strategy_type and wealth_group_category |
2016 | 2031 | recognized_activities_df = pd.DataFrame(output.value["LivelihoodActivity"]) |
2017 | | - for column in ["income", "expenditure", "kcals_consumed"]: |
| 2032 | + recognized_activities_df = recognized_activities_df[ |
| 2033 | + (recognized_activities_df["scenario"] == LivelihoodActivityScenario.BASELINE) |
| 2034 | + & recognized_activities_df["wealth_group"].apply( |
| 2035 | + lambda x: x[3] == "" |
| 2036 | + ) # Baseline-level activities, not community ones. |
| 2037 | + ] |
| 2038 | + for column in ["income", "expenditure", "percentage_kcals", "kcals_consumed"]: |
2018 | 2039 | if column in recognized_activities_df: |
2019 | 2040 | recognized_activities_df[column] = pd.to_numeric( |
2020 | 2041 | recognized_activities_df[column], errors="coerce" |
2021 | 2042 | ).fillna(0) |
2022 | 2043 | else: |
2023 | 2044 | recognized_activities_df[column] = 0 |
| 2045 | + recognized_activities_df["wealth_group_category"] = recognized_activities_df["wealth_group"].apply( |
| 2046 | + lambda x: x[2] |
| 2047 | + ) |
| 2048 | + recognized_activities_df = ( |
| 2049 | + recognized_activities_df[ |
| 2050 | + [ |
| 2051 | + "strategy_type", |
| 2052 | + "wealth_group_category", |
| 2053 | + "income", |
| 2054 | + "expenditure", |
| 2055 | + "percentage_kcals", |
| 2056 | + "kcals_consumed", |
| 2057 | + ] |
| 2058 | + ] |
| 2059 | + .groupby(["strategy_type", "wealth_group_category"]) |
| 2060 | + .sum() |
| 2061 | + ) |
| 2062 | + |
2024 | 2063 | summary_df = summary_df.join( |
2025 | | - recognized_activities_df[["strategy_type", "income", "expenditure", "kcals_consumed"]] |
2026 | | - .groupby("strategy_type") |
2027 | | - .sum(), |
2028 | | - on="strategy_type", |
| 2064 | + recognized_activities_df, |
| 2065 | + on=["strategy_type", "wealth_group_category"], |
2029 | 2066 | lsuffix="_summary", |
2030 | 2067 | rsuffix="_recognized", |
2031 | 2068 | ).fillna(0) |
2032 | 2069 |
|
2033 | | - # Add a totals row at the end |
2034 | | - summary_df.loc["Total"] = summary_df.sum(numeric_only=True) |
2035 | | - |
2036 | | - # Add completeness percentages |
2037 | | - summary_df = summary_df.round(0) |
2038 | | - summary_df["income_completeness"] = summary_df.apply( |
2039 | | - lambda row: ( |
2040 | | - round(row["income_recognized"] / row["income_summary"] * 100, 1) |
2041 | | - if row["income_summary"] > 0 |
2042 | | - else pd.NA |
2043 | | - ), |
2044 | | - axis=1, |
2045 | | - ) |
2046 | | - summary_df["expenditure_completeness"] = summary_df.apply( |
2047 | | - lambda row: ( |
2048 | | - round(row["expenditure_recognized"] / row["expenditure_summary"] * 100, 1) |
2049 | | - if row["expenditure_summary"] > 0 |
2050 | | - else pd.NA |
2051 | | - ), |
2052 | | - axis=1, |
2053 | | - ) |
2054 | | - summary_df["kcals_consumed_completeness"] = summary_df.apply( |
2055 | | - lambda row: ( |
2056 | | - round(row["kcals_consumed_recognized"] / row["kcals_consumed_summary"] * 100, 1) |
2057 | | - if row["kcals_consumed_summary"] > 0 |
2058 | | - else pd.NA |
2059 | | - ), |
2060 | | - axis=1, |
2061 | | - ) |
2062 | | - # Format the numbers as integers, for better display in the markdown table |
2063 | | - for column in ["income", "expenditure", "kcals_consumed"]: |
2064 | | - for source in ["recognized", "summary"]: |
2065 | | - summary_df[f"{column}_{source}"] = summary_df.apply( |
2066 | | - lambda row: ( |
2067 | | - int(row[f"{column}_{source}"]) |
2068 | | - if (pd.notna(row[f"{column}_recognized"]) and row[f"{column}_recognized"] > 0) |
2069 | | - or (pd.notna(row[f"{column}_summary"]) and row[f"{column}_summary"] > 0) |
2070 | | - else pd.NA |
2071 | | - ), |
2072 | | - axis="columns", |
2073 | | - ) |
2074 | | - |
2075 | | - # Transpose and reorder the columns and rows |
2076 | | - # Sort the rows so that Strategy Types appear in the same order as in the BSS |
| 2070 | + # Sort the rows so that Strategy Types and Wealth Group Categories appear in the same order as in the BSS |
| 2071 | + summary_df = summary_df.reset_index(drop=False) |
2077 | 2072 | ordered_strategy_types = ["LivestockProduction"] + [x for x in LivelihoodStrategyType] + ["Total"] |
2078 | 2073 | summary_df["strategy_type"] = pd.Categorical( |
2079 | | - summary_df.reset_index(drop=False)["strategy_type"], |
| 2074 | + summary_df["strategy_type"], |
2080 | 2075 | categories=ordered_strategy_types, |
2081 | 2076 | ordered=True, |
2082 | 2077 | ) |
2083 | | - summary_df = summary_df.reset_index(drop=True).sort_values(by="strategy_type") |
2084 | | - summary_df = summary_df[ |
2085 | | - [ |
2086 | | - "strategy_type", |
2087 | | - "kcals_consumed_recognized", |
2088 | | - "kcals_consumed_summary", |
2089 | | - "kcals_consumed_completeness", |
2090 | | - "income_recognized", |
2091 | | - "income_summary", |
2092 | | - "income_completeness", |
2093 | | - "expenditure_recognized", |
2094 | | - "expenditure_summary", |
2095 | | - "expenditure_completeness", |
| 2078 | + ordered_wealth_group_categories = list( |
| 2079 | + WealthGroupCategory.objects.all().order_by("ordering").values_list("code", flat=True) |
| 2080 | + ) + ["All"] |
| 2081 | + summary_df["wealth_group_category"] = pd.Categorical( |
| 2082 | + summary_df["wealth_group_category"], |
| 2083 | + categories=ordered_wealth_group_categories, |
| 2084 | + ordered=True, |
| 2085 | + ) |
| 2086 | + summary_df = summary_df.sort_values(by=["strategy_type", "wealth_group_category"]) |
| 2087 | + |
| 2088 | + completeness_dfs = {} |
| 2089 | + for column in ["income", "expenditure", "percentage_kcals", "kcals_consumed"]: |
| 2090 | + completeness_df = summary_df[ |
| 2091 | + ["strategy_type", "wealth_group_category", f"{column}_recognized", f"{column}_summary"] |
| 2092 | + ] |
| 2093 | + completeness_df.columns = ["strategy_type", "wealth_group_category", "recognized", "summary"] |
| 2094 | + |
| 2095 | + # Ignore strategy types whose combined recognized and summary total for this column is not greater than zero |
| 2096 | + completeness_df = completeness_df[ |
| 2097 | + completeness_df["strategy_type"].isin( |
| 2098 | + completeness_df.groupby("strategy_type")[["recognized", "summary"]] |
| 2099 | + .sum() |
| 2100 | + .sum(axis=1) |
| 2101 | + .loc[lambda x: x > 0] |
| 2102 | + .index |
| 2103 | + ) |
2096 | 2104 | ] |
2097 | | - ] |
2098 | | - summary_df = summary_df.set_index("strategy_type").transpose() |
| 2105 | + |
| 2106 | + # Add a total row |
| 2107 | + completeness_df = completeness_df.set_index(["strategy_type", "wealth_group_category"]) |
| 2108 | + completeness_df = completeness_df.unstack() |
| 2109 | + completeness_df.loc["Total"] = completeness_df.sum() |
| 2110 | + completeness_df = completeness_df.stack() |
| 2111 | + # Add the difference |
| 2112 | + completeness_df["unrecognized"] = completeness_df["summary"] - completeness_df["recognized"] |
| 2113 | + # Add the completeness percentage |
| 2114 | + completeness_df[f"{column}_completeness"] = completeness_df.apply( |
| 2115 | + lambda row: (round(row["recognized"] / row["summary"] * 100, 1) if row["summary"] > 0 else pd.NA), |
| 2116 | + axis=1, |
| 2117 | + ) |
| 2118 | + # Format the numbers as integers, for better display in the markdown table |
| 2119 | + if column in ["income", "expenditure", "kcals_consumed"]: |
| 2120 | + for source in ["recognized", "summary", "unrecognized"]: |
| 2121 | + completeness_df[source] = completeness_df.apply( |
| 2122 | + lambda row: ( |
| 2123 | + int(row[source]) |
| 2124 | + if (pd.notna(row["recognized"]) and row["recognized"] > 0) |
| 2125 | + or (pd.notna(row["summary"]) and row["summary"] > 0) |
| 2126 | + else pd.NA |
| 2127 | + ), |
| 2128 | + axis=1, |
| 2129 | + ) |
| 2130 | + else: |
| 2131 | + # Format percentage_kcals as percentages. |
| 2132 | + for source in ["recognized", "summary", "unrecognized"]: |
| 2133 | + completeness_df[source] = (completeness_df[source] * 100).round(1).replace(-0.0, 0.0) |
| 2134 | + |
| 2135 | + completeness_dfs[column] = completeness_df |
2099 | 2136 |
|
2100 | 2137 | # Add the completeness summary to the output metadata |
2101 | | - output.metadata["pct_kcals_consumed_recognized"] = float( |
2102 | | - summary_df.loc["kcals_consumed_completeness", "Total"] |
2103 | | - ) |
2104 | | - output.metadata["pct_income_recognized"] = float(summary_df.loc["income_completeness", "Total"]) |
2105 | | - output.metadata["pct_expenditure_recognized"] = float(summary_df.loc["expenditure_completeness", "Total"]) |
2106 | | - output.metadata["completeness_summary"] = MetadataValue.md( |
2107 | | - summary_df.replace(pd.NA, None).to_markdown(floatfmt=",.0f") |
2108 | | - ) |
| 2138 | + def get_overall_recognized_percentage(metric: str) -> int: |
| 2139 | + recognized_total = completeness_dfs[metric].loc["Total", "recognized"].sum() |
| 2140 | + summary_total = completeness_dfs[metric].loc["Total", "summary"].sum() |
| 2141 | + return round(recognized_total / summary_total * 100) if summary_total > 0 else 0 |
| 2142 | + |
| 2143 | + output.metadata["pct_income_recognized"] = get_overall_recognized_percentage("income") |
| 2144 | + output.metadata["pct_expenditure_recognized"] = get_overall_recognized_percentage("expenditure") |
| 2145 | + output.metadata["pct_kcals_consumed_recognized"] = get_overall_recognized_percentage("kcals_consumed") |
| 2146 | + completeness_summary = "### Completeness of recognized Livelihood Activities compared to the summary for Baseline-level activities\n" |
| 2147 | + for column in ["income", "expenditure", "percentage_kcals", "kcals_consumed"]: |
| 2148 | + completeness_summary += f"<details>\n\n<summary>{column.replace('_', ' ').title()}</summary>\n\n" |
| 2149 | + completeness_summary += f"#### {column.replace('_', ' ').title()}\n\n" |
| 2150 | + completeness_summary += ( |
| 2151 | + completeness_dfs[column] |
| 2152 | + .replace(pd.NA, None) |
| 2153 | + .reset_index() |
| 2154 | + .to_markdown(index=False, floatfmt=",.1f", intfmt=",") |
| 2155 | + ) |
| 2156 | + completeness_summary += "\n\n</details>\n\n" |
| 2157 | + output.metadata["completeness_summary"] = MetadataValue.md(completeness_summary) |
2109 | 2158 |
|
2110 | 2159 | # Move the preview and errors metadata item to the end of the dict |
2111 | 2160 | if "errors" in output.metadata: |
|
0 commit comments