Skip to content

Commit 646cb24

Browse files
committed
Made changes to the variable names
1 parent 4153081 commit 646cb24

File tree

2 files changed

+91
-39
lines changed

2 files changed

+91
-39
lines changed

scripts/2-process/smithsonian_process.py

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def process_totals_by_units(args, count_data):
8686

8787
data[unit] = total_objects
8888

89-
data = pd.DataFrame(data.items(), columns=["Unit", "Count"])
89+
data = pd.DataFrame(data.items(), columns=["Unit", "Total_objects"])
9090
data.sort_values("Unit", ascending=True, inplace=True)
9191
data.reset_index(drop=True, inplace=True)
9292
file_path = shared.path_join(
@@ -104,43 +104,43 @@ def process_totals_by_records(args, count_data):
104104

105105
for row in count_data.itertuples(index=False):
106106
unit = str(row.UNIT)
107-
cc0_records = int(row.CC0_RECORDS)
108-
cc0_records_with_cc0_media = int(row.CC0_RECORDS_WITH_CC0_MEDIA)
107+
CC0_records = int(row.CC0_RECORDS)
108+
CC0_records_with_CC0_media = int(row.CC0_RECORDS_WITH_CC0_MEDIA)
109109
total_objects = int(row.TOTAL_OBJECTS)
110110

111-
if cc0_records == 0 and cc0_records_with_cc0_media == 0:
111+
if CC0_records == 0 and CC0_records_with_CC0_media == 0:
112112
continue
113113

114114
if unit not in data:
115115
data[unit] = {
116-
"CC0_RECORDS": 0,
117-
"CC0_RECORDS_WITH_CC0_MEDIA": 0,
118-
"TOTAL_OBJECTS": 0,
116+
"CC0_records": 0,
117+
"CC0_records_with_CC0_media": 0,
118+
"Total_objects": 0,
119119
}
120120

121-
data[unit]["CC0_RECORDS"] += cc0_records
122-
data[unit]["CC0_RECORDS_WITH_CC0_MEDIA"] += cc0_records_with_cc0_media
123-
data[unit]["TOTAL_OBJECTS"] += total_objects
121+
data[unit]["CC0_records"] += CC0_records
122+
data[unit]["CC0_records_with_CC0_media"] += CC0_records_with_CC0_media
123+
data[unit]["Total_objects"] += total_objects
124124

125125
data = (
126126
pd.DataFrame.from_dict(data, orient="index")
127127
.reset_index()
128128
.rename(columns={"index": "Unit"})
129129
)
130-
data["CC0_WITHOUT_MEDIA_PERCENTAGE"] = (
130+
data["CC0_without_media_percentage"] = (
131131
(
132-
(data["CC0_RECORDS"] - data["CC0_RECORDS_WITH_CC0_MEDIA"])
133-
/ data["TOTAL_OBJECTS"]
132+
(data["CC0_records"] - data["CC0_records_with_CC0_media"])
133+
/ data["Total_objects"]
134134
)
135135
* 100
136136
).round(2)
137137

138-
data["CC0_WITH_MEDIA_PERCENTAGE"] = (
139-
(data["CC0_RECORDS_WITH_CC0_MEDIA"] / data["TOTAL_OBJECTS"]) * 100
138+
data["CC0_with_media_percentage"] = (
139+
(data["CC0_records_with_CC0_media"] / data["Total_objects"]) * 100
140140
).round(2)
141141

142-
data["OTHERS_PERCENTAGE"] = (
143-
((data["TOTAL_OBJECTS"] - data["CC0_RECORDS"]) / data["TOTAL_OBJECTS"])
142+
data["Others_percentage"] = (
143+
((data["Total_objects"] - data["CC0_records"]) / data["Total_objects"])
144144
* 100
145145
).round(2)
146146

scripts/3-report/smithsonian_report.py

Lines changed: 74 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -105,12 +105,12 @@ def smithsonian_intro(args):
105105
)
106106
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
107107
data = shared.open_data_file(LOGGER, file_path)
108-
total_objects = data["TOTAL_OBJECTS"].sum()
109-
cc0_records = data["CC0_RECORDS"].sum()
110-
cc0_records_with_media = data["CC0_RECORDS_WITH_CC0_MEDIA"].sum()
111-
cc0_media_percentage = f"{data['CC0_WITH_MEDIA_PERCENTAGE'].mean():.2f}%"
108+
total_objects = data["Total_objects"].sum()
109+
CC0_records = data["CC0_records"].sum()
110+
CC0_records_with_media = data["CC0_records_with_CC0_media"].sum()
111+
CC0_media_percentage = f"{data['CC0_with_media_percentage'].mean():.2f}%"
112112
num_units = len(data)
113-
min_unit = data["TOTAL_OBJECTS"].min()
113+
min_unit = data["Total_objects"].min()
114114
shared.update_readme(
115115
args,
116116
SECTION_FILE,
@@ -123,33 +123,33 @@ def smithsonian_intro(args):
123123
" It serves as the main legal tool used by Smithsonian."
124124
"\n"
125125
f"The results indicate a total record of {total_objects} objects,"
126-
f" with a breakdown of {cc0_records} objects without CC0 Media and"
127-
f" {cc0_records_with_media} objects with CC0 Media, taking a"
128-
f" percentage of {cc0_media_percentage} in each unit."
126+
f" with a breakdown of {CC0_records} objects without CC0 Media and"
127+
f" {CC0_records_with_media} objects with CC0 Media, taking a"
128+
f" percentage of {CC0_media_percentage} in each unit."
129129
f" There are {num_units} unique units in the data"
130130
" representing museums, libraries, zoos and many other"
131131
f" with a minimum of {min_unit} objects.",
132132
)
133133

134134

135-
def plot_totals_by_units(args):
135+
def plot_totals_by_top10_units(args):
136136
"""
137-
Create plots showing totals by units
137+
Create plots showing totals by top 10 units
138138
"""
139-
LOGGER.info(plot_totals_by_units.__doc__.strip())
139+
LOGGER.info(plot_totals_by_top10_units.__doc__.strip())
140140
file_path = shared.path_join(
141141
PATHS["data_2-process"],
142-
"smithsonian_totals_by_records.csv",
142+
"smithsonian_totals_by_units.csv",
143143
)
144144
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
145145
name_label = "Unit"
146-
data_label = "TOTAL_OBJECTS"
146+
data_label = "Total_objects"
147147
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
148-
data["TOTAL_OBJECTS"] = data["TOTAL_OBJECTS"].astype(int)
148+
data["Total_objects"] = data["Total_objects"].astype(int)
149149
data.sort_values(data_label, ascending=True, inplace=True)
150-
average_unit = data["TOTAL_OBJECTS"].mean()
151150
data = data.head(10)
152-
title = "Totals by Units"
151+
average_unit = data["Total_objects"].mean()
152+
title = "Top 10 Units"
153153
plt = plot.combined_plot(
154154
args=args,
155155
data=data,
@@ -178,7 +178,57 @@ def plot_totals_by_units(args):
178178
"This shows the distribution of top 10"
179179
" units/ sub providers across smithsonian"
180180
f" with an average of {average_unit} objects"
181-
" across the sub providers.",
181+
" across the top 10 sub providers.",
182+
)
183+
184+
185+
def plot_totals_by_lowest10_units(args):
    """
    Create plots showing totals by lowest 10 units
    """
    # NOTE: the docstring above is emitted verbatim by the LOGGER.info() call
    # below, so it is intentionally kept to a single summary line.
    #
    # args: options object (presumably the parsed CLI arguments -- confirm
    #   against the caller). It is forwarded to plot.combined_plot() and
    #   shared.update_readme(); args.enable_save gates writing the image.
    LOGGER.info(plot_totals_by_lowest10_units.__doc__.strip())
    file_path = shared.path_join(
        PATHS["data_2-process"],
        "smithsonian_totals_by_units.csv",
    )
    LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
    name_label = "Unit"
    data_label = "Total_objects"
    data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
    data[data_label] = data[data_label].astype(int)
    # An ascending sort places the smallest units first, so the lowest 10 are
    # the first 10 rows. (tail(10) would select the 10 largest units instead.)
    data.sort_values(data_label, ascending=True, inplace=True)
    data = data.head(10)
    # Average is computed over the selected rows only, matching the
    # "across the lowest 10 sub providers" wording in the README text below.
    average_unit = data[data_label].mean()
    # Title describes the subset actually plotted
    title = "Lowest 10 Units"
    plt = plot.combined_plot(
        args=args,
        data=data,
        title=title,
        name_label=name_label,
        data_label=data_label,
    )

    # NOTE(review): confirm this file name is not also used by
    # plot_totals_by_top10_units; if it is, one image overwrites the other.
    image_path = shared.path_join(
        PATHS["data_phase"], "smithsonian_totals_by_unit.png"
    )
    LOGGER.info(f"image file: {image_path.replace(PATHS['repo'], '.')}")

    if args.enable_save:
        # Create the directory if it does not exist
        os.makedirs(PATHS["data_phase"], exist_ok=True)
        plt.savefig(image_path)

    shared.update_readme(
        args,
        SECTION_FILE,
        SECTION_TITLE,
        title,
        image_path,
        "Plots showing totals by units.",
        "This shows the distribution of lowest 10"
        " units/ sub providers across smithsonian"
        f" with an average of {average_unit} objects"
        " across the lowest 10 sub providers.",
    )
183233

184234

@@ -194,9 +244,9 @@ def plot_totals_by_records(args):
194244
LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
195245
name_label = "Unit"
196246
stack_labels = [
197-
"CC0_WITHOUT_MEDIA_PERCENTAGE",
198-
"CC0_WITH_MEDIA_PERCENTAGE",
199-
"OTHERS_PERCENTAGE",
247+
"CC0_without_media_percentage",
248+
"CC0_with_media_percentage",
249+
"Others_percentage",
200250
]
201251
data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
202252
data = data.head(10)
@@ -225,7 +275,8 @@ def plot_totals_by_records(args):
225275
image_path,
226276
"Plots showing totals by CC0 records.",
227277
"This is the breakdown of CC0 records"
228-
" without media and CC0 records with media.",
278+
" without media, CC0 records with media and records"
279+
" that are not associated with CC0.",
229280
)
230281

231282

@@ -238,7 +289,8 @@ def main():
238289
)
239290
shared.check_completion_file_exists(args, last_entry)
240291
smithsonian_intro(args)
241-
plot_totals_by_units(args)
292+
plot_totals_by_top10_units(args)
293+
plot_totals_by_lowest10_units(args)
242294
plot_totals_by_records(args)
243295

244296
# Add and commit changes

0 commit comments

Comments
 (0)