@@ -111,25 +111,25 @@ def smithsonian_intro(args):
111111 CC0_records_with_media = data ["CC0_records_with_CC0_media" ].sum ()
112112 CC0_media_percentage = f"{ data ['CC0_with_media_percentage' ].mean ():.2f} %"
113113 num_units = len (data )
114- min_unit = data ["Total_objects" ].min ()
114+ min_object = data ["Total_objects" ].min ()
115115 shared .update_readme (
116116 args ,
117117 SECTION_FILE ,
118118 SECTION_TITLE ,
119119 "Overview" ,
120120 None ,
121121 None ,
122- "The Smithsonian data returns the overall "
122+ "The Smithsonian Institute data returns the overall"
123123 " statistics of CC0 legal tool records."
124- " It serves as the main legal tool used by Smithsonian."
124+ " It serves as the main legal tool used by Smithsonian Institute ."
125125 "\n "
126- f"The results indicate a total record of { total_objects } objects,"
127- f" with a breakdown of { CC0_records } objects without CC0 Media and"
128- f" { CC0_records_with_media } objects with CC0 Media, taking a"
129- f" percentage of { CC0_media_percentage } in each unit ."
126+ f"The results indicate a total record of { total_objects :, } objects,"
127+ f" with a breakdown of { CC0_records :, } objects without CC0 Media and"
128+ f" { CC0_records_with_media :, } objects with CC0 Media, taking a"
129+ f" percentage of { CC0_media_percentage } in each institute member ."
130130 f" There are { num_units } unique units in the data"
131- " representing museums, libraries, zoos and many other"
132- f" with a minimum of { min_unit } objects." ,
131+ " representing museums, libraries, zoos and other institutions "
132+ f" with a minimum of { min_object } objects." ,
133133 )
134134
135135
@@ -143,20 +143,21 @@ def plot_totals_by_top10_units(args):
143143 "smithsonian_totals_by_units.csv" ,
144144 )
145145 LOGGER .info (f"data file: { file_path .replace (PATHS ['repo' ], '.' )} " )
146- name_label = "Unit "
146+ name_label = "Data_source "
147147 data_label = "Total_objects"
148148 data = shared .open_data_file (LOGGER , file_path , index_col = name_label )
149149 data ["Total_objects" ] = data ["Total_objects" ].astype (int )
150150 data .sort_values (data_label , ascending = True , inplace = True )
151151 data = data .tail (10 )
152152 average_unit = data ["Total_objects" ].mean ()
153- title = "Top 10 Units"
153+ title = "Totals by 10 Units"
154154 plt = plot .combined_plot (
155155 args = args ,
156156 data = data ,
157157 title = title ,
158158 name_label = name_label ,
159159 data_label = data_label ,
160+ bar_ylabel = "Data Sources" ,
160161 )
161162
162163 image_path = shared .path_join (
@@ -175,11 +176,11 @@ def plot_totals_by_top10_units(args):
175176 SECTION_TITLE ,
176177 title ,
177178 image_path ,
178- "Plots showing totals by units." ,
179- "This shows the distribution of top 10"
180- " units/ sub providers across smithsonian "
181- f" with an average of { average_unit } objects"
182- " across the top 10 sub providers ." ,
179+ "Plots showing totals by units. This shows the"
180+ " distribution of top 10 institute member across "
181+ " Smithsonian Institute with an average of "
182+ f" { average_unit :, } objects across the top 10 "
183+ " Institute members ." ,
183184 )
184185
185186
@@ -193,7 +194,7 @@ def plot_totals_by_lowest10_units(args):
193194 "smithsonian_totals_by_units.csv" ,
194195 )
195196 LOGGER .info (f"data file: { file_path .replace (PATHS ['repo' ], '.' )} " )
196- name_label = "Unit "
197+ name_label = "Data_source "
197198 data_label = "Total_objects"
198199 data = shared .open_data_file (LOGGER , file_path , index_col = name_label )
199200 data ["Total_objects" ] = data ["Total_objects" ].astype (int )
@@ -207,6 +208,7 @@ def plot_totals_by_lowest10_units(args):
207208 title = title ,
208209 name_label = name_label ,
209210 data_label = data_label ,
211+ bar_ylabel = "Data Sources" ,
210212 )
211213
212214 image_path = shared .path_join (
@@ -227,40 +229,94 @@ def plot_totals_by_lowest10_units(args):
227229 image_path ,
228230 "Plots showing totals by units." ,
229231 "This shows the distribution of lowest 10"
230- " units/ sub providers across smithsonian "
232+ " institute member across Smithsonian Institute "
231233 f" with an average of { average_unit } objects"
232- " across the lowest 10 sub providers ." ,
234+ " across the lowest 10 institute members ." ,
233235 )
234236
235237
236- def plot_totals_by_records (args ):
238+ def plot_totals_by_top10_unit_records (args ):
237239 """
238- Create plots showing totals by records
240+ Create plots showing breakdown of CC0 records by top 10 units
239241 """
240- LOGGER .info (plot_totals_by_records .__doc__ .strip ())
242+ LOGGER .info (plot_totals_by_top10_unit_records .__doc__ .strip ())
241243 file_path = shared .path_join (
242244 PATHS ["data_2-process" ],
243245 "smithsonian_totals_by_records.csv" ,
244246 )
245247 LOGGER .info (f"data file: { file_path .replace (PATHS ['repo' ], '.' )} " )
246- name_label = "Unit"
248+ name_label = "Data_source"
249+ data_label = "Total_objects"
247250 stack_labels = [
248251 "CC0_without_media_percentage" ,
249252 "CC0_with_media_percentage" ,
250253 "Others_percentage" ,
251254 ]
252255 data = shared .open_data_file (LOGGER , file_path , index_col = name_label )
256+ data .sort_values (data_label , ascending = True , inplace = True )
257+ data = data .tail (10 )
258+ title = "Breakdown of CC0 records by top 10 units"
259+ plt = plot .stacked_barh_plot (
260+ args = args ,
261+ data = data ,
262+ title = title ,
263+ name_label = name_label ,
264+ stack_labels = stack_labels ,
265+ ylabel = "Data Sources" ,
266+ )
267+ image_path = shared .path_join (
268+ PATHS ["data_phase" ], "smithsonian_by_top10_unit_records.png"
269+ )
270+ LOGGER .info (f"image file: { image_path .replace (PATHS ['repo' ], '.' )} " )
271+ if args .enable_save :
272+ # Create the directory if it does not exist
273+ os .makedirs (PATHS ["data_phase" ], exist_ok = True )
274+ plt .savefig (image_path )
275+
276+ shared .update_readme (
277+ args ,
278+ SECTION_FILE ,
279+ SECTION_TITLE ,
280+ title ,
281+ image_path ,
282+ "Plots showing totals by CC0 records. This is the"
283+ " top 10 units with a breakdown of CC0 records"
284+ " without media, CC0 records with media and records"
285+ " that are not associated with CC0." ,
286+ )
287+
288+
289+ def plot_totals_by_lowest10_unit_records (args ):
290+ """
291+ Create plots showing breakdown of CC0 records by lowest 10 units
292+ """
293+ LOGGER .info (plot_totals_by_lowest10_unit_records .__doc__ .strip ())
294+ file_path = shared .path_join (
295+ PATHS ["data_2-process" ],
296+ "smithsonian_totals_by_records.csv" ,
297+ )
298+ LOGGER .info (f"data file: { file_path .replace (PATHS ['repo' ], '.' )} " )
299+ name_label = "Data_source"
300+ data_label = "Total_objects"
301+ stack_labels = [
302+ "CC0_without_media_percentage" ,
303+ "CC0_with_media_percentage" ,
304+ "Others_percentage" ,
305+ ]
306+ data = shared .open_data_file (LOGGER , file_path , index_col = name_label )
307+ data .sort_values (data_label , ascending = True , inplace = True )
253308 data = data .head (10 )
254- title = "Totals by records"
309+ title = "Breakdown of CC0 records by lowest 10 units "
255310 plt = plot .stacked_barh_plot (
256311 args = args ,
257312 data = data ,
258313 title = title ,
259314 name_label = name_label ,
260315 stack_labels = stack_labels ,
316+ ylabel = "Data Sources" ,
261317 )
262318 image_path = shared .path_join (
263- PATHS ["data_phase" ], "smithsonian_by_records .png"
319+ PATHS ["data_phase" ], "smithsonian_by_lowest10_unit_records .png"
264320 )
265321 LOGGER .info (f"image file: { image_path .replace (PATHS ['repo' ], '.' )} " )
266322 if args .enable_save :
@@ -274,8 +330,8 @@ def plot_totals_by_records(args):
274330 SECTION_TITLE ,
275331 title ,
276332 image_path ,
277- "Plots showing totals by CC0 records." ,
278- "This is the breakdown of CC0 records"
333+ "Plots showing totals by CC0 records. This is the"
334+ " lowest 10 units with a breakdown of CC0 records"
279335 " without media, CC0 records with media and records"
280336 " that are not associated with CC0." ,
281337 )
@@ -292,7 +348,8 @@ def main():
292348 smithsonian_intro (args )
293349 plot_totals_by_top10_units (args )
294350 plot_totals_by_lowest10_units (args )
295- plot_totals_by_records (args )
351+ plot_totals_by_top10_unit_records (args )
352+ plot_totals_by_lowest10_unit_records (args )
296353
297354 # Add and commit changes
298355 args = shared .git_add_and_commit (
0 commit comments