@@ -105,12 +105,12 @@ def smithsonian_intro(args):
105105 )
106106 LOGGER .info (f"data file: { file_path .replace (PATHS ['repo' ], '.' )} " )
107107 data = shared .open_data_file (LOGGER , file_path )
108- total_objects = data ["TOTAL_OBJECTS " ].sum ()
109- cc0_records = data ["CC0_RECORDS " ].sum ()
110- cc0_records_with_media = data ["CC0_RECORDS_WITH_CC0_MEDIA " ].sum ()
111- cc0_media_percentage = f"{ data ['CC0_WITH_MEDIA_PERCENTAGE ' ].mean ():.2f} %"
108+ total_objects = data ["Total_objects " ].sum ()
109+ CC0_records = data ["CC0_records " ].sum ()
110+ CC0_records_with_media = data ["CC0_records_with_CC0_media " ].sum ()
111+ CC0_media_percentage = f"{ data ['CC0_with_media_percentage ' ].mean ():.2f} %"
112112 num_units = len (data )
113- min_unit = data ["TOTAL_OBJECTS " ].min ()
113+ min_unit = data ["Total_objects " ].min ()
114114 shared .update_readme (
115115 args ,
116116 SECTION_FILE ,
@@ -123,33 +123,33 @@ def smithsonian_intro(args):
123123 " It serves as the main legal tool used by Smithsonian."
124124 "\n "
125125 f"The results indicate a total record of { total_objects } objects,"
126- f" with a breakdown of { cc0_records } objects without CC0 Media and"
127- f" { cc0_records_with_media } objects with CC0 Media, taking a"
128- f" percentage of { cc0_media_percentage } in each unit."
126+ f" with a breakdown of { CC0_records } objects without CC0 Media and"
127+ f" { CC0_records_with_media } objects with CC0 Media, taking a"
128+ f" percentage of { CC0_media_percentage } in each unit."
129129 f" There are { num_units } unique units in the data"
130130 " representing museums, libraries, zoos and many other"
131131 f" with a minimum of { min_unit } objects." ,
132132 )
133133
134134
135- def plot_totals_by_units (args ):
135+ def plot_totals_by_top10_units (args ):
136136 """
137- Create plots showing totals by units
137+ Create plots showing totals by top 10 units
138138 """
139- LOGGER .info (plot_totals_by_units .__doc__ .strip ())
139+ LOGGER .info (plot_totals_by_top10_units .__doc__ .strip ())
140140 file_path = shared .path_join (
141141 PATHS ["data_2-process" ],
142- "smithsonian_totals_by_records .csv" ,
142+ "smithsonian_totals_by_units .csv" ,
143143 )
144144 LOGGER .info (f"data file: { file_path .replace (PATHS ['repo' ], '.' )} " )
145145 name_label = "Unit"
146- data_label = "TOTAL_OBJECTS "
146+ data_label = "Total_objects "
147147 data = shared .open_data_file (LOGGER , file_path , index_col = name_label )
148- data ["TOTAL_OBJECTS " ] = data ["TOTAL_OBJECTS " ].astype (int )
148+ data ["Total_objects " ] = data ["Total_objects " ].astype (int )
149149 data .sort_values (data_label , ascending = True , inplace = True )
150- average_unit = data ["TOTAL_OBJECTS" ].mean ()
151150 data = data .head (10 )
152- title = "Totals by Units"
151+ average_unit = data ["Total_objects" ].mean ()
152+ title = "Top 10 Units"
153153 plt = plot .combined_plot (
154154 args = args ,
155155 data = data ,
@@ -178,7 +178,57 @@ def plot_totals_by_units(args):
178178 "This shows the distribution of top 10"
179179 " units/ sub providers across smithsonian"
180180 f" with an average of { average_unit } objects"
181- " across the sub providers." ,
181+ " across the top 10 sub providers." ,
182+ )
183+
184+
def plot_totals_by_lowest10_units(args):
    """
    Create plots showing totals by lowest 10 units
    """
    # NOTE: the docstring above is emitted verbatim as a log message on the
    # next line, so longer documentation is kept in comments rather than in
    # the docstring itself.
    #
    # args: parsed command-line arguments. They are forwarded to
    #   plot.combined_plot() and shared.update_readme(); args.enable_save
    #   decides whether the rendered image is written to disk.
    # Returns None; the effects are the log lines, the optional image file
    # and the README update.
    LOGGER.info(plot_totals_by_lowest10_units.__doc__.strip())
    file_path = shared.path_join(
        PATHS["data_2-process"],
        "smithsonian_totals_by_units.csv",
    )
    LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')} ")
    name_label = "Unit"
    data_label = "Total_objects"
    data = shared.open_data_file(LOGGER, file_path, index_col=name_label)
    data[data_label] = data[data_label].astype(int)
    data.sort_values(data_label, ascending=True, inplace=True)
    # With an ascending sort the smallest totals come first, so the lowest
    # ten units are the first ten rows. (tail(10) would select the ten
    # largest units instead.)
    data = data.head(10)
    # Average over the ten selected units only, matching the README wording.
    average_unit = data[data_label].mean()
    title = "Lowest 10 Units"
    plt = plot.combined_plot(
        args=args,
        data=data,
        title=title,
        name_label=name_label,
        data_label=data_label,
    )

    # NOTE(review): this file name looks copied from the top-10 plot; if that
    # function saves to the same path, one image overwrites the other.
    # Confirm and give this plot its own file name if so.
    image_path = shared.path_join(
        PATHS["data_phase"], "smithsonian_totals_by_unit.png"
    )
    LOGGER.info(f"image file: {image_path.replace(PATHS['repo'], '.')} ")

    if args.enable_save:
        # Create the directory if it does not exist
        os.makedirs(PATHS["data_phase"], exist_ok=True)
        plt.savefig(image_path)

    shared.update_readme(
        args,
        SECTION_FILE,
        SECTION_TITLE,
        title,
        image_path,
        "Plots showing totals by units.",
        "This shows the distribution of lowest 10"
        " units/ sub providers across smithsonian"
        f" with an average of {average_unit} objects"
        " across the lowest 10 sub providers.",
    )
183233
184234
@@ -194,9 +244,9 @@ def plot_totals_by_records(args):
194244 LOGGER .info (f"data file: { file_path .replace (PATHS ['repo' ], '.' )} " )
195245 name_label = "Unit"
196246 stack_labels = [
197- "CC0_WITHOUT_MEDIA_PERCENTAGE " ,
198- "CC0_WITH_MEDIA_PERCENTAGE " ,
199- "OTHERS_PERCENTAGE " ,
247+ "CC0_without_media_percentage " ,
248+ "CC0_with_media_percentage " ,
249+ "Others_percentage " ,
200250 ]
201251 data = shared .open_data_file (LOGGER , file_path , index_col = name_label )
202252 data = data .head (10 )
@@ -225,7 +275,8 @@ def plot_totals_by_records(args):
225275 image_path ,
226276 "Plots showing totals by CC0 records." ,
227277 "This is the breakdown of CC0 records"
228- " without media and CC0 records with media." ,
278+ " without media, CC0 records with media and records"
279+ " that are not associated with CC0." ,
229280 )
230281
231282
@@ -238,7 +289,8 @@ def main():
238289 )
239290 shared .check_completion_file_exists (args , last_entry )
240291 smithsonian_intro (args )
241- plot_totals_by_units (args )
292+ plot_totals_by_top10_units (args )
293+ plot_totals_by_lowest10_units (args )
242294 plot_totals_by_records (args )
243295
244296 # Add and commit changes
0 commit comments