-
Notifications
You must be signed in to change notification settings - Fork 17
Expand file tree
/
Copy pathallocation_tool.py
More file actions
559 lines (454 loc) · 25.4 KB
/
allocation_tool.py
File metadata and controls
559 lines (454 loc) · 25.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
import os
import numpy as np
import pandas as pd
from osgeo import gdal
from PyQt5.QtCore import QObject, pyqtSignal
import shutil
# GDAL exceptions
gdal.UseExceptions()
class AllocationTool(QObject):
progress_updated = pyqtSignal(int)
def __init__(self):
super(AllocationTool, self).__init__()
self.data_folder = None
def set_working_directory(self, directory):
'''
Set up the working directory
:param directory: your local directory with all dat files
'''
self.data_folder = directory
os.chdir(self.data_folder)
###Step1 Create the Fitting Modeling Region Map###
def image_to_array(self,image):
# Set up a GDAL dataset
in_ds = gdal.Open(image)
# Set up a GDAL band
in_band = in_ds.GetRasterBand(1)
# Create Numpy Array1
arr = in_band.ReadAsArray()
return arr
def array_to_image(self, in_fn, out_fn, data, data_type, nodata=None):
'''
Create image from array
:param in_fn: datasource to copy projection and geotransform from
:param out_fn:path to the file to create
:param data:the NumPy array
:param data_type:output data type
:param nodata:optional NoData value
:return:
'''
in_ds = gdal.Open(in_fn)
output_format = out_fn.split('.')[-1].upper()
if (output_format == 'TIF'):
output_format = 'GTIFF'
elif (output_format == 'RST'):
output_format = 'rst'
driver = gdal.GetDriverByName(output_format)
out_ds = driver.Create(out_fn, in_ds.RasterXSize, in_ds.RasterYSize, 1, data_type, options=["BigTIFF=YES"])
out_band = out_ds.GetRasterBand(1)
out_ds.SetGeoTransform(in_ds.GetGeoTransform())
out_ds.SetProjection(in_ds.GetProjection().encode('utf-8', 'backslashreplace').decode('utf-8'))
if nodata is not None:
out_band.SetNoDataValue(nodata)
out_band.WriteArray(data)
out_band.FlushCache()
out_ds.FlushCache()
return
def tabulation_bin_id_HRP(self, risk30_hrp, municipality, out_fn1):
"""
This function is to create fitting modeling region array(tabulation_bin_id_masked)
and fitting modeling region map(tabulation_bin_image)
:param risk30_hrp: The 30-class vulnerability map for the CAL/HRP
:param municipality: Subdivision image
:param out_fn1: user input
:return: tabulation_bin_id_masked: tabulation bin id array in CAL/HRP
"""
# Convert risk30_hrp to NumPy array
arr1 = self.image_to_array(risk30_hrp)
# Convert municipality to NumPy array2
arr2 = self.image_to_array(municipality)
# Create a mask where the risk30_hrp value larger than 1 reclassed into 1
mask_arr_HRP = np.where(arr1 > 0,1, arr1)
# Calculate tabulation bin id with mask
tabulation_bin_id_masked = np.add(arr1*1000, arr2) * mask_arr_HRP
# Convert the array to signed 16-bit integer (int16) data type
tabulation_bin_id_masked = tabulation_bin_id_masked.astype(np.int16)
# Create the final image using tabulation_bin_image function
self.array_to_image(risk30_hrp, out_fn1, tabulation_bin_id_masked,
gdal.GDT_Int16, -1)
return tabulation_bin_id_masked
###Step2 Calculate the Relative Frequencies###
def create_relative_frequency_table(self, tabulation_bin_id_masked, deforestation_hrp, csv_name):
"""
Create dataframe
:param tabulation_bin_id_masked: array with id and total deforestation
:param deforestation_hrp: Deforestation Map during the CAL/HRP
:return: merged_df: relative frequency dataframe
"""
# Calculate array area of the bin [integer] (in pixels) for Col3 using np.unique and counts function, excluding 0
unique, counts = np.unique(tabulation_bin_id_masked[tabulation_bin_id_masked != 0], return_counts=True)
# Convert to array
arr_counts = np.asarray((unique, counts)).T
# Calculate total deforestation within the bin [integer] array for Col2
arr3 = self.image_to_array(deforestation_hrp)
# deforestation_within_bin will have tabulation_bin_id value in deforestation pixel
deforestation_within_bin = tabulation_bin_id_masked * arr3
# Use np.unique to counts total deforestation in each bin
unique1, counts1 = np.unique(deforestation_within_bin[deforestation_within_bin != 0], return_counts=True)
# Convert to array
arr_counts_deforestion = np.asarray((unique1, counts1)).T
# Create pandas DataFrames
df1 = pd.DataFrame(arr_counts_deforestion, columns=['ID', 'Total Deforestation(pixel)'])
df2 = pd.DataFrame(arr_counts, columns=['ID', 'Area of the Bin(pixel)'])
# Merge the two DataFrames based on the 'id' column using an outer join to include all rows from both DataFrames
merged_df = pd.merge(df1, df2, on='ID', how='outer').fillna(0)
# Calculate Average Deforestation by performing the division operation of col2 and col3 and add a new column to merged_df
merged_df['Average Deforestation(pixel)'] = merged_df.iloc[:, 1].astype(float) / merged_df.iloc[:, 2].astype(float)
# Sort the DataFrame based on the 'ID'
merged_df = merged_df.sort_values(by='ID')
# Reset the index to have consecutive integer indices
merged_df = merged_df.reset_index(drop=True)
csv_file_path = csv_name
merged_df.to_csv(csv_file_path, index=False)
return merged_df
###Step 3 Fitting Phase: create the fitted density map###
def create_fit_density_map(self,risk30_hrp, tabulation_bin_id_masked, merged_df, out_fn2):
'''
Create the fitting density map, this function used for fitting phase (CAL and HRP)
:param risk30_hrp: the 30-class vulnerability map for the CAL/HRP
:param tabulation_bin_id_masked: array for tabulation bin id in fitting Phase
:param merged_df: relative frequency dataframe
:return:
'''
# Insert index=0 row into first row of merged_df DataFrame
new_row = pd.DataFrame({'ID': [0], 'Total Deforestation(pixel)': [0], 'Area of the Bin(pixel)': [0],
'Average Deforestation(pixel)': [0]})
merged_df = pd.concat([new_row, merged_df]).reset_index(drop=True)
# Using numpy.searchsorted() to assign values to 'id'
df_sorted = merged_df.sort_values('ID')
sorted_indices = df_sorted['ID'].searchsorted(tabulation_bin_id_masked)
relative_frequency_arr = tabulation_bin_id_masked[:] = df_sorted['Average Deforestation(pixel)'].values[sorted_indices]
# Calculate areal_resolution_of_map_pixels
in_ds4 = gdal.Open(risk30_hrp)
P1 = in_ds4.GetGeoTransform()[1]
P2 = abs(in_ds4.GetGeoTransform()[5])
areal_resolution_of_map_pixels = P1 * P2 / 10000
# Relative_frequency multiplied by the areal resolution of the map pixels to express the probabilities as densities
fit_density_arr=relative_frequency_arr * areal_resolution_of_map_pixels
# Create the final fit_density_map image using tabulation_bin_image function
self.array_to_image(risk30_hrp, out_fn2, fit_density_arr, gdal.GDT_Float32, -1)
return
###Step 3 Prediction Phase: create the adjusted predicted density map###
def tabulation_bin_id_VP (self, risk30_vp, municipality, out_fn1):
"""
This function is to create modeling region array(tabulation_bin_id_VP_masked)
and modeling region map(tabulation_bin_image_vp)
:param risk30_vp: The 30-class vulnerability map for the CNF/VP
:param municipality: Subdivision image
:param out_fn1: user input
:return: tabulation_bin_id_VP_masked: tabulation bin id array in CNF/VP
"""
# Convert municipality and risk30_vp to NumPy array
arr2 = self.image_to_array(municipality)
arr4 = self.image_to_array(risk30_vp)
# Create a mask where the risk30_hrp value larger than 1 reclassed into 1
mask_arr_VP = np.where(arr4 > 0, 1, arr4)
# Calculate tabulation bin id of CNF/VP with mask
tabulation_bin_id_VP_masked = np.add(arr4 * 1000, arr2) * mask_arr_VP
# Convert the array to signed 16-bit integer (int16) data type
tabulation_bin_id_VP_masked = tabulation_bin_id_VP_masked.astype(np.int16)
#Write image to disk
self.array_to_image(risk30_vp, out_fn1, tabulation_bin_id_VP_masked,
gdal.GDT_Int16, -1)
return tabulation_bin_id_VP_masked
def calculate_prediction_density_arr(self,risk30_vp, tabulation_bin_id_VP_masked, csv):
'''
Calculate the prediction density arry
:param tabulation_bin_id_VP_masked: array for tabulation bin id in CNF/VP
:param csv: relative frequency table
:param risk30_vp: the 30-class vulnerability map for the CNF/VP
:return: prediction_density_arr: modeled deforestation (MD)
'''
# Read Relative Frequency csv file
merged_df=pd.read_csv(csv)
# Insert index=0 row into first row of merged_df DataFrame
new_row = pd.DataFrame({'ID': [0], 'Total Deforestation(pixel)': [0], 'Area of the Bin(pixel)': [0],
'Average Deforestation(pixel)': [0]})
merged_df = pd.concat([new_row, merged_df]).reset_index(drop=True)
# Using numpy.searchsorted() to assign values to 'id'
df_sorted = merged_df.sort_values('ID')
sorted_indices = df_sorted['ID'].searchsorted(tabulation_bin_id_VP_masked)
relative_frequency_arr = tabulation_bin_id_VP_masked[:] = df_sorted['Average Deforestation(pixel)'].values[sorted_indices]
# Calculate areal_resolution_of_map_pixels
in_ds4 = gdal.Open(risk30_vp)
P1 = in_ds4.GetGeoTransform()[1]
P2 = abs(in_ds4.GetGeoTransform()[5])
areal_resolution_of_map_pixels = P1 * P2 / 10000
# Relative_frequency multiplied by the areal resolution of the map pixels to express the probabilities as densities
prediction_density_arr=relative_frequency_arr * areal_resolution_of_map_pixels
return prediction_density_arr
def calculate_adjustment_ratio_cnf(self, prediction_density_arr, deforestation_cnf):
'''
Calculate the Adjustment Ratio (AR) in CNF
:param prediction_density_arr: modeled deforestation (MD)
:param deforestation_cnf: deforestation binary map in cnf
:return: AR
'''
# Sum up the pixels in the prediction density map. This is the modeled deforestation (MD).
MD = np.sum(prediction_density_arr)
# Calculate areal_resolution_of_map_pixels
in_ds5 = gdal.Open(deforestation_cnf)
P1 = in_ds5.GetGeoTransform()[1]
P2 = abs(in_ds5.GetGeoTransform()[5])
areal_resolution_of_map_pixels = P1 * P2 / 10000
# Convert deforestation_cnf to array in ha
arr5 = self.image_to_array(deforestation_cnf)
arr5_ha=arr5 * areal_resolution_of_map_pixels
# Calculate the Actual Deforestation (AD) during the confirmation period
AD = np.sum(arr5_ha)
# AR = AD / MD
AR = AD / MD
return AR
def calculate_adjustment_ratio(self,prediction_density_arr, expected_deforestation):
'''
Calculate the Adjustment Ratio (AR) in VP
:param prediction_density_arr: modeled deforestation (MD)
:param expected_deforestation: user input
:return: AR
'''
# Sum up the pixels in the prediction density map. This is the modeled deforestation (MD).
MD = np.sum(prediction_density_arr)
# AR = ED / MD
AR = expected_deforestation / MD
return AR
def adjusted_prediction_density_array (self, prediction_density_arr, risk30_vp, AR):
'''
Create adjusted prediction density array
:param prediction_density_arr:modeled deforestation (MD)
:param risk30_vp: risk30_vp image
:param AR:Adjustment Ratio
:return: adjusted_prediction_density_np_arr
'''
# Calculate the maximum density
# Calculate areal_resolution_of_map_pixels
in_ds4 = gdal.Open(risk30_vp)
P1 = in_ds4.GetGeoTransform()[1]
P2 = abs(in_ds4.GetGeoTransform()[5])
maximum_density = P1 * P2 / 10000
# Adjusted_Prediction_Density_Map = AR x Prediction_Density _Map
adjusted_prediction_density_arr=AR*prediction_density_arr
# Reclassify all pixels greater than the maximum (e.g., 0.09) to be the maximum
adjusted_prediction_density_arr[adjusted_prediction_density_arr > maximum_density] = maximum_density
return adjusted_prediction_density_arr
def adjusted_prediction_density_map (self, prediction_density_arr, risk30_vp, AR, out_fn2):
'''
Create adjusted prediction density map
:param prediction_density_arr:modeled deforestation (MD)
:param risk30_vp: risk30_vp image
:param AR:Adjustment Ratio
:param out_fn2: user input
:return:
'''
# Calculate the maximum density
# Calculate areal_resolution_of_map_pixels
in_ds4 = gdal.Open(risk30_vp)
P1 = in_ds4.GetGeoTransform()[1]
P2 = abs(in_ds4.GetGeoTransform()[5])
maximum_density = P1 * P2 / 10000
# Adjusted_Prediction_Density_Map = AR x Prediction_Density _Map
adjusted_prediction_density_arr=AR*prediction_density_arr
# Reclassify all pixels greater than the maximum (e.g., 0.09) to be the maximum
adjusted_prediction_density_arr[adjusted_prediction_density_arr > maximum_density] = maximum_density
# Create imagery
self.array_to_image(risk30_vp, out_fn2, adjusted_prediction_density_arr, gdal.GDT_Float32, -1)
return
def replace_ref_system(self, in_fn, out_fn):
'''
RST raster format: correct reference system name in rdc file
:param in_fn: datasource to copy correct projection name
:param out_fn: rst raster file
'''
if out_fn.split('.')[-1] == 'rst':
if in_fn.split('.')[-1] == 'rst':
read_file_name, _ = os.path.splitext(in_fn)
write_file_name, _ = os.path.splitext(out_fn)
temp_file_path = 'rdc_temp.rdc'
with open(read_file_name + '.rdc', 'r') as read_file:
for line in read_file:
if line.startswith("ref. system :"):
correct_name = line
break
if correct_name:
with open(write_file_name + '.rdc', 'r') as read_file, open(temp_file_path, 'w') as write_file:
for line in read_file:
if line.startswith("ref. system :"):
write_file.write(correct_name)
else:
write_file.write(line)
# Move the temp file to replace the original
shutil.move(temp_file_path, write_file_name + '.rdc')
elif in_fn.split('.')[-1] == 'tif':
# Read projection information from the .tif file using GDAL
dataset = gdal.Open(in_fn)
projection = dataset.GetProjection()
dataset = None
# Extract the reference system name from the wkt projection
ref_system_name = projection.split('PROJCS["')[1].split('"')[0]
write_file_name, _ = os.path.splitext(out_fn)
temp_file_path = 'rdc_temp.rdc'
with open(write_file_name + '.rdc', 'r') as read_file, open(temp_file_path, 'w') as write_file:
for line in read_file:
if line.startswith("ref. system :"):
write_file.write(f"ref. system : {ref_system_name}\n")
else:
write_file.write(line)
shutil.move(temp_file_path, write_file_name + '.rdc')
def execute_workflow_fit(self, directory,risk30_hrp,municipality, deforestation_hrp, csv_name, out_fn1, out_fn2):
'''
Create workflow function for CAL and HRP
'''
self.progress_updated.emit(0)
data_folder = self.set_working_directory(directory)
self.progress_updated.emit(10)
tabulation_bin_id_masked = self.tabulation_bin_id_HRP(risk30_hrp,municipality,out_fn1)
self.replace_ref_system(municipality, out_fn1)
self.progress_updated.emit(50)
merged_df = self.create_relative_frequency_table(tabulation_bin_id_masked,
deforestation_hrp, csv_name)
self.progress_updated.emit(75)
self.create_fit_density_map(risk30_hrp, tabulation_bin_id_masked,
merged_df, out_fn2)
self.replace_ref_system(municipality, out_fn2)
self.progress_updated.emit(100)
# After processing, emit processCompleted or any other signal as needed
return
def execute_workflow_cnf(self, directory, max_iterations, csv, municipality, deforestation_cnf, risk30_vp, out_fn1, out_fn2):
'''
Create workflow function for CNF
:param max_iterations: maximum number of iterations
'''
self.progress_updated.emit(0)
data_folder = self.set_working_directory(directory)
self.progress_updated.emit(10)
tabulation_bin_id_VP_masked = self.tabulation_bin_id_VP(risk30_vp, municipality, out_fn1)
self.replace_ref_system(municipality, out_fn1)
self.progress_updated.emit(30)
# Check modeling region IDs present in the prediction stage but absent in the fitting stage
id_difference, pre_model_region_id = self.check_modeling_region_ids(csv, out_fn1)
# If there are missing bins, calculate the relative frequency and create a new csv file
if id_difference.size > 0:
csv = self.calculate_missing_bins_rf(id_difference, csv,pre_model_region_id)
self.progress_updated.emit(40)
prediction_density_arr = self.calculate_prediction_density_arr(risk30_vp, tabulation_bin_id_VP_masked,csv)
self.progress_updated.emit(50)
AR = self.calculate_adjustment_ratio_cnf(prediction_density_arr, deforestation_cnf)
self.progress_updated.emit(75)
# Set a maximum number of iterations to avoid infinite loop
iteration_count = 0
new_prediction_density_arr = None
# When AR > 1.00001 and iteration_count <= max_iterations, treat the result as new prediction density map to iterate the AR util AR is <=1.00001 or iteration_count = max_iterations
while AR > 1.00001 and iteration_count <= max_iterations:
new_prediction_density_arr = self.adjusted_prediction_density_array(prediction_density_arr, risk30_vp, AR)
AR = self.calculate_adjustment_ratio_cnf(new_prediction_density_arr, deforestation_cnf)
prediction_density_arr=new_prediction_density_arr
iteration_count += 1
if iteration_count <= int(max_iterations):
selected_density_arr = new_prediction_density_arr if new_prediction_density_arr is not None else prediction_density_arr
self.adjusted_prediction_density_map(selected_density_arr, risk30_vp, AR, out_fn2)
self.replace_ref_system(municipality, out_fn2)
else:
print("Maximum number of iterations reached. Please reset the maximum number of iterations.")
self.progress_updated.emit(100)
return id_difference , iteration_count
def execute_workflow_vp(self, directory,max_iterations, csv, municipality, expected_deforestation, risk30_vp, out_fn1, out_fn2):
'''
Create workflow function for VP
:param max_iterations: maximum number of iterations
'''
self.progress_updated.emit(0)
data_folder = self.set_working_directory(directory)
self.progress_updated.emit(10)
tabulation_bin_id_VP_masked = self.tabulation_bin_id_VP(risk30_vp, municipality, out_fn1)
self.replace_ref_system(municipality, out_fn1)
self.progress_updated.emit(30)
# Check modeling region IDs present in the prediction stage but absent in the fitting stage
id_difference, pre_model_region_id = self.check_modeling_region_ids(csv, out_fn1)
# If there are missing bins, calculate the relative frequency and create a new csv file
if id_difference.size > 0:
csv=self.calculate_missing_bins_rf(id_difference, csv,pre_model_region_id)
self.progress_updated.emit(40)
prediction_density_arr = self.calculate_prediction_density_arr(risk30_vp, tabulation_bin_id_VP_masked,csv)
self.progress_updated.emit(50)
AR = self.calculate_adjustment_ratio(prediction_density_arr, expected_deforestation)
self.progress_updated.emit(75)
# Set a maximum number of iterations to avoid infinite loop
iteration_count = 0
new_prediction_density_arr = None
# When AR > 1.00001 and iteration_count <= max_iterations, treat the result as new prediction density map to iterate the AR util AR is <=1.00001 or iteration_count = max_iterations
while AR > 1.00001 and iteration_count <= max_iterations:
new_prediction_density_arr = self.adjusted_prediction_density_array(prediction_density_arr, risk30_vp, AR)
AR = self.calculate_adjustment_ratio(new_prediction_density_arr, expected_deforestation)
prediction_density_arr = new_prediction_density_arr
iteration_count += 1
# Emitting progress based on the current iteration_count and max_iterations
if iteration_count <= int(max_iterations):
selected_density_arr = new_prediction_density_arr if new_prediction_density_arr is not None else prediction_density_arr
self.adjusted_prediction_density_map(selected_density_arr, risk30_vp, AR, out_fn2)
self.replace_ref_system(municipality, out_fn2)
else:
print("Maximum number of iterations reached. Please reset the maximum number of iterations.")
self.progress_updated.emit(100)
return id_difference, iteration_count
def check_modeling_region_ids(self, csv, out_fn):
'''
Check modeling region IDs present in the prediction stage but absent in the fitting stage.
:param csv: csv file of relative frequency in fitiing stage
:param out_fn: modeling region image in prediction stage
:return: id_difference: A set of modeling region IDs np array that exist only in the prediction stage
'''
fit_model_region_id = pd.read_csv(csv)['ID'].to_numpy()
pre_model_region_arr = self.image_to_array(out_fn)
pre_model_region_id = np.unique(pre_model_region_arr[pre_model_region_arr != 0])
id_difference = np.setdiff1d(pre_model_region_id, fit_model_region_id)
return id_difference, pre_model_region_id
def calculate_missing_bins_rf (self, id_difference, csv, pre_model_region_id):
'''
If one or more empty bins are found, compute the jurisdiction-wide weighted average of relative frequencies for
missing bins and create a new csv file
:param csv: csv file of relative frequency in the fitting stage
:param id_difference: A set of modeling region IDs np array that exist only in the prediction stage
:param pre_model_region_id: Prediction modeling region ID np array
:return
'''
# Convert modeling region ids to vulnerability zone id
df=pd.read_csv(csv)
df['v_zone'] = (df['ID'] // 1000).astype(int)
# Convert missing bin ids to vulnerability zone id
missing_v_zone = [x // 1000 for x in id_difference]
# Select rows
filtered_df = df[df['v_zone'].isin(missing_v_zone)].copy()
# Created new column
filtered_df['Total Deforestation(pixel)'] = filtered_df['Area of the Bin(pixel)'] * filtered_df['Average Deforestation(pixel)']
# Group by and sum area and weighted relative frequency
aggregated_df = filtered_df.groupby('v_zone')[['Total Deforestation(pixel)', 'Area of the Bin(pixel)']].sum().reset_index()
# Calculate Average Deforestation
aggregated_df['Average Deforestation(pixel)']=aggregated_df['Total Deforestation(pixel)']/aggregated_df['Area of the Bin(pixel)']
# Create a new dataframe id_difference_df
id_difference_df = pd.DataFrame(id_difference, columns=['ID'])
id_difference_df['v_zone'] = missing_v_zone
# Create missing bins dataframe by outer join aggregated_df and id_difference_df
missing_bins_df = pd.merge(id_difference_df,aggregated_df , on='v_zone', how='outer')
# Insert missing bins dataframe back to csv file
df_new = pd.concat([df, missing_bins_df], ignore_index=True)
# Sorting by column "ID"
df_new=df_new.sort_values(by=['ID'], ascending=True)
# Drop column 'v_zone'
df_new=df_new.drop(['v_zone'], axis=1)
# Read CNF Modeling Region ID
mr_cnf_df=pd.DataFrame(pre_model_region_id, columns=['ID'])
df_new_cnf=pd.merge(df_new, mr_cnf_df, on='ID', how='inner')
# Save the new result to csv
base, ext = os.path.splitext(csv)
new_csv = f"{base}_adjusted_for_prediction{ext}"
df_new_cnf.to_csv(new_csv, index=False)
return new_csv