|
| 1 | +from logging import getLogger |
| 2 | +from pathlib import Path |
| 3 | + |
| 4 | +import cv2 |
| 5 | +import matplotlib.pyplot as plt |
| 6 | +import numpy as np |
| 7 | +import pandas as pd |
| 8 | +from scipy.interpolate import interp1d |
| 9 | + |
| 10 | +from geophires_docs import _PROJECT_ROOT |
| 11 | + |
| 12 | +_log = getLogger(__name__) |
| 13 | + |
| 14 | +_BUILD_DIR: Path = _PROJECT_ROOT / 'build' / 'generate_fervo_project_red_2026_docs' |
| 15 | + |
| 16 | +_PRODUCTION_CSV_FILENAME = 'project_red_2026_production_data.csv' |
| 17 | +_MODEL_CSV_FILENAME = 'project_red_2026_model_data.csv' |
| 18 | +_STEADY_STATE_CSV_FILENAME = 'project_red_2026_variance_analysis.csv' |
| 19 | +_REGENERATED_GRAPH_FILENAME = 'project_red_2026_figure-5_regenerated.png' |
| 20 | + |
| 21 | +_STEADY_STATE_START_YEARS = 0.25 |
| 22 | + |
| 23 | +_HOUGH_MIN_DIST_PX = 4 |
| 24 | + |
| 25 | +_MODEL_OUTLIER_STD_THRESHOLD = 2.5 |
| 26 | +_MODEL_ROLLING_WINDOW = 15 |
| 27 | + |
| 28 | + |
| 29 | +def extract_plot_data( |
| 30 | + image_path: Path | str, production_image_path: Path | str | None = None |
| 31 | +) -> tuple[pd.DataFrame, pd.DataFrame]: |
| 32 | + img = cv2.imread(str(image_path)) |
| 33 | + if img is None: |
| 34 | + raise FileNotFoundError(f'Could not load image at {image_path}') |
| 35 | + |
| 36 | + hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) |
| 37 | + |
| 38 | + height, width, _ = img.shape |
| 39 | + x_min_px, x_max_px = int(width * 0.10), int(width * 0.96) |
| 40 | + y_min_px, y_max_px = int(height * 0.06), int(height * 0.80) |
| 41 | + |
| 42 | + x_min_val, x_max_val = 0.0, 2.0 |
| 43 | + y_min_val, y_max_val = 0.0, 200.0 |
| 44 | + |
| 45 | + def pixel_to_data(px: float, py: float) -> tuple[float, float]: |
| 46 | + x = x_min_val + (px - x_min_px) / (x_max_px - x_min_px) * (x_max_val - x_min_val) |
| 47 | + y = y_min_val + (y_max_px - py) / (y_max_px - y_min_px) * (y_max_val - y_min_val) |
| 48 | + return x, y |
| 49 | + |
| 50 | + plot_mask = np.zeros((height, width), dtype=np.uint8) |
| 51 | + plot_mask[y_min_px:y_max_px, x_min_px:x_max_px] = 255 |
| 52 | + |
| 53 | + prod_target_path = production_image_path if production_image_path else image_path |
| 54 | + df_prod = _extract_red_circles(prod_target_path, plot_mask, pixel_to_data) |
| 55 | + df_model = _extract_black_dashed_line(hsv, plot_mask, pixel_to_data) |
| 56 | + |
| 57 | + return df_prod, df_model |
| 58 | + |
| 59 | + |
| 60 | +def _calculate_variance_analysis( |
| 61 | + df_actual: pd.DataFrame, df_model: pd.DataFrame, steady_state_start_years: float = _STEADY_STATE_START_YEARS |
| 62 | +) -> pd.DataFrame: |
| 63 | + df_steady_state = df_actual[df_actual['Time_Years'] > steady_state_start_years].copy() |
| 64 | + |
| 65 | + model_interpolator = interp1d( |
| 66 | + df_model['Time_Years'], |
| 67 | + df_model['Temperature_C'], |
| 68 | + kind='linear', |
| 69 | + fill_value='extrapolate', |
| 70 | + ) |
| 71 | + |
| 72 | + df_steady_state['Model_Temperature_C'] = model_interpolator(df_steady_state['Time_Years']) |
| 73 | + df_steady_state['Error_C'] = df_steady_state['Temperature_C'] - df_steady_state['Model_Temperature_C'] |
| 74 | + |
| 75 | + return df_steady_state.reset_index(drop=True) |
| 76 | + |
| 77 | + |
| 78 | +def _extract_red_circles( |
| 79 | + img_path: Path | str, |
| 80 | + plot_mask: np.ndarray, |
| 81 | + pixel_to_data, |
| 82 | +) -> pd.DataFrame: |
| 83 | + img = cv2.imread(str(img_path)) |
| 84 | + if img is None: |
| 85 | + raise FileNotFoundError(f'Could not load image at {img_path}') |
| 86 | + |
| 87 | + hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) |
| 88 | + |
| 89 | + lower_red1 = np.array([0, 50, 50]) |
| 90 | + upper_red1 = np.array([15, 255, 255]) |
| 91 | + lower_red2 = np.array([165, 50, 50]) |
| 92 | + upper_red2 = np.array([180, 255, 255]) |
| 93 | + |
| 94 | + mask_red1 = cv2.inRange(hsv, lower_red1, upper_red1) |
| 95 | + mask_red2 = cv2.inRange(hsv, lower_red2, upper_red2) |
| 96 | + mask_red = cv2.bitwise_or(mask_red1, mask_red2) |
| 97 | + mask_red = cv2.bitwise_and(mask_red, plot_mask) |
| 98 | + |
| 99 | + y_coords, x_coords = np.where(mask_red > 0) |
| 100 | + |
| 101 | + if len(x_coords) == 0: |
| 102 | + _log.warning('No red pixels found in the production data mask.') |
| 103 | + return pd.DataFrame(columns=['Time_Years', 'Temperature_C']) |
| 104 | + |
| 105 | + df_pixels = pd.DataFrame({'x': x_coords, 'y': y_coords}) |
| 106 | + |
| 107 | + bin_size = int(_HOUGH_MIN_DIST_PX) |
| 108 | + df_pixels['x_binned'] = (df_pixels['x'] // bin_size) * bin_size + (bin_size // 2) |
| 109 | + centerline = df_pixels.groupby('x_binned', as_index=False)[['x', 'y']].mean() |
| 110 | + |
| 111 | + _log.info(f'Red-marker detection: Extracted {len(centerline)} binned centerline points from edited mask.') |
| 112 | + |
| 113 | + production_data = [pixel_to_data(row['x'], row['y']) for _, row in centerline.iterrows()] |
| 114 | + df_prod = pd.DataFrame(production_data, columns=['Time_Years', 'Temperature_C']) |
| 115 | + return df_prod.sort_values('Time_Years').reset_index(drop=True) |
| 116 | + |
| 117 | + |
| 118 | +def _extract_black_dashed_line(hsv: np.ndarray, plot_mask: np.ndarray, pixel_to_data) -> pd.DataFrame: |
| 119 | + lower_black = np.array([0, 0, 0]) |
| 120 | + upper_black = np.array([180, 255, 50]) |
| 121 | + mask_black = cv2.inRange(hsv, lower_black, upper_black) |
| 122 | + mask_black = cv2.bitwise_and(mask_black, plot_mask) |
| 123 | + |
| 124 | + y_coords, x_coords = np.where(mask_black > 0) |
| 125 | + model_data = [pixel_to_data(px, py) for px, py in zip(x_coords, y_coords)] |
| 126 | + |
| 127 | + df_model_raw = pd.DataFrame(model_data, columns=['Time_Years', 'Temperature_C']) |
| 128 | + if df_model_raw.empty: |
| 129 | + return df_model_raw |
| 130 | + |
| 131 | + df_model_raw['Time_Years_Rounded'] = df_model_raw['Time_Years'].round(3) |
| 132 | + df_model = ( |
| 133 | + df_model_raw.groupby('Time_Years_Rounded', as_index=False)['Temperature_C'] |
| 134 | + .mean() |
| 135 | + .rename(columns={'Time_Years_Rounded': 'Time_Years'}) |
| 136 | + .sort_values('Time_Years') |
| 137 | + .reset_index(drop=True) |
| 138 | + ) |
| 139 | + |
| 140 | + rolling_median = ( |
| 141 | + df_model['Temperature_C'].rolling(window=_MODEL_ROLLING_WINDOW, center=True, min_periods=3).median() |
| 142 | + ) |
| 143 | + rolling_std = df_model['Temperature_C'].rolling(window=_MODEL_ROLLING_WINDOW, center=True, min_periods=3).std() |
| 144 | + rolling_std = rolling_std.fillna(rolling_std.median()).replace(0, rolling_std.median()) |
| 145 | + |
| 146 | + deviations = (df_model['Temperature_C'] - rolling_median).abs() |
| 147 | + inlier_mask = deviations <= _MODEL_OUTLIER_STD_THRESHOLD * rolling_std |
| 148 | + inlier_mask = inlier_mask.fillna(True) |
| 149 | + |
| 150 | + n_before = len(df_model) |
| 151 | + df_model = df_model[inlier_mask].reset_index(drop=True) |
| 152 | + _log.info(f'Model-curve outlier rejection: {n_before - len(df_model)} of {n_before} points dropped') |
| 153 | + |
| 154 | + return df_model |
| 155 | + |
| 156 | + |
| 157 | +def _regenerate_graph_from_csv( |
| 158 | + production_csv_path: Path, |
| 159 | + model_csv_path: Path, |
| 160 | + output_path: Path, |
| 161 | +) -> None: |
| 162 | + df_prod = pd.read_csv(production_csv_path) |
| 163 | + df_model = pd.read_csv(model_csv_path) |
| 164 | + |
| 165 | + fig, ax = plt.subplots(figsize=(10, 6)) |
| 166 | + |
| 167 | + ax.scatter( |
| 168 | + df_prod['Time_Years'], |
| 169 | + df_prod['Temperature_C'], |
| 170 | + facecolors='none', |
| 171 | + edgecolors='#d62728', |
| 172 | + s=22, |
| 173 | + linewidths=1.0, |
| 174 | + alpha=0.85, |
| 175 | + label=f'Measured flowing temperature (Project Red), n={len(df_prod)}', |
| 176 | + ) |
| 177 | + ax.plot( |
| 178 | + df_model['Time_Years'], |
| 179 | + df_model['Temperature_C'], |
| 180 | + color='black', |
| 181 | + linestyle='--', |
| 182 | + linewidth=1.5, |
| 183 | + label='Modeled output', |
| 184 | + ) |
| 185 | + |
| 186 | + ax.set_xlabel('Time (Years)', fontsize=12) |
| 187 | + ax.set_ylabel('Flowing Temperature (°C)', fontsize=12) |
| 188 | + ax.set_title('Fervo Project Red: Measured vs. Modeled Flowing Temperature (Regenerated)', fontsize=13) |
| 189 | + ax.set_xlim(0.0, 2.0) |
| 190 | + ax.set_ylim(0.0, 200.0) |
| 191 | + ax.grid(True, linestyle='--', alpha=0.5) |
| 192 | + ax.legend(loc='best') |
| 193 | + |
| 194 | + output_path.parent.mkdir(parents=True, exist_ok=True) |
| 195 | + fig.savefig(output_path, dpi=150, bbox_inches='tight') |
| 196 | + plt.close(fig) |
| 197 | + |
| 198 | + |
| 199 | +if __name__ == '__main__': |
| 200 | + IMAGE_PATH = Path(__file__).parent / 'fervo-project-red-2026_figure-5_measured-flowing-temperature.png' |
| 201 | + PRODUCTION_IMAGE_PATH = ( |
| 202 | + Path(__file__).parent / 'fervo_project_red-2026_graph-data-extraction_production-series-edited.png' |
| 203 | + ) |
| 204 | + |
| 205 | + _BUILD_DIR.mkdir(parents=True, exist_ok=True) |
| 206 | + production_csv_path = _BUILD_DIR / _PRODUCTION_CSV_FILENAME |
| 207 | + model_csv_path = _BUILD_DIR / _MODEL_CSV_FILENAME |
| 208 | + steady_state_csv_path = _BUILD_DIR / _STEADY_STATE_CSV_FILENAME |
| 209 | + regenerated_graph_path = _BUILD_DIR / _REGENERATED_GRAPH_FILENAME |
| 210 | + |
| 211 | + _log.info('Extracting data from image...') |
| 212 | + df_actual, df_model = extract_plot_data(IMAGE_PATH, PRODUCTION_IMAGE_PATH) |
| 213 | + |
| 214 | + _log.info(f'Extracted {len(df_actual)} production data points.') |
| 215 | + _log.info(f'Extracted {len(df_model)} model line data points.') |
| 216 | + |
| 217 | + df_actual.to_csv(production_csv_path, index=False) |
| 218 | + df_model.to_csv(model_csv_path, index=False) |
| 219 | + _log.info(f'Wrote production data CSV: {production_csv_path}') |
| 220 | + _log.info(f'Wrote model data CSV: {model_csv_path}') |
| 221 | + |
| 222 | + if not df_actual.empty and not df_model.empty: |
| 223 | + df_steady_state = _calculate_variance_analysis(df_actual, df_model) |
| 224 | + |
| 225 | + rmse = float(np.sqrt((df_steady_state['Error_C'] ** 2).mean())) |
| 226 | + mae = float(df_steady_state['Error_C'].abs().mean()) |
| 227 | + max_error = float(df_steady_state['Error_C'].abs().max()) |
| 228 | + |
| 229 | + _log.info(f'--- STATISTICAL ALIGNMENT (Steady-State > {_STEADY_STATE_START_YEARS} Years) ---') |
| 230 | + _log.info(f'Root Mean Square Error (RMSE): {rmse:.2f} °C') |
| 231 | + _log.info(f'Mean Absolute Error (MAE): {mae:.2f} °C') |
| 232 | + _log.info(f'Max Absolute Error: {max_error:.2f} °C') |
| 233 | + |
| 234 | + df_steady_state.to_csv(steady_state_csv_path, index=False) |
| 235 | + _log.info(f'Wrote variance analysis CSV: {steady_state_csv_path}') |
| 236 | + |
| 237 | + _regenerate_graph_from_csv(production_csv_path, model_csv_path, regenerated_graph_path) |
| 238 | + _log.info(f'Wrote regenerated graph: {regenerated_graph_path}') |
0 commit comments