Skip to content

Commit 939a8a9

Browse files
WIP - extract production/modeled temperature data in preparation to statistically compare to example Gringarten curve
1 parent 5c933b1 commit 939a8a9

6 files changed

Lines changed: 245 additions & 0 deletions

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,9 @@ docs/hip_ra_x_parameters.rst
114114
_site/
115115
/docs/geophires_x.rstx
116116
/docs/temperature.txt
117+
118+
src/geophires_docs/fervo_project_red-2026_graph-data-extraction.xcf
119+
117120
/geophires_x.rst
118121
/modules.rst
119122
/Useful sites for Sphinx docstrings.txt

docs/Fervo_Project_Red-2026.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
2+
{#FIXME WIP TODO#}
3+
4+
https://gtp.scientificwebservices.com/geophires/?geophires-example-id=Fervo_Project_Red-2026
127 KB
Loading
60.3 KB
Loading
61.8 KB
Loading
Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
from logging import getLogger
2+
from pathlib import Path
3+
4+
import cv2
5+
import matplotlib.pyplot as plt
6+
import numpy as np
7+
import pandas as pd
8+
from scipy.interpolate import interp1d
9+
10+
from geophires_docs import _PROJECT_ROOT
11+
12+
_log = getLogger(__name__)
13+
14+
_BUILD_DIR: Path = _PROJECT_ROOT / 'build' / 'generate_fervo_project_red_2026_docs'
15+
16+
_PRODUCTION_CSV_FILENAME = 'project_red_2026_production_data.csv'
17+
_MODEL_CSV_FILENAME = 'project_red_2026_model_data.csv'
18+
_STEADY_STATE_CSV_FILENAME = 'project_red_2026_variance_analysis.csv'
19+
_REGENERATED_GRAPH_FILENAME = 'project_red_2026_figure-5_regenerated.png'
20+
21+
_STEADY_STATE_START_YEARS = 0.25
22+
23+
_HOUGH_MIN_DIST_PX = 4
24+
25+
_MODEL_OUTLIER_STD_THRESHOLD = 2.5
26+
_MODEL_ROLLING_WINDOW = 15
27+
28+
29+
def extract_plot_data(
30+
image_path: Path | str, production_image_path: Path | str | None = None
31+
) -> tuple[pd.DataFrame, pd.DataFrame]:
32+
img = cv2.imread(str(image_path))
33+
if img is None:
34+
raise FileNotFoundError(f'Could not load image at {image_path}')
35+
36+
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
37+
38+
height, width, _ = img.shape
39+
x_min_px, x_max_px = int(width * 0.10), int(width * 0.96)
40+
y_min_px, y_max_px = int(height * 0.06), int(height * 0.80)
41+
42+
x_min_val, x_max_val = 0.0, 2.0
43+
y_min_val, y_max_val = 0.0, 200.0
44+
45+
def pixel_to_data(px: float, py: float) -> tuple[float, float]:
46+
x = x_min_val + (px - x_min_px) / (x_max_px - x_min_px) * (x_max_val - x_min_val)
47+
y = y_min_val + (y_max_px - py) / (y_max_px - y_min_px) * (y_max_val - y_min_val)
48+
return x, y
49+
50+
plot_mask = np.zeros((height, width), dtype=np.uint8)
51+
plot_mask[y_min_px:y_max_px, x_min_px:x_max_px] = 255
52+
53+
prod_target_path = production_image_path if production_image_path else image_path
54+
df_prod = _extract_red_circles(prod_target_path, plot_mask, pixel_to_data)
55+
df_model = _extract_black_dashed_line(hsv, plot_mask, pixel_to_data)
56+
57+
return df_prod, df_model
58+
59+
60+
def _calculate_variance_analysis(
61+
df_actual: pd.DataFrame, df_model: pd.DataFrame, steady_state_start_years: float = _STEADY_STATE_START_YEARS
62+
) -> pd.DataFrame:
63+
df_steady_state = df_actual[df_actual['Time_Years'] > steady_state_start_years].copy()
64+
65+
model_interpolator = interp1d(
66+
df_model['Time_Years'],
67+
df_model['Temperature_C'],
68+
kind='linear',
69+
fill_value='extrapolate',
70+
)
71+
72+
df_steady_state['Model_Temperature_C'] = model_interpolator(df_steady_state['Time_Years'])
73+
df_steady_state['Error_C'] = df_steady_state['Temperature_C'] - df_steady_state['Model_Temperature_C']
74+
75+
return df_steady_state.reset_index(drop=True)
76+
77+
78+
def _extract_red_circles(
79+
img_path: Path | str,
80+
plot_mask: np.ndarray,
81+
pixel_to_data,
82+
) -> pd.DataFrame:
83+
img = cv2.imread(str(img_path))
84+
if img is None:
85+
raise FileNotFoundError(f'Could not load image at {img_path}')
86+
87+
hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
88+
89+
lower_red1 = np.array([0, 50, 50])
90+
upper_red1 = np.array([15, 255, 255])
91+
lower_red2 = np.array([165, 50, 50])
92+
upper_red2 = np.array([180, 255, 255])
93+
94+
mask_red1 = cv2.inRange(hsv, lower_red1, upper_red1)
95+
mask_red2 = cv2.inRange(hsv, lower_red2, upper_red2)
96+
mask_red = cv2.bitwise_or(mask_red1, mask_red2)
97+
mask_red = cv2.bitwise_and(mask_red, plot_mask)
98+
99+
y_coords, x_coords = np.where(mask_red > 0)
100+
101+
if len(x_coords) == 0:
102+
_log.warning('No red pixels found in the production data mask.')
103+
return pd.DataFrame(columns=['Time_Years', 'Temperature_C'])
104+
105+
df_pixels = pd.DataFrame({'x': x_coords, 'y': y_coords})
106+
107+
bin_size = int(_HOUGH_MIN_DIST_PX)
108+
df_pixels['x_binned'] = (df_pixels['x'] // bin_size) * bin_size + (bin_size // 2)
109+
centerline = df_pixels.groupby('x_binned', as_index=False)[['x', 'y']].mean()
110+
111+
_log.info(f'Red-marker detection: Extracted {len(centerline)} binned centerline points from edited mask.')
112+
113+
production_data = [pixel_to_data(row['x'], row['y']) for _, row in centerline.iterrows()]
114+
df_prod = pd.DataFrame(production_data, columns=['Time_Years', 'Temperature_C'])
115+
return df_prod.sort_values('Time_Years').reset_index(drop=True)
116+
117+
118+
def _extract_black_dashed_line(hsv: np.ndarray, plot_mask: np.ndarray, pixel_to_data) -> pd.DataFrame:
119+
lower_black = np.array([0, 0, 0])
120+
upper_black = np.array([180, 255, 50])
121+
mask_black = cv2.inRange(hsv, lower_black, upper_black)
122+
mask_black = cv2.bitwise_and(mask_black, plot_mask)
123+
124+
y_coords, x_coords = np.where(mask_black > 0)
125+
model_data = [pixel_to_data(px, py) for px, py in zip(x_coords, y_coords)]
126+
127+
df_model_raw = pd.DataFrame(model_data, columns=['Time_Years', 'Temperature_C'])
128+
if df_model_raw.empty:
129+
return df_model_raw
130+
131+
df_model_raw['Time_Years_Rounded'] = df_model_raw['Time_Years'].round(3)
132+
df_model = (
133+
df_model_raw.groupby('Time_Years_Rounded', as_index=False)['Temperature_C']
134+
.mean()
135+
.rename(columns={'Time_Years_Rounded': 'Time_Years'})
136+
.sort_values('Time_Years')
137+
.reset_index(drop=True)
138+
)
139+
140+
rolling_median = (
141+
df_model['Temperature_C'].rolling(window=_MODEL_ROLLING_WINDOW, center=True, min_periods=3).median()
142+
)
143+
rolling_std = df_model['Temperature_C'].rolling(window=_MODEL_ROLLING_WINDOW, center=True, min_periods=3).std()
144+
rolling_std = rolling_std.fillna(rolling_std.median()).replace(0, rolling_std.median())
145+
146+
deviations = (df_model['Temperature_C'] - rolling_median).abs()
147+
inlier_mask = deviations <= _MODEL_OUTLIER_STD_THRESHOLD * rolling_std
148+
inlier_mask = inlier_mask.fillna(True)
149+
150+
n_before = len(df_model)
151+
df_model = df_model[inlier_mask].reset_index(drop=True)
152+
_log.info(f'Model-curve outlier rejection: {n_before - len(df_model)} of {n_before} points dropped')
153+
154+
return df_model
155+
156+
157+
def _regenerate_graph_from_csv(
158+
production_csv_path: Path,
159+
model_csv_path: Path,
160+
output_path: Path,
161+
) -> None:
162+
df_prod = pd.read_csv(production_csv_path)
163+
df_model = pd.read_csv(model_csv_path)
164+
165+
fig, ax = plt.subplots(figsize=(10, 6))
166+
167+
ax.scatter(
168+
df_prod['Time_Years'],
169+
df_prod['Temperature_C'],
170+
facecolors='none',
171+
edgecolors='#d62728',
172+
s=22,
173+
linewidths=1.0,
174+
alpha=0.85,
175+
label=f'Measured flowing temperature (Project Red), n={len(df_prod)}',
176+
)
177+
ax.plot(
178+
df_model['Time_Years'],
179+
df_model['Temperature_C'],
180+
color='black',
181+
linestyle='--',
182+
linewidth=1.5,
183+
label='Modeled output',
184+
)
185+
186+
ax.set_xlabel('Time (Years)', fontsize=12)
187+
ax.set_ylabel('Flowing Temperature (°C)', fontsize=12)
188+
ax.set_title('Fervo Project Red: Measured vs. Modeled Flowing Temperature (Regenerated)', fontsize=13)
189+
ax.set_xlim(0.0, 2.0)
190+
ax.set_ylim(0.0, 200.0)
191+
ax.grid(True, linestyle='--', alpha=0.5)
192+
ax.legend(loc='best')
193+
194+
output_path.parent.mkdir(parents=True, exist_ok=True)
195+
fig.savefig(output_path, dpi=150, bbox_inches='tight')
196+
plt.close(fig)
197+
198+
199+
if __name__ == '__main__':
200+
IMAGE_PATH = Path(__file__).parent / 'fervo-project-red-2026_figure-5_measured-flowing-temperature.png'
201+
PRODUCTION_IMAGE_PATH = (
202+
Path(__file__).parent / 'fervo_project_red-2026_graph-data-extraction_production-series-edited.png'
203+
)
204+
205+
_BUILD_DIR.mkdir(parents=True, exist_ok=True)
206+
production_csv_path = _BUILD_DIR / _PRODUCTION_CSV_FILENAME
207+
model_csv_path = _BUILD_DIR / _MODEL_CSV_FILENAME
208+
steady_state_csv_path = _BUILD_DIR / _STEADY_STATE_CSV_FILENAME
209+
regenerated_graph_path = _BUILD_DIR / _REGENERATED_GRAPH_FILENAME
210+
211+
_log.info('Extracting data from image...')
212+
df_actual, df_model = extract_plot_data(IMAGE_PATH, PRODUCTION_IMAGE_PATH)
213+
214+
_log.info(f'Extracted {len(df_actual)} production data points.')
215+
_log.info(f'Extracted {len(df_model)} model line data points.')
216+
217+
df_actual.to_csv(production_csv_path, index=False)
218+
df_model.to_csv(model_csv_path, index=False)
219+
_log.info(f'Wrote production data CSV: {production_csv_path}')
220+
_log.info(f'Wrote model data CSV: {model_csv_path}')
221+
222+
if not df_actual.empty and not df_model.empty:
223+
df_steady_state = _calculate_variance_analysis(df_actual, df_model)
224+
225+
rmse = float(np.sqrt((df_steady_state['Error_C'] ** 2).mean()))
226+
mae = float(df_steady_state['Error_C'].abs().mean())
227+
max_error = float(df_steady_state['Error_C'].abs().max())
228+
229+
_log.info(f'--- STATISTICAL ALIGNMENT (Steady-State > {_STEADY_STATE_START_YEARS} Years) ---')
230+
_log.info(f'Root Mean Square Error (RMSE): {rmse:.2f} °C')
231+
_log.info(f'Mean Absolute Error (MAE): {mae:.2f} °C')
232+
_log.info(f'Max Absolute Error: {max_error:.2f} °C')
233+
234+
df_steady_state.to_csv(steady_state_csv_path, index=False)
235+
_log.info(f'Wrote variance analysis CSV: {steady_state_csv_path}')
236+
237+
_regenerate_graph_from_csv(production_csv_path, model_csv_path, regenerated_graph_path)
238+
_log.info(f'Wrote regenerated graph: {regenerated_graph_path}')

0 commit comments

Comments
 (0)