@@ -1057,18 +1057,25 @@ def _somd2_extract(parquet_file, T=None, estimator="MBAR"):
10571057 lam = float (metadata ["lambda" ])
10581058 except :
10591059 raise ValueError ("Parquet metadata does not contain 'lambda'." )
1060+
1061+ def _normalise_lambda (v ):
1062+ """
1063+ Round-trip a lambda value through :.5f formatting to produce a
1064+ canonical float. This eliminates IEEE754 arithmetic precision
1065+ differences (e.g. 0.05 + 0.001 = 0.051000000000000004 vs
1066+ float("0.051") = 0.051) while keeping values as floats so that
1067+ the alchemlyb index check passes.
1068+ """
1069+ return float (f"{ float (v ):.5f} " )
1070+
10601071 if not is_mbar :
10611072 try :
1062- # Normalise to floats to match the DataFrame column type expected
1063- # by alchemlyb (handles both old float and new string metadata).
1064- lambda_grad = [float (v ) for v in metadata ["lambda_grad" ]]
1073+ lambda_grad = [_normalise_lambda (v ) for v in metadata ["lambda_grad" ]]
10651074 except :
10661075 raise ValueError ("Parquet metadata does not contain 'lambda grad'" )
10671076 else :
10681077 try :
1069- # Normalise to floats to match the DataFrame column type expected
1070- # by alchemlyb (handles both old float and new string metadata).
1071- lambda_grad = [float (v ) for v in metadata ["lambda_grad" ]]
1078+ lambda_grad = [_normalise_lambda (v ) for v in metadata ["lambda_grad" ]]
10721079 except :
10731080 lambda_grad = []
10741081
@@ -1082,13 +1089,13 @@ def _somd2_extract(parquet_file, T=None, estimator="MBAR"):
10821089 # Convert to a pandas dataframe.
10831090 df = table .to_pandas ()
10841091
1085- # Normalise column names to floats so that comparisons are consistent
1086- # regardless of whether the parquet was written with float keys (old
1087- # sire) or formatted string keys (new sire). float("0.10000") and
1088- # float( "0.1 ") give the same IEEE754 value, so old and new files are
1089- # handled identically and the alchemlyb index check passes .
1092+ # Normalise column names through the same :.5f round-trip so that
1093+ # comparisons against lambda_grad and lam are consistent regardless of
1094+ # whether the parquet was written with arithmetic float keys (old sire,
1095+ # e.g. "0.051000000000000004") or :.5f string keys (new sire, e.g.
1096+ # "0.05100"). The alchemlyb index check also requires float columns.
10901097 df .columns = [
1091- float (c )
1098+ _normalise_lambda (c )
10921099 if isinstance (c , str )
10931100 and c .replace ("." , "" , 1 ).replace ("-" , "" , 1 ).isdigit ()
10941101 else c
0 commit comments