Skip to content

Commit f01c3c1

Browse files
Don't keep results from PCA if too little variance is retained
1 parent d565ed4 commit f01c3c1

1 file changed

Lines changed: 1 addition & 0 deletions

File tree

WDoc/utils/tasks/query.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -209,6 +209,7 @@ def semantic_batching(
209209
vr = np.cumsum(pca.explained_variance_ratio_)[-1]
210210
if vr <= 0.90:
211211
red(f"Found lower than exepcted PCA explained variance ratio: {vr:.4f}")
212+
assert vr >= 0.75, f"Found substancially low explained variance ratio afer pca at {vr:.4f} so not using dimension reduction"
212213
embeddings = pd.DataFrame(
213214
columns=[f"v_{i}" for i in range(embeds_reduced.shape[1])],
214215
index=[i for i in range(len(texts))],

0 commit comments

Comments
 (0)