From 4890236f3a055380db306f72a79ed163b6ff6e30 Mon Sep 17 00:00:00 2001 From: Sergio Burdisso Date: Tue, 4 Nov 2025 16:00:33 +0100 Subject: [PATCH] Make SklearnEmbedder return a dense array As is, the embed() method returns a `scipy.sparse._csr.csr_matrix` object, which can't be passed directly to `np.average()`, the function used to get the topic embeddings. This is because `np.average()` tries to cast the passed object using `np.asanyarray()`, which returns it as an array with an empty shape. This fix simply uses the `.toarray()` method of `scipy.sparse._csr.csr_matrix` to always return a dense numpy array. --- bertopic/backend/_sklearn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bertopic/backend/_sklearn.py b/bertopic/backend/_sklearn.py index d8150fe6..8e60f823 100644 --- a/bertopic/backend/_sklearn.py +++ b/bertopic/backend/_sklearn.py @@ -65,4 +65,4 @@ def embed(self, documents, verbose=False): except NotFittedError: embeddings = self.pipe.fit_transform(documents) - return embeddings + return embeddings.toarray() if hasattr(embeddings, "toarray") else embeddings