File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 99import itertools
1010import logging
1111import multiprocessing
12- import os
1312import pickle
1413import sqlite3
1514import tempfile
@@ -175,7 +174,6 @@ def partition(
175174 clusters = self .cluster (pair_scores , threshold )
176175 clusters = self ._add_singletons (data , clusters )
177176 clusters = list (clusters )
178- _cleanup_scores (pair_scores )
179177 return clusters
180178
181179 def _add_singletons (self , data : Data , clusters : Clusters ) -> Clusters :
@@ -514,7 +512,6 @@ def join(
514512 links = pair_scores [pair_scores ["score" ] > threshold ]
515513
516514 links = list (links )
517- _cleanup_scores (pair_scores )
518515 return links
519516
520517 def one_to_one (self , scores : Scores , threshold : float = 0.0 ) -> Links :
@@ -1468,14 +1465,3 @@ def flatten_training(
14681465 y .extend ([encoded_y ] * len (pairs ))
14691466
14701467 return examples , numpy .array (y )
1471-
1472-
1473- def _cleanup_scores (arr : Scores ) -> None :
1474- try :
1475- mmap_file = arr .filename # type: ignore
1476- except AttributeError :
1477- pass
1478- else :
1479- del arr
1480- if mmap_file :
1481- os .remove (mmap_file )
Original file line number Diff line number Diff line change 1111import queue
1212import tempfile
1313from typing import TYPE_CHECKING , overload
14+ import weakref
1415
1516import numpy
1617
@@ -176,9 +177,28 @@ def scoreDuplicates(
176177 else :
177178 scored_pairs = numpy .array ([], dtype = dtype )
178179
180+ # See https://docs.python.org/3/library/weakref.html#comparing-finalizers-with-del-methods
181+ scored_pairs .remove = weakref .finalize (scored_pairs , _cleanup_scores , scored_pairs )
182+ scored_pairs .removed = property (_is_removed )
183+
179184 return scored_pairs
180185
181186
187+ def _cleanup_scores (arr : Scores ) -> None :
188+ try :
189+ mmap_file = arr .filename # type: ignore
190+ except AttributeError :
191+ pass
192+ else :
193+ del arr
194+ if mmap_file :
195+ os .remove (mmap_file )
196+
197+
198+ def _is_removed (self ):
199+ return not self .remove .alive
200+
201+
182202def fillQueue (
183203 queue : _Queue , iterable : Iterable [Any ], stop_signals : int , chunk_size : int = 20000
184204) -> None :
You can’t perform that action at this time.
0 commit comments