22
33# AUTOGENERATED! DO NOT EDIT! File to edit: ../../src/dataset/dataset_helper.ipynb.
44
5- # %% auto 0
5+ # %% auto # 0
66__all__ = ['check_duplicate_in_dataset' , 'check_duplicates_in_dataset' , 'shuffle_tensor_dataset' , 'get_unique_elements_indices' ,
77 'uniquify_tensor_dataset' , 'balance_tensor_dataset' ]
88
9- # %% ../../src/dataset/dataset_helper.ipynb 2
9+ # %% ../../src/dataset/dataset_helper.ipynb #06272f6f-b4e3-4504-a90a-feebbf6ad821
1010from ..imports import *
1111from ..utils .config_loader import *
1212from ..utils .async_fn import run_parallel_jobs
1313
14- # %% ../../src/dataset/dataset_helper.ipynb 4
14+ # %% ../../src/dataset/dataset_helper.ipynb #36032308-bd0e-4409-9db0-9d89fc258e5a
1515def check_duplicate_in_dataset (x , dataset ):
1616 """Check if 'x' is in 'dataset'"""
1717 # x ... [ *]
@@ -22,7 +22,7 @@ def check_duplicate_in_dataset(x, dataset):
2222 comp = torch .all (comp , dim = 1 )
2323 return comp .any ().item ()
2424
25- # %% ../../src/dataset/dataset_helper.ipynb 5
25+ # %% ../../src/dataset/dataset_helper.ipynb #2a8886b0-2b43-4c06-9992-b859d11e698e
2626def check_duplicates_in_dataset (xs , dataset , return_ind = False , invert = False ):
2727 '''
2828 Checks if elements of `xs` are in `dataset`. Boolean `invert` selects whether we count duplicates (False) or elements that are not in `dataset` (True).
@@ -53,7 +53,7 @@ def get_comp(x, dataset):
5353 if return_ind : return num , comp .squeeze () #comp is [i_xs, i_dataset] pairs
5454 return num
5555
56- # %% ../../src/dataset/dataset_helper.ipynb 9
56+ # %% ../../src/dataset/dataset_helper.ipynb #9ea47154-d7c2-435b-8d08-0999c744af1a
5757def shuffle_tensor_dataset (x , y = None , * z , cpu_copy = True ):
5858 '''Assumes numpy or tensor objects with same length.'''
5959 rand_indx = torch .randperm (x .shape [0 ])
@@ -86,7 +86,7 @@ def _cpu_array_index(var):
8686
8787 return x [rand_indx ]
8888
89- # %% ../../src/dataset/dataset_helper.ipynb 10
89+ # %% ../../src/dataset/dataset_helper.ipynb #c8005564-1f96-41da-94f5-16d2f9f48b2a
9090def get_unique_elements_indices (tensor ):
9191 '''Returns indices of unique_elements in `tensor`.'''
9292 tensor_unique , ptrs , cnt = torch .unique (tensor , dim = 0 , return_inverse = True , return_counts = True )
@@ -99,7 +99,7 @@ def get_unique_elements_indices(tensor):
9999
100100 return tensor [idx ], idx
101101
102- # %% ../../src/dataset/dataset_helper.ipynb 11
102+ # %% ../../src/dataset/dataset_helper.ipynb #a9295de7-54e1-435b-aeea-26746019dd95
103103def uniquify_tensor_dataset (x , y = None , * z ):
104104 '''`x` has to be tensor, assumes numpy or tensor obj for `y` and `z`'''
105105 x , x_idx = get_unique_elements_indices (x )
@@ -112,7 +112,7 @@ def uniquify_tensor_dataset(x, y=None, *z):
112112
113113 return x
114114
115- # %% ../../src/dataset/dataset_helper.ipynb 12
115+ # %% ../../src/dataset/dataset_helper.ipynb #e63e9933-a2ce-47b3-a957-58705e060fc7
116116def balance_tensor_dataset (x , y , * z , samples : int = None , make_unique : bool = True , y_uniques = None , shuffle_lables : bool = True , add_balance_fn : callable = None , njobs = 1 ):
117117 '''Assumes `x` is tensor and `y` is tensor or numpy.'''
118118
0 commit comments