@@ -1869,13 +1869,7 @@ def _compute_metadata_generator(
18691869 skip_failures = True ,
18701870 warn_failures = True ,
18711871):
1872- # @todo can switch to this if we require `fiftyone>=0.22.2`
1873- # num_workers = fou.recommend_thread_pool_workers(num_workers)
1874-
1875- if hasattr (fou , "recommend_thread_pool_workers" ):
1876- num_workers = fou .recommend_thread_pool_workers (num_workers )
1877- elif num_workers is None :
1878- num_workers = fo .config .max_thread_pool_workers or 8
1872+ num_workers = fou .recommend_thread_pool_workers (num_workers )
18791873
18801874 if not overwrite :
18811875 sample_collection = sample_collection .exists ("metadata" , False )
@@ -1890,38 +1884,35 @@ def _compute_metadata_generator(
18901884 return
18911885
18921886 inputs = zip (ids , filepaths , media_types )
1893- values = {}
18941887
1895- try :
1896- num_computed = 0
1897- with contextlib .ExitStack () as exit_context :
1898- pb = fou .ProgressBar (total = num_total )
1899- exit_context .enter_context (pb )
1900-
1901- if num_workers > 1 :
1902- pool = multiprocessing .dummy .Pool (processes = num_workers )
1903- exit_context .enter_context (pool )
1904- tasks = pool .imap_unordered (_do_compute_metadata , inputs )
1905- else :
1906- tasks = map (_do_compute_metadata , inputs )
1888+ with contextlib .ExitStack () as context :
1889+ if num_workers > 1 :
1890+ pool = multiprocessing .dummy .Pool (processes = num_workers )
1891+ context .enter_context (pool )
1892+ tasks = pool .imap_unordered (_do_compute_metadata , inputs )
1893+ else :
1894+ tasks = map (_do_compute_metadata , inputs )
19071895
1908- for sample_id , metadata in pb (tasks ):
1909- values [sample_id ] = metadata
1896+ num_computed = 0
1897+ with fou .get_default_batcher (
1898+ tasks , progress = True , total = num_total
1899+ ) as batcher :
1900+ for batch in batcher :
1901+ sample_collection .set_values (
1902+ "metadata" , dict (batch ), key_field = "id"
1903+ )
19101904
1911- num_computed += 1
1912- if num_computed % 10 == 0 :
1913- progress = num_computed / num_total
1914- label = f"Computed { num_computed } of { num_total } "
1915- yield ctx .trigger (
1916- "set_progress" , dict (progress = progress , label = label )
1917- )
1918- finally :
1919- sample_collection .set_values ("metadata" , values , key_field = "id" )
1905+ num_computed += len (batch )
1906+ progress = num_computed / num_total
1907+ label = f"Computed { num_computed } of { num_total } "
1908+ yield ctx .trigger (
1909+ "set_progress" , dict (progress = progress , label = label )
1910+ )
19201911
19211912 if skip_failures and not warn_failures :
19221913 return
19231914
1924- num_missing = len (sample_collection .exists ("metadata" , False )) + 1
1915+ num_missing = len (sample_collection .exists ("metadata" , False ))
19251916 if num_missing > 0 :
19261917 msg = (
19271918 "Failed to populate metadata on %d samples. "
@@ -1937,33 +1928,10 @@ def _compute_metadata_generator(
19371928
19381929def _do_compute_metadata (args ):
19391930 sample_id , filepath , media_type = args
1940- metadata = _compute_sample_metadata (
1941- filepath , media_type , skip_failures = True
1942- )
1931+ metadata = fomm ._compute_sample_metadata (filepath , media_type )
19431932 return sample_id , metadata
19441933
19451934
1946- def _compute_sample_metadata (filepath , media_type , skip_failures = False ):
1947- if not skip_failures :
1948- return _get_metadata (filepath , media_type )
1949-
1950- try :
1951- return _get_metadata (filepath , media_type )
1952- except :
1953- return None
1954-
1955-
1956- def _get_metadata (filepath , media_type ):
1957- if media_type == fom .IMAGE :
1958- metadata = fomm .ImageMetadata .build_for (filepath )
1959- elif media_type == fom .VIDEO :
1960- metadata = fomm .VideoMetadata .build_for (filepath )
1961- else :
1962- metadata = fomm .Metadata .build_for (filepath )
1963-
1964- return metadata
1965-
1966-
19671935class GenerateThumbnails (foo .Operator ):
19681936 @property
19691937 def config (self ):
0 commit comments