Skip to content

Commit fb40efe

Browse files
committed
optimizing compute metadata generator
1 parent 4e22a8c commit fb40efe

4 files changed

Lines changed: 28 additions & 60 deletions

File tree

plugins/io/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -929,7 +929,7 @@ def _import_media_and_labels(ctx):
929929
)
930930

931931
if metadata:
932-
for _ids in fou.iter_batches(ids, 100000):
932+
for _ids in fou.iter_batches(ids, 10000):
933933
ctx.dataset.select(_ids).compute_metadata(overwrite=True)
934934
else:
935935
num_added = 0

plugins/io/fiftyone.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
name: "@voxel51/io"
22
description: A collection of import/export utilities
3-
version: 1.1.0
3+
version: 1.2.0
44
fiftyone:
55
version: ">=0.22.2"
66
url: https://github.com/voxel51/fiftyone-plugins/tree/main/plugins/io

plugins/utils/__init__.py

Lines changed: 24 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -1869,13 +1869,7 @@ def _compute_metadata_generator(
18691869
skip_failures=True,
18701870
warn_failures=True,
18711871
):
1872-
# @todo can switch to this if we require `fiftyone>=0.22.2`
1873-
# num_workers = fou.recommend_thread_pool_workers(num_workers)
1874-
1875-
if hasattr(fou, "recommend_thread_pool_workers"):
1876-
num_workers = fou.recommend_thread_pool_workers(num_workers)
1877-
elif num_workers is None:
1878-
num_workers = fo.config.max_thread_pool_workers or 8
1872+
num_workers = fou.recommend_thread_pool_workers(num_workers)
18791873

18801874
if not overwrite:
18811875
sample_collection = sample_collection.exists("metadata", False)
@@ -1890,38 +1884,35 @@ def _compute_metadata_generator(
18901884
return
18911885

18921886
inputs = zip(ids, filepaths, media_types)
1893-
values = {}
18941887

1895-
try:
1896-
num_computed = 0
1897-
with contextlib.ExitStack() as exit_context:
1898-
pb = fou.ProgressBar(total=num_total)
1899-
exit_context.enter_context(pb)
1900-
1901-
if num_workers > 1:
1902-
pool = multiprocessing.dummy.Pool(processes=num_workers)
1903-
exit_context.enter_context(pool)
1904-
tasks = pool.imap_unordered(_do_compute_metadata, inputs)
1905-
else:
1906-
tasks = map(_do_compute_metadata, inputs)
1888+
with contextlib.ExitStack() as context:
1889+
if num_workers > 1:
1890+
pool = multiprocessing.dummy.Pool(processes=num_workers)
1891+
context.enter_context(pool)
1892+
tasks = pool.imap_unordered(_do_compute_metadata, inputs)
1893+
else:
1894+
tasks = map(_do_compute_metadata, inputs)
19071895

1908-
for sample_id, metadata in pb(tasks):
1909-
values[sample_id] = metadata
1896+
num_computed = 0
1897+
with fou.get_default_batcher(
1898+
tasks, progress=True, total=num_total
1899+
) as batcher:
1900+
for batch in batcher:
1901+
sample_collection.set_values(
1902+
"metadata", dict(batch), key_field="id"
1903+
)
19101904

1911-
num_computed += 1
1912-
if num_computed % 10 == 0:
1913-
progress = num_computed / num_total
1914-
label = f"Computed {num_computed} of {num_total}"
1915-
yield ctx.trigger(
1916-
"set_progress", dict(progress=progress, label=label)
1917-
)
1918-
finally:
1919-
sample_collection.set_values("metadata", values, key_field="id")
1905+
num_computed += len(batch)
1906+
progress = num_computed / num_total
1907+
label = f"Computed {num_computed} of {num_total}"
1908+
yield ctx.trigger(
1909+
"set_progress", dict(progress=progress, label=label)
1910+
)
19201911

19211912
if skip_failures and not warn_failures:
19221913
return
19231914

1924-
num_missing = len(sample_collection.exists("metadata", False)) + 1
1915+
num_missing = len(sample_collection.exists("metadata", False))
19251916
if num_missing > 0:
19261917
msg = (
19271918
"Failed to populate metadata on %d samples. "
@@ -1937,33 +1928,10 @@ def _compute_metadata_generator(
19371928

19381929
def _do_compute_metadata(args):
19391930
sample_id, filepath, media_type = args
1940-
metadata = _compute_sample_metadata(
1941-
filepath, media_type, skip_failures=True
1942-
)
1931+
metadata = fomm._compute_sample_metadata(filepath, media_type)
19431932
return sample_id, metadata
19441933

19451934

1946-
def _compute_sample_metadata(filepath, media_type, skip_failures=False):
1947-
if not skip_failures:
1948-
return _get_metadata(filepath, media_type)
1949-
1950-
try:
1951-
return _get_metadata(filepath, media_type)
1952-
except:
1953-
return None
1954-
1955-
1956-
def _get_metadata(filepath, media_type):
1957-
if media_type == fom.IMAGE:
1958-
metadata = fomm.ImageMetadata.build_for(filepath)
1959-
elif media_type == fom.VIDEO:
1960-
metadata = fomm.VideoMetadata.build_for(filepath)
1961-
else:
1962-
metadata = fomm.Metadata.build_for(filepath)
1963-
1964-
return metadata
1965-
1966-
19671935
class GenerateThumbnails(foo.Operator):
19681936
@property
19691937
def config(self):

plugins/utils/fiftyone.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
11
name: "@voxel51/utils"
22
description: A collection of utility operators
3-
version: 1.2.0
3+
version: 1.3.0
44
fiftyone:
5-
version: ">=0.22"
5+
version: ">=0.22.2"
66
url: https://github.com/voxel51/fiftyone-plugins/tree/main/plugins/utils
77
license: Apache 2.0
88
operators:

0 commit comments

Comments
 (0)