From 7c81cdfc95050a1376631b05033851548ed5a535 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Wed, 1 Apr 2026 18:14:05 +0100 Subject: [PATCH 1/3] bump datasets --- front/admin_ui/poetry.lock | 16 ++++++++++------ jobs/cache_maintenance/poetry.lock | 16 ++++++++++------ jobs/mongodb_migration/poetry.lock | 16 ++++++++++------ libs/libapi/poetry.lock | 16 ++++++++++------ libs/libcommon/poetry.lock | 16 ++++++++++------ libs/libcommon/pyproject.toml | 2 +- services/admin/poetry.lock | 16 ++++++++++------ services/api/poetry.lock | 16 ++++++++++------ services/rows/poetry.lock | 16 ++++++++++------ services/search/poetry.lock | 16 ++++++++++------ services/sse-api/poetry.lock | 16 ++++++++++------ services/webhook/poetry.lock | 16 ++++++++++------ services/worker/poetry.lock | 16 ++++++++++------ 13 files changed, 121 insertions(+), 73 deletions(-) diff --git a/front/admin_ui/poetry.lock b/front/admin_ui/poetry.lock index a3dfd6ea9..a0de307c0 100644 --- a/front/admin_ui/poetry.lock +++ b/front/admin_ui/poetry.lock @@ -672,15 +672,13 @@ tests = ["pytest", "pytest-cov", "pytest-xdist"] [[package]] name = "datasets" -version = "4.8.4" +version = "4.8.5.dev0" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.10.0" groups = ["main"] -files = [ - {file = "datasets-4.8.4-py3-none-any.whl", hash = "sha256:cdc8bee4698e549d78bf1fed6aea2eebc760b22b084f07e6fc020c6577a6ce6d"}, - {file = "datasets-4.8.4.tar.gz", hash = "sha256:a1429ed853275ce7943a01c6d2e25475b4501eb758934362106a280470df3a52"}, -] +files = [] +develop = false [package.dependencies] dill = ">=0.3.0,<0.4.2" @@ -714,6 +712,12 @@ tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elastics torch = ["torch"] vision = ["Pillow (>=9.4.0)"] +[package.source] +type = "git" +url = "https://github.com/huggingface/datasets" +reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" + [[package]] name = "dill" version = "0.3.8" @@ -1519,7 +1523,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = "^4.8.4" +datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/jobs/cache_maintenance/poetry.lock b/jobs/cache_maintenance/poetry.lock index 5b52445b8..9ea54a2d0 100644 --- a/jobs/cache_maintenance/poetry.lock +++ b/jobs/cache_maintenance/poetry.lock @@ -640,15 +640,13 @@ xml-validation = ["lxml (>=4,<6)"] [[package]] name = "datasets" -version = "4.8.4" +version = "4.8.5.dev0" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.10.0" groups = ["main"] -files = [ - {file = "datasets-4.8.4-py3-none-any.whl", hash = "sha256:cdc8bee4698e549d78bf1fed6aea2eebc760b22b084f07e6fc020c6577a6ce6d"}, - {file = "datasets-4.8.4.tar.gz", hash = "sha256:a1429ed853275ce7943a01c6d2e25475b4501eb758934362106a280470df3a52"}, -] +files = [] +develop = false [package.dependencies] dill = ">=0.3.0,<0.4.2" @@ -682,6 +680,12 @@ tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elastics torch = ["torch"] vision = ["Pillow (>=9.4.0)"] +[package.source] +type = "git" +url = "https://github.com/huggingface/datasets" +reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" + [[package]] name = "defusedxml" version = "0.7.1" @@ -1199,7 +1203,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = "^4.8.4" +datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/jobs/mongodb_migration/poetry.lock b/jobs/mongodb_migration/poetry.lock index f2896d001..71d83a7ec 100644 --- a/jobs/mongodb_migration/poetry.lock +++ b/jobs/mongodb_migration/poetry.lock @@ -640,15 +640,13 @@ xml-validation = ["lxml (>=4,<6)"] [[package]] name = "datasets" -version = "4.8.4" +version = "4.8.5.dev0" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.10.0" groups = ["main"] -files = [ - {file = "datasets-4.8.4-py3-none-any.whl", hash = "sha256:cdc8bee4698e549d78bf1fed6aea2eebc760b22b084f07e6fc020c6577a6ce6d"}, - {file = "datasets-4.8.4.tar.gz", hash = "sha256:a1429ed853275ce7943a01c6d2e25475b4501eb758934362106a280470df3a52"}, -] +files = [] +develop = false [package.dependencies] dill = ">=0.3.0,<0.4.2" @@ -682,6 +680,12 @@ tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elastics torch = ["torch"] vision = ["Pillow (>=9.4.0)"] +[package.source] +type = "git" +url = "https://github.com/huggingface/datasets" +reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" + [[package]] name = "defusedxml" version = "0.7.1" @@ -1199,7 +1203,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = "^4.8.4" +datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/libs/libapi/poetry.lock b/libs/libapi/poetry.lock index 9e6ceb49d..19f6dd39d 100644 --- a/libs/libapi/poetry.lock +++ b/libs/libapi/poetry.lock @@ -640,15 +640,13 @@ xml-validation = ["lxml (>=4,<6)"] [[package]] name = "datasets" -version = "4.8.4" +version = "4.8.5.dev0" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.10.0" groups = ["main"] -files = [ - {file = "datasets-4.8.4-py3-none-any.whl", hash = "sha256:cdc8bee4698e549d78bf1fed6aea2eebc760b22b084f07e6fc020c6577a6ce6d"}, - {file = "datasets-4.8.4.tar.gz", hash = "sha256:a1429ed853275ce7943a01c6d2e25475b4501eb758934362106a280470df3a52"}, -] +files = [] +develop = false [package.dependencies] dill = ">=0.3.0,<0.4.2" @@ -682,6 +680,12 @@ tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elastics torch = ["torch"] vision = ["Pillow (>=9.4.0)"] +[package.source] +type = "git" +url = "https://github.com/huggingface/datasets" +reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" + [[package]] name = "defusedxml" version = "0.7.1" @@ -1218,7 +1222,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = "^4.8.4" +datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/libs/libcommon/poetry.lock b/libs/libcommon/poetry.lock index 17aced77f..ad246d019 100644 --- a/libs/libcommon/poetry.lock +++ b/libs/libcommon/poetry.lock @@ -675,15 +675,13 @@ xml-validation = ["lxml (>=4,<6)"] [[package]] name = "datasets" -version = "4.8.4" +version = "4.8.5.dev0" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.10.0" groups = ["main"] -files = [ - {file = "datasets-4.8.4-py3-none-any.whl", hash = "sha256:cdc8bee4698e549d78bf1fed6aea2eebc760b22b084f07e6fc020c6577a6ce6d"}, - {file = "datasets-4.8.4.tar.gz", hash = "sha256:a1429ed853275ce7943a01c6d2e25475b4501eb758934362106a280470df3a52"}, -] +files = [] +develop = false [package.dependencies] dill = ">=0.3.0,<0.4.2" @@ -717,6 +715,12 @@ tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elastics torch = ["torch"] vision = ["Pillow (>=9.4.0)"] +[package.source] +type = "git" +url = "https://github.com/huggingface/datasets" +reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" + [[package]] name = "defusedxml" version = "0.7.1" @@ -4680,4 +4684,4 @@ propcache = ">=0.2.1" [metadata] lock-version = "2.1" python-versions = "3.12.11" -content-hash = "f5d6164ae9685e20255129423c595b708f9744b381dc0de71f9a48f7566db4a8" +content-hash = "c0fbc2a82c6b54dd37f1880b500857cdadbf97af9c110e15ce22c172d6b72d88" diff --git a/libs/libcommon/pyproject.toml b/libs/libcommon/pyproject.toml index 9d45b2608..b359d1f76 100644 --- a/libs/libcommon/pyproject.toml +++ b/libs/libcommon/pyproject.toml @@ -10,7 +10,7 @@ python = "3.12.11" anyio = ">=3.4.0,<5" appdirs = "^1.4.4" cryptography = "^43.0.1" -datasets = "^4.8.4" +datasets = { git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" } duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/services/admin/poetry.lock b/services/admin/poetry.lock index c083f03ec..17e08904b 100644 --- a/services/admin/poetry.lock +++ b/services/admin/poetry.lock @@ -640,15 +640,13 @@ xml-validation = ["lxml (>=4,<6)"] [[package]] name = "datasets" -version = "4.8.4" +version = "4.8.5.dev0" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.10.0" groups = ["main"] -files = [ - {file = "datasets-4.8.4-py3-none-any.whl", hash = "sha256:cdc8bee4698e549d78bf1fed6aea2eebc760b22b084f07e6fc020c6577a6ce6d"}, - {file = "datasets-4.8.4.tar.gz", hash = "sha256:a1429ed853275ce7943a01c6d2e25475b4501eb758934362106a280470df3a52"}, -] +files = [] +develop = false [package.dependencies] dill = ">=0.3.0,<0.4.2" @@ -682,6 +680,12 @@ tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elastics torch = ["torch"] vision = ["Pillow (>=9.4.0)"] +[package.source] +type = "git" +url = "https://github.com/huggingface/datasets" +reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" + [[package]] name = "defusedxml" version = "0.7.1" @@ -1222,7 +1226,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = "^4.8.4" +datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/services/api/poetry.lock b/services/api/poetry.lock index 1dd0dc85f..f6d8a4918 100644 --- a/services/api/poetry.lock +++ b/services/api/poetry.lock @@ -640,15 +640,13 @@ xml-validation = ["lxml (>=4,<6)"] [[package]] name = "datasets" -version = "4.8.4" +version = "4.8.5.dev0" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.10.0" groups = ["main"] -files = [ - {file = "datasets-4.8.4-py3-none-any.whl", hash = "sha256:cdc8bee4698e549d78bf1fed6aea2eebc760b22b084f07e6fc020c6577a6ce6d"}, - {file = "datasets-4.8.4.tar.gz", hash = "sha256:a1429ed853275ce7943a01c6d2e25475b4501eb758934362106a280470df3a52"}, -] +files = [] +develop = false [package.dependencies] dill = ">=0.3.0,<0.4.2" @@ -682,6 +680,12 @@ tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elastics torch = ["torch"] vision = ["Pillow (>=9.4.0)"] +[package.source] +type = "git" +url = "https://github.com/huggingface/datasets" +reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" + [[package]] name = "defusedxml" version = "0.7.1" @@ -1259,7 +1263,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = "^4.8.4" +datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/services/rows/poetry.lock b/services/rows/poetry.lock index f43997716..5ed1a85a0 100644 --- a/services/rows/poetry.lock +++ b/services/rows/poetry.lock @@ -661,15 +661,13 @@ xml-validation = ["lxml (>=4,<6)"] [[package]] name = "datasets" -version = "4.8.4" +version = "4.8.5.dev0" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.10.0" groups = ["main"] -files = [ - {file = "datasets-4.8.4-py3-none-any.whl", hash = "sha256:cdc8bee4698e549d78bf1fed6aea2eebc760b22b084f07e6fc020c6577a6ce6d"}, - {file = "datasets-4.8.4.tar.gz", hash = "sha256:a1429ed853275ce7943a01c6d2e25475b4501eb758934362106a280470df3a52"}, -] +files = [] +develop = false [package.dependencies] dill = ">=0.3.0,<0.4.2" @@ -703,6 +701,12 @@ tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elastics torch = ["torch"] vision = ["Pillow (>=9.4.0)"] +[package.source] +type = "git" +url = "https://github.com/huggingface/datasets" +reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" + [[package]] name = "defusedxml" version = "0.7.1" @@ -1280,7 +1284,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = "^4.8.4" +datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/services/search/poetry.lock b/services/search/poetry.lock index 13f980b34..a128138a8 100644 --- a/services/search/poetry.lock +++ b/services/search/poetry.lock @@ -640,15 +640,13 @@ xml-validation = ["lxml (>=4,<6)"] [[package]] name = "datasets" -version = "4.8.4" +version = "4.8.5.dev0" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.10.0" groups = ["main"] -files = [ - {file = "datasets-4.8.4-py3-none-any.whl", hash = "sha256:cdc8bee4698e549d78bf1fed6aea2eebc760b22b084f07e6fc020c6577a6ce6d"}, - {file = "datasets-4.8.4.tar.gz", hash = "sha256:a1429ed853275ce7943a01c6d2e25475b4501eb758934362106a280470df3a52"}, -] +files = [] +develop = false [package.dependencies] dill = ">=0.3.0,<0.4.2" @@ -682,6 +680,12 @@ tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elastics torch = ["torch"] vision = ["Pillow (>=9.4.0)"] +[package.source] +type = "git" +url = "https://github.com/huggingface/datasets" +reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" + [[package]] name = "defusedxml" version = "0.7.1" @@ -1259,7 +1263,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = "^4.8.4" +datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/services/sse-api/poetry.lock b/services/sse-api/poetry.lock index 0234b592e..f2e0e48d1 100644 --- a/services/sse-api/poetry.lock +++ b/services/sse-api/poetry.lock @@ -640,15 +640,13 @@ xml-validation = ["lxml (>=4,<6)"] [[package]] name = "datasets" -version = "4.8.4" +version = "4.8.5.dev0" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.10.0" groups = ["main"] -files = [ - {file = "datasets-4.8.4-py3-none-any.whl", hash = "sha256:cdc8bee4698e549d78bf1fed6aea2eebc760b22b084f07e6fc020c6577a6ce6d"}, - {file = "datasets-4.8.4.tar.gz", hash = "sha256:a1429ed853275ce7943a01c6d2e25475b4501eb758934362106a280470df3a52"}, -] +files = [] +develop = false [package.dependencies] dill = ">=0.3.0,<0.4.2" @@ -682,6 +680,12 @@ tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elastics torch = ["torch"] vision = ["Pillow (>=9.4.0)"] +[package.source] +type = "git" +url = "https://github.com/huggingface/datasets" +reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" + [[package]] name = "defusedxml" version = "0.7.1" @@ -1290,7 +1294,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = "^4.8.4" +datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/services/webhook/poetry.lock b/services/webhook/poetry.lock index bef00b2f2..b70f9b7ba 100644 --- a/services/webhook/poetry.lock +++ b/services/webhook/poetry.lock @@ -640,15 +640,13 @@ xml-validation = ["lxml (>=4,<6)"] [[package]] name = "datasets" -version = "4.8.4" +version = "4.8.5.dev0" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.10.0" groups = ["main"] -files = [ - {file = "datasets-4.8.4-py3-none-any.whl", hash = "sha256:cdc8bee4698e549d78bf1fed6aea2eebc760b22b084f07e6fc020c6577a6ce6d"}, - {file = "datasets-4.8.4.tar.gz", hash = "sha256:a1429ed853275ce7943a01c6d2e25475b4501eb758934362106a280470df3a52"}, -] +files = [] +develop = false [package.dependencies] dill = ">=0.3.0,<0.4.2" @@ -682,6 +680,12 @@ tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elastics torch = ["torch"] vision = ["Pillow (>=9.4.0)"] +[package.source] +type = "git" +url = "https://github.com/huggingface/datasets" +reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" + [[package]] name = "defusedxml" version = "0.7.1" @@ -1259,7 +1263,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = "^4.8.4" +datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/services/worker/poetry.lock b/services/worker/poetry.lock index 67bf5aa35..17786e2a4 100644 --- a/services/worker/poetry.lock +++ b/services/worker/poetry.lock @@ -983,15 +983,13 @@ files = [ [[package]] name = "datasets" -version = "4.8.4" +version = "4.8.5.dev0" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.10.0" groups = ["main"] -files = [ - {file = "datasets-4.8.4-py3-none-any.whl", hash = "sha256:cdc8bee4698e549d78bf1fed6aea2eebc760b22b084f07e6fc020c6577a6ce6d"}, - {file = "datasets-4.8.4.tar.gz", hash = "sha256:a1429ed853275ce7943a01c6d2e25475b4501eb758934362106a280470df3a52"}, -] +files = [] +develop = false [package.dependencies] dill = ">=0.3.0,<0.4.2" @@ -1025,6 +1023,12 @@ tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "aiohttp", "decorator", "elastics torch = ["torch"] vision = ["Pillow (>=9.4.0)"] +[package.source] +type = "git" +url = "https://github.com/huggingface/datasets" +reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" + [[package]] name = "defusedxml" version = "0.7.1" @@ -1696,7 +1700,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = "^4.8.4" +datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" From cc986a6a779e168af32916b4444ef3ddddac6a4f Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Wed, 1 Apr 2026 18:15:53 +0100 Subject: [PATCH 2/3] add agent-traces format --- .gitignore | 1 + services/worker/src/worker/dtos.py | 12 +++++++++++- .../job_runners/dataset/compatible_libraries.py | 12 +++++++++++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 0eaf92e66..2648020ca 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ # local configurations .idea .vscode/*.log +settings.json # tests coverage .coverage diff --git a/services/worker/src/worker/dtos.py b/services/worker/src/worker/dtos.py index 3bf643158..9d89a31c7 100644 --- a/services/worker/src/worker/dtos.py +++ b/services/worker/src/worker/dtos.py @@ -256,7 +256,17 @@ class IsValidResponse(TypedDict): DatasetLibrary = Literal["mlcroissant", "webdataset", "datasets", "pandas", "dask", "polars", "lance"] DatasetFormat = Literal[ - "json", "csv", "parquet", "imagefolder", "audiofolder", "webdataset", "text", "arrow", "optimized-parquet", "lance" + "json", + "csv", + "parquet", + "imagefolder", + "audiofolder", + "webdataset", + "text", + "arrow", + "optimized-parquet", + "lance", + "agent-traces", ] ProgrammingLanguage = Literal["python"] diff --git a/services/worker/src/worker/job_runners/dataset/compatible_libraries.py b/services/worker/src/worker/job_runners/dataset/compatible_libraries.py index 5214be28b..6440d92ae 100644 --- a/services/worker/src/worker/job_runners/dataset/compatible_libraries.py +++ b/services/worker/src/worker/job_runners/dataset/compatible_libraries.py @@ -11,7 +11,7 @@ import datasets.data_files import pyarrow.parquet as pq import yaml -from datasets import BuilderConfig, DownloadConfig +from datasets import BuilderConfig, DownloadConfig, Features from datasets.data_files import ( NON_WORDS_CHARS, DataFilesDict, @@ -29,6 +29,7 @@ _MODULE_TO_METADATA_FILE_NAMES, _PACKAGED_DATASETS_MODULES, ) +from datasets.packaged_modules.json.json import AGENT_TRACES_FEATURES from datasets.utils.metadata import MetadataConfigs from huggingface_hub import DatasetCard, DatasetCardData, HfFileSystem from libcommon.constants import LOADING_METHODS_MAX_CONFIGS @@ -889,6 +890,15 @@ def compute_compatible_libraries_response( ): formats.append("optimized-parquet") + # Agent Traces + if "json" in formats: + if infos: + for info in infos: + if "features" in info and isinstance(info["features"], dict): + if Features.from_dict(info["features"]) == AGENT_TRACES_FEATURES: + formats.append("agent-traces") + break + return DatasetCompatibleLibrariesResponse(libraries=libraries, formats=formats) From 724aafc40c0da08423b33baeb8e1e74db9eaa6f2 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Wed, 1 Apr 2026 18:32:38 +0100 Subject: [PATCH 3/3] bump again to have file_name too --- front/admin_ui/poetry.lock | 6 +++--- jobs/cache_maintenance/poetry.lock | 6 +++--- jobs/mongodb_migration/poetry.lock | 6 +++--- libs/libapi/poetry.lock | 6 +++--- libs/libcommon/poetry.lock | 6 +++--- libs/libcommon/pyproject.toml | 2 +- services/admin/poetry.lock | 6 +++--- services/api/poetry.lock | 6 +++--- services/rows/poetry.lock | 6 +++--- services/search/poetry.lock | 6 +++--- services/sse-api/poetry.lock | 6 +++--- services/webhook/poetry.lock | 6 +++--- services/worker/poetry.lock | 6 +++--- 13 files changed, 37 insertions(+), 37 deletions(-) diff --git a/front/admin_ui/poetry.lock b/front/admin_ui/poetry.lock index a0de307c0..8c1302705 100644 --- a/front/admin_ui/poetry.lock +++ b/front/admin_ui/poetry.lock @@ -715,8 +715,8 @@ vision = ["Pillow (>=9.4.0)"] [package.source] type = "git" url = "https://github.com/huggingface/datasets" -reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" -resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" +resolved_reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" [[package]] name = "dill" @@ -1523,7 +1523,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} +datasets = {git = "https://github.com/huggingface/datasets", rev = "1cacbe629f208d7ace2c92db91a67b0060e0e07b"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/jobs/cache_maintenance/poetry.lock b/jobs/cache_maintenance/poetry.lock index 9ea54a2d0..18af8e20c 100644 --- a/jobs/cache_maintenance/poetry.lock +++ b/jobs/cache_maintenance/poetry.lock @@ -683,8 +683,8 @@ vision = ["Pillow (>=9.4.0)"] [package.source] type = "git" url = "https://github.com/huggingface/datasets" -reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" -resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" +resolved_reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" [[package]] name = "defusedxml" @@ -1203,7 +1203,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} +datasets = {git = "https://github.com/huggingface/datasets", rev = "1cacbe629f208d7ace2c92db91a67b0060e0e07b"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/jobs/mongodb_migration/poetry.lock b/jobs/mongodb_migration/poetry.lock index 71d83a7ec..0ca39a5f2 100644 --- a/jobs/mongodb_migration/poetry.lock +++ b/jobs/mongodb_migration/poetry.lock @@ -683,8 +683,8 @@ vision = ["Pillow (>=9.4.0)"] [package.source] type = "git" url = "https://github.com/huggingface/datasets" -reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" -resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" +resolved_reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" [[package]] name = "defusedxml" @@ -1203,7 +1203,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} +datasets = {git = "https://github.com/huggingface/datasets", rev = "1cacbe629f208d7ace2c92db91a67b0060e0e07b"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/libs/libapi/poetry.lock b/libs/libapi/poetry.lock index 19f6dd39d..3104a67e2 100644 --- a/libs/libapi/poetry.lock +++ b/libs/libapi/poetry.lock @@ -683,8 +683,8 @@ vision = ["Pillow (>=9.4.0)"] [package.source] type = "git" url = "https://github.com/huggingface/datasets" -reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" -resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" +resolved_reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" [[package]] name = "defusedxml" @@ -1222,7 +1222,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} +datasets = {git = "https://github.com/huggingface/datasets", rev = "1cacbe629f208d7ace2c92db91a67b0060e0e07b"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/libs/libcommon/poetry.lock b/libs/libcommon/poetry.lock index ad246d019..fa96a229b 100644 --- a/libs/libcommon/poetry.lock +++ b/libs/libcommon/poetry.lock @@ -718,8 +718,8 @@ vision = ["Pillow (>=9.4.0)"] [package.source] type = "git" url = "https://github.com/huggingface/datasets" -reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" -resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" +resolved_reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" [[package]] name = "defusedxml" @@ -4684,4 +4684,4 @@ propcache = ">=0.2.1" [metadata] lock-version = "2.1" python-versions = "3.12.11" -content-hash = "c0fbc2a82c6b54dd37f1880b500857cdadbf97af9c110e15ce22c172d6b72d88" +content-hash = "f0fa8746dd596b0b1f3b4b79c89011a5475cf36617e0be32e11ba4ad7559223f" diff --git a/libs/libcommon/pyproject.toml b/libs/libcommon/pyproject.toml index b359d1f76..bb3249f7f 100644 --- a/libs/libcommon/pyproject.toml +++ b/libs/libcommon/pyproject.toml @@ -10,7 +10,7 @@ python = "3.12.11" anyio = ">=3.4.0,<5" appdirs = "^1.4.4" cryptography = "^43.0.1" -datasets = { git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" } +datasets = { git = "https://github.com/huggingface/datasets", rev = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" } duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/services/admin/poetry.lock b/services/admin/poetry.lock index 17e08904b..2b217d3ea 100644 --- a/services/admin/poetry.lock +++ b/services/admin/poetry.lock @@ -683,8 +683,8 @@ vision = ["Pillow (>=9.4.0)"] [package.source] type = "git" url = "https://github.com/huggingface/datasets" -reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" -resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" +resolved_reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" [[package]] name = "defusedxml" @@ -1226,7 +1226,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} +datasets = {git = "https://github.com/huggingface/datasets", rev = "1cacbe629f208d7ace2c92db91a67b0060e0e07b"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/services/api/poetry.lock b/services/api/poetry.lock index f6d8a4918..39a1504ab 100644 --- a/services/api/poetry.lock +++ b/services/api/poetry.lock @@ -683,8 +683,8 @@ vision = ["Pillow (>=9.4.0)"] [package.source] type = "git" url = "https://github.com/huggingface/datasets" -reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" -resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" +resolved_reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" [[package]] name = "defusedxml" @@ -1263,7 +1263,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} +datasets = {git = "https://github.com/huggingface/datasets", rev = "1cacbe629f208d7ace2c92db91a67b0060e0e07b"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/services/rows/poetry.lock b/services/rows/poetry.lock index 5ed1a85a0..d60eaea7b 100644 --- a/services/rows/poetry.lock +++ b/services/rows/poetry.lock @@ -704,8 +704,8 @@ vision = ["Pillow (>=9.4.0)"] [package.source] type = "git" url = "https://github.com/huggingface/datasets" -reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" -resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" +resolved_reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" [[package]] name = "defusedxml" @@ -1284,7 +1284,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} +datasets = {git = "https://github.com/huggingface/datasets", rev = "1cacbe629f208d7ace2c92db91a67b0060e0e07b"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/services/search/poetry.lock b/services/search/poetry.lock index a128138a8..100c02d36 100644 --- a/services/search/poetry.lock +++ b/services/search/poetry.lock @@ -683,8 +683,8 @@ vision = ["Pillow (>=9.4.0)"] [package.source] type = "git" url = "https://github.com/huggingface/datasets" -reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" -resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" +resolved_reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" [[package]] name = "defusedxml" @@ -1263,7 +1263,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} +datasets = {git = "https://github.com/huggingface/datasets", rev = "1cacbe629f208d7ace2c92db91a67b0060e0e07b"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/services/sse-api/poetry.lock b/services/sse-api/poetry.lock index f2e0e48d1..8d6504a2b 100644 --- a/services/sse-api/poetry.lock +++ b/services/sse-api/poetry.lock @@ -683,8 +683,8 @@ vision = ["Pillow (>=9.4.0)"] [package.source] type = "git" url = "https://github.com/huggingface/datasets" -reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" -resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" +resolved_reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" [[package]] name = "defusedxml" @@ -1294,7 +1294,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} +datasets = {git = "https://github.com/huggingface/datasets", rev = "1cacbe629f208d7ace2c92db91a67b0060e0e07b"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/services/webhook/poetry.lock b/services/webhook/poetry.lock index b70f9b7ba..a35b511e2 100644 --- a/services/webhook/poetry.lock +++ b/services/webhook/poetry.lock @@ -683,8 +683,8 @@ vision = ["Pillow (>=9.4.0)"] [package.source] type = "git" url = "https://github.com/huggingface/datasets" -reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" -resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" +resolved_reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" [[package]] name = "defusedxml" @@ -1263,7 +1263,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} +datasets = {git = "https://github.com/huggingface/datasets", rev = "1cacbe629f208d7ace2c92db91a67b0060e0e07b"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0" diff --git a/services/worker/poetry.lock b/services/worker/poetry.lock index 17786e2a4..a4929a44a 100644 --- a/services/worker/poetry.lock +++ b/services/worker/poetry.lock @@ -1026,8 +1026,8 @@ vision = ["Pillow (>=9.4.0)"] [package.source] type = "git" url = "https://github.com/huggingface/datasets" -reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" -resolved_reference = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698" +reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" +resolved_reference = "1cacbe629f208d7ace2c92db91a67b0060e0e07b" [[package]] name = "defusedxml" @@ -1700,7 +1700,7 @@ anyio = ">=3.4.0,<5" appdirs = "^1.4.4" async-lru = "^2.0.5" cryptography = "^43.0.1" -datasets = {git = "https://github.com/huggingface/datasets", rev = "2887fb6f4ffbd1e195ccd32050ae7563d6d26698"} +datasets = {git = "https://github.com/huggingface/datasets", rev = "1cacbe629f208d7ace2c92db91a67b0060e0e07b"} duckdb = "^1.2.2" environs = "^14.3.0" filelock = "^3.18.0"