|
6 | 6 | from kili.core.graphql.operations.asset.mutations import GQL_APPEND_MANY_ASSETS |
7 | 7 | from kili.domain.project import ProjectId |
8 | 8 | from kili.services.asset_import import import_assets |
9 | | -from kili.services.asset_import.exceptions import MimeTypeError |
| 9 | +from kili.services.asset_import.exceptions import ImportValidationError |
10 | 10 | from tests.unit.services.asset_import.base import ImportTestCase |
11 | 11 | from tests.unit.services.asset_import.mocks import ( |
12 | 12 | mocked_request_signed_urls, |
|
20 | 20 | @patch("kili.utils.bucket.upload_data_via_rest", mocked_upload_data_via_rest) |
21 | 21 | class TestContentType(ImportTestCase): |
22 | 22 | def test_cannot_upload_an_image_to_video_project(self, *_): |
23 | | - self.kili.kili_api_gateway.get_project.return_value = {"inputType": "VIDEO_LEGACY"} |
| 23 | + self.kili.kili_api_gateway.get_project.return_value = {"inputType": "VIDEO"} |
24 | 24 | url = "https://storage.googleapis.com/label-public-staging/car/car_1.jpg" |
25 | 25 | path_image = self.downloader(url) |
26 | 26 | assets = [{"content": path_image, "external_id": "image"}] |
27 | | - with pytest.raises(MimeTypeError): |
| 27 | + # Extension check runs before MIME type check, so ImportValidationError is raised first |
| 28 | + with pytest.raises(ImportValidationError): |
28 | 29 | import_assets(self.kili, ProjectId(self.project_id), assets, disable_tqdm=True) |
29 | 30 |
|
30 | 31 | def test_cannot_import_files_not_found_to_an_image_project(self, *_): |
@@ -84,3 +85,184 @@ def test_import_assets_verify(self, mocked_verify_batch_imported, *_): |
84 | 85 | mocked_verify_batch_imported.assert_not_called() |
85 | 86 | import_assets(self.kili, ProjectId("project_id"), assets, verify=True) |
86 | 87 | mocked_verify_batch_imported.assert_called_once() |
| 88 | + |
| 89 | + |
@patch("kili.utils.bucket.generate_unique_id", mocked_unique_id)
@patch("kili.utils.bucket.request_signed_urls", mocked_request_signed_urls)
@patch("kili.utils.bucket.upload_data_via_rest", mocked_upload_data_via_rest)
class TestFileExtensionValidation(ImportTestCase):
    """Tests that the import service validates file extensions before uploading.

    Every test sets the mocked project's ``inputType`` and then calls
    ``import_assets``. Rejection tests expect an ``ImportValidationError`` whose
    message contains the offending extension; acceptance tests pass as long as
    ``import_assets`` does not raise.
    """

    # --- IMAGE ---
    def test_image_project_rejects_video_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "IMAGE"}
        assets = [{"content": "https://example.com/clip.mp4", "external_id": "wrong"}]
        with pytest.raises(ImportValidationError, match=r"\.mp4"):
            import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_image_project_rejects_audio_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "IMAGE"}
        assets = [{"content": "https://example.com/sound.mp3", "external_id": "wrong"}]
        with pytest.raises(ImportValidationError, match=r"\.mp3"):
            import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_image_project_accepts_jpg_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "IMAGE"}
        assets = [{"content": "https://example.com/image.jpg", "external_id": "ok", "id": "uid"}]
        # Should not raise ImportValidationError
        import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_image_project_accepts_tif_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "IMAGE"}
        assets = [{"content": "https://example.com/geo.tif", "external_id": "ok", "id": "uid"}]
        import_assets(self.kili, ProjectId(self.project_id), assets)

    # --- VIDEO ---
    def test_video_project_rejects_image_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "VIDEO"}
        assets = [{"content": "https://example.com/photo.jpg", "external_id": "wrong"}]
        with pytest.raises(ImportValidationError, match=r"\.jpg"):
            import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_video_project_rejects_pdf_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "VIDEO"}
        assets = [{"content": "https://example.com/doc.pdf", "external_id": "wrong"}]
        with pytest.raises(ImportValidationError, match=r"\.pdf"):
            import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_video_project_accepts_mp4_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "VIDEO"}
        assets = [{"content": "https://example.com/vid.mp4", "external_id": "ok"}]
        import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_video_project_accepts_mkv_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "VIDEO"}
        assets = [{"content": "https://example.com/vid.mkv", "external_id": "ok"}]
        import_assets(self.kili, ProjectId(self.project_id), assets)

    # --- AUDIO ---
    def test_audio_project_rejects_image_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "AUDIO"}
        assets = [{"content": "https://example.com/photo.jpg", "external_id": "wrong"}]
        with pytest.raises(ImportValidationError, match=r"\.jpg"):
            import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_audio_project_rejects_pdf_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "AUDIO"}
        assets = [{"content": "https://example.com/doc.pdf", "external_id": "wrong"}]
        with pytest.raises(ImportValidationError, match=r"\.pdf"):
            import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_audio_project_accepts_mp3_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "AUDIO"}
        assets = [{"content": "https://example.com/audio.mp3", "external_id": "ok"}]
        import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_audio_project_accepts_wav_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "AUDIO"}
        assets = [{"content": "https://example.com/audio.wav", "external_id": "ok"}]
        import_assets(self.kili, ProjectId(self.project_id), assets)

    # --- PDF ---
    def test_pdf_project_rejects_image_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "PDF"}
        assets = [{"content": "https://example.com/photo.jpg", "external_id": "wrong"}]
        with pytest.raises(ImportValidationError, match=r"\.jpg"):
            import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_pdf_project_rejects_video_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "PDF"}
        assets = [{"content": "https://example.com/clip.mp4", "external_id": "wrong"}]
        with pytest.raises(ImportValidationError, match=r"\.mp4"):
            import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_pdf_project_accepts_pdf_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "PDF"}
        assets = [{"content": "https://example.com/doc.pdf", "external_id": "ok"}]
        import_assets(self.kili, ProjectId(self.project_id), assets)

    # --- GEOSPATIAL ---
    def test_geospatial_project_rejects_image_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "GEOSPATIAL"}
        assets = [{"content": "https://example.com/photo.jpg", "external_id": "wrong"}]
        with pytest.raises(ImportValidationError, match=r"\.jpg"):
            import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_geospatial_project_accepts_tif_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "GEOSPATIAL"}
        assets = [{"content": "https://example.com/geo.tif", "external_id": "ok"}]
        import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_geospatial_project_accepts_jp2_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "GEOSPATIAL"}
        assets = [{"content": "https://example.com/geo.jp2", "external_id": "ok"}]
        import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_geospatial_project_rejects_wrong_extension_in_multi_layer(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "GEOSPATIAL"}
        assets = [
            {
                "multi_layer_content": [
                    {"path": "/local/layer.jpg", "name": "layer1"},
                ],
                "external_id": "wrong_multi_layer",
            }
        ]
        with pytest.raises(ImportValidationError, match=r"\.jpg"):
            import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_geospatial_project_accepts_correct_extension_in_multi_layer(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "GEOSPATIAL"}
        assets = [
            {
                "multi_layer_content": [
                    {"path": "/local/layer.tif", "name": "layer1"},
                    {"path": "/local/layer2.tiff", "name": "layer2"},
                ],
                "external_id": "ok_multi_layer",
            }
        ]
        # Only extension validation is under test. The import may or may not fail
        # later on file access, so a later error is tolerated but never required:
        # the test passes on success too, and fails only on ImportValidationError.
        try:
            import_assets(self.kili, ProjectId(self.project_id), assets)
        except ImportValidationError as err:
            pytest.fail(f"Valid multi-layer extensions were rejected: {err}")
        except Exception:  # pylint: disable=broad-except
            # Failure unrelated to extension validation; out of scope for this test.
            pass

    # --- TEXT ---
    def test_text_project_rejects_video_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "TEXT"}
        assets = [{"content": "https://example.com/clip.mp4", "external_id": "wrong"}]
        with pytest.raises(ImportValidationError, match=r"\.mp4"):
            import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_text_project_accepts_txt_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "TEXT"}
        assets = [{"content": "https://example.com/file.txt", "external_id": "ok"}]
        import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_text_project_skips_validation_for_raw_text(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "TEXT"}
        # Content is inline text rather than a path or URL, so it carries no extension.
        assets = [{"content": "this is raw text with no extension", "external_id": "ok"}]
        import_assets(self.kili, ProjectId(self.project_id), assets)

    # --- LLM_RLHF ---
    def test_llm_project_rejects_non_json_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "LLM_RLHF"}
        assets = [{"content": "https://example.com/data.txt", "external_id": "wrong"}]
        with pytest.raises(ImportValidationError, match=r"\.txt"):
            import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_llm_project_accepts_json_extension(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "LLM_RLHF"}
        assets = [{"content": "https://example.com/data.json", "external_id": "ok"}]
        import_assets(self.kili, ProjectId(self.project_id), assets)

    # --- no extension in URL skips validation ---
    def test_extensionless_url_skips_validation_for_image_project(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "IMAGE"}
        assets = [{"content": "https://example.com/no-ext", "external_id": "ok", "id": "uid"}]
        import_assets(self.kili, ProjectId(self.project_id), assets)

    def test_extensionless_url_skips_validation_for_video_project(self, *_):
        self.kili.kili_api_gateway.get_project.return_value = {"inputType": "VIDEO"}
        assets = [{"content": "https://hosted-data", "external_id": "ok"}]
        import_assets(self.kili, ProjectId(self.project_id), assets)
0 commit comments