mitodl
diff --git a/frontends/api/src/generated/v0/api.ts b/frontends/api/src/generated/v0/api.ts
Lines changed: 6 additions & 0 deletions
diff --git a/frontends/api/src/generated/v1/api.ts b/frontends/api/src/generated/v1/api.ts
Lines changed: 6 additions & 0 deletions
diff --git a/frontends/main/src/page-components/LearningResourceExpanded/CallToActionSection.tsx b/frontends/main/src/page-components/LearningResourceExpanded/CallToActionSection.tsx
Lines changed: 13 additions & 6 deletions
diff --git a/learning_resources/etl/ocw.py b/learning_resources/etl/ocw.py
Lines changed: 87 additions & 3 deletions
diff --git a/learning_resources/etl/ocw_test.py b/learning_resources/etl/ocw_test.py
Lines changed: 8 additions & 18 deletions
diff --git a/learning_resources/etl/pipelines_test.py b/learning_resources/etl/pipelines_test.py
Lines changed: 1 addition & 1 deletion
diff --git a/learning_resources/migrations/0112_contentfile_youtube_id.py b/learning_resources/migrations/0112_contentfile_youtube_id.py
Lines changed: 17 additions & 0 deletions
diff --git a/learning_resources/models.py b/learning_resources/models.py
Lines changed: 1 addition & 0 deletions
diff --git a/learning_resources/serializers.py b/learning_resources/serializers.py
Lines changed: 1 addition & 0 deletions
diff --git a/learning_resources/serializers_test.py b/learning_resources/serializers_test.py
Lines changed: 2 additions & 0 deletions
@@ -210,13 +210,20 @@ const ImageSection: React.FC<{
   config: ImageConfig
 }> = ({ resource, config }) => {
   const aspect = config.width / config.height
-  if (
-    resource?.resource_type === "video" &&
-    resource?.url &&
-    resource?.platform?.code === PlatformEnum.Youtube
-  ) {
+  const youtubeId =
+    resource?.resource_type === "video"
+      ? resource.content_files?.[0]?.youtube_id
+      : null
+  const youtubeUrl = youtubeId
+    ? `https://www.youtube.com/watch?v=${youtubeId}`
+    : resource?.resource_type === "video" &&
+        resource?.platform?.code === PlatformEnum.Youtube
+      ? resource.url
+      : null
+
+  if (resource && youtubeUrl) {
     return (
-      <VideoFrame src={resource.url} title={resource.title} aspect={aspect} />
+      <VideoFrame src={youtubeUrl} title={resource.title} aspect={aspect} />
     )
   } else if (resource) {
     const imageUrl =
 
@@ -139,9 +139,15 @@ def transform_content_files(
         if obj.key.endswith("data.json"):
             try:
                 resource_json = safe_load_json(get_s3_object_and_read(obj), obj.key)
-                transformed_resource = transform_contentfile(
-                    obj.key, resource_json, s3_resource, force_overwrite
-                )
+
+                if resource_json.get("resourcetype"):
+                    transformed_resource = transform_contentfile(
+                        obj.key, resource_json, s3_resource, force_overwrite
+                    )
+                else:
+                    transformed_resource = transform_contentfile_legacy(
+                        obj.key, resource_json, s3_resource, force_overwrite
+                    )
                 if transformed_resource:
                     yield transformed_resource
 
@@ -171,7 +177,9 @@ def transform_page(s3_key: str, page_data: dict) -> dict:
         "title": page_data.get("title"),
         "content_title": page_data.get("title"),
         "content": page_data.get("content"),
+        "content_tags": page_data.get("learning_resource_types"),
         "key": s3_path,
+        "description": page_data.get("description"),
         "published": True,
     }
 
@@ -231,6 +239,90 @@ def transform_contentfile(
     contentfile_data: dict,
     s3_resource: boto3.resource,
     force_overwrite: bool,  # noqa: FBT001
+) -> dict:
+    """
+    Transform the data from data.json (new format) for a content file
+
+    For Video resources the content path is the "video_transcript_file" entry
+    of "video_files"; for everything else it is the "file" entry.
+
+    Args:
+        s3_key (str): S3 path for the data.json file
+        contentfile_data (dict): JSON data from data.json
+        s3_resource (boto3.resource): The S3 resource
+        force_overwrite (bool): Overwrite document text if true
+
+    Returns:
+        dict: transformed content file data, or None for 3play caption/pdf
+            files and for resources with no file path to read content from
+    """
+    s3_path = s3_key.split("data.json", maxsplit=1)[0]
+    s3_path = urlparse(s3_path).path.lstrip("/")
+
+    file_type = contentfile_data.get("file_type")
+    # "or {}" also covers keys that are present but explicitly null
+    video_files = contentfile_data.get("video_files") or {}
+
+    if contentfile_data.get("resourcetype") == "Video":
+        content_type = CONTENT_TYPE_VIDEO
+        video_metadata = contentfile_data.get("video_metadata") or {}
+        youtube_id = video_metadata.get("youtube_id")
+        if youtube_id:
+            image_src = f"https://i.ytimg.com/vi/{youtube_id}/hqdefault.jpg"
+        else:
+            image_src = None
+        file_s3_path = video_files.get("video_transcript_file")
+        file_extension = Path(
+            contentfile_data.get("file") or video_files.get("archive_url") or ""
+        ).suffix
+    else:
+        content_type = get_content_type(file_type)
+        file_s3_path = contentfile_data.get("file") or ""
+        image_src = None
+        file_extension = Path(file_s3_path).suffix
+        youtube_id = None
+
+    title = contentfile_data.get("title")
+
+    if title in ("3play caption file", "3play pdf file") or not file_s3_path:
+        return None
+
+    transformed = {
+        "description": clean_data(contentfile_data.get("content")),
+        "file_type": file_type,
+        "content_type": content_type,
+        "url": urljoin(settings.OCW_BASE_URL, urlparse(s3_path).path.lstrip("/")),
+        "title": title,
+        "content_title": title,
+        "key": s3_path,
+        "content_tags": contentfile_data.get("learning_resource_types"),
+        "published": True,
+        "file_extension": file_extension,
+    }
+
+    if not file_s3_path.startswith("courses"):
+        # Keep everything after the FIRST "courses" marker; an unbounded
+        # split would truncate paths that contain "courses" again later on
+        file_s3_path = "courses" + file_s3_path.split("courses", maxsplit=1)[1]
+
+    content_json = get_file_content(s3_path, file_s3_path, s3_resource, force_overwrite)
+    if content_json:
+        transformed["content"] = content_json.get("content")
+
+    if image_src:
+        transformed["image_src"] = image_src
+
+    if youtube_id:
+        transformed["youtube_id"] = youtube_id
+
+    return transformed
+
+
+def transform_contentfile_legacy(
+    s3_key: str,
+    contentfile_data: dict,
+    s3_resource: boto3.resource,
+    force_overwrite: bool,  # noqa: FBT001
 ) -> dict:
     """
     Transform the data from data.json for a content file
 
@@ -58,11 +58,13 @@ def test_transform_content_files(settings, mocker, base_ocw_url):
         transform_content_files(s3_resource, OCW_TEST_PREFIX, False)  # noqa: FBT003
     )
 
-    assert len(content_data) == 5
+    assert len(content_data) == 4
 
     assert content_data[0] == {
         "content": "Pages Section",
+        "content_tags": [],
         "content_type": "page",
+        "description": "Description of Pages",
         "key": (
             "courses/16-01-unified-engineering-i-ii-iii-iv-fall-2005-spring-2006/pages/"
         ),
@@ -74,7 +76,9 @@ def test_transform_content_files(settings, mocker, base_ocw_url):
 
     assert content_data[1] == {
         "content": "Course Meeting Times Lecture",
+        "content_tags": [],
         "content_type": "page",
+        "description": "Description of Syllabus",
         "key": "courses/16-01-unified-engineering-i-ii-iii-iv-fall-2005-spring-2006/pages/syllabus/",
         "published": True,
         "title": "Syllabus",
@@ -110,23 +114,9 @@ def test_transform_content_files(settings, mocker, base_ocw_url):
         "title": None,
         "content_title": None,
         "url": f"{ocw_url}/courses/16-01-unified-engineering-i-ii-iii-iv-fall-2005-spring-2006/resources/video/",
-        "image_src": "https://img.youtube.com/vi/vKer2U5W5-s/default.jpg",
-        "file_extension": ".mp4",
-    }
-
-    assert content_data[4] == {
-        "content": "TEXT",
-        "content_type": "video",
-        "description": "Video Description, no file",
-        "file_type": "video/mp4",
-        "key": "courses/16-01-unified-engineering-i-ii-iii-iv-fall-2005-spring-2006/resources/video_no_file/",
-        "content_tags": ["Old Videos"],
-        "published": True,
-        "title": None,
-        "content_title": None,
-        "url": f"{ocw_url}/courses/16-01-unified-engineering-i-ii-iii-iv-fall-2005-spring-2006/resources/video_no_file/",
-        "image_src": "https://img.youtube.com/vi/vKer2U5W5-s/default.jpg",
+        "image_src": "https://i.ytimg.com/vi/vKer2U5W5-s/hqdefault.jpg",
         "file_extension": ".mp4",
+        "youtube_id": "vKer2U5W5-s",
     }
 
 
@@ -146,7 +136,7 @@ def test_transform_content_files_exceptions(settings, mocker):
         transform_content_files(s3_resource, OCW_TEST_PREFIX, False)  # noqa: FBT003
     )
     assert len(content_data) == 0
-    assert mock_log.call_count == 7
+    assert mock_log.call_count == 5
 
 
 @mock_aws
 
@@ -270,7 +270,7 @@ def test_ocw_courses_etl(
     run = resource.runs.first()
     assert run.instructors.count() == 10
     assert run.run_id == "97db384ef34009a64df7cb86cf701979"
-    assert run.content_files.count() == (0 if skip_content_files else 5)
+    assert run.content_files.count() == (0 if skip_content_files else 4)
     assert mock_cf_actions.call_count == (0 if skip_content_files else 1)
     assert mock_calc_score.call_count == (0 if skip_content_files else 1)
 
 
@@ -0,0 +1,17 @@
+# Generated by Django 4.2.29 on 2026-04-14 19:47
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Add the optional youtube_id column to the contentfile model."""
+
+    dependencies = [("learning_resources", "0111_add_edx_module_id_idx")]
+
+    operations = [
+        migrations.AddField(
+            model_name="contentfile",
+            name="youtube_id",
+            field=models.CharField(max_length=32, null=True, blank=True),
+        ),
+    ]
@@ -1199,6 +1199,7 @@ class ContentFile(TimestampedModel):
     summary = models.TextField(blank=True, default="")
     flashcards = models.JSONField(blank=True, default=list)
     duration = models.CharField(max_length=11, null=True, blank=True)  # noqa: DJ001
+    youtube_id = models.CharField(max_length=32, null=True, blank=True)  # noqa: DJ001
 
     def save(self, **kwargs):
         self.checksum = checksum_for_content(self.content)
 
@@ -1449,6 +1449,7 @@ class Meta:
             "edx_module_id",
             "summary",
             "flashcards",
+            "youtube_id",
         ]
 
 
 
@@ -613,6 +613,7 @@ def test_content_file_serializer(settings, expected_types, has_channels):
         "content_language": "en",
         "content_title": "test title",
         "edx_module_id": "edx_module_id",
+        "youtube_id": "youtube_id",
     }
     platform = PlatformType.ocw.name
     course = factories.CourseFactory.create(platform=platform)
@@ -724,6 +725,7 @@ def test_content_file_serializer(settings, expected_types, has_channels):
             "summary": content_file.summary,
             "flashcards": content_file.flashcards,
             "direct_learning_resource_id": direct_learning_resource.id,
+            "youtube_id": content_file.youtube_id,
         },
     )
Original file line number	Diff line number	Diff line change
`@@ -1449,6 +1449,7 @@ class Meta:`
`1449`	`1449`	`"edx_module_id",`
`1450`	`1450`	`"summary",`
`1451`	`1451`	`"flashcards",`
	`1452`	`+ "youtube_id",`
`1452`	`1453`	`]`
`1453`	`1454`
`1454`	`1455`
Original file line number	Diff line number	Diff line change
`@@ -613,6 +613,7 @@ def test_content_file_serializer(settings, expected_types, has_channels):`
`613`	`613`	`"content_language": "en",`
`614`	`614`	`"content_title": "test title",`
`615`	`615`	`"edx_module_id": "edx_module_id",`
	`616`	`+ "youtube_id": "youtube_id",`
`616`	`617`	`}`
`617`	`618`	`platform = PlatformType.ocw.name`
`618`	`619`	`course = factories.CourseFactory.create(platform=platform)`
`@@ -724,6 +725,7 @@ def test_content_file_serializer(settings, expected_types, has_channels):`
`724`	`725`	`"summary": content_file.summary,`
`725`	`726`	`"flashcards": content_file.flashcards,`
`726`	`727`	`"direct_learning_resource_id": direct_learning_resource.id,`
	`728`	`+ "youtube_id": content_file.youtube_id,`
`727`	`729`	`},`
`728`	`730`	`)`
`729`	`731`