@@ -358,7 +358,11 @@ public function updateCourseContent(Course $course, User $user, $force = false):
358358 $ apiToken = $ this ->getApiToken ($ user );
359359
360360 $ canvasApi = new CanvasApi ($ apiDomain , $ apiToken );
361+ $ tempPages = [];
362+ $ pageUrls = [];
363+ $ asyncFetch = true ;
361364
365+ $ start_time = microtime (true );
362366 foreach ($ urls as $ contentType => $ url ) {
363367 $ response = $ canvasApi ->apiGet ($ url );
364368
@@ -392,15 +396,121 @@ public function updateCourseContent(Course $course, User $user, $force = false):
392396 continue ;
393397 }
394398
395- $ this ->saveOrUpdateContentItem ($ canvasApi , $ course , $ contentType , $ content , $ force );
399+ $ lmsContent = $ this ->normalizeLmsContent ($ course , $ contentType , $ content );
400+ if (!$ lmsContent ) {
401+ continue ;
402+ }
403+
404+ /* Check to see if the existing content item is already in the database and hasn't been updated since.
405+ The $force variable is used to force the full rescan, and skips the 'already exists' check */
406+
407+ $ contentItem = $ this ->contentItemRepo ->findOneBy ([
408+ 'contentType ' => $ contentType ,
409+ 'lmsContentId ' => $ lmsContent ['id ' ],
410+ 'course ' => $ course ,
411+ ]);
412+
413+ if (!$ force && $ contentItem ) {
414+ $ contentItemUpdated = $ contentItem ->getUpdated ();
415+ $ lmsUpdated = new \DateTime ($ lmsContent ['updated ' ], UtilityService::$ timezone );
416+ if ($ contentItemUpdated == $ lmsUpdated ) {
417+ $ contentItem ->setActive (true );
418+ continue ;
419+ }
420+ $ output ->writeln ('Content item already exists but is out of date. Updating ' . $ contentType . ': ' . $ lmsContent ['title ' ]);
421+ }
422+ else {
423+ $ output ->writeln ('New content item - ' . $ contentType . ': ' . $ lmsContent ['title ' ]);
424+ }
425+
426+ if (!$ contentItem ) {
427+ $ contentItem = new ContentItem ();
428+ $ contentItem ->setCourse ($ course )
429+ ->setLmsContentId ($ lmsContent ['id ' ])
430+ ->setActive (true )
431+ ->setContentType ($ contentType );
432+ $ this ->entityManager ->persist ($ contentItem );
433+ }
434+
435+ if ('page ' === $ contentType ) {
436+ $ url = "courses/ {$ course ->getLmsCourseId ()}/pages/ {$ lmsContent ['id ' ]}" ;
437+ if ($ asyncFetch ) {
438+ /* NEW PAGE FETCH: New asynchronous batch fetch. The real magic is in the $pageUrls handler beneath this foreach loop. */
439+ $ tempContentItems [] = $ contentItem ;
440+ $ pageUrls [] = $ url ;
441+ continue ;
442+ }
443+ else {
444+ /* OLD PAGE FETCH: 1-at-a-time synchronous fetch */
445+ $ pageResponse = $ canvasApi ->apiGet ($ url );
446+ $ pageObj = $ pageResponse ->getContent ();
447+
448+ if (!empty ($ pageObj ['body ' ])) {
449+ $ lmsContent ['body ' ] = $ pageObj ['body ' ];
450+ }
451+ }
452+ }
453+
454+ /* get HTML file content */
455+ if (('file ' === $ contentType ) && ('html ' === $ content ['mime_class ' ])) {
456+ $ lmsContent ['body ' ] = file_get_contents ($ content ['url ' ]);
457+ }
458+
459+ // some content types don't have an updated date, so we'll compare content
460+ // to find out if content has changed.
461+ if (in_array ($ contentType , ['syllabus ' , 'discussion_topic ' , 'announcement ' , 'quiz ' ])) {
462+ if ($ contentItem ->getBody () === $ lmsContent ['body ' ]) {
463+ if ($ contentItem ->getUpdated ()) {
464+ $ lmsContent ['updated ' ] = $ contentItem ->getUpdated ()->format ('c ' );
465+ }
466+ }
467+ }
468+
469+ $ contentItem ->update ($ lmsContent );
470+ if ($ contentItem ->getBody () !== null ) {
471+ $ contentItems [] = $ contentItem ;
472+ }
473+ }
474+ }
475+ }
476+
477+ // If there are any pages to fetch, handle that now...
478+ if (count ($ pageUrls ) > 0 ) {
479+
480+ $ output ->writeln ('Fetching contents for ' . count ($ pageUrls ) . ' pages asynchronously... ' );
481+
482+ // Request pages in a batch instead of synchronously
483+ $ allPages = $ canvasApi ->apiGetBatch ($ pageUrls );
484+
485+ // Save indices for the tempContentItems array so it will be easier (O(1)) to match up...
486+ $ tempContentItemsIndexById = [];
487+ foreach ($ tempContentItems as $ index => $ item ) {
488+ $ tempContentItemsIndexById [$ item ->getLmsContentId ()] = $ index ;
489+ }
490+
491+ foreach ($ allPages as $ pageData ) {
492+ $ lmsContent = $ this ->normalizeLmsContent ($ course , 'page ' , json_decode ($ pageData , true ));
493+
494+ if (!empty ($ lmsContent ['body ' ])) {
495+ $ lmsContentId = $ lmsContent ['id ' ];
496+ // If the item exists in the tempContentItems array... Update and add to contentItems to scan.
497+ if (isset ($ tempContentItemsIndexById [$ lmsContentId ])) {
498+ $ index = $ tempContentItemsIndexById [$ lmsContentId ];
499+ $ tempContentItems [$ index ]->update ($ lmsContent );
500+ $ contentItems [] = $ tempContentItems [$ index ];
501+ }
396502 }
397503 }
398504 }
399505
400506 // push any updates made to content items to DB
401507 $ this ->entityManager ->flush ();
402508
403- return $ this ->contentItemList ;
509+ // Log how long things took (compare synchronous vs asynchronous page fetch)
510+ $ end_time = microtime (true );
511+ $ output ->writeln ('updateCourseContent - time taken: ' . ($ end_time - $ start_time ) . ' seconds ' );
512+
513+ return $ contentItems ;
404514 }
405515
406516 public function getCourseSections (Course $ course , User $ user )
0 commit comments