Skip to content

Commit 704d8ac

Browse files
authored
Merge pull request ucfopen#1073 from mbusch3/async-pages-from-lms
Adds an async batch fetch function for getting Canvas Pages.
2 parents c6a6a7e + 44a5ed2 commit 704d8ac

2 files changed

Lines changed: 172 additions & 4 deletions

File tree

src/Lms/Canvas/CanvasApi.php

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ class CanvasApi {
1212

1313
protected $session;
1414
protected $baseUrl;
15-
protected $httpClient;
1615
protected $apiToken;
16+
protected $httpClient;
1717

1818
public function __construct($baseUrl, $apiToken)
1919
{
@@ -87,6 +87,64 @@ public function apiGet(string $url, array $options = [], int $perPage = 100, ?Lm
8787
return $lmsResponse;
8888
}
8989

90+
91+
/**
 * API call GET multiple (batch): issue several GET requests concurrently.
 *
 * Every entry in $paths is requested through a shared curl multi handle, so
 * the total wall time is roughly that of the slowest request instead of the
 * sum of all requests.
 *
 * @param array $paths API paths relative to "/api/v1/", or full URLs that
 *                     already contain the configured base URL.
 *
 * @return array Raw response bodies (strings) of the requests that returned
 *               HTTP 200, in the order of $paths. Failed requests are logged
 *               to the console and omitted, so the result may be shorter
 *               than $paths and its keys do not line up with $paths.
 */
public function apiGetBatch(array $paths): array
{
    if ($paths === []) {
        return [];
    }

    $multi = curl_multi_init();
    $handles = [];
    $output = new ConsoleOutput();

    // Create one easy handle per path and register it with the multi handle.
    foreach ($paths as $i => $url) {
        // Anything that does not already mention the base URL is treated as a relative API path.
        if (strpos($url, $this->baseUrl) === false) {
            $url = "https://{$this->baseUrl}/api/v1/{$url}";
        }

        $ch = curl_init($url);
        if ($ch === false) {
            // Without a handle there is nothing to schedule; report and move on.
            $output->writeln("Unable to initialise request for " . $paths[$i]);
            continue;
        }

        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_HTTPHEADER => ["Authorization: Bearer {$this->apiToken}"],
        ]);

        curl_multi_add_handle($multi, $ch);
        $handles[$i] = $ch;
    }

    // Drive all transfers to completion. Only wait for socket activity while
    // transfers are still running, stop if the multi handle reports an error,
    // and back off briefly when select() fails (-1) so the loop cannot spin.
    $running = 0;
    do {
        $execStatus = curl_multi_exec($multi, $running);
        if ($running > 0 && curl_multi_select($multi) === -1) {
            usleep(1000);
        }
    } while ($running > 0 && $execStatus === CURLM_OK);

    if ($execStatus !== CURLM_OK) {
        $output->writeln("curl multi error: " . curl_multi_strerror($execStatus));
    }

    // Drain the completion queue. Transport-level failures (DNS, TLS,
    // connection refused, ...) are reported here; curl_error() on the easy
    // handle may be empty for transfers that ran through the multi interface.
    $transportErrors = [];
    while (($info = curl_multi_info_read($multi)) !== false) {
        if ($info['result'] === CURLE_OK) {
            continue;
        }
        $key = array_search($info['handle'], $handles, true);
        if ($key !== false) {
            $transportErrors[$key] = (string) curl_strerror($info['result']);
        }
    }

    // Gather results for each handle, then release it.
    $responses = [];
    foreach ($handles as $i => $ch) {
        $content = curl_multi_getcontent($ch);
        $status = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $error = curl_error($ch);
        if ($error === '' && isset($transportErrors[$i])) {
            $error = $transportErrors[$i];
        }

        if ($status === 200) {
            $responses[] = $content;
        } else {
            $output->writeln($status . " error fetching " . $paths[$i] . ": " . $error);
        }

        curl_multi_remove_handle($multi, $ch);
        curl_close($ch);
    }

    curl_multi_close($multi);

    return $responses;
}
146+
147+
90148
public function apiPost($url, $options, $sendAuthorized = true)
91149
{
92150
$lmsResponse = new LmsResponse();
@@ -403,4 +461,4 @@ public function apiDelete($url, $options = []) {
403461

404462
}
405463

406-
}
464+
}

src/Lms/Canvas/CanvasLms.php

Lines changed: 112 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,11 @@ public function updateCourseContent(Course $course, User $user, $force = false):
358358
$apiToken = $this->getApiToken($user);
359359

360360
$canvasApi = new CanvasApi($apiDomain, $apiToken);
361+
$tempPages = [];
362+
$pageUrls = [];
363+
$asyncFetch = true;
361364

365+
$start_time = microtime(true);
362366
foreach ($urls as $contentType => $url) {
363367
$response = $canvasApi->apiGet($url);
364368

@@ -392,15 +396,121 @@ public function updateCourseContent(Course $course, User $user, $force = false):
392396
continue;
393397
}
394398

395-
$this->saveOrUpdateContentItem($canvasApi, $course, $contentType, $content, $force);
399+
$lmsContent = $this->normalizeLmsContent($course, $contentType, $content);
400+
if (!$lmsContent) {
401+
continue;
402+
}
403+
404+
/* Check to see if the existing content item is already in the database and hasn't been updated since.
405+
The $force variable is used to force the full rescan, and skips the 'already exists' check */
406+
407+
$contentItem = $this->contentItemRepo->findOneBy([
408+
'contentType' => $contentType,
409+
'lmsContentId' => $lmsContent['id'],
410+
'course' => $course,
411+
]);
412+
413+
if (!$force && $contentItem) {
414+
$contentItemUpdated = $contentItem->getUpdated();
415+
$lmsUpdated = new \DateTime($lmsContent['updated'], UtilityService::$timezone);
416+
if ($contentItemUpdated == $lmsUpdated) {
417+
$contentItem->setActive(true);
418+
continue;
419+
}
420+
$output->writeln('Content item already exists but is out of date. Updating ' . $contentType . ': ' . $lmsContent['title']);
421+
}
422+
else {
423+
$output->writeln('New content item - ' . $contentType . ': ' . $lmsContent['title']);
424+
}
425+
426+
if (!$contentItem) {
427+
$contentItem = new ContentItem();
428+
$contentItem->setCourse($course)
429+
->setLmsContentId($lmsContent['id'])
430+
->setActive(true)
431+
->setContentType($contentType);
432+
$this->entityManager->persist($contentItem);
433+
}
434+
435+
if ('page' === $contentType) {
436+
$url = "courses/{$course->getLmsCourseId()}/pages/{$lmsContent['id']}";
437+
if($asyncFetch) {
438+
/* NEW PAGE FETCH: New asynchronous batch fetch. The real magic is in the $pageUrls handler beneath this foreach loop (line ~305). */
439+
$tempContentItems[] = $contentItem;
440+
$pageUrls[] = $url;
441+
continue;
442+
}
443+
else {
444+
/* OLD PAGE FETCH: 1-at-a-time synchronous fetch */
445+
$pageResponse = $canvasApi->apiGet($url);
446+
$pageObj = $pageResponse->getContent();
447+
448+
if (!empty($pageObj['body'])) {
449+
$lmsContent['body'] = $pageObj['body'];
450+
}
451+
}
452+
}
453+
454+
/* get HTML file content */
455+
if (('file' === $contentType) && ('html' === $content['mime_class'])) {
456+
$lmsContent['body'] = file_get_contents($content['url']);
457+
}
458+
459+
// some content types don't have an updated date, so we'll compare content
460+
// to find out if content has changed.
461+
if (in_array($contentType, ['syllabus', 'discussion_topic', 'announcement', 'quiz'])) {
462+
if ($contentItem->getBody() === $lmsContent['body']) {
463+
if ($contentItem->getUpdated()) {
464+
$lmsContent['updated'] = $contentItem->getUpdated()->format('c');
465+
}
466+
}
467+
}
468+
469+
$contentItem->update($lmsContent);
470+
if($contentItem->getBody() !== null) {
471+
$contentItems[] = $contentItem;
472+
}
473+
}
474+
}
475+
}
476+
477+
// If there are any pages to fetch, handle that now...
478+
if(count($pageUrls) > 0) {
479+
480+
$output->writeln('Fetching contents for ' . count($pageUrls) . ' pages asynchronously...');
481+
482+
// Request pages in a batch instead of synchronously
483+
$allPages = $canvasApi->apiGetBatch($pageUrls);
484+
485+
// Save indices for the tempContentItems array so it will be easier (O(1)) to match up...
486+
$tempContentItemsIndexById = [];
487+
foreach($tempContentItems as $index => $item) {
488+
$tempContentItemsIndexById[$item->getLmsContentId()] = $index;
489+
}
490+
491+
foreach($allPages as $pageData) {
492+
$lmsContent = $this->normalizeLmsContent($course, 'page', json_decode($pageData, true));
493+
494+
if (!empty($lmsContent['body'])) {
495+
$lmsContentId = $lmsContent['id'];
496+
// If the item exists in the tempContentItems array... Update and add to contentItems to scan.
497+
if(isset($tempContentItemsIndexById[$lmsContentId])) {
498+
$index = $tempContentItemsIndexById[$lmsContentId];
499+
$tempContentItems[$index]->update($lmsContent);
500+
$contentItems[] = $tempContentItems[$index];
501+
}
396502
}
397503
}
398504
}
399505

400506
// push any updates made to content items to DB
401507
$this->entityManager->flush();
402508

403-
return $this->contentItemList;
509+
// Log how long things took (compare synchronous vs asynchronous page fetch)
510+
$end_time = microtime(true);
511+
$output->writeln('updateCourseContent - time taken: ' . ($end_time - $start_time) . ' seconds');
512+
513+
return $contentItems;
404514
}
405515

406516
public function getCourseSections(Course $course, User $user)

0 commit comments

Comments
 (0)