-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathRebuildQueryserviceData.php
More file actions
116 lines (94 loc) · 3.91 KB
/
RebuildQueryserviceData.php
File metadata and controls
116 lines (94 loc) · 3.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
<?php
namespace App\Console\Commands;
use App\Constants\MediawikiNamespace;
use App\Jobs\SpawnQueryserviceUpdaterJob;
use App\QueryserviceNamespace;
use App\Traits;
use App\Wiki;
use App\WikiSetting;
use Illuminate\Console\Command;
use Illuminate\Support\Facades\Log;
use Illuminate\Support\Facades\Queue;
/**
 * Console command that enqueues queryservice updater jobs for the entity
 * pages of one or more wikis.
 *
 * For every selected wiki the command collects the page titles in the item
 * and property namespaces (plus the lexeme namespace when the wiki has the
 * Lexeme setting enabled), strips the namespace prefixes, splits the result
 * into chunks and pushes one SpawnQueryserviceUpdaterJob per chunk.
 *
 * Options:
 *  --domain           Repeatable; restricts the run to the given wiki domains.
 *                     When omitted, all wikis are processed.
 *  --chunkSize        Number of entities handed to a single job (default 50).
 *  --queueName        Queue the jobs are pushed on (default "default").
 *  --sparqlUrlFormat  sprintf() format receiving the queryservice namespace.
 */
class RebuildQueryserviceData extends Command {
    use Traits\PageFetcher;

    protected $signature = 'wbs-qs:rebuild {--domain=*} {--chunkSize=50} {--queueName=default} {--sparqlUrlFormat=http://queryservice.default.svc.cluster.local:9999/bigdata/namespace/%s/sparql}';

    protected $description = 'Rebuild the queryservice data for a certain wiki or all wikis';

    /** Maximum number of entity ids passed to one job. */
    protected int $chunkSize;

    /** sprintf() format used to build the SPARQL endpoint URL of a wiki. */
    protected string $sparqlUrlFormat;

    /** Name of the queue the jobs are pushed on. */
    protected string $queueName;

    /**
     * Dispatches the updater jobs for all selected wikis.
     *
     * A wiki whose prerequisites (entity list or queryservice namespace)
     * cannot be resolved is logged, counted as skipped and left out; the
     * remaining wikis are still processed.
     *
     * @return int 0 when every wiki was processed, 1 when at least one wiki
     *             was skipped or the chunkSize option is invalid
     */
    public function handle() {
        $this->chunkSize = intval($this->option('chunkSize'));
        if ($this->chunkSize < 1) {
            // array_chunk() rejects sizes below 1; fail early with a clear
            // message instead of an uncaught error in the middle of the run.
            Log::error('Option chunkSize must be a positive integer, got "' . $this->option('chunkSize') . '".');

            return 1;
        }

        $this->sparqlUrlFormat = $this->option('sparqlUrlFormat');
        $this->queueName = $this->option('queueName');
        // NOTE(review): apiUrl is not declared in this class; presumably it is
        // declared and read by the PageFetcher trait - confirm.
        $this->apiUrl = getenv('PLATFORM_MW_BACKEND_HOST') . '/w/api.php';

        $wikiDomains = $this->option('domain');
        $exitCode = 0;

        // No --domain given means "all wikis".
        $wikis = count($wikiDomains) !== 0
            ? Wiki::whereIn('domain', $wikiDomains)->get()
            : Wiki::query()->get();

        $jobTotal = 0;
        $skippedWikis = 0;
        $processedWikis = 0;

        foreach ($wikis as $wiki) {
            try {
                $entities = $this->getEntitiesForWiki($wiki);
                $sparqlUrl = $this->getSparqlUrl($wiki);
            } catch (\Exception $ex) {
                Log::error(
                    'Failed to get prerequisites for enqueuing wiki ' . $wiki->domain . ', will not dispatch jobs.',
                    [$ex],
                );
                $exitCode = 1;
                $skippedWikis += 1;

                // Skip only this wiki; one failing wiki must not prevent the
                // remaining ones from being rebuilt.
                continue;
            }

            $entityChunks = array_chunk($entities, $this->chunkSize);
            foreach ($entityChunks as $entityChunk) {
                Queue::pushOn($this->queueName, new SpawnQueryserviceUpdaterJob(
                    $wiki->domain,
                    implode(',', $entityChunk),
                    $sparqlUrl,
                ));
            }

            $jobTotal += count($entityChunks);
            $processedWikis += 1;
            Log::info('Dispatched ' . count($entityChunks) . ' job(s) for wiki ' . $wiki->domain . '.');
        }

        Log::info(
            'Done. Jobs dispatched: ' . $jobTotal . ' Wikis processed: ' . $processedWikis . ' Wikis skipped: ' . $skippedWikis
        );

        return $exitCode;
    }

    /**
     * Collects the page titles of all items, properties and (if the wiki has
     * the Lexeme setting set to '1') lexemes, with namespace prefixes removed.
     *
     * @return array merged list of items, properties and lexemes, in that order
     */
    private function getEntitiesForWiki(Wiki $wiki): array {
        $items = $this->fetchPagesInNamespace($wiki->domain, MediawikiNamespace::item);
        $properties = $this->fetchPagesInNamespace($wiki->domain, MediawikiNamespace::property);

        $lexemesSetting = WikiSetting::where(
            [
                'wiki_id' => $wiki->id,
                'name' => WikiSetting::wwExtEnableWikibaseLexeme,
            ],
        )->first();
        $hasLexemesEnabled = $lexemesSetting !== null && $lexemesSetting->value === '1';

        // Only query the lexeme namespace when the setting is enabled.
        $lexemes = $hasLexemesEnabled
            ? $this->fetchPagesInNamespace($wiki->domain, MediawikiNamespace::lexeme)
            : [];

        $merged = array_merge($items, $properties, $lexemes);
        $this->stripPrefixes($merged);

        return $merged;
    }

    /**
     * Builds the SPARQL endpoint URL for the wiki from its queryservice
     * namespace record and the configured URL format.
     *
     * @throws \Exception when the wiki has no queryservice namespace record
     */
    private function getSparqlUrl(Wiki $wiki): string {
        $match = QueryserviceNamespace::where(['wiki_id' => $wiki->id])->first();
        if (!$match) {
            throw new \Exception(
                'Unable to find queryservice namespace record for wiki ' . $wiki->domain
            );
        }

        return sprintf($this->sparqlUrlFormat, $match->namespace);
    }

    /**
     * Replaces every entry, in place, with the part after its last colon
     * (e.g. "Property:P1" becomes "P1"); entries without a colon are kept.
     *
     * @param array $items list of page titles, modified by reference
     */
    private static function stripPrefixes(array &$items): void {
        foreach ($items as &$item) {
            $e = explode(':', $item);
            $item = end($e);
        }
        // Break the reference so the last element is not left aliased.
        unset($item);
    }
}