Skip to content

Commit 52ffe4b

Browse files
authored
Merge pull request #15 from michaelkubina-subhh/5.0.6_subhh_feature_structure_path
[FEATURE] implement structure_path feature
2 parents 5b59152 + fad8397 commit 52ffe4b

12 files changed

Lines changed: 245 additions & 1 deletion

File tree

Classes/Common/Indexer.php

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,13 @@ class Indexer
7474
*/
7575
protected static array $processedDocs = [];
7676

77+
/**
78+
* @access protected
79+
* @static
80+
* @var array Structure path nodes already extracted from the table of contents, keyed by the ID of the logical unit they lead to
81+
*/
82+
protected static array $extractedStructurePathNodes = [];
83+
7784
/**
7885
* @access protected
7986
* @static
@@ -319,6 +326,10 @@ protected static function processLogical(Document $document, array $logicalUnit)
319326
$solrDoc->setField('toplevel', $logicalUnit['id'] == $doc->toplevelId ? true : false);
320327
$solrDoc->setField('title', $metadata['title'][0], self::$fields['fieldboost']['title']);
321328
$solrDoc->setField('volume', $metadata['volume'][0], self::$fields['fieldboost']['volume']);
329+
// extract structure path
330+
self::$extractedStructurePathNodes[$logicalUnit['id']] = self::extractStructurePathNodes($doc->tableOfContents, $logicalUnit['id']);
331+
$processedStructurePath = self::buildStructurePathData(self::$extractedStructurePathNodes[$logicalUnit['id']], $document->getCurrentDocument()->toplevelId);
332+
$solrDoc->setField('structure_path', json_encode($processedStructurePath, JSON_UNESCAPED_UNICODE));
322333
// verify date formatting
323334
if(strtotime($metadata['date'][0])) {
324335
$solrDoc->setField('date', self::getFormattedDate($metadata['date'][0]));
@@ -404,6 +415,21 @@ protected static function processPhysical(Document $document, int $page, array $
404415
$solrDoc->setField('type', $physicalUnit['type'], self::$fields['fieldboost']['type']);
405416
$solrDoc->setField('collection', $doc->metadataArray[$doc->toplevelId]['collection']);
406417
$solrDoc->setField('location', $document->getLocation());
418+
// pick only the deepest structure paths
419+
$associatedPaths = [];
420+
foreach ($doc->smLinks['p2l'][$physicalUnit['id']] as $logicalId) {
421+
$path = self::$extractedStructurePathNodes[$logicalId] ?? [];
422+
if (!empty($path)) {
423+
$associatedPaths[$logicalId] = $path;
424+
}
425+
}
426+
$deepestPaths = self::filterDeepestStructurePaths($associatedPaths);
427+
$processedStructurePath = [];
428+
foreach ($deepestPaths as $path) {
429+
$segments = self::buildStructurePathData($path, $document->getCurrentDocument()->toplevelId);
430+
$processedStructurePath[] = json_encode($segments, JSON_UNESCAPED_UNICODE);
431+
}
432+
$solrDoc->setField('structure_path', $processedStructurePath);
407433

408434
$solrDoc->setField('fulltext', $fullText);
409435
if (is_array($doc->metadataArray[$doc->toplevelId])) {
@@ -639,6 +665,147 @@ private static function removeAppendsFromAuthor($authors)
639665
return $authors;
640666
}
641667

668+
/**
 * Find the branch of the structure tree that leads to a logical structure element.
 *
 * Walks the tree depth-first and returns every node on the direct line from the root of
 * the given tree down to (and including) the target, as a flat list. The nodes are returned
 * unmodified, i.e. each one still carries its own "children" sub-tree.
 *
 * @access private
 *
 * @static
 *
 * @param array $nodes Tree or sub-tree in which the target ID is looked up
 * @param string $targetId The ID of the logical structure element to be found
 * @param array $path Nodes already passed on the way down to $nodes (used by the recursion)
 *
 * @return array The nodes from the root to the target, or an empty array if the target is not part of the tree
 */
private static function extractStructurePathNodes(array $nodes, string $targetId, array $path = []): array
{
    foreach ($nodes as $node) {
        // Malformed entries can neither match nor have descendants.
        if (!is_array($node)) {
            continue;
        }

        // Remember where we came from.
        $currentPath = $path;
        $currentPath[] = $node;

        // Compare as strings: a loose "==" would treat numerically equal IDs (e.g. "010" and "10")
        // or a missing ID and an empty target as the same element.
        if (isset($node['id']) && (string) $node['id'] === $targetId) {
            return $currentPath;
        }

        if (!empty($node['children']) && is_array($node['children'])) {
            $result = self::extractStructurePathNodes($node['children'], $targetId, $currentPath);
            // A non-empty result means the target was found somewhere below this node.
            if ($result !== []) {
                return $result;
            }
        }
    }

    return [];
}
698+
699+
/**
 * Keep only the structure paths that descend deepest into the structure tree.
 *
 * A path is dropped when the sequence of its node IDs is a strict prefix of the ID sequence
 * of another path, because the longer path already contains it. Paths that merely share a
 * common ancestor are all kept. The array keys of the surviving paths are preserved.
 *
 * @access private
 *
 * @static
 *
 * @param array $paths All structure paths associated with a physical page, keyed by logical ID
 *
 * @return array The paths that are not a prefix of any other path, keyed by logical ID
 */
private static function filterDeepestStructurePaths(array $paths): array
{
    // Nothing to compare against.
    if (count($paths) <= 1) {
        return $paths;
    }

    // Resolve the ID sequence of every path once instead of once per pairwise comparison.
    $idsByLogicalId = [];
    foreach ($paths as $logicalId => $path) {
        $idsByLogicalId[$logicalId] = array_column($path, 'id');
    }

    $deepestPaths = [];
    foreach ($idsByLogicalId as $logicalId => $ids) {
        $length = count($ids);
        $isPrefix = false;

        foreach ($idsByLogicalId as $otherLogicalId => $otherIds) {
            // Only a strictly longer path that starts with the very same IDs supersedes this one.
            if (
                $logicalId !== $otherLogicalId
                && $length < count($otherIds)
                && array_slice($otherIds, 0, $length) === $ids
            ) {
                $isPrefix = true;
                break;
            }
        }

        if (!$isPrefix) {
            $deepestPaths[$logicalId] = $paths[$logicalId];
        }
    }

    return $deepestPaths;
}
742+
743+
/**
 * Create the actual array with the required data for the structure path that will be JSON encoded and indexed.
 *
 * If the cutoff ID occurs in the path, the node carrying it and all of its ancestors are removed,
 * so only its descendants remain. If it does not occur, the whole path is used. Every remaining
 * node is reduced to a single segment by buildStructurePathSegments().
 *
 * @access private
 *
 * @static
 *
 * @param array $path The structure path nodes that shall be processed, ordered from root to leaf
 * @param string $cutoffId The logical ID at which ancestors and itself will not be part of the structure path data
 *
 * @return array List of segments, one per remaining node
 */
private static function buildStructurePathData(array $path, string $cutoffId): array
{
    // Locate the cutoff by position in $path itself. Searching array_column($path, 'id') would
    // shift the index whenever a node has no "id", and a non-strict search could match an
    // ID that is only numerically equal.
    $position = 0;
    foreach ($path as $node) {
        $position++;
        if (is_array($node) && isset($node['id']) && (string) $node['id'] === $cutoffId) {
            $path = array_slice($path, $position);
            break;
        }
    }

    $segments = [];
    foreach ($path as $node) {
        $segments[] = self::buildStructurePathSegments($node);
    }

    return $segments;
}
768+
769+
/**
 * Reduce a structure path node to the single segment that represents it.
 *
 * The first available value wins, in this order: "label", "orderlabel", "volume"
 * (followed by a space and "year" if present), "type". Textual values are returned under the
 * key "label"; the structure type is returned under the key "type" so that it can be told
 * apart from a literal label.
 *
 * @access private
 *
 * @static
 *
 * @param array $node The current node that should be processed
 *
 * @return array Either ['label' => string] or ['type' => string]; ['label' => ''] if the node provides none of the values
 */
private static function buildStructurePathSegments(array $node): array
{
    // empty() would also reject "0", which is a legitimate label, order label or volume.
    $hasText = static function ($value): bool {
        return (is_string($value) || is_int($value)) && (string) $value !== '';
    };

    foreach (['label', 'orderlabel'] as $key) {
        if ($hasText($node[$key] ?? null)) {
            return [
                'label' => $node[$key],
            ];
        }
    }

    if ($hasText($node['volume'] ?? null)) {
        $value = $hasText($node['year'] ?? null)
            ? $node['volume'] . ' ' . $node['year']
            : $node['volume'];

        return [
            'label' => $value,
        ];
    }

    // Last resort: fall back to the structure type.
    if (!empty($node['type'])) {
        return [
            'type' => $node['type'],
        ];
    }

    return ['label' => ''];
}
808+
642809
/**
643810
* Handle exception.
644811
*

Classes/Common/Solr/SearchResult/ResultDocument.php

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,12 @@ class ResultDocument
7373
*/
7474
private ?string $type;
7575

76+
/**
77+
* @access private
78+
* @var array The JSON encoded structure path(s)
79+
*/
80+
private array $structurePath = [];
81+
7682
/**
7783
* @access private
7884
* @var Page[] All pages in which search phrase was found
@@ -117,6 +123,7 @@ public function __construct(Document $record, array $highlighting, array $fields
117123
$this->title = $record[$fields['title']];
118124
$this->toplevel = $record[$fields['toplevel']] ?? false;
119125
$this->type = $record[$fields['type']];
126+
$this->structurePath = $record[$fields['structure_path']] ?? [];
120127

121128
if (!empty($highlighting[$this->id])) {
122129
$highlightingForRecord = $highlighting[$this->id][$fields['fulltext']];
@@ -225,6 +232,18 @@ public function getType(): ?string
225232
return $this->type;
226233
}
227234

235+
/**
236+
* Get the JSON encoded structure path(s) of this result document.
237+
*
238+
* @access public
239+
*
240+
* @return array The structure path(s) read from the Solr record; an empty array if the record has none
241+
*/
242+
public function getStructurePath(): array
243+
{
244+
return $this->structurePath;
245+
}
246+
228247
/**
229248
* Get all result's pages which contain search phrase.
230249
*

Classes/Common/Solr/Solr.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,7 @@ public static function getFields(): array
256256
self::$fields['type'] = $solrFields['type'];
257257
self::$fields['title'] = $solrFields['title'];
258258
self::$fields['volume'] = $solrFields['volume'];
259+
self::$fields['structure_path'] = $solrFields['structurePath'];
259260
self::$fields['date'] = $solrFields['date'];
260261
self::$fields['thumbnail'] = $solrFields['thumbnail'];
261262
self::$fields['default'] = $solrFields['default'];

Classes/Common/Solr/SolrSearch.php

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -443,7 +443,7 @@ public function prepare()
443443
$params['listMetadataRecords'] = [];
444444

445445
// Restrict the fields to the required ones.
446-
$params['fields'] = 'uid,id,page,title,thumbnail,partof,toplevel,type';
446+
$params['fields'] = 'uid,id,page,title,thumbnail,partof,toplevel,type,structure_path';
447447

448448
if ($this->listedMetadata) {
449449
foreach ($this->listedMetadata as $metadata) {
@@ -525,6 +525,31 @@ public function submit($start, $rows, $processResults = true)
525525
$searchResult['page'] = $doc['page'];
526526
$searchResult['thumbnail'] = $doc['thumbnail'];
527527
$searchResult['structure'] = $doc['type'];
528+
// create string(s) from structure path(s)
529+
$encodedStructurePaths = $doc['structure_path'] ?? [];
530+
if (!is_array($encodedStructurePaths)) {
531+
$encodedStructurePaths = [$encodedStructurePaths];
532+
}
533+
$structurePathStrings = [];
534+
foreach ($encodedStructurePaths as $jsonString) {
535+
if (!is_string($jsonString) || $jsonString === '') {
536+
continue;
537+
}
538+
$segments = json_decode($jsonString, true);
539+
if ($segments === null && json_last_error() !== JSON_ERROR_NONE) {
540+
continue;
541+
}
542+
$structurePathLabels = [];
543+
foreach ($segments as $currentSegment) {
544+
if (isset($currentSegment['type'])) {
545+
$structurePathLabels[] = Helper::translate($currentSegment['type'], 'tx_dlf_structures', $this->settings['storagePid']);
546+
} elseif (!empty($currentSegment['label'])) {
547+
$structurePathLabels[] = $currentSegment['label'];
548+
}
549+
}
550+
$structurePathStrings[] = implode('', $structurePathLabels);
551+
}
552+
$searchResult['structure_path'] = $structurePathStrings;
528553
$searchResult['title'] = $doc['title'];
529554
foreach ($params['listMetadataRecords'] as $indexName => $solrField) {
530555
if (isset($doc['metadata'][$indexName])) {
@@ -826,6 +851,7 @@ private function getDocument(Document $record, array $highlighting, array $field
826851
'title' => $resultDocument->getTitle(),
827852
'toplevel' => $resultDocument->getToplevel(),
828853
'type' => $resultDocument->getType(),
854+
'structure_path' => $resultDocument->getStructurePath(),
829855
'uid' => !empty($resultDocument->getUid()) ? $resultDocument->getUid() : $parameters['uid'],
830856
'highlight' => $resultDocument->getHighlightsIds(),
831857
];

Configuration/FlexForms/ListView.xml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,14 @@
7676
</config>
7777
</TCEforms>
7878
</settings.getTitle>
79+
<settings.getStructurePath>
80+
<exclude>1</exclude>
81+
<label>LLL:EXT:dlf/Resources/Private/Language/locallang_be.xlf:flexform.getStructurePath</label>
82+
<config>
83+
<type>check</type>
84+
<default>0</default>
85+
</config>
86+
</settings.getStructurePath>
7987
<settings.basketButton>
8088
<TCEforms>
8189
<onChange>reload</onChange>

Resources/Private/Language/de.locallang_be.xlf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414
<source><![CDATA[Show only documents from the selected collection(s)]]></source>
1515
<target><![CDATA[Nur Dokumente der ausgewählten Kollektion(en) berücksichtigen]]></target>
1616
</trans-unit>
17+
<trans-unit id="flexform.getStructurePath" approved="yes">
18+
<source><![CDATA[Show breadcrumb/path to result location within the structure map]]></source>
19+
<target><![CDATA[Breadcrumb/Pfad des Treffers innerhalb des Strukturbaums anzeigen]]></target>
20+
</trans-unit>
1721
<trans-unit id="flexform.getTitle" approved="yes">
1822
<source><![CDATA[Show title of parent document if document has no title itself]]></source>
1923
<target><![CDATA[Bei Bedarf Titel des übergeordneten Dokuments anzeigen]]></target>

Resources/Private/Language/de.locallang_labels.xlf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,10 @@
776776
<trans-unit id="config.solr.fields.volume">
777777
<target>Solr-Schema-Feld "volume" : Volume field is mandatory for identifying documents (Standard ist "volume")</target>
778778
<source>Solr Schema Field "volume" : Volume field is mandatory for identifying documents (default is "volume")</source>
779+
</trans-unit>
780+
<trans-unit id="config.solr.fields.structurePath">
781+
<target>Solr-Schema-Feld "structure_path" : Field providing context about the location of a resource in the structure map (Standard ist "structure_path")</target>
782+
<source>Solr Schema Field "structure_path" : Field providing context about the location of a resource in the structure map (default is "structure_path")</source>
779783
</trans-unit>
780784
<trans-unit id="config.solr.fields.date">
781785
<target>Solr Schema Field "date" : The date a resource was issued or created. Used for datesearch (Standard ist "date")</target>

Resources/Private/Language/de.locallang_metadata.xlf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,10 @@
8989
<source><![CDATA[Rights Information]]></source>
9090
<target><![CDATA[Rechteinformation]]></target>
9191
</trans-unit>
92+
<trans-unit id="metadata.structure_path" approved="yes">
93+
<source><![CDATA[Structure Path]]></source>
94+
<target><![CDATA[Strukturpfad]]></target>
95+
</trans-unit>
9296
<trans-unit id="metadata.shelfmark" approved="yes">
9397
<source><![CDATA[Shelfmark]]></source>
9498
<target><![CDATA[Signatur]]></target>

Resources/Private/Language/locallang_be.xlf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414
<trans-unit id="flexform.excludeOtherCollections">
1515
<source><![CDATA[Show only documents from the selected collection(s)]]></source>
1616
</trans-unit>
17+
<trans-unit id="flexform.getStructurePath">
18+
<source><![CDATA[Show breadcrumb/path to result location within the structure map]]></source>
19+
</trans-unit>
1720
<trans-unit id="flexform.library">
1821
<source><![CDATA[Providing library]]></source>
1922
</trans-unit>

Resources/Private/Language/locallang_labels.xlf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -587,6 +587,9 @@
587587
<trans-unit id="config.solr.fields.volume">
588588
<source>Solr Schema Field "volume" : Volume field is mandatory for identifying documents (default is "volume")</source>
589589
</trans-unit>
590+
<trans-unit id="config.solr.fields.structurePath">
591+
<source>Solr Schema Field "structure_path" : Field providing context about the location of a resource in the structure map (default is "structure_path")</source>
592+
</trans-unit>
590593
<trans-unit id="config.solr.fields.date">
591594
<source>Solr Schema Field "date" : The date a resource was issued or created. Used for datesearch (default is "date")</source>
592595
</trans-unit>

0 commit comments

Comments
 (0)