Skip to content

Commit 5e06d62

Browse files
authored
feat: add PDF viewer with highlighting (#238)
1 parent 678afe0 commit 5e06d62

45 files changed

Lines changed: 3212 additions & 26 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

examples/discovery-search-app/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
"carbon-components": "^10.6.0",
2929
"carbon-components-react": "^7.7.0",
3030
"classnames": "^2.2.6",
31+
"core-js": "^2.6.12",
3132
"cors": "^2.8.5",
3233
"dotenv": "^8.1.0",
3334
"express": "^4.17.1",

packages/discovery-react-components/src/components/DocumentPreview/DocumentPreview.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import SimpleDocument from './components/SimpleDocument/SimpleDocument';
88
import withErrorBoundary, { WithErrorBoundaryProps } from 'utils/hoc/withErrorBoundary';
99
import { defaultMessages, Messages } from './messages';
1010
import HtmlView from './components/HtmlView/HtmlView';
11+
import PdfViewerWithHighlight from './components/PdfViewerHighlight/PdfViewerWithHighlight';
1112
import { isCsvFile, isJsonFile } from './utils/documentData';
1213

1314
const { ZOOM_IN, ZOOM_OUT } = PreviewToolbar;
@@ -154,6 +155,7 @@ function PreviewDocument({
154155
const ErrorBoundDocumentPreview: any = withErrorBoundary(DocumentPreview);
155156
ErrorBoundDocumentPreview.PreviewToolbar = PreviewToolbar;
156157
ErrorBoundDocumentPreview.PreviewDocument = PreviewDocument;
158+
ErrorBoundDocumentPreview.PdfViewerWithHighlight = PdfViewerWithHighlight;
157159

158160
export default ErrorBoundDocumentPreview;
159161
export { ErrorBoundDocumentPreview as DocumentPreview };

packages/discovery-react-components/src/components/DocumentPreview/__fixtures__/DiscoComponent-ja.pdf.ts

Lines changed: 2 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
{
2+
"document_id": "feab8705259090b89fbcbb15942cb10d",
3+
"result_metadata": {
4+
"collection_id": "b6cdf1cd-902c-8ea3-0000-017d32224d8f"
5+
},
6+
"enriched_text": [
7+
{
8+
"entities": [
9+
{
10+
"model_name": "natural_language_understanding",
11+
"mentions": [
12+
{
13+
"confidence": 0.9950965,
14+
"location": {
15+
"end": 2,
16+
"begin": 0
17+
},
18+
"text": "最初"
19+
}
20+
],
21+
"text": "最初",
22+
"type": "Ordinal"
23+
}
24+
]
25+
}
26+
],
27+
"metadata": {
28+
"parent_document_id": "feab8705259090b89fbcbb15942cb10d",
29+
"customer_id": "IBMid-270001M55T"
30+
},
31+
"extracted_metadata": {
32+
"sha1": "4FF2B41ED7A77975ABB21D9E4025DF31335E6451",
33+
"numPages": "1",
34+
"filename": "DiscoComponents-ja-updated.pdf",
35+
"file_type": "pdf",
36+
"text_mappings": "{\"text_mappings\":[{\"page\":{\"page_number\":1,\"bbox\":[54.51987838745117,87.82411193847656,400.4930725097656,194.260009765625]},\"field\":{\"name\":\"title\",\"index\":0,\"span\":[0,20]}},{\"page\":{\"page_number\":1,\"bbox\":[54.51987838745117,411.83612060546875,262.9510192871094,425.62003993988037]},\"field\":{\"name\":\"subtitle\",\"index\":0,\"span\":[0,19]}},{\"page\":{\"page_number\":1,\"bbox\":[268.46466064453125,416.1183776855469,325.5726318359375,425.375319480896]},\"field\":{\"name\":\"subtitle\",\"index\":1,\"span\":[0,3]}},{\"page\":{\"page_number\":1,\"bbox\":[54.51987838745117,644.3582763671875,313.07745361328125,653.6152181625366]},\"field\":{\"name\":\"subtitle\",\"index\":2,\"span\":[0,15]}},{\"page\":{\"page_number\":1,\"bbox\":[54.51987838745117,456.12786865234375,95.6172866821289,463.06002855300903]},\"field\":{\"name\":\"text\",\"index\":0,\"span\":[0,4]}},{\"page\":{\"page_number\":1,\"bbox\":[100.0745620727539,452.9471435546875,257.0570983886719,463.06002855300903]},\"field\":{\"name\":\"text\",\"index\":0,\"span\":[4,27]}},{\"page\":{\"page_number\":1,\"bbox\":[261.5120849609375,452.9471435546875,408.1592712402344,463.0600233078003]},\"field\":{\"name\":\"text\",\"index\":0,\"span\":[27,49]}},{\"page\":{\"page_number\":1,\"bbox\":[412.5315856933594,456.12786865234375,464.3571472167969,463.06002855300903]},\"field\":{\"name\":\"text\",\"index\":0,\"span\":[49,54]}},{\"page\":{\"page_number\":1,\"bbox\":[54.51987838745117,452.9471435546875,534.0211791992188,596.2600049972534]},\"field\":{\"name\":\"text\",\"index\":0,\"span\":[54,234]}},{\"page\":{\"page_number\":1,\"bbox\":[54.519996643066406,679.4979858398438,535.1033325195312,723.2200269699097]},\"field\":{\"name\":\"text\",\"index\":0,\"span\":[234,353]}}],\"pages\":[{\"page_number\":0,\"height\":842.0,\"width\":595.0,\"origin\":\"TopLeft\"}]}",
37+
"title": "Discovery Component README Japanese",
38+
"publicationdate": "2021-11-18"
39+
},
40+
"subtitle": ["Discovery Component", "の使用", "サンプルアプリケーションの実行"],
41+
"html": "<html><head><meta charset=\"UTF-8\"/><meta name=\"publicationdate\" content=\"2021-11-18\"/><meta name=\"numPages\" content=\"1\"/><title>Discovery Component README Japanese</title><style>.css_1902558513 { font: bold 18.96pt '/MS-PGothic-Bold'; } .css_904416330 { font: 11.04pt '/SymbolMT'; } .css_1548729052 { font: bold 18.96pt '/Tahoma-Bold-Bold'; } .css_2121319508 { font: bold 54.96pt '/Tahoma-Bold-Bold'; } .css_1950597664 { font: 13.92pt '/Tahoma'; } .css_1579914921 { font: 13.92pt '/MS-PGothic'; }</style></head><body><section id=\"1\" data-level=\"1\"><p text-alignment=\"left\"><span class=\"title css_2121319508\"><bbox page=\"1\" x=\"54.51987838745117\" y=\"87.82411193847656\" height=\"106.43589782714844\" width=\"345.97319412231445\">Discovery Components</bbox></span></p><p text-alignment=\"left\"><span class=\"subtitle css_1548729052\"><bbox page=\"1\" x=\"54.51987838745117\" y=\"411.83612060546875\" height=\"13.783919334411621\" width=\"208.4311408996582\">Discovery Component</bbox></span></p><p text-alignment=\"left\"><span class=\"subtitle css_1902558513\"><bbox page=\"1\" x=\"268.46466064453125\" y=\"416.1183776855469\" height=\"9.256941795349121\" width=\"57.10797119140625\">の使用</bbox></span></p><p text-alignment=\"left\"><span class=\"text css_1579914921\"><bbox page=\"1\" x=\"54.51987838745117\" y=\"456.12786865234375\" height=\"6.932159900665283\" width=\"41.097408294677734\">最初に</bbox></span></p><p text-alignment=\"left\"><span class=\"text css_1950597664\"><bbox page=\"1\" x=\"100.0745620727539\" y=\"452.9471435546875\" height=\"10.112884998321533\" width=\"156.98253631591797\">IBM Watson Discovery の</bbox></span></p><p text-alignment=\"left\"><span class=\"text css_1950597664\"><bbox page=\"1\" x=\"261.5120849609375\" y=\"452.9471435546875\" height=\"10.112879753112793\" width=\"146.64718627929688\">Improve and Customize</bbox></span></p><p text-alignment=\"left\"><span class=\"text css_1579914921\"><bbox page=\"1\" 
x=\"412.5315856933594\" y=\"456.12786865234375\" height=\"6.932159900665283\" width=\"51.8255615234375\">ページで</bbox></span></p><p text-alignment=\"left\"><span class=\"text css_1950597664\"><bbox page=\"1\" x=\"54.51987838745117\" y=\"452.9471435546875\" height=\"143.31286144256592\" width=\"479.5013008117676\">Document retrieval プロジェクトをカスタマイズする必要があります。たとえばファセットや検索 バーや検索結果を設定できます。その後 Discovery component を使ったアプリケ ーションを作成します。アプリケーションは指定したプロジェクトの設定をロードしま す。 必要なソフトウェア: git, nvm, yarn または npm</bbox></span></p><p text-alignment=\"left\"><span class=\"subtitle css_1902558513\"><bbox page=\"1\" x=\"54.51987838745117\" y=\"644.3582763671875\" height=\"9.256941795349121\" width=\"258.5575752258301\">サンプルアプリケーションの実行</bbox></span></p><p text-alignment=\"left\"><span class=\"text css_904416330\"><bbox page=\"1\" x=\"54.519996643066406\" y=\"679.4979858398438\" height=\"43.72204113006592\" width=\"480.58333587646484\">• サンプルアプリケーションはこのライブラリーが提供するコアコンポーネントのカタログです。実際のデ ータを使ってコンポーネントがどのように動くかを簡単に見ることができます。コードを変更して、カスタ マイズする方法を確認することもできます。</bbox></span></p></section></body></html>",
42+
"text": [
43+
"最初に IBM Watson Discovery の Improve and Customize ページで Document retrieval プロジェクトをカスタマイズする必要があります。たとえばファセットや検索 バーや検索結果を設定できます。その後 Discovery component を使ったアプリケ ーションを作成します。アプリケーションは指定したプロジェクトの設定をロードしま す。 必要なソフトウェア: git, nvm, yarn または npm • サンプルアプリケーションはこのライブラリーが提供するコアコンポーネントのカタログです。実際のデ ータを使ってコンポーネントがどのように動くかを簡単に見ることができます。コードを変更して、カスタ マイズする方法を確認することもできます。"
44+
],
45+
"title": "Discovery Components",
46+
"document_passages": [
47+
{
48+
"passage_text": "Discovery Components",
49+
"start_offset": 0,
50+
"end_offset": 20,
51+
"field": "title"
52+
}
53+
],
54+
"table_results_references": []
55+
}
Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
import React, { FC, useMemo, useEffect } from 'react';
2+
import cx from 'classnames';
3+
import { settings } from 'carbon-components';
4+
import { QueryResult } from 'ibm-watson/discovery/v2';
5+
import { ProcessedDoc } from 'utils/document';
6+
import { TextMappings } from '../../types';
7+
import { PdfDisplayProps } from '../PdfViewer/types';
8+
import { PdfRenderedText } from '../PdfViewer/PdfViewerTextLayer';
9+
import { ExtractedDocumentInfo } from './utils/common/documentUtils';
10+
import { Highlighter } from './utils/Highlighter';
11+
import { HighlightProps } from './types';
12+
13+
/**
 * Props owned by the highlight layer itself, in addition to the shared
 * PDF display props and highlight props.
 */
type HighlightLayerProps = {
  /**
   * Optional class name used to style the highlight layer
   */
  className?: string;

  /**
   * Information parsed from the document; may be null
   */
  parsedDocument: ExtractedDocumentInfo | null;

  /**
   * Text content information of a page taken from the parsed PDF; may be null
   */
  pdfRenderedText: PdfRenderedText | null;
};

type Props = PdfDisplayProps & HighlightProps & HighlightLayerProps;
30+
31+
/**
 * Highlight layer for PdfViewer.
 *
 * Builds a highlighter for the current page, asks it for the boxes of every
 * entry in `highlights`, and renders each box as an absolutely-sized `div`
 * whose coordinates are multiplied by `scale`.
 */
const PdfViewerHighlight: FC<Props> = props => {
  const {
    className,
    highlightClassName,
    document,
    parsedDocument,
    page,
    highlights,
    pdfRenderedText,
    scale,
    _useHtmlBbox = true,
    _usePdfTextItem = true
  } = props;

  // The two underscore-prefixed flags switch off individual inputs of the highlighter.
  const highlighter = useHighlighter({
    document,
    textMappings: parsedDocument?.textMappings,
    processedDoc: _useHtmlBbox ? parsedDocument?.processedDoc : undefined,
    pdfRenderedText: _usePdfTextItem && pdfRenderedText ? pdfRenderedText : undefined,
    pageNum: page
  });

  // Hand the rendered text divs to the highlighter whenever either of them changes.
  const textDivs = pdfRenderedText?.textDivs;
  useEffect(() => {
    highlighter?.setTextContentDivs(textDivs);
  }, [highlighter, textDivs]);

  // One entry per requested highlight; entries are undefined while there is no highlighter.
  const highlightBoxes = useMemo(
    () =>
      highlights.map(highlight => (highlighter ? highlighter.getHighlight(highlight) : undefined)),
    [highlighter, highlights]
  );

  const layerClassName = cx(
    `${settings.prefix}--document-preview-pdf-viewer-highlight`,
    className
  );
  const itemClassName = `${settings.prefix}--document-preview-pdf-viewer-highlight--item`;

  return (
    <div className={layerClassName}>
      {highlightBoxes.map((entry, entryIndex) => (
        <React.Fragment key={`k-${entryIndex}`}>
          {entry?.boxes.map(({ bbox }, boxIndex) => {
            // Extra space around each box, in unscaled units; currently none.
            const padding = 0;
            const [left, top, right, bottom] = bbox;
            const boxStyle = {
              left: `${(left - padding) * scale}px`,
              top: `${(top - padding) * scale}px`,
              width: `${(right - left + padding) * scale}px`,
              height: `${(bottom - top + padding) * scale}px`
            };
            return (
              <div
                key={`${left}${top}${right}${bottom}_${boxIndex}`}
                className={cx(itemClassName, highlightClassName, entry.className)}
                style={boxStyle}
                data-testid="highlight"
              />
            );
          })}
        </React.Fragment>
      ))}
    </div>
  );
};
99+
100+
/**
 * Hook that returns a memoized Highlighter for a page.
 *
 * Returns null when no text mappings are given. A new Highlighter is created
 * whenever any of the inputs changes.
 */
const useHighlighter = ({
  document,
  textMappings,
  processedDoc,
  pdfRenderedText,
  pageNum
}: {
  document: QueryResult;
  textMappings?: TextMappings;
  processedDoc?: ProcessedDoc;
  pdfRenderedText?: PdfRenderedText;
  pageNum: number;
}) =>
  useMemo(() => {
    // Without text mappings there is nothing to build a highlighter from.
    if (!textMappings) {
      return null;
    }

    // Only the bboxes and styles of the processed document are passed on.
    const htmlBboxInfo = processedDoc && {
      bboxes: processedDoc.bboxes,
      styles: processedDoc.styles
    };

    // The rendered text is passed on only when both its text content and viewport are set.
    const pdfTextContentInfo =
      pdfRenderedText?.textContent && pdfRenderedText?.viewport ? pdfRenderedText : undefined;

    return new Highlighter({
      document,
      textMappings,
      pageNum,
      htmlBboxInfo,
      pdfTextContentInfo
    });
  }, [document, pageNum, pdfRenderedText, processedDoc, textMappings]);
130+
131+
export default PdfViewerHighlight;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// Base highlight color, taken from Carbon's white theme
// https://www.carbondesignsystem.com/guidelines/color/usage/
$highlight: #d0e2ff;

// Flex container holding the panes below
.withTextSelection {
  display: flex;

  // Pane that is as tall as the viewport and scrolls vertically
  .rightPane {
    height: 100vh;
    overflow-y: scroll;
    flex: 1 1 30%;

    p {
      margin-bottom: 0.5rem;
    }
  }

  // Monospaced text that keeps its whitespace and wraps long words
  .text {
    font-family: 'Courier New', Courier, monospace;
    font-size: 10pt;
    white-space: pre-wrap;
    overflow-wrap: break-word;
  }

  // Highlight: a darkened shade of the base color, drawn translucent
  .highlight {
    background: darken($highlight, 30%);
    opacity: 0.3;
  }
}

0 commit comments

Comments
 (0)