Skip to content

Commit 27a5b77

Browse files
Site upload transcript working (#17)
* initial working transcript upload * working upload with course list --------- Co-authored-by: boldizsarbenedek <bb.boldizsarbenedek@gmail.com>
1 parent d8d93ff commit 27a5b77

File tree

9 files changed

+702
-32
lines changed

9 files changed

+702
-32
lines changed

my-app/package-lock.json

Lines changed: 200 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

my-app/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"ldrs": "^1.1.6",
1616
"mobx": "^6.13.7",
1717
"mobx-react-lite": "^4.1.0",
18+
"pdfjs-dist": "^5.1.91",
1819
"react": "^19.0.0",
1920
"react-dom": "^19.0.0",
2021
"react-router-dom": "^7.4.0",
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
6+
<title>PDF to Text</title>
7+
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.10.377/pdf.min.js"></script>
8+
</head>
9+
<body>
10+
<h1>Upload PDF to Extract Text</h1>
11+
<input type="file" id="pdf-file" accept="application/pdf">
12+
<pre id="output"></pre>
13+
14+
<script>
15+
document.getElementById('pdf-file').addEventListener('change', function(event) {
16+
const file = event.target.files[0];
17+
if (file && file.type === 'application/pdf') {
18+
const reader = new FileReader();
19+
reader.onload = function(e) {
20+
const pdfData = new Uint8Array(e.target.result);
21+
22+
// Using PDF.js to read the PDF
23+
pdfjsLib.getDocument(pdfData).promise.then(function(pdf) {
24+
let textContent = '';
25+
26+
const numPages = pdf.numPages;
27+
let pagePromises = [];
28+
29+
for (let pageNum = 1; pageNum <= numPages; pageNum++) {
30+
pagePromises.push(pdf.getPage(pageNum).then(function(page) {
31+
return page.getTextContent().then(function(text) {
32+
textContent += text.items.map(item => item.str).join(" XXX ") + " New page \n";
33+
});
34+
}));
35+
}
36+
37+
// After all pages are processed, output the text
38+
Promise.all(pagePromises).then(function() {
39+
document.getElementById('output').textContent = textContent;
40+
});
41+
}).catch(function(error) {
42+
document.getElementById('output').textContent = 'Error reading PDF: ' + error;
43+
});
44+
};
45+
reader.readAsArrayBuffer(file);
46+
} else {
47+
alert('Please upload a valid PDF file.');
48+
}
49+
});
50+
</script>
51+
</body>
52+
</html>
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
6+
<title>Transcript Scraper</title>
7+
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/2.16.105/pdf.min.js"></script>
8+
<script src="transcript-scraper.js?1" defer></script>
9+
</head>
10+
<body>
11+
<h1>Upload PDF to Extract Text</h1>
12+
<h3>Takes in National Official Transcript of Records, and prints out all the course codes that person has completed in KTH. </h3>
13+
14+
<input type="file" id="PDF-Scraper-Input" />
15+
16+
<pre id="transcript-scraper.js:output"></pre>
17+
<pre id="PDF-Scraper-Error" style="visibility: hidden; color: red"></pre>
18+
19+
20+
</body>
21+
</html>

0 commit comments

Comments
 (0)