Skip to content

Commit 249fee8

Browse files
authored
Merge branch 'main' into prereq-tree
2 parents dc1d233 + 3db95e0 commit 249fee8

9 files changed

Lines changed: 233 additions & 259 deletions

File tree

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
import React from 'react';
2+
import { observer } from "mobx-react-lite";
3+
import * as pdfjsLib from "pdfjs-dist";
4+
import pdfWorker from "pdfjs-dist/build/pdf.worker?url";
5+
import { useState } from "react";
6+
import UploadField from '../views/Components/SideBarComponents/UploadField';
7+
8+
pdfjsLib.GlobalWorkerOptions.workerSrc = pdfWorker;
9+
10+
const UploadTranscriptPresenter = observer(({ model }) => {
11+
const [errorMessage, setErrorMessage] = useState(""); // Stores error message
12+
const [errorVisibility, setErrorVisibility] = useState("hidden"); // Controls visibility
13+
const [fileInputValue, setFileInputValue] = useState(""); // Controls upload field state
14+
15+
/**
 * Reads an uploaded transcript PDF, collects the text items of every page and
 * hands them to evaluatePDFtextObjectArray for course-code extraction.
 *
 * Problems are reported through throwTranscriptScraperError rather than by
 * rejecting, so the returned promise resolves in every handled case.
 *
 * @param {File|undefined} file - The file picked in the upload field; may be
 *   undefined when the change event fired without a selection.
 * @returns {Promise<void>}
 */
async function transcriptScraperFunction(file) {
    if (!file) {
        console.log("element: 'PDF-Scraper-Input' changed, but we havent gotten a file yet.");
        return;
    }
    if (file.type !== "application/pdf") {
        throwTranscriptScraperError("Uploaded file isn't PDF.");
        return;
    }

    // A new attempt starts: hide any error left over from a previous upload.
    setErrorVisibility("hidden");

    try {
        // Reading the file is inside the try block so that a failed read is
        // shown to the user instead of becoming an unhandled rejection.
        const arrayBuffer = await file.arrayBuffer();
        const typedArray = new Uint8Array(arrayBuffer);
        const pdf = await pdfjsLib.getDocument({ data: typedArray }).promise;

        // All text items of the document, in page order.
        const textObjects = [];

        // pdf.js pages are 1-indexed.
        for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
            const page = await pdf.getPage(pageNum);
            const textContent = await page.getTextContent();
            // Appended one by one: spreading a very large items array into
            // push() can exceed the engine's argument-count limit.
            for (const item of textContent.items) {
                textObjects.push(item);
            }
        }

        evaluatePDFtextObjectArray(textObjects);
    } catch (e) {
        throwTranscriptScraperError("While parsing the pdf something went wrong." + e);
    }
}
60+
61+
/**
 * Reports a scraper problem: writes it to the console, stores it as the
 * component's error message and makes the error element visible.
 * Despite the name, nothing is thrown.
 *
 * @param {string} txt - Human-readable description of what went wrong.
 */
function throwTranscriptScraperError(txt) {
    const consoleLine = "PDF-Scraper-Error: " + txt;
    console.log(consoleLine);

    const displayedLine = "Error: " + txt;
    setErrorMessage(displayedLine);
    setErrorVisibility("visible");
}
66+
67+
/**
 * Merges course codes into the "completedCourses" list kept in localStorage
 * and dispatches a "completedCourses changed" event on window afterwards.
 *
 * The previously stored codes are sorted, the new codes are appended in the
 * order given, and duplicates are removed (first occurrence wins).
 *
 * @param {string[]} codesArr - Course codes to add to the stored list.
 */
function writeLocalStorage_completedCourses(codesArr) {
    const STORAGE_KEY = "completedCourses";

    // Start from the stored list. If the entry is missing, is not valid JSON,
    // or is not an array, start from an empty list instead of crashing.
    let local = [];
    const stored = localStorage.getItem(STORAGE_KEY);
    if (stored) {
        try {
            const parsed = JSON.parse(stored);
            if (Array.isArray(parsed)) {
                local = parsed;
            }
        } catch (e) {
            console.log("PDF-Scraper-Error: stored completedCourses is not valid JSON, starting from an empty list. " + e);
        }
    }

    local.sort();

    // A Set keeps insertion order, so this removes duplicates while
    // preserving the order of first occurrence.
    const newcodes = [...new Set(local.concat(codesArr))];

    localStorage.setItem(STORAGE_KEY, JSON.stringify(newcodes));
    console.log(newcodes);

    // Storage writes do not fire a "storage" event in the same window, so a
    // custom event is dispatched for anything listening for the change.
    window.dispatchEvent(new Event("completedCourses changed"));
}
87+
88+
/**
 * Scans the text items of a transcript PDF for the course codes listed in
 * the first table that follows the KTH heading, then stores them through
 * writeLocalStorage_completedCourses.
 *
 * Detection relies on the exact x positions (transform[4]) at which the
 * transcript generator places text, so the coordinates below are compared
 * for strict equality on purpose.
 *
 * Failures are reported through throwTranscriptScraperError; nothing is
 * stored in that case.
 *
 * @param {Array<{str: string, transform: number[]}>} textObjects - Text
 *   items of all pages in reading order.
 */
function evaluatePDFtextObjectArray(textObjects) {
    // x position of the leftmost column (university heading and course codes).
    const LEFT_COLUMN_X = 56.692;
    // x position expected ROW_END_OFFSET items after a course code.
    const ROW_END_X = 510.233;
    const ROW_END_OFFSET = 12;
    // x position found two items before a "Code"/"Kod" header when the header
    // is repeated only because the table continues after a page break.
    const PAGE_BREAK_X = 497.66899718999997;
    // Longest course code seen so far is 7 characters.
    const MAX_CODE_LENGTH = 7;

    // Bounds-safe x position lookup: yields undefined for indexes outside
    // the array (or items without a transform) instead of throwing.
    const xPositionAt = (index) => {
        const item = textObjects[index];
        return item && item.transform ? item.transform[4] : undefined;
    };

    const scrapedCodes = [];

    let flagKTH = false;          // currently inside the KTH section
    let flagKTH_NeverSet = true;  // the KTH heading was never seen
    let flagTable = false;        // currently inside the table being transcribed
    let flagTableDone = false;    // a table header was already seen once
    let flagErrorRecords = false; // the non-national transcript title was seen

    for (let i = 0; i < textObjects.length; i++) {
        const text = textObjects[i].str;
        const x = xPositionAt(i);

        // Look for the university heading in the left column.
        if (!flagKTH && x === LEFT_COLUMN_X) {
            if (text === "Kungliga Tekniska högskolan" || text === "KTH Royal Institute of Technology") {
                flagKTH = true;
                flagKTH_NeverSet = false;
                continue;
            }
        }

        if (!flagErrorRecords && (text === "Resultatintyg" || text === "Official Transcript of Records")) {
            flagErrorRecords = true;
        }

        if (!flagKTH) {
            continue;
        }

        // The first table after the KTH heading is assumed to be the one with
        // completed courses.
        // TODO: this is not guaranteed; a check for the
        // 'completed courses'/'avslutade kurser' caption may be needed.

        // Every table starts with a "Code"/"Kod" header cell.
        if (text === "Code" || text === "Kod") {
            if (flagTable) {
                flagTableDone = true; // a table was already being transcribed
            }

            if (!flagTableDone) {
                flagTable = true;
            } else if (xPositionAt(i - 2) !== PAGE_BREAK_X) {
                // The repeated header is not a page-break continuation, so a
                // different table starts here (unfinished courses or another
                // university): stop transcribing.
                flagTable = false;
                flagKTH = false;
            }
        }

        // A course code is a short text in the left column whose row ends at
        // ROW_END_X exactly ROW_END_OFFSET items later. Near the end of the
        // array that item does not exist; such texts are skipped.
        const isCourseCodeCell =
            x === LEFT_COLUMN_X &&
            xPositionAt(i + ROW_END_OFFSET) === ROW_END_X &&
            text.length <= MAX_CODE_LENGTH;

        if (isCourseCodeCell && flagTable) {
            scrapedCodes.push(text);
        }
    }

    if (flagErrorRecords && scrapedCodes.length === 0) {
        throwTranscriptScraperError("Provided Official Transcript of Records instead of National Official transcript of records.");
        return;
    }

    if (flagKTH_NeverSet) {
        throwTranscriptScraperError("Provided pdf doesn't contain KTH.");
        return;
    }

    if (scrapedCodes.length === 0) {
        throwTranscriptScraperError("Couldn't find any tables to transcribe.");
        return;
    }

    writeLocalStorage_completedCourses(scrapedCodes);
}
169+
170+
/**
 * Change handler for the upload field: starts scraping the first selected
 * file and then clears the controlled input value.
 *
 * @param {{target: {files: ArrayLike<File>}}} event - Change event of the
 *   file input.
 */
const handleFileChange = (event) => {
    const file = event.target.files[0];

    // The scraper runs asynchronously. A rejection handler is attached so a
    // failure is shown to the user instead of being left unhandled.
    transcriptScraperFunction(file).catch((e) => {
        throwTranscriptScraperError("While parsing the pdf something went wrong." + e);
    });

    setFileInputValue('');
};
178+
179+
return (
180+
<UploadField
181+
errorMessage={errorMessage}
182+
errorVisibility={errorVisibility}
183+
handleFileChange={handleFileChange}
184+
fileInputValue = {fileInputValue}
185+
/>);
186+
});
187+
188+
export { UploadTranscriptPresenter };

my-app/src/scripts/transcript-scraper/transcript-scraper-utils.jsx

Lines changed: 0 additions & 75 deletions
This file was deleted.

my-app/src/views/Components/FavouriteDropdown.jsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ import React from 'react';
22

33
function FavouritesDropdown(props) {
44
return (
5-
<div className="absolute mt-2 w-48 bg-white border border-solid border-black rounded-lg z-50">
5+
<div className="absolute mt-2 w-48 bg-white border border-solid border-black rounded-lg z-50 overflow-y-auto">
66
{props.favouriteCourses.length > 0 ? (
77
props.favouriteCourses.map(course => (
88
<div

my-app/src/views/Components/SideBarComponents/CourseTranscriptList.jsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ export default function CourseTranscriptList() {
4747
</div>
4848

4949
{/* Container for multiple items per row */}
50-
<div className="grid grid-cols-3 w-full max-[1200px]:grid-cols-2 gap-1 sm:gap-2">
50+
<div className="grid grid-cols-3 w-full max-[1200px]:grid-cols-2 max-[700px]:grid-cols-1 gap-1 sm:gap-2">
5151
{items.map((item, index) => (
5252
<div
5353
key={index}

my-app/src/views/Components/SideBarComponents/DropDownField.jsx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ export default function DropDownField({options}) {
2424
<p className="text-sm opacity-50"> - filter description</p>
2525
</div>
2626
</div>
27-
<div className="relative inline-block text-left w-full">
27+
<div className="relative flex justify-center text-left w-full">
2828
{/* Dropdown Button */}
2929
<button
3030
onClick={toggleDropdown}
@@ -35,11 +35,11 @@ export default function DropDownField({options}) {
3535

3636
{/* Dropdown Menu */}
3737
{isOpen && (
38-
<div className="absolute mt-2 w-48 bg-[#aba8e0] border border-gray-200 rounded-lg shadow-lg z-10 ">
38+
<div className="absolute bottom-10 mt-2 w-48 bg-[#aba8e0] border border-gray-200 rounded-lg shadow-lg z-10 ">
3939
<ul className="">
4040
{items.map((item, index) => (
4141
<li key={index} className="flex items-center p-2 hover:bg-gray-500">
42-
<label class="flex-auto py-3 px-4 inline-flex gap-x-2 -mt-px -ms-px
42+
<label className="flex-auto py-3 px-4 inline-flex gap-x-2 -mt-px -ms-px
4343
first:rounded-t-md last:rounded-b-md sm:first:rounded-s-md sm:mt-0 sm:first:ms-0 s
4444
m:first:rounded-se-none sm:last:rounded-es-none sm:last:rounded-e-md text-sm font-medium
4545
focus:z-10 border border-gray-200 shadow-2xs cursor-pointer">
@@ -50,7 +50,7 @@ export default function DropDownField({options}) {
5050
onChange={() => handleCheckboxChange(item)}
5151
className="mr-2 sr-only peer"
5252
/>
53-
<div class="relative w-11 h-6 bg-gray-200 peer-focus:outline-none peer-focus:ring-4
53+
<div className="relative w-11 h-6 bg-gray-200 peer-focus:outline-none peer-focus:ring-4
5454
peer-focus:ring-blue-300 rounded-full peer
5555
peer-checked:after:translate-x-full rtl:peer-checked:after:-translate-x-full
5656
peer-checked:after:border-white after:content-[''] after:absolute after:top-[2px] after:start-[2px]

0 commit comments

Comments
 (0)