diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1c82177..19f8250 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -14,9 +14,14 @@ jobs: push: false - name: Start a docker compose run: docker-compose up -d + - name: Update fonduer to the latest commit + run: | + docker-compose exec -T -u 0 jupyter apt-get update + docker-compose exec -T -u 0 jupyter apt-get install -y git + docker-compose exec -T jupyter pip install --upgrade git+https://github.com/HazyResearch/fonduer.git - name: Run a command inside a container run: | - cat << EOF | docker exec -i fonduer-tutorials_jupyter_1 /bin/bash - + cat << EOF | docker-compose exec -T jupyter /bin/bash - # Convert ipynb to py find . -name "*.ipynb" -not -path '*/\.*' | xargs jupyter nbconvert --to script sed -i -e "s/get_ipython().run_line_magic('matplotlib', 'inline')/import matplotlib\nmatplotlib.use('Agg')/" */*.py diff --git a/docker-compose.yml b/docker-compose.yml index bac552f..837c501 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,4 @@ -version: "3" +version: "3.6" services: jupyter: diff --git a/hardware/max_storage_temp_tutorial.ipynb b/hardware/max_storage_temp_tutorial.ipynb index c9044b0..69ff9d5 100644 --- a/hardware/max_storage_temp_tutorial.ipynb +++ b/hardware/max_storage_temp_tutorial.ipynb @@ -139,7 +139,8 @@ "metadata": {}, "outputs": [], "source": [ - "corpus_parser = Parser(session, structural=True, lingual=True, visual=True, pdf_path=pdf_path)\n", + "from fonduer.parser.visual_parser import PdfVisualParser\n", + "corpus_parser = Parser(session, structural=True, lingual=True, visual_parser=PdfVisualParser(pdf_path))\n", "%time corpus_parser.apply(doc_preprocessor, parallelism=PARALLEL)" ] }, diff --git a/hardware_image/transistor_image_tutorial.ipynb b/hardware_image/transistor_image_tutorial.ipynb index 6548f11..81f68cc 100644 --- a/hardware_image/transistor_image_tutorial.ipynb +++ b/hardware_image/transistor_image_tutorial.ipynb @@ -122,8 +122,9 @@ "metadata": {}, "outputs": [], "source": [ + "from fonduer.parser.visual_parser import PdfVisualParser\n", "corpus_parser = Parser(\n", - " session, structural=True, lingual=True, visual=True, pdf_path=pdf_path, flatten=[]\n", + " session, structural=True, lingual=True, visual_parser=PdfVisualParser(pdf_path), flatten=[]\n", ")\n", "corpus_parser.apply(doc_preprocessor, parallelism=PARALLEL)" ]