Skip to content

Commit 13af717

Browse files
committed
Output HTML tables
1 parent c06de67 commit 13af717

3 files changed

Lines changed: 20 additions & 6 deletions

File tree

ace/export.py

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,4 +98,18 @@ def export_database(db, foldername, skip_empty=True):
9898
}
9999

100100
with (foldername / 'export.json').open('w') as f:
101-
json.dump(export_md, f)
101+
json.dump(export_md, f)
102+
103+
# Save table HTML files if available
104+
tables_dir = foldername / 'tables'
105+
tables_dir.mkdir(parents=True, exist_ok=True)
106+
107+
for art in articles:
108+
art_dir = tables_dir / str(art.id)
109+
art_dir.mkdir(parents=True, exist_ok=True)
110+
111+
for t in art.tables:
112+
if t.input_html:
113+
table_file = art_dir / f"{t.id}.html"
114+
with table_file.open('w', encoding='utf-8') as f:
115+
f.write(t.input_html)

ace/ingest.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,13 +49,13 @@ def _parse_article(args):
4949
# Fallback to original source identification
5050
source = manager.identify_source(html)
5151
if source is None:
52-
logger.warning("Could not identify source for %s" % f)
52+
logger.info("Could not identify source for %s" % f)
5353
return f, None
5454

5555
article = source.parse_article(html, pmid, metadata_dir=metadata_dir, **kwargs)
5656
return f, article
5757
except Exception as e:
58-
logger.warning("Error parsing article %s: %s" % (f, str(e)))
58+
logger.info("Error parsing article %s: %s" % (f, str(e)))
5959
return f, None
6060

6161

ace/sources.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1801,18 +1801,18 @@ def _extract_tables_from_javascript(self, soup):
18011801
# Break after finding and successfully parsing tables
18021802
break
18031803
else:
1804-
logger.warning("No tables found in JavaScript data after parsing")
1804+
logger.info("No tables found in JavaScript data after parsing")
18051805
else:
18061806
logger.debug("Could not find tfviewerdata assignment")
18071807

18081808
except Exception as e:
1809-
logger.warning(f"Error extracting tables from JavaScript: {e}")
1809+
logger.info(f"Error extracting tables from JavaScript: {e}")
18101810
import traceback
18111811
logger.debug(traceback.format_exc())
18121812
continue
18131813

18141814
if not tables:
1815-
logger.warning("No tables could be extracted from JavaScript data")
1815+
logger.info("No tables could be extracted from JavaScript data")
18161816

18171817
return tables
18181818

0 commit comments

Comments
 (0)