Skip to content

Commit b49f18d

Browse files
committed
Change usage of config
1 parent ed2fbbf commit b49f18d

5 files changed

Lines changed: 148 additions & 143 deletions

File tree

ace/__init__.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,43 @@
1-
# emacs: -*- mode: python-mode; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
2-
# ex: set sts=4 ts=4 sw=4 et:
1+
# emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
2+
# ex: set sts=4 sw=4 et:
33
"""ACE -- Automated Coordinate Extraction.
44
"""
5-
__all__ = ["config", "ingest", "database", "datatable", "set_logging_level", "scrape", "sources", "tableparser", "tests", "__version__"]
5+
__all__ = [
6+
"config", "ingest", "database", "datatable", "set_logging_level",
7+
"scrape", "sources", "tableparser", "tests", "__version__"
8+
]
69

710
import logging
811
import sys
912
import os
1013

1114
from .version import __version__
1215

16+
1317
def set_logging_level(level=None):
1418
"""Set package-wide logging level
1519
16-
Args
17-
level : Logging level constant from logging module (warning, error, info, etc.)
20+
Args:
21+
level: Logging level constant from logging module
22+
(warning, error, info, etc.)
1823
"""
1924
if level is None:
2025
level = os.environ.get('ACE_LOGLEVEL', 'warn')
2126
logger.setLevel(getattr(logging, level.upper()))
2227
return logger.getEffectiveLevel()
2328

29+
2430
def _setup_logger(logger):
2531
# Basic logging setup
2632
console = logging.StreamHandler(sys.stdout)
27-
console.setFormatter(logging.Formatter("%(levelname)-6s %(module)-7s %(message)s"))
33+
formatter = logging.Formatter(
34+
"%(levelname)-6s %(module)-7s %(message)s"
35+
)
36+
console.setFormatter(formatter)
2837
logger.addHandler(console)
2938
set_logging_level()
3039

40+
3141
# Set up logger
3242
logger = logging.getLogger("ace")
3343
_setup_logger(logger)

ace/database.py

Lines changed: 82 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
# Database stuff and models
22

3-
from sqlalchemy import (TypeDecorator, Table, Column, Integer, Float, String, Boolean,
4-
ForeignKey, DateTime, Text)
5-
from sqlalchemy.orm import relationship, backref, sessionmaker
3+
from sqlalchemy import (
4+
TypeDecorator, Table, Column, Integer, Float, String, Boolean,
5+
ForeignKey, DateTime, Text
6+
)
7+
from sqlalchemy.orm import relationship, sessionmaker
68
from sqlalchemy import create_engine
79
from sqlalchemy.ext.declarative import declarative_base
810
from sqlalchemy.ext.associationproxy import association_proxy
@@ -13,9 +15,8 @@
1315
import logging
1416
import sys
1517
from os import path
16-
import datetime
1718

18-
from . import config
19+
from .config import get_config
1920
from . import extract
2021

2122
logger = logging.getLogger(__name__)
@@ -32,22 +33,32 @@ def __init__(self, adapter=None, db_name=None, user=None, password=None):
3233
''' Connect to DB and initialize instance. '''
3334

3435
# Default to settings in config file if none passed
35-
if adapter is None: adapter = config.SQL_ADAPTER
36+
if adapter is None:
37+
adapter = get_config('SQL_ADAPTER')
3638

3739
# Generate DB URI
3840
if adapter == 'sqlite':
39-
db_uri = config.SQLITE_URI if db_name is None else db_name
41+
db_uri = get_config('SQLITE_URI') if db_name is None else db_name
4042
elif adapter == 'mysql':
41-
db_name = config.MYSQL_DB if db_name is None else db_name
42-
if user is None: user = config.MYSQL_USER
43-
if password is None: password = config.MYSQL_PASSWORD
44-
db_uri = 'mysql://%s:%s@localhost/%s' % (user, password, db_name)
43+
db_name = get_config('MYSQL_DB') if db_name is None else db_name
44+
if user is None:
45+
user = get_config('MYSQL_USER')
46+
if password is None:
47+
password = get_config('MYSQL_PASSWORD')
48+
db_uri = f'mysql://{user}:{password}@localhost/{db_name}'
4549
else:
46-
raise ValueError("Value of SQL_ADAPTER in settings must be either 'sqlite' or 'mysql'")
50+
raise ValueError(
51+
"SQL_ADAPTER must be either 'sqlite' or 'mysql'"
52+
)
4753

48-
engine = create_engine(db_uri, echo=False, connect_args={'timeout': 15})
54+
engine = create_engine(
55+
db_uri,
56+
echo=False,
57+
connect_args={'timeout': 15}
58+
)
4959

50-
if adapter == 'mysql': engine.execute("SET sql_mode=''")
60+
if adapter == 'mysql':
61+
engine.execute("SET sql_mode=''")
5162

5263
Session = sessionmaker(bind=engine)
5364
Base.metadata.create_all(engine)
@@ -60,8 +71,6 @@ def add(self, record):
6071
def save(self):
6172
''' Commit all stored records to file. '''
6273
self.session.commit()
63-
# except Exception as err:
64-
# print(err)
6574

6675
def delete_article(self, pmid):
6776
article = self.session.query(Article).filter_by(id=pmid).first()
@@ -71,23 +80,37 @@ def delete_article(self, pmid):
7180
def print_stats(self):
7281
''' Summarize the current state of the DB. '''
7382
n_articles = self.session.query(Article).count()
74-
n_articles_with_coordinates = self.session.query(Article).join(Table).filter(Table.n_activations>0).distinct('article_id').count()
83+
n_articles_with_coordinates = self.session.query(Article) \
84+
.join(Table) \
85+
.filter(Table.n_activations > 0) \
86+
.distinct('article_id') \
87+
.count()
7588
n_tables = self.session.query(Table).count()
7689
n_activations = self.session.query(Activation).count()
7790
n_links = self.session.query(NeurovaultLink).count()
78-
n_articles_with_links = self.session.query(NeurovaultLink).distinct('article_id').count()
79-
print(f"The database currently contains: {n_articles} articles.\n"
80-
f"{n_articles_with_coordinates} have coordinates, and {n_articles_with_links} have NeuroVault links.\n"
81-
f"Total of {n_tables} tables, {n_activations} activations and {n_links} NeuroVault links.")
91+
n_articles_with_links = self.session.query(NeurovaultLink) \
92+
.distinct('article_id') \
93+
.count()
94+
95+
print(
96+
f"The database currently contains: {n_articles} articles.\n"
97+
f"{n_articles_with_coordinates} have coordinates, and "
98+
f"{n_articles_with_links} have NeuroVault links.\n"
99+
f"Total of {n_tables} tables, {n_activations} activations "
100+
f"and {n_links} NeuroVault links."
101+
)
82102

83103
def article_exists(self, pmid):
84104
''' Check if an article already exists in the database. '''
85-
return self.session.query(exists().where(Article.id==pmid)).scalar()
105+
return self.session.query(
106+
exists().where(Article.id == pmid)
107+
).scalar()
86108

87109
@property
88110
def articles(self):
89111
return self.session.query(Article).all()
90112

113+
91114
# Create a JSONString column type for convenience
92115
class JsonString(TypeDecorator):
93116
impl = Text
@@ -121,16 +144,25 @@ class Article(Base):
121144
abstract = Column(Text)
122145
citation = Column(Text)
123146
pubmed_metadata = Column(JsonString)
124-
created_at = Column(DateTime, default=datetime.datetime.utcnow)
125-
updated_at = Column(DateTime, default=datetime.datetime.utcnow,
126-
onupdate=datetime.datetime.utcnow)
127-
128-
tables = relationship('Table', cascade="all, delete-orphan",
129-
backref='article')
130-
131-
neurovault_links = relationship('NeurovaultLink', cascade="all, delete-orphan",
132-
backref='article')
133-
147+
created_at = Column(DateTime, default=datetime.utcnow)
148+
updated_at = Column(
149+
DateTime,
150+
default=datetime.utcnow,
151+
onupdate=datetime.utcnow
152+
)
153+
154+
tables = relationship(
155+
'Table',
156+
cascade="all, delete-orphan",
157+
backref='article'
158+
)
159+
160+
neurovault_links = relationship(
161+
'NeurovaultLink',
162+
cascade="all, delete-orphan",
163+
backref='article'
164+
)
165+
134166
features = association_proxy('tags', 'feature')
135167

136168
def __init__(self, text, pmid=None, doi=None, metadata=None):
@@ -160,8 +192,11 @@ class Table(Base):
160192

161193
id = Column(Integer, primary_key=True)
162194
article_id = Column(Integer, ForeignKey('articles.id'))
163-
activations = relationship('Activation', cascade="all, delete-orphan",
164-
backref='table')
195+
activations = relationship(
196+
'Activation',
197+
cascade="all, delete-orphan",
198+
backref='table'
199+
)
165200
position = Column(Integer) # The serial position of occurrence
166201
number = Column(String(10)) # The stated table ID (e.g., 1, 2b)
167202
label = Column(String(200)) # The full label (e.g., Table 1, Table 2b)
@@ -172,19 +207,7 @@ class Table(Base):
172207
input_html = Column(LongText)
173208

174209
def finalize(self):
175-
''' Any cleanup and updating operations we need to do before saving. '''
176-
177-
# # Remove duplicate activations--most commonly produced by problems with
178-
# # the grouping code.
179-
# act_defs = set()
180-
# to_keep = []
181-
# for a in self.activations:
182-
# definition = json.dumps([a.x, a.y, a.z, a.groups])
183-
# if definition not in act_defs:
184-
# act_defs.add(definition)
185-
# to_keep.append(a)
186-
# self.activations = to_keep
187-
210+
''' Any cleanup before saving. '''
188211
self.n_activations = len(self.activations)
189212

190213

@@ -234,35 +257,35 @@ def add_col(self, key, val):
234257
# Validates Peak. Considers peak invalid if:
235258
# * At least one of X, Y, Z is None or an empty string
236259
# * Any |coordinate| >= 100
237-
# * Two or more columns are zeroes (most of the time this
238-
# will indicate a problem, but occasionally a real coordinate)
239-
# Depending on config, either excludes peak, or allows it through
240-
# but flags potential problems for later inspection.
260+
# * Two or more columns are zeroes
241261
def validate(self):
242-
243262
for c in [self.x, self.y, self.z]:
244263
if c == '' or c is None:
245-
logger.debug('Missing x, y, or z coordinate information: [%s, %s, %s]' % tuple(
246-
[str(e) for e in [self.x, self.y, self.z]]))
264+
logger.debug(
265+
'Missing x, y, or z coordinate: [%s, %s, %s]',
266+
self.x, self.y, self.z
267+
)
247268
return False
248269
try:
249270
if abs(c) >= 100:
250271
logger.debug(
251-
'Invalid coordinates: at least one dimension (x,y,z) >= 100.')
272+
'Invalid coordinates: dimension >= 100.'
273+
)
252274
return False
253-
except:
254-
print(c)
255-
print(sys.exc_info()[0])
275+
except Exception:
276+
logger.exception("Error validating coordinate")
256277
raise
257278

258279
sorted_xyz = sorted([abs(self.x), abs(self.y), abs(self.z)])
259280
if sorted_xyz[0] == 0 and sorted_xyz[1] == 0:
260281
logger.debug(
261-
"At least two dimensions have value == 0; coordinate is probably not real.")
282+
"Two dimensions have value 0; probably not real."
283+
)
262284
return False
263285

264286
return True
265287

288+
266289
class NeurovaultLink(Base):
267290

268291
__tablename__ = 'Neurovaultlinks'

ace/export.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,10 +107,10 @@ def export_database(db, foldername, skip_empty=True, table_html=False):
107107

108108
for art in articles:
109109
art_dir = tables_dir / str(art.id)
110-
art_dir.mkdir(parents=True, exist_ok=True)
111110

112111
for t in art.tables:
113112
if t.input_html:
113+
art_dir.mkdir(parents=True, exist_ok=True)
114114
table_file = art_dir / f"{t.id}.html"
115115
with table_file.open('w', encoding='utf-8') as f:
116116
f.write(t.input_html)

0 commit comments

Comments
 (0)