forked from cesarbruschetta/doi_request-experiments
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconsult_doi_request_and_extract_data.py
More file actions
65 lines (55 loc) · 2.06 KB
/
consult_doi_request_and_extract_data.py
File metadata and controls
65 lines (55 loc) · 2.06 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""
Script to check whether the given DOIs are still returning a 404 error.
"""
import sys
import argparse
import pandas as pd
import pandas.io.sql as sqlio
import psycopg2
def main(sargs):
    """Cross-check the DOIs listed in ``./df_doi_not_found.csv`` with the ``deposit`` table.

    Reads the CSV (it must provide ``collection`` and ``pid`` columns),
    queries the Postgres database described by the command-line options and
    writes two CSV files to the current directory:

    * ``df_doi_processed.csv``: ``deposit`` rows whose ``code`` equals
      ``<collection>_<pid>`` for some row of the input CSV.
    * ``df_doi_not_processed.csv``: input rows whose ``pid`` does not appear
      among the ``deposit`` rows whose ``prefix`` starts with ``10.15``.

    Args:
        sargs: Command-line arguments without the program name, e.g.
            ``sys.argv[1:]``.

    Raises:
        SystemExit: Raised by argparse when a required option is missing or
            an unknown option is given.
    """
    parser = argparse.ArgumentParser(description="")
    parser.add_argument(
        "--host", required=True, help="""Host to connect to Postgres DB"""
    )
    parser.add_argument(
        "--port", required=True, help="""Port to connect to Postgres DB"""
    )
    parser.add_argument("--user", required=True, help="User to connect")
    parser.add_argument("--password", required=True, help="Password to connect")
    parser.add_argument("--database", help="Database to connect")
    args = parser.parse_args(sargs)

    df_doi_not_found = pd.read_csv("./df_doi_not_found.csv")

    # Deposit codes have the form "<collection>_<pid>".
    codes = [
        "{0}_{1}".format(collection, pid)
        for collection, pid in zip(
            df_doi_not_found["collection"].to_list(),
            df_doi_not_found["pid"].to_list(),
        )
    ]

    # The codes are sent as a bound parameter instead of being interpolated
    # into the SQL text: values coming from the CSV can no longer break or
    # inject SQL, and an empty CSV yields an empty array (a valid query)
    # rather than the invalid "IN ( )".
    sql_doi_processed = """
        SELECT
            journal, pid, doi, submission_status,
            coalesce(feedback_status, 'semValor') as feedback_status,
            feedback_xml
        FROM deposit
        WHERE code = ANY(%(codes)s)"""

    # No parameters are bound to this query, so the literal "%" of the LIKE
    # pattern must stay unescaped.
    sql_all_doi_processed = (
        """ SELECT pid FROM deposit WHERE prefix LIKE '10.15%' """
    )

    connection = psycopg2.connect(
        host=args.host,
        port=args.port,
        user=args.user,
        password=args.password,
        database=args.database,
    )
    try:
        df_doi_processed = sqlio.read_sql_query(
            sql_doi_processed, connection, params={"codes": codes}
        )
        df_all_processed = sqlio.read_sql_query(
            sql_all_doi_processed, connection
        )
    finally:
        # Always release the database connection, even if a query fails.
        connection.close()

    pids = df_all_processed["pid"].to_list()
    df_doi_not_processed = df_doi_not_found.query("pid not in @pids")

    # Save the files for future queries.
    print("Gerando arquivo df_doi_not_processed.csv")
    df_doi_not_processed.to_csv("./df_doi_not_processed.csv")

    print("Gerando arquivo df_doi_processed.csv")
    df_doi_processed.to_csv("./df_doi_processed.csv")
# Script entry point: hand the command-line arguments (without the program
# name) to main() only when the file is executed directly.
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    main(cli_args)