|
3 | 3 | Fetch arXiv articles that use a CC legal tool using the OAI-PMH API. |
4 | 4 | OAI-PMH: Open Archives Initiative Protocol for Metadata Harvesting. |
5 | 5 | """ |
| 6 | + |
6 | 7 | # Standard library |
7 | 8 | import argparse |
8 | 9 | import csv |
|
61 | 62 | HEADER_COUNT = ["TOOL_IDENTIFIER", "COUNT"] |
62 | 63 | HEADER_YEAR = ["TOOL_IDENTIFIER", "YEAR", "COUNT"] |
63 | 64 | QUARTER = os.path.basename(PATHS["data_quarter"]) |
| 65 | +SUBSUMED_CATEGORIES = { |
| 66 | + # https://arxiv.org/archive/alg-geom |
| 67 | + # "The alg-geom archive has been subsumed into Algebraic Geometry |
| 68 | + # (math.AG)." |
| 69 | + "alg-geom": "math.AG", |
| 70 | + # https://arxiv.org/archive/chao-dyn |
| 71 | + # "The chao-dyn archive has been subsumed into Chaotic Dynamics (nlin.CD)." |
| 72 | + "chao-dyn": "nlin.CD", |
| 73 | + # https://arxiv.org/archive/dg-ga |
| 74 | + # "The dg-ga archive has been subsumed into Differential Geometry |
| 75 | + # (math.DG)." |
| 76 | + "dg-ga": "math.DG", |
| 77 | + # https://arxiv.org/archive/solv-int |
| 78 | + # "The solv-int archive has been subsumed into Exactly Solvable and |
| 79 | + # Integrable Systems (nlin.SI)." |
| 80 | + "solv-int": "nlin.SI", |
| 81 | + # https://arxiv.org/archive/q-alg |
| 82 | + # "The q-alg archive has been subsumed into Quantum Algebra (math.QA)." |
| 83 | + "q-alg": "math.QA", |
| 84 | +} |
64 | 85 |
|
65 | 86 |
|
66 | 87 | # parsing arguments function |
@@ -247,6 +268,8 @@ def extract_record_metadata(args, record): |
247 | 268 | categories_elem = record.find(".//{http://arxiv.org/OAI/arXiv/}categories") |
248 | 269 | if categories_elem is not None and categories_elem.text: |
249 | 270 | metadata["categories"] = categories_elem.text.strip().split() |
| 271 | + for index, code in enumerate(metadata["categories"]): |
| 272 | + metadata["categories"][index] = SUBSUMED_CATEGORIES.get(code, code) |
250 | 273 | else: |
251 | 274 | metadata["categories"] = False |
252 | 275 |
|
|
0 commit comments