-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathretriever.py
50 lines (40 loc) · 1.57 KB
/
retriever.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import json
from datetime import datetime, timezone

import arxiv
# arXiv category codes to harvest; each entry is queried separately and the
# code is stored alongside every paper retrieved for it.
categories = [
    # Computer science
    "cs.AI",              # Artificial Intelligence
    "cs.LG",              # Machine Learning
    # Mathematics, physics, economics
    "math.GM",            # General Mathematics
    "physics.gen-ph",     # General Physics
    "econ.EM",            # Econometrics
    # Life sciences and statistics
    "q-bio.GN",           # Quantitative Biology - Genomics
    "stat.ME",            # Statistics - Methodology
    # Astrophysics, condensed matter, nonlinear sciences
    "astro-ph.GA",        # Galaxy Astrophysics
    "cond-mat.mtrl-sci",  # Materials Science
    "nlin.AO",            # Adaptation and Self-Organizing Systems
]
def collect_papers(results, category):
    """Convert arXiv results into plain dicts tagged with *category*.

    Args:
        results: Iterable of objects exposing ``title`` and ``summary``
            attributes (the script passes ``client.results(search)``).
        category: Category code stored under each record's ``"category"`` key.

    Returns:
        A list with one ``{"title", "abstract", "category"}`` dict per
        result, in iteration order; an empty list when *results* yields
        nothing.
    """
    return [
        {
            "title": result.title,
            "abstract": result.summary,
            "category": category,
        }
        for result in results
    ]


if __name__ == "__main__":
    all_papers = []
    # The arXiv API interprets submittedDate bounds as GMT, so the upper
    # bound is built from UTC rather than the machine's local clock.
    today = datetime.now(timezone.utc).strftime("%Y%m%d%H%M")
    client = arxiv.Client()

    for category in categories:
        search = arxiv.Search(
            query=f"cat:{category} AND submittedDate:[201801010600 TO {today}]",
            max_results=100,
            sort_by=arxiv.SortCriterion.Relevance,
        )
        print(f"Category {category}...")
        papers = collect_papers(client.results(search), category)
        all_papers.extend(papers)
        # Report the real number of records; an empty result set prints 0
        # instead of the "1" the old `i + 1` counter produced.
        print(f"Retrieved {len(papers)} papers")

    # Write the papers data to a JSON file
    with open("arxiv_papers.json", "w", encoding="utf-8") as f:
        json.dump(all_papers, f, ensure_ascii=False)
    print(f"Successfully saved {len(all_papers)} papers' data to arxiv_papers.json")