Skip to content

Commit e491091

Browse files
author
jravenel
committed
feat: Add ArXiv agent integration
1 parent a865e02 commit e491091

File tree

8 files changed

+1665
-1547
lines changed

8 files changed

+1665
-1547
lines changed

Makefile

+3
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,9 @@ chat-glassdoor-agent: .venv
200200
chat-powerpoint-agent: .venv
201201
@ docker compose run abi bash -c 'poetry install && poetry run chat-powerpoint-agent'
202202

203+
chat-arxiv-agent: .venv
204+
@ docker compose run abi bash -c 'poetry install && poetry run chat-arxiv-agent'
205+
203206
.DEFAULT_GOAL := chat-supervisor-agent
204207

205208
# These targets run commands and do not produce files of the same name, so they are declared phony.
.PHONY: test chat-supervisor-agent chat-support-agent chat-content-agent chat-finance-agent chat-growth-agent chat-opendata-agent chat-operations-agent chat-sales-agent chat-glassdoor-agent chat-powerpoint-agent chat-arxiv-agent api sh lock add abi-add

poetry.lock

+1,281-1,547
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pyproject.toml

+3
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ wordcloud = "^1.9.4"
4343
imapclient = "^3.0.1"
4444
pynacl = "^1.5.0"
4545
python-pptx = "^1.0.2"
46+
arxiv = "^2.1.3"
4647

4748
[tool.poetry.group.terminal-agent.dependencies]
4849
langchain-anthropic = "^0.3.0"
@@ -113,6 +114,8 @@ chat-matplotlib-agent = "src.apps.terminal_agent.main:run_matplotlib_agent"
113114
abi = "src.cli:main"
114115
api = "src.api:api"
115116

117+
chat-arxiv-agent = "src.apps.terminal_agent.main:run_arxiv_agent"
118+
116119
[build-system]
117120
requires = ["poetry-core"]
118121
build-backend = "poetry.core.masonry.api"

src/apps/terminal_agent/main.py

+7
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
# Expert analytics assistants
5454
from src.assistants.expert.analytics.PlotlyAssistant import create_plotly_agent
5555
from src.assistants.expert.analytics.MatplotlibAssistant import create_matplotlib_agent
56+
from src.assistants.expert.domain.ArXivAssistant import create_arxiv_agent
5657

5758
def on_tool_response(message: str):
5859
try:
@@ -381,3 +382,9 @@ def run_powerpoint_agent():
381382
agent.on_tool_usage(lambda message: print_tool_usage(message.tool_calls[0]['name']))
382383
agent.on_tool_response(on_tool_response)
383384
run_agent(agent)
385+
386+
def run_arxiv_agent():
    """Create the ArXiv agent, attach the tool callbacks and pass it to ``run_agent``."""
    arxiv_agent = create_arxiv_agent()

    def announce_tool_usage(message):
        # Only the first tool call carried by the message is reported.
        print_tool_usage(message.tool_calls[0]['name'])

    arxiv_agent.on_tool_usage(announce_tool_usage)
    arxiv_agent.on_tool_response(on_tool_response)
    run_agent(arxiv_agent)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
from langchain_openai import ChatOpenAI
2+
from abi.services.agent.Agent import Agent, AgentConfiguration, AgentSharedState, MemorySaver
3+
from src import secret, config
4+
from src.integrations.ArXivIntegration import ArXivIntegration, ArXivIntegrationConfiguration
5+
from src.pipelines.arxiv.ArXivPaperPipeline import ArXivPaperPipeline, ArXivPaperPipelineConfiguration
6+
from abi.services.ontology_store.adaptors.secondary.OntologyStoreService__SecondaryAdaptor__Filesystem import OntologyStoreService__SecondaryAdaptor__Filesystem
7+
from abi.services.ontology_store.OntologyStoreService import OntologyStoreService
8+
9+
NAME = "ArXiv Assistant"
10+
DESCRIPTION = "Search and analyze research papers from ArXiv"
11+
SYSTEM_PROMPT = """You are an ArXiv research assistant. You can help users search for papers, get paper details, and analyze research trends.
12+
You have access to the following tools:
13+
- search_arxiv_papers: Search for papers on ArXiv
14+
- get_arxiv_paper: Get metadata for a specific paper
15+
- arxiv_paper_pipeline: Add papers to the knowledge graph
16+
17+
When users ask about papers, first search for relevant papers using search_arxiv_papers. Then you can get detailed information about specific papers using get_arxiv_paper.
18+
Use arxiv_paper_pipeline to add important papers to the knowledge graph for future reference."""
19+
20+
class ArXivAssistant(Agent):
    """Agent subclass used for the ArXiv assistant.

    Defines no members of its own; all behavior is inherited from ``Agent``.
    """
23+
24+
def create_arxiv_agent(
    agent_shared_state: AgentSharedState = None,
    agent_configuration: AgentConfiguration = None
) -> Agent:
    """Build an ``ArXivAssistant`` equipped with the ArXiv integration and pipeline tools.

    Args:
        agent_shared_state (AgentSharedState, optional): State object handed to
            the agent. A fresh ``AgentSharedState()`` is created when omitted.
        agent_configuration (AgentConfiguration, optional): Configuration handed
            to the agent. When omitted, one is created with ``SYSTEM_PROMPT`` as
            its system prompt.

    Returns:
        Agent: An ``ArXivAssistant`` instance named ``"arxiv_assistant"``.
    """
    # Chat model backing the agent.
    chat_model = ChatOpenAI(
        model="gpt-4",
        temperature=0,
        api_key=secret.get('OPENAI_API_KEY')
    )

    # Filesystem-backed ontology store, consumed by the paper pipeline below.
    store_adaptor = OntologyStoreService__SecondaryAdaptor__Filesystem(
        store_path=config.ontology_store_path
    )
    store = OntologyStoreService(store_adaptor)

    # The same integration configuration is shared by the integration tools
    # and by the pipeline.
    integration_configuration = ArXivIntegrationConfiguration()
    integration_tools = ArXivIntegration.as_tools(integration_configuration)

    pipeline_configuration = ArXivPaperPipelineConfiguration(
        arxiv_integration_config=integration_configuration,
        ontology_store=store
    )
    pipeline_tools = ArXivPaperPipeline(pipeline_configuration).as_tools()

    # Integration tools first, pipeline tools after.
    agent_tools = [*integration_tools, *pipeline_tools]

    # Fall back to defaults for whatever the caller did not supply.
    configuration = (
        AgentConfiguration(system_prompt=SYSTEM_PROMPT)
        if agent_configuration is None
        else agent_configuration
    )
    state = AgentSharedState() if agent_shared_state is None else agent_shared_state

    return ArXivAssistant(
        name="arxiv_assistant",
        description=DESCRIPTION,
        chat_model=chat_model,
        tools=agent_tools,
        state=state,
        configuration=configuration,
        memory=MemorySaver()
    )

src/integrations/ArXivIntegration.py

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
from dataclasses import dataclass
2+
from typing import List, Optional
3+
import arxiv
4+
from langchain_core.tools import StructuredTool
5+
from pydantic import BaseModel, Field
6+
from abi.integration import Integration
7+
8+
@dataclass
class ArXivIntegrationConfiguration:
    """Settings consumed by ``ArXivIntegration``."""

    # Result limit applied to a search when the caller does not pass one.
    max_results: int = 10
12+
13+
class ArXivIntegration(Integration):
    """Integration with the ArXiv API.

    Wraps an ``arxiv.Client`` and offers paper search and single-paper lookup,
    both as regular methods and as LangChain ``StructuredTool`` objects
    (see ``as_tools``).
    """

    def __init__(self, configuration: ArXivIntegrationConfiguration):
        """Store the configuration and create the ArXiv client.

        Args:
            configuration: ArXiv integration configuration
        """
        self.__configuration = configuration
        self.__client = arxiv.Client()

    @staticmethod
    def _paper_to_dict(paper) -> dict:
        """Convert one result yielded by the ArXiv client into a metadata dictionary.

        Args:
            paper: A result object produced by ``arxiv.Client.results``

        Returns:
            Paper metadata dictionary
        """
        return {
            # The ID is the last path segment of the entry URL.
            "id": paper.entry_id.split("/")[-1],
            "title": paper.title,
            "authors": [str(author) for author in paper.authors],
            "summary": paper.summary,
            # Date values are passed through unchanged from the result object.
            "published": paper.published,
            "updated": paper.updated,
            "categories": paper.categories,
            "links": [link.href for link in paper.links],
            "pdf_url": paper.pdf_url,
        }

    def search_papers(self, query: str, max_results: Optional[int] = None) -> List[dict]:
        """Search for papers on ArXiv.

        Args:
            query: Search query string
            max_results: Maximum number of results to return. A falsy value
                (``None`` or ``0``) falls back to the configured ``max_results``.

        Returns:
            List of paper metadata dictionaries
        """
        search = arxiv.Search(
            query=query,
            max_results=max_results or self.__configuration.max_results
        )
        return [self._paper_to_dict(paper) for paper in self.__client.results(search)]

    def get_paper(self, paper_id: str) -> dict:
        """Get metadata for a specific paper.

        Args:
            paper_id: ArXiv paper ID

        Returns:
            Paper metadata dictionary

        Raises:
            ValueError: If the client yields no result for ``paper_id``
        """
        search = arxiv.Search(id_list=[paper_id])
        # next() is given a default so that an empty result set surfaces as a
        # descriptive error instead of a bare StopIteration.
        paper = next(self.__client.results(search), None)
        if paper is None:
            raise ValueError(f"No ArXiv paper found for ID {paper_id!r}")
        return self._paper_to_dict(paper)

    @staticmethod
    def as_tools(configuration: ArXivIntegrationConfiguration) -> List[StructuredTool]:
        """Get tools for the ArXiv integration.

        Args:
            configuration: ArXiv integration configuration

        Returns:
            List of tools: ``search_arxiv_papers`` and ``get_arxiv_paper``
        """
        # Both tools share this single integration instance.
        integration = ArXivIntegration(configuration)

        class SearchPapersParameters(BaseModel):
            query: str = Field(..., description="Search query string")
            max_results: Optional[int] = Field(None, description="Maximum number of results to return")

        class GetPaperParameters(BaseModel):
            paper_id: str = Field(..., description="ArXiv paper ID")

        return [
            StructuredTool(
                name="search_arxiv_papers",
                description="Search for papers on ArXiv",
                func=lambda **kwargs: integration.search_papers(**kwargs),
                args_schema=SearchPapersParameters
            ),
            StructuredTool(
                name="get_arxiv_paper",
                description="Get metadata for a specific ArXiv paper",
                func=lambda **kwargs: integration.get_paper(**kwargs),
                args_schema=GetPaperParameters
            )
        ]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
@prefix owl: <http://www.w3.org/2002/07/owl#> .
2+
@prefix dc11: <http://purl.org/dc/elements/1.1/> .
3+
@prefix dc: <http://purl.org/dc/terms/> .
4+
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
5+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
6+
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
7+
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
8+
@prefix bfo: <http://purl.obolibrary.org/obo/> .
9+
@prefix cco: <https://www.commoncoreontologies.org/> .
10+
@prefix abi: <http://ontology.naas.ai/abi/> .
11+
12+
<http://ontology.naas.ai/abi/ArXivOntology> rdf:type owl:Ontology ;
13+
owl:imports <https://www.commoncoreontologies.org/AgentOntology> ;
14+
owl:versionIRI <https://github.com/jupyter-naas/abi/tree/cli/src/ontologies/application-level/ArXivOntology.ttl> ;
15+
dc11:contributor "AI Assistant" ;
16+
dc:description "Application ontology for ArXiv research papers and authors."@en ;
17+
dc:title "ArXiv Ontology" .
18+
19+
#################################################################
20+
# Classes
21+
#################################################################
22+
23+
abi:ArXivPaper rdf:type owl:Class ;
24+
rdfs:subClassOf abi:Document ;
25+
rdfs:label "ArXiv Paper"@en ;
26+
skos:definition "A research paper published on the ArXiv platform."@en ;
27+
skos:example "A physics preprint submitted to arXiv.org"@en .
28+
29+
abi:ArXivAuthor rdf:type owl:Class ;
30+
rdfs:subClassOf abi:User ;
31+
rdfs:label "ArXiv Author"@en ;
32+
skos:definition "An author who has published papers on ArXiv."@en ;
33+
skos:example "A researcher who submits papers to arXiv.org"@en .
34+
35+
abi:ArXivCategory rdf:type owl:Class ;
36+
rdfs:subClassOf abi:Category ;
37+
rdfs:label "ArXiv Category"@en ;
38+
skos:definition "A subject category used to classify papers on ArXiv."@en ;
39+
skos:example "Physics, Mathematics, Computer Science"@en .
40+
41+
#################################################################
42+
# Object Properties
43+
#################################################################
44+
45+
abi:hasAuthor rdf:type owl:ObjectProperty ;
46+
rdfs:domain abi:ArXivPaper ;
47+
rdfs:range abi:ArXivAuthor ;
48+
rdfs:label "has author"@en .
49+
50+
abi:hasCategory rdf:type owl:ObjectProperty ;
51+
rdfs:domain abi:ArXivPaper ;
52+
rdfs:range abi:ArXivCategory ;
53+
rdfs:label "has category"@en .
54+
55+
#################################################################
56+
# Data Properties
57+
#################################################################
58+
59+
abi:arxivId rdf:type owl:DatatypeProperty ;
60+
rdfs:domain abi:ArXivPaper ;
61+
rdfs:range xsd:string ;
62+
rdfs:label "arxiv id"@en .
63+
64+
abi:abstract rdf:type owl:DatatypeProperty ;
65+
rdfs:domain abi:ArXivPaper ;
66+
rdfs:range xsd:string ;
67+
rdfs:label "abstract"@en .
68+
69+
abi:publishedDate rdf:type owl:DatatypeProperty ;
70+
rdfs:domain abi:ArXivPaper ;
71+
rdfs:range xsd:dateTime ;
72+
rdfs:label "published date"@en .

0 commit comments

Comments
 (0)