
Commit 06b530c

Author: jravenel
feat: Migrated ArXiv agent components to new modular structure
1 parent 0b13283 commit 06b530c

File tree: 6 files changed, +450 -25 lines changed


Makefile (+1 -1)
@@ -105,7 +105,7 @@ chat-powerpoint-agent: .venv
 	@ docker compose run abi bash -c 'poetry install && poetry run python -m src.core.apps.terminal_agent.main generic_run_agent PowerPointAssistant'
 
 chat-arxiv-agent: .venv
-	@ docker compose run abi bash -c 'poetry install && poetry run chat-arxiv-agent'
+	@ docker compose run abi bash -c 'poetry install && poetry run python -m src.core.apps.terminal_agent.main generic_run_agent ArXivAssistant'
 
 .DEFAULT_GOAL := chat-supervisor-agent
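The chat-arxiv-agent target now goes through the same generic terminal-agent entrypoint as the other assistants instead of a dedicated poetry script. The actual src.core.apps.terminal_agent.main module is not part of this diff, so the following is only a hedged sketch of how such a runner could resolve an assistant name to a module exposing the create_agent() factory introduced below; the registry, module paths, and the commented-out run call are illustrative assumptions, not the repository's implementation.

# Hypothetical sketch of a generic_run_agent-style dispatcher; the real
# src.core.apps.terminal_agent.main is not shown in this commit.
import importlib
import sys

AGENT_MODULES = {
    # Assumption: a registry mapping assistant names to their module paths.
    "ArXivAssistant": "src.custom.modules.arxiv_agent.assistants.ArXivAssistant",
}

def generic_run_agent(assistant_name: str) -> None:
    module = importlib.import_module(AGENT_MODULES[assistant_name])
    agent = module.create_agent()  # factory exposed by ArXivAssistant.py after this commit
    # Assumption: the Agent base class exposes some interactive loop; its exact
    # method name is not visible in this diff, so the call is left commented out.
    # agent.run()

if __name__ == "__main__":
    generic_run_agent(sys.argv[-1])  # e.g. "ArXivAssistant"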

src/custom/arxiv-agent/assistants/ArXivAssistant.py → src/custom/modules/arxiv_agent/assistants/ArXivAssistant.py (+25 -7)
@@ -1,27 +1,35 @@
 from langchain_openai import ChatOpenAI
 from abi.services.agent.Agent import Agent, AgentConfiguration, AgentSharedState, MemorySaver
 from src import secret, config
-from src.integrations.ArXivIntegration import ArXivIntegration, ArXivIntegrationConfiguration
-from src.pipelines.arxiv.ArXivPaperPipeline import ArXivPaperPipeline, ArXivPaperPipelineConfiguration
+from src.custom.modules.arxiv_agent.integrations.ArXivIntegration import ArXivIntegration, ArXivIntegrationConfiguration
+from src.custom.modules.arxiv_agent.pipelines.ArXivPaperPipeline import ArXivPaperPipeline, ArXivPaperPipelineConfiguration
 from abi.services.ontology_store.adaptors.secondary.OntologyStoreService__SecondaryAdaptor__Filesystem import OntologyStoreService__SecondaryAdaptor__Filesystem
 from abi.services.ontology_store.OntologyStoreService import OntologyStoreService
+from src.custom.modules.arxiv_agent.workflows.ArXivQueryWorkflow import ArXivQueryWorkflow, ArXivQueryWorkflowConfiguration
 
 NAME = "ArXiv Assistant"
+SLUG = "arxiv-assistant"
 DESCRIPTION = "Search and analyze research papers from ArXiv"
+AVATAR_URL = "https://upload.wikimedia.org/wikipedia/commons/thumb/a/a8/ArXiv_web.svg/1200px-ArXiv_web.svg.png"
 SYSTEM_PROMPT = """You are an ArXiv research assistant. You can help users search for papers, get paper details, and analyze research trends.
 You have access to the following tools:
 - search_arxiv_papers: Search for papers on ArXiv
 - get_arxiv_paper: Get metadata for a specific paper
-- arxiv_paper_pipeline: Add papers to the knowledge graph
+- arxiv_paper_pipeline: Add papers to the knowledge graph and download PDFs
+- query_arxiv_authors: Find the authors of a paper in the knowledge graph
+- query_arxiv_papers: Find papers by author or category in the knowledge graph
+- execute_arxiv_query: Run a custom SPARQL query on the knowledge graph
 
-When users ask about papers, first search for relevant papers using search_arxiv_papers. Then you can get detailed information about specific papers using get_arxiv_paper.
-Use arxiv_paper_pipeline to add important papers to the knowledge graph for future reference."""
+When users ask about papers, first search for relevant papers using search_arxiv_papers.
+Then you can get detailed information about specific papers using get_arxiv_paper.
+Use arxiv_paper_pipeline to add important papers to the knowledge graph for future reference.
+Use the query tools to search for information in papers you've already added to the knowledge graph."""
 
 class ArXivAssistant(Agent):
     """Assistant for interacting with ArXiv papers."""
     pass
 
-def create_arxiv_agent(
+def create_agent(
     agent_shared_state: AgentSharedState = None,
     agent_configuration: AgentConfiguration = None
 ) -> Agent:
@@ -58,11 +66,21 @@ def create_arxiv_agent(
     arxiv_pipeline = ArXivPaperPipeline(
         ArXivPaperPipelineConfiguration(
             arxiv_integration_config=arxiv_integration_config,
-            ontology_store=ontology_store
+            ontology_store=ontology_store,
+            storage_base_path="storage/triplestore/application-level/arxiv",
+            pdf_storage_path="datastore/application-level/arxiv"
         )
     )
     tools += arxiv_pipeline.as_tools()
 
+    # Add ArXiv query workflow
+    arxiv_query_workflow = ArXivQueryWorkflow(
+        ArXivQueryWorkflowConfiguration(
+            storage_path="storage/triplestore/application-level/arxiv"
+        )
+    )
+    tools += arxiv_query_workflow.as_tools()
+
     # Use provided configuration or create default
     if agent_configuration is None:
         agent_configuration = AgentConfiguration(
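For orientation after the rename from create_arxiv_agent to create_agent, here is a minimal usage sketch of the migrated assistant. The completion-style call at the end is commented out because the Agent interface's method names are not shown in this diff and are only an assumption.

# Minimal usage sketch, assuming the module layout introduced by this commit.
from src.custom.modules.arxiv_agent.assistants.ArXivAssistant import create_agent

# Build the assistant with default shared state and configuration; this wires in
# the ArXiv integration, paper pipeline, and query workflow tools shown above.
agent = create_agent()

# Assumption: the Agent exposes a chat/invoke-style entrypoint (name not shown here).
# agent.invoke("Find recent papers on retrieval-augmented generation")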

src/custom/arxiv-agent/pipelines/ArXivPaperPipeline.py → src/custom/modules/arxiv_agent/pipelines/ArXivPaperPipeline.py (+56 -17)
@@ -1,5 +1,9 @@
 from dataclasses import dataclass
 from datetime import datetime
+import os
+import uuid
+import re
+import requests
 from rdflib import Graph
 from pydantic import BaseModel, Field
 from typing import Optional
@@ -8,19 +12,21 @@
 
 from abi.pipeline import Pipeline, PipelineConfiguration
 from abi.utils.Graph import ABIGraph, ABI, BFO
-from src.integrations.ArXivIntegration import ArXivIntegration, ArXivIntegrationConfiguration
+from src.custom.modules.arxiv_agent.integrations.ArXivIntegration import ArXivIntegration, ArXivIntegrationConfiguration
 from abi.services.ontology_store.OntologyStorePorts import IOntologyStoreService
 
 @dataclass
 class ArXivPaperPipelineConfiguration(PipelineConfiguration):
     """Configuration for ArXivPaperPipeline."""
     arxiv_integration_config: ArXivIntegrationConfiguration
     ontology_store: IOntologyStoreService
-    ontology_store_name: str = "arxiv"
+    storage_base_path: str = "storage/triplestore/application-level/arxiv"
+    pdf_storage_path: str = "datastore/application-level/arxiv"
 
 class ArXivPaperPipelineParameters(BaseModel):
     """Parameters for ArXivPaperPipeline."""
     paper_id: str = Field(..., description="ArXiv paper ID")
+    download_pdf: bool = Field(True, description="Whether to download the paper's PDF")
 
 class ArXivPaperPipeline(Pipeline):
     """Pipeline for adding ArXiv papers to the ontology."""
@@ -29,20 +35,18 @@ def __init__(self, configuration: ArXivPaperPipelineConfiguration):
         super().__init__(configuration)
         self.__configuration = configuration
         self.__arxiv_integration = ArXivIntegration(configuration.arxiv_integration_config)
+
+        # Ensure storage directories exist
+        os.makedirs(self.__configuration.storage_base_path, exist_ok=True)
+        os.makedirs(self.__configuration.pdf_storage_path, exist_ok=True)
 
     def run(self, parameters: ArXivPaperPipelineParameters) -> Graph:
         # Init graph
-        try:
-            existing_graph = self.__configuration.ontology_store.get(self.__configuration.ontology_store_name)
-            graph = ABIGraph()
-            for triple in existing_graph:
-                graph.add(triple)
-        except Exception:
-            graph = ABIGraph()
+        graph = ABIGraph()
 
         # Get paper data
         paper_data = self.__arxiv_integration.get_paper(parameters.paper_id)
-
+
         # Add paper to graph
         paper = graph.add_individual_to_prefix(
             prefix=ABI,
@@ -86,25 +90,60 @@ def run(self, parameters: ArXivPaperPipelineParameters) -> Graph:
         )
         graph.add((paper, ABI.hasCategory, cat))
 
-        self.__configuration.ontology_store.store(self.__configuration.ontology_store_name, graph)
+        # Generate a unique filename based on paper title and UUID
+        # Clean the title to create a valid filename
+        safe_title = re.sub(r'[^\w\s-]', '', paper_data["title"])
+        safe_title = re.sub(r'[\s-]+', '_', safe_title).lower()
+        safe_title = safe_title[:50]  # Limit length
+        unique_id = str(uuid.uuid4())
+
+        # Store the TTL file
+        ttl_filename = f"{safe_title}_{unique_id}.ttl"
+        ttl_filepath = os.path.join(self.__configuration.storage_base_path, ttl_filename)
+
+        with open(ttl_filepath, 'wb') as f:
+            f.write(graph.serialize(format="turtle").encode('utf-8'))
+
+        print(f"Paper metadata stored at: {ttl_filepath}")
+
+        # Download PDF if requested
+        if parameters.download_pdf and paper_data["pdf_url"]:
+            try:
+                pdf_filename = f"{safe_title}_{unique_id}.pdf"
+                pdf_filepath = os.path.join(self.__configuration.pdf_storage_path, pdf_filename)
+
+                # Add PDF file path to graph
+                graph.add((paper, ABI.localFilePath, pdf_filepath))
+
+                response = requests.get(paper_data["pdf_url"], stream=True)
+                response.raise_for_status()
+
+                with open(pdf_filepath, 'wb') as pdf_file:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        pdf_file.write(chunk)
+
+                print(f"PDF downloaded to: {pdf_filepath}")
+
+                # Update the TTL file to include the PDF file path
+                with open(ttl_filepath, 'wb') as f:
+                    f.write(graph.serialize(format="turtle").encode('utf-8'))
+            except Exception as e:
+                print(f"Error downloading PDF: {e}")
+
         return graph
 
     def as_tools(self) -> list[StructuredTool]:
         return [
             StructuredTool(
                 name="arxiv_paper_pipeline",
-                description="Adds an ArXiv paper to the ontology",
+                description="Adds an ArXiv paper to the ontology and optionally downloads the PDF",
                 func=lambda **kwargs: self.run(ArXivPaperPipelineParameters(**kwargs)),
                 args_schema=ArXivPaperPipelineParameters
             )
         ]
 
     def as_api(self, router: APIRouter) -> None:
-        """Adds API endpoints for this pipeline to the given router.
-
-        Args:
-            router (APIRouter): FastAPI router to add endpoints to
-        """
+        """Adds API endpoints for this pipeline to the given router."""
         @router.post("/arxiv/paper")
         def run(parameters: ArXivPaperPipelineParameters):
             return self.run(parameters).serialize(format="turtle")
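To make the new persistence behaviour concrete, here is a small worked example of the filename sanitization applied to a sample paper title, followed by a direct pipeline invocation. The paper ID is the well-known "Attention Is All You Need" preprint, used purely as an illustration, and the pipeline instance is assumed to be configured as sketched earlier.

import re
import uuid

# Mirrors the sanitization in run(): strip punctuation, collapse whitespace and
# hyphens into underscores, lowercase, and cap the length at 50 characters.
title = "Attention Is All You Need"
safe_title = re.sub(r'[^\w\s-]', '', title)
safe_title = re.sub(r'[\s-]+', '_', safe_title).lower()[:50]
print(f"{safe_title}_{uuid.uuid4()}.ttl")
# -> attention_is_all_you_need_<random-uuid>.ttl

# Running the pipeline stores that .ttl file and, by default, downloads the PDF too:
# graph = pipeline.run(ArXivPaperPipelineParameters(paper_id="1706.03762"))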
