Skip to content

Ena interface #359

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 11 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
240 changes: 240 additions & 0 deletions app/api/ena/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
import {
ENAAssembly,
PrimaryDataApiResult,
} from "app/apis/catalog/brc-analytics-catalog/common/entities";
import { NextResponse } from "next/server";

// Upper bound on the number of read_run records a single request may return:
// POST rejects queries whose ENA count exceeds this value and also passes it
// as the `limit` parameter of the ENA search call.
const items_limit = 10000;
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maximum number of items that may be returned; this should probably be configurable.


/**
 * Sends a GET request to an ENA endpoint and normalises the outcome into a
 * `PrimaryDataApiResult`, so callers can branch on `status` / `error`.
 *
 * @param url - Fully built ENA URL, query string included.
 * @param redirect_arg - Fetch redirect mode. With the default, "manual", a 301
 *   answered by ENA is reported to the caller instead of being followed.
 * @returns On HTTP 200: the parsed JSON body in `data`; `count` is the body's
 *   `count` field parsed as an integer, the length of an array body, or the
 *   body itself when it is a bare number. On any other outcome: `count` 0,
 *   empty `data`, a message in `error` and the HTTP status (502 when the
 *   request could not be completed at all).
 * @throws If a 200 response carries a body that is not valid JSON
 *   (`response.json()` rejects).
 */
async function fetchDataFromENA(
  url: string,
  redirect_arg: RequestRedirect = "manual"
): Promise<PrimaryDataApiResult> {
  let response: Response;
  try {
    response = await fetch(url, { redirect: redirect_arg });
  } catch (cause) {
    // Network-level failure (DNS, refused connection, ...): report it in the
    // same shape as an HTTP error so callers have a single error path.
    const reason = cause instanceof Error ? cause.message : String(cause);
    return { count: 0, data: [], error: `from ENA, ${reason}`, status: 502 };
  }

  if (response.status === 301) {
    console.debug(
      `ENA Rest API has been updated, internal API call needs update: ${url}`
    );
    return {
      count: 0,
      data: [],
      error: `ENA Rest API has been updated!!!`,
      status: 301,
    };
  }

  if (response.status !== 200) {
    const errorMessageText = await response.text();
    // Default to the raw body; prefer the `message` field when the body is a
    // JSON document that provides one.
    let errorMessage = errorMessageText;
    try {
      const parsed: unknown = JSON.parse(errorMessageText);
      const message = (parsed as { message?: unknown } | null)?.message;
      if (typeof message === "string") {
        errorMessage = message;
      }
    } catch {
      // Body is not JSON: the raw text is the best description available.
    }
    return {
      count: 0,
      data: [],
      error: `from ENA, ${errorMessage}`,
      status: response.status,
    };
  }

  const result = await response.json();
  let count = 0;
  if (Array.isArray(result)) {
    count = result.length;
  } else if (typeof result === "number") {
    count = result;
  } else if (typeof result === "object" && result !== null && "count" in result) {
    count = Number.parseInt(String(result.count), 10);
  }
  return {
    count,
    data: result,
    error: "",
    status: response.status,
  };
}

/**
 * Translates every assembly accession clause of an ENA filter
 * (`accession=GCA_...` / `accession=GCF_...`, optionally quoted and with
 * optional whitespace around `=`) into the matching `sample_accession`
 * clause(s), looking the samples up through the ENA `assembly` search.
 *
 * Example, with ENA reporting two samples for GCA_009859065.2 and none for
 * GCF_009859065.2:
 *   'accession=GCA_009859065.2 AND accession=GCF_009859065.2'
 * becomes
 *   '(sample_accession="SAMN09946140" OR sample_accession="SAMN0994555") AND sample_accession="NO_SAMPLE"'
 *
 * Clauses on other fields whose name merely ends in "accession"
 * (`sample_accession=`, `study_accession=`, ...) are left untouched, and a
 * filter without any bare `accession=` clause is returned as is without
 * contacting ENA.
 *
 * @param filter - Filter expression received from the client.
 * @returns The filter with each assembly accession clause replaced.
 * @throws Error when a bare `accession=` clause does not carry a
 *   GCA_/GCF_ identifier, or when the ENA lookup does not answer with 200.
 */
async function processAccessionIds(filter: string): Promise<string> {
  // `\b` keeps `sample_accession=`, `study_accession=`, ... out of both patterns.
  const anyAccessionClause = /\baccession\s*=/g;
  const assemblyClause = /\baccession\s*=\s*"?(GC[FA]_\d+\.\d+)"?/g;

  const clauseCount = filter.match(anyAccessionClause)?.length ?? 0;
  if (clauseCount === 0) {
    // Nothing to translate: skip the validation and the ENA round trip.
    return filter;
  }

  const accessions: string[] = [];
  for (const clause of filter.matchAll(assemblyClause)) {
    accessions.push(clause[1]);
  }

  // Every bare `accession=` clause must have been recognised as GCA_/GCF_.
  if (accessions.length !== clauseCount) {
    throw new Error(
      `GCF/GCA syntax error, on or multiple accession id have incorrect format, should be GCF_XXXXXXX.X or GCA_XXXXXXX.X`
    );
  }

  // Ask ENA which samples belong to each distinct assembly accession.
  const lookupQuery = new URLSearchParams({
    query: Array.from(new Set(accessions))
      .map((id) => `assembly_set_accession="${id}"`)
      .join(" OR "),
  });
  const lookupUrl = `https://www.ebi.ac.uk/ena/portal/api/search?result=assembly&fields=assembly_set_accession,sample_accession&${lookupQuery.toString()}&format=json`;

  const lookup = await fetchDataFromENA(lookupUrl);
  if (lookup.status !== 200) {
    throw new Error(
      `ENA API error: status: ${lookup.status}, message: ${lookup.error}`
    );
  }

  // Group the returned sample accessions by assembly accession.
  const samplesByAssembly = new Map<string, string[]>();
  for (const row of lookup.data as ENAAssembly[]) {
    const known = samplesByAssembly.get(row.assembly_set_accession);
    if (known) {
      known.push(row.sample_accession);
    } else {
      samplesByAssembly.set(row.assembly_set_accession, [row.sample_accession]);
    }
  }

  // Single pass over the filter: each clause is replaced at its own position,
  // so repeated accessions and accessions that are prefixes of one another
  // cannot interfere, and `$` in a replacement is never interpreted.
  return filter.replace(
    assemblyClause,
    (_clause: string, accession: string): string => {
      const samples = samplesByAssembly.get(accession);
      if (!samples || samples.length === 0) {
        return `sample_accession="NO_SAMPLE"`;
      }
      const alternatives = samples
        .map((sampleId) => `sample_accession="${sampleId}"`)
        .join(" OR ");
      return `(${alternatives})`;
    }
  );
}

/**
 * POST handler: searches ENA `read_run` records matching the `filter` found
 * in the JSON request body.
 *
 * Steps: validate the body, translate assembly accessions to sample
 * accessions (`processAccessionIds`), ask ENA how many records match, and only
 * fetch the records when that count is non-zero and at most `items_limit`.
 *
 * @param request - Incoming request; its body must be JSON with a string
 *   `filter` property.
 * @returns A JSON response shaped `{ count, data, error? }`:
 *   - 400 when the body is not JSON or `filter` is not a string;
 *   - 500 when the accession translation throws;
 *   - the ENA status when the count or search call does not return 200;
 *   - 502 when an ENA call throws;
 *   - 200 with the records otherwise.
 */
export async function POST(request: Request): Promise<NextResponse> {
  let filter: string;
  try {
    const body: unknown = await request.json();
    const candidate = (body as { filter?: unknown } | null)?.filter;
    if (typeof candidate !== "string") {
      return NextResponse.json(
        {
          count: 0,
          data: [],
          error: `Request body must be a JSON object with a string "filter" property.`,
        },
        { status: 400 }
      );
    }
    filter = candidate;
  } catch (error) {
    return NextResponse.json(
      { count: 0, data: [], error: `Invalid JSON request body: ${error}` },
      { status: 400 }
    );
  }

  const fields = [
    "accession",
    "sra_md5",
    "base_count",
    "study_accession",
    "sample_accession",
    "instrument_platform",
    "instrument_model",
    "library_layout",
  ];

  try {
    filter = await processAccessionIds(filter);
  } catch (error) {
    return NextResponse.json(
      { count: 0, data: [], error: `${error}` },
      { status: 500 }
    );
  }

  const runQueryParams = new URLSearchParams({
    query: filter,
  });
  // Collapse an encoded `==` into a single `=` before sending the query to ENA.
  const filter_url = `${runQueryParams.toString().replace(/%3D%3D/g, "%3D")}`;
  const count_url = `https://www.ebi.ac.uk/ena/portal/api/count?result=read_run&${filter_url}&format=json`;

  try {
    const count_response = await fetchDataFromENA(count_url);
    if (count_response.status !== 200) {
      return NextResponse.json(
        { count: 0, data: [], error: count_response.error },
        {
          status: count_response.status,
        }
      );
    }

    const count = count_response.count;
    if (count === 0) {
      return NextResponse.json({ count: 0, data: [] }, { status: 200 });
    }
    if (count > items_limit) {
      // NOTE(review): this is answered with the default 200 status; confirm
      // whether clients would be better served by a 4xx here.
      return NextResponse.json({
        count: 0,
        data: [],
        error: `To many entries return: ${count}, please add filters to reduce the number of entries.`,
      });
    }

    const url_search = `https://www.ebi.ac.uk/ena/portal/api/search?result=read_run&${filter_url}&fields=${fields.join(",")}&limit=${items_limit}&format=json`;
    const search_response = await fetchDataFromENA(url_search);
    if (search_response.status !== 200) {
      // Same payload shape as the count failure above, so the ENA error
      // message reaches the client.
      return NextResponse.json(
        { count: 0, data: [], error: search_response.error },
        {
          status: search_response.status,
        }
      );
    }

    return NextResponse.json(
      { count: count, data: search_response.data },
      { status: search_response.status }
    );
  } catch (error) {
    // An ENA call threw (e.g. network failure or unparsable body).
    return NextResponse.json(
      { count: 0, data: [], error: `${error}` },
      { status: 502 }
    );
  }
}
32 changes: 32 additions & 0 deletions app/apis/catalog/brc-analytics-catalog/common/entities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,38 @@ export interface EntitiesResponsePagination {
total: number;
}

/**
 * One record of the ENA `assembly` search issued by app/api/ena/route.ts,
 * which requests exactly these two fields.
 */
export interface ENAAssembly {
/** Assembly accession; the route compares it with the GCA_/GCF_ ids found in the filter. */
assembly_set_accession: string;
/** Sample accession reported for that assembly; substituted into the filter by the route. */
sample_accession: string;
}

/**
 * Read-run record. Presumably a subset of the fields returned by the ENA
 * `read_run` search in app/api/ena/route.ts — confirm against the consumer.
 *
 * NOTE(review): the route requests `study_accession` from ENA, whereas this
 * interface declares `studies_accession`; confirm which name is intended.
 */
export interface ENAReadRuns {
accession: string;
base_count: string;
sample_accession: string;
studies_accession: string;
}

/**
 * Normalised outcome of a call to ENA, as produced by `fetchDataFromENA` in
 * app/api/ena/route.ts.
 */
export interface PrimaryDataApiResult {
/** Number of matching items; 0 on error. */
count: number;
/**
 * Parsed response body on success, an empty array on error.
 * NOTE(review): `[]` is the empty-tuple type, so only an empty array
 * type-checks here and consumers have to cast (e.g. `as ENAAssembly[]`);
 * consider giving it an element type.
 */
data: [];
/** Error description; the empty string on success. */
error: string;
/** HTTP status of the ENA response. */
status: number;
}

/**
 * Name, type and description of a field.
 * Presumably describes a field that can be used when querying ENA read runs —
 * no usage is visible here, confirm against the consumer.
 */
export interface RunReadsFields {
description: string;
name: string;
type: string;
}

/**
 * Numeric totals for bases, biosamples, read runs and studies.
 * Presumably aggregated over a read-run result set — no usage is visible
 * here, confirm against the consumer.
 */
export interface ReadRunStatistics {
bases: number;
biosamples: number;
read_runs: number;
studies: number;
}

export interface WorkflowCategory {
category: string;
description: string;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import { GridPaperSection } from "@databiosphere/findable-ui/lib/components/common/Section/section.styles";

import styled from "@emotion/styled";

/** `GridPaperSection` with `flex-direction: column` applied on top of its own styles. */
export const StyledSection = styled(GridPaperSection)`
flex-direction: column;
`;

/**
 * Column flex container that takes the available space (`flex: 1`) and
 * centres its children horizontally; any descendant `img` is rendered at a
 * fixed 600px by 600px.
 */
export const SectionContent = styled.div`
flex: 1;
display: flex;
flex-direction: column;
align-items: center;
img {
width: 600px;
height: 600px;
}
`;

/**
 * Bold span whose `.dot` descendants blink: the `blink` keyframes fade an
 * element out and back in over 1.5s, repeating forever, and the first, second
 * and third `.dot` children start with delays of 0s, 0.5s and 1s so they
 * blink one after another.
 */
export const BlinkingDots = styled.span`
font-weight: bold;
@keyframes blink {
0% {
opacity: 1;
}
33% {
opacity: 0;
}
66% {
opacity: 0;
}
100% {
opacity: 1;
}
}
.dot:nth-child(1) {
animation: blink 1.5s infinite;
}
.dot:nth-child(2) {
animation: blink 1.5s infinite 0.5s;
}
.dot:nth-child(3) {
animation: blink 1.5s infinite 1s;
}
`;

/**
 * Span wrapper that scopes styles for descendants using the `.container`,
 * `.list-group-item` and `.pagination` class names: a centred, flex-based
 * pagination list whose links invert their colours on hover and whose
 * `.active` item gets a dashed border.
 *
 * Presumably these class names are the ones emitted by the pagination
 * component rendered inside this wrapper — confirm against the consumer.
 * NOTE(review): the `.container` and `.list-group-item` rules are labelled as
 * blog-post list styling; verify that they are actually used.
 */
export const ReactPageNation = styled.span`
/* General container styling */
.container {
max-width: 800px;
margin: 50px auto;
padding: 20px;
background-color: #f9f9f9;
border-radius: 8px;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
font-family: "Arial", sans-serif;
}

/* Blog posts list styling */
.list-group-item {
background-color: white;
border: 1px solid #ddd;
border-radius: 8px;
padding: 20px;
margin-bottom: 15px;
transition: box-shadow 0.3s ease;
}

.list-group-item p {
color: #666;
line-height: 1.6;
}

/* Pagination styling */
.pagination {
display: flex;
justify-content: center;
padding: 0px;

list-style: none;
}

.pagination li {
margin: 0 1px;
font-size: 0.7rem;
}

.pagination li a {
display: inline-block;
padding: 5px 10px 5px 10px;
background-color: white;
color: #212b36;
border-radius: 5px;
text-decoration: none;
transition:
background-color 0.3s ease,
color 0.3s ease;
}

.pagination li a:hover {
background-color: #212b36;
color: white;
}

.pagination li.active a {
border-color: #212b36;
border-style: dashed;
border-width: 1px;
}
`;
Loading
Loading