-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add control flow for pre-sorted bed files #69
Labels
Comments
From most recent uniwig commits, it appears that, even if the flag std::vector<chromosome> chromosomes;
chromosomes = read_bed_vec(bedPath); std::vector<chromosome> read_bed_vec(const char *bedPath)
{
//vector of vector of regions to store regions in one vector per chromosome
//std::cout << "\nInput file: " << bedPath << "\n";
std::cout << "\nReading chromosomes" << std::endl;
gzFile fp;
kstream_t *ks;
kstring_t str = {0, 0, 0};
// int32_t k = 0;
fp = bedPath && strcmp(bedPath, "-") ? gzopen(bedPath, "r") : gzdopen(0, "r");
if (fp == 0)
{
fprintf(stderr, "ERROR: failed to open the input file\n");
exit(1);
}
ks = ks_init(fp);
chromosome chr;
char chrom[100] = "";
std::vector<chromosome> chromosomes;
while (ks_getuntil(ks, KS_SEP_LINE, &str, 0) >= 0)
{
char *ctg, *rest;
int32_t st, en;
ctg = parse_bed(str.s, &st, &en, &rest);
//std:: cout << "\n" << ctg << "\t" << st << "\t" << en;
if (strcmp(chrom, "") == 0)
{
strcpy(chrom, ctg);
chr.chrom = std::string(chrom);
chr.starts.push_back(st);
chr.ends.push_back(en);
continue;
}
if (ctg)
{
if(strcmp(chrom, ctg) != 0)
{
// std::cout << "\nI'm here, chrom = " << chrom << ", ctg = " << ctg << ", compare result: "<< strcmp(chrom, ctg) <<"\n";
kx::radix_sort(chr.starts.begin(),chr.starts.end());
kx::radix_sort(chr.ends.begin(),chr.ends.end());
chromosomes.push_back(chr);
strcpy(chrom, ctg);
chr.chrom = std::string(chrom);
std::vector<int> start;
std::vector<int> end;
chr.ends = end;
chr.starts = start;
}
chr.starts.push_back(st);
chr.ends.push_back(en);
}
}
// sort the starts and ends respectively
kx::radix_sort(chr.starts.begin(),chr.starts.end());
kx::radix_sort(chr.ends.begin(),chr.ends.end());
chromosomes.push_back(chr);
std::cout << "Reading finished\n" << std::endl;
free(str.s);
ks_destroy(ks);
gzclose(fp);
return chromosomes;
} If sorted is false, the program creates a mapping: std::map<std::string, chromosome> chromosomes;
chromosomes = read_bed_map(bedPath); where std::map<std::string, chromosome> read_bed_map(const char *bedPath)
{
//vector of vector of regions to store regions in one vector per chromosome
std::cout << "\nReading chromosomes" << std::endl;
//std::cout << "\nInput file: " << bedPath << "\n";
gzFile fp;
kstream_t *ks;
kstring_t str = {0, 0, 0};
fp = bedPath && strcmp(bedPath, "-") ? gzopen(bedPath, "r") : gzdopen(0, "r");
if (fp == 0)
{
fprintf(stderr, "ERROR: failed to open the input file\n");
exit(1);
}
ks = ks_init(fp);
// chromosome chr;
char chrom[100] = "";
std::map<std::string, chromosome> chromosomes;
// std::vector<chromosome> chromosomes;
while (ks_getuntil(ks, KS_SEP_LINE, &str, 0) >= 0)
{
char *ctg, *rest;
int32_t st, en;
ctg = parse_bed(str.s, &st, &en, &rest);
// std:: cout << "\n" << ctg << "\t" << st << "\t" << en;
if (ctg) {
if (strcmp(chrom, ctg) != 0) {
strcpy(chrom, ctg);
if (chromosomes.find(chrom) == chromosomes.end()) {
chromosome chr;
chr.chrom = std::string(chrom);
// fprintf(stderr, "Creating a new chromosome: %s\n", chr.chrom.c_str());
chromosomes.insert(std::pair<std::string, chromosome>(chrom, chr));
}
}
chromosomes[chrom].starts.push_back(st);
chromosomes[chrom].ends.push_back(en);
}
}
// sort the starts and ends respectively
for (std::map<std::string, chromosome>::iterator it=chromosomes.begin(); it!=chromosomes.end(); it++) {
kx::radix_sort(it->second.starts.begin(),it->second.starts.end());
kx::radix_sort(it->second.ends.begin(),it->second.ends.end());
}
std::cout << "Reading finished\n" << std::endl;
free(str.s);
ks_destroy(ks);
gzclose(fp);
return chromosomes;
}
This function also sorts the starts and ends in memory. TODO:
|
donaldcampbelljr
added a commit
that referenced
this issue
Jan 21, 2025
This may be superseded by #72 |
Closing for now as building with release flag helped immensely. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
No description provided.
The text was updated successfully, but these errors were encountered: