Skip to content

Commit 485d16a

Browse files
committed
fs/gfa: order edges by node id, don't mix different semantics
this makes a lot of things much easier; we don't care about degree order in this case at all.
1 parent 9081169 commit 485d16a

File tree

3 files changed

+108
-150
lines changed

3 files changed

+108
-150
lines changed

fs/gfa.c

+108-129
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,9 @@ typedef struct Aux Aux;
2121
struct Aux{
2222
namemap *names;
2323
edgeset *edges;
24-
vlong *nodeoff;
2524
int *length;
2625
ushort *degree;
27-
ioff *index;
26+
vlong *nodeoff;
2827
vlong *edgeoff;
2928
ioff nnodes;
3029
ioff nedges;
@@ -98,126 +97,106 @@ readedgetags(Aux *a, File *f)
9897
return 0;
9998
}
10099

101-
/* UGH */
102100
static void
103-
initedges(ioff nedges, edgeset *eset, ioff *index, ushort *degree)
101+
printgraph(void)
102+
{
103+
ioff i, j, x, *e, *ee;
104+
Node *u, *ue;
105+
106+
warn("current graph: %d nodes (%d rnodes) %d edges (%d redges)\n",
107+
dylen(nodes), dylen(rnodes), dylen(edges), dylen(redges));
108+
for(i=0, u=nodes, ue=u+dylen(u); u<ue; u++, i++){
109+
warn("[%d] off=%d ne=%d\n", i, u->eoff, u->nedges);
110+
for(e=edges+u->eoff, ee=e+u->nedges; e<ee; e++){
111+
x = *e;
112+
j = x >> 2;
113+
warn("\t<%zd> %08x %d%c%d%c\n", e-edges, x,
114+
i, x&1?'-':'+', j, x&2?'-':'+');
115+
}
116+
}
117+
}
118+
119+
static void
120+
initedges(ioff nedges, edgeset *eset)
104121
{
105-
ushort d, *deg;
106-
ioff off, i, j;
107-
vlong nn, nm;
108-
u64int e, u, v;
122+
ioff off, ne, x, u, v;
123+
Node *n;
124+
u64int e;
109125
edgeset *h;
110126
khint_t k;
111127

112-
nn = dylen(edges); /* mooltigraph */
113-
nm = nn + 2 * nedges;
114-
dyresize(edges, nm);
115-
dyresize(redges, nedges);
128+
ne = kh_size(eset);
129+
x = dylen(redges); /* mooltigraph */
130+
dyresize(redges, x + ne);
131+
x = dylen(edges);
132+
dyresize(edges, x + nedges);
116133
h = eset;
117-
deg = degree;
118134
kh_foreach(h, k){
119135
e = kh_key(h, k);
120-
u = e >> 32ULL & 0xffffffff;
121-
v = e & 0xffffffff;
122-
i = u >> 1;
123-
j = v >> 1;
124-
if(debug & Debuggraph)
125-
warn("map %d%c %d%c → ", i, u&1?'-':'+', j, v&1?'-':'+');
126-
assert(i < dylen(nodes));
127-
assert(j < dylen(nodes));
128-
/* FIXME: now that we don't assume in edges are at the end, get rid
129-
* of this shit */
130-
if(i == j){ /* self edge */
131-
deg[i]--;
132-
off = index[i]++;
133-
if(debug & Debuggraph)
134-
warn("self off=%d base=%d\n", off, nodes[j].eoff);
135-
edges[off] = v << 1 | u & 1;
136-
}else if(u & 1){ /* in edge */
137-
d = --deg[i];
138-
off = index[i] + d;
139-
if(debug & Debuggraph)
140-
warn("in off=%d base=%d, ", off, nodes[i].eoff);
141-
edges[off] = v << 1 | 1;
142-
deg[j]--;
143-
off = index[j]++;
144-
if(debug & Debuggraph)
145-
warn("out off=%d base=%d\n", off, nodes[j].eoff);
146-
edges[off] = (u ^ 1) << 1 | v & 1 ^ 1;
147-
}else{ /* out edge */
148-
deg[i]--;
149-
off = index[i]++;
150-
if(debug & Debuggraph)
151-
warn("out off=%d base=%d, ", off, nodes[i].eoff);
152-
edges[off] = v << 1 | 0;
153-
d = --deg[j];
154-
off = index[j] + d;
155-
if(debug & Debuggraph)
156-
warn("in off=%d base=%d\n", off, nodes[j].eoff);
157-
edges[off] = (u ^ 1) << 1 | v & 1 ^ 1;
158-
}
136+
u = e >> 32 & 0x7fffffff;
137+
v = e >> 2 & 0x3fffffff;
138+
assert(u < dylen(nodes) && v < dylen(nodes));
139+
n = nodes + u;
140+
off = n->eoff + n->nedges++;
141+
assert(off >= 0 && off < dylen(edges));
142+
x = v << 2 | e & 3;
143+
DPRINT(Debugfs, "map %d%c%d%c → edge[%d]=%08x",
144+
u, e&1?'-':'+', v, e&2?'-':'+', off, x);
145+
edges[off] = x;
146+
if(u == v)
147+
continue;
148+
n = nodes + v;
149+
off = n->eoff + n->nedges++;
150+
assert(off >= 0 && off < dylen(edges));
151+
x = u << 2 | (e >> 1 & 1 | e << 1 & 2) ^ 3;
152+
DPRINT(Debugfs, "map %d%c%d%c → edge[%d]=%08x",
153+
v, e&2?'+':'-', u, e&1?'+':'-', off, x);
154+
edges[off] = x;
155+
}
156+
}
157+
158+
static void
159+
initnodes(ioff nnodes, int *len, ushort *deg)
160+
{
161+
int i;
162+
ioff off;
163+
vlong nn;
164+
Node *n, *ne;
165+
V v;
166+
167+
nn = dylen(nodes); /* for mooltigraph */
168+
if(nn > 0){
169+
n = nodes + nn - 1;
170+
off = n->eoff + n->nedges;
171+
}else
172+
off = 0;
173+
dyresize(nodes, (nn + nnodes));
174+
dyresize(rnodes, (nn + nnodes));
175+
for(i=nn, n=nodes+nn, ne=n+nnodes; n<ne; n++, i++){
176+
n->id = i;
177+
n->eoff = off;
178+
off += *deg++;
179+
v.i = *len++;
180+
setspectag(TLN, i, v);
159181
}
160182
}
161183

184+
/* FIXME: mooltigraph: can preprocess the gfas in parallel, then
185+
* once all threads are done, we can join everything here; we
186+
* can use common stuff if we add locks; else we just have to load
187+
* gfas sequentially */
162188
static void
163189
mkgraph(Aux *a)
164190
{
165-
initnodes(a->nnodes, a->length, a->index, a->degree);
191+
initnodes(a->nnodes, a->length, a->degree);
166192
dyfree(a->length);
167-
initedges(a->nedges, a->edges, a->index, a->degree);
168-
edges_destroy(a->edges);
169193
dyfree(a->degree);
170-
free(a->index);
194+
initedges(a->nedges, a->edges);
195+
edges_destroy(a->edges);
171196
if(newlayout(-1) < 0)
172197
warn("initgraph: %s\n", error());
173198
}
174199

175-
static void
176-
mkbuckets(Aux *a)
177-
{
178-
ioff *off, *offset, *idx, *t, *te, *totals;
179-
uint k, n, m;
180-
ushort d, *dp, *de;
181-
182-
/* cumulative total number of edges per degree */
183-
totals = nil;
184-
for(dp=a->degree, de=dp+dylen(dp); dp<de; dp++){
185-
d = *dp;
186-
dyresize(totals, d+1);
187-
totals[d]++;
188-
}
189-
/* offset of node's current adjacency list */
190-
offset = emalloc(dylen(totals) * sizeof *offset);
191-
for(n=m=0, d=0, off=offset, t=totals, te=t+dylen(t); t<te; d++, off++){
192-
k = *t;
193-
n += k;
194-
*t++ = n;
195-
if(d > 0){
196-
*off = m;
197-
m += k * d;
198-
}
199-
}
200-
/* allot nodes by increasing degree number */
201-
a->index = emalloc(dylen(a->degree) * sizeof *a->index);
202-
for(idx=a->index, dp=a->degree, de=dp+dylen(dp); dp<de; dp++){
203-
d = *dp;
204-
*idx++ = offset[d];
205-
offset[d] += d;
206-
}
207-
free(offset);
208-
if(debug & Debugfs){
209-
for(n=0, t=totals, te=t+dylen(t); t<te; t++){
210-
m = *t;
211-
if(m > n){
212-
warn("totals[%zd] %d elements (%d total)\n", t-totals, m - n, m);
213-
n = m;
214-
}
215-
}
216-
}
217-
dyfree(totals);
218-
USED(totals);
219-
}
220-
221200
static ioff
222201
pushnode(Aux *a, char *s, int *abs)
223202
{
@@ -235,9 +214,9 @@ pushnode(Aux *a, char *s, int *abs)
235214
kh_val(h, k) = id;
236215
v.s = s;
237216
setspectag(Tnode, id, v);
217+
DPRINT(Debugfs, "node \"%s\" → %d", s, id);
238218
}else
239219
id = kh_val(h, k);
240-
DPRINT(Debugfs, "node[%d] %s", id, s);
241220
return id;
242221
}
243222

@@ -285,7 +264,7 @@ static inline int
285264
readedge(Aux *a, File *f)
286265
{
287266
int i, j, abs;
288-
ioff n, u, v;
267+
ioff x, u, v;
289268
vlong off;
290269
u64int id;
291270
char *s, *fld[4], **fp;
@@ -307,43 +286,43 @@ readedge(Aux *a, File *f)
307286
v = pushnode(a, fld[2], &abs);
308287
i = *fld[1] == '+' ? 0 : 1;
309288
j = *fld[3] == '+' ? 0 : 1;
310-
DPRINT(Debugfs, "edge[%d] %d%c,%d%c", a->nedges, u, *fld[1], v, *fld[3]);
311289
if(nextfield(f) != nil)
312290
off = f->foff;
313291
else
314292
off = -1;
315293
/* precedence rule: always flip edge st. u < v or u is forward if self edge */
316-
if(u > v || u == v && i == 1){
317-
n = u;
318-
u = v;
319-
v = n;
320-
n = i ^ 1;
321-
i = j ^ 1;
322-
j = n;
323-
}
294+
if(u > v || u == v && i == 1)
295+
id = (u64int)v << 32 | u << 2 | (i << 1 | j) ^ 3;
296+
else
297+
id = (u64int)u << 32 | v << 2 | j << 1 | i;
324298
/* guard against duplicate records */
325-
id = ((u64int)u << 1 | i) << 32ULL | v << 1 | j;
326299
edges_put(a->edges, id, &abs);
327300
if(!abs){
328301
werrstr("duplicate edge, ignored");
329302
return -1;
330303
}
304+
DPRINT(Debugfs, "edge \"%s%c,%s%c\" → %d%c%d%c",
305+
fld[0], *fld[1], fld[2], *fld[3],
306+
u, i?'-':'+', v, j?'-':'+');
331307
dypush(a->edgeoff, off);
332-
dyresize(a->degree, v+1);
308+
x = u > v ? u : v;
309+
dyresize(a->degree, x+1);
333310
a->degree[u]++;
334-
if(v != u)
311+
if(v != u){
335312
a->degree[v]++;
336-
a->nedges++;
313+
a->nedges += 2;
314+
}else
315+
a->nedges++;
337316
return 0;
338317
}
339318

340319
static int
341320
readgfa(Aux *a, File *f)
342321
{
343-
int nerr, nwarn, r;
322+
int ns, nl, nerr, nwarn, r;
344323
char *s;
345324

346-
nerr = nwarn = 0;
325+
ns = nl = nerr = nwarn = 0;
347326
while(readline(f) != nil){
348327
if((s = nextfield(f)) == nil)
349328
continue;
@@ -359,14 +338,14 @@ readgfa(Aux *a, File *f)
359338
case 'S':
360339
if((r = readnode(a, f)) < 0)
361340
break;
362-
if(a->nnodes % 1000000 == 0)
363-
warn("readgfa: %.3g nodes...\n", (double)a->nnodes);
341+
if(++ns % 1000000 == 0)
342+
warn("readgfa: %.3g nodes...\n", (double)ns);
364343
break;
365344
case 'L':
366345
if((r = readedge(a, f)) < 0)
367346
break;
368-
if(a->nedges % 1000000 == 0)
369-
warn("readgfa: %.3g edges...\n", (double)a->nedges);
347+
if(++nl % 1000000 == 0)
348+
warn("readgfa: %.3g edges...\n", (double)nl);
370349
break;
371350
default:
372351
DPRINT(Debuginfo, "line %d: unhandled record type %c", f->nr, s[0]);
@@ -387,6 +366,10 @@ readgfa(Aux *a, File *f)
387366
werrstr("empty graph");
388367
return -1;
389368
}
369+
if(a->nedges <= 0){
370+
werrstr("no edges");
371+
return -1;
372+
}
390373
if(dylen(a->nodeoff) < a->nnodes){ /* the reverse is fine */
391374
werrstr("missing S lines: links reference non-existent segments");
392375
return -1;
@@ -433,15 +416,11 @@ loadgfa1(void *arg)
433416
pushcmd("loadbatch()");
434417
flushcmd();
435418
TIME("readgfa");
436-
if(a.nnodes == 0)
437-
sysfatal("loadgfa: no nodes\n");
438-
else if(a.nedges == 0)
439-
sysfatal("loadgfa: no edges\n");
440419
logmsg("loadgfa: initializing graph...\n");
441-
mkbuckets(&a); /* compute slots for each node's edges */
442-
TIME("mkbuckets");
443420
mkgraph(&a); /* batch initialize nodes and edges from indexes */
444421
TIME("mkgraph");
422+
if(debug & Debuggraph)
423+
printgraph();
445424
if(gottagofast){ /* FIXME: very much unsafe, won't work with csv, etc. */
446425
pushcmd("cmd(\"OPL753\")"); /* load what we have and start layouting */
447426
flushcmd(); /* awk must be in on it or it won't work */

graph/graph.c

-20
Original file line numberDiff line numberDiff line change
@@ -122,23 +122,3 @@ setattr(int type, ioff id, V val)
122122
break;
123123
}
124124
}
125-
126-
void
127-
initnodes(ioff nnodes, int *len, ioff *off, ushort *deg)
128-
{
129-
int i;
130-
vlong nn;
131-
Node *n, *ne;
132-
V v;
133-
134-
nn = dylen(nodes); /* for mooltigraph */
135-
dyresize(nodes, (nn + nnodes));
136-
dyresize(rnodes, (nn + nnodes));
137-
for(i=nn, n=nodes+nn, ne=n+nnodes; n<ne; n++, i++){
138-
n->id = i;
139-
n->nedges = *deg++;
140-
n->eoff = *off++;
141-
v.i = *len++;
142-
setspectag(TLN, i, v);
143-
}
144-
}

graph/graph.h

-1
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,3 @@ int expand(ioff);
66
ioff getnodeidx(ioff);
77
ioff getrealid(ioff);
88
u64int getrealedge(ioff);
9-
void initnodes(ioff, int*, ioff*, ushort*);

0 commit comments

Comments
 (0)