@@ -21,10 +21,9 @@ typedef struct Aux Aux;
21
21
struct Aux {
22
22
namemap * names ;
23
23
edgeset * edges ;
24
- vlong * nodeoff ;
25
24
int * length ;
26
25
ushort * degree ;
27
- ioff * index ;
26
+ vlong * nodeoff ;
28
27
vlong * edgeoff ;
29
28
ioff nnodes ;
30
29
ioff nedges ;
@@ -98,126 +97,106 @@ readedgetags(Aux *a, File *f)
98
97
return 0 ;
99
98
}
100
99
101
- /* UGH */
102
100
static void
103
- initedges (ioff nedges , edgeset * eset , ioff * index , ushort * degree )
101
+ printgraph (void )
102
+ {
103
+ ioff i , j , x , * e , * ee ;
104
+ Node * u , * ue ;
105
+
106
+ warn ("current graph: %d nodes (%d rnodes) %d edges (%d redges)\n" ,
107
+ dylen (nodes ), dylen (rnodes ), dylen (edges ), dylen (redges ));
108
+ for (i = 0 , u = nodes , ue = u + dylen (u ); u < ue ; u ++ , i ++ ){
109
+ warn ("[%d] off=%d ne=%d\n" , i , u -> eoff , u -> nedges );
110
+ for (e = edges + u -> eoff , ee = e + u -> nedges ; e < ee ; e ++ ){
111
+ x = * e ;
112
+ j = x >> 2 ;
113
+ warn ("\t<%zd> %08x %d%c%d%c\n" , e - edges , x ,
114
+ i , x & 1 ?'-' :'+' , j , x & 2 ?'-' :'+' );
115
+ }
116
+ }
117
+ }
118
+
119
+ static void
120
+ initedges (ioff nedges , edgeset * eset )
104
121
{
105
- ushort d , * deg ;
106
- ioff off , i , j ;
107
- vlong nn , nm ;
108
- u64int e , u , v ;
122
+ ioff off , ne , x , u , v ;
123
+ Node * n ;
124
+ u64int e ;
109
125
edgeset * h ;
110
126
khint_t k ;
111
127
112
- nn = dylen (edges ); /* mooltigraph */
113
- nm = nn + 2 * nedges ;
114
- dyresize (edges , nm );
115
- dyresize (redges , nedges );
128
+ ne = kh_size (eset );
129
+ x = dylen (redges ); /* mooltigraph */
130
+ dyresize (redges , x + ne );
131
+ x = dylen (edges );
132
+ dyresize (edges , x + nedges );
116
133
h = eset ;
117
- deg = degree ;
118
134
kh_foreach (h , k ){
119
135
e = kh_key (h , k );
120
- u = e >> 32ULL & 0xffffffff ;
121
- v = e & 0xffffffff ;
122
- i = u >> 1 ;
123
- j = v >> 1 ;
124
- if (debug & Debuggraph )
125
- warn ("map %d%c %d%c → " , i , u & 1 ?'-' :'+' , j , v & 1 ?'-' :'+' );
126
- assert (i < dylen (nodes ));
127
- assert (j < dylen (nodes ));
128
- /* FIXME: now that we don't assume in edges are at the end, get rid
129
- * of this shit */
130
- if (i == j ){ /* self edge */
131
- deg [i ]-- ;
132
- off = index [i ]++ ;
133
- if (debug & Debuggraph )
134
- warn ("self off=%d base=%d\n" , off , nodes [j ].eoff );
135
- edges [off ] = v << 1 | u & 1 ;
136
- }else if (u & 1 ){ /* in edge */
137
- d = -- deg [i ];
138
- off = index [i ] + d ;
139
- if (debug & Debuggraph )
140
- warn ("in off=%d base=%d, " , off , nodes [i ].eoff );
141
- edges [off ] = v << 1 | 1 ;
142
- deg [j ]-- ;
143
- off = index [j ]++ ;
144
- if (debug & Debuggraph )
145
- warn ("out off=%d base=%d\n" , off , nodes [j ].eoff );
146
- edges [off ] = (u ^ 1 ) << 1 | v & 1 ^ 1 ;
147
- }else { /* out edge */
148
- deg [i ]-- ;
149
- off = index [i ]++ ;
150
- if (debug & Debuggraph )
151
- warn ("out off=%d base=%d, " , off , nodes [i ].eoff );
152
- edges [off ] = v << 1 | 0 ;
153
- d = -- deg [j ];
154
- off = index [j ] + d ;
155
- if (debug & Debuggraph )
156
- warn ("in off=%d base=%d\n" , off , nodes [j ].eoff );
157
- edges [off ] = (u ^ 1 ) << 1 | v & 1 ^ 1 ;
158
- }
136
+ u = e >> 32 & 0x7fffffff ;
137
+ v = e >> 2 & 0x3fffffff ;
138
+ assert (u < dylen (nodes ) && v < dylen (nodes ));
139
+ n = nodes + u ;
140
+ off = n -> eoff + n -> nedges ++ ;
141
+ assert (off >= 0 && off < dylen (edges ));
142
+ x = v << 2 | e & 3 ;
143
+ DPRINT (Debugfs , "map %d%c%d%c → edge[%d]=%08x" ,
144
+ u , e & 1 ?'-' :'+' , v , e & 2 ?'-' :'+' , off , x );
145
+ edges [off ] = x ;
146
+ if (u == v )
147
+ continue ;
148
+ n = nodes + v ;
149
+ off = n -> eoff + n -> nedges ++ ;
150
+ assert (off >= 0 && off < dylen (edges ));
151
+ x = u << 2 | (e >> 1 & 1 | e << 1 & 2 ) ^ 3 ;
152
+ DPRINT (Debugfs , "map %d%c%d%c → edge[%d]=%08x" ,
153
+ v , e & 2 ?'+' :'-' , u , e & 1 ?'+' :'-' , off , x );
154
+ edges [off ] = x ;
155
+ }
156
+ }
157
+
158
+ static void
159
+ initnodes (ioff nnodes , int * len , ushort * deg )
160
+ {
161
+ int i ;
162
+ ioff off ;
163
+ vlong nn ;
164
+ Node * n , * ne ;
165
+ V v ;
166
+
167
+ nn = dylen (nodes ); /* for mooltigraph */
168
+ if (nn > 0 ){
169
+ n = nodes + nn - 1 ;
170
+ off = n -> eoff + n -> nedges ;
171
+ }else
172
+ off = 0 ;
173
+ dyresize (nodes , (nn + nnodes ));
174
+ dyresize (rnodes , (nn + nnodes ));
175
+ for (i = nn , n = nodes + nn , ne = n + nnodes ; n < ne ; n ++ , i ++ ){
176
+ n -> id = i ;
177
+ n -> eoff = off ;
178
+ off += * deg ++ ;
179
+ v .i = * len ++ ;
180
+ setspectag (TLN , i , v );
159
181
}
160
182
}
161
183
184
+ /* FIXME: mooltigraph: can preprocess the gfas in parallel, then
185
+ * once all threads are done, we can join everything here; we
186
+ * can use common stuff if we add locks; else we just have to load
187
+ * gfas sequentially */
162
188
static void
163
189
mkgraph (Aux * a )
164
190
{
165
- initnodes (a -> nnodes , a -> length , a -> index , a -> degree );
191
+ initnodes (a -> nnodes , a -> length , a -> degree );
166
192
dyfree (a -> length );
167
- initedges (a -> nedges , a -> edges , a -> index , a -> degree );
168
- edges_destroy (a -> edges );
169
193
dyfree (a -> degree );
170
- free (a -> index );
194
+ initedges (a -> nedges , a -> edges );
195
+ edges_destroy (a -> edges );
171
196
if (newlayout (-1 ) < 0 )
172
197
warn ("initgraph: %s\n" , error ());
173
198
}
174
199
175
- static void
176
- mkbuckets (Aux * a )
177
- {
178
- ioff * off , * offset , * idx , * t , * te , * totals ;
179
- uint k , n , m ;
180
- ushort d , * dp , * de ;
181
-
182
- /* cumulative total number of edges per degree */
183
- totals = nil ;
184
- for (dp = a -> degree , de = dp + dylen (dp ); dp < de ; dp ++ ){
185
- d = * dp ;
186
- dyresize (totals , d + 1 );
187
- totals [d ]++ ;
188
- }
189
- /* offset of node's current adjacency list */
190
- offset = emalloc (dylen (totals ) * sizeof * offset );
191
- for (n = m = 0 , d = 0 , off = offset , t = totals , te = t + dylen (t ); t < te ; d ++ , off ++ ){
192
- k = * t ;
193
- n += k ;
194
- * t ++ = n ;
195
- if (d > 0 ){
196
- * off = m ;
197
- m += k * d ;
198
- }
199
- }
200
- /* allot nodes by increasing degree number */
201
- a -> index = emalloc (dylen (a -> degree ) * sizeof * a -> index );
202
- for (idx = a -> index , dp = a -> degree , de = dp + dylen (dp ); dp < de ; dp ++ ){
203
- d = * dp ;
204
- * idx ++ = offset [d ];
205
- offset [d ] += d ;
206
- }
207
- free (offset );
208
- if (debug & Debugfs ){
209
- for (n = 0 , t = totals , te = t + dylen (t ); t < te ; t ++ ){
210
- m = * t ;
211
- if (m > n ){
212
- warn ("totals[%zd] %d elements (%d total)\n" , t - totals , m - n , m );
213
- n = m ;
214
- }
215
- }
216
- }
217
- dyfree (totals );
218
- USED (totals );
219
- }
220
-
221
200
static ioff
222
201
pushnode (Aux * a , char * s , int * abs )
223
202
{
@@ -235,9 +214,9 @@ pushnode(Aux *a, char *s, int *abs)
235
214
kh_val (h , k ) = id ;
236
215
v .s = s ;
237
216
setspectag (Tnode , id , v );
217
+ DPRINT (Debugfs , "node \"%s\" → %d" , s , id );
238
218
}else
239
219
id = kh_val (h , k );
240
- DPRINT (Debugfs , "node[%d] %s" , id , s );
241
220
return id ;
242
221
}
243
222
@@ -285,7 +264,7 @@ static inline int
285
264
readedge (Aux * a , File * f )
286
265
{
287
266
int i , j , abs ;
288
- ioff n , u , v ;
267
+ ioff x , u , v ;
289
268
vlong off ;
290
269
u64int id ;
291
270
char * s , * fld [4 ], * * fp ;
@@ -307,43 +286,43 @@ readedge(Aux *a, File *f)
307
286
v = pushnode (a , fld [2 ], & abs );
308
287
i = * fld [1 ] == '+' ? 0 : 1 ;
309
288
j = * fld [3 ] == '+' ? 0 : 1 ;
310
- DPRINT (Debugfs , "edge[%d] %d%c,%d%c" , a -> nedges , u , * fld [1 ], v , * fld [3 ]);
311
289
if (nextfield (f ) != nil )
312
290
off = f -> foff ;
313
291
else
314
292
off = -1 ;
315
293
/* precedence rule: always flip edge st. u < v or u is forward if self edge */
316
- if (u > v || u == v && i == 1 ){
317
- n = u ;
318
- u = v ;
319
- v = n ;
320
- n = i ^ 1 ;
321
- i = j ^ 1 ;
322
- j = n ;
323
- }
294
+ if (u > v || u == v && i == 1 )
295
+ id = (u64int )v << 32 | u << 2 | (i << 1 | j ) ^ 3 ;
296
+ else
297
+ id = (u64int )u << 32 | v << 2 | j << 1 | i ;
324
298
/* guard against duplicate records */
325
- id = ((u64int )u << 1 | i ) << 32ULL | v << 1 | j ;
326
299
edges_put (a -> edges , id , & abs );
327
300
if (!abs ){
328
301
werrstr ("duplicate edge, ignored" );
329
302
return -1 ;
330
303
}
304
+ DPRINT (Debugfs , "edge \"%s%c,%s%c\" → %d%c%d%c" ,
305
+ fld [0 ], * fld [1 ], fld [2 ], * fld [3 ],
306
+ u , i ?'-' :'+' , v , j ?'-' :'+' );
331
307
dypush (a -> edgeoff , off );
332
- dyresize (a -> degree , v + 1 );
308
+ x = u > v ? u : v ;
309
+ dyresize (a -> degree , x + 1 );
333
310
a -> degree [u ]++ ;
334
- if (v != u )
311
+ if (v != u ){
335
312
a -> degree [v ]++ ;
336
- a -> nedges ++ ;
313
+ a -> nedges += 2 ;
314
+ }else
315
+ a -> nedges ++ ;
337
316
return 0 ;
338
317
}
339
318
340
319
static int
341
320
readgfa (Aux * a , File * f )
342
321
{
343
- int nerr , nwarn , r ;
322
+ int ns , nl , nerr , nwarn , r ;
344
323
char * s ;
345
324
346
- nerr = nwarn = 0 ;
325
+ ns = nl = nerr = nwarn = 0 ;
347
326
while (readline (f ) != nil ){
348
327
if ((s = nextfield (f )) == nil )
349
328
continue ;
@@ -359,14 +338,14 @@ readgfa(Aux *a, File *f)
359
338
case 'S' :
360
339
if ((r = readnode (a , f )) < 0 )
361
340
break ;
362
- if (a -> nnodes % 1000000 == 0 )
363
- warn ("readgfa: %.3g nodes...\n" , (double )a -> nnodes );
341
+ if (++ ns % 1000000 == 0 )
342
+ warn ("readgfa: %.3g nodes...\n" , (double )ns );
364
343
break ;
365
344
case 'L' :
366
345
if ((r = readedge (a , f )) < 0 )
367
346
break ;
368
- if (a -> nedges % 1000000 == 0 )
369
- warn ("readgfa: %.3g edges...\n" , (double )a -> nedges );
347
+ if (++ nl % 1000000 == 0 )
348
+ warn ("readgfa: %.3g edges...\n" , (double )nl );
370
349
break ;
371
350
default :
372
351
DPRINT (Debuginfo , "line %d: unhandled record type %c" , f -> nr , s [0 ]);
@@ -387,6 +366,10 @@ readgfa(Aux *a, File *f)
387
366
werrstr ("empty graph" );
388
367
return -1 ;
389
368
}
369
+ if (a -> nedges <= 0 ){
370
+ werrstr ("no edges" );
371
+ return -1 ;
372
+ }
390
373
if (dylen (a -> nodeoff ) < a -> nnodes ){ /* the reverse is fine */
391
374
werrstr ("missing S lines: links reference non-existent segments" );
392
375
return -1 ;
@@ -433,15 +416,11 @@ loadgfa1(void *arg)
433
416
pushcmd ("loadbatch()" );
434
417
flushcmd ();
435
418
TIME ("readgfa" );
436
- if (a .nnodes == 0 )
437
- sysfatal ("loadgfa: no nodes\n" );
438
- else if (a .nedges == 0 )
439
- sysfatal ("loadgfa: no edges\n" );
440
419
logmsg ("loadgfa: initializing graph...\n" );
441
- mkbuckets (& a ); /* compute slots for each node's edges */
442
- TIME ("mkbuckets" );
443
420
mkgraph (& a ); /* batch initialize nodes and edges from indexes */
444
421
TIME ("mkgraph" );
422
+ if (debug & Debuggraph )
423
+ printgraph ();
445
424
if (gottagofast ){ /* FIXME: very much unsafe, won't work with csv, etc. */
446
425
pushcmd ("cmd(\"OPL753\")" ); /* load what we have and start layouting */
447
426
flushcmd (); /* awk must be in on it or it won't work */
0 commit comments