@@ -18,7 +18,7 @@ use grebi_shared::find_strings;
18
18
struct Args {
19
19
20
20
#[ arg( long) ]
21
- add_prefix : String , // used to prepend the subgraph name like hra_kg:g:
21
+ identifier_properties : String ,
22
22
23
23
#[ arg( long) ]
24
24
groups_txt : String ,
@@ -34,9 +34,15 @@ static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
34
34
fn main ( ) {
35
35
36
36
let args = Args :: parse ( ) ;
37
- let preserve_fields: HashSet < Vec < u8 > > = args. preserve_field . iter ( ) . map ( |x| x. as_bytes ( ) . to_vec ( ) ) . collect ( ) ;
38
37
39
- let add_prefix = args. add_prefix ;
38
+
39
+ let mut id_props: HashSet < Vec < u8 > > = HashSet :: new ( ) ;
40
+ for prop in args. identifier_properties . split ( "," ) {
41
+ id_props. insert ( prop. as_bytes ( ) . to_vec ( ) ) ;
42
+ }
43
+
44
+
45
+ let preserve_fields: HashSet < Vec < u8 > > = args. preserve_field . iter ( ) . map ( |x| x. as_bytes ( ) . to_vec ( ) ) . collect ( ) ;
40
46
41
47
let id_to_group: HashMap < Vec < u8 > , Vec < u8 > > = {
42
48
@@ -95,29 +101,32 @@ fn main() {
95
101
while json. peek ( ) . kind != JsonTokenType :: EndObject {
96
102
let prop_key = json. name ( ) ;
97
103
98
- if prop_key == b"id" {
99
- id = Some ( json. string ( ) ) ;
104
+ // Any one of the identifier values will do; we only need a single one,
105
+ // since all identifiers map to the same group.
106
+ //
107
+ if id_props. contains ( prop_key) {
108
+ // TODO handle the same cases as the id extraction does
109
+ if json. peek ( ) . kind == JsonTokenType :: StartArray {
110
+ json. begin_array ( ) ;
111
+ id = Some ( json. string ( ) ) ;
112
+ } else {
113
+ id = Some ( json. string ( ) ) ;
114
+ }
115
+ break ;
100
116
} else {
101
117
json. value ( ) ; // skip
102
118
}
103
119
}
104
120
105
121
let group = id_to_group. get ( id. unwrap ( ) ) ;
106
- if group. is_some ( ) {
107
-
108
- // the subject mapped to an equivalence group
109
- writer. write_all ( "{\" grebi:nodeId\" :\" " . as_bytes ( ) ) . unwrap ( ) ;
110
- writer. write_all ( add_prefix. as_bytes ( ) ) . unwrap ( ) ;
111
- writer. write_all ( group. unwrap ( ) . as_slice ( ) ) . unwrap ( ) ;
112
- writer. write_all ( "\" " . as_bytes ( ) ) . unwrap ( ) ;
113
- } else {
114
- // the subject did not map to an equivalence group
115
- writer. write_all ( "{\" grebi:nodeId\" :\" " . as_bytes ( ) ) . unwrap ( ) ;
116
- writer. write_all ( add_prefix. as_bytes ( ) ) . unwrap ( ) ;
117
- writer. write_all ( id. unwrap ( ) ) . unwrap ( ) ;
118
- writer. write_all ( "\" " . as_bytes ( ) ) . unwrap ( ) ;
122
+ if !group. is_some ( ) {
123
+ panic ! ( "could not find identifier group for id: {}" , String :: from_utf8( id. unwrap( ) . to_vec( ) ) . unwrap( ) ) ;
119
124
}
120
125
126
+ writer. write_all ( "{\" grebi:nodeId\" :\" " . as_bytes ( ) ) . unwrap ( ) ;
127
+ writer. write_all ( group. unwrap ( ) . as_slice ( ) ) . unwrap ( ) ;
128
+ writer. write_all ( "\" " . as_bytes ( ) ) . unwrap ( ) ;
129
+
121
130
json. rewind ( ) ;
122
131
while json. peek ( ) . kind != JsonTokenType :: EndObject {
123
132
@@ -129,7 +138,6 @@ fn main() {
129
138
} else {
130
139
let name_group = id_to_group. get ( name) ;
131
140
if name_group. is_some ( ) {
132
- writer. write_all ( add_prefix. as_bytes ( ) ) . unwrap ( ) ;
133
141
writer. write_all ( name_group. unwrap ( ) ) . unwrap ( ) ;
134
142
} else {
135
143
writer. write_all ( name) . unwrap ( ) ;
@@ -140,7 +148,7 @@ fn main() {
140
148
if name. eq ( b"id" ) || preserve_fields. contains ( name) {
141
149
writer. write_all ( json. value ( ) ) . unwrap ( ) ;
142
150
} else {
143
- write_value ( & mut writer, json. value ( ) , & id_to_group, & add_prefix ) ;
151
+ write_value ( & mut writer, json. value ( ) , & id_to_group) ;
144
152
}
145
153
}
146
154
@@ -151,7 +159,7 @@ fn main() {
151
159
152
160
}
153
161
154
- fn write_value ( writer : & mut BufWriter < io:: StdoutLock > , value : & [ u8 ] , id_to_group : & HashMap < Vec < u8 > , Vec < u8 > > , add_prefix : & str ) {
162
+ fn write_value ( writer : & mut BufWriter < io:: StdoutLock > , value : & [ u8 ] , id_to_group : & HashMap < Vec < u8 > , Vec < u8 > > ) {
155
163
156
164
let string_locations = find_strings ( & value) ;
157
165
@@ -174,7 +182,6 @@ fn write_value(writer:&mut BufWriter<io::StdoutLock>, value:&[u8], id_to_group:&
174
182
175
183
let pv_group = id_to_group. get ( str) ;
176
184
if pv_group. is_some ( ) {
177
- writer. write_all ( add_prefix. as_bytes ( ) ) . unwrap ( ) ;
178
185
writer. write_all ( pv_group. unwrap ( ) ) . unwrap ( ) ;
179
186
} else {
180
187
writer. write_all ( str) . unwrap ( ) ;
0 commit comments