Skip to content

Commit 8d81e1e

Browse files
author
Dominick Leppich
committed
Merge pull request 'release_1.1.9' (#13) from release_1.1.9 into master
Reviewed-on: https://gitea.intranda.com/intranda/goobi-vocabulary-server/pulls/13
2 parents f1fea75 + fd4687b commit 8d81e1e

24 files changed

+264
-43
lines changed

docs/de/migration.md

+2
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ Diese URLs sollten gültig und auflösbar sein.
155155
Der Host-Teil dieser URLs wird aus der Anfrage generiert.
156156

157157
## Migration der Mets-Datei
158+
**Achtung:** Bitte erstellen Sie vorher ein Backup. In einigen Fällen sind die Mets-Dateien inkonsistent, d. h. die gespeicherten Werte und die Vokabularreferenzen stimmen nicht überein. Das Migrationsskript verwendet die Vokabularreferenzen, um die richtigen Werte zu finden. Wenn die Vokabularreferenzen falsch und die Werte richtig sind, beschädigt die Migration die Daten!
159+
158160
Dieser Schritt kann nur durchgeführt werden, wenn die Migration der Vokabulardaten erfolgreich abgeschlossen wurde!
159161

160162
Wenn die Datei `migration.csv` vorhanden ist, führen Sie den folgenden Befehl in der aktivierten Python-Umgebung aus:

docs/en/migration.md

+2
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@ blau,123
127127
This file maps all record values to the corresponding record IDs in the reference vocabulary.
128128

129129
## Mets file migration
130+
**Caution:** Please create a backup beforehand. In some cases the mets files are inconsistent, i.e. the stored values and the vocabulary references do not match. The migration script uses the vocabulary references to find the correct values. If the vocabulary references are wrong and the values are correct, the migration will corrupt the data!
131+
130132
This step can only be done after the vocabulary data migration has been successfully completed!
131133

132134
With the `migration.csv` file present, run the following command in the activated Python environment:

migration/lib/api.py

+18-6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
22
import requests
33
import json
4+
import sys
45

56
SCHEMA_INSERTION_URL = 'http://{{HOST}}:{{PORT}}/api/v1/schemas'
67
SCHEMA_LOOKUP_URL = 'http://{{HOST}}:{{PORT}}/api/v1/schemas/{{SCHEMA_ID}}'
@@ -61,7 +62,16 @@ def query(self, url, obj=None, method='POST'):
6162
response = requests.request(method, url=url, headers=HEADERS, data=payload)
6263
try:
6364
# Check for success
64-
if response.status_code // 100 != 2:
65+
if response.status_code == 401 or response.status_code == 403:
66+
error_msg = f'API call was not successful, reason: Authentification'
67+
logging.critical(error_msg)
68+
sys.exit(1)
69+
raise Exception(error_msg)
70+
if response.status_code == 404:
71+
error_msg = f'API call was not successful, reason: Entity not found {url}'
72+
logging.warning(error_msg)
73+
raise Exception(error_msg)
74+
elif response.status_code // 100 != 2:
6575
error_msg = f'API call was not successful, reason:\n{extract_error_from_response(response)}'
6676
logging.warning(error_msg)
6777
raise Exception(error_msg)
@@ -111,18 +121,20 @@ def insert_record(self, record):
111121
result = self.query(url, record)
112122
return result['id']
113123

def find_record(self, ctx, vocabulary_id, search_term, search_field=None):
    """Return the ID of the single record in a vocabulary that matches a term.

    The server-side search is queried first; its hits are then narrowed to
    exact matches via ``ctx.record_contains_value`` (optionally restricted
    to the field named ``search_field``).

    Raises:
        Exception: if the search yields no exact match, or more than one.
    """
    url = self.urls[RECORD_SEARCH]
    url = url.replace('{{VOCABULARY_ID}}', str(vocabulary_id))
    url = url.replace('{{SEARCH_TERM}}', search_term)
    result = self.query(url, obj=None, method='GET')

    # The same message is used for "server found nothing" and "no exact match".
    nothing_found = f'Record search in vocabulary "{vocabulary_id}" for search term "{search_term}" has no results'
    if '_embedded' not in result:
        raise Exception(nothing_found)

    # Filter for exact searches
    exact_matches = [
        candidate
        for candidate in result['_embedded']['vocabularyRecordList']
        if ctx.record_contains_value(candidate, search_term, search_field=search_field)
    ]

    if not exact_matches:
        raise Exception(nothing_found)
    if len(exact_matches) > 1:
        ids = [candidate['id'] for candidate in exact_matches]
        raise Exception(f'Record search in vocabulary "{vocabulary_id}" for search term "{search_term}" has no unique result, {len(exact_matches)} records found: {ids}')

    return exact_matches[0]['id']
128140

migration/lib/mets_context.py

+28-6
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
RECORD_PATTERN = re.compile('^(\\d+).*$')
77

88
class Context:
9-
def __init__(self, api, dry, verbose, continue_on_error, metadata_directory, mapping_file, preferred_mets_main_value_language, manual_id_fix):
9+
def __init__(self, api, dry, verbose, continue_on_error, metadata_directory, mapping_file, preferred_mets_main_value_language, manual_id_fix, trust, enable_relation_vocabulary_column_logic):
1010
self.api = api
1111
self.dry = dry
1212
self.verbose = verbose
@@ -15,12 +15,24 @@ def __init__(self, api, dry, verbose, continue_on_error, metadata_directory, map
1515
self.mapping_file = mapping_file
1616
self.preferred_mets_main_value_language = preferred_mets_main_value_language
1717
self.manual_id_fix = manual_id_fix
18+
self.trust = trust
19+
self.enable_relation_vocabulary_column_logic = enable_relation_vocabulary_column_logic
20+
self.vocabulary_name_id_map = {}
1821
self.vocabulary_id_name_map = {}
1922
self.vocabulary_id_map = {}
2023
self.record_id_map = {}
2124
self.vocabulary_id_schema_id_map = {}
2225
self.schema_id_main_field_id_map = {}
2326

def find_vocabulary_by_name(self, identifier):
    """Return the migrated vocabulary ID registered for a vocabulary name.

    Args:
        identifier: vocabulary name as stored in ``vocabulary_name_id_map``.

    Raises:
        KeyError: the name is unknown and ``continue_on_error`` is set; the
            error is logged first and the exception carries the full message
            (previously a bare ``KeyError(identifier)`` leaked out of the
            dictionary lookup after logging).
        Exception: the name is unknown and ``continue_on_error`` is not set.
    """
    if identifier in self.vocabulary_name_id_map:
        return self.vocabulary_name_id_map[identifier]

    error = f'Vocabulary name "{identifier}" not found'
    if self.continue_on_error:
        logging.error(error)
        # Keep the exception type callers already saw in this mode, but make
        # it self-explanatory.
        raise KeyError(error)
    raise Exception(error)

2436
def lookup_vocabulary_name(self, identifier):
2537
if not identifier in self.vocabulary_id_name_map:
2638
error = f'Vocabulary name not found for vocabulary with ID {identifier}'
@@ -69,12 +81,22 @@ def retrieve_main_field_id(self, schema_id):
6981
self.schema_id_main_field_id_map[schema_id] = main_definitions[0]['id']
7082
return self.schema_id_main_field_id_map[schema_id]
7183

def record_contains_value(self, record, value, search_field=None):
    """Tell whether any translation stored in ``record`` equals ``value``.

    Args:
        record: record dict with ``vocabularyId`` and ``fields``; each field
            carries ``definitionId`` and ``values`` -> ``translations``.
        value: exact string to look for.
        search_field: optional field definition name; when given, only the
            field with that definition is inspected.

    Returns:
        True if a matching translation exists, False otherwise.

    Exits the process (status 1) when ``search_field`` does not resolve to
    exactly one field definition of the record's schema.
    """
    # Imported locally because a module-level ``import sys`` is not
    # guaranteed for this file.
    import sys

    field_id = None
    if search_field is not None:
        vocabulary = self.api.lookup_vocabulary(record['vocabularyId'])
        schema = self.api.lookup_schema(vocabulary['schemaId'])
        ids = [d['id'] for d in schema['definitions'] if d['name'] == search_field]
        if len(ids) != 1:
            logging.critical(f'Non unique "{search_field}" fields found: {ids}!')
            sys.exit(1)
        field_id = ids[0]

    return any(
        translation['value'] == value
        for field in record['fields']
        if field_id is None or field['definitionId'] == field_id
        for field_value in field['values']
        for translation in field_value['translations']
    )
79101

80102
def extract_language_values(self, field):

migration/lib/mets_manipulator.py

+112-7
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,14 @@ def process_mets_file(self):
4040
self.ctx.log_processed(self.file_path)
4141

def process_node(self, node):
    """Migrate ``node`` if it is a reference, then recurse into its children.

    Manual ID references take precedence; otherwise a vocabulary reference
    is processed unless it is already migrated. In dry mode every processed
    node is dumped to the log.
    """
    handler = None
    if self.is_manual_id_reference(node):
        handler = self.process_manual_id_reference
    elif self.is_vocabulary_reference(node) and not self.is_already_migrated(node):
        handler = self.process_vocabulary_reference

    if handler is not None:
        handler(node)
        if self.ctx.dry:
            dump_node(node)

    for child in node:
        self.process_node(child)
5353

@@ -67,6 +67,14 @@ def generate_record_uri(self, record_id):
6767
return self.record_endpoint.replace('{{ID}}', str(record_id))
6868

def process_vocabulary_reference(self, node):
    """Dispatch a vocabulary reference to the strategy selected by ``ctx.trust``.

    ``'ID'`` migrates via the record ID, ``'Value'`` via the element value;
    any other setting raises an Exception.
    """
    strategies = {
        'ID': self.process_vocabulary_reference_by_id,
        'Value': self.process_vocabulary_reference_by_value,
    }
    strategy = strategies.get(self.ctx.trust)
    if strategy is None:
        raise Exception(f'Unknown trust source \"{self.ctx.trust}\"')
    strategy(node)

77+
def process_vocabulary_reference_by_id(self, node):
7078
try:
7179
# Extract old vocabulary and record ids
7280
valueURI = node.attrib['valueURI']
@@ -132,18 +140,115 @@ def process_vocabulary_reference(self, node):
132140
error = f'Unable to retrieve vocabulary and record id from valueURI: {valueURI}\n\t\t{e}'
133141
logging.debug(error)
134142
self.ctx.log_issue(self.file_path, error)
143+
def process_vocabulary_reference_by_value(self, node):
    """Migrate a vocabulary reference by trusting the element's text value.

    The vocabulary is resolved from the ``authority`` attribute, the record
    is searched by ``node.text``, and ``authority`` / ``authorityURI`` /
    ``valueURI`` are rewritten to point at the found record. ``geonames``
    references are skipped.

    With ``ctx.enable_relation_vocabulary_column_logic`` and a vocabulary
    whose name contains ``Relationship``, the sibling named
    ``RelationEntityType`` decides which column ("Relationship type" or
    "Reverse relationship") holds the value for this direction. The value
    is searched in that column first, then in the opposite one, and the
    element text is replaced by the value of the expected column. If the
    direction cannot be derived from the vocabulary name, a plain lookup
    over all columns is used instead.

    Problems are reported through ``ctx.log_issue``; nothing is raised
    except the process exit on inconsistent schema/record data.
    """
    # Imported locally because a module-level ``import sys`` is not
    # guaranteed for this file.
    import sys

    # Read the name outside the try block so the error handler below can
    # always reference it, even when the attribute is missing.
    vocabulary_name = node.attrib.get('authority')
    if vocabulary_name == 'geonames':
        return
    try:
        if vocabulary_name is None:
            raise KeyError('authority')
        vocabulary_id = self.ctx.find_vocabulary_by_name(vocabulary_name)
    except Exception as e:
        error = f'Unable to retrieve vocabulary by name: {vocabulary_name}\n\t\t{e}'
        logging.debug(error)
        self.ctx.log_issue(self.file_path, error)
        return

    value = node.text
    try:
        search_field = None
        inverse_search_field = None
        if self.ctx.enable_relation_vocabulary_column_logic and 'Relationship' in vocabulary_name:
            parent = node.getparent()
            if parent is None:
                logging.warning('No parent found!')
                dump_node(node)
                return

            # Siblings without a "name" attribute are simply not candidates.
            entity_type = next(
                (sibling.text for sibling in parent
                 if sibling.attrib.get('name') == 'RelationEntityType'),
                None,
            )
            if not entity_type:
                raise Exception('No "RelationEntityType" sibling with a value found')

            if vocabulary_name.count(entity_type) == 1:
                # Find out relation direction
                separator_position = vocabulary_name.index('-')
                entity_type_position = vocabulary_name.index(entity_type)

                # use second column of vocabulary: `Reverse relationship`
                # (The relation vocabulary is specified from `A->B`, the
                # relation references an entity of type `A` and is therefore
                # of type `B`)
                if entity_type_position < separator_position:
                    search_field = 'Reverse relationship'
                    inverse_search_field = 'Relationship type'
                else:
                    search_field = 'Relationship type'
                    inverse_search_field = 'Reverse relationship'

        if search_field is None:
            # Either a regular vocabulary or a relation whose direction is
            # not derivable from the name: search all columns, keep the text.
            new_record_id = self.ctx.api.find_record(self.ctx, vocabulary_id, value, search_field=None)
        else:
            try:
                new_record_id = self.ctx.api.find_record(self.ctx, vocabulary_id, value, search_field=search_field)
            except Exception:
                # Not found in the expected column: the relation may be
                # stored in the wrong direction, so try the opposite column.
                new_record_id = self.ctx.api.find_record(self.ctx, vocabulary_id, value, search_field=inverse_search_field)

            record_data = self.ctx.api.lookup_record(new_record_id)
            vocabulary = self.ctx.api.lookup_vocabulary(record_data['vocabularyId'])
            schema = self.ctx.api.lookup_schema(vocabulary['schemaId'])
            # We need the value of the column that we originally searched for
            ids = [d['id'] for d in schema['definitions'] if d['name'] == search_field]
            if len(ids) != 1:
                logging.critical(f'Non unique "{search_field}" fields found: {ids}!')
                sys.exit(1)

            field_data = [f for f in record_data['fields'] if f['definitionId'] == ids[0]]
            if len(field_data) != 1:
                logging.critical(f'Record [{new_record_id}] has no unique search column entry field')
                sys.exit(1)

            translated_main_values = self.ctx.extract_language_values(field_data[0])
            new_value = self.ctx.extract_preferred_language(translated_main_values)

            # Only report and rewrite when the stored text actually differs.
            if new_value != value:
                logging.warning(f'Relation is saved in the wrong direction, correct direction found and corrected: "{value}" -> "{new_value}"')
                node.text = new_value

        # Set all attributes accordingly
        node.attrib['authority'] = vocabulary_name
        node.attrib['authorityURI'] = self.generate_vocabulary_uri(vocabulary_id)
        node.attrib['valueURI'] = self.generate_record_uri(new_record_id)

        self.changed = True
    except Exception as e:
        error = f'Unable to find record by value: {value}\n\t\t{e}'
        logging.error(error)
        self.ctx.log_issue(self.file_path, error)
135229

def process_manual_id_reference(self, node):
    """Replace the old record ID stored as element text by its migrated ID.

    If the element also carries ``authority``, ``authorityURI`` or
    ``valueURI``, all three attributes are rewritten to point at the
    migrated record and its vocabulary. Elements without text are ignored.

    All lookups happen before the node is touched, so a failure leaves the
    element unchanged (previously the text could already be replaced while
    the attributes still pointed at the old record). Failures are logged as
    warnings and never raised.
    """
    if node.text is None:
        return

    old_text = node.text
    try:
        record_id_new = self.ctx.lookup_record_id(int(old_text))

        new_attributes = {}
        if 'authority' in node.attrib or 'authorityURI' in node.attrib or 'valueURI' in node.attrib:
            record = self.ctx.api.lookup_record(record_id_new)
            vocabulary = self.ctx.api.lookup_vocabulary(record['vocabularyId'])
            new_attributes = {
                'authority': vocabulary['name'],
                'authorityURI': self.generate_vocabulary_uri(vocabulary['id']),
                'valueURI': self.generate_record_uri(record_id_new),
            }
    except Exception as e:
        msg = f'Unable to read ID {old_text}!'
        logging.warning(msg)
        # Keep the underlying cause available for troubleshooting.
        logging.debug(f'{msg} Cause: {e!r}')
        return

    node.text = str(record_id_new)
    for key, attribute_value in new_attributes.items():
        node.attrib[key] = attribute_value
    self.changed = True
146250

def dump_node(node):
    """Log an XML element (tag, attributes and text) at INFO level.

    Elements without text are rendered with an empty body instead of the
    literal string ``None``.
    """
    attributes = ' '.join(f'{k}="{v}"' for k, v in node.attrib.items())
    value = '' if node.text is None else node.text
    logging.info(f'<{node.tag} {attributes}>{value}</{node.tag}>')

migration/lib/mets_migrator.py

+2
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ def load_mapping_file(self):
4343

4444
if not vocabulary_id_new in self.ctx.vocabulary_id_name_map:
4545
self.ctx.vocabulary_id_name_map[vocabulary_id_new] = vocabulary_name
46+
if not vocabulary_name in self.ctx.vocabulary_name_id_map:
47+
self.ctx.vocabulary_name_id_map[vocabulary_name] = vocabulary_id_new
4648
if not vocabulary_id_old in self.ctx.vocabulary_id_map:
4749
self.ctx.vocabulary_id_map[vocabulary_id_old] = vocabulary_id_new
4850
if not record_id_old in self.ctx.record_id_map:

migration/metadata-migrator.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def main():
1414
args.vocabulary_server_port,
1515
args.vocabulary_server_token
1616
)
17-
ctx = Context(api, args.dry, args.verbose, args.continue_on_error, args.metadata_directory, args.mapping_file, args.preferred_mets_main_value_language, args.manual_id_fix)
17+
ctx = Context(api, args.dry, args.verbose, args.continue_on_error, args.metadata_directory, args.mapping_file, args.preferred_mets_main_value_language, args.manual_id_fix, args.trust, args.enable_relation_vocabulary_column_logic)
1818

1919
try:
2020
migrator = MetsMigrator(ctx)
@@ -39,6 +39,8 @@ def parse_args():
3939
parser.add_argument('--vocabulary-server-port', type=str, default='8081', help='vocabulary server port')
4040
parser.add_argument('--vocabulary-server-token', type=str, default=None, help='vocabulary server security token')
4141
parser.add_argument('--preferred-mets-main-value-language', type=str, default='eng', help='Default language to use for mets value writing, if present and prior value invalid')
42+
parser.add_argument('--trust', required=False, type=str, default='ID', help='Set the data source to trust for the migration. Possible values are: "ID" and "Value". If "ID" is set, the record ID is parsed from the valueURI and used to find the migrated record. If "Value" is set, the XML elements value is used to find the newly migrated record by value. Defaults to "ID".')
43+
parser.add_argument('--enable-relation-vocabulary-column-logic', required=False, default=False, action='store_const', const=True, help='Activate relationship vocabulary correct column finding logic (reverse vs non-reverse, artist dictionary)')
4244
parser.add_argument('--manual-id-fix', type=str, default=None, help='Manually fix the record ID of elements whose name attribute matches this parameter. Caution, this must not be executed twice!')
4345
parser.add_argument('--log', required=False, default='INFO', help='logger level (possible values are: NOTSET, DEBUG, INFO, WARNING, ERROR, CRITICAL)')
4446
parser.add_argument('--verbose', required=False, default=False, action='store_const', const=True, help='verbose output')

module-core/pom.xml

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
</parent>
1111
<groupId>io.goobi.vocabulary</groupId>
1212
<artifactId>vocabulary-server-core</artifactId>
13-
<version>1.1.8</version>
13+
<version>1.1.9</version>
1414
<name>Vocabulary-Server-Core</name>
1515
<description>Spring Boot based RESTful web service for vocabulary management</description>
1616
<packaging>jar</packaging>
@@ -35,7 +35,7 @@
3535
<dependency>
3636
<groupId>io.goobi.vocabulary</groupId>
3737
<artifactId>vocabulary-server-exchange</artifactId>
38-
<version>1.1.8</version>
38+
<version>1.1.9</version>
3939
<scope>compile</scope>
4040
</dependency>
4141

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
package io.goobi.vocabulary.api;
2+
3+
import io.goobi.vocabulary.api.assemblers.FieldDefinitionAssembler;
4+
import io.goobi.vocabulary.exchange.FieldDefinition;
5+
import io.goobi.vocabulary.service.manager.FieldDefinitionDTOManager;
6+
import org.springframework.hateoas.EntityModel;
7+
import org.springframework.web.bind.annotation.GetMapping;
8+
import org.springframework.web.bind.annotation.PathVariable;
9+
import org.springframework.web.bind.annotation.RequestMapping;
10+
import org.springframework.web.bind.annotation.RestController;
11+
12+
@RestController
13+
@RequestMapping("/api/v1")
14+
public class FieldDefinitionController {
15+
private final FieldDefinitionDTOManager manager;
16+
private final FieldDefinitionAssembler assembler;
17+
18+
public FieldDefinitionController(FieldDefinitionDTOManager manager, FieldDefinitionAssembler assembler) {
19+
this.manager = manager;
20+
this.assembler = assembler;
21+
}
22+
23+
@GetMapping("/fieldDefinitions/{id}")
24+
public EntityModel<FieldDefinition> one(@PathVariable long id) {
25+
return assembler.toModel(manager.get(id));
26+
}
27+
}

module-core/src/main/java/io/goobi/vocabulary/api/MaintenanceController.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package io.goobi.vocabulary.api;
22

3-
import io.goobi.vocabulary.maintenance.selfcheck.SelfCheckResult;
3+
import io.goobi.vocabulary.monitoring.SelfCheckResult;
44
import io.goobi.vocabulary.service.manager.MaintenanceManager;
55
import org.springframework.web.bind.annotation.GetMapping;
66
import org.springframework.web.bind.annotation.RequestMapping;

module-core/src/main/java/io/goobi/vocabulary/api/MonitoringController.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package io.goobi.vocabulary.api;
22

3-
import io.goobi.vocabulary.maintenance.MonitoringResult;
3+
import io.goobi.vocabulary.monitoring.MonitoringResult;
44
import io.goobi.vocabulary.service.manager.MaintenanceManager;
55
import lombok.extern.slf4j.Slf4j;
66
import org.springframework.web.bind.annotation.GetMapping;

0 commit comments

Comments
 (0)