Skip to content

Commit 444c4a0

Browse files
author
Dominick Leppich
committed
task: relation direction logic
1 parent 57af637 commit 444c4a0

File tree

4 files changed

+46
-17
lines changed

4 files changed

+46
-17
lines changed

migration/lib/api.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -121,14 +121,14 @@ def insert_record(self, record):
121121
result = self.query(url, record)
122122
return result['id']
123123

124-
def find_record(self, ctx, vocabulary_id, search_term, main_value_only=False):
124+
def find_record(self, ctx, vocabulary_id, search_term, search_field=None):
125125
url = self.urls[RECORD_SEARCH].replace('{{VOCABULARY_ID}}', str(vocabulary_id)).replace('{{SEARCH_TERM}}', search_term)
126126
result = self.query(url, obj=None, method='GET')
127127
if not '_embedded' in result:
128128
raise Exception(f'Record search in vocabulary "{vocabulary_id}" for search term "{search_term}" has no results')
129129
results = result['_embedded']['vocabularyRecordList']
130130
# Filter for exact searches
131-
results = [r for r in results if ctx.record_contains_value(r, search_term, main_value_only=main_value_only)]
131+
results = [r for r in results if ctx.record_contains_value(r, search_term, search_field=search_field)]
132132

133133
if len(results) == 0:
134134
raise Exception(f'Record search in vocabulary "{vocabulary_id}" for search term "{search_term}" has no results')

migration/lib/mets_context.py

+10 -9
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
RECORD_PATTERN = re.compile('^(\\d+).*$')
77

88
class Context:
9-
def __init__(self, api, dry, verbose, continue_on_error, metadata_directory, mapping_file, preferred_mets_main_value_language, manual_id_fix, trust):
9+
def __init__(self, api, dry, verbose, continue_on_error, metadata_directory, mapping_file, preferred_mets_main_value_language, manual_id_fix, trust, enable_relation_vocabulary_column_logic):
1010
self.api = api
1111
self.dry = dry
1212
self.verbose = verbose
@@ -16,6 +16,7 @@ def __init__(self, api, dry, verbose, continue_on_error, metadata_directory, map
1616
self.preferred_mets_main_value_language = preferred_mets_main_value_language
1717
self.manual_id_fix = manual_id_fix
1818
self.trust = trust
19+
self.enable_relation_vocabulary_column_logic = enable_relation_vocabulary_column_logic
1920
self.vocabulary_name_id_map = {}
2021
self.vocabulary_id_name_map = {}
2122
self.vocabulary_id_map = {}
@@ -80,18 +81,18 @@ def retrieve_main_field_id(self, schema_id):
8081
self.schema_id_main_field_id_map[schema_id] = main_definitions[0]['id']
8182
return self.schema_id_main_field_id_map[schema_id]
8283

83-
def record_contains_value(self, record, value, main_value_only=False):
84-
main_value_id = None
85-
if main_value_only:
84+
def record_contains_value(self, record, value, search_field=None):
85+
field_id = None
86+
if search_field != None:
8687
vocabulary = self.api.lookup_vocabulary(record['vocabularyId'])
8788
schema = self.api.lookup_schema(vocabulary['schemaId'])
88-
mainIds = [d['id'] for d in schema['definitions'] if d['mainEntry'] == True]
89-
if len(mainIds) != 1:
90-
logging.critical(f'Non unique main entries: {mainIds}!')
89+
ids = [d['id'] for d in schema['definitions'] if d['name'] == search_field]
90+
if len(ids) != 1:
91+
logging.critical(f'Non unique "{search_field}" fields found: {ids}!')
9192
sys.exit(1)
92-
main_value_id = mainIds[0]
93+
field_id = ids[0]
9394
for f in record['fields']:
94-
if main_value_id == None or f['definitionId'] == main_value_id:
95+
if field_id == None or f['definitionId'] == field_id:
9596
for v in f['values']:
9697
for t in v['translations']:
9798
if t['value'] == value:

migration/lib/mets_manipulator.py

+31 -4
Original file line number | Diff line number | Diff line change
@@ -144,6 +144,9 @@ def process_vocabulary_reference_by_id(self, node):
144144
def process_vocabulary_reference_by_value(self, node):
145145
try:
146146
vocabulary_name = node.attrib['authority']
147+
148+
if vocabulary_name == 'geonames':
149+
return
147150
vocabulary_id = self.ctx.find_vocabulary_by_name(vocabulary_name)
148151
except Exception as e:
149152
error = f'Unable to retrieve vocabulary by name: {vocabulary_name}\n\t\t{e}'
@@ -153,10 +156,34 @@ def process_vocabulary_reference_by_value(self, node):
153156

154157
try:
155158
value = node.text
156-
try:
157-
new_record_id = self.ctx.api.find_record(self.ctx, vocabulary_id, value, main_value_only=False)
158-
except:
159-
new_record_id = self.ctx.api.find_record(self.ctx, vocabulary_id, value, main_value_only=True)
159+
160+
search_field=None
161+
if self.ctx.enable_relation_vocabulary_column_logic and 'Relationship' in vocabulary_name:
162+
parent = node.getparent()
163+
if parent == None:
164+
logging.warn(f'No parent found!')
165+
dump_node(node)
166+
return
167+
168+
entity_type = None
169+
for sibling in parent:
170+
if sibling.attrib['name'] == 'RelationEntityType':
171+
entity_type = sibling.text
172+
break
173+
174+
entity_type_in_relation_count = vocabulary_name.count(entity_type)
175+
if entity_type_in_relation_count == 1:
176+
# Find out relation direction
177+
separator_position = vocabulary_name.index('-')
178+
entity_type_position = vocabulary_name.index(entity_type)
179+
180+
# use second column of vocabulary: `Reverse relationship` (The relation vocabulary is specified from `A->B`, the relation references an entity of type `A` and is therefore of type `B`)
181+
if entity_type_position < separator_position:
182+
search_field='Reverse relationship'
183+
else:
184+
search_field='Relationship type'
185+
186+
new_record_id = self.ctx.api.find_record(self.ctx, vocabulary_id, value, search_field=search_field)
160187

161188
# Set all attributes accordingly
162189
node.attrib['authority'] = vocabulary_name

migration/metadata-migrator.py

+3 -2
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ def main():
1414
args.vocabulary_server_port,
1515
args.vocabulary_server_token
1616
)
17-
ctx = Context(api, args.dry, args.verbose, args.continue_on_error, args.metadata_directory, args.mapping_file, args.preferred_mets_main_value_language, args.manual_id_fix, args.trust)
17+
ctx = Context(api, args.dry, args.verbose, args.continue_on_error, args.metadata_directory, args.mapping_file, args.preferred_mets_main_value_language, args.manual_id_fix, args.trust, args.enable_relation_vocabulary_column_logic)
1818

1919
try:
2020
migrator = MetsMigrator(ctx)
@@ -39,7 +39,8 @@ def parse_args():
3939
parser.add_argument('--vocabulary-server-port', type=str, default='8081', help='vocabulary server port')
4040
parser.add_argument('--vocabulary-server-token', type=str, default=None, help='vocabulary server security token')
4141
parser.add_argument('--preferred-mets-main-value-language', type=str, default='eng', help='Default language to use for mets value writing, if present and prior value invalid')
42-
parser.add_argument('--trust', type=str, default='ID', help='Set the data source to trust for the migration. Possible values are: "ID" and "Value". If "ID" is set, the record ID is parsed from the valueURI and used to find the migrated record. If "Value" is set, the XML elements value is used to find the newly migrated record by value. Defaults to "ID".')
42+
parser.add_argument('--trust', required=False, type=str, default='ID', help='Set the data source to trust for the migration. Possible values are: "ID" and "Value". If "ID" is set, the record ID is parsed from the valueURI and used to find the migrated record. If "Value" is set, the XML elements value is used to find the newly migrated record by value. Defaults to "ID".')
43+
parser.add_argument('--enable-relation-vocabulary-column-logic', required=False, default=False, action='store_const', const=True, help='Activate relationship vocabulary correct column finding logic (reverse vs non-reverse, artist dictionary)')
4344
parser.add_argument('--manual-id-fix', type=str, default=None, help='Manually fix the record ID of elements whose name attribute matches this parameter. Caution, this must not be executed twice!')
4445
parser.add_argument('--log', required=False, default='INFO', help='logger level (possible values are: NOTSET, DEBUG, INFO, WARNING, ERROR, CRITICAL)')
4546
parser.add_argument('--verbose', required=False, default=False, action='store_const', const=True, help='verbose output')

0 commit comments

Comments
 (0)