Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Serialization of KItems/KTypes #46

Draft
wants to merge 65 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
65 commits
Select commit Hold shift + click to select a range
7db4eb3
Added function calls to the ktype service
Sep 16, 2024
5b139c0
Added backend check for existing ktypes
Sep 18, 2024
87b27b5
Merge branch 'main' of github.com:MI-FraunhoferIWM/dsms-python-sdk in…
Sep 18, 2024
ed618b5
Added additional methods to support ktype functionalities
Sep 20, 2024
d8d2e67
Example for ktype usage has been added.
Sep 24, 2024
c96381e
Fixed pylint errors
Sep 24, 2024
3d0f850
Fixed pylint errors
Sep 25, 2024
6bd9835
Delete testktype.py
arjungkk Sep 26, 2024
997549d
update ktype retrieval
MBueschelberger Sep 27, 2024
46daa6e
update pytests
MBueschelberger Sep 27, 2024
c70ba76
minorly refactor ktype enum for direct retrieval of ktype model
MBueschelberger Oct 1, 2024
5ade860
update ktype validation
MBueschelberger Oct 1, 2024
4284256
update setup.cfg and bring back underscore for private variable
MBueschelberger Oct 2, 2024
9c0b6c4
remove unneeded utils
MBueschelberger Oct 2, 2024
6dc696f
update jupyter notebook for docs
MBueschelberger Oct 2, 2024
a70ad66
update jupyter notebook for docs
MBueschelberger Oct 2, 2024
db4509b
remove unneeded private property assignment
MBueschelberger Oct 2, 2024
ffdac4a
update ktype validator
MBueschelberger Oct 2, 2024
a0e1e69
update jupyter notebooks for docs
MBueschelberger Oct 2, 2024
cffaeb3
bring accidentally deleted ktype.setter
MBueschelberger Oct 2, 2024
23c819f
Merge pull request #37 from MI-FraunhoferIWM/review/ktype-integration
arjungkk Oct 7, 2024
901dc73
Mapped webform to pydantic model
Oct 21, 2024
2b81fae
Delete test.py
arjungkk Oct 24, 2024
2af08a7
Added alias generator to webform inputs
Oct 24, 2024
97ef7e2
Merge branch 'dev/ktype-integration' of github.com:MI-FraunhoferIWM/d…
Oct 24, 2024
d2b4c5f
Updated README and removed duplicate files
Oct 24, 2024
a3b7c2e
Added pylint fixes
Oct 24, 2024
aac1be4
adapt sdk to new platform backend
MBueschelberger Nov 21, 2024
aa4f3e6
Bump version v2.0.4 -> v2.1.0dev0
MBueschelberger Nov 21, 2024
7f4bff9
make upper restriction for pydantic
MBueschelberger Nov 21, 2024
761c3e3
set max length of string-field names
MBueschelberger Nov 27, 2024
8796d2e
bump dev version tag
MBueschelberger Nov 27, 2024
02ab43e
Support for webform changes
Dec 2, 2024
aab928f
Support for webform
Dec 2, 2024
e5b3396
Merge pull request #41 from MI-FraunhoferIWM/dev/webform-support
arjungkk Dec 2, 2024
6efc6e5
add schema transformation function
MBueschelberger Dec 2, 2024
1977ed9
bump dev version
MBueschelberger Dec 2, 2024
63d7be2
make temporary compatibility with old webform model
MBueschelberger Dec 9, 2024
0d06906
temporary fix for custom properties
MBueschelberger Dec 10, 2024
79a7f0d
set upper limit for pydantic
MBueschelberger Dec 10, 2024
b61f472
merge from dev/ktype-integration
MBueschelberger Dec 12, 2024
d6c23b9
update custom properties model
MBueschelberger Dec 12, 2024
c06d45d
update webform kitem assignment and validation
MBueschelberger Dec 13, 2024
ddf2458
fix printing of values
MBueschelberger Dec 16, 2024
78c534b
debug custom properties and dataframe
MBueschelberger Dec 16, 2024
e9f3f58
update config
MBueschelberger Dec 16, 2024
1f60b4d
debug buffer context
MBueschelberger Dec 17, 2024
00999e4
bump dev version
MBueschelberger Dec 17, 2024
3893290
fix minor problems
MBueschelberger Dec 17, 2024
ae25109
remove 'NumericalDatatype', debug unit conversion
MBueschelberger Dec 18, 2024
69ba134
bump dev version
MBueschelberger Dec 18, 2024
9738986
debug type check in entry
MBueschelberger Dec 18, 2024
75ff828
bump version
MBueschelberger Dec 18, 2024
0a40c39
debug serialization
MBueschelberger Dec 18, 2024
3cc313a
update webform and custom properties aliases
MBueschelberger Dec 18, 2024
a666346
bump version
MBueschelberger Dec 18, 2024
c2170f6
remove unneeded union
MBueschelberger Dec 18, 2024
7e83491
move function to sectionize metadata to sdk
MBueschelberger Dec 19, 2024
8871d71
bump version
MBueschelberger Dec 19, 2024
2c28db7
kitem export to hdf5
Jan 29, 2025
e5f556f
Merge branch 'main' of github.com:MI-FraunhoferIWM/dsms-python-sdk in…
Jan 29, 2025
aefd516
kitem export
Jan 29, 2025
bbb1776
fixed merge issues
Jan 29, 2025
d5fbf7c
fixed merge issues
Jan 29, 2025
cc04d7d
Export function for ktype
Feb 11, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions dsms/knowledge/kitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@
logger.addHandler(handler)
logger.propagate = False

class Format(Enum):
    """Serialization formats supported by :meth:`KItem.export`.

    The member values are the lowercase format identifiers used on the
    wire and in file extensions.
    """

    JSON = "json"  # plain JSON document (export not implemented yet)
    YAML = "yaml"  # YAML document (export not implemented yet)
    HDF5 = "hdf5"  # binary HDF5 container

class KItem(BaseModel):
"""
Expand Down Expand Up @@ -655,3 +661,18 @@ def is_a(self, to_be_compared: KType) -> bool:
def refresh(self) -> None:
    """Refresh the KItem.

    Delegates to the module-level ``_refresh_kitem`` helper, which
    updates this instance in place; nothing is returned.
    """
    _refresh_kitem(self)

def export(self, format: Format) -> Any:
    """Export this KItem in the requested serialization format.

    Args:
        format: Target format; currently only ``Format.HDF5`` is
            implemented.

    Returns:
        An ``io.BytesIO`` holding the HDF5 payload when ``format`` is
        ``Format.HDF5``.

    Raises:
        NotImplementedError: for ``Format.JSON`` and ``Format.YAML``,
            which are planned but not implemented yet.
        ValueError: if ``format`` is not a known ``Format`` member.
    """
    if format == Format.HDF5:
        # Imported lazily so h5py is only required when HDF5 export is used.
        from dsms.knowledge.kitem_wrapper import to_hdf5

        return to_hdf5(self)

    if format in (Format.JSON, Format.YAML):
        # Fail loudly instead of silently returning None so callers
        # notice the feature is missing.
        raise NotImplementedError(
            f"Export to {format.value} is not implemented yet"
        )

    raise ValueError(f"Unknown export format: {format!r}")
165 changes: 165 additions & 0 deletions dsms/knowledge/kitem_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
import io
import h5py
import numpy as np

def to_hdf5(kItem) -> io.BytesIO:
    """Serialize a KItem into an in-memory HDF5 file.

    Walks the item's scalar fields and sub-models (summary, dataframe,
    avatar, subgraph, annotations, attachments, linked items, apps,
    user groups, custom properties) and writes each into its own
    dataset or group.

    Args:
        kItem: the KItem pydantic model to export.

    Returns:
        ``io.BytesIO`` containing the complete HDF5 payload.
    """
    data_bytes = io.BytesIO()
    with h5py.File(data_bytes, 'w') as hdf:

        # Scalar top-level attributes.
        keys = ['name', 'id', 'ktype_id', 'in_backend', 'slug',
                'avatar_exists', 'created_at', 'updated_at', 'rdf_exists',
                'context_id', 'access_url']
        for key in keys:
            create_dataset(key, getattr(kItem, key), hdf)

        # Summary text (only when present).
        summary = getattr(kItem, 'summary')
        if summary is not None and summary.text is not None:
            hdf.create_dataset('summary', data=summary.text)

        # Dataframe, serialized as a JSON string.
        dataframe = getattr(kItem, 'dataframe')
        if dataframe is not None:
            hdf.create_dataset('dataframe', data=dataframe.to_df().to_json())

        # Avatar image, stored as raw PNG bytes.
        avatar = getattr(kItem, 'avatar')
        if avatar is not None:
            image = avatar.download()
            image_bytes = io.BytesIO()
            image.save(image_bytes, format='PNG')
            img_arr = np.frombuffer(image_bytes.getvalue(), dtype=np.uint8)
            hdf.create_dataset('avatar', data=img_arr, dtype=img_arr.dtype)

        # RDF subgraph, serialized to text.
        subgraph = getattr(kItem, 'subgraph')
        if subgraph is not None:
            hdf.create_dataset('subgraph', data=subgraph.serialize())

        # List-valued sub-models that all follow the same pattern of
        # dumping every (key, value) pair of each element.
        simple_lists = [
            ('annotations', 'annotation'),
            ('affiliations', 'affiliation'),
            ('authors', 'author'),
            ('contacts', 'contact'),
            ('external_links', 'external_link'),
            ('user_groups', 'user_group'),
        ]
        for attr, singular in simple_lists:
            parent_group = hdf.create_group(attr)
            for i, element in enumerate(getattr(kItem, attr)):
                element_group = parent_group.create_group(f'{singular}_{i}')
                for key, value in element:
                    create_dataset(key, value, element_group)

        # Attachments: the 'content' field is replaced by the downloaded
        # payload and stored as raw bytes.
        attachments_group = hdf.create_group('attachments')
        for i, attachment in enumerate(getattr(kItem, 'attachments')):
            attachment_group = attachments_group.create_group(f'attachment_{i}')
            for key, value in attachment:
                if key == 'content':
                    binary_data = np.frombuffer(
                        attachment.download().encode(), dtype='uint8'
                    )
                    attachment_group.create_dataset(
                        key, data=binary_data, dtype=binary_data.dtype
                    )
                else:
                    create_dataset(key, value, attachment_group)

        # Linked KItems: only a fixed subset of identifying fields is kept.
        linked_kitems_group = hdf.create_group('linked_kitems')
        for i, linked_kitem in enumerate(getattr(kItem, 'linked_kitems')):
            linked_kitem_group = linked_kitems_group.create_group(
                f'linked_kitem_{i}'
            )
            for key in ['id', 'name', 'slug', 'ktype_id']:
                create_dataset(key, getattr(linked_kitem, key),
                               linked_kitem_group)

        # KItem apps: 'additional_properties' is flattened into nested
        # datasets below the app group.
        kitem_apps_group = hdf.create_group('kitem_apps')
        for i, app in enumerate(getattr(kItem, 'kitem_apps')):
            app_group = kitem_apps_group.create_group(f'app_{i}')
            for key, value in app:
                if key == 'additional_properties':
                    for prop_key, prop_value in value:
                        app_group.create_dataset(
                            f'additional_properties/{prop_key}',
                            data=prop_value,
                        )
                else:
                    create_dataset(key, value, app_group)

        # Custom properties: fetch the model directly instead of scanning
        # the item's (field, value) pairs for it — the old scan also kept
        # running against an unrelated field when 'custom_properties' was
        # absent from the iteration.
        from dsms.knowledge.webform import KItemCustomPropertiesModel

        custom_properties_group = hdf.create_group('custom_properties')
        custom_property = getattr(kItem, 'custom_properties', None)
        if isinstance(custom_property, KItemCustomPropertiesModel):
            sections_group = custom_properties_group.create_group('sections')
            # NOTE(review): iterating the model/section is assumed to yield
            # its sections/entries (custom __iter__) — confirm against the
            # webform model definitions.
            for i, section in enumerate(custom_property):
                section_group = sections_group.create_group(f'section_{i}')
                section_group.create_dataset('id', data=section.id)
                section_group.create_dataset('name', data=section.name)
                entries_group = section_group.create_group('entries')

                for j, entry in enumerate(section):
                    entry_group = entries_group.create_group(f'entry_{j}')
                    nested_keys = ['measurement_unit', 'relation_mapping']
                    for entry_key, entry_value in entry:
                        if entry_key == 'kitem':
                            # Back-reference to the parent item; skip it to
                            # avoid recursive serialization.
                            continue
                        if entry_key in nested_keys and entry_value is not None:
                            nested_group = entry_group.create_group(entry_key)
                            for key_, value_ in entry_value:
                                if key_ == 'kitem':
                                    continue
                                create_dataset(key_, value_, nested_group)
                        else:
                            # BUG FIX: the original wrote the stale
                            # outer-loop 'key'/'value' variables here
                            # instead of the entry's own key and value.
                            create_dataset(entry_key, entry_value, entry_group)

    return data_bytes


def create_dataset(key, value, group):
    """Store ``value`` under ``key`` in the h5py ``group``.

    h5py natively handles numbers, strings, bools and homogeneous
    lists/tuples. Anything else — ``None``, dicts, sets, or arbitrary
    objects such as UUIDs and datetimes — is stringified so the export
    never fails on an exotic field type.

    Args:
        key: dataset name within ``group``.
        value: the Python value to persist.
        group: an ``h5py.Group`` (or ``File``) to write into.
    """
    # BUG FIX: dict and set were previously treated as h5py-native, but
    # h5py's create_dataset cannot serialize them and raises TypeError;
    # they now go through the str() fallback.
    native_types = (int, float, str, bool, list, tuple)
    if isinstance(value, native_types):
        group.create_dataset(key, data=value)
    else:
        group.create_dataset(key, data=str(value))
23 changes: 23 additions & 0 deletions dsms/knowledge/ktype.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from datetime import datetime
from typing import TYPE_CHECKING, Any, Optional, Union
from uuid import UUID
from enum import Enum

from pydantic import BaseModel, Field, model_serializer

Expand All @@ -19,6 +20,12 @@
logger.addHandler(handler)
logger.propagate = False

class Format(Enum):
    """Serialization formats supported by :meth:`KType.export`.

    Member values are the lowercase identifiers of each format.
    """

    JSON = "json"  # plain JSON document (export not implemented yet)
    YAML = "yaml"  # YAML document (export not implemented yet)
    HDF5 = "hdf5"  # binary HDF5 container

class KType(BaseModel):
"""Knowledge type of the knowledge item."""
Expand Down Expand Up @@ -137,3 +144,19 @@ def serialize(self):
)
for key, value in self.__dict__.items()
}

def export(self, format: Format) -> Any:
    """Export this KType in the requested serialization format.

    Args:
        format: Target format; currently only ``Format.HDF5`` is
            implemented.

    Returns:
        An ``io.BytesIO`` holding the HDF5 payload when ``format`` is
        ``Format.HDF5``.

    Raises:
        NotImplementedError: for ``Format.JSON`` and ``Format.YAML``,
            which are planned but not implemented yet.
        ValueError: if ``format`` is not a known ``Format`` member.
    """
    if format == Format.HDF5:
        # Imported lazily so h5py is only required when HDF5 export is used.
        from dsms.knowledge.ktype_wrapper import to_hdf5

        return to_hdf5(self)

    if format in (Format.JSON, Format.YAML):
        # Fail loudly instead of silently returning None so callers
        # notice the feature is missing.
        raise NotImplementedError(
            f"Export to {format.value} is not implemented yet"
        )

    raise ValueError(f"Unknown export format: {format!r}")
67 changes: 67 additions & 0 deletions dsms/knowledge/ktype_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import io
import h5py
import numpy as np

def to_hdf5(ktype) -> io.BytesIO:
    """Serialize a KType into an in-memory HDF5 file.

    Writes the type's scalar attributes plus its webform definition
    (sections, inputs, select options, and nested mapping/unit models)
    into nested HDF5 groups.

    Args:
        ktype: the KType pydantic model to export.

    Returns:
        ``io.BytesIO`` containing the complete HDF5 payload.
    """
    data_bytes = io.BytesIO()
    with h5py.File(data_bytes, 'w') as hdf:

        # Scalar top-level attributes.
        keys = ['id', 'name', 'created_at', 'updated_at']
        for key in keys:
            create_dataset(key, getattr(ktype, key), hdf)

        # Webform definition; the group is created even when no webform
        # is attached, matching the existing file layout.
        webform = getattr(ktype, 'webform')
        webform_group = hdf.create_group('webform')
        if webform is not None:
            sections_group = webform_group.create_group('sections')
            section_keys = ['id', 'name', 'hidden']
            nested_input_keys = ['measurement_unit', 'relation_mapping',
                                 'relation_mapping_extra', 'range_options']
            for webform_key, webform_value in webform:
                if webform_key == 'kitem':
                    # Back-reference; skip to avoid recursive serialization.
                    continue
                if webform_key == 'sections':
                    for i, section in enumerate(webform_value):
                        section_group = sections_group.create_group(
                            f'section_{i}'
                        )
                        for section_key in section_keys:
                            create_dataset(section_key,
                                           getattr(section, section_key),
                                           section_group)

                        inputs_group = section_group.create_group('inputs')

                        # 'form_input' rather than 'input' so the builtin
                        # is not shadowed.
                        for j, form_input in enumerate(section.inputs):
                            input_group = inputs_group.create_group(
                                f'input_{j}'
                            )
                            for input_key, input_value in form_input:
                                if input_key == 'kitem':
                                    continue
                                if input_key == 'select_options':
                                    select_options_group = (
                                        input_group.create_group(
                                            'select_options'
                                        )
                                    )
                                    for k, select_option in enumerate(
                                        input_value
                                    ):
                                        option_group = (
                                            select_options_group.create_group(
                                                f'option_{k}'
                                            )
                                        )
                                        for option_key, option_value in (
                                            select_option
                                        ):
                                            create_dataset(option_key,
                                                           option_value,
                                                           option_group)
                                elif (input_key in nested_input_keys
                                      and input_value is not None):
                                    nested_group = input_group.create_group(
                                        input_key
                                    )
                                    for key_, value_ in input_value:
                                        if key_ == 'kitem':
                                            continue
                                        create_dataset(key_, value_,
                                                       nested_group)
                                else:
                                    create_dataset(input_key, input_value,
                                                   input_group)
                else:
                    # Any other scalar webform field goes straight into
                    # the webform group.
                    create_dataset(webform_key, webform_value, webform_group)

    return data_bytes

def create_dataset(key, value, group):
    """Store ``value`` under ``key`` in the h5py ``group``.

    h5py natively handles numbers, strings, bools and homogeneous
    lists/tuples. Anything else — ``None``, dicts, sets, or arbitrary
    objects such as UUIDs and datetimes — is stringified so the export
    never fails on an exotic field type.

    Args:
        key: dataset name within ``group``.
        value: the Python value to persist.
        group: an ``h5py.Group`` (or ``File``) to write into.
    """
    # BUG FIX: dict and set were previously treated as h5py-native, but
    # h5py's create_dataset cannot serialize them and raises TypeError;
    # they now go through the str() fallback.
    native_types = (int, float, str, bool, list, tuple)
    if isinstance(value, native_types):
        group.create_dataset(key, data=value)
    else:
        group.create_dataset(key, data=str(value))
Loading