Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hotfix 439 - Develop merge missing table #443

Merged
merged 16 commits into from
Apr 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.13.0
current_version = 0.13.1
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)((?P<release>(a|na))+(?P<build>\d+))?
serialize =
{major}.{minor}.{patch}{release}{build}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci-production.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
- name: create package
run: python setup.py sdist
- name: import open-mastr
run: python -m pip install ./dist/open_mastr-0.13.0.tar.gz
run: python -m pip install ./dist/open_mastr-0.13.1.tar.gz
- name: Create credentials file
env:
MASTR_TOKEN: ${{ secrets.MASTR_TOKEN }}
Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ For each version important additions, changes and removals are listed here.
The format is inspired by [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and the versioning aims to respect [Semantic Versioning](http://semver.org/spec/v2.0.0.html).

## [v0.13.1] Hotfix - 2023-04-11

### Added
- Add new table and new columns to the data model [#440](https://github.com/OpenEnergyPlatform/open-MaStR/pull/440)

## [v0.13.0] Maintenance release - 2023-02-16
### Added
- Add a `workflow_dispatch` to run CI pipelines from a button click [#389](https://github.com/OpenEnergyPlatform/open-MaStR/pull/389)
Expand Down
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ authors:
title: "open-MaStR"
type: software
license: AGPL-3.0
version: 0.13.0
version: 0.13.1
doi:
date-released: 2023-02-16
date-released: 2023-04-11
url: "https://github.com/OpenEnergyPlatform/open-MaStR/"
1 change: 1 addition & 0 deletions open_mastr/mastr.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ def download(
"balancing_area", "Yes", "No"
"permit", "Yes", "Yes"
"deleted_units", "Yes", "No"
"retrofit_units", "Yes", "No"

date: None or :class:`datetime.datetime` or str, optional
For bulk method:
Expand Down
4 changes: 2 additions & 2 deletions open_mastr/soap_api/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,15 +479,15 @@ def __init__(self, parallel_processes=None):
"permit_data": "GetEinheitGenehmigung",
},
"gsgk": {
"unit_data": "GetEinheitGeoSolarthermieGrubenKlaerschlammDruckentspannung",
"unit_data": "GetEinheitGeothermieGrubengasDruckentspannung",
"energietraeger": [
"Geothermie",
"Solarthermie",
"Grubengas",
"Klaerschlamm",
],
"kwk_data": "GetAnlageKwk",
"eeg_data": "GetAnlageEegGeoSolarthermieGrubenKlaerschlammDruckentspannung",
"eeg_data": "GetAnlageEegGeothermieGrubengasDruckentspannung",
"permit_data": "GetEinheitGenehmigung",
},
"nuclear": {
Expand Down
31 changes: 16 additions & 15 deletions open_mastr/soap_api/mirror.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ def retrieve_additional_data(self, data, data_type, limit=10 ** 8, chunksize=100
data, requested_ids, download_functions[data_type]
)

unit_data = flatten_dict(unit_data)
unit_data = flatten_dict(unit_data, serialize_with_json=False)
number_units_merged = 0

# Prepare data and add to database table
Expand Down Expand Up @@ -839,20 +839,21 @@ def _preprocess_additional_data_entry(self, unit_dat, technology, data_type):
ertuechtigung["ProzentualeErhoehungDesLv"] = float(
ertuechtigung["ProzentualeErhoehungDesLv"]
)
# The NetzbetreiberMastrNummer is handed over as type:list, hence
# non-compatible with sqlite)
# This replaces the list with the first (string)element in the list
# to make it sqlite compatible
if (
"NetzbetreiberMastrNummer" in unit_dat
and type(unit_dat["NetzbetreiberMastrNummer"]) == list
):
if len(unit_dat["NetzbetreiberMastrNummer"]) > 0:
unit_dat["NetzbetreiberMastrNummer"] = unit_dat[
"NetzbetreiberMastrNummer"
][0]
else:
unit_dat["NetzbetreiberMastrNummer"] = None
# Some fields (listed in data_as_list) are handed over as type:list, hence
# non-compatible with sqlite or postgresql.
# This replaces the list with its first element (converted to a string),
# or with None if the list is empty.

data_as_list = ["NetzbetreiberMastrNummer","Netzbetreiberzuordnungen"]

for dat in data_as_list:
if (
dat in unit_dat
and type(unit_dat[dat]) == list
):
if len(unit_dat[dat]) > 0:
unit_dat[dat] = f"{unit_dat[dat][0]}"
else:
unit_dat[dat] = None

# Rename the typo in column zugeordneteWirkleistungWechselrichter
if "zugeordneteWirkleistungWechselrichter" in unit_dat.keys():
Expand Down
15 changes: 8 additions & 7 deletions open_mastr/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
"balancing_area",
"permit",
"deleted_units",
"retrofit_units",
]

# Possible values for parameter 'data' with API download method
Expand Down Expand Up @@ -59,7 +60,8 @@
"market_actors",
"market_roles",
"permit",
"deleted_units"
"deleted_units",
"retrofit_units",
]

# Possible data types for API download
Expand Down Expand Up @@ -99,6 +101,7 @@
"balancing_area": ["bilanzierungsgebiete"],
"permit": ["einheitengenehmigung"],
"deleted_units": ["geloeschteunddeaktivierteeinheiten"],
"retrofit_units": ["ertuechtigungen"],
}

# Map bulk data to database table names, for csv export
Expand All @@ -116,6 +119,7 @@
"balancing_area": ["balancing_area"],
"permit": ["permit"],
"deleted_units": ["deleted_units"],
"retrofit_units": ["retrofit_units"],
}

# used to map the parameter options in open-mastr to the exact table class names in orm.py
Expand Down Expand Up @@ -152,10 +156,7 @@
"eeg_data": "HydroEeg",
"permit_data": "Permit",
},
"nuclear": {
"unit_data": "NuclearExtended",
"permit_data": "Permit"
},
"nuclear": {"unit_data": "NuclearExtended", "permit_data": "Permit"},
"storage": {
"unit_data": "StorageExtended",
"eeg_data": "StorageEeg",
Expand All @@ -173,11 +174,11 @@
"grids": "Grids",
"balancing_area": "BalancingArea",
"permit": "Permit",
"deleted_units": "DeletedUnits"
"deleted_units": "DeletedUnits",
"retrofit_units": "RetrofitUnits",
}



UNIT_TYPE_MAP = {
"Windeinheit": "wind",
"Solareinheit": "solar",
Expand Down
28 changes: 26 additions & 2 deletions open_mastr/utils/orm.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class BasicUnit(Base):
GenMastrNummer = Column(String)
BestandsanlageMastrNummer = Column(String)
NichtVorhandenInMigriertenEinheiten = Column(String)
EinheitSystemstatus = Column(String)


class AdditionalDataRequested(Base):
Expand Down Expand Up @@ -134,6 +135,7 @@ class Extended(object):
Einspeisungsart = Column(String)
PraequalifiziertFuerRegelenergie = Column(Boolean)
GenMastrNummer = Column(String)
Netzbetreiberzuordnungen = Column(String)
# from bulk download
Hausnummer_nv = Column(Boolean)
Weic_nv = Column(Boolean)
Expand Down Expand Up @@ -227,6 +229,7 @@ class CombustionExtended(Extended, ParentAllTables, Base):
Einsatzort = Column(String)
KwkMastrNummer = Column(String)
Technologie = Column(String)
AusschliesslicheVerwendungImKombibetrieb = Column(Boolean)


class GsgkExtended(Extended, ParentAllTables, Base):
Expand Down Expand Up @@ -292,6 +295,7 @@ class Eeg(object):
AusschreibungZuschlag = Column(Boolean)
AnlagenkennzifferAnlagenregister = Column(String)
AnlagenkennzifferAnlagenregister_nv = Column(Boolean)
Netzbetreiberzuordnungen = Column(String)


class WindEeg(Eeg, ParentAllTables, Base):
Expand Down Expand Up @@ -385,6 +389,7 @@ class Kwk(ParentAllTables, Base):
VerknuepfteEinheiten = Column(String)
AnlageBetriebsstatus = Column(String)
AusschreibungZuschlag = Column(Boolean)
Netzbetreiberzuordnungen = Column(String)


class Permit(ParentAllTables, Base):
Expand All @@ -404,6 +409,7 @@ class Permit(ParentAllTables, Base):
VerknuepfteEinheiten = Column(String)
Frist_nv = Column(Boolean)
WasserrechtAblaufdatum_nv = Column(Boolean)
Netzbetreiberzuordnungen = Column(String)


class LocationBasic(Base):
Expand Down Expand Up @@ -741,6 +747,7 @@ class Grids(ParentAllTables, Base):
GeschlossenesVerteilnetz = Column(String)
Bezeichnung = Column(String)
Marktgebiet = Column(String)
Bundesland = Column(String)


class GridConnections(ParentAllTables, Base):
Expand Down Expand Up @@ -774,6 +781,18 @@ class DeletedUnits(ParentAllTables, Base):
EinheitBetriebsstatus = Column(String)


class RetrofitUnits(ParentAllTables, Base):
    """ORM model for the ``retrofit_units`` database table.

    The class is registered in ``tablename_mapping`` under the key
    ``"ertuechtigungen"`` with ``replace_column_names`` set to ``None``,
    i.e. no columns are renamed for this table.

    NOTE(review): the per-column remarks below are derived from the German
    attribute names and the declared column types only; confirm the exact
    semantics against the MaStR data documentation.
    """

    __tablename__ = "retrofit_units"

    # Integer primary key of the table.
    Id = Column(Integer, primary_key=True)
    # Presumably the MaStR number of the associated EEG installation -- confirm.
    EegMastrNummer = Column(String)
    # Name suggests a capacity increase; the unit is not visible here.
    Leistungserhoehung = Column(Float)
    # Name suggests the date the unit was put back into operation.
    WiederinbetriebnahmeDatum = Column(Date)
    # Timezone-aware timestamp; name suggests the last update of the entry.
    DatumLetzteAktualisierung = Column(DateTime(timezone=True))
    # Name suggests the kind of retrofit, stored as a string.
    Ertuechtigungsart = Column(String)
    # Name suggests a flag for whether the retrofit requires approval.
    ErtuechtigungIstZulassungspflichtig = Column(Boolean)


tablename_mapping = {
"anlageneegbiomasse": {
"__name__": BiomassEeg.__tablename__,
Expand All @@ -793,15 +812,15 @@ class DeletedUnits(ParentAllTables, Base):
"LokationMaStRNummer": "LokationMastrNummer",
},
},
"anlageneeggeosolarthermiegrubenklaerschlammdruckentspannung": {
"anlageneeggeothermiegrubengasdruckentspannung": {
"__name__": GsgkEeg.__tablename__,
"__class__": GsgkEeg,
"replace_column_names": {
"EegMaStRNummer": "EegMastrNummer",
"VerknuepfteEinheitenMaStRNummern": "VerknuepfteEinheit",
},
},
"einheitengeosolarthermiegrubenklaerschlammdruckentspannung": {
"einheitengeothermiegrubengasdruckentspannung": {
"__name__": GsgkExtended.__tablename__,
"__class__": GsgkExtended,
"replace_column_names": {
Expand Down Expand Up @@ -958,6 +977,11 @@ class DeletedUnits(ParentAllTables, Base):
"KwkMaStRNummer": "KwkMastrNummer",
},
},
"ertuechtigungen": {
"__name__": RetrofitUnits.__tablename__,
"__class__": RetrofitUnits,
"replace_column_names": None,
},
"geloeschteunddeaktivierteeinheiten": {
"__name__": DeletedUnits.__tablename__,
"__class__": DeletedUnits,
Expand Down
2 changes: 1 addition & 1 deletion open_mastr/xml_download/utils_cleansing_bulk.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def replace_mastr_katalogeintraege(
"""Replaces the IDs from the mastr database by its mapped string values from
the table katalogwerte"""
katalogwerte = create_katalogwerte_from_bulk_download(zipped_xml_file_path)

for column_name in df.columns:
if column_name in columns_replace_list:
if df[column_name].dtype == "O":
Expand All @@ -43,6 +42,7 @@ def replace_mastr_katalogeintraege(
df[column_name]
.str.split(",", expand=True)
.apply(lambda x: x.str.strip())
.replace("", None)
.astype("Int64")
.applymap(katalogwerte.get)
.agg(lambda d: ",".join(i for i in d if isinstance(i, str)), axis=1)
Expand Down
13 changes: 9 additions & 4 deletions open_mastr/xml_download/utils_write_to_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,9 +285,16 @@ def write_single_entries_until_not_unique_comes_up(
key_list = (
pd.read_sql(sql=select(primary_key), con=con).values.squeeze().tolist()
)
df = df.set_index(primary_key.name)

len_df_before = len(df)
df = df.drop(labels=key_list, errors="ignore")
df = df.drop_duplicates(
subset=[primary_key.name]
) # drop all entries with duplicated primary keys in the dataframe
df = df.set_index(primary_key.name)

df = df.drop(
labels=key_list, errors="ignore"
) # drop primary keys that already exist in the table
df = df.reset_index()
print(f"{len_df_before-len(df)} entries already existed in the database.")

Expand Down Expand Up @@ -378,5 +385,3 @@ def handle_xml_syntax_error(data: bytes, err: Error) -> pd.DataFrame:
df = pd.read_xml(decoded_data)
print("One invalid xml expression was deleted.")
return df


4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@
"open_mastr.utils.config",
"open_mastr.xml_download",
],
version="0.13.0",
version="0.13.1",
description="A package that provides an interface for downloading and"
" processing the data of the Marktstammdatenregister (MaStR)",
long_description=long_description,
long_description_content_type="text/x-rst",
url="https://github.com/OpenEnergyPlatform/open-MaStR",
download_url="https://github.com/OpenEnergyPlatform/open-MaStR/archive"
"/refs/tags/v0.13.0.tar.gz",
"/refs/tags/v0.13.1.tar.gz",
author="Open Energy Family",
author_email="datenzentrum@rl-institut.de",
maintainer="Ludwig Hülk",
Expand Down
2 changes: 2 additions & 0 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ def parameter_dict_working_list():
"grid",
"balancing_area",
"permit",
"deleted_units",
"retrofit_units",
None,
["wind", "solar"],
],
Expand Down