Skip to content

Commit

Permalink
exe compiling changes, add rebuild option to dynamic data compilier, …
Browse files Browse the repository at this point in the history
…add columns example in read me
  • Loading branch information
nick-gorman committed Jun 27, 2022
1 parent d448682 commit 0ffb74a
Show file tree
Hide file tree
Showing 7 changed files with 231 additions and 22 deletions.
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,28 @@ price_data = dynamic_data_compiler(start_time, end_time, table, raw_data_cache,
If the option `fformat='parquet'` is provided then no feather files will be created, and a parquet file will be used instead.
While feather might have faster read/write, parquet has excellent compression characteristics and good compatibility with packages for handling large on-memory/cluster datasets (e.g. Dask). This helps with local storage (especially for Causer Pays data) and file size for version control.

#### Accessing additional table columns

By default NEMOSIS only includes a subset of an AEMO table's columns. The full set of columns is listed in the
[MMS Data Model Reports](https://visualisations.aemo.com.au/aemo/di-help/Content/Data_Model/MMS_Data_Model.htm?TocPath=_____8),
or can be seen by inspecting the CSVs in the raw data cache. Users of the Python interface can add
additional columns as shown below. If you are using a feather or parquet based cache, the `rebuild` option should be
set to `True` so the additional columns are added to the cache files when they are rebuilt. This method of adding
additional columns should also work with the `cache_compiler` function.

```python
from nemosis import defaults, dynamic_data_compiler

defaults.table_columns['BIDPEROFFER_D'] += ['PASAAVAILABILITY']

start_time = '2017/01/01 00:00:00'
end_time = '2017/01/01 00:05:00'
table = 'BIDPEROFFER_D'
raw_data_cache = 'C:/Users/your_data_storage'

volume_bid_data = dynamic_data_compiler(start_time, end_time, table, raw_data_cache, rebuild=True)
```

##### Cache compiler

This may be useful if you're using NEMOSIS to
Expand Down
7 changes: 4 additions & 3 deletions nemosis/compiling to exe
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
3. Activate venv then complete the next steps
4. Install NEMOSIS to venv: pip install -e .
5. Also install pyinstaller: pip install pyinstaller
6. run pyinstaller: PyInstaller --onefile nemosis\gui.py
6. run pyinstaller: pyi-makespec --onefile --icon=favicon.ico nemosis/gui.py
7. edit gui.spec by adding the following line after the 'a' variable is defined. The file path will need to be
adjusted.
a.datas += [('favicon.ico', 'C:\\Users\\user\\Documents\\GitHub\\nemosis\\nemosis\\favicon.ico', 'DATA')]
8. rerun pyinstaller but use the spec file: Pyinstaller --onefile --icon=favicon.ico --clean gui.spec
8. Also add "icon='nemosis\\favicon.ico'" to the EXE call in the spec file
9. rerun pyinstaller but use the spec file: pyinstaller --clean gui.spec

Sample Spec:
# -*- mode: python ; coding: utf-8 -*-
Expand All @@ -29,7 +30,7 @@ a = Analysis(['nemosis\\gui.py'],
cipher=block_cipher,
noarchive=False)

a.datas += [('favicon.ico', 'C:\\Users\\nick\\Documents\\GitHub\\Abi_NEMOSIS\\nemosis\\favicon.ico', 'DATA')]
a.datas += [('favicon.ico', 'C:\\Users\\nick\\Documents\\GitHub\\nemosis\\nemosis\\favicon.ico', 'DATA')]

pyz = PYZ(a.pure, a.zipped_data,
cipher=block_cipher)
Expand Down
5 changes: 4 additions & 1 deletion nemosis/data_fetch_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def dynamic_data_compiler(start_time, end_time, table_name, raw_data_location,
select_columns=None, filter_cols=None,
filter_values=None, fformat='feather',
keep_csv=True, parse_data_types=True,
**kwargs):
rebuild=False, **kwargs):
"""
Downloads and compiles data for all dynamic tables. For non-CSV formats,
will save data typed as strings/objects. To save typed data (e.g.
Expand Down Expand Up @@ -42,6 +42,8 @@ def dynamic_data_compiler(start_time, end_time, table_name, raw_data_location,
If False, will not return any data.
parse_data_types (bool): infers data types of columns when reading
data. default True for API use.
rebuild (bool): If True then cache files are rebuilt
even if they exist already. False by default.
**kwargs: additional arguments passed to the pd.to_{fformat}() function
Returns:
Expand Down Expand Up @@ -77,6 +79,7 @@ def dynamic_data_compiler(start_time, end_time, table_name, raw_data_location,
select_columns, date_filter,
fformat=fformat,
keep_csv=keep_csv,
rebuild=rebuild,
write_kwargs=kwargs)
if data_tables:
all_data = _pd.concat(data_tables, sort=False)
Expand Down
19 changes: 6 additions & 13 deletions nemosis/date_generators.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from nemosis import defaults
from calendar import monthrange

from datetime import timedelta

def year_and_month_gen(start_time, end_time):

Expand Down Expand Up @@ -60,21 +60,14 @@ def year_month_day_index_gen(start_time, end_time):

def bid_table_gen(start_time, end_time):


# Test for if we are after the date that aemo changes the datafiles to a daily format.
if (start_time.year >= 2021 and start_time.month >= 4) or start_time.year >= 2022:
if (start_time.day == 2 and start_time.hour <= 4) or start_time.day == 1:
# If its a daily format only push the start buffer back by a day.
if start_time.month == 1:
start_time = start_time.replace(month=12)
start_time = start_time.replace(day=31)
start_time = start_time.replace(year=start_time.year - 1)
else:
start_time = start_time.replace(month=start_time.month - 1)
last_day_previous_month = monthrange(start_time.year, start_time.month)[1]
start_time = start_time.replace(day=last_day_previous_month)
if start_time.day == 2 and start_time.hour <= 4:
start_time = start_time - timedelta(days=2)
elif start_time.hour <= 4 or start_time.day == 1:
start_time = start_time - timedelta(days=1)
else:
if start_time.day == 1 and start_time.hour == 0 or start_time.minute == 0:
if start_time.day == 1 and start_time.hour == 0 and start_time.minute == 0:
# If its a monthly format push the buffer back by a month.
if start_time.month == 1:
start_time = start_time.replace(month=12)
Expand Down
8 changes: 4 additions & 4 deletions nemosis/test_data_fetch_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -384,8 +384,8 @@ def test_dispatch_tables_start_of_month(self):
print('Passed')

def test_dispatch_tables_start_of_month_previous_market_day_but_not_start_calendar_month(self):
start_time = '2021/06/01 03:00:00'
end_time = '2021/06/01 03:15:00'
start_time = '2021/06/05 03:00:00'
end_time = '2021/06/05 03:15:00'
for table in self.table_names:
print(f'Testing {table} returning values at start of month two.')
dat_col = defaults.primary_date_columns[table]
Expand All @@ -398,11 +398,11 @@ def test_dispatch_tables_start_of_month_previous_market_day_but_not_start_calend
expected_last_time = pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S')
if table == 'BIDDAYOFFER_D':
expected_length = 1 * 4
expected_last_time = '2021/05/31 00:00:00'
expected_last_time = '2021/06/04 00:00:00'
expected_last_time = \
pd.to_datetime(expected_last_time,
format='%Y/%m/%d %H:%M:%S')
expected_first_time = '2021/05/31 00:00:00'
expected_first_time = '2021/06/04 00:00:00'
expected_first_time =\
pd.to_datetime(expected_first_time,
format='%Y/%m/%d %H:%M:%S')
Expand Down
190 changes: 190 additions & 0 deletions nemosis/test_date_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,196 @@ def test_two_times_in_middle_of_jan_and_march_return_3_months(self):
self.assertEqual(len(times), 3)


class TestBidTableGen(unittest.TestCase):
    """Tests for the tuples yielded by ``date_generators.bid_table_gen``.

    Each yielded item is unpacked as ``(year, month, day, index)``. The
    expectations below use zero-padded strings for year/month/day and
    ``None`` for fields that are not populated.
    """

    def setUp(self):
        pass

    def _assert_generated(self, start, end, expected, check_count=True):
        """Run the generator over [start, end] and compare its leading rows.

        Args:
            start (str): start time formatted as '%Y/%m/%d %H:%M:%S'.
            end (str): end time formatted as '%Y/%m/%d %H:%M:%S'.
            expected (list[tuple]): expected (year, month, day, index)
                values for the first ``len(expected)`` generated rows.
            check_count (bool): when True, also require that exactly
                ``len(expected)`` rows were generated.
        """
        start_time = datetime.strptime(start, '%Y/%m/%d %H:%M:%S')
        end_time = datetime.strptime(end, '%Y/%m/%d %H:%M:%S')
        # Unpacking into four names keeps the implicit check that every
        # generated item has exactly four fields.
        generated = [
            (year, month, day, index)
            for year, month, day, index
            in date_generators.bid_table_gen(start_time, end_time)
        ]
        for row, expected_row in enumerate(expected):
            for column, expected_value in enumerate(expected_row):
                self.assertEqual(generated[row][column], expected_value)
        if check_count:
            self.assertEqual(len(generated), len(expected))

    def test_two_times_not_at_edge_of_month_return_one_month(self):
        """A window inside January 2017 yields only January 2017."""
        self._assert_generated(
            '2017/01/02 00:00:00',
            '2017/01/03 00:00:00',
            [('2017', '01', None, None)],
        )

    def test_two_times_first_at_edge_of_month_return_month_before_and_month_of_times(self):
        """A start at midnight on 1 Feb yields January first, then February."""
        self._assert_generated(
            '2017/02/01 00:00:00',
            '2017/02/03 00:00:00',
            [
                ('2017', '01', None, None),
                ('2017', '02', None, None),
            ],
            check_count=False,
        )

    def test_two_times_first_at_edge_of_year_return_month_before_and_month_of_times(self):
        """A start at midnight on 1 Jan yields December of the prior year first."""
        self._assert_generated(
            '2017/01/01 00:00:00',
            '2017/01/03 00:00:00',
            [
                ('2016', '12', None, None),
                ('2017', '01', None, None),
            ],
            check_count=False,
        )

    def test_two_times_second_at_edge_of_month_returns_one_month(self):
        """An end time on the last day of January does not add February."""
        self._assert_generated(
            '2017/01/05 00:00:00',
            '2017/01/31 00:00:00',
            [('2017', '01', None, None)],
        )

    def test_two_times_second_at_edge_of_year_returns_one_month(self):
        """An end time on 31 Dec does not add the following January."""
        self._assert_generated(
            '2017/12/02 00:00:00',
            '2017/12/31 00:00:00',
            [('2017', '12', None, None)],
        )

    def test_two_times_in_middle_of_jan_and_march_return_3_months(self):
        """A window from mid January to late March yields three months."""
        self._assert_generated(
            '2017/01/05 00:00:00',
            '2017/03/24 00:00:00',
            [
                ('2017', '01', None, None),
                ('2017', '02', None, None),
                ('2017', '03', None, None),
            ],
        )

    def test_change_from_months_to_days(self):
        """A window spanning the monthly-to-daily switch yields both row kinds.

        Data for march and the first of april is missing from the AEMO
        website so we don't generate the dates for these times.
        """
        self._assert_generated(
            '2021/02/01 00:00:00',
            '2021/04/03 00:00:00',
            [
                ('2021', '01', None, None),
                ('2021', '02', None, None),
                ('2021', '04', '02', None),
                ('2021', '04', '03', None),
            ],
        )

    def test_day_given_in_april_2021(self):
        """Note we expect the 1st of april to be skipped."""
        self._assert_generated(
            '2021/04/01 00:00:00',
            '2021/04/03 00:00:00',
            [
                ('2021', '04', '02', None),
                ('2021', '04', '03', None),
            ],
        )

    def test_include_previous_market_day(self):
        """A 01:00 start on 10 May 2021 also yields the preceding day."""
        self._assert_generated(
            '2021/05/10 01:00:00',
            '2021/05/10 05:00:00',
            [
                ('2021', '05', '09', None),
                ('2021', '05', '10', None),
            ],
        )

    def test_include_previous_month_if_1st_market_day_of_month(self):
        """A start on 1 May 2021 also yields 30 April."""
        self._assert_generated(
            '2021/05/01 05:00:00',
            '2021/05/03 05:00:00',
            [
                ('2021', '04', '30', None),
                ('2021', '05', '01', None),
                ('2021', '05', '02', None),
                ('2021', '05', '03', None),
            ],
        )

    def test_include_previous_month_if_1st_market_day_of_month_but_2nd_calendar_day(self):
        """A 04:00 start on 2 May 2021 yields 30 April and 1 May as well."""
        self._assert_generated(
            '2021/05/02 04:00:00',
            '2021/05/03 05:00:00',
            [
                ('2021', '04', '30', None),
                ('2021', '05', '01', None),
                ('2021', '05', '02', None),
                ('2021', '05', '03', None),
            ],
        )


class TestYearMonthDayIndexGen(unittest.TestCase):
def setUp(self):
pass
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="nemosis",
version="3.0.0",
version="3.0.1",
author="Nicholas Gorman, Abhijith Prakash",
author_email="n.gorman305@gmail.com",
description="A tool for accessing AEMO data.",
Expand Down

0 comments on commit 0ffb74a

Please sign in to comment.