# rolling_and_plot_dc.py
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
import torch
def helper(value, j):
    '''
    helper function for data_plot()

    Picks the setting (color, mode, name or size) that applies to trace
    number `j` of one DataFrame.

    Parameters:
        `value` one cell of the settings table built by data_plot():
            - the string "None", the placeholder for "nothing was passed"
            - a list holding one entry per trace
            - any other single value, which applies to the first trace only
        `j` int
            zero-based position of the trace within its DataFrame

    Returns:
        the setting for trace `j`, or None when there is no setting for it
        (None lets plotly pick its own default).
    '''
    # Only a real string can be the placeholder; comparing other objects
    # against "None" is pointless and may not even yield a plain bool.
    if isinstance(value, str) and value == "None":
        return None
    if isinstance(value, list):
        # A list that has no entry for this trace (including an empty list)
        # means "no setting", never "use the list itself as the setting".
        return value[j] if j < len(value) else None
    # not a list so only one value: it belongs to the first trace
    return value if j == 0 else None
def data_plot(data=None, x=None, y=None,
              x_title=None, y_title=None, title=None,
              **kwargs):
    '''
    list of pandas.DataFrame, list of str, list of str, list of str, kwargs -> plotly plot object

    Precondition: If an argument has multiple objects, they must be in a list (can have nested lists).
    The order of the arguments must be in the same order as the DataFrames.
    There must be the same number of x columns as y columns passed.
    ex) data_plot(
            data = [df1, df2],
            x = [ "SOC", "SOC-Dis" ],
            y = [ "OCV", "OCV-Dis" ],
            mode = ["lines+markers", "markers"],
            color = ["mintcream", "darkorchid"]
        )

    This function takes one or more DataFrames, columns from the respective DataFrames to be plot on x and y-axes.
    It also takes the mode of plotting desired for the DataFrames and optional keyword arguments.
    It outputs a plotly plot of the data from the columns that were passed.

    Invalid argument combinations are not raised: the function returns a str
    starting with "Error:" instead of a figure.

    Parameters:
    `data` DataFrame or list of DataFrames
        a single DataFrame counts as one DataFrame, so `x`, `y` and every
        keyword list may then hold at most one (possibly nested) entry
    `x` list of columns or nested lists of columns
        example of each option in order:
            x = ["SOC-Dis"]
            x = ["SOC-Dis","SOC-Chg","SOC"]
            x = [ ["Test Time (sec)","Step Time (sec)"], "Step"]
                Test Time and Step Time are both from the same DataFrame; there must be two y columns as well.
    `y` list of columns or nested lists of columns
        View `x` for help
    `x_title` str
        the name of the x_axis to be displayed
        else None
    `y_title` str
        the name of the y_axis to be displayed
        else None
    `title` str
        The title of the Plot
        default None will not add a title

    **kwargs: (alphabetical order)
    `color` str, list of str, nested lists of str:
        same principle as above arguments,
        assigns the color of the individual data lines.
        if no value is passed for a plot, plotly will do it automatically.
        The 'color' property is a color and may be specified as:
          - A hex string (e.g. '#ff0000')
          - An rgb/rgba string (e.g. 'rgb(255,0,0)')
          - An hsl/hsla string (e.g. 'hsl(0,100%,50%)')
          - An hsv/hsva string (e.g. 'hsv(0,100%,100%)')
          - A named CSS color:
                aliceblue, antiquewhite, aqua, aquamarine, azure,
                beige, bisque, black, blanchedalmond, blue,
                blueviolet, brown, burlywood, cadetblue,
                chartreuse, chocolate, coral, cornflowerblue,
                cornsilk, crimson, cyan, darkblue, darkcyan,
                darkgoldenrod, darkgray, darkgrey, darkgreen,
                darkkhaki, darkmagenta, darkolivegreen, darkorange,
                darkorchid, darkred, darksalmon, darkseagreen,
                darkslateblue, darkslategray, darkslategrey,
                darkturquoise, darkviolet, deeppink, deepskyblue,
                dimgray, dimgrey, dodgerblue, firebrick,
                floralwhite, forestgreen, fuchsia, gainsboro,
                ghostwhite, gold, goldenrod, gray, grey, green,
                greenyellow, honeydew, hotpink, indianred, indigo,
                ivory, khaki, lavender, lavenderblush, lawngreen,
                lemonchiffon, lightblue, lightcoral, lightcyan,
                lightgoldenrodyellow, lightgray, lightgrey,
                lightgreen, lightpink, lightsalmon, lightseagreen,
                lightskyblue, lightslategray, lightslategrey,
                lightsteelblue, lightyellow, lime, limegreen,
                linen, magenta, maroon, mediumaquamarine,
                mediumblue, mediumorchid, mediumpurple,
                mediumseagreen, mediumslateblue, mediumspringgreen,
                mediumturquoise, mediumvioletred, midnightblue,
                mintcream, mistyrose, moccasin, navajowhite, navy,
                oldlace, olive, olivedrab, orange, orangered,
                orchid, palegoldenrod, palegreen, paleturquoise,
                palevioletred, papayawhip, peachpuff, peru, pink,
                plum, powderblue, purple, red, rosybrown,
                royalblue, rebeccapurple, saddlebrown, salmon,
                sandybrown, seagreen, seashell, sienna, silver,
                skyblue, slateblue, slategray, slategrey, snow,
                springgreen, steelblue, tan, teal, thistle, tomato,
                turquoise, violet, wheat, white, whitesmoke,
                yellow, yellowgreen
          - A number that will be interpreted as a color
            according to scatter.marker.colorscale
          - A list or array of any of the above
    `mode` str, list of str, nested lists of str:
        default None: None is handed to plotly, which then picks its own default
        Note: str must be one of "lines", "markers", "lines+markers" which are self-explanatory
        example of each option in order:
            mode = "markers"
            mode = ["lines+markers", "lines"]
            mode = ["lines+markers",["lines","lines"]]
    `name` str, list of str, nested list of strs
        same principle as above arguments
        assigns the names of the individual data lines to be displayed in the legend
    `size` int/float, list of int/float or nested lists of int/float
        same principle as above arguments
        assigns the size of the individual data lines
        if no value is passed, plotly will do it automatically.

    >>>df1 = generate_ocv_pts("JMFM_12_SOC_OCV_Test_220411.txt", to_csv = False)
    >>>df2 = ocv_estimate(df1, to_csv = False)
    >>>data_plot(data = [df1,df2],
          x=[ ["SOC-Chg","SOC-Dis"],"SOC" ],
          y = [ ["OCV-Chg","OCV-Dis"], "OCV" ],
          title = "JMFM-12 OCV vs. SOC Curve",
          x_title = "SOC (%)",
          y_title = "OCV (V)",
          mode = [ ["markers","markers"] ],
          color = [ ["violet","lightcoral"], "darkorchid"],
          name = [ ["Charge-OCV","Discharge-OCV"], "OCV"],
          size = [[4.5,4.5]]
         )
    figure...
    '''
    # A bare DataFrame is exactly one frame. len() of a DataFrame is its row
    # count, so it must never be used as "the number of DataFrames".
    multiple_frames = isinstance(data, list)
    n_frames = len(data) if multiple_frames else 1

    if len(x) != n_frames or len(y) != n_frames:
        return ("Error: x and y columns passed much match the number of DataFrames passed\n"
                "Use nested lists for multiple columns from the same DataFrame\n")

    # Each keyword list carries one (possibly nested) entry per DataFrame;
    # a longer list would create rows that have no x/y columns to plot.
    for key, plural in (("mode", "modes"), ("color", "colors"),
                        ("name", "names"), ("size", "sizes")):
        setting = kwargs.get(key)
        if isinstance(setting, list) and len(setting) > n_frames:
            return f"Error: passed more {plural} than DataFrames"

    # One row per DataFrame; missing settings become the "None" placeholder
    # that helper() translates back into a real None.
    frame = pd.DataFrame(data={"x": x, "y": y})
    for key in ["color", "mode", "name", "size"]:
        frame = frame.join(
            pd.Series(kwargs.get(key), name=key, dtype="object"),
            how="outer")
    frame.fillna("None", inplace=True)

    figure = make_subplots(
        x_title=x_title, y_title=y_title, subplot_titles=[title])

    for i in frame.index:
        use_data = data[i] if multiple_frames else data
        x_cols = frame.loc[i, "x"]
        y_cols = frame.loc[i, "y"]
        if not isinstance(x_cols, list):
            # a single column pair is handled as a one-element group
            x_cols, y_cols = [x_cols], [y_cols]
        for j, use_x in enumerate(x_cols):
            use_y = y_cols[j]
            figure.add_trace(
                go.Scatter(
                    x=use_data[use_x], y=use_data[use_y],
                    mode=helper(frame.loc[i, "mode"], j),
                    marker={
                        "size": helper(frame.loc[i, "size"], j),
                        "color": helper(frame.loc[i, "color"], j)},
                    name=helper(frame.loc[i, "name"], j))
            )
    return figure
# -------------------------------------------------------
def normalize(data: pd.DataFrame, capacity: float):
    '''
    pd.DataFrame, float -> pd.DataFrame
    Precondition: "delta t" is removed from the DataFrame

    Rescales the "current", "voltage" and "soc" columns of `data`.
    The columns are overwritten on the DataFrame that was passed in, and that
    same DataFrame is returned.

        "current" is divided by `capacity` (i.e. expressed as a C-rate);
        "voltage" is min-max scaled into the range 0 to 1;
        "soc" is divided by 100

    The per-column variance and mean after scaling are printed.

    Parameters:
    `data` pd.DataFrame
        must contain the columns "current", "voltage" and "soc"
    `capacity` float
        the divisor applied to the "current" column

    Output:
        the normalized pd.DataFrame
    '''
    data["current"] /= capacity

    # the scaler works on 2-D input, so the column is fed in as shape (n, 1)
    voltage_column = data["voltage"].values.reshape(-1, 1)
    voltage_scaler = MinMaxScaler((0, 1))
    data["voltage"] = voltage_scaler.fit_transform(voltage_column)

    data["soc"] /= 100.

    variance = data.var(axis=0)
    mean = data.mean(axis=0)
    print(f"Scaled stats:\nvariance:\n{variance},\nmean:\n{mean}")
    return data
# -------------------------------------------------------
def rolling_split(df, window_size, tgt_len, test_size=0.1, train=True):
    '''
    Precondition: "delta t" is not in the columns

    implements rolling window sectioning
    Three input features are used: "current", "voltage" and "soc".
    Every sample pairs `window_size` consecutive rows of those features with
    the "soc" values of the last `tgt_len` rows of the same window shifted
    one row into the future, so the final target value is the SOC one step
    after the input window ends.

    Sample s (s = 0, 1, ...) is built from
        inputs : rows s + 1 ... s + window_size
        targets: rows s + window_size - tgt_len + 2 ... s + window_size + 1
    which yields len(df) - window_size - 1 samples.

    Parameters:
    `df` pd.DataFrame
        must contain the columns "current", "voltage" and "soc"
    `window_size` int
        the number of consecutive data points needed to form a data window
    `tgt_len` int
        the number of consecutive SOC values in each target;
        must be between 1 and window_size + 1
    `test_size` float greater than 0 and at most 0.2
        the ratio of data points allocated to the dev/test set
        Should never exceed 0.2
    `train` bool
        True: the samples are shuffled and split with
              sklearn.model_selection.train_test_split
        False: the samples are returned in their original order, unsplit

    Returns:
        train=True : the output of train_test_split called on the inputs and targets
        train=False: tuple (inputs, targets)
        inputs are float32 with shape (samples, window_size, 3),
        targets are float32 with shape (samples, tgt_len, 1)

    Raises:
        TypeError  if test_size is not a float
        ValueError if "delta t" is still a column, test_size is out of range,
                   window_size / tgt_len are inconsistent, or df is too short
                   to produce a single sample
    '''
    # Explicit raises instead of assert: assert statements vanish under -O.
    if "delta t" in df.columns:
        raise ValueError('"delta t" must be removed from df before windowing')
    if not isinstance(test_size, float):
        raise TypeError("test_size must be a float")
    if not 0 < test_size <= 0.2:
        raise ValueError("test_size must be greater than 0 and at most 0.2")
    if window_size < 1 or not 1 <= tgt_len <= window_size + 1:
        raise ValueError(
            "window_size must be at least 1 and tgt_len must be between "
            "1 and window_size + 1")

    n_samples = len(df) - window_size - 1
    if n_samples < 1:
        raise ValueError(
            "df needs at least window_size + 2 rows to form one sample")

    features = df[["current", "voltage", "soc"]].to_numpy(dtype="float32")
    soc = df["soc"].to_numpy(dtype="float32")

    # Row numbers of every window, one window per line of the index matrix;
    # indexing with them gathers all windows in a single NumPy operation.
    sample_offsets = np.arange(n_samples)[:, np.newaxis]
    x_rows = sample_offsets + 1 + np.arange(window_size)
    y_rows = sample_offsets + (window_size - tgt_len + 2) + np.arange(tgt_len)

    windows_x = features[x_rows]
    windows_y = soc[y_rows][:, :, np.newaxis]

    if train:
        return train_test_split(windows_x,
                                windows_y,
                                test_size=test_size,
                                shuffle=True)
    return (windows_x, windows_y)
# ----------------------------------------------------------------
# Validation
def validate(model, dataloader, dev=True):
    '''
    tensorflow model, tensorflow DataSet -> pd.DataFrame, prints 2 floats and a Plotly plot
    !! Tensorflow version, not the original PyTorch version

    This function runs a tf.data.Dataset through the model and prints the max and min
    predicted SOC, it also prints a Plotly plot of the predictions versus the labels
    This function outputs a pandas.DataFrame of the predictions with their corresponding labels.

    What the code relies on:
        `model.predict(dataloader, verbose=1)` returns an array indexed as
        [sample, time step, feature]; only the last time step is kept.
        Iterating `dataloader` yields (inputs, labels) pairs whose labels have
        the same three axes and offer `.numpy()`.
        NOTE(review): the single-feature squeeze below assumes the last axis
        has length 1 - confirm against the model output.

    Parameters:
    `model`
        object with a `predict(dataset, verbose=...)` method
    `dataloader`
        the dataset to evaluate; it is iterated twice (prediction, labels)
    `dev` bool
        whether or not it's the developmental set
        use False if it's the entire dataset

    Returns:
        pd.DataFrame with the columns "pred", "labels" and "point"
        ("point" numbers the rows 1..n in their final order) and a fresh
        0..n-1 index.
    '''
    # only want the final time step
    aggregate = model.predict(dataloader, verbose=1)[:, -1, :]
    print("Max pred: ", aggregate.max(), "\tMin pred: ", aggregate.min())

    # Truncate AFTER concatenating: the cut has to count samples, not batches.
    np_labels = np.concatenate(
        [label[:, -1, :].numpy() for _, label in dataloader],
        axis=0)[:len(aggregate)]

    # squeeze only the feature axis so a single-sample result stays 1-D
    visualize = pd.DataFrame(data={"pred": aggregate.squeeze(axis=-1),
                                   "labels": np_labels.squeeze(axis=-1)})
    if dev:  # if it is the dev set, the values need to be sorted by value
        visualize = visualize.sort_values("labels")
    # if it is the entire dataset, it is already sorted chronologically which is more important
    # reset_index returns a new frame; the result has to be kept
    visualize = visualize.reset_index(drop=True)
    visualize["point"] = np.arange(1, len(visualize) + 1)

    print("Percent Accuracy:",
          np.mean(100.0 - abs((aggregate - np_labels)) / (np_labels + 0.01) * 100))

    fig = data_plot(data=visualize,
                    x=[["point", "point"]],
                    y=[["pred", "labels"]],
                    x_title="Data Point",
                    y_title="SOC",
                    title="Predicted vs Actual SOC",
                    name=[["predictions", "labels"]],
                    mode=[["lines", "lines"]],
                    color=[["red", "green"]],
                    size=[[6, 6.5]]
                    )
    fig.show()
    return visualize