Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expand LuxGroupby Tests and add bug fixes #287

Merged
merged 20 commits into from
Mar 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ init:
test:
black --check .
python -m pytest tests/

.PHONY: init test
7 changes: 7 additions & 0 deletions lux/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,13 @@ def filter(self, *args, **kwargs):
ret_val.pre_aggregated = False # Returned LuxDataFrame isn't pre_aggregated
return ret_val

def apply(self, *args, **kwargs):
    """Delegate to the parent class's ``apply`` and carry metadata onto the result.

    All positional and keyword arguments are forwarded unchanged to the
    parent implementation. Each attribute named in ``self._metadata`` is
    then copied onto the returned object (``None`` is used when this
    groupby object lacks the attribute), and the result is explicitly
    flagged as not pre-aggregated before being returned.
    """
    result = super(LuxDataFrameGroupBy, self).apply(*args, **kwargs)
    # Write straight into the instance dict, one metadata field at a time.
    vars(result).update((field, getattr(self, field, None)) for field in self._metadata)
    result.pre_aggregated = False  # Returned LuxDataFrame isn't pre_aggregated
    return result

def size(self, *args, **kwargs):
ret_val = super(LuxDataFrameGroupBy, self).size(*args, **kwargs)
for attr in self._metadata:
Expand Down
7 changes: 5 additions & 2 deletions lux/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class LuxSeries(pd.Series):
"_pandas_only",
"pre_aggregated",
"_type_override",
"name",
]

_default_metadata = {
Expand Down Expand Up @@ -111,10 +112,12 @@ def __repr__(self):
from lux.core.frame import LuxDataFrame

series_repr = super(LuxSeries, self).__repr__()

ldf = LuxDataFrame(self)

# Default column name 0 causes errors
if self.name is None:
self.name = " "
ldf = LuxDataFrame(self)
ldf = ldf.rename(columns={0: " "})
self._ldf = ldf

try:
Expand Down
1 change: 1 addition & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,5 @@ def global_var():
"_pandas_only",
"pre_aggregated",
"_type_override",
"name",
]
14 changes: 14 additions & 0 deletions tests/test_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,3 +131,17 @@ def test_int_columns(global_var):
assert list(df.recommendation.keys()) == ["Enhance", "Filter", "Generalize"]
df.intent = [0]
assert list(df.recommendation.keys()) == ["Enhance", "Filter"]


def test_name_column(global_var):
df = pd.read_csv("lux/data/car.csv")
new_df = df.rename(columns={"Name": "name"})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For this test, can we add a quick check that the values of the "name" column have not all been converted to None?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! I added a few more assert statements to check for this case!

assert list(new_df.recommendation.keys()) == [
"Correlation",
"Distribution",
"Occurrence",
"Temporal",
]
assert len(new_df.recommendation["Correlation"])
assert new_df["name"][0] != None
assert (new_df["name"].unique() != None)[0]
14 changes: 14 additions & 0 deletions tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def test_agg(global_var):
new_df = df[["Horsepower", "Brand"]].groupby("Brand").agg(sum)
new_df._repr_html_()
assert new_df.history[0].name == "groupby"
assert new_df.pre_aggregated


def test_shortcut_agg(global_var):
Expand All @@ -31,6 +32,7 @@ def test_shortcut_agg(global_var):
new_df = df[["MilesPerGal", "Brand"]].groupby("Brand").sum()
new_df._repr_html_()
assert new_df.history[0].name == "groupby"
assert new_df.pre_aggregated


def test_agg_mean(global_var):
Expand All @@ -39,6 +41,7 @@ def test_agg_mean(global_var):
new_df = df.groupby("Origin").mean()
new_df._repr_html_()
assert new_df.history[0].name == "groupby"
assert new_df.pre_aggregated


def test_agg_size(global_var):
Expand All @@ -47,6 +50,7 @@ def test_agg_size(global_var):
new_df = df.groupby("Brand").size().to_frame()
new_df._repr_html_()
assert new_df.history[0].name == "groupby"
assert new_df.pre_aggregated


def test_filter(global_var):
Expand All @@ -55,3 +59,13 @@ def test_filter(global_var):
new_df = df.groupby("Origin").filter(lambda x: x["Weight"].mean() > 3000)
new_df._repr_html_()
assert new_df.history[0].name == "groupby"
assert not new_df.pre_aggregated


def test_get_group(global_var):
    """Check that ``get_group`` output records groupby history and is not pre-aggregated."""
    cars = pytest.car_df
    cars._repr_html_()
    japan_cars = cars.groupby("Origin").get_group("Japan")
    japan_cars._repr_html_()
    first_event = japan_cars.history[0]
    assert first_event.name == "groupby"
    assert not japan_cars.pre_aggregated
1 change: 1 addition & 0 deletions tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def test_df_to_series():
"_pandas_only",
"pre_aggregated",
"_type_override",
"name",
], "Metadata is lost when going from Dataframe to Series."
assert df.cardinality is not None, "Metadata is lost when going from Dataframe to Series."
assert series.name == "Weight", "Pandas Series original `name` property not retained."
Expand Down