Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix get_values #326

Merged
merged 3 commits into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# Changelog

### 3.0.4 [#326](https://github.com/openfisca/openfisca-survey-manager/pull/326)

* Technical changes
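- Fix `get_values` of the `Survey` class: select the table by exact name match and honor the `variables` argument when reading parquet files.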

### 3.0.3 [#329](https://github.com/openfisca/openfisca-survey-manager/pull/329)

* Technical changes
Expand Down
11 changes: 6 additions & 5 deletions openfisca_survey_manager/surveys.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def get_values(self, variables = None, table = None, lowercase = False, ignoreca
if table is None:
raise Exception("A table name is needed to retrieve data from a parquet file")
for table_name, table_content in self.tables.items():
if table in table_name:
if table == table_name:
parquet_file = table_content.get("parquet_file")
# Is parquet_file a folder or a file?
if os.path.isdir(parquet_file):
Expand All @@ -246,9 +246,10 @@ def get_values(self, variables = None, table = None, lowercase = False, ignoreca
one_parquet_file = parquet_file
parquet_schema = pq.read_schema(one_parquet_file)
assert len(parquet_schema.names) >= 1, f"The parquet file {table_content.get('parquet_file')} is empty"
columns = table_content.get('variables')
if variables is None:
variables = table_content.get('variables')
if filter_by:
df = pq.ParquetDataset(parquet_file, filters=filter_by).read(columns=columns).to_pandas()
df = pq.ParquetDataset(parquet_file, filters=filter_by).read(columns=variables).to_pandas()
elif batch_size:
if os.path.isdir(parquet_file):
parquet_file = glob.glob(os.path.join(parquet_file, '*.parquet'))
Expand All @@ -258,7 +259,7 @@ def get_values(self, variables = None, table = None, lowercase = False, ignoreca
tables = []
# Loop through the file paths and read each Parquet file
for file_path in parquet_file:
table = pq.read_table(file_path, columns=columns)
table = pq.read_table(file_path, columns=variables)
tables.append(table)

# Concatenate the tables if needed
Expand All @@ -283,7 +284,7 @@ def get_values(self, variables = None, table = None, lowercase = False, ignoreca
# break
# index += 1
else:
df = pq.ParquetDataset(parquet_file).read(columns=columns).to_pandas()
df = pq.ParquetDataset(parquet_file).read(columns=variables).to_pandas()
break
else:
raise Exception(f"No table {table} found in {self.parquet_file_path}")
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "OpenFisca-Survey-Manager"
version = "3.0.3"
version = "3.0.4"
description = "A tool for managing survey/administrative data and import them in OpenFisca"
readme = "README.md"
keywords = ["microsimulation", "tax", "benefit", "rac", "rules-as-code", "survey", "data"]
Expand Down