Skip to content

Commit

Permalink
Add from_dataframe method to Measurement to create multidim measurement from a DataFrame.
Browse files Browse the repository at this point in the history

This also adds some symmetry with the existing to_dataframe method.

PiperOrigin-RevId: 721549130
  • Loading branch information
glados-verma authored and copybara-github committed Jan 31, 2025
1 parent b8cfa7e commit a3dafb7
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 2 deletions.
29 changes: 29 additions & 0 deletions openhtf/core/measurements.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,35 @@ def to_dataframe(self, columns: Any = None) -> Any:

return dataframe

def from_dataframe(self, dataframe: Any, metric_column: str) -> None:
    """Populate this multi-dim measurement from a pandas DataFrame.

    Every row of the DataFrame becomes one measured point: the values found
    under this measurement's dimension names form the coordinates, and the
    value found under `metric_column` is stored at those coordinates. Rows
    that repeat the same coordinates are applied in order, so the last one
    wins. Columns other than the dimensions and the metric are ignored.

    Args:
      dataframe: A pandas DataFrame. Dimensions for this multi-dim measurement
        need to match columns in the DataFrame (can be multi-index).
      metric_column: The column name of the metric to be measured.

    Raises:
      TypeError: If this measurement is not dimensioned.
      ValueError: If dataframe is missing dimensions, or has no column named
        `metric_column`.
    """
    if not isinstance(self._measured_value, DimensionedMeasuredValue):
        raise TypeError(
            'Only a dimensioned measurement can be set from a DataFrame'
        )
    dimension_labels = [d.name for d in self.dimensions]
    # reset_index() returns a new frame, so the caller's DataFrame is never
    # mutated. It also moves any index levels into ordinary columns, which
    # lets dimensions held in a (multi-)index and dimensions held in plain
    # columns be handled the same way below.
    flat_df = dataframe.reset_index()
    try:
        dimensioned_df = flat_df.set_index(dimension_labels)
    except KeyError as e:
        raise ValueError('DataFrame is missing dimensions') from e
    if metric_column not in dimensioned_df.columns:
        raise ValueError(
            f'DataFrame does not have a column named {metric_column}'
        )
    # Iterate over the metric column only. DataFrame.iterrows() would build a
    # Series per row, coercing the metric to a dtype shared with unrelated
    # columns (e.g. an int metric turns into a float when any other column is
    # float), so the stored value would depend on columns that are supposed to
    # be ignored.
    for row_dimensions, metric in dimensioned_df[metric_column].items():
        self.measured_value[row_dimensions] = metric


@attr.s(slots=True)
class MeasuredValue(object):
Expand Down
115 changes: 113 additions & 2 deletions test/core/measurements_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from openhtf.core import measurements
from examples import all_the_things
from openhtf.util import test as htf_test
import pandas

# Fields that are considered 'volatile' for record comparison.
_VOLATILE_FIELDS = {
Expand Down Expand Up @@ -231,14 +232,19 @@ def test_to_dataframe__no_pandas(self):
with self.assertRaises(RuntimeError):
self.test_to_dataframe(units=True)

def test_to_dataframe(self, units=True):
def _make_multidim_measurement(self, units=''):
    """Build the 'test_multidim' measurement shared by the DataFrame tests.

    The measurement is dimensioned by 'ms', 'assembly' and 'my_zone'.

    Args:
      units: Units to attach to the measurement; when falsy (the default), no
        units are attached.

    Returns:
      The configured measurement.
    """
    multidim = htf.Measurement('test_multidim')
    multidim.with_dimensions('ms', 'assembly', htf.Dimension('my_zone'))
    if not units:
        return multidim
    multidim.with_units(units)
    return multidim

def test_to_dataframe(self, units=True):
if units:
measurement.with_units('°C')
measurement = self._make_multidim_measurement('°C')
measure_column_name = 'degree Celsius'
else:
measurement = self._make_multidim_measurement()
measure_column_name = 'value'

for t in range(5):
Expand All @@ -260,6 +266,111 @@ def test_to_dataframe(self, units=True):
def test_to_dataframe__no_units(self):
    """Re-run test_to_dataframe with units disabled."""
    self.test_to_dataframe(units=False)

def test_from_dataframe_raises_if_dimensions_missing_in_dataframe(self):
    """from_dataframe raises ValueError when a dimension column is absent.

    The DataFrame has 'ms' and 'my_zone' but no 'assembly' column, so it
    cannot be indexed by the measurement's dimensions. The pandas KeyError
    that triggered the failure must be chained as the ValueError's cause.
    """
    measurement = self._make_multidim_measurement('°C')
    incomplete_dataframe = pandas.DataFrame({
        'ms': [1, 2, 3],
        'my_zone': ['X', 'Y', 'Z'],
        'degree_celsius': [10, 20, 30],
    })
    with self.assertRaisesRegex(
        ValueError, 'DataFrame is missing dimensions'
    ) as cm:
        measurement.from_dataframe(
            incomplete_dataframe, metric_column='degree_celsius'
        )
    # cm.exception is only populated once the block above has exited, so the
    # cause is inspected here rather than inside it. A literal substring check
    # is used instead of a regex because the brackets in the pandas message
    # would be parsed as a character class and never match the real text.
    cause = cm.exception.__cause__
    self.assertIsInstance(cause, KeyError)
    self.assertIn("None of ['assembly'] are in the columns", str(cause))

def test_from_dataframe_raises_if_metric_missing_in_dataframe(self):
    """from_dataframe raises ValueError when the metric column is absent.

    All three dimensions are present, but the only value column is
    'degrees_fahrenheit' while 'degree_celsius' is requested.
    """
    measurement = self._make_multidim_measurement('°C')
    wrong_metric_dataframe = pandas.DataFrame({
        'ms': [1, 2, 3],
        'assembly': ['A', 'B', 'C'],
        'my_zone': ['X', 'Y', 'Z'],
        'degrees_fahrenheit': [10, 20, 30],
    })
    expected_message = 'DataFrame does not have a column named degree_celsius'
    with self.assertRaisesRegex(ValueError, expected_message):
        measurement.from_dataframe(
            wrong_metric_dataframe, metric_column='degree_celsius'
        )

def test_from_flat_dataframe(self):
    """A flat DataFrame loads into the measurement and round-trips back."""
    measurement = self._make_multidim_measurement('°C')
    source_dataframe = pandas.DataFrame({
        'ms': [1, 2, 3],
        'assembly': ['A', 'B', 'C'],
        'my_zone': ['X', 'Y', 'Z'],
        'degree_celsius': [10, 20, 30],
    })

    measurement.from_dataframe(source_dataframe, metric_column='degree_celsius')
    measurement.outcome = measurements.Outcome.PASS

    expected_points = {
        (1, 'A', 'X'): 10,
        (2, 'B', 'Y'): 20,
        (3, 'C', 'Z'): 30,
    }
    for coordinates, expected_value in expected_points.items():
        self.assertEqual(measurement.measured_value[coordinates], expected_value)

    # The metric column name produced by to_dataframe() comes from the unit,
    # so it is mapped back to the source column name before comparing. The
    # dimension columns already carry the same names on both sides.
    round_tripped = measurement.to_dataframe().rename(
        columns={'degree Celsius': 'degree_celsius'}
    )
    pandas.testing.assert_frame_equal(round_tripped, source_dataframe)

def test_from_dataframe_with_multiindex_dataframe(self):
    """Dimensions held in the DataFrame's multi-index are picked up too."""
    measurement = self._make_multidim_measurement('°C')
    flat_dataframe = pandas.DataFrame({
        'ms': [1, 2, 3],
        'assembly': ['A', 'B', 'C'],
        'my_zone': ['X', 'Y', 'Z'],
        'degree_celsius': [10, 20, 30],
    })
    # Move the three dimensions out of the columns and into the index.
    source_dataframe = flat_dataframe.set_index(['ms', 'assembly', 'my_zone'])

    measurement.from_dataframe(source_dataframe, metric_column='degree_celsius')
    measurement.outcome = measurements.Outcome.PASS

    expected_points = {
        (1, 'A', 'X'): 10,
        (2, 'B', 'Y'): 20,
        (3, 'C', 'Z'): 30,
    }
    for coordinates, expected_value in expected_points.items():
        self.assertEqual(measurement.measured_value[coordinates], expected_value)

def test_from_dataframe_ignores_extra_columns(self):
    """Columns that are neither a dimension nor the metric are not loaded."""
    measurement = self._make_multidim_measurement('°C')
    source_dataframe = pandas.DataFrame({
        'ms': [1, 2, 3],
        'assembly': ['A', 'B', 'C'],
        'my_zone': ['X', 'Y', 'Z'],
        'degree_celsius': [10, 20, 30],
        'degrees_fahrenheit': [11, 21, 31],
    })

    measurement.from_dataframe(source_dataframe, metric_column='degree_celsius')
    measurement.outcome = measurements.Outcome.PASS

    # Only the 'degree_celsius' values are expected, never the fahrenheit ones.
    expected_points = {
        (1, 'A', 'X'): 10,
        (2, 'B', 'Y'): 20,
        (3, 'C', 'Z'): 30,
    }
    for coordinates, expected_value in expected_points.items():
        self.assertEqual(measurement.measured_value[coordinates], expected_value)

def test_from_dataframe_with_duplicate_dimensions_overwrites(self):
    """A later row with repeated coordinates replaces the earlier value."""
    measurement = self._make_multidim_measurement('°C')
    # The fourth row repeats the coordinates of the first row (1, 'A', 'X')
    # with a different metric value.
    source_dataframe = pandas.DataFrame({
        'ms': [1, 2, 3, 1],
        'assembly': ['A', 'B', 'C', 'A'],
        'my_zone': ['X', 'Y', 'Z', 'X'],
        'degree_celsius': [10, 20, 30, 11],
    })

    measurement.from_dataframe(source_dataframe, metric_column='degree_celsius')
    measurement.outcome = measurements.Outcome.PASS

    expected_points = {
        (1, 'A', 'X'): 11,  # Overwritten value.
        (2, 'B', 'Y'): 20,
        (3, 'C', 'Z'): 30,
    }
    for coordinates, expected_value in expected_points.items():
        self.assertEqual(measurement.measured_value[coordinates], expected_value)

def test_bad_validator(self):
measurement = htf.Measurement('bad_measure')
measurement.with_dimensions('a')
Expand Down

0 comments on commit a3dafb7

Please sign in to comment.