Commit becca9b

Replace ExperimentData._analysis_results with dataframe
This change decouples AnalysisResult from ExperimentData. Since AnalysisResult not only defines the data model but also provides the API for the IBM experiment service, this coupling limits the capability of experiment data analysis. ExperimentData.analysis_results still returns AnalysisResult objects for backward compatibility, but they are not the stored objects themselves: each returned AnalysisResult is newly generated from the dataframe.
1 parent 6e69f6a commit becca9b
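
The backward-compatibility note above has a practical consequence: results now round-trip through the dataframe, so object identity is not preserved across calls. Below is a minimal sketch of that behavior; it assumes an ExperimentData populated through the keyword form of add_analysis_results introduced in this commit, and the names and values are illustrative.

```python
# Sketch only: analysis_results() regenerates AnalysisResult objects from the
# underlying dataframe rather than returning stored objects.
from qiskit_experiments.framework.experiment_data import ExperimentData

exp_data = ExperimentData()
exp_data.add_analysis_results(name="T1", value=1.2e-4, quality="good", components=["Q0"])

first = exp_data.analysis_results("T1")
second = exp_data.analysis_results("T1")

assert first.value == second.value  # same underlying data
assert first is not second          # but freshly generated objects on each call
```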

File tree

7 files changed (+656, -141 lines)


qiskit_experiments/database_service/device_component.py (+4, -3)
```diff
@@ -80,9 +80,10 @@ def to_component(string: str) -> DeviceComponent:
     Raises:
         ValueError: If input string is not a valid device component.
     """
+    if isinstance(string, DeviceComponent):
+        return string
     if string.startswith("Q"):
         return Qubit(int(string[1:]))
-    elif string.startswith("R"):
+    if string.startswith("R"):
         return Resonator(int(string[1:]))
-    else:
-        return UnknownComponent(string)
+    return UnknownComponent(string)
```
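
A small usage sketch of the updated helper: strings are parsed as before, while DeviceComponent instances now pass through unchanged (the qubit label is illustrative).

```python
# Sketch of the new pass-through behavior of to_component.
from qiskit_experiments.database_service.device_component import Qubit, to_component

qubit = to_component("Q0")  # parsed from its string form, as before
assert isinstance(qubit, Qubit)

same = to_component(qubit)  # DeviceComponent inputs are now returned as-is
assert same is qubit
```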

qiskit_experiments/database_service/utils.py (+237, -1)
```diff
@@ -19,12 +19,15 @@
 from abc import ABC, abstractmethod
 from collections import OrderedDict
 from datetime import datetime, timezone
-from typing import Callable, Tuple, Dict, Any, Union, Type, Optional
+from typing import Callable, Tuple, List, Dict, Any, Union, Type, Optional
 import json
+import uuid
 
+import pandas as pd
 import dateutil.parser
 import pkg_resources
 from dateutil import tz
+
 from qiskit.version import __version__ as terra_version
 
 from qiskit_ibm_experiment import (
@@ -276,3 +279,236 @@ def append(self, value):
         """Append to the list."""
         with self._lock:
             self._container.append(value)
+
+
+class ThreadSafeDataFrame(ThreadSafeContainer):
+    """Thread safe data frame.
+
+    This class wraps pandas dataframe with predefined column labels,
+    which is specified by the class method `_default_columns`.
+    Subclass can override this method to provide default labels specific to its data structure.
+
+    This object is expected to be used internally in the ExperimentData.
+    """
+
+    def __init__(self, init_values=None):
+        """ThreadSafeContainer constructor."""
+        self._columns = self._default_columns()
+        self._extra = []
+        super().__init__(init_values)
+
+    @classmethod
+    def _default_columns(cls) -> List[str]:
+        return []
+
+    def _init_container(self, init_values: Optional[Union[Dict, pd.DataFrame]] = None):
+        """Initialize the container."""
+        if init_values is None:
+            return pd.DataFrame(columns=self.get_columns())
+        if isinstance(init_values, pd.DataFrame):
+            input_columns = list(init_values.columns)
+            if input_columns != self.get_columns():
+                raise ValueError(
+                    f"Input data frame contains unexpected columns {input_columns}. "
+                    f"{self.__class__.__name__} defines {self.get_columns()} as default columns."
+                )
+            return init_values
+        if isinstance(init_values, dict):
+            return pd.DataFrame.from_dict(
+                data=init_values,
+                orient="index",
+                columns=self.get_columns(),
+            )
+        raise TypeError(f"Initial value of {type(init_values)} is not valid data type.")
+
+    def get_columns(self) -> List[str]:
+        """Return current column names.
+
+        Returns:
+            List of column names.
+        """
+        return self._columns.copy()
+
+    def add_columns(self, *new_columns: str, default_value: Any = None):
+        """Add new columns to the table.
+
+        This operation mutates the current container.
+
+        Args:
+            new_columns: Name of columns to add.
+            default_value: Default value to fill added columns.
+        """
+        # Order sensitive
+        new_columns = [c for c in new_columns if c not in self.get_columns()]
+        self._extra.extend(new_columns)
+
+        # Update current table
+        with self._lock:
+            for new_column in new_columns:
+                self._container.insert(len(self._container.columns), new_column, default_value)
+        self._columns.extend(new_columns)
+
+    def clear(self):
+        """Remove all elements from this container."""
+        with self._lock:
+            self._container = self._init_container()
+            self._columns = self._default_columns()
+            self._extra = []
+
+    def container(
+        self,
+        collapse_extra: bool = True,
+    ) -> pd.DataFrame:
+        """Return bare pandas dataframe.
+
+        Args:
+            collapse_extra: Set True to show only default columns.
+
+        Returns:
+            Bare pandas dataframe. This object is no longer thread safe.
+        """
+        with self._lock:
+            container = self._container
+
+        if collapse_extra:
+            return container[self._default_columns()]
+        return container
+
+    def add_entry(self, **kwargs):
+        """Add new entry to the dataframe.
+
+        Args:
+            kwargs: Description of new entry to register.
+        """
+        columns = self.get_columns()
+        missing = kwargs.keys() - set(columns)
+        if missing:
+            self.add_columns(*sorted(missing))
+
+        template = dict.fromkeys(self.get_columns())
+        template.update(kwargs)
+
+        if not template["result_id"]:
+            template["result_id"] = uuid.uuid4().hex
+        name = self._unique_table_index(template["result_id"])
+        with self._lock:
+            self._container.loc[name] = list(template.values())
+
+    def _unique_table_index(self, index_name: str):
+        """Generate unique index name with 8 characters."""
+        if not isinstance(index_name, str):
+            index_name = str(index_name)
+        truncated = index_name[:8]
+        with self.lock:
+            while truncated in self._container.index:
+                truncated = uuid.uuid4().hex[:8]
+        return truncated
+
+    def _repr_html_(self) -> Union[str, None]:
+        """Return HTML representation of this dataframe."""
+        with self._lock:
+            # Remove underscored columns.
+            return self._container._repr_html_()
+
+    def __getattr__(self, item):
+        lock = object.__getattribute__(self, "_lock")
+
+        with lock:
+            # Lock when access to container's member.
+            container = object.__getattribute__(self, "_container")
+            if hasattr(container, item):
+                return getattr(container, item)
+        raise AttributeError(f"'ThreadSafeDataFrame' object has no attribute '{item}'")
+
+    def __json_encode__(self) -> Dict[str, Any]:
+        return {
+            "class": "ThreadSafeDataFrame",
+            "data": self._container.to_dict(orient="index"),
+            "columns": self._columns,
+            "extra": self._extra,
+        }
+
+    @classmethod
+    def __json_decode__(cls, value: Dict[str, Any]) -> "ThreadSafeDataFrame":
+        if not value.get("class", None) == "ThreadSafeDataFrame":
+            raise ValueError("JSON decoded value for ThreadSafeDataFrame is not valid class type.")
+
+        instance = object.__new__(AnalysisResultTable)
+        # Need to update self._columns first to set extra columns in the dataframe container.
+        instance._columns = value.get("columns", cls._default_columns())
+        instance._extra = value.get("extra", [])
+        instance._lock = threading.RLock()
+        instance._container = instance._init_container(init_values=value.get("data", {}))
+        return instance
+
+
+class AnalysisResultTable(ThreadSafeDataFrame):
+    """Thread safe dataframe to store the analysis results."""
+
+    @classmethod
+    def _default_columns(cls) -> List[str]:
+        return [
+            "name",
+            "value",
+            "quality",
+            "components",
+            "experiment",
+            "experiment_id",
+            "result_id",
+            "tags",
+            "backend",
+            "run_time",
+            "created_time",
+        ]
+
+    @classmethod
+    def _tier1(cls) -> List[str]:
+        """The data group that the analysis class produces."""
+        return [
+            "name",
+            "value",
+            "components",
+            "quality",
+        ]
+
+    @classmethod
+    def _tier2(cls) -> List[str]:
+        """The data group of metadata that the experiment class provides."""
+        return [
+            "experiment",
+            "backend",
+            "run_time",
+        ]
+
+    @classmethod
+    def _tier3(cls) -> List[str]:
+        """The data group which is used to communicate with the experiment service."""
+        return [
+            "experiment_id",
+            "result_id",
+            "tags",
+            "created_time",
+        ]
+
+    def filter_columns(self, verbosity: int) -> List[str]:
+        """Return column names at given verbosity level.
+
+        Extra columns are always added.
+
+        Args:
+            verbosity: Level of verbosity of returned data table (1, 2, 3):
+
+                * 1 (minimum): Return data from the analysis.
+                * 2 (normal): With supplemental data experiment.
+                * 3 (finest): With extra data to communicate with experiment service.
+
+        Return:
+            Valid column names.
+        """
+        if verbosity == 1:
+            return self._tier1() + self._extra
+        if verbosity == 2:
+            return self._tier1() + self._tier2() + self._extra
+        if verbosity == 3:
+            return self._tier1() + self._tier2() + self._tier3() + self._extra
+        raise ValueError(f"verbosity {verbosity} is not defined. Choose value from 1, 2, 3.")
```
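
The new table keeps every analysis result as a dataframe row and exposes the tiered columns through verbosity filtering. Below is a minimal sketch exercising AnalysisResultTable directly with only the methods shown above; the entry names and values are illustrative.

```python
from qiskit_experiments.database_service.utils import AnalysisResultTable

table = AnalysisResultTable()

# add_entry fills unspecified default columns with None and generates a
# result_id when none is supplied; unknown keywords become extra columns.
table.add_entry(name="T1", value=1.2e-4, quality="good", components=["Q0"])
table.add_entry(name="T2", value=8.0e-5, chisq=1.1)  # "chisq" becomes an extra column

# filter_columns returns the tiered column groups plus any extra columns.
minimal_cols = table.filter_columns(verbosity=1)  # name, value, components, quality, chisq
service_cols = table.filter_columns(verbosity=3)  # adds experiment_id, result_id, tags, ...

# container() hands back the bare (no longer thread safe) pandas dataframe.
df = table.container(collapse_extra=False)
print(df[minimal_cols])
```

The verbosity tiers mirror who produces the data: tier 1 comes from the analysis itself, tier 2 from the experiment, and tier 3 from the service bookkeeping, so callers can request only as much metadata as they need.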

qiskit_experiments/framework/base_analysis.py (+33, -39)
```diff
@@ -15,6 +15,7 @@
 from abc import ABC, abstractmethod
 import copy
 from collections import OrderedDict
+from datetime import datetime, timezone
 from typing import List, Tuple, Union, Dict
 
 from qiskit_experiments.database_service.device_component import Qubit
@@ -23,7 +24,6 @@
 from qiskit_experiments.framework.experiment_data import ExperimentData
 from qiskit_experiments.framework.configs import AnalysisConfig
 from qiskit_experiments.framework.analysis_result_data import AnalysisResultData
-from qiskit_experiments.framework.analysis_result import AnalysisResult
 
 
 class BaseAnalysis(ABC, StoreInitArgs):
@@ -153,30 +153,48 @@ def run(
         if not replace_results and _requires_copy(experiment_data):
             experiment_data = experiment_data.copy()
 
-        experiment_components = self._get_experiment_components(experiment_data)
-
         # Set Analysis options
         if not options:
             analysis = self
         else:
             analysis = self.copy()
             analysis.set_options(**options)
 
-        def run_analysis(expdata):
+        def run_analysis(expdata: ExperimentData):
             # Clearing previous analysis data
             experiment_data._clear_results()
-            # making new analysis
+            experiment_components = self._get_experiment_components(experiment_data)
+
+            # Making new analysis
             results, figures = analysis._run_analysis(expdata)
-            # Add components
-            analysis_results = [
-                analysis._format_analysis_result(
-                    result, expdata.experiment_id, experiment_components
-                )
-                for result in results
-            ]
-            # Update experiment data with analysis results
-            if analysis_results:
-                expdata.add_analysis_results(analysis_results)
+
+            if results:
+                for result in results:
+                    supplementary = result.extra
+                    if result.chisq is not None:
+                        supplementary["chisq"] = result.chisq
+                    if "experiment" not in supplementary:
+                        supplementary["experiment"] = expdata.experiment_type
+                    if "experiment_id" not in supplementary:
+                        supplementary["experiment_id"] = expdata.experiment_id
+                    if "backend" not in supplementary:
+                        supplementary["backend"] = expdata.backend_name
+                    if "run_time" not in supplementary:
+                        # TODO add job RUNNING time
+                        supplementary["run_time"] = None
+                    if "created_time" not in supplementary:
+                        supplementary["created_time"] = datetime.now(timezone.utc)
+                    # Bypass generation of AnalysisResult, i.e. calling add_analysis_results.
+                    # AnalysisResult is a data container with experiment service API.
+                    # Since analysis is a local operation in the client,
+                    # we should directly populate analysis result dataframe.
+                    expdata.add_analysis_results(
+                        name=result.name,
+                        value=result.value,
+                        quality=result.quality,
+                        components=result.device_components or experiment_components,
+                        **supplementary,
+                    )
             if figures:
                 expdata.add_figures(figures, figure_names=self.options.figure_names)
 
@@ -195,30 +213,6 @@ def _get_experiment_components(self, experiment_data: ExperimentData):
 
         return experiment_components
 
-    def _format_analysis_result(self, data, experiment_id, experiment_components=None):
-        """Format run analysis result to DbAnalysisResult"""
-        device_components = []
-        if data.device_components:
-            device_components = data.device_components
-        elif experiment_components:
-            device_components = experiment_components
-
-        if isinstance(data, AnalysisResult):
-            # Update device components and experiment id
-            data.device_components = device_components
-            data.experiment_id = experiment_id
-            return data
-
-        return AnalysisResult(
-            name=data.name,
-            value=data.value,
-            device_components=device_components,
-            experiment_id=experiment_id,
-            chisq=data.chisq,
-            quality=data.quality,
-            extra=data.extra,
-        )
-
     @abstractmethod
     def _run_analysis(
         self,
```
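
After this change, BaseAnalysis.run writes each AnalysisResultData produced by _run_analysis directly into the analysis result dataframe via keyword arguments, instead of wrapping it in an AnalysisResult. Below is a minimal sketch of a custom analysis under the new flow; the class name and values are illustrative, not part of the commit.

```python
from typing import List, Tuple

from qiskit_experiments.framework.analysis_result_data import AnalysisResultData
from qiskit_experiments.framework.base_analysis import BaseAnalysis
from qiskit_experiments.framework.experiment_data import ExperimentData


class MyAnalysis(BaseAnalysis):
    """Toy analysis reporting a single number (illustrative)."""

    def _run_analysis(
        self, experiment_data: ExperimentData
    ) -> Tuple[List[AnalysisResultData], List]:
        # The returned entries are no longer converted to AnalysisResult by
        # run(); run_analysis above fills in experiment, backend, run_time,
        # and created_time and stores each entry as a dataframe row.
        result = AnalysisResultData(name="mean", value=0.5, quality="good")
        return [result], []
```

Calling MyAnalysis().run(experiment_data) on populated experiment data would then add one row named "mean" to the table, with the service-facing columns (experiment_id, result_id, created_time) filled in automatically.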