|
19 | 19 | from abc import ABC, abstractmethod
|
20 | 20 | from collections import OrderedDict
|
21 | 21 | from datetime import datetime, timezone
|
22 |
| -from typing import Callable, Tuple, Dict, Any, Union, Type, Optional |
| 22 | +from typing import Callable, Tuple, List, Dict, Any, Union, Type, Optional |
23 | 23 | import json
|
| 24 | +import uuid |
24 | 25 |
|
| 26 | +import pandas as pd |
25 | 27 | import dateutil.parser
|
26 | 28 | import pkg_resources
|
27 | 29 | from dateutil import tz
|
| 30 | + |
28 | 31 | from qiskit.version import __version__ as terra_version
|
29 | 32 |
|
30 | 33 | from qiskit_ibm_experiment import (
|
@@ -276,3 +279,236 @@ def append(self, value):
|
276 | 279 | """Append to the list."""
|
277 | 280 | with self._lock:
|
278 | 281 | self._container.append(value)
|
| 282 | + |
| 283 | + |
class ThreadSafeDataFrame(ThreadSafeContainer):
    """Thread safe data frame.

    This class wraps pandas dataframe with predefined column labels,
    which is specified by the class method `_default_columns`.
    Subclass can override this method to provide default labels specific to its data structure.

    Columns added on top of the defaults are tracked separately as "extra" columns.
    Methods that touch the wrapped dataframe acquire ``self._lock``; some of them
    nest (e.g. ``add_entry`` calls ``add_columns``), which relies on the lock being
    reentrant, as is the ``threading.RLock`` created in ``__json_decode__``.

    This object is expected to be used internally in the ExperimentData.
    """

    def __init__(self, init_values=None):
        """ThreadSafeDataFrame constructor.

        Args:
            init_values: Optional initial data. Either a pandas dataframe whose columns
                match the default columns, or a dictionary keyed on the row index.
        """
        # Column bookkeeping must exist before the base constructor runs, because
        # _init_container reads get_columns().
        # NOTE(review): assumes the base constructor creates the lock and calls
        # _init_container; it is defined outside this block.
        self._columns = self._default_columns()
        self._extra = []
        super().__init__(init_values)

    @classmethod
    def _default_columns(cls) -> List[str]:
        """Return the default column labels. Subclasses override this hook."""
        return []

    def _init_container(self, init_values: Optional[Union[Dict, pd.DataFrame]] = None):
        """Initialize the container.

        Args:
            init_values: ``None`` for an empty table, a dataframe to wrap as is,
                or a dictionary mapping the row index to row data.

        Returns:
            A pandas dataframe with the current column labels.

        Raises:
            ValueError: When the input dataframe columns differ from the current columns.
            TypeError: When the input is neither ``None``, a dataframe, nor a dictionary.
        """
        if init_values is None:
            return pd.DataFrame(columns=self.get_columns())
        if isinstance(init_values, pd.DataFrame):
            input_columns = list(init_values.columns)
            if input_columns != self.get_columns():
                raise ValueError(
                    f"Input data frame contains unexpected columns {input_columns}. "
                    f"{self.__class__.__name__} defines {self.get_columns()} as default columns."
                )
            # The dataframe is wrapped without copying.
            return init_values
        if isinstance(init_values, dict):
            return pd.DataFrame.from_dict(
                data=init_values,
                orient="index",
                columns=self.get_columns(),
            )
        raise TypeError(f"Initial value of {type(init_values)} is not valid data type.")

    def get_columns(self) -> List[str]:
        """Return current column names.

        Returns:
            List of column names. This is a copy, so mutating it does not affect the table.
        """
        return self._columns.copy()

    def add_columns(self, *new_columns: str, default_value: Any = None):
        """Add new columns to the table.

        This operation mutates the current container.
        Names that already exist, or that are repeated within the same call, are
        added only once.

        Args:
            new_columns: Name of columns to add.
            default_value: Default value to fill added columns.
        """
        with self._lock:
            # Order sensitive. dict.fromkeys drops repeated names while keeping
            # the first-seen order, so DataFrame.insert never sees a duplicate.
            known = set(self._columns)
            to_add = [c for c in dict.fromkeys(new_columns) if c not in known]

            # Update current table
            for new_column in to_add:
                self._container.insert(len(self._container.columns), new_column, default_value)

            # Bookkeeping is updated only after the dataframe accepted the columns,
            # so a failed insert cannot leave the name lists ahead of the table.
            self._columns.extend(to_add)
            self._extra.extend(to_add)

    def clear(self):
        """Remove all elements from this container.

        Extra columns are dropped as well; the table returns to the default columns.
        """
        with self._lock:
            # Reset the column bookkeeping first: _init_container builds the empty
            # dataframe from get_columns(), so doing this afterwards would leave
            # stale extra columns in the new dataframe.
            self._columns = self._default_columns()
            self._extra = []
            self._container = self._init_container()

    def container(
        self,
        collapse_extra: bool = True,
    ) -> pd.DataFrame:
        """Return bare pandas dataframe.

        Args:
            collapse_extra: Set True to show only default columns.

        Returns:
            Bare pandas dataframe. This object is no longer thread safe.
        """
        with self._lock:
            container = self._container
            if collapse_extra:
                # Select the columns while holding the lock so that a concurrent
                # add_columns cannot mutate the dataframe mid-selection.
                return container[self._default_columns()]
        return container

    def add_entry(self, **kwargs):
        """Add new entry to the dataframe.

        Unknown keys are registered as extra columns (in sorted order) before the
        row is written. When the table has a ``result_id`` column and no value is
        given for it, a random UUID hex string is used. The row index is derived
        from the first 8 characters of the result ID, or of a random UUID when the
        table has no ``result_id`` column.

        Args:
            kwargs: Description of new entry to register.
        """
        # Hold the lock for the whole operation so that the uniqueness check on the
        # index and the row insertion cannot be interleaved with another thread.
        with self._lock:
            missing = kwargs.keys() - set(self._columns)
            if missing:
                self.add_columns(*sorted(missing))

            template = dict.fromkeys(self._columns)
            template.update(kwargs)

            if "result_id" in template:
                if not template["result_id"]:
                    template["result_id"] = uuid.uuid4().hex
                index_seed = template["result_id"]
            else:
                # Tables without a result_id column still need a row index.
                index_seed = uuid.uuid4().hex

            name = self._unique_table_index(index_seed)
            self._container.loc[name] = list(template.values())

    def _unique_table_index(self, index_name: str):
        """Generate unique index name with 8 characters.

        Args:
            index_name: Seed of the index. Non-string values are converted with ``str``.

        Returns:
            The first 8 characters of the seed, or 8 random hex characters when
            that value is already used as a row index.
        """
        if not isinstance(index_name, str):
            index_name = str(index_name)
        truncated = index_name[:8]
        with self._lock:
            while truncated in self._container.index:
                truncated = uuid.uuid4().hex[:8]
        return truncated

    def _repr_html_(self) -> Union[str, None]:
        """Return HTML representation of this dataframe."""
        with self._lock:
            # Delegate to pandas; all columns, including extra ones, are rendered.
            return self._container._repr_html_()

    def __getattr__(self, item):
        """Forward unknown attribute access to the wrapped dataframe.

        Raises:
            AttributeError: When neither this object nor the dataframe has the attribute.
        """
        # object.__getattribute__ is used so that a missing _lock or _container
        # raises AttributeError instead of re-entering this method recursively.
        lock = object.__getattribute__(self, "_lock")

        with lock:
            # Lock when access to container's member.
            container = object.__getattribute__(self, "_container")
            if hasattr(container, item):
                return getattr(container, item)
        raise AttributeError(f"'ThreadSafeDataFrame' object has no attribute '{item}'")

    def __json_encode__(self) -> Dict[str, Any]:
        """Return a JSON serializable dictionary describing this table."""
        with self._lock:
            return {
                "class": "ThreadSafeDataFrame",
                "data": self._container.to_dict(orient="index"),
                # Copies, so the payload does not alias the internal bookkeeping.
                "columns": list(self._columns),
                "extra": list(self._extra),
            }

    @classmethod
    def __json_decode__(cls, value: Dict[str, Any]) -> "ThreadSafeDataFrame":
        """Rebuild a table from the dictionary produced by ``__json_encode__``.

        Args:
            value: Decoded dictionary with "class", "data", "columns" and "extra" keys.

        Returns:
            A new instance of the class this method is called on.

        Raises:
            ValueError: When the "class" entry is not "ThreadSafeDataFrame".
        """
        if not value.get("class", None) == "ThreadSafeDataFrame":
            raise ValueError("JSON decoded value for ThreadSafeDataFrame is not valid class type.")

        # Instantiate the class being decoded rather than a hard-coded subclass;
        # __init__ is bypassed because the state is restored attribute by attribute.
        instance = object.__new__(cls)
        # Need to update self._columns first to set extra columns in the dataframe container.
        instance._columns = list(value.get("columns", cls._default_columns()))
        instance._extra = list(value.get("extra", []))
        instance._lock = threading.RLock()
        instance._container = instance._init_container(init_values=value.get("data", {}))
        return instance
| 443 | + |
| 444 | + |
class AnalysisResultTable(ThreadSafeDataFrame):
    """Thread safe dataframe to store the analysis results.

    The default columns are grouped into three tiers, which
    :meth:`filter_columns` uses to select columns by verbosity level.
    """

    @classmethod
    def _default_columns(cls) -> List[str]:
        """Return the column labels that every analysis result table defines."""
        labels = [
            "name",
            "value",
            "quality",
            "components",
            "experiment",
            "experiment_id",
            "result_id",
            "tags",
            "backend",
            "run_time",
            "created_time",
        ]
        return labels

    @classmethod
    def _tier1(cls) -> List[str]:
        """The data group that the analysis class produces."""
        return ["name", "value", "components", "quality"]

    @classmethod
    def _tier2(cls) -> List[str]:
        """The data group of metadata that the experiment class provides."""
        return ["experiment", "backend", "run_time"]

    @classmethod
    def _tier3(cls) -> List[str]:
        """The data group which is used to communicate with the experiment service."""
        return ["experiment_id", "result_id", "tags", "created_time"]

    def filter_columns(self, verbosity: int) -> List[str]:
        """Return column names at given verbosity level.

        Extra columns are always added.

        Args:
            verbosity: Level of verbosity of returned data table (1, 2, 3):

                * 1 (minimum): Return data from the analysis.
                * 2 (normal): With supplemental data experiment.
                * 3 (finest): With extra data to communicate with experiment service.

        Returns:
            Valid column names.

        Raises:
            ValueError: When the verbosity is not one of 1, 2, 3.
        """
        # Tier getters in increasing order of verbosity; level N includes tiers 1..N.
        tier_getters = (self._tier1, self._tier2, self._tier3)

        for level in range(1, len(tier_getters) + 1):
            if verbosity == level:
                selected = []
                for getter in tier_getters[:level]:
                    selected.extend(getter())
                return selected + self._extra

        raise ValueError(f"verbosity {verbosity} is not defined. Choose value from 1, 2, 3.")
0 commit comments