diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index dbe7faefed69ef..53a7396065a8d6 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -27,7 +27,7 @@ ExtensionType Changes ^^^^^^^^^^^^^^^^^^^^^ - ``ExtensionArray`` has gained the abstract methods ``.dropna()`` and ``.append()``, and attribute ``array_type`` (:issue:`21185`) -- ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instaniate a registered ``DecimalDtype`` (:issue:`21185`) +- ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype`` (:issue:`21185`) - The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`) .. _whatsnew_0240.api.other: diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 781118723a7c68..ca077bd89434fb 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -38,7 +38,6 @@ class ExtensionArray(object): * copy * append * _concat_same_type - * array_type An additional method is available to satisfy pandas' internal, private block API. diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index ba359c9ef49822..701863a2595aab 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -109,6 +109,12 @@ class ExtensionDtype(_DtypeOpsMixin): * name * construct_from_string + + Optionally one can assign an array_type for construction with the name + of this dtype via the Registry + + * array_type + The `na_value` class attribute can be used to set the default NA value for this type. :attr:`numpy.nan` is used by default. @@ -118,6 +124,8 @@ class ExtensionDtype(_DtypeOpsMixin): provided for registering virtual subclasses. """ + array_type = None + def __str__(self): return self.name diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 94388103f19533..8a5f1b5f885bfb 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -2,7 +2,6 @@ import re import numpy as np -from collections import OrderedDict from pandas import compat from pandas.core.dtypes.generic import ABCIndexClass, ABCCategoricalIndex @@ -18,22 +17,19 @@ class Registry(object): These are tried in order for inference. """ - dtypes = OrderedDict() + dtypes = [] @classmethod - def register(self, dtype, constructor=None): + def register(self, dtype): """ Parameters ---------- - dtype : PandasExtension Dtype + dtype : ExtensionDtype """ if not issubclass(dtype, (PandasExtensionDtype, ExtensionDtype)): raise ValueError("can only register pandas extension dtypes") - if constructor is None: - constructor = dtype.construct_from_string - - self.dtypes[dtype] = constructor + self.dtypes.append(dtype) def find(self, dtype): """ @@ -54,9 +50,9 @@ def find(self, dtype): return None - for dtype_type, constructor in self.dtypes.items(): + for dtype_type in self.dtypes: try: - return constructor(dtype) + return dtype_type.construct_from_string(dtype) except TypeError: pass @@ -610,11 +606,16 @@ def _parse_dtype_strict(cls, freq): @classmethod def construct_from_string(cls, string): """ - attempt to construct this type from a string, raise a TypeError - if its not possible + Strict construction from a string, raise a TypeError if not + possible """ from pandas.tseries.offsets import DateOffset - if isinstance(string, (compat.string_types, DateOffset)): + + if (isinstance(string, compat.string_types) and + (string.startswith('period[') or + string.startswith('Period[')) or + isinstance(string, DateOffset)): + # do not parse string like U as period[U] # avoid tuple to be regarded as freq try: return cls(freq=string) @@ -622,17 +623,6 @@ def construct_from_string(cls, string): pass raise TypeError("could not construct PeriodDtype") - @classmethod - def construct_from_string_strict(cls, string): - """ - Strict construction from a string, raise a TypeError if not - possible - """ - if string.startswith('period[') or string.startswith('Period['): - # do not parse string like U as period[U] - return PeriodDtype.construct_from_string(string) - raise TypeError("could not construct PeriodDtype") - def __unicode__(self): return "period[{freq}]".format(freq=self.freq.freqstr) @@ -747,21 +737,13 @@ def construct_from_string(cls, string): attempt to construct this type from a string, raise a TypeError if its not possible """ - if isinstance(string, compat.string_types): + if (isinstance(string, compat.string_types) and + (string.startswith('interval') or + string.startswith('Interval'))): return cls(string) msg = "a string needs to be passed, got type {typ}" raise TypeError(msg.format(typ=type(string))) - @classmethod - def construct_from_string_strict(cls, string): - """ - Strict construction from a string, raise a TypeError if not - possible - """ - if string.startswith('interval') or string.startswith('Interval'): - return IntervalDtype.construct_from_string(string) - raise TypeError("cannot construct IntervalDtype") - def __unicode__(self): if self.subtype is None: return "interval" @@ -806,6 +788,6 @@ def is_dtype(cls, dtype): # register the dtypes in search order registry.register(DatetimeTZDtype) -registry.register(PeriodDtype, PeriodDtype.construct_from_string_strict) -registry.register(IntervalDtype, IntervalDtype.construct_from_string_strict) +registry.register(PeriodDtype) +registry.register(IntervalDtype) registry.register(CategoricalDtype) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 8f8d8760583ce0..2694f5d5be384a 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -800,7 +800,7 @@ def astype(self, dtype, copy=True): @cache_readonly def dtype(self): """Return the dtype object of the underlying data""" - return IntervalDtype.construct_from_string(str(self.left.dtype)) + return IntervalDtype(str(self.left.dtype)) @property def inferred_type(self): diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py deleted file mode 100644 index 3742f342e43463..00000000000000 --- a/pandas/tests/extension/base/ops.py +++ /dev/null @@ -1,6 +0,0 @@ -from .base import BaseExtensionTests - - -class BaseOpsTests(BaseExtensionTests): - """Various Series and DataFrame ops methos.""" - pass