From 45862450b4e00eaadd1bbc816c5fc63ecf2740e4 Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Tue, 3 Jul 2018 18:40:43 -0400
Subject: [PATCH 01/20] ENH: add integer-na support via an ExtensionArray

closes #20700
closes #20747
---
 doc/source/whatsnew/v0.24.0.txt               |  57 ++
 pandas/core/arrays/__init__.py                |   3 +
 pandas/core/arrays/base.py                    |  12 +-
 pandas/core/arrays/categorical.py             |   4 +-
 pandas/core/arrays/integer.py                 | 563 ++++
 pandas/core/dtypes/cast.py                    |   3 +-
 pandas/core/dtypes/concat.py                  |   1 +
 pandas/core/indexes/base.py                   |  24 +-
 pandas/core/missing.py                        |   3 +-
 pandas/core/ops.py                            |  78 ++-
 pandas/core/series.py                         |   2 +-
 pandas/tests/extension/base/__init__.py       |   2 +-
 pandas/tests/extension/base/getitem.py        |   6 +-
 pandas/tests/extension/base/ops.py            |  20 +-
 pandas/tests/extension/base/reshaping.py      |  24 +-
 pandas/tests/extension/decimal/array.py       |  11 +-
 .../tests/extension/decimal/test_decimal.py   |   8 +-
 pandas/tests/extension/integer/__init__.py    |   0
 .../tests/extension/integer/test_integer.py   | 604 ++++++
 pandas/tests/extension/json/array.py          |  13 +-
 pandas/tests/extension/json/test_json.py      |  15 +-
 21 files changed, 1386 insertions(+), 67 deletions(-)
 create mode 100644 pandas/core/arrays/integer.py
 create mode 100644 pandas/tests/extension/integer/__init__.py
 create mode 100644 pandas/tests/extension/integer/test_integer.py

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index ed4022d422b4d..0c9b0ce98ede0 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -8,6 +8,7 @@ v0.24.0 (Month XX, 2018)
 New features
 ~~~~~~~~~~~~

+- ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling appending to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)

 .. _whatsnew_0240.enhancements.extension_array_operators:

@@ -26,6 +27,61 @@ See the :ref:`ExtensionArray Operator Support
 ` documentation section
 for details on both ways of adding operator support.

+.. _whatsnew_0240.enhancements.intna:
+
+Integer NA Support
+^^^^^^^^^^^^^^^^^^
+
+Pandas has gained the ability to hold integer dtypes with missing values. This long-requested feature is enabled through the use of ``ExtensionTypes``. Here is an example of the usage.
+
+We can construct a ``Series`` with the specified dtype. The dtype string ``Int64`` is a pandas ``ExtensionDtype``. Specifying a list or array using the traditional missing value
+marker of ``np.nan`` will infer an integer dtype. The display of the ``Series`` will also use ``NaN`` to indicate missing values in string outputs. (:issue:`20700`, :issue:`20747`)
+
+.. ipython:: python
+
+   s = pd.Series([1, 2, np.nan], dtype='Int64')
+   s
+
+
+Operations on these dtypes will propagate ``NaN`` as in other pandas operations.
+
+.. ipython:: python
+
+   # arithmetic
+   s + 1
+
+   # comparison
+   s == 1
+
+   # indexing
+   s.iloc[1:3]
+
+   # operate with other dtypes
+   s + s.iloc[1:3]
+
+   # coerce when needed
+   s + 0.01
+
+These dtypes can operate as part of ``DataFrames``.
+
+.. ipython:: python
+
+   df = pd.DataFrame({'A': s, 'B': [1, 1, 3], 'C': list('aab')})
+   df
+   df.dtypes
+
+
+These dtypes can be merged, reshaped, and cast.
+
+.. ipython:: python
+
+   pd.concat([df[['A']], df[['B', 'C']]], axis=1).dtypes
+   df['A'].astype(float)
+
+.. warning::
+
+   The Integer NA support currently uses the capitalized dtype version, e.g. ``Int8`` as compared to the traditional ``int8``. This may be changed at a future date.
+
 .. 
_whatsnew_0240.enhancements.read_html: ``read_html`` Enhancements @@ -182,6 +238,7 @@ Previous Behavior: ExtensionType Changes ^^^^^^^^^^^^^^^^^^^^^ +- ``ExtensionArray`` has gained the abstract methods ``.dropna()`` (:issue:`21185`) - ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`) - The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`) diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index 1b8a43d4293a5..6e8dafd125bfc 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -1,6 +1,9 @@ from .base import (ExtensionArray, # noqa + ExtensionOpsMixin, ExtensionScalarOpsMixin) from .categorical import Categorical # noqa from .datetimes import DatetimeArrayMixin # noqa from .period import PeriodArrayMixin # noqa from .timedelta import TimedeltaArrayMixin # noqa +from .integer import ( # noqa + IntegerArray, to_integer_array) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index fe4e461b0bd4f..c0697dd29e4d0 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -12,8 +12,8 @@ from pandas.errors import AbstractMethodError from pandas.compat.numpy import function as nv from pandas.compat import set_function_name, PY3 -from pandas.core.dtypes.common import is_list_like from pandas.core import ops +from pandas.core.dtypes.common import is_list_like _not_implemented_message = "{} does not implement {}." @@ -88,7 +88,7 @@ class ExtensionArray(object): # Constructors # ------------------------------------------------------------------------ @classmethod - def _from_sequence(cls, scalars, copy=False): + def _from_sequence(cls, scalars, dtype=None, copy=False): """Construct a new ExtensionArray from a sequence of scalars. Parameters @@ -96,6 +96,8 @@ def _from_sequence(cls, scalars, copy=False): scalars : Sequence Each element will be an instance of the scalar type for this array, ``cls.dtype.type``. + dtype : Dtype, optional + consruct for this particular dtype copy : boolean, default False if True, copy the underlying data Returns @@ -378,7 +380,7 @@ def fillna(self, value=None, method=None, limit=None): func = pad_1d if method == 'pad' else backfill_1d new_values = func(self.astype(object), limit=limit, mask=mask) - new_values = self._from_sequence(new_values) + new_values = self._from_sequence(new_values, dtype=self.dtype) else: # fill with value new_values = self.copy() @@ -407,7 +409,7 @@ def unique(self): from pandas import unique uniques = unique(self.astype(object)) - return self._from_sequence(uniques) + return self._from_sequence(uniques, dtype=self.dtype) def _values_for_factorize(self): # type: () -> Tuple[ndarray, Any] @@ -559,7 +561,7 @@ def take(self, indices, allow_fill=False, fill_value=None): result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill) - return self._from_sequence(result) + return self._from_sequence(result, dtype=self.dtype) """ # Implementer note: The `fill_value` parameter should be a user-facing # value, an instance of self.dtype.type. 
When passed `fill_value=None`, diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 83fd0ab499283..ceaa41b98ae7e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -487,8 +487,8 @@ def _constructor(self): return Categorical @classmethod - def _from_sequence(cls, scalars): - return Categorical(scalars) + def _from_sequence(cls, scalars, dtype=None, copy=False): + return Categorical(scalars, dtype=dtype) def copy(self): """ Copy constructor. """ diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py new file mode 100644 index 0000000000000..a7c0518bb4ed9 --- /dev/null +++ b/pandas/core/arrays/integer.py @@ -0,0 +1,563 @@ +import sys +import warnings +import copy +import numpy as np + +from pandas.compat import u +from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass +from pandas.util._decorators import cache_readonly +from pandas.compat import set_function_name +from pandas.api.types import (is_integer, is_scalar, is_float, + is_float_dtype, is_integer_dtype, + is_object_dtype, + is_list_like, + infer_dtype) +from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin +from pandas.core.dtypes.base import ExtensionDtype +from pandas.core.dtypes.dtypes import registry +from pandas.core.dtypes.missing import isna, notna +from pandas.core.dtypes.cast import maybe_downcast_to_dtype + +from pandas.io.formats.printing import ( + format_object_summary, format_object_attrs, default_pprint) + + +class IntegerDtype(ExtensionDtype): + type = None + na_value = np.nan + + @cache_readonly + def is_signed_integer(self): + return self.kind == 'i' + + @cache_readonly + def is_unsigned_integer(self): + return self.kind == 'u' + + @cache_readonly + def numpy_dtype(self): + """ Return an instance of our numpy dtype """ + return np.dtype(self.type) + + @cache_readonly + def kind(self): + return self.numpy_dtype.kind + + @classmethod + def construct_array_type(cls): + """Return the array type associated with this dtype + + Returns + ------- + type + """ + return IntegerArray + + @classmethod + def construct_from_string(cls, string): + """ + Construction from a string, raise a TypeError if not + possible + """ + if string == cls.name: + return cls() + raise TypeError("Cannot construct a '{}' from " + "'{}'".format(cls, string)) + + +def to_integer_array(values): + """ + Parameters + ---------- + values : 1D list-like + + Returns + ------- + infer and return an integer array + + Raises + ------ + TypeError if incompatible types + """ + values = np.array(values, copy=False) + try: + dtype = _dtypes[str(values.dtype)] + except KeyError: + if is_float_dtype(values): + return IntegerArray(values) + + raise TypeError("Incompatible dtype for {}".format(values.dtype)) + return IntegerArray(values, dtype=dtype, copy=False) + + +def coerce_to_array(values, dtype, mask=None, copy=False): + """ + Coerce the input values array to numpy arrays with a mask + + Parameters + ---------- + values : 1D list-like + dtype : integer dtype + mask : boolean 1D array, optional + copy : boolean, default False + if True, copy the input + + Returns + ------- + tuple of (values, mask) + """ + + if isinstance(values, IntegerArray): + values, mask = values.data, values.mask + if copy: + values = values.copy() + mask = mask.copy() + return values, mask + + values = np.array(values, copy=copy) + if is_object_dtype(values): + inferred_type = infer_dtype(values) + if inferred_type not in ['floating', 'integer', + 'mixed-integer', 
'mixed-integer-float']:
+            raise TypeError("{} cannot be converted to an IntegerDtype".format(
+                values.dtype))
+
+    elif not (is_integer_dtype(values) or is_float_dtype(values)):
+        raise TypeError("{} cannot be converted to an IntegerDtype".format(
+            values.dtype))
+
+    if mask is None:
+        mask = isna(values)
+    else:
+        assert len(mask) == len(values)
+
+    if not values.ndim == 1:
+        raise TypeError("values must be a 1D list-like")
+    if not mask.ndim == 1:
+        raise TypeError("mask must be a 1D list-like")
+
+    # avoid float->int numpy conversion issues
+    if is_object_dtype(values):
+        mask |= isna(values)
+
+    # infer dtype if needed
+    if dtype is None:
+        if is_integer_dtype(values):
+            dtype = values.dtype
+        else:
+            dtype = np.dtype('int64')
+    else:
+        dtype = dtype.type
+
+    # we copy as we need to coerce here
+    if mask.any():
+        values = values.copy()
+        values[mask] = 1
+
+        values = values.astype(dtype)
+    else:
+        values = values.astype(dtype, copy=False)
+
+    return values, mask
+
+
+class IntegerArray(ExtensionArray, ExtensionOpsMixin):
+    """
+    We represent an IntegerArray with 2 numpy arrays
+    - data: contains a numpy integer array of the appropriate dtype
+    - mask: a boolean array holding a mask on the data, True is missing
+    """
+
+    @cache_readonly
+    def dtype(self):
+        return _dtypes[str(self.data.dtype)]
+
+    def __init__(self, values, mask=None, dtype=None, copy=False):
+        self.data, self.mask = coerce_to_array(
+            values, dtype=dtype, mask=mask, copy=copy)
+
+    @classmethod
+    def _from_sequence(cls, scalars, mask=None, dtype=None, copy=False):
+        return cls(scalars, mask=mask, dtype=dtype, copy=copy)
+
+    @classmethod
+    def _from_factorized(cls, values, original):
+        return cls(values, dtype=original.dtype)
+
+    def __getitem__(self, item):
+        if is_integer(item):
+            if self.mask[item]:
+                return self.dtype.na_value
+            return self.data[item]
+        return type(self)(self.data[item],
+                          mask=self.mask[item],
+                          dtype=self.dtype)
+
+    def _coerce_to_ndarray(self):
+        """ coerce to an ndarray, preserving my scalar types """
+
+        # TODO(jreback) make this better
+        data = self.data.astype(object)
+        data[self.mask] = self._na_value
+        return data
+
+    def __array__(self, dtype=None):
+        """
+        the array interface, return my values
+        We return an object array here to preserve our scalar values
+        """
+        return self._coerce_to_ndarray()
+
+    def __iter__(self):
+        """Iterate over elements of the array.
+
+        """
+        # This needs to be implemented so that pandas recognizes extension
+        # arrays as list-like. The default implementation makes successive
+        # calls to ``__getitem__``, which may be slower than necessary.
+        for i in range(len(self)):
+            if self.mask[i]:
+                yield self.dtype.na_value
+            else:
+                yield self.data[i]
+
+    def _formatting_values(self):
+        # type: () -> np.ndarray
+        return self._coerce_to_ndarray()
+
+    def take(self, indexer, allow_fill=False, fill_value=None):
+        from pandas.api.extensions import take
+
+        # we always fill with 1 internally
+        # to avoid upcasting
+        data_fill_value = 1 if isna(fill_value) else fill_value
+        result = take(self.data, indexer, fill_value=data_fill_value,
+                      allow_fill=allow_fill)
+
+        mask = take(self.mask, indexer, fill_value=True,
+                    allow_fill=allow_fill)
+
+        # if we are filling
+        # we only fill where the indexer is null
+        # not existing missing values
+        # TODO(jreback) what if we have a non-na float as a fill value?
+ if allow_fill and notna(fill_value): + fill_mask = np.asarray(indexer) == -1 + result[fill_mask] = fill_value + mask = mask ^ fill_mask + + return type(self)(result, mask=mask, dtype=self.dtype) + + def copy(self, deep=False): + data, mask = self.data, self.mask + if deep: + data = copy.deepcopy(data) + mask = copy.deepcopy(mask) + else: + data = data.copy() + mask = mask.copy() + return type(self)(data, mask, dtype=self.dtype, copy=False) + + def __setitem__(self, key, value): + _is_scalar = is_scalar(value) + if _is_scalar: + value = [value] + value, mask = coerce_to_array(value, dtype=self.dtype) + + if _is_scalar: + value = value[0] + mask = mask[0] + + self.data[key] = value + self.mask[key] = mask + + def __len__(self): + return len(self.data) + + def __repr__(self): + """ + Return a string representation for this object. + + Invoked by unicode(df) in py2 only. Yields a Unicode String in both + py2/py3. + """ + klass = self.__class__.__name__ + data = format_object_summary(self, default_pprint, False) + attrs = format_object_attrs(self) + space = " " + + prepr = (u(",%s") % + space).join(u("%s=%s") % (k, v) for k, v in attrs) + + res = u("%s(%s%s)") % (klass, data, prepr) + + return res + + @property + def nbytes(self): + return self.data.nbytes + self.mask.nbytes + + def isna(self): + return self.mask + + @property + def _na_value(self): + return np.nan + + @classmethod + def _concat_same_type(cls, to_concat): + data = np.concatenate([x.data for x in to_concat]) + mask = np.concatenate([x.mask for x in to_concat]) + return cls(data, mask=mask, dtype=to_concat[0].dtype) + + def astype(self, dtype, copy=True): + """Cast to a NumPy array with 'dtype'. + + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + copy : bool, default True + Whether to copy the data, even if not necessary. If False, + a copy is made only if the old dtype does not match the + new dtype. + + Returns + ------- + array : ndarray + NumPy ndarray with 'dtype' for its dtype. + + Raises + ------ + TypeError + if incompatible type with an IntegerDtype, equivalent of same_kind + casting + """ + + # if we are astyping to an existing IntegerDtype we can fastpath + if isinstance(dtype, IntegerDtype): + result = self.data.astype(dtype.numpy_dtype, + casting='same_kind', copy=False) + return type(self)(result, mask=self.mask, + dtype=dtype, copy=False) + + # coerce + data = self._coerce_to_ndarray() + return data.astype(dtype=dtype, copy=False) + + @property + def _ndarray_values(self): + # type: () -> np.ndarray + """Internal pandas method for lossy conversion to a NumPy ndarray. + + This method is not part of the pandas interface. + + The expectation is that this is cheap to compute, and is primarily + used for interacting with our indexers. + """ + return self.data + + def value_counts(self, dropna=True): + """ + Returns a Series containing counts of each category. + + Every category will have an entry, even those with a count of 0. + + Parameters + ---------- + dropna : boolean, default True + Don't include counts of NaN. 
+ + Returns + ------- + counts : Series + + See Also + -------- + Series.value_counts + + """ + + from pandas import Index, Series + + # compute counts on the data with no nans + data = self.data[~self.mask] + value_counts = Index(data).value_counts() + array = value_counts.values + + # TODO(extension) + # if we have allow Index to hold an ExtensionArray + # this is easier + index = value_counts.index.astype(object) + + # if we want nans, count the mask + if not dropna: + + # TODO(extension) + # appending to an Index *always* infers + # w/o passing the dtype + array = np.append(array, [self.mask.sum()]) + index = Index(np.concatenate( + [index.values, + np.array([np.nan], dtype=object)]), dtype=object) + + return Series(array, index=index) + + def _values_for_argsort(self): + # type: () -> ndarray + """Return values for sorting. + + Returns + ------- + ndarray + The transformed values should maintain the ordering between values + within the array. + + See Also + -------- + ExtensionArray.argsort + """ + data = self.data.copy() + data[self.mask] = data.min() - 1 + return data + + @classmethod + def _create_comparison_method(cls, op): + def cmp_method(self, other): + + op_name = op.__name__ + mask = None + if isinstance(other, IntegerArray): + other, mask = other.data, other.mask + elif is_list_like(other): + other = np.asarray(other) + if other.ndim > 0 and len(self) != len(other): + raise ValueError('Lengths must match to compare') + + # numpy will show a DeprecationWarning on invalid elementwise + # comparisons, this will raise in the future + with warnings.catch_warnings(record=True): + with np.errstate(all='ignore'): + result = op(self.data, other) + + # nans propagate + if mask is None: + mask = self.mask + else: + mask = self.mask | mask + + result[mask] = True if op_name == 'ne' else False + return result + + name = '__{name}__'.format(name=op.__name__) + return set_function_name(cmp_method, name, cls) + + def _maybe_mask_result(self, result, mask, other, op_name): + """ + Parameters + ---------- + result : array-like + mask : array-like bool + other : scalar or array-like + op_name : str + """ + + # may need to fill infs + # and mask wraparound + if is_float_dtype(result): + mask |= (result == np.inf) | (result == -np.inf) + + # floor div can be a float or an integer dependending + # on the operands + if (op_name in ['rfloordiv', 'floordiv'] and + (is_float_dtype(other) or is_float(other))): + result[mask] = np.nan + return result + + # by definition a float result + elif op_name in ['rtruediv', 'truediv', 'rdiv', 'div']: + result[mask] = np.nan + return result + + elif is_float_dtype(result): + # if our float result, try to downcast if possible + # if remains float, then mask and return as float + nonans = result[notna(result)] + maybe = maybe_downcast_to_dtype(nonans, self.dtype.numpy_dtype) + if not is_integer_dtype(maybe): + result[mask] = np.nan + return result + + return type(self)(result, mask=mask, dtype=self.dtype, copy=False) + + @classmethod + def _create_arithmetic_method(cls, op): + def integer_arithmetic_method(self, other): + + op_name = op.__name__ + mask = None + if isinstance(other, (ABCSeries, ABCIndexClass)): + other = getattr(other, 'values', other) + + if isinstance(other, IntegerArray): + other, mask = other.data, other.mask + elif getattr(other, 'ndim', 0) > 1: + raise TypeError("can only perform ops with 1-d structures") + elif is_list_like(other): + other = np.asarray(other) + if not other.ndim: + other = other.item() + elif other.ndim == 1: + if not 
(is_float_dtype(other) or is_integer_dtype(other)): + raise TypeError( + "can only perform ops with numeric values") + else: + if not (is_float(other) or is_integer(other)): + raise TypeError("can only perform ops with numeric values") + + # nans propagate + if mask is None: + mask = self.mask + else: + mask = self.mask | mask + + with np.errstate(all='ignore'): + result = op(self.data, other) + + # divmod returns a tuple + if op_name == 'divmod': + div, mod = result + return (self._maybe_mask_result(div, mask, other, 'floordiv'), + self._maybe_mask_result(mod, mask, other, 'mod')) + + return self._maybe_mask_result(result, mask, other, op_name) + + name = '__{name}__'.format(name=op.__name__) + return set_function_name(integer_arithmetic_method, name, cls) + + +IntegerArray._add_arithmetic_ops() +IntegerArray._add_comparison_ops() + + +module = sys.modules[__name__] + + +# create the Dtype +_dtypes = {} +for dtype in ['int8', 'int16', 'int32', 'int64', + 'uint8', 'uint16', 'uint32', 'uint64']: + + if dtype.startswith('u'): + name = "U{}".format(dtype[1:].capitalize()) + else: + name = dtype.capitalize() + classname = "{}Dtype".format(name) + attributes_dict = {'type': getattr(np, dtype), + 'name': name} + dtype_type = type(classname, (IntegerDtype, ), attributes_dict) + setattr(module, classname, dtype_type) + + # register + registry.register(dtype_type) + _dtypes[dtype] = dtype_type() diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 0bc6ad8499934..c84ca88754e4f 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -651,7 +651,8 @@ def astype_nansafe(arr, dtype, copy=True): # dispatch on extension dtype if needed if is_extension_array_dtype(dtype): - return dtype.array_type._from_sequence(arr, copy=copy) + return dtype.construct_array_type()._from_sequence( + arr, dtype=dtype, copy=copy) if not isinstance(dtype, np.dtype): dtype = pandas_dtype(dtype) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 285e386e25613..7abffb54e6a10 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -531,6 +531,7 @@ def _concat_index_asobject(to_concat, name=None): to_concat = [x._values if isinstance(x, Index) else x for x in to_concat] + return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 78fa6f8217157..8f5dbb6c8cf57 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -44,6 +44,7 @@ is_datetime64_any_dtype, is_datetime64tz_dtype, is_timedelta64_dtype, + is_extension_array_dtype, is_hashable, needs_i8_conversion, is_iterator, is_list_like, @@ -266,18 +267,32 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, name=name) # categorical - if is_categorical_dtype(data) or is_categorical_dtype(dtype): + elif is_categorical_dtype(data) or is_categorical_dtype(dtype): from .category import CategoricalIndex return CategoricalIndex(data, dtype=dtype, copy=copy, name=name, **kwargs) # interval - if is_interval_dtype(data) or is_interval_dtype(dtype): + elif is_interval_dtype(data) or is_interval_dtype(dtype): from .interval import IntervalIndex closed = kwargs.get('closed', None) return IntervalIndex(data, dtype=dtype, name=name, copy=copy, closed=closed) + # extension dtype + elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype): + data = np.asarray(data) + if not (dtype is None or is_object_dtype(dtype)): + + # coerce to the provided dtype + data = 
dtype.construct_array_type()( + data, dtype=dtype, copy=False) + + # coerce to the object dtype + data = data.astype(object) + return Index(data, dtype=object, copy=True, name=name, + **kwargs) + # index-like elif isinstance(data, (np.ndarray, Index, ABCSeries)): @@ -1175,10 +1190,15 @@ def _to_embed(self, keep_tz=False, dtype=None): def astype(self, dtype, copy=True): if is_dtype_equal(self.dtype, dtype): return self.copy() if copy else self + elif is_categorical_dtype(dtype): from .category import CategoricalIndex return CategoricalIndex(self.values, name=self.name, dtype=dtype, copy=copy) + + elif is_extension_array_dtype(dtype): + return Index(np.asarray(self), dtype=dtype, copy=copy) + try: if is_datetime64tz_dtype(dtype): from pandas import DatetimeIndex diff --git a/pandas/core/missing.py b/pandas/core/missing.py index e9b9a734ec5f5..f5fb0070ffc4b 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -638,7 +638,8 @@ def fill_zeros(result, x, y, name, fill): # if we have a fill of inf, then sign it correctly # (GH 6178 and PR 9308) if np.isinf(fill): - signs = np.sign(y if name.startswith(('r', '__r')) else x) + signs = y if name.startswith(('r', '__r')) else x + signs = np.sign(signs.astype('float', copy=False)) negative_inf_mask = (signs.ravel() < 0) & mask np.putmask(result, negative_inf_mask, -fill) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 1ddf77cf71a11..70f64e1aef9cf 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -135,6 +135,13 @@ def rfloordiv(left, right): def rmod(left, right): + # check if right is a string as % is the string + # formatting operation; this is a TypeError + # otherwise perform the op + if isinstance(right, compat.string_types): + raise TypeError("{typ} cannot perform the operation mod".format( + typ=type(left).__name__)) + return right % left @@ -1018,7 +1025,7 @@ def _align_method_SERIES(left, right, align_asobject=False): return left, right -def _construct_result(left, result, index, name, dtype): +def _construct_result(left, result, index, name, dtype=None): """ If the raw op result has a non-None name (e.g. it is an Index object) and the name argument is None, then passing name to the constructor will @@ -1030,7 +1037,7 @@ def _construct_result(left, result, index, name, dtype): return out -def _construct_divmod_result(left, result, index, name, dtype): +def _construct_divmod_result(left, result, index, name, dtype=None): """divmod returns a tuple of like indexed series instead of a single series. """ constructor = left._constructor @@ -1048,16 +1055,33 @@ def dispatch_to_extension_op(op, left, right): # The op calls will raise TypeError if the op is not defined # on the ExtensionArray + # TODO(jreback) + # we need to listify to avoid ndarray, or non-same-type extension array + # dispatching + if is_extension_array_dtype(left): - res_values = op(left.values, right) + + new_left = left.values + if (isinstance(right, np.ndarray) or + (is_extension_array_dtype(right) and + type(left) != type(right))): + new_right = list(right) + else: + new_right = right + else: - # We know that left is not ExtensionArray and is Series and right is - # ExtensionArray. 
Want to force ExtensionArray op to get called - res_values = op(list(left.values), right.values) + new_left = list(left.values) + new_right = right + + res_values = op(new_left, new_right) res_name = get_op_result_name(left, right) - return left._constructor(res_values, index=left.index, - name=res_name) + + if op.__name__ == 'divmod': + return _construct_divmod_result( + left, res_values, left.index, res_name) + + return _construct_result(left, res_values, left.index, res_name) def _arith_method_SERIES(cls, op, special): @@ -1074,7 +1098,6 @@ def _arith_method_SERIES(cls, op, special): def na_op(x, y): import pandas.core.computation.expressions as expressions - try: result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs) except TypeError: @@ -1095,6 +1118,20 @@ def na_op(x, y): return result def safe_na_op(lvalues, rvalues): + """ + return the result of evaluating na_op on the passed in values + + try coercion to object type if the native types are not compatible + + Parameters + ---------- + lvalues : array-like + rvalues : array-like + + Raises + ------ + TypeError: invalid operation + """ try: with np.errstate(all='ignore'): return na_op(lvalues, rvalues) @@ -1105,14 +1142,21 @@ def safe_na_op(lvalues, rvalues): raise def wrapper(left, right): - if isinstance(right, ABCDataFrame): return NotImplemented left, right = _align_method_SERIES(left, right) res_name = get_op_result_name(left, right) - if is_datetime64_dtype(left) or is_datetime64tz_dtype(left): + if is_categorical_dtype(left): + raise TypeError("{typ} cannot perform the operation " + "{op}".format(typ=type(left).__name__, op=str_rep)) + + elif (is_extension_array_dtype(left) or + is_extension_array_dtype(right)): + return dispatch_to_extension_op(op, left, right) + + elif is_datetime64_dtype(left) or is_datetime64tz_dtype(left): result = dispatch_to_index_op(op, left, right, pd.DatetimeIndex) return construct_result(left, result, index=left.index, name=res_name, @@ -1124,15 +1168,6 @@ def wrapper(left, right): index=left.index, name=res_name, dtype=result.dtype) - elif is_categorical_dtype(left): - raise TypeError("{typ} cannot perform the operation " - "{op}".format(typ=type(left).__name__, op=str_rep)) - - elif (is_extension_array_dtype(left) or - (is_extension_array_dtype(right) and - not is_categorical_dtype(right))): - return dispatch_to_extension_op(op, left, right) - lvalues = left.values rvalues = right if isinstance(rvalues, ABCSeries): @@ -1204,6 +1239,9 @@ def _comp_method_SERIES(cls, op, special): masker = _gen_eval_kwargs(op_name).get('masker', False) def na_op(x, y): + # TODO: + # should have guarantess on what x, y can be type-wise + # Extension Dtypes are not called here # dispatch to the categorical if we have a categorical # in either operand diff --git a/pandas/core/series.py b/pandas/core/series.py index 0bdb9d9cc23a6..83580c86ccbfb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4094,7 +4094,7 @@ def _try_cast(arr, take_fast_path): elif is_extension_array_dtype(dtype): # create an extension array from its dtype array_type = dtype.construct_array_type() - subarr = array_type(subarr, copy=copy) + subarr = array_type(subarr, dtype=dtype, copy=copy) elif dtype is not None and raise_cast_failure: raise diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py index 640b894e2245f..b6b81bb941a59 100644 --- a/pandas/tests/extension/base/__init__.py +++ b/pandas/tests/extension/base/__init__.py @@ -47,7 +47,7 @@ class TestMyDtype(BaseDtypeTests): from 
.groupby import BaseGroupbyTests # noqa from .interface import BaseInterfaceTests # noqa from .methods import BaseMethodsTests # noqa -from .ops import BaseArithmeticOpsTests, BaseComparisonOpsTests # noqa +from .ops import BaseArithmeticOpsTests, BaseComparisonOpsTests, BaseOpsUtil # noqa from .missing import BaseMissingTests # noqa from .reshaping import BaseReshapingTests # noqa from .setitem import BaseSetitemTests # noqa diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index e9df49780f119..886a0f66b5f66 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -226,12 +226,14 @@ def test_reindex(self, data, na_value): n = len(data) result = s.reindex([-1, 0, n]) expected = pd.Series( - data._from_sequence([na_value, data[0], na_value]), + data._from_sequence([na_value, data[0], na_value], + dtype=s.dtype), index=[-1, 0, n]) self.assert_series_equal(result, expected) result = s.reindex([n, n + 1]) - expected = pd.Series(data._from_sequence([na_value, na_value]), + expected = pd.Series(data._from_sequence([na_value, na_value], + dtype=s.dtype), index=[n, n + 1]) self.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 659b9757ac1e3..f8d2f8314d2b1 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -3,10 +3,12 @@ import operator import pandas as pd +from pandas.core import ops from .base import BaseExtensionTests class BaseOpsUtil(BaseExtensionTests): + def get_op_from_name(self, op_name): short_opname = op_name.strip('_') try: @@ -32,6 +34,20 @@ def _check_op(self, s, op, other, exc=NotImplementedError): with pytest.raises(exc): op(s, other) + def _check_divmod_op(self, s, op, other, exc=NotImplementedError): + # divmod has multiple return values, so check separatly + if exc is None: + result_div, result_mod = op(s, other) + if op is divmod: + expected_div, expected_mod = s // other, s % other + else: + expected_div, expected_mod = other // s, other % s + self.assert_series_equal(result_div, expected_div) + self.assert_series_equal(result_mod, expected_mod) + else: + with pytest.raises(exc): + divmod(s, other) + class BaseArithmeticOpsTests(BaseOpsUtil): """Various Series and DataFrame arithmetic ops methods.""" @@ -50,8 +66,8 @@ def test_arith_array(self, data, all_arithmetic_operators): def test_divmod(self, data): s = pd.Series(data) - self._check_op(s, divmod, 1, exc=TypeError) - self._check_op(1, divmod, s, exc=TypeError) + self._check_divmod_op(s, divmod, 1, exc=TypeError) + self._check_divmod_op(1, ops.rdivmod, s, exc=TypeError) def test_error(self, data, all_arithmetic_operators): # invalid ops diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index c83726c5278a5..0340289e0b674 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -82,7 +82,8 @@ def test_concat_columns(self, data, na_value): # non-aligned df2 = pd.DataFrame({'B': [1, 2, 3]}, index=[1, 2, 3]) expected = pd.DataFrame({ - 'A': data._from_sequence(list(data[:3]) + [na_value]), + 'A': data._from_sequence(list(data[:3]) + [na_value], + dtype=data.dtype), 'B': [np.nan, 1, 2, 3]}) result = pd.concat([df1, df2], axis=1) @@ -96,8 +97,10 @@ def test_align(self, data, na_value): r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) # Assumes that the ctor can take a list of scalars of the type - e1 = 
pd.Series(data._from_sequence(list(a) + [na_value])) - e2 = pd.Series(data._from_sequence([na_value] + list(b))) + e1 = pd.Series(data._from_sequence(list(a) + [na_value], + dtype=data.dtype)) + e2 = pd.Series(data._from_sequence([na_value] + list(b), + dtype=data.dtype)) self.assert_series_equal(r1, e1) self.assert_series_equal(r2, e2) @@ -109,8 +112,10 @@ def test_align_frame(self, data, na_value): ) # Assumes that the ctor can take a list of scalars of the type - e1 = pd.DataFrame({'A': data._from_sequence(list(a) + [na_value])}) - e2 = pd.DataFrame({'A': data._from_sequence([na_value] + list(b))}) + e1 = pd.DataFrame({'A': data._from_sequence(list(a) + [na_value], + dtype=data.dtype)}) + e2 = pd.DataFrame({'A': data._from_sequence([na_value] + list(b), + dtype=data.dtype)}) self.assert_frame_equal(r1, e1) self.assert_frame_equal(r2, e2) @@ -120,7 +125,8 @@ def test_align_series_frame(self, data, na_value): df = pd.DataFrame({"col": np.arange(len(ser) + 1)}) r1, r2 = ser.align(df) - e1 = pd.Series(data._from_sequence(list(data) + [na_value]), + e1 = pd.Series(data._from_sequence(list(data) + [na_value], + dtype=data.dtype), name=ser.name) self.assert_series_equal(r1, e1) @@ -153,7 +159,8 @@ def test_merge(self, data, na_value): res = pd.merge(df1, df2) exp = pd.DataFrame( {'int1': [1, 1, 2], 'int2': [1, 2, 3], 'key': [0, 0, 1], - 'ext': data._from_sequence([data[0], data[0], data[1]])}) + 'ext': data._from_sequence([data[0], data[0], data[1]], + dtype=data.dtype)}) self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']]) res = pd.merge(df1, df2, how='outer') @@ -161,5 +168,6 @@ def test_merge(self, data, na_value): {'int1': [1, 1, 2, 3, np.nan], 'int2': [1, 2, 3, np.nan, 4], 'key': [0, 0, 1, 2, 3], 'ext': data._from_sequence( - [data[0], data[0], data[1], data[2], na_value])}) + [data[0], data[0], data[1], data[2], na_value], + dtype=data.dtype)}) self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']]) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 33adebbbe5780..373da1af9ef24 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -1,6 +1,5 @@ import decimal import numbers -import random import sys import numpy as np @@ -38,7 +37,7 @@ def construct_from_string(cls, string): class DecimalArray(ExtensionArray, ExtensionScalarOpsMixin): dtype = DecimalDtype() - def __init__(self, values, copy=False): + def __init__(self, values, dtype=None, copy=False): for val in values: if not isinstance(val, self.dtype.type): raise TypeError("All values must be of type " + @@ -54,8 +53,8 @@ def __init__(self, values, copy=False): # self._values = self.values = self.data @classmethod - def _from_sequence(cls, scalars, copy=False): - return cls(scalars) + def _from_sequence(cls, scalars, dtype=None, copy=False): + return cls(scalars, copy=copy) @classmethod def _from_factorized(cls, values, original): @@ -117,7 +116,3 @@ def _concat_same_type(cls, to_concat): DecimalArray._add_arithmetic_ops() DecimalArray._add_comparison_ops() - - -def make_data(): - return [decimal.Decimal(random.random()) for _ in range(100)] diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 8fd3d1a57f6c8..b851ad1d06d4d 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -1,5 +1,6 @@ import decimal +import random import numpy as np import pandas as pd import pandas.util.testing as tm @@ -7,7 
+8,12 @@ from pandas.tests.extension import base -from .array import DecimalDtype, DecimalArray, make_data +from .array import DecimalDtype, DecimalArray + + +@pytest.fixture +def make_data(): + return [decimal.Decimal(random.random()) for _ in range(100)] @pytest.fixture diff --git a/pandas/tests/extension/integer/__init__.py b/pandas/tests/extension/integer/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/integer/test_integer.py new file mode 100644 index 0000000000000..773581a826b8d --- /dev/null +++ b/pandas/tests/extension/integer/test_integer.py @@ -0,0 +1,604 @@ +import numpy as np +import pandas as pd +import pandas.util.testing as tm +import pytest + +from pandas.tests.extension import base +from pandas.api.types import ( + is_integer, is_scalar, is_float, is_float_dtype) +from pandas.core.dtypes.generic import ABCIndexClass + +from pandas.core.arrays import ( + to_integer_array, IntegerArray) +from pandas.core.arrays.integer import ( + Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, + UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype) + + +@pytest.fixture +def make_data(): + return (list(range(8)) + + [np.nan] + + list(range(10, 98)) + + [np.nan] + + [99, 100]) + + +@pytest.fixture(params=[Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype, + UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype]) +def dtype(request): + return request.param() + + +@pytest.fixture +def data(dtype): + return IntegerArray(make_data(), dtype=dtype) + + +@pytest.fixture +def data_missing(dtype): + return IntegerArray([np.nan, 1], dtype=dtype) + + +@pytest.fixture +def data_repeated(data): + def gen(count): + for _ in range(count): + yield data + yield gen + + +@pytest.fixture +def data_for_sorting(dtype): + return IntegerArray([1, 2, 0], dtype=dtype) + + +@pytest.fixture +def data_missing_for_sorting(dtype): + return IntegerArray([1, np.nan, 0], dtype=dtype) + + +@pytest.fixture +def na_cmp(): + # we are np.nan + return lambda x, y: np.isnan(x) and np.isnan(y) + + +@pytest.fixture +def na_value(): + return np.nan + + +@pytest.fixture +def data_for_grouping(dtype): + b = 1 + a = 0 + c = 2 + na = np.nan + return IntegerArray([b, b, na, na, a, a, b, c], dtype=dtype) + + +def test_dtypes(dtype): + # smoke tests on auto dtype construction + + if dtype.is_signed_integer: + assert np.dtype(dtype.type).kind == 'i' + else: + assert np.dtype(dtype.type).kind == 'u' + assert dtype.name is not None + + +class BaseInteger(object): + + def assert_index_equal(self, left, right, *args, **kwargs): + + left_na = left.isna() + right_na = right.isna() + + tm.assert_numpy_array_equal(left_na, right_na) + return tm.assert_index_equal(left[~left_na], + right[~right_na], + *args, **kwargs) + + def assert_series_equal(self, left, right, *args, **kwargs): + + left_na = left.isna() + right_na = right.isna() + + tm.assert_series_equal(left_na, right_na) + return tm.assert_series_equal(left[~left_na], + right[~right_na], + *args, **kwargs) + + def assert_frame_equal(self, left, right, *args, **kwargs): + # TODO(EA): select_dtypes + tm.assert_index_equal( + left.columns, right.columns, + exact=kwargs.get('check_column_type', 'equiv'), + check_names=kwargs.get('check_names', True), + check_exact=kwargs.get('check_exact', False), + check_categorical=kwargs.get('check_categorical', True), + obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame'))) + + integers = (left.dtypes == 'integer').index + + for col in integers: + 
self.assert_series_equal(left[col], right[col], + *args, **kwargs) + + left = left.drop(columns=integers) + right = right.drop(columns=integers) + tm.assert_frame_equal(left, right, *args, **kwargs) + + +class TestDtype(BaseInteger, base.BaseDtypeTests): + + @pytest.mark.skip(reason="using multiple dtypes") + def test_is_dtype_unboxes_dtype(self): + # we have multiple dtypes, so skip + pass + + def test_array_type_with_arg(self, data, dtype): + assert dtype.construct_array_type() is IntegerArray + + +class TestArithmeticOps(BaseInteger, base.BaseArithmeticOpsTests): + + def _check_divmod_op(self, s, op, other, exc=None): + super(TestArithmeticOps, self)._check_divmod_op(s, op, other, None) + + def _check_op(self, s, op_name, other, exc=None): + op = self.get_op_from_name(op_name) + result = op(s, other) + + # compute expected + mask = s.isna() + + # other array is an Integer + if isinstance(other, IntegerArray): + omask = getattr(other, 'mask', None) + mask = getattr(other, 'data', other) + if omask is not None: + mask |= omask + + # float result type or float op + if ((is_float_dtype(other) or is_float(other) or + op_name in ['__rtruediv__', '__truediv__', + '__rdiv__', '__div__'])): + rs = s.astype('float') + expected = op(rs, other) + self._check_op_float(result, expected, mask, s, op_name, other) + + # integer result type + else: + rs = pd.Series(s.values.data) + expected = op(rs, other) + self._check_op_integer(result, expected, mask, s, op_name, other) + + def _check_op_float(self, result, expected, mask, s, op_name, other): + # check comparisions that are resulting in float dtypes + + expected[mask] = np.nan + self.assert_series_equal(result, expected) + + def _check_op_integer(self, result, expected, mask, s, op_name, other): + # check comparisions that are resulting in integer dtypes + + # to compare properly, we convert the expected + # to float, mask to nans and convert infs + # if we have uints then we process as uints + # then conert to float + # and we ultimately want to create a IntArray + # for comparisons + + fill_value = 0 + + # mod/rmod turn floating 0 into NaN while + # integer works as expected (no nan) + if op_name in ['__mod__', '__rmod__']: + if is_scalar(other): + if other == 0: + expected[s.values == 0] = 0 + else: + expected = expected.fillna(0) + else: + expected[(s.values == 0) & + ((expected == 0) | expected.isna())] = 0 + + try: + expected[(expected == np.inf) | (expected == -np.inf)] = fill_value + original = expected + expected = expected.astype(s.dtype) + + except ValueError: + + expected = expected.astype(float) + expected[(expected == np.inf) | (expected == -np.inf)] = fill_value + original = expected + expected = expected.astype(s.dtype) + + expected[mask] = np.nan + + # assert that the expected astype is ok + # (skip for unsigned as they have wrap around) + if not s.dtype.is_unsigned_integer: + original = pd.Series(original) + + # we need to fill with 0's to emulate what an astype('int') does + # (truncation) for certain ops + if op_name in ['__rtruediv__', '__rdiv__']: + mask |= original.isna() + original = original.fillna(0).astype('int') + + original = original.astype('float') + original[mask] = np.nan + self.assert_series_equal(original, expected.astype('float')) + + # assert our expected result + self.assert_series_equal(result, expected) + + def test_arith_integer_array(self, data, all_arithmetic_operators): + # we operate with a rhs of an integer array + + op = all_arithmetic_operators + + s = pd.Series(data) + rhs = pd.Series([1] * len(data), 
dtype=data.dtype) + rhs.iloc[-1] = np.nan + + self._check_op(s, op, rhs) + + def test_arith_scalar(self, data, all_arithmetic_operators): + # scalar + op = all_arithmetic_operators + + s = pd.Series(data) + self._check_op(s, op, 1, exc=TypeError) + + def test_arith_array(self, data, all_arithmetic_operators): + # ndarray & other series + op = all_arithmetic_operators + + s = pd.Series(data) + other = np.ones(len(s), dtype=s.dtype.type) + self._check_op(s, op, other, exc=TypeError) + + def test_arith_coerce_scalar(self, data, all_arithmetic_operators): + + op = all_arithmetic_operators + s = pd.Series(data) + + other = 0.01 + self._check_op(s, op, other) + + def test_error(self, data, all_arithmetic_operators): + # invalid ops + + op = all_arithmetic_operators + s = pd.Series(data) + ops = getattr(s, op) + opa = getattr(data, op) + + # invalid scalars + with pytest.raises(TypeError): + ops('foo') + with pytest.raises(TypeError): + ops(pd.Timestamp('20180101')) + + # invalid array-likes + with pytest.raises(TypeError): + ops(pd.Series('foo', index=s.index)) + + if op != '__rpow__': + # TODO(extension) + # rpow with a datetimelike coerces the integer array incorrectly + with pytest.raises(TypeError): + ops(pd.Series(pd.date_range('20180101', periods=len(s)))) + + # 2d + with pytest.raises(TypeError): + opa(pd.DataFrame({'A': s})) + with pytest.raises(TypeError): + opa(np.arange(len(s)).reshape(-1, len(s))) + + +class TestComparisonOps(BaseInteger, base.BaseComparisonOpsTests): + + def _compare_other(self, s, data, op_name, other): + op = self.get_op_from_name(op_name) + + # array + result = op(s, other) + expected = pd.Series(op(data.data, other)) + + # fill the nan locations + expected[data.mask] = True if op_name == '__ne__' else False + + tm.assert_series_equal(result, expected) + + # series + s = pd.Series(data) + result = op(s, other) + + expected = pd.Series(data.data) + expected = op(expected, other) + + # fill the nan locations + expected[data.mask] = True if op_name == '__ne__' else False + + tm.assert_series_equal(result, expected) + + +class TestInterface(BaseInteger, base.BaseInterfaceTests): + + def test_repr_array(self, data): + result = repr(data) + + # not long + assert '...' not in result + + assert 'dtype=' in result + assert 'IntegerArray' in result + + def test_repr_array_long(self, data): + # some arrays may be able to assert a ... in the repr + with pd.option_context('display.max_seq_items', 1): + result = repr(data) + + assert '...' 
in result + assert 'length' in result + + +class TestConstructors(BaseInteger, base.BaseConstructorsTests): + + def test_from_dtype_from_float(self, data): + # construct from our dtype & string dtype + dtype = data.dtype + + # from float + expected = pd.Series(data) + result = pd.Series(np.array(data).astype('float'), dtype=str(dtype)) + self.assert_series_equal(result, expected) + + # from int / list + expected = pd.Series(data) + result = pd.Series(np.array(data).tolist(), dtype=str(dtype)) + self.assert_series_equal(result, expected) + + # from int / array + expected = pd.Series(data).dropna().reset_index(drop=True) + dropped = np.array(data.dropna()).astype(np.dtype((dtype.type))) + result = pd.Series(dropped, dtype=str(dtype)) + self.assert_series_equal(result, expected) + + +class TestReshaping(BaseInteger, base.BaseReshapingTests): + + def test_concat_mixed_dtypes(self, data): + # https://github.com/pandas-dev/pandas/issues/20762 + df1 = pd.DataFrame({'A': data[:3]}) + df2 = pd.DataFrame({"A": [1, 2, 3]}) + df3 = pd.DataFrame({"A": ['a', 'b', 'c']}).astype('category') + df4 = pd.DataFrame({"A": pd.SparseArray([1, 2, 3])}) + dfs = [df1, df2, df3, df4] + + # dataframes + result = pd.concat(dfs) + expected = pd.concat([x.astype(object) for x in dfs]) + self.assert_frame_equal(result, expected) + + # series + result = pd.concat([x['A'] for x in dfs]) + expected = pd.concat([x['A'].astype(object) for x in dfs]) + self.assert_series_equal(result, expected) + + result = pd.concat([df1, df2]) + expected = pd.concat([df1.astype('object'), df2.astype('object')]) + self.assert_frame_equal(result, expected) + + # concat of an Integer and Int coerces to object dtype + # TODO(jreback) once integrated this would + # be a result of Integer + result = pd.concat([df1['A'], df2['A']]) + expected = pd.concat([df1['A'].astype('object'), + df2['A'].astype('object')]) + self.assert_series_equal(result, expected) + + +class TestGetitem(BaseInteger, base.BaseGetitemTests): + pass + + +class TestMissing(BaseInteger, base.BaseMissingTests): + pass + + +class TestMethods(BaseInteger, base.BaseMethodsTests): + + @pytest.mark.parametrize('dropna', [True, False]) + def test_value_counts(self, all_data, dropna): + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Series(all_data).value_counts(dropna=dropna).sort_index() + expected = pd.Series(other).value_counts( + dropna=dropna).sort_index() + expected.index = expected.index.astype(all_data.dtype) + + self.assert_series_equal(result, expected) + + def test_combine_add(self, data_repeated): + # GH 20825 + orig_data1, orig_data2 = data_repeated(2) + s1 = pd.Series(orig_data1) + s2 = pd.Series(orig_data2) + + # fundamentally this is not a great operation + # as overflow / underflow can easily happen here + # e.g. 
int8 + int8 + def scalar_add(a, b): + + # TODO; should really be a type specific NA + if pd.isna(a) or pd.isna(b): + return np.nan + if is_integer(a): + a = int(a) + elif is_integer(b): + b = int(b) + return a + b + + result = s1.combine(s2, scalar_add) + expected = pd.Series( + orig_data1._from_sequence([scalar_add(a, b) for (a, b) in + zip(orig_data1, + orig_data2)])) + self.assert_series_equal(result, expected) + + val = s1.iloc[0] + result = s1.combine(val, lambda x1, x2: x1 + x2) + expected = pd.Series( + orig_data1._from_sequence([a + val for a in list(orig_data1)])) + self.assert_series_equal(result, expected) + + +class TestCasting(BaseInteger, base.BaseCastingTests): + + @pytest.mark.parametrize('dropna', [True, False]) + def test_construct_index(self, all_data, dropna): + # ensure that we do not coerce to Float64Index, rather + # keep as Index + + all_data = all_data[:10] + if dropna: + other = np.array(all_data[~all_data.isna()]) + else: + other = all_data + + result = pd.Index(IntegerArray(other, + dtype=all_data.dtype)) + expected = pd.Index(other, dtype=object) + + self.assert_index_equal(result, expected) + + @pytest.mark.parametrize('dropna', [True, False]) + def test_astype_index(self, all_data, dropna): + # as an int/uint index to Index + + all_data = all_data[:10] + if dropna: + other = all_data[~all_data.isna()] + else: + other = all_data + + dtype = all_data.dtype + idx = pd.Index(np.array(other)) + assert isinstance(idx, ABCIndexClass) + + result = idx.astype(dtype) + expected = idx.astype(object).astype(dtype) + self.assert_index_equal(result, expected) + + +class TestGroupby(BaseInteger, base.BaseGroupbyTests): + + @pytest.mark.xfail(reason="groupby not working") + def test_groupby_extension_no_sort(self, data_for_grouping): + super(TestGroupby, self).test_groupby_extension_no_sort( + data_for_grouping) + + @pytest.mark.xfail(reason="groupby not working") + @pytest.mark.parametrize('as_index', [True, False]) + def test_groupby_extension_agg(self, as_index, data_for_grouping): + super(TestGroupby, self).test_groupby_extension_agg( + as_index, data_for_grouping) + + +def test_frame_repr(data_missing): + + df = pd.DataFrame({'A': data_missing}) + result = repr(df) + expected = ' A\n0 NaN\n1 1' + assert result == expected + + +def test_conversions(data_missing): + + # astype to object series + df = pd.DataFrame({'A': data_missing}) + result = df['A'].astype('object') + expected = pd.Series(np.array([np.nan, 1], dtype=object), name='A') + tm.assert_series_equal(result, expected) + + # convert to object ndarray + # we assert that we are exactly equal + # including type conversions of scalars + result = df['A'].astype('object').values + expected = np.array([np.nan, 1], dtype=object) + tm.assert_numpy_array_equal(result, expected) + + for r, e in zip(result, expected): + if pd.isnull(r): + assert pd.isnull(e) + elif is_integer(r): + # PY2 can be int or long + assert r == e + assert is_integer(e) + else: + assert r == e + assert type(r) == type(e) + + +@pytest.mark.parametrize( + 'values', + [ + ['foo', 'bar'], + 'foo', + 1, + 1.0, + pd.date_range('20130101', periods=2), + np.array(['foo'])]) +def test_to_integer_array_error(values): + # error in converting existing arrays to IntegerArrays + with pytest.raises(TypeError): + to_integer_array(values) + + +@pytest.mark.parametrize( + 'values, dtype', + [ + (np.array([1], dtype='int64'), Int64Dtype), + (np.array([1, np.nan]), Int64Dtype)]) +def test_to_integer_array(values, dtype): + # convert existing arrays to IntegerArrays + 
result = to_integer_array(values) + expected = IntegerArray(values, dtype=dtype) + tm.assert_extension_array_equal(result, expected) + + +def test_cross_type_arithmetic(): + + df = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'), + 'B': pd.Series([1, np.nan, 3], dtype='UInt8'), + 'C': [1, 2, 3]}) + + result = df.A + df.C + expected = pd.Series([2, 4, np.nan], dtype='Int64') + tm.assert_series_equal(result, expected) + + result = (df.A + df.C) * 3 == 12 + expected = pd.Series([False, True, False]) + tm.assert_series_equal(result, expected) + + result = df.A + df.B + expected = pd.Series([2, np.nan, np.nan], dtype='Int64') + tm.assert_series_equal(result, expected) + + +# TODO(jreback) - these need testing / are broken + +# shift + +# set_index (destroys type) diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 160bf259e1e32..34c397252a8bb 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -13,8 +13,6 @@ import collections import itertools import numbers -import random -import string import sys import numpy as np @@ -54,7 +52,7 @@ def construct_from_string(cls, string): class JSONArray(ExtensionArray): dtype = JSONDtype() - def __init__(self, values, copy=False): + def __init__(self, values, dtype=None, copy=False): for val in values: if not isinstance(val, self.dtype.type): raise TypeError("All values must be of type " + @@ -69,7 +67,7 @@ def __init__(self, values, copy=False): # self._values = self.values = self.data @classmethod - def _from_sequence(cls, scalars, copy=False): + def _from_sequence(cls, scalars, dtype=None, copy=False): return cls(scalars) @classmethod @@ -180,10 +178,3 @@ def _values_for_argsort(self): # cast them to an (N, P) array, instead of an (N,) array of tuples. frozen = [()] + list(tuple(x.items()) for x in self) return np.array(frozen, dtype=object)[1:] - - -def make_data(): - # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer - return [collections.UserDict([ - (random.choice(string.ascii_letters), random.randint(0, 100)) - for _ in range(random.randint(0, 10))]) for _ in range(100)] diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 7eeaf7946663e..126607907a8da 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -1,5 +1,7 @@ import operator import collections +import random +import string import pytest @@ -8,11 +10,19 @@ from pandas.compat import PY2, PY36 from pandas.tests.extension import base -from .array import JSONArray, JSONDtype, make_data +from .array import JSONArray, JSONDtype pytestmark = pytest.mark.skipif(PY2, reason="Py2 doesn't have a UserDict") +@pytest.fixture +def make_data(): + # TODO: Use a regular dict. 
See _NDFrameIndexer._setitem_with_indexer + return [collections.UserDict([ + (random.choice(string.ascii_letters), random.randint(0, 100)) + for _ in range(random.randint(0, 10))]) for _ in range(100)] + + @pytest.fixture def dtype(): return JSONDtype() @@ -203,7 +213,8 @@ def test_combine_add(self, data_repeated): class TestCasting(BaseJSON, base.BaseCastingTests): - @pytest.mark.xfail + + @pytest.mark.xfail(reason="failing on np.array(self, dtype=str)") def test_astype_str(self): """This currently fails in NumPy on np.array(self, dtype=str) with From 4faa4c6a20db3c35d487b462f85c025a9a6f3ea0 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 7 Jul 2018 10:39:31 -0500 Subject: [PATCH 02/20] update for review comments --- pandas/core/arrays/integer.py | 9 +++++---- pandas/tests/extension/integer/test_integer.py | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index a7c0518bb4ed9..0e59828727e63 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -311,7 +311,7 @@ def _concat_same_type(cls, to_concat): return cls(data, mask=mask, dtype=to_concat[0].dtype) def astype(self, dtype, copy=True): - """Cast to a NumPy array with 'dtype'. + """Cast to a NumPy array or IntegerArray with 'dtype'. Parameters ---------- @@ -324,8 +324,8 @@ def astype(self, dtype, copy=True): Returns ------- - array : ndarray - NumPy ndarray with 'dtype' for its dtype. + array : ndarray or IntegerArray + NumPy ndarray or IntergerArray with 'dtype' for its dtype. Raises ------ @@ -502,7 +502,8 @@ def integer_arithmetic_method(self, other): if isinstance(other, IntegerArray): other, mask = other.data, other.mask elif getattr(other, 'ndim', 0) > 1: - raise TypeError("can only perform ops with 1-d structures") + raise NotImplementedError( + "can only perform ops with 1-d structures") elif is_list_like(other): other = np.asarray(other) if not other.ndim: diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/integer/test_integer.py index 773581a826b8d..a8bfda82f0211 100644 --- a/pandas/tests/extension/integer/test_integer.py +++ b/pandas/tests/extension/integer/test_integer.py @@ -296,9 +296,9 @@ def test_error(self, data, all_arithmetic_operators): ops(pd.Series(pd.date_range('20180101', periods=len(s)))) # 2d - with pytest.raises(TypeError): + with pytest.raises(NotImplementedError): opa(pd.DataFrame({'A': s})) - with pytest.raises(TypeError): + with pytest.raises(NotImplementedError): opa(np.arange(len(s)).reshape(-1, len(s))) From 712b52d2fff17ccd1bb56bb37ffd0d86c09d8d98 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 7 Jul 2018 14:12:57 -0500 Subject: [PATCH 03/20] update docs of IntegerDtype remove mask arg from _from_sqequence in IntegerArray --- pandas/core/arrays/integer.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 0e59828727e63..c4db155447a5c 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -22,7 +22,16 @@ format_object_summary, format_object_attrs, default_pprint) -class IntegerDtype(ExtensionDtype): +class _IntegerDtype(ExtensionDtype): + """ + An ExtensionDtype to hold a single size & kind of integer dtype. + + These specific implementations are subclasses of the non-public + _IntegerDtype. For example we have Int8Dtype to represnt signed int 8s. + + The attributes name & type are set when these subclasses are created. 
+ """ + name = None type = None na_value = np.nan @@ -177,8 +186,8 @@ def __init__(self, values, mask=None, dtype=None, copy=False): values, dtype=dtype, mask=mask, copy=copy) @classmethod - def _from_sequence(cls, scalars, mask=None, dtype=None, copy=False): - return cls(scalars, mask=mask, dtype=dtype, copy=copy) + def _from_sequence(cls, scalars, dtype=None, copy=False): + return cls(scalars, dtype=dtype, copy=copy) @classmethod def _from_factorized(cls, values, original): @@ -335,7 +344,7 @@ def astype(self, dtype, copy=True): """ # if we are astyping to an existing IntegerDtype we can fastpath - if isinstance(dtype, IntegerDtype): + if isinstance(dtype, _IntegerDtype): result = self.data.astype(dtype.numpy_dtype, casting='same_kind', copy=False) return type(self)(result, mask=self.mask, @@ -556,7 +565,7 @@ def integer_arithmetic_method(self, other): classname = "{}Dtype".format(name) attributes_dict = {'type': getattr(np, dtype), 'name': name} - dtype_type = type(classname, (IntegerDtype, ), attributes_dict) + dtype_type = type(classname, (_IntegerDtype, ), attributes_dict) setattr(module, classname, dtype_type) # register From 74f392a29baccceddc88ceedffb1f00dec6da56c Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 7 Jul 2018 15:28:03 -0500 Subject: [PATCH 04/20] review comments --- pandas/core/arrays/base.py | 3 ++- pandas/core/arrays/integer.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index c0697dd29e4d0..139124e47ed45 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -97,7 +97,8 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): Each element will be an instance of the scalar type for this array, ``cls.dtype.type``. dtype : Dtype, optional - consruct for this particular dtype + construct for this particular dtype. This should be a Dtype + compatible with the ExtensionArray. 
copy : boolean, default False if True, copy the underlying data Returns diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index c4db155447a5c..aff17a4a86dd9 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -182,6 +182,18 @@ def dtype(self): return _dtypes[str(self.data.dtype)] def __init__(self, values, mask=None, dtype=None, copy=False): + """ + Parameters + ---------- + values : 1D list-like / IntegerArray + mask : 1D list-like, optional + dtype : subclass of _IntegerDtype, optional + copy : bool, default False + + Returns + ------- + IntegerArray + """ self.data, self.mask = coerce_to_array( values, dtype=dtype, mask=mask, copy=copy) From 3889feb29647e0b9bac02b3f42bc79b9959d4ed0 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 7 Jul 2018 15:44:09 -0500 Subject: [PATCH 05/20] make data & mask private attributes --- pandas/core/arrays/integer.py | 74 +++++++++---------- .../tests/extension/integer/test_integer.py | 10 +-- 2 files changed, 42 insertions(+), 42 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index aff17a4a86dd9..0bdadbd089e93 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -117,7 +117,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False): """ if isinstance(values, IntegerArray): - values, mask = values.data, values.mask + values, mask = values._data, values._mask if copy: values = values.copy() mask = mask.copy() @@ -179,7 +179,7 @@ class IntegerArray(ExtensionArray, ExtensionOpsMixin): @cache_readonly def dtype(self): - return _dtypes[str(self.data.dtype)] + return _dtypes[str(self._data.dtype)] def __init__(self, values, mask=None, dtype=None, copy=False): """ @@ -194,7 +194,7 @@ def __init__(self, values, mask=None, dtype=None, copy=False): ------- IntegerArray """ - self.data, self.mask = coerce_to_array( + self._data, self._mask = coerce_to_array( values, dtype=dtype, mask=mask, copy=copy) @classmethod @@ -207,19 +207,19 @@ def _from_factorized(cls, values, original): def __getitem__(self, item): if is_integer(item): - if self.mask[item]: + if self._mask[item]: return self.dtype.na_value - return self.data[item] - return type(self)(self.data[item], - mask=self.mask[item], + return self._data[item] + return type(self)(self._data[item], + mask=self._mask[item], dtype=self.dtype) def _coerce_to_ndarray(self): """ coerce to an ndarary, preserving my scalar types """ # TODO(jreback) make this better - data = self.data.astype(object) - data[self.mask] = self._na_value + data = self._data.astype(object) + data[self._mask] = self._na_value return data def __array__(self, dtype=None): @@ -237,10 +237,10 @@ def __iter__(self): # arrays as list-like. The default implementation makes successive # calls to ``__getitem__``, which may be slower than necessary. 
for i in range(len(self)): - if self.mask[i]: + if self._mask[i]: yield self.dtype.na_value else: - yield self.data[i] + yield self._data[i] def _formatting_values(self): # type: () -> np.ndarray @@ -252,10 +252,10 @@ def take(self, indexer, allow_fill=False, fill_value=None): # we always fill with 1 internally # to avoid upcasting data_fill_value = 1 if isna(fill_value) else fill_value - result = take(self.data, indexer, fill_value=data_fill_value, + result = take(self._data, indexer, fill_value=data_fill_value, allow_fill=allow_fill) - mask = take(self.mask, indexer, fill_value=True, + mask = take(self._mask, indexer, fill_value=True, allow_fill=allow_fill) # if we are filling @@ -270,7 +270,7 @@ def take(self, indexer, allow_fill=False, fill_value=None): return type(self)(result, mask=mask, dtype=self.dtype) def copy(self, deep=False): - data, mask = self.data, self.mask + data, mask = self._data, self._mask if deep: data = copy.deepcopy(data) mask = copy.deepcopy(mask) @@ -289,11 +289,11 @@ def __setitem__(self, key, value): value = value[0] mask = mask[0] - self.data[key] = value - self.mask[key] = mask + self._data[key] = value + self._mask[key] = mask def __len__(self): - return len(self.data) + return len(self._data) def __repr__(self): """ @@ -316,10 +316,10 @@ def __repr__(self): @property def nbytes(self): - return self.data.nbytes + self.mask.nbytes + return self._data.nbytes + self._mask.nbytes def isna(self): - return self.mask + return self._mask @property def _na_value(self): @@ -327,8 +327,8 @@ def _na_value(self): @classmethod def _concat_same_type(cls, to_concat): - data = np.concatenate([x.data for x in to_concat]) - mask = np.concatenate([x.mask for x in to_concat]) + data = np.concatenate([x._data for x in to_concat]) + mask = np.concatenate([x._mask for x in to_concat]) return cls(data, mask=mask, dtype=to_concat[0].dtype) def astype(self, dtype, copy=True): @@ -357,9 +357,9 @@ def astype(self, dtype, copy=True): # if we are astyping to an existing IntegerDtype we can fastpath if isinstance(dtype, _IntegerDtype): - result = self.data.astype(dtype.numpy_dtype, - casting='same_kind', copy=False) - return type(self)(result, mask=self.mask, + result = self._data.astype(dtype.numpy_dtype, + casting='same_kind', copy=False) + return type(self)(result, mask=self._mask, dtype=dtype, copy=False) # coerce @@ -376,7 +376,7 @@ def _ndarray_values(self): The expectation is that this is cheap to compute, and is primarily used for interacting with our indexers. 
""" - return self.data + return self._data def value_counts(self, dropna=True): """ @@ -402,7 +402,7 @@ def value_counts(self, dropna=True): from pandas import Index, Series # compute counts on the data with no nans - data = self.data[~self.mask] + data = self._data[~self._mask] value_counts = Index(data).value_counts() array = value_counts.values @@ -417,7 +417,7 @@ def value_counts(self, dropna=True): # TODO(extension) # appending to an Index *always* infers # w/o passing the dtype - array = np.append(array, [self.mask.sum()]) + array = np.append(array, [self._mask.sum()]) index = Index(np.concatenate( [index.values, np.array([np.nan], dtype=object)]), dtype=object) @@ -438,8 +438,8 @@ def _values_for_argsort(self): -------- ExtensionArray.argsort """ - data = self.data.copy() - data[self.mask] = data.min() - 1 + data = self._data.copy() + data[self._mask] = data.min() - 1 return data @classmethod @@ -449,7 +449,7 @@ def cmp_method(self, other): op_name = op.__name__ mask = None if isinstance(other, IntegerArray): - other, mask = other.data, other.mask + other, mask = other._data, other._mask elif is_list_like(other): other = np.asarray(other) if other.ndim > 0 and len(self) != len(other): @@ -459,13 +459,13 @@ def cmp_method(self, other): # comparisons, this will raise in the future with warnings.catch_warnings(record=True): with np.errstate(all='ignore'): - result = op(self.data, other) + result = op(self._data, other) # nans propagate if mask is None: - mask = self.mask + mask = self._mask else: - mask = self.mask | mask + mask = self._mask | mask result[mask] = True if op_name == 'ne' else False return result @@ -521,7 +521,7 @@ def integer_arithmetic_method(self, other): other = getattr(other, 'values', other) if isinstance(other, IntegerArray): - other, mask = other.data, other.mask + other, mask = other._data, other._mask elif getattr(other, 'ndim', 0) > 1: raise NotImplementedError( "can only perform ops with 1-d structures") @@ -539,12 +539,12 @@ def integer_arithmetic_method(self, other): # nans propagate if mask is None: - mask = self.mask + mask = self._mask else: - mask = self.mask | mask + mask = self._mask | mask with np.errstate(all='ignore'): - result = op(self.data, other) + result = op(self._data, other) # divmod returns a tuple if op_name == 'divmod': diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/integer/test_integer.py index a8bfda82f0211..e98e559858e2b 100644 --- a/pandas/tests/extension/integer/test_integer.py +++ b/pandas/tests/extension/integer/test_integer.py @@ -171,7 +171,7 @@ def _check_op(self, s, op_name, other, exc=None): # integer result type else: - rs = pd.Series(s.values.data) + rs = pd.Series(s.values._data) expected = op(rs, other) self._check_op_integer(result, expected, mask, s, op_name, other) @@ -309,10 +309,10 @@ def _compare_other(self, s, data, op_name, other): # array result = op(s, other) - expected = pd.Series(op(data.data, other)) + expected = pd.Series(op(data._data, other)) # fill the nan locations - expected[data.mask] = True if op_name == '__ne__' else False + expected[data._mask] = True if op_name == '__ne__' else False tm.assert_series_equal(result, expected) @@ -320,11 +320,11 @@ def _compare_other(self, s, data, op_name, other): s = pd.Series(data) result = op(s, other) - expected = pd.Series(data.data) + expected = pd.Series(data._data) expected = op(expected, other) # fill the nan locations - expected[data.mask] = True if op_name == '__ne__' else False + expected[data._mask] = True if 
op_name == '__ne__' else False tm.assert_series_equal(result, expected) From e5b86415d9ef2ab87de2838d71029e5bb0f4b995 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 7 Jul 2018 16:22:56 -0500 Subject: [PATCH 06/20] add dtype to to_integer_array --- pandas/core/arrays/integer.py | 21 +++++++++++-------- .../tests/extension/integer/test_integer.py | 13 ++++++------ 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 0bdadbd089e93..e1dd98cfb4754 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -74,11 +74,13 @@ def construct_from_string(cls, string): "'{}'".format(cls, string)) -def to_integer_array(values): +def to_integer_array(values, dtype=None): """ Parameters ---------- values : 1D list-like + dtype : dtype, optional + dtype to coerce Returns ------- @@ -88,14 +90,6 @@ def to_integer_array(values): ------ TypeError if incompatible types """ - values = np.array(values, copy=False) - try: - dtype = _dtypes[str(values.dtype)] - except KeyError: - if is_float_dtype(values): - return IntegerArray(values) - - raise TypeError("Incompatible dtype for {}".format(values.dtype)) return IntegerArray(values, dtype=dtype, copy=False) @@ -115,9 +109,18 @@ def coerce_to_array(values, dtype, mask=None, copy=False): ------- tuple of (values, mask) """ + if dtype is not None: + if not issubclass(type(dtype), _IntegerDtype): + try: + dtype = _dtypes[str(np.dtype(dtype))] + except KeyError: + raise ValueError("invalid dtype specified {}".format(dtype)) if isinstance(values, IntegerArray): values, mask = values._data, values._mask + if dtype is not None: + values = values.astype(dtype.numpy_dtype, copy=False) + if copy: values = values.copy() mask = mask.copy() diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/integer/test_integer.py index e98e559858e2b..65a40ecfcf352 100644 --- a/pandas/tests/extension/integer/test_integer.py +++ b/pandas/tests/extension/integer/test_integer.py @@ -567,14 +567,15 @@ def test_to_integer_array_error(values): @pytest.mark.parametrize( - 'values, dtype', + 'values, to_dtype, result_dtype', [ - (np.array([1], dtype='int64'), Int64Dtype), - (np.array([1, np.nan]), Int64Dtype)]) -def test_to_integer_array(values, dtype): + (np.array([1], dtype='int64'), None, Int64Dtype), + (np.array([1, np.nan]), None, Int64Dtype), + (np.array([1, np.nan]), 'int8', Int8Dtype)]) +def test_to_integer_array(values, to_dtype, result_dtype): # convert existing arrays to IntegerArrays - result = to_integer_array(values) - expected = IntegerArray(values, dtype=dtype) + result = to_integer_array(values, dtype=to_dtype) + expected = IntegerArray(values, dtype=result_dtype()) tm.assert_extension_array_equal(result, expected) From d073e57765efc0caf7bfcc3e65844ae44ad38ee5 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 7 Jul 2018 16:48:00 -0500 Subject: [PATCH 07/20] remove uneeded code & copies --- pandas/core/arrays/integer.py | 6 +----- pandas/core/indexes/base.py | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index e1dd98cfb4754..4202411306111 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -148,10 +148,6 @@ def coerce_to_array(values, dtype, mask=None, copy=False): if not mask.ndim == 1: raise TypeError("mask must be a 1D list-like") - # avoid float->int numpy conversion issues - if is_object_dtype(values): - mask |= isna(values) - 
# infer dtype if needed if dtype is None: if is_integer_dtype(values): @@ -270,7 +266,7 @@ def take(self, indexer, allow_fill=False, fill_value=None): result[fill_mask] = fill_value mask = mask ^ fill_mask - return type(self)(result, mask=mask, dtype=self.dtype) + return type(self)(result, mask=mask, dtype=self.dtype, copy=False) def copy(self, deep=False): data, mask = self._data, self._mask diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 8f5dbb6c8cf57..11cc92773f793 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -290,7 +290,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, # coerce to the object dtype data = data.astype(object) - return Index(data, dtype=object, copy=True, name=name, + return Index(data, dtype=object, copy=copy, name=name, **kwargs) # index-like From 2f0818181659e07ba4df61508c025a2e64251f64 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 7 Jul 2018 17:31:15 -0500 Subject: [PATCH 08/20] handle numpy scalars & more tests --- pandas/core/arrays/integer.py | 24 ++++--------------- pandas/core/ops.py | 16 +++++++++---- .../tests/extension/integer/test_integer.py | 10 ++++++++ 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 4202411306111..51dafabc943e7 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -16,7 +16,6 @@ from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.dtypes import registry from pandas.core.dtypes.missing import isna, notna -from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.io.formats.printing import ( format_object_summary, format_object_attrs, default_pprint) @@ -487,27 +486,14 @@ def _maybe_mask_result(self, result, mask, other, op_name): if is_float_dtype(result): mask |= (result == np.inf) | (result == -np.inf) - # floor div can be a float or an integer dependending - # on the operands - if (op_name in ['rfloordiv', 'floordiv'] and - (is_float_dtype(other) or is_float(other))): + # if we have a float operand we are by-definition + # a float result + # or our op is a divide + if ((is_float_dtype(other) or is_float(other)) or + (op_name in ['rtruediv', 'truediv', 'rdiv', 'div'])): result[mask] = np.nan return result - # by definition a float result - elif op_name in ['rtruediv', 'truediv', 'rdiv', 'div']: - result[mask] = np.nan - return result - - elif is_float_dtype(result): - # if our float result, try to downcast if possible - # if remains float, then mask and return as float - nonans = result[notna(result)] - maybe = maybe_downcast_to_dtype(nonans, self.dtype.numpy_dtype) - if not is_integer_dtype(maybe): - result[mask] = np.nan - return result - return type(self)(result, mask=mask, dtype=self.dtype, copy=False) @classmethod diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 70f64e1aef9cf..b7422ede0faac 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -13,7 +13,7 @@ import numpy as np import pandas as pd -from pandas._libs import algos as libalgos, ops as libops +from pandas._libs import lib, algos as libalgos, ops as libops from pandas import compat from pandas.util._decorators import Appender @@ -1062,10 +1062,16 @@ def dispatch_to_extension_op(op, left, right): if is_extension_array_dtype(left): new_left = left.values - if (isinstance(right, np.ndarray) or - (is_extension_array_dtype(right) and - type(left) != type(right))): - new_right = list(right) + if isinstance(right, np.ndarray): + + # 
handle numpy scalars, this is a PITA
+            # TODO(jreback)
+            new_right = lib.item_from_zerodim(right)
+            if is_scalar(new_right):
+                new_right = [new_right]
+            new_right = list(new_right)
+        elif is_extension_array_dtype(right) and type(left) != type(right):
+            new_right = list(right)
         else:
             new_right = right
 
diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/integer/test_integer.py
index 65a40ecfcf352..6a6c1f27a51cd 100644
--- a/pandas/tests/extension/integer/test_integer.py
+++ b/pandas/tests/extension/integer/test_integer.py
@@ -271,6 +271,16 @@ def test_arith_coerce_scalar(self, data, all_arithmetic_operators):
         other = 0.01
         self._check_op(s, op, other)
 
+    @pytest.mark.parametrize("other", [1., 1.0, np.array(1.), np.array([1.])])
+    def test_arithmetic_conversion(self, all_arithmetic_operators, other):
+        # if we have a float operand we should have a float result
+        # even if it is equal to an integer
+        op = self.get_op_from_name(all_arithmetic_operators)
+
+        s = pd.Series([1, 2, 3], dtype='Int64')
+        result = op(s, other)
+        assert result.dtype is np.dtype('float')
+
     def test_error(self, data, all_arithmetic_operators):
 
         # invalid ops

From e6533ddc2cdbfe5b1741c7e5b3efd57605b488fd Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Sun, 8 Jul 2018 08:34:44 -0500
Subject: [PATCH 09/20] clean up / test astype

---
 pandas/core/dtypes/common.py                 | 19 +++++
 pandas/core/internals.py                     | 11 +++-
 pandas/core/series.py                        | 14 +----
 .../tests/extension/integer/test_integer.py  | 61 +++++++++++++++++++
 pandas/tests/series/test_constructors.py     |  9 ++-
 5 files changed, 95 insertions(+), 19 deletions(-)

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index ef4f36dc6df33..f893c8cae0b90 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -1714,6 +1714,11 @@ def is_extension_array_dtype(arr_or_dtype):
     if isinstance(arr_or_dtype, (ABCIndexClass, ABCSeries)):
         arr_or_dtype = arr_or_dtype._values
 
+    try:
+        arr_or_dtype = pandas_dtype(arr_or_dtype)
+    except TypeError:
+        pass
+
     return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray))
 
 
@@ -1976,6 +1981,11 @@ def pandas_dtype(dtype):
     Returns
     -------
     np.dtype or a pandas dtype
+
+    Raises
+    ------
+    TypeError if not a dtype
+
     """
 
     # registered extension types
@@ -1987,10 +1997,15 @@ def pandas_dtype(dtype):
     if isinstance(dtype, ExtensionDtype):
         return dtype
 
+    # try a numpy dtype
+    # raise a consistent TypeError if failed
     try:
         npdtype = np.dtype(dtype)
-    except (TypeError, ValueError):
+    except TypeError:
         raise
+    except ValueError:
+        raise TypeError("data type '{}' not understood".format(
+            type(dtype)))
 
     # Any invalid dtype (such as pd.Timestamp) should raise an error.
     # np.dtype(invalid_type).kind = 0 for such objects.
However, this will @@ -2000,6 +2015,6 @@ def pandas_dtype(dtype): if dtype in [object, np.object_, 'object', 'O']: return npdtype elif npdtype.kind == 'O': - raise TypeError('dtype {dtype} not understood'.format(dtype=dtype)) + raise TypeError("dtype '{}' not understood".format(dtype)) return npdtype diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 208d7b8bcf8a7..fb2cb64407302 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -39,7 +39,8 @@ is_re, is_re_compilable, is_scalar, - _get_dtype) + _get_dtype, + pandas_dtype) from pandas.core.dtypes.cast import ( maybe_downcast_to_dtype, maybe_upcast, @@ -631,9 +632,13 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, return self.make_block(Categorical(self.values, dtype=dtype)) + # convert dtypes if needed + try: + dtype = pandas_dtype(dtype) + except TypeError: + pass + # astype processing - if not is_extension_array_dtype(dtype): - dtype = np.dtype(dtype) if is_dtype_equal(self.dtype, dtype): if copy: return self.copy() diff --git a/pandas/core/series.py b/pandas/core/series.py index 83580c86ccbfb..d2823034b4202 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -234,13 +234,8 @@ def __init__(self, data=None, index=None, dtype=None, name=None, '`index` argument. `copy` must ' 'be False.') - elif is_extension_array_dtype(data) and dtype is not None: - if not data.dtype.is_dtype(dtype): - raise ValueError("Cannot specify a dtype '{}' with an " - "extension array of a different " - "dtype ('{}').".format(dtype, - data.dtype)) - + elif is_extension_array_dtype(data): + pass elif (isinstance(data, types.GeneratorType) or (compat.PY3 and isinstance(data, map))): data = list(data) @@ -4131,10 +4126,7 @@ def _try_cast(arr, take_fast_path): subarr = data if dtype is not None and not data.dtype.is_dtype(dtype): - msg = ("Cannot coerce extension array to dtype '{typ}'. 
" - "Do the coercion before passing to the constructor " - "instead.".format(typ=dtype)) - raise ValueError(msg) + subarr = data.astype(dtype) if copy: subarr = data.copy() diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/integer/test_integer.py index 6a6c1f27a51cd..8a220e9019572 100644 --- a/pandas/tests/extension/integer/test_integer.py +++ b/pandas/tests/extension/integer/test_integer.py @@ -511,6 +511,67 @@ def test_astype_index(self, all_data, dropna): expected = idx.astype(object).astype(dtype) self.assert_index_equal(result, expected) + def test_astype(self, all_data): + all_data = all_data[:10] + + ints = all_data[~all_data.isna()] + mixed = all_data + dtype = Int8Dtype() + + # coerce to same type - ints + s = pd.Series(ints) + result = s.astype(all_data.dtype) + expected = pd.Series(ints) + self.assert_series_equal(result, expected) + + # coerce to same other - ints + s = pd.Series(ints) + result = s.astype(dtype) + expected = pd.Series(ints, dtype=dtype) + self.assert_series_equal(result, expected) + + # coerce to same numpy_dtype - ints + s = pd.Series(ints) + result = s.astype(all_data.dtype.numpy_dtype) + expected = pd.Series(ints._data.astype( + all_data.dtype.numpy_dtype)) + tm.assert_series_equal(result, expected) + + # coerce to same type - mixed + s = pd.Series(mixed) + result = s.astype(all_data.dtype) + expected = pd.Series(mixed) + self.assert_series_equal(result, expected) + + # coerce to same other - mixed + s = pd.Series(mixed) + result = s.astype(dtype) + expected = pd.Series(mixed, dtype=dtype) + self.assert_series_equal(result, expected) + + # coerce to same numpy_dtype - mixed + s = pd.Series(mixed) + with pytest.raises(ValueError): + s.astype(all_data.dtype.numpy_dtype) + + # coerce to object + s = pd.Series(mixed) + result = s.astype('object') + expected = pd.Series(np.asarray(mixed)) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('dtype', [Int8Dtype(), 'Int8']) + def test_astype_specific_casting(self, dtype): + s = pd.Series([1, 2, 3], dtype='Int64') + result = s.astype(dtype) + expected = pd.Series([1, 2, 3], dtype='Int8') + self.assert_series_equal(result, expected) + + s = pd.Series([1, 2, 3, None], dtype='Int64') + result = s.astype(dtype) + expected = pd.Series([1, 2, 3, None], dtype='Int8') + self.assert_series_equal(result, expected) + class TestGroupby(BaseInteger, base.BaseGroupbyTests): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index fe224436c52e6..e95e41bbdeefa 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -226,10 +226,13 @@ def test_constructor_categorical(self): res = Series(cat) tm.assert_categorical_equal(res.values, cat) + # can cast to a new dtype + result = Series(pd.Categorical([1, 2, 3]), + dtype='int64') + expected = pd.Series([1, 2, 3], dtype='int64') + tm.assert_series_equal(result, expected) + # GH12574 - pytest.raises( - ValueError, lambda: Series(pd.Categorical([1, 2, 3]), - dtype='int64')) cat = Series(pd.Categorical([1, 2, 3]), dtype='category') assert is_categorical_dtype(cat) assert is_categorical_dtype(cat.dtype) From 35a87387db87ceb6a96621b428b856e2416c6a2d Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 8 Jul 2018 10:24:58 -0500 Subject: [PATCH 10/20] fix up dtype comparison tests --- pandas/core/dtypes/common.py | 4 +--- .../tests/extension/decimal/test_decimal.py | 23 +++++++------------ pandas/tests/extension/test_common.py | 15 +++++++++--- 3 
files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index f893c8cae0b90..d83d0224ce84b 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -2001,9 +2001,7 @@ def pandas_dtype(dtype): # raise a consistent TypeError if failed try: npdtype = np.dtype(dtype) - except TypeError: - raise - except ValueError: + except Exception: raise TypeError("data type '{}' not understood".format( type(dtype))) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index b851ad1d06d4d..d40e318b7319c 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -178,35 +178,28 @@ def test_series_constructor_coerce_data_to_extension_dtype_raises(): pd.Series([0, 1, 2], dtype=DecimalDtype()) -def test_series_constructor_with_same_dtype_ok(): +def test_series_constructor_with_dtype(): arr = DecimalArray([decimal.Decimal('10.0')]) result = pd.Series(arr, dtype=DecimalDtype()) expected = pd.Series(arr) tm.assert_series_equal(result, expected) - -def test_series_constructor_coerce_extension_array_to_dtype_raises(): - arr = DecimalArray([decimal.Decimal('10.0')]) - xpr = r"Cannot specify a dtype 'int64' .* \('decimal'\)." - - with tm.assert_raises_regex(ValueError, xpr): - pd.Series(arr, dtype='int64') + result = pd.Series(arr, dtype='int64') + expected = pd.Series([10]) + tm.assert_series_equal(result, expected) -def test_dataframe_constructor_with_same_dtype_ok(): +def test_dataframe_constructor_with_dtype(): arr = DecimalArray([decimal.Decimal('10.0')]) result = pd.DataFrame({"A": arr}, dtype=DecimalDtype()) expected = pd.DataFrame({"A": arr}) tm.assert_frame_equal(result, expected) - -def test_dataframe_constructor_with_different_dtype_raises(): arr = DecimalArray([decimal.Decimal('10.0')]) - - xpr = "Cannot coerce extension array to dtype 'int64'. 
" - with tm.assert_raises_regex(ValueError, xpr): - pd.DataFrame({"A": arr}, dtype='int64') + result = pd.DataFrame({"A": arr}, dtype='int64') + expected = pd.DataFrame({"A": [10]}) + tm.assert_frame_equal(result, expected) class TestArithmeticOps(BaseDecimal, base.BaseArithmeticOpsTests): diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index 589134632c7e9..d71587136e489 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -22,7 +22,16 @@ def __array__(self, dtype): @property def dtype(self): - return self.data.dtype + return DummyDtype() + + def astype(self, dtype, copy=True): + # we don't support anything but a single dtype + if isinstance(dtype, DummyDtype): + if copy: + return type(self)(self.data) + return self + + return np.array(self, dtype=dtype, copy=copy) class TestExtensionArrayDtype(object): @@ -61,10 +70,10 @@ def test_astype_no_copy(): arr = DummyArray(np.array([1, 2, 3], dtype=np.int64)) result = arr.astype(arr.dtype, copy=False) - assert arr.data is result + assert arr is result result = arr.astype(arr.dtype) - assert arr.data is not result + assert arr is not result @pytest.mark.parametrize('dtype', [ From 68efb028db23e7fa7953d0d3352a5433765e1658 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 8 Jul 2018 10:36:59 -0500 Subject: [PATCH 11/20] fixup quotes in interval index error messages --- pandas/core/dtypes/common.py | 2 +- pandas/tests/indexes/interval/test_astype.py | 2 +- pandas/tests/indexes/interval/test_construction.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index d83d0224ce84b..244e1526f5d92 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -2003,7 +2003,7 @@ def pandas_dtype(dtype): npdtype = np.dtype(dtype) except Exception: raise TypeError("data type '{}' not understood".format( - type(dtype))) + dtype)) # Any invalid dtype (such as pd.Timestamp) should raise an error. # np.dtype(invalid_type).kind = 0 for such objects. 
However, this will diff --git a/pandas/tests/indexes/interval/test_astype.py b/pandas/tests/indexes/interval/test_astype.py index b3a4bfa878c3f..1e96ac730a0eb 100644 --- a/pandas/tests/indexes/interval/test_astype.py +++ b/pandas/tests/indexes/interval/test_astype.py @@ -56,7 +56,7 @@ def test_astype_cannot_cast(self, index, dtype): index.astype(dtype) def test_astype_invalid_dtype(self, index): - msg = 'data type "fake_dtype" not understood' + msg = "data type 'fake_dtype' not understood" with tm.assert_raises_regex(TypeError, msg): index.astype('fake_dtype') diff --git a/pandas/tests/indexes/interval/test_construction.py b/pandas/tests/indexes/interval/test_construction.py index 3745f79d7d65d..1bcfcee3724da 100644 --- a/pandas/tests/indexes/interval/test_construction.py +++ b/pandas/tests/indexes/interval/test_construction.py @@ -138,7 +138,7 @@ def test_generic_errors(self, constructor): constructor(dtype='int64', **filler) # invalid dtype - msg = 'data type "invalid" not understood' + msg = "data type 'invalid' not understood" with tm.assert_raises_regex(TypeError, msg): constructor(dtype='invalid', **filler) From c9e8f7db945955ca0c2793103bdbafec032bca4f Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 8 Jul 2018 10:48:46 -0500 Subject: [PATCH 12/20] some optimization on dtype checking --- pandas/core/dtypes/common.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 244e1526f5d92..96cf2b8977b44 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1809,6 +1809,9 @@ def _get_dtype(arr_or_dtype): TypeError : The passed in object is None. """ + # TODO(extension) + # replace with pandas_dtype + if arr_or_dtype is None: raise TypeError("Cannot deduce dtype from null object") if isinstance(arr_or_dtype, np.dtype): @@ -1856,6 +1859,8 @@ def _get_dtype_type(arr_or_dtype): passed in array or dtype object. 
""" + # TODO(extension) + # replace with pandas_dtype if isinstance(arr_or_dtype, np.dtype): return arr_or_dtype.type elif isinstance(arr_or_dtype, type): @@ -1988,13 +1993,19 @@ def pandas_dtype(dtype): """ + # short-circuit + if isinstance(dtype, np.ndarray): + return dtype.dtype + elif isinstance(dtype, np.dtype): + return dtype + # registered extension types result = registry.find(dtype) if result is not None: return result # un-registered extension types - if isinstance(dtype, ExtensionDtype): + elif isinstance(dtype, ExtensionDtype): return dtype # try a numpy dtype From ec2c63275f7345b24a995abdb4444040d694f2e0 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 8 Jul 2018 12:09:26 -0500 Subject: [PATCH 13/20] don't force repr on invalid dtype --- pandas/core/dtypes/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 96cf2b8977b44..d5ba87b9fecc8 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -2013,6 +2013,9 @@ def pandas_dtype(dtype): try: npdtype = np.dtype(dtype) except Exception: + # we don't want to force a repr of the non-string + if not isinstance(dtype, string_types): + raise TypeError("data type not understood") raise TypeError("data type '{}' not understood".format( dtype)) From 953de123371db86d1fa758f3eabd562f96c838f0 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 9 Jul 2018 18:05:20 -0400 Subject: [PATCH 14/20] remove uneeded try/catch; review comments --- pandas/core/arrays/base.py | 2 +- pandas/core/arrays/integer.py | 2 +- pandas/core/internals.py | 5 +---- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 139124e47ed45..6b213dcb870a9 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -96,7 +96,7 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): scalars : Sequence Each element will be an instance of the scalar type for this array, ``cls.dtype.type``. - dtype : Dtype, optional + dtype : dtype, optional construct for this particular dtype. This should be a Dtype compatible with the ExtensionArray. 
copy : boolean, default False diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 51dafabc943e7..a78e78681ddec 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -3,7 +3,7 @@ import copy import numpy as np -from pandas.compat import u +from pandas.compat import u, range from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass from pandas.util._decorators import cache_readonly from pandas.compat import set_function_name diff --git a/pandas/core/internals.py b/pandas/core/internals.py index fb2cb64407302..df0a0a06adf26 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -633,10 +633,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, return self.make_block(Categorical(self.values, dtype=dtype)) # convert dtypes if needed - try: - dtype = pandas_dtype(dtype) - except TypeError: - pass + dtype = pandas_dtype(dtype) # astype processing if is_dtype_equal(self.dtype, dtype): From e74d10b7c226be2f856e17231eaea06cac42a553 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 9 Jul 2018 19:55:52 -0400 Subject: [PATCH 15/20] only allow safe casting --- pandas/core/arrays/integer.py | 28 +++++++++++++++++-- .../tests/extension/integer/test_integer.py | 17 +++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index a78e78681ddec..2df0f5f2bc9d7 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -92,6 +92,26 @@ def to_integer_array(values, dtype=None): return IntegerArray(values, dtype=dtype, copy=False) +def safe_cast(values, dtype, copy): + """ + Safely cast the values to the dtype if they + are equivalent, meaning floats must be equivalent to the + ints. 
+ + """ + + try: + return values.astype(dtype, casting='safe', copy=copy) + except TypeError: + + casted = values.astype(dtype, copy=copy) + if (casted == values).all(): + return casted + + raise TypeError("cannot safely cast non-equivalent {} to {}".format( + values.dtype, np.dtype(dtype))) + + def coerce_to_array(values, dtype, mask=None, copy=False): """ Coerce the input values array to numpy arrays with a mask @@ -156,14 +176,16 @@ def coerce_to_array(values, dtype, mask=None, copy=False): else: dtype = dtype.type + # if we are float, let's make sure that we can + # safely cast + # we copy as need to coerce here if mask.any(): values = values.copy() values[mask] = 1 - - values = values.astype(dtype) + values = safe_cast(values, dtype, copy=False) else: - values = values.astype(dtype, copy=False) + values = safe_cast(values, dtype, copy=False) return values, mask diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/integer/test_integer.py index 8a220e9019572..58beb4ae75152 100644 --- a/pandas/tests/extension/integer/test_integer.py +++ b/pandas/tests/extension/integer/test_integer.py @@ -572,6 +572,23 @@ def test_astype_specific_casting(self, dtype): expected = pd.Series([1, 2, 3, None], dtype='Int8') self.assert_series_equal(result, expected) + def test_construct_cast_invalid(self, dtype): + + msg = "cannot safely" + arr = [1.2, 2.3, 3.7] + with tm.assert_raises_regex(TypeError, msg): + IntegerArray(arr, dtype=dtype) + + with tm.assert_raises_regex(TypeError, msg): + pd.Series(arr).astype(dtype) + + arr = [1.2, 2.3, 3.7, np.nan] + with tm.assert_raises_regex(TypeError, msg): + IntegerArray(arr, dtype=dtype) + + with tm.assert_raises_regex(TypeError, msg): + pd.Series(arr).astype(dtype) + class TestGroupby(BaseInteger, base.BaseGroupbyTests): From 23afee1fa32b6df43c919d26716d0c796ec7a535 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 10 Jul 2018 20:23:23 -0400 Subject: [PATCH 16/20] review comments --- doc/source/whatsnew/v0.24.0.txt | 15 +++++------ pandas/core/arrays/base.py | 4 +-- pandas/core/arrays/integer.py | 25 ++++++++++++------- pandas/tests/extension/decimal/array.py | 2 +- .../tests/extension/decimal/test_decimal.py | 1 - .../tests/extension/integer/test_integer.py | 1 - pandas/tests/extension/json/test_json.py | 1 - 7 files changed, 27 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 0c9b0ce98ede0..067adc5415e91 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -29,12 +29,13 @@ ways of adding operator support. .. _whatsnew_0240.enhancements.intna: -Integer NA Support -^^^^^^^^^^^^^^^^^^ +Optional Integer NA Support +^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Pandas has gained the ability to hold integer dtypes with missing values. This long requested feature is enabled thru the use of ``ExtensionTypes`` . Here is an example of the usage. +Pandas has gained the ability to hold integer dtypes with missing values. This long requested feature is enabled through the use of :ref:`extension types `. +Here is an example of the usage. -We can construct a ``Series`` with the specified dtype. The dtype string ``Int64`` is a pandas ``ExtensionDtype``. Specifying an list or array using the traditional missing value +We can construct a ``Series`` with the specified dtype. The dtype string ``Int64`` is a pandas ``ExtensionDtype``. Specifying a list or array using the traditional missing value marker of ``np.nan`` will infer to integer dtype. 
The display of the ``Series`` will also use the ``NaN`` to indicate missing values in string outputs. (:issue:`20700`, :issue:`20747`)
 
 .. ipython:: python
@@ -57,12 +58,12 @@ Operations on these dtypes will propagate ``NaN`` as other pandas operations.
     s.iloc[1:3]
 
     # operate with other dtypes
-    s + s.iloc[1:3]
+    s + s.iloc[1:3].astype('Int8')
 
     # coerce when needed
     s + 0.01
 
-These dtypes can operate as part of ``DataFrames``.
+These dtypes can operate as part of a ``DataFrame``.
 
 .. ipython:: python
 
@@ -80,7 +81,7 @@ These dtypes can be merged & reshaped & casted.
 
 .. warning::
 
-   The Integer NA support currently uses the captilized dtype version, e.g. ``Int8`` as compared to the traditional ``int8``. This maybe changed at a future date.
+   The Integer NA support currently uses the capitalized dtype version, e.g. ``Int8`` as compared to the traditional ``int8``. This may be changed at a future date.
 
 .. _whatsnew_0240.enhancements.read_html:
 
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 6b213dcb870a9..01ed085dd2b9f 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -97,10 +97,10 @@ def _from_sequence(cls, scalars, dtype=None, copy=False):
             Each element will be an instance of the scalar type for this
             array, ``cls.dtype.type``.
         dtype : dtype, optional
-            construct for this particular dtype. This should be a Dtype
+            Construct for this particular dtype. This should be a Dtype
             compatible with the ExtensionArray.
         copy : boolean, default False
-            if True, copy the underlying data
+            If True, copy the underlying data.
         Returns
         -------
         ExtensionArray
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 2df0f5f2bc9d7..c126117060c3d 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -3,15 +3,18 @@
 import copy
 import numpy as np
 
-from pandas.compat import u, range
-from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass
+from pandas._libs.lib import infer_dtype
 from pandas.util._decorators import cache_readonly
+from pandas.compat import u, range
 from pandas.compat import set_function_name
-from pandas.api.types import (is_integer, is_scalar, is_float,
-                              is_float_dtype, is_integer_dtype,
-                              is_object_dtype,
-                              is_list_like, infer_dtype)
+
+from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass
+from pandas.core.dtypes.common import (
+    is_integer, is_scalar, is_float,
+    is_float_dtype,
+    is_integer_dtype,
+    is_object_dtype,
+    is_list_like)
 from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
 from pandas.core.dtypes.base import ExtensionDtype
 from pandas.core.dtypes.dtypes import registry
@@ -75,6 +78,8 @@ def construct_from_string(cls, string):
 
 def to_integer_array(values, dtype=None):
     """
+    Infer and return an integer array of the values.
+
     Parameters
     ----------
     values : 1D list-like
@@ -83,7 +88,7 @@ def to_integer_array(values, dtype=None):
 
     Returns
     -------
-    infer and return an integer array
+    IntegerArray
 
     Raises
    ------
@@ -235,7 +240,9 @@ def __getitem__(self, item):
                           dtype=self.dtype)
 
     def _coerce_to_ndarray(self):
-        """ coerce to an ndarary, preserving my scalar types """
+        """
+        coerce to an ndarray of object dtype
+        """
 
         # TODO(jreback) make this better
         data = self._data.astype(object)
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
index 373da1af9ef24..108b8874b3ac5 100644
--- a/pandas/tests/extension/decimal/array.py
+++ b/pandas/tests/extension/decimal/array.py
@@ -54,7 +54,7 @@ def __init__(self, values, dtype=None, copy=False):
 
     @classmethod
     def _from_sequence(cls, scalars, dtype=None, copy=False):
-        return cls(scalars, copy=copy)
+        return cls(scalars)
 
     @classmethod
     def _from_factorized(cls, values, original):
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index d40e318b7319c..6e614274457fb 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -11,7 +11,6 @@
 from .array import DecimalDtype, DecimalArray
 
 
-@pytest.fixture
 def make_data():
     return [decimal.Decimal(random.random()) for _ in range(100)]
 
diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/integer/test_integer.py
index 58beb4ae75152..77320a2de62c4 100644
--- a/pandas/tests/extension/integer/test_integer.py
+++ b/pandas/tests/extension/integer/test_integer.py
@@ -15,7 +15,6 @@
     UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype)
 
 
-@pytest.fixture
 def make_data():
     return (list(range(8)) +
             [np.nan] +
diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
index 126607907a8da..520c303f1990b 100644
--- a/pandas/tests/extension/json/test_json.py
+++ b/pandas/tests/extension/json/test_json.py
@@ -15,7 +15,6 @@
 pytestmark = pytest.mark.skipif(PY2, reason="Py2 doesn't have a UserDict")
 
 
-@pytest.fixture
 def make_data():
     # TODO: Use a regular dict.
See _NDFrameIndexer._setitem_with_indexer return [collections.UserDict([ From 86362f6ae34bd764abbbda5a9903d2eda1e32544 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 11 Jul 2018 06:12:37 -0400 Subject: [PATCH 17/20] xfail reduce ops --- pandas/tests/extension/base/ops.py | 13 ++++++++++--- pandas/tests/extension/decimal/test_decimal.py | 2 +- pandas/tests/extension/integer/test_integer.py | 12 ++++++++++-- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index f8d2f8314d2b1..16b37564643bf 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -52,13 +52,20 @@ def _check_divmod_op(self, s, op, other, exc=NotImplementedError): class BaseArithmeticOpsTests(BaseOpsUtil): """Various Series and DataFrame arithmetic ops methods.""" - def test_arith_scalar(self, data, all_arithmetic_operators): - # scalar + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + # series & scalar op_name = all_arithmetic_operators s = pd.Series(data) self.check_opname(s, op_name, s.iloc[0], exc=TypeError) - def test_arith_array(self, data, all_arithmetic_operators): + @pytest.mark.xfail(reason="_reduce needs implementation") + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + # frame & scalar + op_name = all_arithmetic_operators + df = pd.DataFrame({'A': data}) + self.check_opname(df, op_name, data[0], exc=TypeError) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): # ndarray & other series op_name = all_arithmetic_operators s = pd.Series(data) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 6e614274457fb..20ff94fe33d0a 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -207,7 +207,7 @@ def check_opname(self, s, op_name, other, exc=None): super(TestArithmeticOps, self).check_opname(s, op_name, other, exc=None) - def test_arith_array(self, data, all_arithmetic_operators): + def test_arith_series_with_array(self, data, all_arithmetic_operators): op_name = all_arithmetic_operators s = pd.Series(data) diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/integer/test_integer.py index 77320a2de62c4..451f7488bd38a 100644 --- a/pandas/tests/extension/integer/test_integer.py +++ b/pandas/tests/extension/integer/test_integer.py @@ -247,14 +247,22 @@ def test_arith_integer_array(self, data, all_arithmetic_operators): self._check_op(s, op, rhs) - def test_arith_scalar(self, data, all_arithmetic_operators): + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): # scalar op = all_arithmetic_operators s = pd.Series(data) self._check_op(s, op, 1, exc=TypeError) - def test_arith_array(self, data, all_arithmetic_operators): + @pytest.mark.xfail(run=False, reason="_reduce needs implementation") + def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): + # frame & scalar + op = all_arithmetic_operators + + df = pd.DataFrame({'A': data}) + self._check_op(df, op, 1, exc=TypeError) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): # ndarray & other series op = all_arithmetic_operators From 1bdeb187dded618f177e737f8822ddc022b19466 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 11 Jul 2018 06:38:12 -0400 Subject: [PATCH 18/20] better type checking for extension types --- pandas/core/dtypes/common.py | 1 - 
pandas/core/ops.py | 2 +- pandas/tests/extension/base/dtype.py | 24 +++++++++++++++++++ pandas/tests/extension/base/ops.py | 2 +- .../extension/category/test_categorical.py | 5 ++-- 5 files changed, 29 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index d5ba87b9fecc8..81ed515d96c95 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1992,7 +1992,6 @@ def pandas_dtype(dtype): TypeError if not a dtype """ - # short-circuit if isinstance(dtype, np.ndarray): return dtype.dtype diff --git a/pandas/core/ops.py b/pandas/core/ops.py index b7422ede0faac..ec834f84315ff 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1356,7 +1356,7 @@ def wrapper(self, other, axis=None): elif (is_extension_array_dtype(self) or (is_extension_array_dtype(other) and - not is_categorical_dtype(other))): + not is_scalar(other))): return dispatch_to_extension_op(op, self, other) elif isinstance(other, ABCSeries): diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index 52a12816c8722..7e582dc58194b 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -1,6 +1,7 @@ import pytest import numpy as np import pandas as pd +from pandas.util import testing as tm from .base import BaseExtensionTests @@ -54,3 +55,26 @@ def test_array_type(self, data, dtype): def test_array_type_with_arg(self, data, dtype): with pytest.raises(NotImplementedError): dtype.construct_array_type('foo') + + def test_check_dtype(self, data, dtype): + # check equivalency for using .dtypes + df = pd.DataFrame({'A': pd.Series(data, dtype=dtype), + 'B': data, + 'C': 'foo', 'D': 1}) + + # np.dtype('int64') == 'Int64' == 'int64' + # so can't distinguish + if dtype.name == 'Int64': + expected = pd.Series([True, True, False, True], + index=list('ABCD')) + else: + expected = pd.Series([True, True, False, False], + index=list('ABCD')) + + result = df.dtypes == dtype.name + tm.assert_series_equal(result, expected) + + expected = pd.Series([True, True, False, False], + index=list('ABCD')) + result = df.dtypes.apply(str) == dtype.name + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 16b37564643bf..f7bfdb8ec218a 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -58,7 +58,7 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators): s = pd.Series(data) self.check_opname(s, op_name, s.iloc[0], exc=TypeError) - @pytest.mark.xfail(reason="_reduce needs implementation") + @pytest.mark.xfail(run=False, reason="_reduce needs implementation") def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): # frame & scalar op_name = all_arithmetic_operators diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py index 715e8bd40a2d0..76f6b03907ef8 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -189,11 +189,12 @@ class TestCasting(base.BaseCastingTests): class TestArithmeticOps(base.BaseArithmeticOpsTests): - def test_arith_scalar(self, data, all_arithmetic_operators): + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): op_name = all_arithmetic_operators if op_name != '__rmod__': - super(TestArithmeticOps, self).test_arith_scalar(data, op_name) + super(TestArithmeticOps, 
self).test_arith_series_with_scalar( + data, op_name) else: pytest.skip('rmod never called when string is first argument') From 88858358898c3e013a4b5b03717cc25b6a896617 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 12 Jul 2018 06:01:53 -0400 Subject: [PATCH 19/20] use a better testing idiom --- pandas/tests/extension/base/dtype.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index 7e582dc58194b..269017b961b35 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -1,7 +1,6 @@ import pytest import numpy as np import pandas as pd -from pandas.util import testing as tm from .base import BaseExtensionTests @@ -72,9 +71,9 @@ def test_check_dtype(self, data, dtype): index=list('ABCD')) result = df.dtypes == dtype.name - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) expected = pd.Series([True, True, False, False], index=list('ABCD')) result = df.dtypes.apply(str) == dtype.name - tm.assert_series_equal(result, expected) + self.assert_series_equal(result, expected) From 160678647fb0a01b57261e34da853a882d2e55dd Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 16 Jul 2018 06:39:41 -0400 Subject: [PATCH 20/20] interval index compat --- pandas/core/arrays/interval.py | 4 ++-- pandas/tests/extension/base/dtype.py | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 4ad53e16bc439..e78d6d4948023 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -191,8 +191,8 @@ def _simple_new(cls, left, right, closed=None, return result @classmethod - def _from_sequence(cls, scalars): - return cls(scalars) + def _from_sequence(cls, scalars, dtype=None, copy=False): + return cls(scalars, dtype=dtype, copy=copy) @classmethod def _from_factorized(cls, values, original): diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index 269017b961b35..2125458e8a0ba 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -55,7 +55,9 @@ def test_array_type_with_arg(self, data, dtype): with pytest.raises(NotImplementedError): dtype.construct_array_type('foo') - def test_check_dtype(self, data, dtype): + def test_check_dtype(self, data): + dtype = data.dtype + # check equivalency for using .dtypes df = pd.DataFrame({'A': pd.Series(data, dtype=dtype), 'B': data, @@ -70,10 +72,10 @@ def test_check_dtype(self, data, dtype): expected = pd.Series([True, True, False, False], index=list('ABCD')) - result = df.dtypes == dtype.name + result = df.dtypes == str(dtype) self.assert_series_equal(result, expected) expected = pd.Series([True, True, False, False], index=list('ABCD')) - result = df.dtypes.apply(str) == dtype.name + result = df.dtypes.apply(str) == str(dtype) self.assert_series_equal(result, expected)
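
For reference, the behaviour introduced by this patch series can be exercised as below. This is a minimal sketch assuming a pandas build with these patches applied; the ``Int64``/``Int8`` dtype strings, the NaN propagation, and the safe-casting error are taken from the whatsnew entry and the tests above, and everything else is illustrative only::

    import numpy as np
    import pandas as pd

    # construct a Series backed by the new masked IntegerArray;
    # np.nan marks the missing values
    s = pd.Series([1, 2, np.nan], dtype='Int64')

    # arithmetic keeps the extension dtype and propagates the mask (shown as NaN)
    s + 1

    # comparisons fill masked positions with False (True only for !=)
    s == 1

    # casting between integer widths goes through the safe-cast path of PATCH 15
    s.astype('Int8')

    # a lossy float -> integer cast raises instead of silently truncating
    try:
        pd.Series([1.2, 2.3, 3.7]).astype('Int64')
    except TypeError as err:
        print(err)  # "cannot safely cast non-equivalent ..."

    # a float operand (or true division) produces a plain float64 result
    (s + 0.01).dtype

The capitalized dtype strings (``Int64`` as opposed to ``int64``) are the names registered by the ``_IntegerDtype`` subclasses generated and registered at the bottom of ``pandas/core/arrays/integer.py``.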