From 930e99dfdc99a9dfa4d434cb47644dc8c2b6f2c0 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Tue, 3 Jul 2018 18:40:43 -0400
Subject: [PATCH] ENH: add integer-na support via an ExtensionArray

closes #20700
closes #20747
---
 doc/source/whatsnew/v0.24.0.txt               |  57 ++
 pandas/core/arrays/__init__.py                |   3 +
 pandas/core/arrays/base.py                    |  12 +-
 pandas/core/arrays/categorical.py             |   4 +-
 pandas/core/arrays/integer.py                 | 563 ++++++++++++++++
 pandas/core/dtypes/cast.py                    |   3 +-
 pandas/core/dtypes/concat.py                  |   1 +
 pandas/core/indexes/base.py                   |  24 +-
 pandas/core/missing.py                        |   3 +-
 pandas/core/ops.py                            |  78 ++-
 pandas/core/series.py                         |   2 +-
 pandas/tests/extension/base/__init__.py       |   2 +-
 pandas/tests/extension/base/getitem.py        |   6 +-
 pandas/tests/extension/base/ops.py            |  20 +-
 pandas/tests/extension/base/reshaping.py      |  24 +-
 pandas/tests/extension/decimal/array.py       |  11 +-
 .../tests/extension/decimal/test_decimal.py   |   8 +-
 pandas/tests/extension/integer/__init__.py    |   0
 .../tests/extension/integer/test_integer.py   | 604 ++++++++++++++++++
 pandas/tests/extension/json/array.py          |  13 +-
 pandas/tests/extension/json/test_json.py      |  15 +-
 21 files changed, 1386 insertions(+), 67 deletions(-)
 create mode 100644 pandas/core/arrays/integer.py
 create mode 100644 pandas/tests/extension/integer/__init__.py
 create mode 100644 pandas/tests/extension/integer/test_integer.py

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 5d6ed50ca3f26a..7e852186e043f3 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -8,6 +8,7 @@ v0.24.0 (Month XX, 2018)
 New features
 ~~~~~~~~~~~~
 
+
 - ``ExcelWriter`` now accepts ``mode`` as a keyword argument, enabling append to existing workbooks when using the ``openpyxl`` engine (:issue:`3441`)
 
 .. _whatsnew_0240.enhancements.extension_array_operators:
@@ -26,6 +27,61 @@ See the :ref:`ExtensionArray Operator Support
 <extending.extension.operator>` documentation section for details on both
 ways of adding operator support.
 
+.. _whatsnew_0240.enhancements.intna:
+
+Integer NA Support
+^^^^^^^^^^^^^^^^^^
+
+Pandas has gained the ability to hold integer dtypes with missing values. This long-requested feature is enabled through the use of ``ExtensionTypes``. Here is an example of the usage.
+
+We can construct a ``Series`` with the specified dtype. The dtype string ``Int64`` is a pandas ``ExtensionDtype``. Specifying a list or array using the traditional missing value
+marker of ``np.nan`` will infer to integer dtype. The display of the ``Series`` will also use ``NaN`` to indicate missing values in string outputs. (:issue:`20700`, :issue:`20747`)
+
+.. ipython:: python
+
+   s = pd.Series([1, 2, np.nan], dtype='Int64')
+   s
+
+
+Operations on these dtypes will propagate ``NaN``, as other pandas operations do.
+
+.. ipython:: python
+
+   # arithmetic
+   s + 1
+
+   # comparison
+   s == 1
+
+   # indexing
+   s.iloc[1:3]
+
+   # operate with other dtypes
+   s + s.iloc[1:3]
+
+   # coerce when needed
+   s + 0.01
+
+These dtypes can operate as part of ``DataFrames``.
+
+.. ipython:: python
+
+   df = pd.DataFrame({'A': s, 'B': [1, 1, 3], 'C': list('aab')})
+   df
+   df.dtypes
+
+
+These dtypes can be merged, reshaped, and cast.
+
+.. ipython:: python
+
+   pd.concat([df[['A']], df[['B', 'C']]], axis=1).dtypes
+   df['A'].astype(float)
+
+.. warning::
+
+   The Integer NA support currently uses the capitalized dtype version, e.g. ``Int8`` as compared to the traditional ``int8``. This may be changed at a future date.
+
 .. _whatsnew_0240.enhancements.read_html:
 
 ``read_html`` Enhancements
@@ -175,6 +231,7 @@ Previous Behavior:
 ExtensionType Changes
 ^^^^^^^^^^^^^^^^^^^^^
 
+- ``ExtensionArray`` has gained the abstract method ``.dropna()`` (:issue:`21185`)
 - ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore
   the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`)
 - The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`)
diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py
index 1b8a43d4293a58..6e8dafd125bfcc 100644
--- a/pandas/core/arrays/__init__.py
+++ b/pandas/core/arrays/__init__.py
@@ -1,6 +1,9 @@
 from .base import (ExtensionArray,    # noqa
+                   ExtensionOpsMixin,
                    ExtensionScalarOpsMixin)
 from .categorical import Categorical  # noqa
 from .datetimes import DatetimeArrayMixin  # noqa
 from .period import PeriodArrayMixin  # noqa
 from .timedelta import TimedeltaArrayMixin  # noqa
+from .integer import (  # noqa
+    IntegerArray, to_integer_array)
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index fe4e461b0bd4f6..c0697dd29e4d06 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -12,8 +12,8 @@
 from pandas.errors import AbstractMethodError
 from pandas.compat.numpy import function as nv
 from pandas.compat import set_function_name, PY3
-from pandas.core.dtypes.common import is_list_like
 from pandas.core import ops
+from pandas.core.dtypes.common import is_list_like
 
 _not_implemented_message = "{} does not implement {}."
 
@@ -88,7 +88,7 @@ class ExtensionArray(object):
     # Constructors
     # ------------------------------------------------------------------------
     @classmethod
-    def _from_sequence(cls, scalars, copy=False):
+    def _from_sequence(cls, scalars, dtype=None, copy=False):
         """Construct a new ExtensionArray from a sequence of scalars.
 
         Parameters
@@ -96,6 +96,8 @@ def _from_sequence(cls, scalars, copy=False):
         scalars : Sequence
             Each element will be an instance of the scalar type for this
             array, ``cls.dtype.type``.
+        dtype : Dtype, optional
+            construct for this particular dtype
         copy : boolean, default False
             if True, copy the underlying data
         Returns
@@ -378,7 +380,7 @@ def fillna(self, value=None, method=None, limit=None):
                 func = pad_1d if method == 'pad' else backfill_1d
                 new_values = func(self.astype(object), limit=limit,
                                   mask=mask)
-                new_values = self._from_sequence(new_values)
+                new_values = self._from_sequence(new_values, dtype=self.dtype)
             else:
                 # fill with value
                 new_values = self.copy()
@@ -407,7 +409,7 @@ def unique(self):
         from pandas import unique
 
         uniques = unique(self.astype(object))
-        return self._from_sequence(uniques)
+        return self._from_sequence(uniques, dtype=self.dtype)
 
     def _values_for_factorize(self):
         # type: () -> Tuple[ndarray, Any]
@@ -559,7 +561,7 @@ def take(self, indices, allow_fill=False, fill_value=None):
 
                result = take(data, indices, fill_value=fill_value,
                              allow_fill=allow_fill)
-               return self._from_sequence(result)
+               return self._from_sequence(result, dtype=self.dtype)
         """
         # Implementer note: The `fill_value` parameter should be a user-facing
         # value, an instance of self.dtype.type. When passed `fill_value=None`,
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 375db28a4ee5a8..739acbf8e7573e 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -487,8 +487,8 @@ def _constructor(self):
         return Categorical
 
     @classmethod
-    def _from_sequence(cls, scalars):
-        return Categorical(scalars)
+    def _from_sequence(cls, scalars, dtype=None, copy=False):
+        return Categorical(scalars, dtype=dtype)
 
     def copy(self):
         """ Copy constructor. """
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
new file mode 100644
index 00000000000000..a7c0518bb4ed9e
--- /dev/null
+++ b/pandas/core/arrays/integer.py
@@ -0,0 +1,563 @@
+import sys
+import warnings
+import copy
+import numpy as np
+
+from pandas.compat import u
+from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass
+from pandas.util._decorators import cache_readonly
+from pandas.compat import set_function_name
+from pandas.api.types import (is_integer, is_scalar, is_float,
+                              is_float_dtype, is_integer_dtype,
+                              is_object_dtype,
+                              is_list_like,
+                              infer_dtype)
+from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
+from pandas.core.dtypes.base import ExtensionDtype
+from pandas.core.dtypes.dtypes import registry
+from pandas.core.dtypes.missing import isna, notna
+from pandas.core.dtypes.cast import maybe_downcast_to_dtype
+
+from pandas.io.formats.printing import (
+    format_object_summary, format_object_attrs, default_pprint)
+
+
+class IntegerDtype(ExtensionDtype):
+    type = None
+    na_value = np.nan
+
+    @cache_readonly
+    def is_signed_integer(self):
+        return self.kind == 'i'
+
+    @cache_readonly
+    def is_unsigned_integer(self):
+        return self.kind == 'u'
+
+    @cache_readonly
+    def numpy_dtype(self):
+        """ Return an instance of our numpy dtype """
+        return np.dtype(self.type)
+
+    @cache_readonly
+    def kind(self):
+        return self.numpy_dtype.kind
+
+    @classmethod
+    def construct_array_type(cls):
+        """Return the array type associated with this dtype
+
+        Returns
+        -------
+        type
+        """
+        return IntegerArray
+
+    @classmethod
+    def construct_from_string(cls, string):
+        """
+        Construction from a string, raise a TypeError if not
+        possible
+        """
+        if string == cls.name:
+            return cls()
+        raise TypeError("Cannot construct a '{}' from "
+                        "'{}'".format(cls, string))
+
+
+def to_integer_array(values):
+    """
+    Parameters
+    ----------
+    values : 1D list-like
+
+    Returns
+    -------
+    infer and return an integer array
+
+    Raises
+    ------
+    TypeError if incompatible types
+    """
+    values = np.array(values, copy=False)
+    try:
+        dtype = _dtypes[str(values.dtype)]
+    except KeyError:
+        if is_float_dtype(values):
+            return IntegerArray(values)
+
+        raise TypeError("Incompatible dtype for {}".format(values.dtype))
+    return IntegerArray(values, dtype=dtype, copy=False)
+
+
+def coerce_to_array(values, dtype, mask=None, copy=False):
+    """
+    Coerce the input values array to numpy arrays with a mask
+
+    Parameters
+    ----------
+    values : 1D list-like
+    dtype : integer dtype
+    mask : boolean 1D array, optional
+    copy : boolean, default False
+        if True, copy the input
+
+    Returns
+    -------
+    tuple of (values, mask)
+    """
+
+    if isinstance(values, IntegerArray):
+        values, mask = values.data, values.mask
+        if copy:
+            values = values.copy()
+            mask = mask.copy()
+        return values, mask
+
+    values = np.array(values, copy=copy)
+    if is_object_dtype(values):
+        inferred_type = infer_dtype(values)
+        if inferred_type not in ['floating', 'integer',
+                                 'mixed-integer', 'mixed-integer-float']:
+            raise TypeError("{} cannot be converted to an IntegerDtype".format(
+                values.dtype))
+
+    elif not (is_integer_dtype(values) or is_float_dtype(values)):
+        raise TypeError("{} cannot be converted to an IntegerDtype".format(
+            values.dtype))
+
+    if mask is None:
+        mask = isna(values)
+    else:
+        assert len(mask) == len(values)
+
+    if not values.ndim == 1:
+        raise TypeError("values must be a 1D list-like")
+    if not mask.ndim == 1:
+        raise TypeError("mask must be a 1D list-like")
+
+    # avoid float->int numpy conversion issues
+    if is_object_dtype(values):
+        mask |= isna(values)
+
+    # infer dtype if needed
+    if dtype is None:
+        if is_integer_dtype(values):
+            dtype = values.dtype
+        else:
+            dtype = np.dtype('int64')
+    else:
+        dtype = dtype.type
+
+    # we copy as need to coerce here
+    if mask.any():
+        values = values.copy()
+        values[mask] = 1
+
+        values = values.astype(dtype)
+    else:
+        values = values.astype(dtype, copy=False)
+
+    return values, mask
+
+
+class IntegerArray(ExtensionArray, ExtensionOpsMixin):
+    """
+    We represent an IntegerArray with 2 numpy arrays
+    - data: contains a numpy integer array of the appropriate dtype
+    - mask: a boolean array holding a mask on the data, True is missing
+    """
+
+    @cache_readonly
+    def dtype(self):
+        return _dtypes[str(self.data.dtype)]
+
+    def __init__(self, values, mask=None, dtype=None, copy=False):
+        self.data, self.mask = coerce_to_array(
+            values, dtype=dtype, mask=mask, copy=copy)
+
+    @classmethod
+    def _from_sequence(cls, scalars, mask=None, dtype=None, copy=False):
+        return cls(scalars, mask=mask, dtype=dtype, copy=copy)
+
+    @classmethod
+    def _from_factorized(cls, values, original):
+        return cls(values, dtype=original.dtype)
+
+    def __getitem__(self, item):
+        if is_integer(item):
+            if self.mask[item]:
+                return self.dtype.na_value
+            return self.data[item]
+        return type(self)(self.data[item],
+                          mask=self.mask[item],
+                          dtype=self.dtype)
+
+    def _coerce_to_ndarray(self):
+        """ coerce to an ndarray, preserving my scalar types """
+
+        # TODO(jreback) make this better
+        data = self.data.astype(object)
+        data[self.mask] = self._na_value
+        return data
+
+    def __array__(self, dtype=None):
+        """
+        the array interface, return my values
+        We return an object array here to preserve our scalar values
+        """
+        return self._coerce_to_ndarray()
+
+    def __iter__(self):
+        """Iterate over elements of the array.
+
+        """
+        # This needs to be implemented so that pandas recognizes extension
+        # arrays as list-like. The default implementation makes successive
+        # calls to ``__getitem__``, which may be slower than necessary.
+        for i in range(len(self)):
+            if self.mask[i]:
+                yield self.dtype.na_value
+            else:
+                yield self.data[i]
+
+    def _formatting_values(self):
+        # type: () -> np.ndarray
+        return self._coerce_to_ndarray()
+
+    def take(self, indexer, allow_fill=False, fill_value=None):
+        from pandas.api.extensions import take
+
+        # we always fill with 1 internally
+        # to avoid upcasting
+        data_fill_value = 1 if isna(fill_value) else fill_value
+        result = take(self.data, indexer, fill_value=data_fill_value,
+                      allow_fill=allow_fill)
+
+        mask = take(self.mask, indexer, fill_value=True,
+                    allow_fill=allow_fill)
+
+        # if we are filling
+        # we only fill where the indexer is null
+        # not existing missing values
+        # TODO(jreback) what if we have a non-na float as a fill value?
+        if allow_fill and notna(fill_value):
+            fill_mask = np.asarray(indexer) == -1
+            result[fill_mask] = fill_value
+            mask = mask ^ fill_mask
+
+        return type(self)(result, mask=mask, dtype=self.dtype)
+
+    def copy(self, deep=False):
+        data, mask = self.data, self.mask
+        if deep:
+            data = copy.deepcopy(data)
+            mask = copy.deepcopy(mask)
+        else:
+            data = data.copy()
+            mask = mask.copy()
+        return type(self)(data, mask, dtype=self.dtype, copy=False)
+
+    def __setitem__(self, key, value):
+        _is_scalar = is_scalar(value)
+        if _is_scalar:
+            value = [value]
+        value, mask = coerce_to_array(value, dtype=self.dtype)
+
+        if _is_scalar:
+            value = value[0]
+            mask = mask[0]
+
+        self.data[key] = value
+        self.mask[key] = mask
+
+    def __len__(self):
+        return len(self.data)
+
+    def __repr__(self):
+        """
+        Return a string representation for this object.
+
+        Invoked by unicode(df) in py2 only. Yields a Unicode String in both
+        py2/py3.
+        """
+        klass = self.__class__.__name__
+        data = format_object_summary(self, default_pprint, False)
+        attrs = format_object_attrs(self)
+        space = " "
+
+        prepr = (u(",%s") %
+                 space).join(u("%s=%s") % (k, v) for k, v in attrs)
+
+        res = u("%s(%s%s)") % (klass, data, prepr)
+
+        return res
+
+    @property
+    def nbytes(self):
+        return self.data.nbytes + self.mask.nbytes
+
+    def isna(self):
+        return self.mask
+
+    @property
+    def _na_value(self):
+        return np.nan
+
+    @classmethod
+    def _concat_same_type(cls, to_concat):
+        data = np.concatenate([x.data for x in to_concat])
+        mask = np.concatenate([x.mask for x in to_concat])
+        return cls(data, mask=mask, dtype=to_concat[0].dtype)
+
+    def astype(self, dtype, copy=True):
+        """Cast to a NumPy array or IntegerArray with 'dtype'.
+
+        Parameters
+        ----------
+        dtype : str or dtype
+            Typecode or data-type to which the array is cast.
+        copy : bool, default True
+            Whether to copy the data, even if not necessary. If False,
+            a copy is made only if the old dtype does not match the
+            new dtype.
+
+        Returns
+        -------
+        array : ndarray or IntegerArray
+            NumPy ndarray or IntegerArray with 'dtype' for its dtype.
+
+        Raises
+        ------
+        TypeError
+            if incompatible type with an IntegerDtype, equivalent of same_kind
+            casting
+        """
+
+        # if we are astyping to an existing IntegerDtype we can fastpath
+        if isinstance(dtype, IntegerDtype):
+            result = self.data.astype(dtype.numpy_dtype,
+                                      casting='same_kind', copy=False)
+            return type(self)(result, mask=self.mask,
+                              dtype=dtype, copy=False)
+
+        # coerce
+        data = self._coerce_to_ndarray()
+        return data.astype(dtype=dtype, copy=False)
+
+    @property
+    def _ndarray_values(self):
+        # type: () -> np.ndarray
+        """Internal pandas method for lossy conversion to a NumPy ndarray.
+
+        This method is not part of the pandas interface.
+
+        The expectation is that this is cheap to compute, and is primarily
+        used for interacting with our indexers.
+        """
+        return self.data
+
+    def value_counts(self, dropna=True):
+        """
+        Returns a Series containing counts of each distinct non-missing value.
+
+        Missing values are counted as one ``NaN`` entry if ``dropna=False``.
+
+        Parameters
+        ----------
+        dropna : boolean, default True
+            Don't include counts of NaN.
+
+        Returns
+        -------
+        counts : Series
+
+        See Also
+        --------
+        Series.value_counts
+
+        """
+
+        from pandas import Index, Series
+
+        # compute counts on the data with no nans
+        data = self.data[~self.mask]
+        value_counts = Index(data).value_counts()
+        array = value_counts.values
+
+        # TODO(extension)
+        # if we allow Index to hold an ExtensionArray
+        # this is easier
+        index = value_counts.index.astype(object)
+
+        # if we want nans, count the mask
+        if not dropna:
+
+            # TODO(extension)
+            # appending to an Index *always* infers
+            # w/o passing the dtype
+            array = np.append(array, [self.mask.sum()])
+            index = Index(np.concatenate(
+                [index.values,
+                 np.array([np.nan], dtype=object)]), dtype=object)
+
+        return Series(array, index=index)
+
+    def _values_for_argsort(self):
+        # type: () -> ndarray
+        """Return values for sorting.
+
+        Returns
+        -------
+        ndarray
+            The transformed values should maintain the ordering between values
+            within the array.
+
+        See Also
+        --------
+        ExtensionArray.argsort
+        """
+        data = self.data.copy()
+        data[self.mask] = data.min() - 1
+        return data
+
+    @classmethod
+    def _create_comparison_method(cls, op):
+        def cmp_method(self, other):
+
+            op_name = op.__name__
+            mask = None
+            if isinstance(other, IntegerArray):
+                other, mask = other.data, other.mask
+            elif is_list_like(other):
+                other = np.asarray(other)
+                if other.ndim > 0 and len(self) != len(other):
+                    raise ValueError('Lengths must match to compare')
+
+            # numpy will show a DeprecationWarning on invalid elementwise
+            # comparisons, this will raise in the future
+            with warnings.catch_warnings(record=True):
+                with np.errstate(all='ignore'):
+                    result = op(self.data, other)
+
+            # nans propagate
+            if mask is None:
+                mask = self.mask
+            else:
+                mask = self.mask | mask
+
+            result[mask] = True if op_name == 'ne' else False
+            return result
+
+        name = '__{name}__'.format(name=op.__name__)
+        return set_function_name(cmp_method, name, cls)
+
+    def _maybe_mask_result(self, result, mask, other, op_name):
+        """
+        Parameters
+        ----------
+        result : array-like
+        mask : array-like bool
+        other : scalar or array-like
+        op_name : str
+        """
+
+        # may need to fill infs
+        # and mask wraparound
+        if is_float_dtype(result):
+            mask |= (result == np.inf) | (result == -np.inf)
+
+        # floor div can be a float or an integer depending
+        # on the operands
+        if (op_name in ['rfloordiv', 'floordiv'] and
+                (is_float_dtype(other) or is_float(other))):
+            result[mask] = np.nan
+            return result
+
+        # by definition a float result
+        elif op_name in ['rtruediv', 'truediv', 'rdiv', 'div']:
+            result[mask] = np.nan
+            return result
+
+        elif is_float_dtype(result):
+            # if our float result, try to downcast if possible
+            # if remains float, then mask and return as float
+            nonans = result[notna(result)]
+            maybe = maybe_downcast_to_dtype(nonans, self.dtype.numpy_dtype)
+            if not is_integer_dtype(maybe):
+                result[mask] = np.nan
+                return result
+
+        return type(self)(result, mask=mask, dtype=self.dtype, copy=False)
+
+    @classmethod
+    def _create_arithmetic_method(cls, op):
+        def integer_arithmetic_method(self, other):
+
+            op_name = op.__name__
+            mask = None
+            if isinstance(other, (ABCSeries, ABCIndexClass)):
+                other = getattr(other, 'values', other)
+
+            if isinstance(other, IntegerArray):
+                other, mask = other.data, other.mask
+            elif getattr(other, 'ndim', 0) > 1:
+                raise TypeError("can only perform ops with 1-d structures")
+            elif is_list_like(other):
+                other = np.asarray(other)
+                if not other.ndim:
+                    other = other.item()
+                elif other.ndim == 1:
+                    if not (is_float_dtype(other) or is_integer_dtype(other)):
+                        raise TypeError(
+                            "can only perform ops with numeric values")
+            else:
+                if not (is_float(other) or is_integer(other)):
+                    raise TypeError("can only perform ops with numeric values")
+
+            # nans propagate
+            if mask is None:
+                mask = self.mask
+            else:
+                mask = self.mask | mask
+
+            with np.errstate(all='ignore'):
+                result = op(self.data, other)
+
+            # divmod returns a tuple
+            if op_name == 'divmod':
+                div, mod = result
+                return (self._maybe_mask_result(div, mask, other, 'floordiv'),
+                        self._maybe_mask_result(mod, mask, other, 'mod'))
+
+            return self._maybe_mask_result(result, mask, other, op_name)
+
+        name = '__{name}__'.format(name=op.__name__)
+        return set_function_name(integer_arithmetic_method, name, cls)
+
+
+IntegerArray._add_arithmetic_ops()
+IntegerArray._add_comparison_ops()
+
+
+module = sys.modules[__name__]
+
+
+# create the Dtype
+_dtypes = {}
+for dtype in ['int8', 'int16', 'int32', 'int64',
+              'uint8', 'uint16', 'uint32', 'uint64']:
+
+    if dtype.startswith('u'):
+        name = "U{}".format(dtype[1:].capitalize())
+    else:
+        name = dtype.capitalize()
+    classname = "{}Dtype".format(name)
+    attributes_dict = {'type': getattr(np, dtype),
+                       'name': name}
+    dtype_type = type(classname, (IntegerDtype, ), attributes_dict)
+    setattr(module, classname, dtype_type)
+
+    # register
+    registry.register(dtype_type)
+    _dtypes[dtype] = dtype_type()
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
index 2cd8144e43ceac..f3b1f2736c1c56 100644
--- a/pandas/core/dtypes/cast.py
+++ b/pandas/core/dtypes/cast.py
@@ -651,7 +651,8 @@ def astype_nansafe(arr, dtype, copy=True):
 
     # dispatch on extension dtype if needed
     if is_extension_array_dtype(dtype):
-        return dtype.array_type._from_sequence(arr, copy=copy)
+        return dtype.construct_array_type()._from_sequence(
+            arr, dtype=dtype, copy=copy)
 
     if not isinstance(dtype, np.dtype):
         dtype = pandas_dtype(dtype)
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
index 9f6813bc38464c..7e1b9f0aae14d4 100644
--- a/pandas/core/dtypes/concat.py
+++ b/pandas/core/dtypes/concat.py
@@ -531,6 +531,7 @@ def _concat_index_asobject(to_concat, name=None):
 
     to_concat = [x._values if isinstance(x, Index) else x
                  for x in to_concat]
+
     return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs)
 
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index ba60d100999487..4c205bb8763ebc 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -44,6 +44,7 @@
     is_datetime64_any_dtype,
     is_datetime64tz_dtype,
     is_timedelta64_dtype,
+    is_extension_array_dtype,
     is_hashable,
     needs_i8_conversion,
     is_iterator, is_list_like,
@@ -266,18 +267,32 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
                                          name=name)
 
         # categorical
-        if is_categorical_dtype(data) or is_categorical_dtype(dtype):
+        elif is_categorical_dtype(data) or is_categorical_dtype(dtype):
             from .category import CategoricalIndex
             return CategoricalIndex(data, dtype=dtype, copy=copy, name=name,
                                     **kwargs)
 
         # interval
-        if is_interval_dtype(data) or is_interval_dtype(dtype):
+        elif is_interval_dtype(data) or is_interval_dtype(dtype):
             from .interval import IntervalIndex
             closed = kwargs.get('closed', None)
             return IntervalIndex(data, dtype=dtype, name=name, copy=copy,
                                  closed=closed)
 
+        # extension dtype
+        elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype):
+            data = np.asarray(data)
+            if not (dtype is None or is_object_dtype(dtype)):
+
+                # coerce to the provided dtype
+                data = dtype.construct_array_type()(
+                    data, dtype=dtype, copy=False)
+
+            # coerce to the object dtype
+            data = data.astype(object)
+            return Index(data, dtype=object, copy=True, name=name,
+                         **kwargs)
+
         # index-like
         elif isinstance(data, (np.ndarray, Index, ABCSeries)):
 
@@ -1171,10 +1186,15 @@ def _to_embed(self, keep_tz=False, dtype=None):
     def astype(self, dtype, copy=True):
         if is_dtype_equal(self.dtype, dtype):
             return self.copy() if copy else self
+
         elif is_categorical_dtype(dtype):
             from .category import CategoricalIndex
             return CategoricalIndex(self.values, name=self.name, dtype=dtype,
                                     copy=copy)
+
+        elif is_extension_array_dtype(dtype):
+            return Index(np.asarray(self), dtype=dtype, copy=copy)
+
         try:
             if is_datetime64tz_dtype(dtype):
                 from pandas import DatetimeIndex
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
index e9b9a734ec5f58..f5fb0070ffc4bf 100644
--- a/pandas/core/missing.py
+++ b/pandas/core/missing.py
@@ -638,7 +638,8 @@ def fill_zeros(result, x, y, name, fill):
             # if we have a fill of inf, then sign it correctly
             # (GH 6178 and PR 9308)
             if np.isinf(fill):
-                signs = np.sign(y if name.startswith(('r', '__r')) else x)
+                signs = y if name.startswith(('r', '__r')) else x
+                signs = np.sign(signs.astype('float', copy=False))
                 negative_inf_mask = (signs.ravel() < 0) & mask
                 np.putmask(result, negative_inf_mask, -fill)
 
diff --git a/pandas/core/ops.py b/pandas/core/ops.py
index fa6d88648cc636..89be2e7cf99cfc 100644
--- a/pandas/core/ops.py
+++ b/pandas/core/ops.py
@@ -135,6 +135,13 @@ def rfloordiv(left, right):
 
 
 def rmod(left, right):
+    # check if right is a string as % is the string
+    # formatting operation; this is a TypeError
+    # otherwise perform the op
+    if isinstance(right, compat.string_types):
+        raise TypeError("{typ} cannot perform the operation mod".format(
+            typ=type(left).__name__))
+
     return right % left
 
 
@@ -972,7 +979,7 @@ def _align_method_SERIES(left, right, align_asobject=False):
     return left, right
 
 
-def _construct_result(left, result, index, name, dtype):
+def _construct_result(left, result, index, name, dtype=None):
     """
     If the raw op result has a non-None name (e.g. it is an Index object) and
     the name argument is None, then passing name to the constructor will
@@ -984,7 +991,7 @@ def _construct_result(left, result, index, name, dtype):
     return out
 
 
-def _construct_divmod_result(left, result, index, name, dtype):
+def _construct_divmod_result(left, result, index, name, dtype=None):
     """divmod returns a tuple of like indexed series instead of a single series.
     """
     constructor = left._constructor
@@ -1002,16 +1009,33 @@ def dispatch_to_extension_op(op, left, right):
 
     # The op calls will raise TypeError if the op is not defined
     # on the ExtensionArray
+    # TODO(jreback)
+    # we need to listify to avoid ndarray, or non-same-type extension array
+    # dispatching
+
     if is_extension_array_dtype(left):
-        res_values = op(left.values, right)
+
+        new_left = left.values
+        if (isinstance(right, np.ndarray) or
+                (is_extension_array_dtype(right) and
+                 type(left) != type(right))):
+            new_right = list(right)
+        else:
+            new_right = right
+
     else:
-        # We know that left is not ExtensionArray and is Series and right is
-        # ExtensionArray.  Want to force ExtensionArray op to get called
-        res_values = op(list(left.values), right.values)
 
+        new_left = list(left.values)
+        new_right = right
+
+    res_values = op(new_left, new_right)
     res_name = get_op_result_name(left, right)
-    return left._constructor(res_values, index=left.index,
-                             name=res_name)
+
+    if op.__name__ == 'divmod':
+        return _construct_divmod_result(
+            left, res_values, left.index, res_name)
+
+    return _construct_result(left, res_values, left.index, res_name)
 
 
 def _arith_method_SERIES(cls, op, special):
@@ -1028,7 +1052,6 @@ def _arith_method_SERIES(cls, op, special):
 
     def na_op(x, y):
         import pandas.core.computation.expressions as expressions
-
         try:
             result = expressions.evaluate(op, str_rep, x, y, **eval_kwargs)
         except TypeError:
@@ -1049,6 +1072,20 @@ def na_op(x, y):
         return result
 
     def safe_na_op(lvalues, rvalues):
+        """
+        return the result of evaluating na_op on the passed in values
+
+        try coercion to object type if the native types are not compatible
+
+        Parameters
+        ----------
+        lvalues : array-like
+        rvalues : array-like
+
+        Raises
+        ------
+        TypeError: invalid operation
+        """
         try:
             with np.errstate(all='ignore'):
                 return na_op(lvalues, rvalues)
@@ -1059,14 +1096,21 @@ def safe_na_op(lvalues, rvalues):
             raise
 
     def wrapper(left, right):
-
         if isinstance(right, ABCDataFrame):
             return NotImplemented
 
         left, right = _align_method_SERIES(left, right)
         res_name = get_op_result_name(left, right)
 
-        if is_datetime64_dtype(left) or is_datetime64tz_dtype(left):
+        if is_categorical_dtype(left):
+            raise TypeError("{typ} cannot perform the operation "
+                            "{op}".format(typ=type(left).__name__, op=str_rep))
+
+        elif (is_extension_array_dtype(left) or
+                is_extension_array_dtype(right)):
+            return dispatch_to_extension_op(op, left, right)
+
+        elif is_datetime64_dtype(left) or is_datetime64tz_dtype(left):
             result = dispatch_to_index_op(op, left, right, pd.DatetimeIndex)
             return construct_result(left, result,
                                     index=left.index, name=res_name,
@@ -1078,15 +1122,6 @@ def wrapper(left, right):
                                     index=left.index, name=res_name,
                                     dtype=result.dtype)
 
-        elif is_categorical_dtype(left):
-            raise TypeError("{typ} cannot perform the operation "
-                            "{op}".format(typ=type(left).__name__, op=str_rep))
-
-        elif (is_extension_array_dtype(left) or
-              (is_extension_array_dtype(right) and
-               not is_categorical_dtype(right))):
-            return dispatch_to_extension_op(op, left, right)
-
         lvalues = left.values
         rvalues = right
         if isinstance(rvalues, ABCSeries):
@@ -1158,6 +1193,9 @@ def _comp_method_SERIES(cls, op, special):
     masker = _gen_eval_kwargs(op_name).get('masker', False)
 
     def na_op(x, y):
+        # TODO:
+        # should have guarantees on what x, y can be type-wise
+        # Extension Dtypes are not called here
 
         # dispatch to the categorical if we have a categorical
         # in either operand
diff --git a/pandas/core/series.py b/pandas/core/series.py
index e4c69d5cd0a0c2..8e78dd1b1bad25 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -4096,7 +4096,7 @@ def _try_cast(arr, take_fast_path):
             elif is_extension_array_dtype(dtype):
                 # create an extension array from its dtype
                 array_type = dtype.construct_array_type()
-                subarr = array_type(subarr, copy=copy)
+                subarr = array_type(subarr, dtype=dtype, copy=copy)
 
             elif dtype is not None and raise_cast_failure:
                 raise
diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py
index 640b894e2245f9..b6b81bb941a59c 100644
--- a/pandas/tests/extension/base/__init__.py
+++ b/pandas/tests/extension/base/__init__.py
@@ -47,7 +47,7 @@ class TestMyDtype(BaseDtypeTests):
 from .groupby import BaseGroupbyTests  # noqa
 from .interface import BaseInterfaceTests  # noqa
 from .methods import BaseMethodsTests  # noqa
-from .ops import BaseArithmeticOpsTests, BaseComparisonOpsTests  # noqa
+from .ops import BaseArithmeticOpsTests, BaseComparisonOpsTests, BaseOpsUtil  # noqa
 from .missing import BaseMissingTests  # noqa
 from .reshaping import BaseReshapingTests  # noqa
 from .setitem import BaseSetitemTests  # noqa
diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py
index e9df49780f1192..886a0f66b5f667 100644
--- a/pandas/tests/extension/base/getitem.py
+++ b/pandas/tests/extension/base/getitem.py
@@ -226,12 +226,14 @@ def test_reindex(self, data, na_value):
         n = len(data)
         result = s.reindex([-1, 0, n])
         expected = pd.Series(
-            data._from_sequence([na_value, data[0], na_value]),
+            data._from_sequence([na_value, data[0], na_value],
+                                dtype=s.dtype),
             index=[-1, 0, n])
         self.assert_series_equal(result, expected)
 
         result = s.reindex([n, n + 1])
-        expected = pd.Series(data._from_sequence([na_value, na_value]),
+        expected = pd.Series(data._from_sequence([na_value, na_value],
+                                                 dtype=s.dtype),
                              index=[n, n + 1])
         self.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py
index 659b9757ac1e33..f8d2f8314d2b19 100644
--- a/pandas/tests/extension/base/ops.py
+++ b/pandas/tests/extension/base/ops.py
@@ -3,10 +3,12 @@
 import operator
 
 import pandas as pd
+from pandas.core import ops
 from .base import BaseExtensionTests
 
 
 class BaseOpsUtil(BaseExtensionTests):
+
     def get_op_from_name(self, op_name):
         short_opname = op_name.strip('_')
         try:
@@ -32,6 +34,20 @@ def _check_op(self, s, op, other, exc=NotImplementedError):
             with pytest.raises(exc):
                 op(s, other)
 
+    def _check_divmod_op(self, s, op, other, exc=NotImplementedError):
+        # divmod has multiple return values, so check separately
+        if exc is None:
+            result_div, result_mod = op(s, other)
+            if op is divmod:
+                expected_div, expected_mod = s // other, s % other
+            else:
+                expected_div, expected_mod = other // s, other % s
+            self.assert_series_equal(result_div, expected_div)
+            self.assert_series_equal(result_mod, expected_mod)
+        else:
+            with pytest.raises(exc):
+                divmod(s, other)
+
 
 class BaseArithmeticOpsTests(BaseOpsUtil):
     """Various Series and DataFrame arithmetic ops methods."""
@@ -50,8 +66,8 @@ def test_arith_array(self, data, all_arithmetic_operators):
 
     def test_divmod(self, data):
         s = pd.Series(data)
-        self._check_op(s, divmod, 1, exc=TypeError)
-        self._check_op(1, divmod, s, exc=TypeError)
+        self._check_divmod_op(s, divmod, 1, exc=TypeError)
+        self._check_divmod_op(1, ops.rdivmod, s, exc=TypeError)
 
     def test_error(self, data, all_arithmetic_operators):
         # invalid ops
diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
index c83726c5278a5e..0340289e0b6740 100644
--- a/pandas/tests/extension/base/reshaping.py
+++ b/pandas/tests/extension/base/reshaping.py
@@ -82,7 +82,8 @@ def test_concat_columns(self, data, na_value):
         # non-aligned
         df2 = pd.DataFrame({'B': [1, 2, 3]}, index=[1, 2, 3])
         expected = pd.DataFrame({
-            'A': data._from_sequence(list(data[:3]) + [na_value]),
+            'A': data._from_sequence(list(data[:3]) + [na_value],
+                                     dtype=data.dtype),
             'B': [np.nan, 1, 2, 3]})
 
         result = pd.concat([df1, df2], axis=1)
@@ -96,8 +97,10 @@ def test_align(self, data, na_value):
         r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3]))
 
         # Assumes that the ctor can take a list of scalars of the type
-        e1 = pd.Series(data._from_sequence(list(a) + [na_value]))
-        e2 = pd.Series(data._from_sequence([na_value] + list(b)))
+        e1 = pd.Series(data._from_sequence(list(a) + [na_value],
+                                           dtype=data.dtype))
+        e2 = pd.Series(data._from_sequence([na_value] + list(b),
+                                           dtype=data.dtype))
         self.assert_series_equal(r1, e1)
         self.assert_series_equal(r2, e2)
 
@@ -109,8 +112,10 @@ def test_align_frame(self, data, na_value):
         )
 
         # Assumes that the ctor can take a list of scalars of the type
-        e1 = pd.DataFrame({'A': data._from_sequence(list(a) + [na_value])})
-        e2 = pd.DataFrame({'A': data._from_sequence([na_value] + list(b))})
+        e1 = pd.DataFrame({'A': data._from_sequence(list(a) + [na_value],
+                                                    dtype=data.dtype)})
+        e2 = pd.DataFrame({'A': data._from_sequence([na_value] + list(b),
+                                                    dtype=data.dtype)})
         self.assert_frame_equal(r1, e1)
         self.assert_frame_equal(r2, e2)
 
@@ -120,7 +125,8 @@ def test_align_series_frame(self, data, na_value):
         df = pd.DataFrame({"col": np.arange(len(ser) + 1)})
         r1, r2 = ser.align(df)
 
-        e1 = pd.Series(data._from_sequence(list(data) + [na_value]),
+        e1 = pd.Series(data._from_sequence(list(data) + [na_value],
+                                           dtype=data.dtype),
                        name=ser.name)
 
         self.assert_series_equal(r1, e1)
@@ -153,7 +159,8 @@ def test_merge(self, data, na_value):
         res = pd.merge(df1, df2)
         exp = pd.DataFrame(
             {'int1': [1, 1, 2], 'int2': [1, 2, 3], 'key': [0, 0, 1],
-             'ext': data._from_sequence([data[0], data[0], data[1]])})
+             'ext': data._from_sequence([data[0], data[0], data[1]],
+                                        dtype=data.dtype)})
         self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']])
 
         res = pd.merge(df1, df2, how='outer')
@@ -161,5 +168,6 @@ def test_merge(self, data, na_value):
             {'int1': [1, 1, 2, 3, np.nan], 'int2': [1, 2, 3, np.nan, 4],
              'key': [0, 0, 1, 2, 3],
              'ext': data._from_sequence(
-                 [data[0], data[0], data[1], data[2], na_value])})
+                 [data[0], data[0], data[1], data[2], na_value],
+                 dtype=data.dtype)})
         self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']])
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
index 33adebbbe57800..373da1af9ef24b 100644
--- a/pandas/tests/extension/decimal/array.py
+++ b/pandas/tests/extension/decimal/array.py
@@ -1,6 +1,5 @@
 import decimal
 import numbers
-import random
 import sys
 
 import numpy as np
@@ -38,7 +37,7 @@ def construct_from_string(cls, string):
 class DecimalArray(ExtensionArray, ExtensionScalarOpsMixin):
     dtype = DecimalDtype()
 
-    def __init__(self, values, copy=False):
+    def __init__(self, values, dtype=None, copy=False):
         for val in values:
             if not isinstance(val, self.dtype.type):
                 raise TypeError("All values must be of type " +
@@ -54,8 +53,8 @@ def __init__(self, values, copy=False):
         # self._values = self.values = self.data
 
     @classmethod
-    def _from_sequence(cls, scalars, copy=False):
-        return cls(scalars)
+    def _from_sequence(cls, scalars, dtype=None, copy=False):
+        return cls(scalars, copy=copy)
 
     @classmethod
     def _from_factorized(cls, values, original):
@@ -117,7 +116,3 @@ def _concat_same_type(cls, to_concat):
 
 DecimalArray._add_arithmetic_ops()
 DecimalArray._add_comparison_ops()
-
-
-def make_data():
-    return [decimal.Decimal(random.random()) for _ in range(100)]
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index 8fd3d1a57f6c8d..b851ad1d06d4d2 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -1,5 +1,6 @@
 import decimal
 
+import random
 import numpy as np
 import pandas as pd
 import pandas.util.testing as tm
@@ -7,7 +8,12 @@
 
 from pandas.tests.extension import base
 
-from .array import DecimalDtype, DecimalArray, make_data
+from .array import DecimalDtype, DecimalArray
+
+
+@pytest.fixture
+def make_data():
+    return [decimal.Decimal(random.random()) for _ in range(100)]
 
 
 @pytest.fixture
diff --git a/pandas/tests/extension/integer/__init__.py b/pandas/tests/extension/integer/__init__.py
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/integer/test_integer.py
new file mode 100644
index 00000000000000..773581a826b8d8
--- /dev/null
+++ b/pandas/tests/extension/integer/test_integer.py
@@ -0,0 +1,604 @@
+import numpy as np
+import pandas as pd
+import pandas.util.testing as tm
+import pytest
+
+from pandas.tests.extension import base
+from pandas.api.types import (
+    is_integer, is_scalar, is_float, is_float_dtype)
+from pandas.core.dtypes.generic import ABCIndexClass
+
+from pandas.core.arrays import (
+    to_integer_array, IntegerArray)
+from pandas.core.arrays.integer import (
+    Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype,
+    UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype)
+
+
+@pytest.fixture
+def make_data():
+    return (list(range(8)) +
+            [np.nan] +
+            list(range(10, 98)) +
+            [np.nan] +
+            [99, 100])
+
+
+@pytest.fixture(params=[Int8Dtype, Int16Dtype, Int32Dtype, Int64Dtype,
+                        UInt8Dtype, UInt16Dtype, UInt32Dtype, UInt64Dtype])
+def dtype(request):
+    return request.param()
+
+
+@pytest.fixture
+def data(dtype, make_data):
+    return IntegerArray(make_data, dtype=dtype)
+
+
+@pytest.fixture
+def data_missing(dtype):
+    return IntegerArray([np.nan, 1], dtype=dtype)
+
+
+@pytest.fixture
+def data_repeated(data):
+    def gen(count):
+        for _ in range(count):
+            yield data
+    yield gen
+
+
+@pytest.fixture
+def data_for_sorting(dtype):
+    return IntegerArray([1, 2, 0], dtype=dtype)
+
+
+@pytest.fixture
+def data_missing_for_sorting(dtype):
+    return IntegerArray([1, np.nan, 0], dtype=dtype)
+
+
+@pytest.fixture
+def na_cmp():
+    # we are np.nan
+    return lambda x, y: np.isnan(x) and np.isnan(y)
+
+
+@pytest.fixture
+def na_value():
+    return np.nan
+
+
+@pytest.fixture
+def data_for_grouping(dtype):
+    b = 1
+    a = 0
+    c = 2
+    na = np.nan
+    return IntegerArray([b, b, na, na, a, a, b, c], dtype=dtype)
+
+
+def test_dtypes(dtype):
+    # smoke tests on auto dtype construction
+
+    if dtype.is_signed_integer:
+        assert np.dtype(dtype.type).kind == 'i'
+    else:
+        assert np.dtype(dtype.type).kind == 'u'
+    assert dtype.name is not None
+
+
+class BaseInteger(object):
+
+    def assert_index_equal(self, left, right, *args, **kwargs):
+
+        left_na = left.isna()
+        right_na = right.isna()
+
+        tm.assert_numpy_array_equal(left_na, right_na)
+        return tm.assert_index_equal(left[~left_na],
+                                     right[~right_na],
+                                     *args, **kwargs)
+
+    def assert_series_equal(self, left, right, *args, **kwargs):
+
+        left_na = left.isna()
+        right_na = right.isna()
+
+        tm.assert_series_equal(left_na, right_na)
+        return tm.assert_series_equal(left[~left_na],
+                                      right[~right_na],
+                                      *args, **kwargs)
+
+    def assert_frame_equal(self, left, right, *args, **kwargs):
+        # TODO(EA): select_dtypes
+        tm.assert_index_equal(
+            left.columns, right.columns,
+            exact=kwargs.get('check_column_type', 'equiv'),
+            check_names=kwargs.get('check_names', True),
+            check_exact=kwargs.get('check_exact', False),
+            check_categorical=kwargs.get('check_categorical', True),
+            obj='{obj}.columns'.format(obj=kwargs.get('obj', 'DataFrame')))
+
+        integers = (left.dtypes == 'integer').index
+
+        for col in integers:
+            self.assert_series_equal(left[col], right[col],
+                                     *args, **kwargs)
+
+        left = left.drop(columns=integers)
+        right = right.drop(columns=integers)
+        tm.assert_frame_equal(left, right, *args, **kwargs)
+
+
+class TestDtype(BaseInteger, base.BaseDtypeTests):
+
+    @pytest.mark.skip(reason="using multiple dtypes")
+    def test_is_dtype_unboxes_dtype(self):
+        # we have multiple dtypes, so skip
+        pass
+
+    def test_array_type_with_arg(self, data, dtype):
+        assert dtype.construct_array_type() is IntegerArray
+
+
+class TestArithmeticOps(BaseInteger, base.BaseArithmeticOpsTests):
+
+    def _check_divmod_op(self, s, op, other, exc=None):
+        super(TestArithmeticOps, self)._check_divmod_op(s, op, other, None)
+
+    def _check_op(self, s, op_name, other, exc=None):
+        op = self.get_op_from_name(op_name)
+        result = op(s, other)
+
+        # positions that must be NA in the expected result
+        mask = s.isna()
+
+        # unwrap an IntegerArray operand: raw values plus its own NA mask
+        if isinstance(other, IntegerArray):
+            omask = getattr(other, 'mask', None)
+            other = getattr(other, 'data', other)
+            if omask is not None:
+                mask |= omask
+
+        # float result type or float op
+        if ((is_float_dtype(other) or is_float(other) or
+             op_name in ['__rtruediv__', '__truediv__',
+                         '__rdiv__', '__div__'])):
+            rs = s.astype('float')
+            expected = op(rs, other)
+            self._check_op_float(result, expected, mask, s, op_name, other)
+
+        # integer result type
+        else:
+            rs = pd.Series(s.values.data)
+            expected = op(rs, other)
+            self._check_op_integer(result, expected, mask, s, op_name, other)
+
+    def _check_op_float(self, result, expected, mask, s, op_name, other):
+        # check comparisons that are resulting in float dtypes
+
+        expected[mask] = np.nan
+        self.assert_series_equal(result, expected)
+
+    def _check_op_integer(self, result, expected, mask, s, op_name, other):
+        # check comparisons that are resulting in integer dtypes
+
+        # to compare properly, we convert the expected
+        # to float, mask to nans and convert infs
+        # if we have uints then we process as uints
+        # then convert to float
+        # and we ultimately want to create a IntArray
+        # for comparisons
+
+        fill_value = 0
+
+        # mod/rmod turn floating 0 into NaN while
+        # integer works as expected (no nan)
+        if op_name in ['__mod__', '__rmod__']:
+            if is_scalar(other):
+                if other == 0:
+                    expected[s.values == 0] = 0
+                else:
+                    expected = expected.fillna(0)
+            else:
+                expected[(s.values == 0) &
+                         ((expected == 0) | expected.isna())] = 0
+
+        try:
+            expected[(expected == np.inf) | (expected == -np.inf)] = fill_value
+            original = expected
+            expected = expected.astype(s.dtype)
+
+        except ValueError:
+
+            expected = expected.astype(float)
+            expected[(expected == np.inf) | (expected == -np.inf)] = fill_value
+            original = expected
+            expected = expected.astype(s.dtype)
+
+        expected[mask] = np.nan
+
+        # assert that the expected astype is ok
+        # (skip for unsigned as they have wrap around)
+        if not s.dtype.is_unsigned_integer:
+            original = pd.Series(original)
+
+            # we need to fill with 0's to emulate what an astype('int') does
+            # (truncation) for certain ops
+            if op_name in ['__rtruediv__', '__rdiv__']:
+                mask |= original.isna()
+                original = original.fillna(0).astype('int')
+
+            original = original.astype('float')
+            original[mask] = np.nan
+            self.assert_series_equal(original, expected.astype('float'))
+
+        # assert our expected result
+        self.assert_series_equal(result, expected)
+
+    def test_arith_integer_array(self, data, all_arithmetic_operators):
+        # we operate with a rhs of an integer array
+
+        op = all_arithmetic_operators
+
+        s = pd.Series(data)
+        rhs = pd.Series([1] * len(data), dtype=data.dtype)
+        rhs.iloc[-1] = np.nan
+
+        self._check_op(s, op, rhs)
+
+    def test_arith_scalar(self, data, all_arithmetic_operators):
+        # scalar
+        op = all_arithmetic_operators
+
+        s = pd.Series(data)
+        self._check_op(s, op, 1, exc=TypeError)
+
+    def test_arith_array(self, data, all_arithmetic_operators):
+        # ndarray & other series
+        op = all_arithmetic_operators
+
+        s = pd.Series(data)
+        other = np.ones(len(s), dtype=s.dtype.type)
+        self._check_op(s, op, other, exc=TypeError)
+
+    def test_arith_coerce_scalar(self, data, all_arithmetic_operators):
+
+        op = all_arithmetic_operators
+        s = pd.Series(data)
+
+        other = 0.01
+        self._check_op(s, op, other)
+
+    def test_error(self, data, all_arithmetic_operators):
+        # invalid ops
+
+        op = all_arithmetic_operators
+        s = pd.Series(data)
+        ops = getattr(s, op)
+        opa = getattr(data, op)
+
+        # invalid scalars
+        with pytest.raises(TypeError):
+            ops('foo')
+        with pytest.raises(TypeError):
+            ops(pd.Timestamp('20180101'))
+
+        # invalid array-likes
+        with pytest.raises(TypeError):
+            ops(pd.Series('foo', index=s.index))
+
+        if op != '__rpow__':
+            # TODO(extension)
+            # rpow with a datetimelike coerces the integer array incorrectly
+            with pytest.raises(TypeError):
+                ops(pd.Series(pd.date_range('20180101', periods=len(s))))
+
+        # 2d
+        with pytest.raises(TypeError):
+            opa(pd.DataFrame({'A': s}))
+        with pytest.raises(TypeError):
+            opa(np.arange(len(s)).reshape(-1, len(s)))
+
+
+class TestComparisonOps(BaseInteger, base.BaseComparisonOpsTests):
+
+    def _compare_other(self, s, data, op_name, other):
+        op = self.get_op_from_name(op_name)
+
+        # array
+        result = op(s, other)
+        expected = pd.Series(op(data.data, other))
+
+        # fill the nan locations
+        expected[data.mask] = True if op_name == '__ne__' else False
+
+        tm.assert_series_equal(result, expected)
+
+        # series
+        s = pd.Series(data)
+        result = op(s, other)
+
+        expected = pd.Series(data.data)
+        expected = op(expected, other)
+
+        # fill the nan locations
+        expected[data.mask] = True if op_name == '__ne__' else False
+
+        tm.assert_series_equal(result, expected)
+
+
+class TestInterface(BaseInteger, base.BaseInterfaceTests):
+
+    def test_repr_array(self, data):
+        result = repr(data)
+
+        # not long
+        assert '...' not in result
+
+        assert 'dtype=' in result
+        assert 'IntegerArray' in result
+
+    def test_repr_array_long(self, data):
+        # some arrays may be able to assert a ... in the repr
+        with pd.option_context('display.max_seq_items', 1):
+            result = repr(data)
+
+            assert '...' in result
+            assert 'length' in result
+
+
+class TestConstructors(BaseInteger, base.BaseConstructorsTests):
+
+    def test_from_dtype_from_float(self, data):
+        # construct from our dtype & string dtype
+        dtype = data.dtype
+
+        # from float
+        expected = pd.Series(data)
+        result = pd.Series(np.array(data).astype('float'), dtype=str(dtype))
+        self.assert_series_equal(result, expected)
+
+        # from int / list
+        expected = pd.Series(data)
+        result = pd.Series(np.array(data).tolist(), dtype=str(dtype))
+        self.assert_series_equal(result, expected)
+
+        # from int / array
+        expected = pd.Series(data).dropna().reset_index(drop=True)
+        dropped = np.array(data.dropna()).astype(np.dtype((dtype.type)))
+        result = pd.Series(dropped, dtype=str(dtype))
+        self.assert_series_equal(result, expected)
+
+
+class TestReshaping(BaseInteger, base.BaseReshapingTests):
+
+    def test_concat_mixed_dtypes(self, data):
+        # https://github.com/pandas-dev/pandas/issues/20762
+        df1 = pd.DataFrame({'A': data[:3]})
+        df2 = pd.DataFrame({"A": [1, 2, 3]})
+        df3 = pd.DataFrame({"A": ['a', 'b', 'c']}).astype('category')
+        df4 = pd.DataFrame({"A": pd.SparseArray([1, 2, 3])})
+        dfs = [df1, df2, df3, df4]
+
+        # dataframes
+        result = pd.concat(dfs)
+        expected = pd.concat([x.astype(object) for x in dfs])
+        self.assert_frame_equal(result, expected)
+
+        # series
+        result = pd.concat([x['A'] for x in dfs])
+        expected = pd.concat([x['A'].astype(object) for x in dfs])
+        self.assert_series_equal(result, expected)
+
+        result = pd.concat([df1, df2])
+        expected = pd.concat([df1.astype('object'), df2.astype('object')])
+        self.assert_frame_equal(result, expected)
+
+        # concat of an Integer and Int coerces to object dtype
+        # TODO(jreback) once integrated this would
+        # be a result of Integer
+        result = pd.concat([df1['A'], df2['A']])
+        expected = pd.concat([df1['A'].astype('object'),
+                              df2['A'].astype('object')])
+        self.assert_series_equal(result, expected)
+
+
+class TestGetitem(BaseInteger, base.BaseGetitemTests):
+    pass
+
+
+class TestMissing(BaseInteger, base.BaseMissingTests):
+    pass
+
+
+class TestMethods(BaseInteger, base.BaseMethodsTests):
+
+    @pytest.mark.parametrize('dropna', [True, False])
+    def test_value_counts(self, all_data, dropna):
+        all_data = all_data[:10]
+        if dropna:
+            other = np.array(all_data[~all_data.isna()])
+        else:
+            other = all_data
+
+        result = pd.Series(all_data).value_counts(dropna=dropna).sort_index()
+        expected = pd.Series(other).value_counts(
+            dropna=dropna).sort_index()
+        expected.index = expected.index.astype(all_data.dtype)
+
+        self.assert_series_equal(result, expected)
+
+    def test_combine_add(self, data_repeated):
+        # GH 20825
+        orig_data1, orig_data2 = data_repeated(2)
+        s1 = pd.Series(orig_data1)
+        s2 = pd.Series(orig_data2)
+
+        # fundamentally this is not a great operation
+        # as overflow / underflow can easily happen here
+        # e.g. int8 + int8, so widen *both* operands to Python ints
+        def scalar_add(a, b):
+
+            # TODO: should really be a type specific NA
+            if pd.isna(a) or pd.isna(b):
+                return np.nan
+            if is_integer(a):
+                a = int(a)
+            if is_integer(b):
+                b = int(b)
+            return a + b
+
+        result = s1.combine(s2, scalar_add)
+        expected = pd.Series(
+            orig_data1._from_sequence([scalar_add(a, b) for (a, b) in
+                                       zip(orig_data1,
+                                           orig_data2)]))
+        self.assert_series_equal(result, expected)
+
+        val = s1.iloc[0]
+        result = s1.combine(val, lambda x1, x2: x1 + x2)
+        expected = pd.Series(
+            orig_data1._from_sequence([a + val for a in list(orig_data1)]))
+        self.assert_series_equal(result, expected)
+
+
+class TestCasting(BaseInteger, base.BaseCastingTests):
+
+    @pytest.mark.parametrize('dropna', [True, False])
+    def test_construct_index(self, all_data, dropna):
+        # ensure that we do not coerce to Float64Index, rather
+        # keep as Index
+
+        all_data = all_data[:10]
+        if dropna:
+            other = np.array(all_data[~all_data.isna()])
+        else:
+            other = all_data
+
+        result = pd.Index(IntegerArray(other,
+                                       dtype=all_data.dtype))
+        expected = pd.Index(other, dtype=object)
+
+        self.assert_index_equal(result, expected)
+
+    @pytest.mark.parametrize('dropna', [True, False])
+    def test_astype_index(self, all_data, dropna):
+        # as an int/uint index to Index
+
+        all_data = all_data[:10]
+        if dropna:
+            other = all_data[~all_data.isna()]
+        else:
+            other = all_data
+
+        dtype = all_data.dtype
+        idx = pd.Index(np.array(other))
+        assert isinstance(idx, ABCIndexClass)
+
+        result = idx.astype(dtype)
+        expected = idx.astype(object).astype(dtype)
+        self.assert_index_equal(result, expected)
+
+
+class TestGroupby(BaseInteger, base.BaseGroupbyTests):
+
+    @pytest.mark.xfail(reason="groupby not working")
+    def test_groupby_extension_no_sort(self, data_for_grouping):
+        super(TestGroupby, self).test_groupby_extension_no_sort(
+            data_for_grouping)
+
+    @pytest.mark.xfail(reason="groupby not working")
+    @pytest.mark.parametrize('as_index', [True, False])
+    def test_groupby_extension_agg(self, as_index, data_for_grouping):
+        super(TestGroupby, self).test_groupby_extension_agg(
+            as_index, data_for_grouping)
+
+
+def test_frame_repr(data_missing):
+
+    df = pd.DataFrame({'A': data_missing})
+    result = repr(df)
+    expected = '     A\n0  NaN\n1    1'
+    assert result == expected
+
+
+def test_conversions(data_missing):
+
+    # astype to object series
+    df = pd.DataFrame({'A': data_missing})
+    result = df['A'].astype('object')
+    expected = pd.Series(np.array([np.nan, 1], dtype=object), name='A')
+    tm.assert_series_equal(result, expected)
+
+    # convert to object ndarray
+    # we assert that we are exactly equal
+    # including type conversions of scalars
+    result = df['A'].astype('object').values
+    expected = np.array([np.nan, 1], dtype=object)
+    tm.assert_numpy_array_equal(result, expected)
+
+    for r, e in zip(result, expected):
+        if pd.isnull(r):
+            assert pd.isnull(e)
+        elif is_integer(r):
+            # PY2 can be int or long
+            assert r == e
+            assert is_integer(e)
+        else:
+            assert r == e
+            assert type(r) == type(e)
+
+
+@pytest.mark.parametrize(
+    'values',
+    [
+        ['foo', 'bar'],
+        'foo',
+        1,
+        1.0,
+        pd.date_range('20130101', periods=2),
+        np.array(['foo'])])
+def test_to_integer_array_error(values):
+    # error in converting existing arrays to IntegerArrays
+    with pytest.raises(TypeError):
+        to_integer_array(values)
+
+
+@pytest.mark.parametrize(
+    'values, dtype',
+    [
+        (np.array([1], dtype='int64'), Int64Dtype),
+        (np.array([1, np.nan]), Int64Dtype)])
+def test_to_integer_array(values, dtype):
+    # convert existing arrays to IntegerArrays
+    result = to_integer_array(values)
+    expected = IntegerArray(values, dtype=dtype)
+    tm.assert_extension_array_equal(result, expected)
+
+
+def test_cross_type_arithmetic():
+
+    df = pd.DataFrame({'A': pd.Series([1, 2, np.nan], dtype='Int64'),
+                       'B': pd.Series([1, np.nan, 3], dtype='UInt8'),
+                       'C': [1, 2, 3]})
+
+    result = df.A + df.C
+    expected = pd.Series([2, 4, np.nan], dtype='Int64')
+    tm.assert_series_equal(result, expected)
+
+    result = (df.A + df.C) * 3 == 12
+    expected = pd.Series([False, True, False])
+    tm.assert_series_equal(result, expected)
+
+    result = df.A + df.B
+    expected = pd.Series([2, np.nan, np.nan], dtype='Int64')
+    tm.assert_series_equal(result, expected)
+
+
+# TODO(jreback) - these need testing / are broken
+
+# shift
+
+# set_index (destroys type)
diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py
index 160bf259e1e32f..34c397252a8bb1 100644
--- a/pandas/tests/extension/json/array.py
+++ b/pandas/tests/extension/json/array.py
@@ -13,8 +13,6 @@
 import collections
 import itertools
 import numbers
-import random
-import string
 import sys
 
 import numpy as np
@@ -54,7 +52,7 @@ def construct_from_string(cls, string):
 class JSONArray(ExtensionArray):
     dtype = JSONDtype()
 
-    def __init__(self, values, copy=False):
+    def __init__(self, values, dtype=None, copy=False):
         for val in values:
             if not isinstance(val, self.dtype.type):
                 raise TypeError("All values must be of type " +
@@ -69,7 +67,7 @@ def __init__(self, values, copy=False):
         # self._values = self.values = self.data
 
     @classmethod
-    def _from_sequence(cls, scalars, copy=False):
+    def _from_sequence(cls, scalars, dtype=None, copy=False):
         return cls(scalars)
 
     @classmethod
@@ -180,10 +178,3 @@ def _values_for_argsort(self):
         # cast them to an (N, P) array, instead of an (N,) array of tuples.
         frozen = [()] + list(tuple(x.items()) for x in self)
         return np.array(frozen, dtype=object)[1:]
-
-
-def make_data():
-    # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer
-    return [collections.UserDict([
-        (random.choice(string.ascii_letters), random.randint(0, 100))
-        for _ in range(random.randint(0, 10))]) for _ in range(100)]
diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
index 7eeaf7946663ed..126607907a8dab 100644
--- a/pandas/tests/extension/json/test_json.py
+++ b/pandas/tests/extension/json/test_json.py
@@ -1,5 +1,7 @@
 import operator
 import collections
+import random
+import string
 
 import pytest
 
@@ -8,11 +10,19 @@
 from pandas.compat import PY2, PY36
 from pandas.tests.extension import base
 
-from .array import JSONArray, JSONDtype, make_data
+from .array import JSONArray, JSONDtype
 
 pytestmark = pytest.mark.skipif(PY2, reason="Py2 doesn't have a UserDict")
 
 
+@pytest.fixture
+def make_data():  # 100 UserDicts built from 0-10 random (letter, int) pairs
+    # TODO: Use a regular dict. See _NDFrameIndexer._setitem_with_indexer
+    return [collections.UserDict([
+        (random.choice(string.ascii_letters), random.randint(0, 100))
+        for _ in range(random.randint(0, 10))]) for _ in range(100)]
+
+
 @pytest.fixture
 def dtype():
     return JSONDtype()
@@ -203,7 +213,8 @@ def test_combine_add(self, data_repeated):
 
 
 class TestCasting(BaseJSON, base.BaseCastingTests):
-    @pytest.mark.xfail
+
+    @pytest.mark.xfail(reason="failing on np.array(self, dtype=str)")
     def test_astype_str(self):
         """This currently fails in NumPy on np.array(self, dtype=str) with