Skip to content

Commit df4c52b

Browse files
date_microseconds FUTURE flag (#6260)
* PoC monkeypatch precision. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add FUTURE flag. * FutureWarning. * Corrected behaviour and added tests. * Corrected behaviour and added tests. * What's New entry. * Make sensitive to cf-units version. * Further test improvements. * Clearer FutureWarning text. * Use a cf-units subclass instead. * Rename _IrisUnit to Unit. --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 01bbdf6 commit df4c52b

File tree

4 files changed

+196
-5
lines changed

4 files changed

+196
-5
lines changed

docs/src/whatsnew/latest.rst

+11-2
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,13 @@ This document explains the changes made to Iris for this release
3030
✨ Features
3131
===========
3232

33-
#. N/A
33+
#. `@trexfeathers`_ added a new :class:`~iris.Future` flag -
34+
``date_microseconds`` - which sets whether Iris should use the new
35+
microsecond-precision units (see :class:`cf_units.Unit`, microseconds
36+
introduced in version 3.3) when the unit
37+
is a time unit. The previous maximum precision was seconds. You should check
38+
your code for new floating point problems if activating this (e.g. when
39+
using the :class:`~iris.Constraint` API). (:pull:`6260`)
3440

3541

3642
🐛 Bugs Fixed
@@ -50,7 +56,10 @@ This document explains the changes made to Iris for this release
5056
🚀 Performance Enhancements
5157
===========================
5258

53-
#. N/A
59+
#. Note that due to the new ``date_microseconds`` :class:`~iris.Future` flag,
60+
the time coordinate categorisation speedup introduced in
61+
:doc:`/whatsnew/3.11` will only be available when
62+
``iris.FUTURE.date_microseconds == True``.
5463

5564

5665
🔥 Deprecations

lib/iris/__init__.py

+21-2
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,13 @@ def callback(cube, field, filename):
143143
class Future(threading.local):
144144
"""Run-time configuration controller."""
145145

146-
def __init__(self, datum_support=False, pandas_ndim=False, save_split_attrs=False):
146+
def __init__(
147+
self,
148+
datum_support=False,
149+
pandas_ndim=False,
150+
save_split_attrs=False,
151+
date_microseconds=False,
152+
):
147153
"""Container for run-time options controls.
148154
149155
To adjust the values simply update the relevant attribute from
@@ -169,6 +175,13 @@ def __init__(self, datum_support=False, pandas_ndim=False, save_split_attrs=Fals
169175
different ways : "global" ones are saved as dataset attributes, where
170176
possible, while "local" ones are saved as data-variable attributes.
171177
See :func:`iris.fileformats.netcdf.saver.save`.
178+
date_microseconds : bool, default=False
179+
Newer versions of cftime and cf-units support microsecond precision
180+
for dates, compared to the legacy behaviour that only works with
181+
seconds. Enabling microsecond precision will alter core Iris
182+
behaviour, such as when using :class:`~iris.Constraint`, and you
183+
may need to defend against floating point precision issues where
184+
you didn't need to before.
172185
173186
"""
174187
# The flag 'example_future_flag' is provided as a reference for the
@@ -181,6 +194,7 @@ def __init__(self, datum_support=False, pandas_ndim=False, save_split_attrs=Fals
181194
self.__dict__["datum_support"] = datum_support
182195
self.__dict__["pandas_ndim"] = pandas_ndim
183196
self.__dict__["save_split_attrs"] = save_split_attrs
197+
self.__dict__["date_microseconds"] = date_microseconds
184198

185199
# TODO: next major release: set IrisDeprecation to subclass
186200
# DeprecationWarning instead of UserWarning.
@@ -189,7 +203,12 @@ def __repr__(self):
189203
# msg = ('Future(example_future_flag={})')
190204
# return msg.format(self.example_future_flag)
191205
# One placeholder per Future flag; keep in step with the arguments passed to
# ``msg.format`` (str.format silently ignores surplus positional arguments,
# so a missing placeholder would hide a flag from the repr without an error).
msg = (
    "Future(datum_support={}, pandas_ndim={}, save_split_attrs={}, "
    "date_microseconds={})"
)
192-
return msg.format(self.datum_support, self.pandas_ndim, self.save_split_attrs)
206+
return msg.format(
207+
self.datum_support,
208+
self.pandas_ndim,
209+
self.save_split_attrs,
210+
self.date_microseconds,
211+
)
193212

194213
# deprecated_options = {'example_future_flag': 'warning',}
195214
deprecated_options: dict[str, Literal["error", "warning"]] = {}

lib/iris/common/mixin.py

+66-1
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,10 @@
77
from __future__ import annotations
88

99
from collections.abc import Mapping
10+
from datetime import timedelta
1011
from functools import wraps
1112
from typing import Any
13+
import warnings
1214

1315
import cf_units
1416
import numpy as np
@@ -139,6 +141,68 @@ def update(self, other, **kwargs):
139141
dict.update(self, other, **kwargs)
140142

141143

144+
class Unit(cf_units.Unit):
    """A :class:`cf_units.Unit` that honours ``iris.FUTURE.date_microseconds``.

    While the flag is ``False`` (the legacy behaviour), :meth:`num2date`
    rounds every returned date to the nearest whole second and emits a
    :class:`FutureWarning`. Otherwise the parent class result is returned
    unchanged.

    """

    # TODO: remove this subclass once FUTURE.date_microseconds is removed.

    @classmethod
    def from_unit(cls, unit: cf_units.Unit):
        """Cast a :class:`cf_units.Unit` to a :class:`Unit`.

        Parameters
        ----------
        unit : cf_units.Unit
            The unit to cast.

        Returns
        -------
        Unit
            ``unit`` itself when it already is a :class:`Unit`; otherwise a
            new instance whose ``__dict__`` is populated from ``unit``.

        Raises
        ------
        TypeError
            If ``unit`` is not a :class:`cf_units.Unit`.

        """
        if isinstance(unit, Unit):
            return unit
        if not isinstance(unit, cf_units.Unit):
            message = f"Expected a cf_units.Unit, got {type(unit)}"
            raise TypeError(message)
        # Build the instance without calling __init__, then adopt the state
        # of the existing unit.
        result = cls.__new__(cls)
        result.__dict__.update(unit.__dict__)
        return result

    @staticmethod
    def _round_to_second(date):
        """Return ``date`` rounded to the nearest second (half rounds up)."""
        if date.microsecond == 0:
            return date
        truncated = date - timedelta(microseconds=date.microsecond)
        if date.microsecond < 500000:
            return truncated
        return truncated + timedelta(seconds=1)

    def num2date(
        self,
        time_value,
        only_use_cftime_datetimes=True,
        only_use_python_datetimes=False,
    ):
        """Convert numeric time value(s) to date(s), honouring the FUTURE flag.

        Wraps the parent ``num2date``. When ``iris.FUTURE.date_microseconds``
        is ``False`` the result is rounded to the nearest second - the legacy
        behaviour - and a :class:`FutureWarning` is issued, whether or not the
        dates actually contain microseconds. Users will need to adapt to
        microsecond precision eventually, which may involve floating point
        issues.

        Parameters
        ----------
        time_value
            Passed through to the parent ``num2date``.
        only_use_cftime_datetimes : bool, default=True
            Passed through to the parent ``num2date``.
        only_use_python_datetimes : bool, default=False
            Passed through to the parent ``num2date``.

        Returns
        -------
        The parent result; a single date, or an array of dates when the
        parent returns an object with a ``shape``.

        """
        # NOTE(review): imported at call time rather than module level -
        # presumably to avoid a circular import; confirm before moving.
        from iris import FUTURE

        result = super().num2date(
            time_value, only_use_cftime_datetimes, only_use_python_datetimes
        )
        if FUTURE.date_microseconds is False:
            message = (
                "You are using legacy date precision for Iris units - max "
                "precision is seconds. In future, Iris will use microsecond "
                "precision - available since cf-units version 3.3 - which may "
                "affect core behaviour. To opt-in to the "
                "new behaviour, set `iris.FUTURE.date_microseconds = True`."
            )
            # stacklevel=2 attributes the warning to the caller of num2date
            # rather than to this compatibility shim.
            warnings.warn(message, category=FutureWarning, stacklevel=2)

            if hasattr(result, "shape"):
                # otypes must be given explicitly: without it np.vectorize
                # cannot infer an output type for size-0 inputs and raises
                # ValueError. Dates are Python objects, hence ``object``.
                vfunc = np.vectorize(self._round_to_second, otypes=[object])
                result = vfunc(result)
            else:
                result = self._round_to_second(result)

        return result
204+
205+
142206
class CFVariableMixin:
143207
_metadata_manager: Any
144208

@@ -207,7 +271,8 @@ def units(self) -> cf_units.Unit:
207271

208272
@units.setter
209273
def units(self, unit: cf_units.Unit | str | None) -> None:
210-
self._metadata_manager.units = cf_units.as_unit(unit)
274+
unit = cf_units.as_unit(unit)
275+
self._metadata_manager.units = Unit.from_unit(unit)
211276

212277
@property
213278
def attributes(self) -> LimitedAttributeDict:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# Copyright Iris contributors
2+
#
3+
# This file is part of Iris and is released under the BSD license.
4+
# See LICENSE in the root of the repository for full licensing details.
5+
"""Unit tests for the opt-in FUTURE.date_microseconds behaviour."""
6+
7+
import warnings
8+
9+
import cf_units
10+
import numpy as np
11+
from packaging.version import Version
12+
import pytest
13+
14+
from iris import FUTURE
15+
from iris.coords import DimCoord
16+
from iris.tests._shared_utils import assert_array_equal
17+
18+
# True when the installed cf-units predates version 3.3.0; the tests below
# expect such versions to round to whole seconds regardless of the FUTURE flag.
cf_units_legacy = Version(cf_units.__version__) < Version("3.3.0")
19+
20+
21+
@pytest.fixture(
    params=[0, 1000, 500000],
    ids=["no_microseconds", "1_millisecond", "half_second"],
)
def time_coord(request) -> tuple[int, DimCoord]:
    """Return ``(n_microseconds, coord)`` for a three-point time coordinate.

    The points are 0, 1 and 2 seconds since 1970-01-01, each offset by the
    parametrised number of microseconds. The first tuple element is that
    integer offset, so tests can work out the expected sub-second component.
    """
    points = np.array([0.0, 1.0, 2.0])
    # The parameter is in microseconds; the coordinate is in seconds.
    points += request.param / 1e6
    return request.param, DimCoord(
        points,
        "time",
        units="seconds since 1970-01-01 00:00:00",
    )
33+
34+
35+
@pytest.fixture(
    params=[False, True],
    ids=["without_future", "with_future"],
)
def future_date_microseconds(request):
    """Set ``FUTURE.date_microseconds`` for one test and yield the value used.

    The value that was active before the test is restored afterwards, so the
    fixture does not clobber a flag enabled elsewhere in the session.
    """
    original = FUTURE.date_microseconds
    FUTURE.date_microseconds = request.param
    try:
        yield request.param
    finally:
        FUTURE.date_microseconds = original
43+
44+
45+
def test_warning(time_coord, future_date_microseconds):
    """Check the FutureWarning is raised only when the flag is not enabled.

    The warning should be raised whether the coordinate has microseconds or
    not - we want users to be aware, and to opt in, as early as possible.
    """
    _, coord = time_coord

    if not future_date_microseconds:
        # Legacy behaviour: the conversion must warn.
        with pytest.warns(FutureWarning):
            coord.units.num2date(coord.points)
        return

    # Opted in: promote any FutureWarning to an error so the test fails on one.
    with warnings.catch_warnings():
        warnings.simplefilter("error", FutureWarning)
        coord.units.num2date(coord.points)
60+
61+
62+
@pytest.mark.parametrize(
    "indexing",
    (np.s_[0], np.s_[:], np.s_[:, np.newaxis]),
    ids=("single", "array", "array_2d"),
)
def test_num2date(time_coord, future_date_microseconds, indexing):
    """Check the microsecond component of dates for scalar, 1D and 2D input."""
    n_microseconds, coord = time_coord
    converted = coord.units.num2date(coord.points[indexing])

    is_single = indexing == np.s_[0]
    if is_single:
        assert hasattr(converted, "microsecond")
        # Wrap the lone date so the checks below can iterate uniformly.
        dates = [converted]
    else:
        assert hasattr(converted, "shape")
        dates = converted.flatten()
        assert hasattr(dates[0], "microsecond")

    # Microseconds only survive when the flag is on AND cf-units supports them.
    if cf_units_legacy or not future_date_microseconds:
        expected_microseconds = 0
    else:
        expected_microseconds = n_microseconds

    result_microseconds = np.array([date.microsecond for date in dates])
    assert_array_equal(result_microseconds, expected_microseconds)
86+
87+
88+
def test_roundup(time_coord, future_date_microseconds):
    """Check that half-second offsets round up to the next second in legacy mode."""
    n_microseconds, coord = time_coord
    dates = coord.units.num2date(coord.points)

    # Legacy cf-units versions round microseconds and ignore the future flag.
    rounds_to_seconds = cf_units_legacy or not future_date_microseconds
    expected_seconds = np.floor(coord.points)
    if rounds_to_seconds and n_microseconds >= 500000:
        expected_seconds += 1

    result_seconds = np.array([date.second for date in dates])
    assert_array_equal(result_seconds, expected_seconds)

0 commit comments

Comments
 (0)