Skip to content

Commit 8fc8c7a

Browse files
committed
[AIRFLOW-2859] Implement own UtcDateTime (apache#3708)
The different UtcDateTime implementations all have issues: either they replace tzinfo directly without converting, or they do not convert to UTC at all. We also ensure that all MySQL connections use UTC to keep things sane, as MySQL ignores the time zone of a field when inserting or updating. (cherry picked from commit 6fd4e60) Signed-off-by: Bolke de Bruin <bolke@xs4all.nl>
1 parent f58246d commit 8fc8c7a

File tree

9 files changed

+174
-17
lines changed

9 files changed

+174
-17
lines changed

airflow/bin/cli.py

-1
Original file line numberDiff line numberDiff line change
@@ -1003,7 +1003,6 @@ def initdb(args): # noqa
10031003
print("Done.")
10041004

10051005

1006-
@cli_utils.action_logging
10071006
def resetdb(args):
10081007
print("DB: " + repr(settings.engine.url))
10091008
if args.yes or input("This will drop existing tables "

airflow/jobs.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@
4040
Column, Integer, String, func, Index, or_, and_, not_)
4141
from sqlalchemy.exc import OperationalError
4242
from sqlalchemy.orm.session import make_transient
43-
from sqlalchemy_utc import UtcDateTime
4443
from tabulate import tabulate
4544
from time import sleep
4645

@@ -52,6 +51,7 @@
5251
from airflow.task.task_runner import get_task_runner
5352
from airflow.ti_deps.dep_context import DepContext, QUEUE_DEPS, RUN_DEPS
5453
from airflow.utils import asciiart, helpers, timezone
54+
from airflow.utils.configuration import tmp_configuration_copy
5555
from airflow.utils.dag_processing import (AbstractDagFileProcessor,
5656
DagFileProcessorManager,
5757
SimpleDag,
@@ -60,9 +60,9 @@
6060
from airflow.utils.db import create_session, provide_session
6161
from airflow.utils.email import send_email
6262
from airflow.utils.log.logging_mixin import LoggingMixin, set_context, StreamLogWriter
63-
from airflow.utils.state import State
64-
from airflow.utils.configuration import tmp_configuration_copy
6563
from airflow.utils.net import get_hostname
64+
from airflow.utils.state import State
65+
from airflow.utils.sqlalchemy import UtcDateTime
6666

6767
Base = models.Base
6868
ID_LEN = models.ID_LEN

airflow/models.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@
6060
from sqlalchemy import func, or_, and_, true as sqltrue
6161
from sqlalchemy.ext.declarative import declarative_base, declared_attr
6262
from sqlalchemy.orm import reconstructor, relationship, synonym
63-
from sqlalchemy_utc import UtcDateTime
6463

6564
from croniter import croniter
6665
import six
@@ -87,6 +86,7 @@
8786
as_tuple, is_container, validate_key, pprinttable)
8887
from airflow.utils.operator_resources import Resources
8988
from airflow.utils.state import State
89+
from airflow.utils.sqlalchemy import UtcDateTime
9090
from airflow.utils.timeout import timeout
9191
from airflow.utils.trigger_rule import TriggerRule
9292
from airflow.utils.weight_rule import WeightRule

airflow/utils/sqlalchemy.py

+58-3
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,19 @@
2222
from __future__ import print_function
2323
from __future__ import unicode_literals
2424

25+
import datetime
2526
import os
27+
import pendulum
2628
import time
2729
import random
2830

2931
from sqlalchemy import event, exc, select
32+
from sqlalchemy.types import DateTime, TypeDecorator
3033

3134
from airflow.utils.log.logging_mixin import LoggingMixin
3235

3336
log = LoggingMixin().log
37+
utc = pendulum.timezone('UTC')
3438

3539

3640
def setup_event_handlers(
@@ -101,13 +105,21 @@ def ping_connection(connection, branch):
101105
def connect(dbapi_connection, connection_record):
102106
connection_record.info['pid'] = os.getpid()
103107

104-
@event.listens_for(engine, "connect")
105-
def set_sqlite_pragma(dbapi_connection, connection_record):
106-
if 'sqlite3.Connection' in str(type(dbapi_connection)):
108+
if engine.dialect.name == "sqlite":
109+
@event.listens_for(engine, "connect")
110+
def set_sqlite_pragma(dbapi_connection, connection_record):
107111
cursor = dbapi_connection.cursor()
108112
cursor.execute("PRAGMA foreign_keys=ON")
109113
cursor.close()
110114

115+
# this ensures sanity in mysql when storing datetimes (not required for postgres)
116+
if engine.dialect.name == "mysql":
117+
@event.listens_for(engine, "connect")
118+
def set_mysql_timezone(dbapi_connection, connection_record):
119+
cursor = dbapi_connection.cursor()
120+
cursor.execute("SET time_zone = '+00:00'")
121+
cursor.close()
122+
111123
@event.listens_for(engine, "checkout")
112124
def checkout(dbapi_connection, connection_record, connection_proxy):
113125
pid = os.getpid()
@@ -117,3 +129,46 @@ def checkout(dbapi_connection, connection_record, connection_proxy):
117129
"Connection record belongs to pid {}, "
118130
"attempting to check out in pid {}".format(connection_record.info['pid'], pid)
119131
)
132+
133+
134+
class UtcDateTime(TypeDecorator):
    """
    Column type that only deals in time zone aware UTC datetimes.

    It is close to :class:`~sqlalchemy.types.DateTime` declared with
    ``timezone=True``, with these differences:

    - A naive :class:`~datetime.datetime` is never silently accepted when
      binding a parameter; :exc:`ValueError` is raised instead.
    - The :attr:`~datetime.datetime.tzinfo` of a bound value is always
      converted to UTC.
    - Values loaded from the database are never returned naive; they are
      always time zone aware (the original author notes this holds even
      with SQLite or MySQL).
    - Values loaded from the database are always returned in UTC.
    """

    impl = DateTime(timezone=True)

    def process_bind_param(self, value, dialect):
        """
        Validate a value on its way to the database and convert it to UTC.

        ``None`` is passed through unchanged.

        :raises TypeError: if the value is not a ``datetime.datetime``
        :raises ValueError: if the value is a naive datetime
        """
        if value is None:
            return None

        if not isinstance(value, datetime.datetime):
            raise TypeError('expected datetime.datetime, not ' +
                            repr(value))

        if value.tzinfo is None:
            raise ValueError('naive datetime is disallowed')

        return value.astimezone(utc)

    def process_result_value(self, value, dialect):
        """
        Normalise a datetime coming from the database to UTC.

        timezone.convert_to_utc is deliberately not used here, as it
        converts to the configured TIMEZONE while the database might be
        running with some other setting. Datetimes stored in the database
        are assumed to be UTC, so a naive value is tagged as UTC and an
        aware value is converted to UTC. ``None`` is passed through.
        """
        if value is None:
            return value

        if value.tzinfo is None:
            # No conversion: the stored wall-clock time is taken to be UTC.
            return value.replace(tzinfo=utc)

        return value.astimezone(utc)

run_unit_tests.sh

+3-2
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
# to you under the Apache License, Version 2.0 (the
99
# "License"); you may not use this file except in compliance
1010
# with the License. You may obtain a copy of the License at
11-
#
11+
#
1212
# http://www.apache.org/licenses/LICENSE-2.0
13-
#
13+
#
1414
# Unless required by applicable law or agreed to in writing,
1515
# software distributed under the License is distributed on an
1616
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -91,3 +91,4 @@ nosetests $nose_args
9191

9292
# To run individual tests:
9393
# nosetests tests.core:CoreTest.test_scheduler_job
94+

setup.py

-1
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,6 @@ def do_setup():
298298
'requests>=2.5.1, <3',
299299
'setproctitle>=1.1.8, <2',
300300
'sqlalchemy>=1.1.15, <1.2.0',
301-
'sqlalchemy-utc>=0.9.0',
302301
'tabulate>=0.7.5, <0.8.0',
303302
'tenacity==4.8.0',
304303
'thrift>=0.9.2',

tests/core.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@
3838
from email.mime.application import MIMEApplication
3939
from email.mime.multipart import MIMEMultipart
4040
from email.mime.text import MIMEText
41-
from freezegun import freeze_time
4241
from numpy.testing import assert_array_almost_equal
4342
from six.moves.urllib.parse import urlencode
4443
from time import sleep
@@ -70,6 +69,7 @@
7069
from airflow.configuration import AirflowConfigException, run_command
7170
from jinja2.sandbox import SecurityError
7271
from jinja2 import UndefinedError
72+
from pendulum import utcnow
7373

7474
import six
7575

@@ -261,7 +261,6 @@ def test_schedule_dag_start_end_dates(self):
261261

262262
self.assertIsNone(additional_dag_run)
263263

264-
@freeze_time('2016-01-01')
265264
def test_schedule_dag_no_end_date_up_to_today_only(self):
266265
"""
267266
Tests that a Dag created without an end_date can only be scheduled up
@@ -273,8 +272,11 @@ def test_schedule_dag_no_end_date_up_to_today_only(self):
273272
"""
274273
session = settings.Session()
275274
delta = timedelta(days=1)
276-
start_date = DEFAULT_DATE
277-
runs = 365
275+
now = utcnow()
276+
start_date = now.subtract(weeks=1)
277+
278+
runs = (now - start_date).days
279+
278280
dag = DAG(TEST_DAG_ID + 'test_schedule_dag_no_end_date_up_to_today_only',
279281
start_date=start_date,
280282
schedule_interval=delta)

tests/test_utils/fake_datetime.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77
# to you under the Apache License, Version 2.0 (the
88
# "License"); you may not use this file except in compliance
99
# with the License. You may obtain a copy of the License at
10-
#
10+
#
1111
# http://www.apache.org/licenses/LICENSE-2.0
12-
#
12+
#
1313
# Unless required by applicable law or agreed to in writing,
1414
# software distributed under the License is distributed on an
1515
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY

tests/utils/test_sqlalchemy.py

+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Licensed to the Apache Software Foundation (ASF) under one
4+
# or more contributor license agreements. See the NOTICE file
5+
# distributed with this work for additional information
6+
# regarding copyright ownership. The ASF licenses this file
7+
# to you under the Apache License, Version 2.0 (the
8+
# "License"); you may not use this file except in compliance
9+
# with the License. You may obtain a copy of the License at
10+
#
11+
# http://www.apache.org/licenses/LICENSE-2.0
12+
#
13+
# Unless required by applicable law or agreed to in writing,
14+
# software distributed under the License is distributed on an
15+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16+
# KIND, either express or implied. See the License for the
17+
# specific language governing permissions and limitations
18+
# under the License.
19+
#
20+
import datetime
21+
import unittest
22+
23+
from airflow import settings
24+
from airflow.models import DAG
25+
from airflow.settings import Session
26+
from airflow.utils.state import State
27+
from airflow.utils.timezone import utcnow
28+
29+
from sqlalchemy.exc import StatementError
30+
31+
32+
class TestSqlAlchemyUtils(unittest.TestCase):
    """
    Round-trip tests for datetimes stored through the ORM: aware values
    must come back unchanged and naive values must be refused.
    """

    def setUp(self):
        session = Session()

        # make sure NOT to run in UTC. Only postgres supports storing
        # timezone information in the datetime field
        if session.bind.dialect.name == "postgresql":
            session.execute("SET timezone='Europe/Amsterdam'")

        self.session = session

    def test_utc_transformations(self):
        """
        Test whether what we are storing is what we are retrieving
        for datetimes
        """
        dag_id = 'test_utc_transformations'
        start_date = utcnow()
        iso_date = start_date.isoformat()
        execution_date = start_date + datetime.timedelta(hours=1, days=1)

        dag = DAG(
            dag_id=dag_id,
            start_date=start_date,
        )
        dag.clear()

        run = dag.create_dagrun(
            run_id=iso_date,
            state=State.NONE,
            execution_date=execution_date,
            start_date=start_date,
            session=self.session,
        )

        self.assertEqual(execution_date, run.execution_date)
        self.assertEqual(start_date, run.start_date)

        self.assertEqual(execution_date.utcoffset().total_seconds(), 0.0)
        self.assertEqual(start_date.utcoffset().total_seconds(), 0.0)

        self.assertEqual(iso_date, run.run_id)
        self.assertEqual(run.start_date.isoformat(), run.run_id)

        dag.clear()

    def test_process_bind_param_naive(self):
        """
        Check if naive datetimes are prevented from saving to the db
        """
        dag_id = 'test_process_bind_param_naive'

        # naive
        start_date = datetime.datetime.now()
        dag = DAG(dag_id=dag_id, start_date=start_date)
        dag.clear()

        with self.assertRaises((ValueError, StatementError)):
            dag.create_dagrun(
                # Pass the ISO string, not the bound method: a method object
                # as run_id could raise a binding error of its own and let
                # this test pass without the naive datetime being rejected.
                run_id=start_date.isoformat(),
                state=State.NONE,
                execution_date=start_date,
                start_date=start_date,
                session=self.session
            )
        dag.clear()

    def tearDown(self):
        self.session.close()
        settings.engine.dispose()

0 commit comments

Comments
 (0)