Skip to content

Commit d14b6ca

Browse files
committed
reset
1 parent 1491f71 commit d14b6ca

File tree

3 files changed

+79
-4
lines changed

3 files changed

+79
-4
lines changed

Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

query_engine/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ workspace = true
1414
# In alphabetical order
1515
arrow = { workspace = true }
1616
async-trait = { workspace = true }
17+
chrono = { workspace = true }
1718
common_types = { workspace = true }
1819
common_util = { workspace = true }
1920
datafusion = { workspace = true }

query_engine/src/logical_optimizer/type_conversion.rs

+77-4
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
use std::{mem, sync::Arc};
44

5-
use arrow::{compute, compute::kernels::cast_utils::string_to_timestamp_nanos};
5+
use arrow::{compute, compute::kernels::cast_utils::string_to_timestamp_nanos, error::ArrowError};
6+
use chrono::{prelude::*, LocalResult};
67
use datafusion::{
78
arrow::datatypes::DataType,
89
common::DFSchemaRef,
@@ -281,16 +282,59 @@ impl<'a> ExprRewriter for TypeRewriter<'a> {
281282
}
282283

283284
fn string_to_timestamp_ms(string: &str) -> Result<ScalarValue> {
285+
// TODO(lee): remove following codes after PR(https://github.com/apache/arrow-rs/pull/3787) merged
286+
// Because function `string_to_timestamp_nanos` returns a NaiveDateTime's
287+
// nanoseconds from a string without a specify time zone, We need to convert
288+
// it to local timestamp.
289+
290+
// without a timezone specifier as a local time, using 'T' as a separator
291+
// Example: 2020-09-08T13:42:29.190855
292+
if let Ok(ts) = NaiveDateTime::parse_from_str(string, "%Y-%m-%dT%H:%M:%S%.f") {
293+
let mills = naive_datetime_to_timestamp(string, ts).map_err(DataFusionError::from)?;
294+
return Ok(ScalarValue::TimestampMillisecond(Some(mills), None));
295+
}
296+
297+
// without a timezone specifier as a local time, using ' ' as a separator
298+
// Example: 2020-09-08 13:42:29.190855
299+
if let Ok(ts) = NaiveDateTime::parse_from_str(string, "%Y-%m-%d %H:%M:%S%.f") {
300+
let mills = naive_datetime_to_timestamp(string, ts).map_err(DataFusionError::from)?;
301+
return Ok(ScalarValue::TimestampMillisecond(Some(mills), None));
302+
}
303+
304+
let result = string_to_timestamp_nanos(string);
284305
Ok(ScalarValue::TimestampMillisecond(
285306
Some(
286-
string_to_timestamp_nanos(string)
307+
result
287308
.map(|t| t / 1_000_000)
288309
.map_err(DataFusionError::from)?,
289310
),
290311
None,
291312
))
292313
}
293314

315+
/// Converts the naive datetime (which has no specific timezone) to a
316+
/// nanosecond epoch timestamp relative to UTC.
317+
fn naive_datetime_to_timestamp(s: &str, datetime: NaiveDateTime) -> Result<i64, ArrowError> {
318+
let l = Local {};
319+
320+
match l.from_local_datetime(&datetime) {
321+
LocalResult::None => Err(ArrowError::CastError(format!(
322+
"Error parsing '{s}' as timestamp: local time representation is invalid"
323+
))),
324+
LocalResult::Single(local_datetime) => {
325+
Ok(local_datetime.with_timezone(&Utc).timestamp_nanos() / 1_000_000)
326+
}
327+
// Ambiguous times can happen if the timestamp is exactly when
328+
// a daylight savings time transition occurs, for example, and
329+
// so the datetime could validly be said to be in two
330+
// potential offsets. However, since we are about to convert
331+
// to UTC anyways, we can pick one arbitrarily
332+
LocalResult::Ambiguous(local_datetime, _) => {
333+
Ok(local_datetime.with_timezone(&Utc).timestamp_nanos() / 1_000_000)
334+
}
335+
}
336+
}
337+
294338
enum TimestampType {
295339
Second,
296340
#[allow(dead_code)]
@@ -321,6 +365,7 @@ mod tests {
321365
};
322366

323367
use super::*;
368+
use crate::logical_optimizer::type_conversion;
324369

325370
fn expr_test_schema() -> DFSchemaRef {
326371
Arc::new(
@@ -445,7 +490,7 @@ mod tests {
445490

446491
#[test]
447492
fn test_type_conversion_timestamp() {
448-
let date_string = "2021-09-07 16:00:00".to_string();
493+
let date_string = "2021-09-07T16:00:00Z".to_string();
449494
let schema = expr_test_schema();
450495
let mut rewriter = TypeRewriter {
451496
schemas: vec![&schema],
@@ -498,7 +543,7 @@ mod tests {
498543
);
499544

500545
// Timestamp c6 between "2021-09-07 16:00:00" and "2021-09-07 17:00:00"
501-
let date_string2 = "2021-09-07 17:00:00".to_string();
546+
let date_string2 = "2021-09-07T17:00:00Z".to_string();
502547
let exp = Expr::Between(Between {
503548
expr: Box::new(col("c6")),
504549
negated: false,
@@ -530,4 +575,32 @@ mod tests {
530575
})
531576
);
532577
}
578+
579+
#[test]
fn test_string_to_timestamp_ms() {
    // Inputs with an explicit offset (or `Z`) denote a fixed instant, so the
    // expected milliseconds do not depend on the timezone of the test host.
    let cases: [(&str, i64); 4] = [
        ("2021-09-07T16:00:00+08:00", 1631001600000),
        ("2021-09-07 16:00:00+08:00", 1631001600000),
        ("2021-09-07T16:00:00Z", 1631030400000),
        ("2021-09-07 16:00:00Z", 1631030400000),
    ];
    for &(input, expect) in cases.iter() {
        // Fail loudly on any error or unexpected variant instead of
        // silently skipping the assertion.
        match type_conversion::string_to_timestamp_ms(input) {
            Ok(ScalarValue::TimestampMillisecond(Some(mills), _)) => {
                assert_eq!(mills, expect, "unexpected timestamp for '{input}'")
            }
            other => panic!("unexpected result for '{input}': {other:?}"),
        }
    }

    // An input without a timezone specifier is interpreted as local time, so
    // the expectation is computed with the same local conversion.
    let date_string = "2021-09-07 16:00:00".to_string();
    let d = NaiveDate::from_ymd_opt(2021, 9, 7).unwrap();
    let t = NaiveTime::from_hms_milli_opt(16, 0, 0, 0).unwrap();
    let dt = NaiveDateTime::new(d, t);
    let expect = naive_datetime_to_timestamp(&date_string, dt).unwrap();
    match type_conversion::string_to_timestamp_ms(&date_string) {
        Ok(ScalarValue::TimestampMillisecond(Some(mills), _)) => assert_eq!(mills, expect),
        other => panic!("unexpected result for '{date_string}': {other:?}"),
    }
}
533606
}

0 commit comments

Comments
 (0)