feat: warning when query's timestamp is exceeding table's ttl #1054

Merged (12 commits) on Jul 11, 2023
Changes from 9 commits
1 change: 1 addition & 0 deletions Cargo.lock


2 changes: 1 addition & 1 deletion analytic_engine/src/compaction/mod.rs
@@ -4,6 +4,7 @@

use std::{collections::HashMap, fmt, str::FromStr, sync::Arc};

use common_types::COMPACTION_STRATEGY;
use common_util::config::{ReadableSize, TimeUnit};
use serde::{Deserialize, Serialize};
use snafu::{ensure, Backtrace, GenerateBacktrace, ResultExt, Snafu};
@@ -13,7 +14,6 @@ use crate::{
compaction::picker::{CommonCompactionPicker, CompactionPickerRef},
sst::file::{FileHandle, Level},
table::data::TableDataRef,
table_options::COMPACTION_STRATEGY,
};

mod metrics;
39 changes: 13 additions & 26 deletions analytic_engine/src/table_options.rs
@@ -5,32 +5,24 @@
use std::{collections::HashMap, string::ToString, time::Duration};

use ceresdbproto::manifest as manifest_pb;
use common_types::time::Timestamp;
use common_types::{
time::Timestamp, ARENA_BLOCK_SIZE, COMPACTION_STRATEGY, COMPRESSION, ENABLE_TTL,
NUM_ROWS_PER_ROW_GROUP, OPTION_KEY_ENABLE_TTL, SEGMENT_DURATION, STORAGE_FORMAT, TTL,
UPDATE_MODE, WRITE_BUFFER_SIZE,
};
use common_util::{
config::{ReadableDuration, ReadableSize, TimeUnit},
define_result,
time::DurationExt,
time::{parse_duration, DurationExt},
};
use datafusion::parquet::basic::Compression as ParquetCompression;
use serde::{Deserialize, Serialize};
use snafu::{Backtrace, GenerateBacktrace, OptionExt, ResultExt, Snafu};
use table_engine::OPTION_KEY_ENABLE_TTL;

use crate::compaction::{
self, CompactionStrategy, SizeTieredCompactionOptions, TimeWindowCompactionOptions,
};

pub const SEGMENT_DURATION: &str = "segment_duration";
pub const ENABLE_TTL: &str = OPTION_KEY_ENABLE_TTL;
pub const TTL: &str = "ttl";
pub const ARENA_BLOCK_SIZE: &str = "arena_block_size";
pub const WRITE_BUFFER_SIZE: &str = "write_buffer_size";
pub const COMPACTION_STRATEGY: &str = "compaction_strategy";
pub const NUM_ROWS_PER_ROW_GROUP: &str = "num_rows_per_row_group";
pub const UPDATE_MODE: &str = "update_mode";
pub const COMPRESSION: &str = "compression";
pub const STORAGE_FORMAT: &str = "storage_format";

const UPDATE_MODE_OVERWRITE: &str = "OVERWRITE";
const UPDATE_MODE_APPEND: &str = "APPEND";
const COMPRESSION_UNCOMPRESSED: &str = "UNCOMPRESSED";
@@ -64,8 +56,11 @@ const MAX_NUM_ROWS_PER_ROW_GROUP: usize = 10_000_000;
#[derive(Debug, Snafu)]
#[allow(clippy::enum_variant_names)]
pub enum Error {
#[snafu(display("Failed to parse duration, err:{}.\nBacktrace:\n{}", err, backtrace))]
ParseDuration { err: String, backtrace: Backtrace },
#[snafu(display("Failed to parse duration, err:{}.\nBacktrace:\n{}", source, backtrace))]
ParseDuration {
source: common_util::time::Error,
backtrace: Backtrace,
},

#[snafu(display("Failed to parse size, err:{}.\nBacktrace:\n{}", err, backtrace))]
ParseSize { err: String, backtrace: Backtrace },
@@ -707,15 +702,15 @@ fn merge_table_options(
let mut table_opts = table_old_opts.clone();
if is_create {
if let Some(v) = options.get(SEGMENT_DURATION) {
table_opts.segment_duration = Some(parse_duration(v)?);
table_opts.segment_duration = Some(parse_duration(v).context(ParseDuration)?);
}
if let Some(v) = options.get(UPDATE_MODE) {
table_opts.update_mode = UpdateMode::parse_from(v)?;
}
}

if let Some(v) = options.get(TTL) {
table_opts.ttl = parse_duration(v)?;
table_opts.ttl = parse_duration(v).context(ParseDuration)?;
}
if let Some(v) = options.get(OPTION_KEY_ENABLE_TTL) {
table_opts.enable_ttl = v.parse::<bool>().context(ParseBool)?;
@@ -744,14 +739,6 @@ fn merge_table_options(
Ok(table_opts)
}

fn parse_duration(v: &str) -> Result<ReadableDuration> {
v.parse::<ReadableDuration>()
.map_err(|err| Error::ParseDuration {
err,
backtrace: Backtrace::generate(),
})
}

fn parse_size(v: &str) -> Result<ReadableSize> {
v.parse::<ReadableSize>().map_err(|err| Error::ParseSize {
err,
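Taken together, the changes in this file move parse_duration into common_util::time and switch the ParseDuration variant to wrap a structured source error, so each caller attaches context via snafu instead of hand-building the error. A minimal sketch of the resulting caller pattern, not part of the diff, assuming the workspace's snafu 0.6 setup in which the variant name doubles as the context selector:

use common_util::{config::ReadableDuration, time};
use snafu::{ResultExt, Snafu};

// Hypothetical local error type mirroring the ParseDuration variant above.
#[derive(Debug, Snafu)]
enum Error {
    #[snafu(display("Failed to parse duration, err:{}", source))]
    ParseDuration { source: time::Error },
}

// `.context(ParseDuration)` converts time::Error into the local variant,
// exactly as merge_table_options does for SEGMENT_DURATION and TTL.
fn duration_opt(raw: &str) -> Result<ReadableDuration, Error> {
    time::parse_duration(raw).context(ParseDuration)
}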
4 changes: 2 additions & 2 deletions analytic_engine/src/tests/table.rs
@@ -277,7 +277,7 @@ impl Builder {

pub fn enable_ttl(mut self, enable_ttl: bool) -> Self {
self.create_request.options.insert(
table_engine::OPTION_KEY_ENABLE_TTL.to_string(),
common_types::OPTION_KEY_ENABLE_TTL.to_string(),
enable_ttl.to_string(),
);
self
@@ -286,7 +286,7 @@
pub fn ttl(mut self, duration: ReadableDuration) -> Self {
self.create_request
.options
.insert(table_options::TTL.to_string(), duration.to_string());
.insert(common_types::TTL.to_string(), duration.to_string());
self
}

12 changes: 12 additions & 0 deletions common_types/src/lib.rs
@@ -31,5 +31,17 @@ pub const MAX_SEQUENCE_NUMBER: u64 = u64::MAX;
/// sequence number should start from 1.
pub const MIN_SEQUENCE_NUMBER: u64 = 0;

/// Enable ttl key
pub const OPTION_KEY_ENABLE_TTL: &str = "enable_ttl";
pub const SEGMENT_DURATION: &str = "segment_duration";
pub const ENABLE_TTL: &str = OPTION_KEY_ENABLE_TTL;
pub const TTL: &str = "ttl";
pub const ARENA_BLOCK_SIZE: &str = "arena_block_size";
pub const WRITE_BUFFER_SIZE: &str = "write_buffer_size";
pub const COMPACTION_STRATEGY: &str = "compaction_strategy";
pub const NUM_ROWS_PER_ROW_GROUP: &str = "num_rows_per_row_group";
pub const UPDATE_MODE: &str = "update_mode";
pub const COMPRESSION: &str = "compression";
pub const STORAGE_FORMAT: &str = "storage_format";
#[cfg(any(test, feature = "test"))]
pub mod tests;
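With the option keys gathered here, crates no longer need to reach into analytic_engine::table_options for them. A small illustrative helper, not from the PR, mirroring how the tests/table.rs hunk above consumes these constants:

use std::collections::HashMap;

use common_types::{OPTION_KEY_ENABLE_TTL, TTL};

// Illustrative only: assemble the TTL-related options for a create-table
// request, e.g. ttl_options("7d").
fn ttl_options(ttl: &str) -> HashMap<String, String> {
    let mut options = HashMap::new();
    options.insert(OPTION_KEY_ENABLE_TTL.to_string(), true.to_string());
    options.insert(TTL.to_string(), ttl.to_string());
    options
}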
24 changes: 24 additions & 0 deletions common_util/src/time.rs
@@ -11,6 +11,17 @@ use std::{

use chrono::{DateTime, Utc};
use common_types::time::Timestamp;
use snafu::{Backtrace, GenerateBacktrace, Snafu};

use crate::config::ReadableDuration;

#[derive(Debug, Snafu)]
#[allow(clippy::enum_variant_names)]
pub enum Error {
#[snafu(display("Failed to parse duration, err:{}.\nBacktrace:\n{}", err, backtrace))]
ParseDuration { err: String, backtrace: Backtrace },
}
define_result!(Error);

pub trait DurationExt {
/// Convert into u64.
@@ -56,6 +67,11 @@ pub fn current_time_millis() -> u64 {
Utc::now().timestamp_millis() as u64
}

#[inline]
pub fn current_time_secs() -> u64 {
current_time_millis() / 1_000_u64
}

#[inline]
pub fn current_as_rfc3339() -> String {
Utc::now().to_rfc3339()
@@ -78,6 +94,14 @@ pub fn try_to_millis(ts: i64) -> Option<Timestamp> {
None
}

pub fn parse_duration(v: &str) -> Result<ReadableDuration> {
v.parse::<ReadableDuration>()
.map_err(|err| Error::ParseDuration {
err,
backtrace: Backtrace::generate(),
})
}

#[cfg(test)]
mod tests {
use std::thread;
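current_time_secs and the relocated parse_duration are exactly the pieces the proxy combines for its TTL check (see proxy/src/lib.rs below). A sketch of that combination, with a hypothetical helper name and seconds throughout:

use common_util::time::{current_time_secs, parse_duration, Error};

// Hypothetical helper: the oldest timestamp (in seconds) still inside a
// table's TTL window, e.g. ttl_cutoff_secs("7d").
fn ttl_cutoff_secs(ttl: &str) -> Result<i64, Error> {
    let ttl = parse_duration(ttl)?;
    Ok(current_time_secs() as i64 - ttl.as_secs() as i64)
}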
1 change: 1 addition & 0 deletions proxy/Cargo.toml
@@ -26,6 +26,7 @@ df_operator = { workspace = true }
futures = { workspace = true }
http = "0.2"
influxdb-line-protocol = "1.0"
influxql-query = { workspace = true }
interpreters = { workspace = true }
itertools = { workspace = true }
lazy_static = { workspace = true }
5 changes: 5 additions & 0 deletions proxy/src/error.rs
@@ -27,13 +27,17 @@ pub enum Error {
msg: String,
source: GenericError,
},

#[snafu(display("sql query error, message:{}", msg))]
SqlQueryOverTTL { code: StatusCode, msg: String },
}

impl Error {
pub fn code(&self) -> StatusCode {
match *self {
Error::ErrNoCause { code, .. } => code,
Error::ErrWithCause { code, .. } => code,
Error::SqlQueryOverTTL { code, .. } => code,
Error::Internal { .. } | Error::InternalNoCause { .. } => {
StatusCode::INTERNAL_SERVER_ERROR
}
@@ -50,6 +54,7 @@ impl Error {
let first_line = error_util::remove_backtrace_from_err(&err_string);
format!("{msg}. Caused by: {first_line}")
}
Error::SqlQueryOverTTL { code: _, msg } => msg.clone(),
}
}
}
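Nothing in this hunk shows the new variant being built, and the error module is only made public in the lib.rs diff below, so the following construction is illustrative: the variant shape and the code()/msg() dispatch come from the diff, while the helper, the message text, and the StatusCode::OK choice (copied from valid_ttl_range further down) are assumptions:

use http::StatusCode;
use proxy::error::Error;

// Hypothetical helper; msg is returned verbatim by Error::msg().
fn over_ttl(table: &str, cutoff: i64) -> Error {
    Error::SqlQueryOverTTL {
        code: StatusCode::OK,
        msg: format!("table:{table} TTL cutoff is {cutoff}, the queried time range lies entirely before it"),
    }
}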
105 changes: 97 additions & 8 deletions proxy/src/lib.rs
@@ -6,7 +6,7 @@
#![feature(trait_alias)]

pub mod context;
mod error;
pub mod error;
mod error_util;
pub mod forward;
mod grpc;
@@ -31,16 +31,25 @@ use std::{
};

use ::http::StatusCode;
use catalog::schema::{
CreateOptions, CreateTableRequest, DropOptions, DropTableRequest, NameRef, SchemaRef,
use catalog::{
schema::{
CreateOptions, CreateTableRequest, DropOptions, DropTableRequest, NameRef, SchemaRef,
},
CatalogRef,
};
use ceresdbproto::storage::{
storage_service_client::StorageServiceClient, PrometheusRemoteQueryRequest,
PrometheusRemoteQueryResponse, Route, RouteRequest,
};
use common_types::{request_id::RequestId, table::DEFAULT_SHARD_ID};
use common_util::{error::BoxError, runtime::Runtime};
use common_types::{request_id::RequestId, table::DEFAULT_SHARD_ID, ENABLE_TTL, TTL};
use common_util::{
error::BoxError,
runtime::Runtime,
time::{current_time_secs, parse_duration},
};
use datafusion::prelude::{Column, Expr};
use futures::FutureExt;
use influxql_query::logical_optimizer::range_predicate::find_time_range;
use interpreters::{
context::Context as InterpreterContext,
factory::Factory,
Expand All @@ -54,7 +63,7 @@ use snafu::{OptionExt, ResultExt};
use table_engine::{
engine::{EngineRuntimes, TableState},
remote::model::{GetTableInfoRequest, TableIdentifier},
table::TableId,
table::{TableId, TableRef},
PARTITION_TABLE_ENGINE_TYPE,
};
use tonic::{transport::Channel, IntoRequest};
@@ -67,6 +76,9 @@ use crate::{
schema_config_provider::SchemaConfigProviderRef,
};

// Because the clock may have errors, choose 1 hour as the error buffer
const BUFFER_DURATION: Duration = Duration::new(60 * 60, 0);

pub struct Proxy<Q> {
router: Arc<dyn Router + Send + Sync>,
forwarder: ForwarderRef,
@@ -165,12 +177,69 @@ impl<Q: QueryExecutor + 'static> Proxy<Q> {
})
}

async fn maybe_open_partition_table_if_not_exist(
// TODO(tanruixiang): Add integration testing when supported by the testing
// framework
fn valid_ttl_range(
&self,
plan: &Plan,
catalog_name: &str,
schema_name: &str,
table_name: &str,
) -> Result<()> {
) -> Result<(bool, i64)> {
if let Plan::Query(query) = &plan {
let catalog = self.get_catalog(catalog_name)?;

let schema = self.get_schema(&catalog, schema_name)?;

let tableref = match self.get_table(&schema, table_name) {
Ok(Some(tableref)) => tableref,
_ => return Ok((true, 0)),
};
if let Some(value) = tableref.options().get(ENABLE_TTL) {
if value == "false" {
return Ok((true, 0));
}
}

let ttl_duration = match tableref.options().get(TTL) {
Some(value) => parse_duration(value),
None => return Ok((false, 0)),
};
if ttl_duration.is_err() {
return Err(Error::ErrNoCause {
code: StatusCode::OK,
msg: "error parse".to_string(),
});
}
let ttl_duration = ttl_duration.unwrap();

// TODO(tanruixiang): use sql's timestamp as nowtime(need sql support)
let nowtime = current_time_secs() as i64;

let ddl = nowtime - ttl_duration.as_secs() as i64 - BUFFER_DURATION.as_secs() as i64;

let timestamp_name = &tableref
.schema()
.column(tableref.schema().timestamp_index())
.name
.clone();
let ts_col = Column::from_name(timestamp_name);
let range = find_time_range(&query.df_plan, &ts_col).unwrap();
match range.end {
std::ops::Bound::Included(x) | std::ops::Bound::Excluded(x) => {
if let Expr::Literal(datafusion::scalar::ScalarValue::Int64(Some(x))) = x {
if x <= ddl {
return Ok((false, ddl));
}
}
}
std::ops::Bound::Unbounded => (),
}
}
Ok((true, 0))
}

fn get_catalog(&self, catalog_name: &str) -> Result<CatalogRef> {
let catalog = self
.instance
.catalog_manager
@@ -184,7 +253,10 @@ impl<Q: QueryExecutor + 'static> Proxy<Q> {
code: StatusCode::BAD_REQUEST,
msg: format!("Catalog not found, catalog_name:{catalog_name}"),
})?;
Ok(catalog)
}

fn get_schema(&self, catalog: &CatalogRef, schema_name: &str) -> Result<SchemaRef> {
// TODO: support create schema if not exist
let schema = catalog
.schema_by_name(schema_name)
@@ -197,14 +269,31 @@
code: StatusCode::BAD_REQUEST,
msg: format!("Schema not found, schema_name:{schema_name}"),
})?;
Ok(schema)
}

fn get_table(&self, schema: &SchemaRef, table_name: &str) -> Result<Option<TableRef>> {
let table = schema
.table_by_name(table_name)
.box_err()
.with_context(|| ErrWithCause {
code: StatusCode::INTERNAL_SERVER_ERROR,
msg: format!("Failed to find table, table_name:{table_name}"),
})?;
Ok(table)
}

async fn maybe_open_partition_table_if_not_exist(
&self,
catalog_name: &str,
schema_name: &str,
table_name: &str,
) -> Result<()> {
let catalog = self.get_catalog(catalog_name)?;

let schema = self.get_schema(&catalog, schema_name)?;

let table = self.get_table(&schema, table_name)?;

let table_info_in_meta = self
.router
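The diff is truncated here, but the heart of valid_ttl_range is already visible above: compute a cutoff of now minus TTL minus BUFFER_DURATION, then flag any query whose time range ends at or before it. Below is a condensed, self-contained restatement of that check, a sketch rather than the merged code: plain integers stand in for the DataFusion plan types, and treating the extracted literal as seconds is an assumption:

use std::{ops::Bound, time::Duration};

// Mirrors BUFFER_DURATION above: 1 hour of slack for clock error.
const BUFFER_DURATION: Duration = Duration::from_secs(60 * 60);

// `now_secs` stands in for current_time_secs(); `range_end` for the upper
// bound find_time_range extracts from the query plan. Returns false when
// the whole queried range is already expired, matching the (false, ddl)
// branch in valid_ttl_range; an unbounded range can still reach live data.
fn query_within_ttl(now_secs: i64, ttl: Duration, range_end: Bound<i64>) -> bool {
    let cutoff = now_secs - ttl.as_secs() as i64 - BUFFER_DURATION.as_secs() as i64;
    match range_end {
        Bound::Included(end) | Bound::Excluded(end) => end > cutoff,
        Bound::Unbounded => true,
    }
}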