Skip to content

Commit

Permalink
perf(expr): optimize casting to varchar (#7066)
Browse files Browse the repository at this point in the history
This PR optimizes the performance of casting values to varchar.

It introduces a `write` API for `ToText`, so that strings can be written directly to array buffers without allocating an intermediate `String`.
The display functions of interval and timestampz were also optimized.

<img width="581" alt="perf-cast" src="https://user-images.githubusercontent.com/15158738/209610088-859f0f77-5272-4cb8-bbe3-f743bc0cbe97.png">

<details>
<summary>Click to show full results</summary>

bench | Before time(us) | After time(us) | Change(%) | Speedup (before/after - 1)
-- | -- | -- | -- | --
cast(timestampz->varchar) | 508.640 | 121.600 | -76.1% | 3.2
cast(timestamp->varchar) | 166.200 | 58.245 | -65.0% | 1.9
cast(float64->varchar) | 78.386 | 57.597 | -26.5% | 0.4
cast(float32->varchar) | 57.903 | 37.384 | -35.4% | 0.5
cast(date->varchar) | 86.896 | 32.669 | -62.4% | 1.7
cast(time->varchar) | 47.508 | 28.428 | -40.2% | 0.7
cast(decimal->varchar) | 67.682 | 28.317 | -58.2% | 1.4
cast(int16->varchar) | 29.532 | 12.337 | -58.2% | 1.4
cast(int64->varchar) | 52.043 | 12.319 | -76.3% | 3.2
cast(int32->varchar) | 28.863 | 12.258 | -57.5% | 1.4
cast(boolean->varchar) | 26.826 | 6.396 | -76.2% | 3.2
bool_out(boolean) | 25.480 | 5.126 | -79.9% | 4.0

</details>

The `writer` argument of string functions was also changed from `StringWriter<'_>` to `&mut dyn Write`, decoupling them from the array types. I tried to use `&mut impl Write` but was blocked by annoying lifetime issues. Anyway, the performance of these operations is still slightly improved:

<img width="600" alt="perf-string-ops" src="https://user-images.githubusercontent.com/15158738/209610928-8036e4d1-e994-4178-8ce4-ff1340877e47.png">

<details>
<summary>Click to show full results</summary>

bench | Before time(us) | After time(us) | Change(%) | Speedup (before/after - 1)
-- | -- | -- | -- | --
rtrim(varchar,varchar) | 21.780 | 15.768 | -27.6% | 0.4
substr(varchar,int32,int32) | 11.126 | 8.090 | -27.3% | 0.4
rtrim(varchar) | 10.537 | 7.712 | -26.8% | 0.4
substr(varchar,int32) | 9.198 | 7.111 | -22.7% | 0.3
ltrim(varchar) | 9.661 | 8.010 | -17.1% | 0.2
trim(varchar) | 11.308 | 9.618 | -14.9% | 0.2
overlay(varchar,varchar,int32,int32) | 17.107 | 14.697 | -14.1% | 0.2
overlay(varchar,varchar,int32) | 13.408 | 12.007 | -10.4% | 0.1
ltrim(varchar,varchar) | 21.198 | 19.021 | -10.3% | 0.1
trim(varchar,varchar) | 20.876 | 19.205 | -8.0% | 0.1
split_part(varchar,varchar,int32) | 30.708 | 29.293 | -4.6% | 0.0
md5(varchar) | 346.010 | 331.670 | -4.1% | 0.0

</details>

Approved-By: BowenXiao1999
Approved-By: BugenZhao
  • Loading branch information
wangrunji0408 authored Dec 27, 2022
1 parent 2d74a67 commit 29270ca
Show file tree
Hide file tree
Showing 27 changed files with 423 additions and 551 deletions.
21 changes: 2 additions & 19 deletions src/common/src/array/bytes_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,18 +110,6 @@ impl Array for BytesArray {
}

impl BytesArray {
/// Retrieve the ownership of the single bytes value.
///
/// Panics if there're multiple or no values.
pub fn into_single_value(self) -> Option<Box<[u8]>> {
assert_eq!(self.len(), 1);
if !self.is_null(0) {
Some(self.data.into_boxed_slice())
} else {
None
}
}

#[cfg(test)]
pub(super) fn data(&self) -> &[u8] {
&self.data
Expand Down Expand Up @@ -257,13 +245,10 @@ pub struct BytesWriter<'a> {
builder: &'a mut BytesArrayBuilder,
}

pub struct WrittenGuard(());

impl<'a> BytesWriter<'a> {
/// `write_ref` will consume `BytesWriter` and append `value` to the `builder` as a new record.
pub fn write_ref(self, value: &[u8]) -> WrittenGuard {
pub fn write_ref(self, value: &[u8]) {
self.builder.append(Some(value));
WrittenGuard(())
}

/// `begin` will create a `PartialBytesWriter`, which allows multiple appends to create a new
Expand All @@ -290,10 +275,8 @@ impl<'a> PartialBytesWriter<'a> {

/// `finish` must be called once the entire record has been written.
/// Exactly one new record has then been appended, and the `builder` can be safely used.
pub fn finish(self) -> WrittenGuard {
pub fn finish(self) {
self.builder.finish_partial();

WrittenGuard(())
}
}

Expand Down
11 changes: 6 additions & 5 deletions src/common/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ impl Debug for ListRef<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
iter_elems_ref!(self, it, {
for v in it {
v.fmt(f)?;
Debug::fmt(&v, f)?;
}
Ok(())
})
Expand All @@ -487,9 +487,10 @@ impl Debug for ListRef<'_> {
impl ToText for ListRef<'_> {
// This function will be invoked when pgwire prints a list value as a string.
// Refer to PostgreSQL `array_out` or `appendPGArray`.
fn to_text(&self) -> String {
fn write<W: std::fmt::Write>(&self, f: &mut W) -> std::fmt::Result {
iter_elems_ref!(self, it, {
format!(
write!(
f,
"{{{}}}",
it.format_with(",", |datum_ref, f| {
let s = datum_ref.to_text();
Expand Down Expand Up @@ -521,9 +522,9 @@ impl ToText for ListRef<'_> {
})
}

fn to_text_with_type(&self, ty: &DataType) -> String {
fn write_with_type<W: std::fmt::Write>(&self, ty: &DataType, f: &mut W) -> std::fmt::Result {
match ty {
DataType::List { .. } => self.to_text(),
DataType::List { .. } => self.write(f),
_ => unreachable!(),
}
}
Expand Down
21 changes: 14 additions & 7 deletions src/common/src/array/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -414,18 +414,25 @@ impl Debug for StructRef<'_> {
}

impl ToText for StructRef<'_> {
fn to_text(&self) -> String {
fn write<W: std::fmt::Write>(&self, f: &mut W) -> std::fmt::Result {
iter_fields_ref!(self, it, {
format!(
"({})",
it.map(|x| x.to_text()).collect::<Vec<String>>().join(",")
)
write!(f, "(")?;
let mut is_first = true;
for x in it {
if is_first {
is_first = false;
} else {
write!(f, ",")?;
}
ToText::write(&x, f)?;
}
write!(f, ")")
})
}

fn to_text_with_type(&self, ty: &DataType) -> String {
fn write_with_type<W: std::fmt::Write>(&self, ty: &DataType, f: &mut W) -> std::fmt::Result {
match ty {
DataType::Struct(_) => self.to_text(),
DataType::Struct(_) => self.write(f),
_ => unreachable!(),
}
}
Expand Down
65 changes: 14 additions & 51 deletions src/common/src/array/utf8_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use std::fmt::{Display, Write};

use risingwave_pb::data::{Array as ProstArray, ArrayType};

use super::bytes_array::{BytesWriter, PartialBytesWriter, WrittenGuard};
use super::bytes_array::{BytesWriter, PartialBytesWriter};
use super::{Array, ArrayBuilder, ArrayMeta, BytesArray, BytesArrayBuilder};
use crate::array::ArrayBuilderImpl;
use crate::buffer::Bitmap;
Expand Down Expand Up @@ -89,16 +89,6 @@ impl<'a> FromIterator<&'a str> for Utf8Array {
}

impl Utf8Array {
/// Retrieve the ownership of the single string value.
///
/// Panics if there're multiple or no values.
#[inline]
pub fn into_single_value(self) -> Option<Box<str>> {
self.bytes
.into_single_value()
.map(|bytes| unsafe { std::str::from_boxed_utf8_unchecked(bytes) })
}

pub fn into_bytes_array(self) -> BytesArray {
self.bytes
}
Expand Down Expand Up @@ -169,23 +159,6 @@ pub struct StringWriter<'a> {
}

impl<'a> StringWriter<'a> {
/// `write_ref` will consume `StringWriter` and pass the ownership of `builder` to `BytesGuard`.
#[inline]
pub fn write_ref(self, value: &str) -> WrittenGuard {
self.bytes.write_ref(value.as_bytes())
}

/// `write_from_char_iter` will consume `StringWriter` and write the characters from the `iter`.
///
/// Prefer [`StringWriter::begin`] for writing multiple string pieces.
pub fn write_from_char_iter(self, iter: impl Iterator<Item = char>) -> WrittenGuard {
let mut writer = self.begin();
for c in iter {
writer.write_char(c).unwrap();
}
writer.finish()
}

/// `begin` will create a `PartialStringWriter`, which allows multiple appends to create a new
/// record.
pub fn begin(self) -> PartialStringWriter<'a> {
Expand All @@ -202,24 +175,16 @@ pub struct PartialStringWriter<'a> {
}

impl<'a> PartialStringWriter<'a> {
/// `write_ref` will append partial dirty data to `builder`.
/// `PartialStringWriter::write_ref` is different from `StringWriter::write_ref`
/// in that it allows us to call it multiple times.
#[inline]
pub fn write_ref(&mut self, value: &str) {
self.bytes.write_ref(value.as_bytes());
}

/// `finish` must be called once the entire record has been written.
/// Exactly one new record has then been appended, and the `builder` can be safely used.
pub fn finish(self) -> WrittenGuard {
pub fn finish(self) {
self.bytes.finish()
}
}

impl Write for PartialStringWriter<'_> {
fn write_str(&mut self, s: &str) -> std::fmt::Result {
self.write_ref(s);
self.bytes.write_ref(s.as_bytes());
Ok(())
}
}
Expand Down Expand Up @@ -249,11 +214,11 @@ mod tests {
#[test]
fn test_utf8_partial_writer() {
let mut builder = Utf8ArrayBuilder::new(0);
let _guard: WrittenGuard = {
{
let writer = builder.writer();
let mut partial_writer = writer.begin();
for _ in 0..2 {
partial_writer.write_ref("ran");
partial_writer.write_str("ran").unwrap();
}
partial_writer.finish()
};
Expand All @@ -267,31 +232,29 @@ mod tests {
fn test_utf8_partial_writer_failed() {
let mut builder = Utf8ArrayBuilder::new(0);
// Write a record.
let _guard: WrittenGuard = {
{
let writer = builder.writer();
let mut partial_writer = writer.begin();
partial_writer.write_ref("Dia");
partial_writer.write_ref("na");
partial_writer.write_str("Dia").unwrap();
partial_writer.write_str("na").unwrap();
partial_writer.finish()
};

// Write a record failed.
let _maybe_guard: Option<WrittenGuard> = {
{
let writer = builder.writer();
let mut partial_writer = writer.begin();
partial_writer.write_ref("Ca");
partial_writer.write_ref("rol");

partial_writer.write_str("Ca").unwrap();
partial_writer.write_str("rol").unwrap();
// We don't finish here.
None
};

// Write a record.
let _guard: WrittenGuard = {
{
let writer = builder.writer();
let mut partial_writer = writer.begin();
partial_writer.write_ref("Ki");
partial_writer.write_ref("ra");
partial_writer.write_str("Ki").unwrap();
partial_writer.write_str("ra").unwrap();
partial_writer.finish()
};

Expand Down
24 changes: 12 additions & 12 deletions src/common/src/types/chrono_wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,39 +72,39 @@ impl Default for NaiveDateTimeWrapper {
}

impl ToText for NaiveDateWrapper {
fn to_text(&self) -> String {
self.0.to_string()
fn write<W: std::fmt::Write>(&self, f: &mut W) -> std::fmt::Result {
write!(f, "{}", self.0)
}

fn to_text_with_type(&self, ty: &DataType) -> String {
fn write_with_type<W: std::fmt::Write>(&self, ty: &DataType, f: &mut W) -> std::fmt::Result {
match ty {
super::DataType::Date => self.to_text(),
super::DataType::Date => self.write(f),
_ => unreachable!(),
}
}
}

impl ToText for NaiveTimeWrapper {
fn to_text(&self) -> String {
self.0.to_string()
fn write<W: std::fmt::Write>(&self, f: &mut W) -> std::fmt::Result {
write!(f, "{}", self.0)
}

fn to_text_with_type(&self, ty: &DataType) -> String {
fn write_with_type<W: std::fmt::Write>(&self, ty: &DataType, f: &mut W) -> std::fmt::Result {
match ty {
super::DataType::Time => self.to_text(),
super::DataType::Time => self.write(f),
_ => unreachable!(),
}
}
}

impl ToText for NaiveDateTimeWrapper {
fn to_text(&self) -> String {
self.0.to_string()
fn write<W: std::fmt::Write>(&self, f: &mut W) -> std::fmt::Result {
write!(f, "{}", self.0)
}

fn to_text_with_type(&self, ty: &DataType) -> String {
fn write_with_type<W: std::fmt::Write>(&self, ty: &DataType, f: &mut W) -> std::fmt::Result {
match ty {
super::DataType::Timestamp => self.to_text(),
super::DataType::Timestamp => self.write(f),
_ => unreachable!(),
}
}
Expand Down
8 changes: 4 additions & 4 deletions src/common/src/types/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ pub enum Decimal {
}

impl ToText for Decimal {
fn to_text(&self) -> String {
self.to_string()
fn write<W: std::fmt::Write>(&self, f: &mut W) -> std::fmt::Result {
write!(f, "{self}")
}

fn to_text_with_type(&self, ty: &DataType) -> String {
fn write_with_type<W: std::fmt::Write>(&self, ty: &DataType, f: &mut W) -> std::fmt::Result {
match ty {
DataType::Decimal => self.to_text(),
DataType::Decimal => self.write(f),
_ => unreachable!(),
}
}
Expand Down
Loading

0 comments on commit 29270ca

Please sign in to comment.