Skip to content

Commit c2e8323

Browse files
fsaintjacquesnealrichardson
authored andcommitted
ARROW-6214: [R] Add R sanitizer docker image
The PR creates a docker image to replicate R sanitizer errors. One can iterate with arrow local sources without installing CRAN arrow package. A real undefined behaviour was fixed in `array_to_vector.cpp`. The other errors were false positives (to best of my knowledge). Closes apache#5408 from fsaintjacques/ARROW-6214-r-sanitizer and squashes the following commits: e85c7e3 <François Saint-Jacques> Add docker-r-sanitizer to nightly tasks 7209ca9 <François Saint-Jacques> Fix UBSAN pointer casting error 6b425b4 <François Saint-Jacques> ARROW-6214: Add sanitizer docker image Authored-by: François Saint-Jacques <fsaintjacques@gmail.com> Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
1 parent 19d1d0a commit c2e8323

11 files changed

+195
-18
lines changed

.hadolint.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,4 @@ ignored:
2121
- DL3018
2222
- DL3015 # Avoid additional packages by specifying `--no-install-recommends`
2323
- DL3028 # Ruby gem version pinning
24+
- DL3007 # r-sanitizer must use latest

Makefile.docker

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
# To run the test suite
2121
# $ make -f Makefile.docker cpp
2222

23-
LANGUAGES = cpp cpp-alpine cpp-cmake32 c_glib go java js python python-alpine rust r
23+
LANGUAGES = cpp cpp-alpine cpp-cmake32 c_glib go java js python python-alpine rust r r-sanitizer
2424
MISC = lint iwyu clang-format docs pandas-master
2525
TESTS = dask hdfs-integration spark-integration python-nopandas
2626

ci/docker_build_r.sh

+9-3
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,19 @@ set -e
2020

2121
export ARROW_HOME=$CONDA_PREFIX
2222

23+
: ${R_BIN:=R}
24+
source /arrow/ci/docker_install_r_deps.sh
25+
2326
# Build arrow
2427
pushd /arrow/r
2528

26-
R CMD build --keep-empty-dirs .
27-
R CMD INSTALL $(ls | grep arrow_*.tar.gz)
29+
install_deps
30+
31+
make clean
32+
${R_BIN} CMD build --keep-empty-dirs .
33+
${R_BIN} CMD INSTALL $(ls | grep arrow_*.tar.gz)
2834

2935
export _R_CHECK_FORCE_SUGGESTS_=false
30-
R CMD check $(ls | grep arrow_*.tar.gz) --as-cran --no-manual
36+
${R_BIN} CMD check $(ls | grep arrow_*.tar.gz) --as-cran --no-manual
3137

3238
popd

ci/docker_build_r_sanitizer.sh

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#!/usr/bin/env bash
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing,
13+
# software distributed under the License is distributed on an
14+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
# KIND, either express or implied. See the License for the
16+
# specific language governing permissions and limitations
17+
# under the License.
18+
19+
set -e
20+
21+
: ${R_BIN:=RDsan}
22+
source /arrow/ci/docker_install_r_deps.sh
23+
24+
# Build arrow
25+
pushd /arrow/r
26+
27+
install_deps
28+
29+
make clean
30+
${R_BIN} CMD INSTALL --no-byte-compile .
31+
32+
pushd tests
33+
34+
export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp"
35+
${R_BIN} < testthat.R
36+
37+
popd
38+
39+
popd

ci/docker_install_r_deps.sh

+5-9
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,8 @@
1616
# specific language governing permissions and limitations
1717
# under the License.
1818

19-
set -e
20-
21-
pushd /arrow/r
22-
23-
Rscript -e "install.packages(c('remotes', 'dplyr', 'glue'))"
24-
Rscript -e "remotes::install_deps(dependencies = TRUE)"
25-
Rscript -e "remotes::install_github('romainfrancois/decor')"
26-
27-
popd
19+
install_deps() {
20+
${R_BIN} -e "install.packages(c('remotes', 'dplyr', 'glue'))"
21+
${R_BIN} -e "remotes::install_deps(dependencies = TRUE)"
22+
${R_BIN} -e "remotes::install_github('romainfrancois/decor')"
23+
}

dev/tasks/tasks.yml

+11
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ groups:
7373
docker:
7474
- docker-r
7575
- docker-r-conda
76+
- docker-r-sanitizer
7677
- docker-rust
7778
- docker-cpp
7879
- docker-cpp-alpine
@@ -164,6 +165,7 @@ groups:
164165
- gandiva-jar-osx
165166
- docker-r
166167
- docker-r-conda
168+
- docker-r-sanitizer
167169
- docker-rust
168170
- docker-cpp
169171
- docker-cpp-cmake32
@@ -1121,6 +1123,15 @@ tasks:
11211123
- docker-compose build r-conda
11221124
- docker-compose run r-conda
11231125

1126+
docker-r-sanitizer:
1127+
ci: circle
1128+
platform: linux
1129+
template: docker-tests/circle.linux.yml
1130+
params:
1131+
commands:
1132+
- docker-compose build r-sanitizer
1133+
- docker-compose run r-sanitizer
1134+
11241135
docker-rust:
11251136
ci: circle
11261137
platform: linux

docker-compose.yml

+13
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,19 @@ services:
400400
dockerfile: r/Dockerfile
401401
volumes: *ubuntu-volumes
402402

403+
r-sanitizer:
404+
# Usage:
405+
# docker-compose build r-sanitizer
406+
# docker-compose run r-sanitizer
407+
image: arrow:r-sanitizer
408+
cap_add:
409+
# LeakSanitizer and gdb requires ptrace(2)
410+
- SYS_PTRACE
411+
build:
412+
context: .
413+
dockerfile: r/Dockerfile.sanitizer
414+
volumes: *ubuntu-volumes
415+
403416
r-conda:
404417
# Usage:
405418
# export R_VERSION=3.5.1

r/Dockerfile.sanitizer

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
FROM wch1/r-debug:latest
19+
20+
# Installs C++ toolchain and dependencies
21+
RUN apt-get update -y -q && \
22+
apt-get install -y -q --no-install-recommends \
23+
autoconf \
24+
bison \
25+
ca-certificates \
26+
ccache \
27+
cmake \
28+
flex \
29+
g++ \
30+
gcc \
31+
git \
32+
libbenchmark-dev \
33+
libboost-filesystem-dev \
34+
libboost-regex-dev \
35+
libboost-system-dev \
36+
libbrotli-dev \
37+
libbz2-dev \
38+
libdouble-conversion-dev \
39+
libgflags-dev \
40+
libgoogle-glog-dev \
41+
liblz4-dev \
42+
liblzma-dev \
43+
libre2-dev \
44+
libsnappy-dev \
45+
libssl-dev \
46+
libzstd-dev \
47+
ninja-build \
48+
pkg-config \
49+
rapidjson-dev \
50+
thrift-compiler \
51+
tzdata && \
52+
apt-get clean && rm -rf /var/lib/apt/lists*
53+
54+
# The following dependencies will be downloaded due to missing/invalid packages
55+
# provided by the distribution:
56+
# - libc-ares-dev does not install CMake config files
57+
# - flatbuffer is not packaged
58+
# - libgtest-dev only provide sources
59+
# - libprotobuf-dev only provide sources
60+
# - thrift is too old
61+
ENV CMAKE_ARGS="-DThrift_SOURCE=BUNDLED \
62+
-DFlatbuffers_SOURCE=BUNDLED \
63+
-DGTest_SOURCE=BUNDLED \
64+
-Dc-ares_SOURCE=BUNDLED \
65+
-DgRPC_SOURCE=BUNDLED \
66+
-DProtobuf_SOURCE=BUNDLED ${CMAKE_ARGS}"
67+
68+
# Prioritize system packages and local installation
69+
ENV ARROW_DEPENDENCY_SOURCE=SYSTEM \
70+
ARROW_FLIGHT=OFF \
71+
ARROW_GANDIVA=OFF \
72+
ARROW_HDFS=OFF \
73+
ARROW_ORC=OFF \
74+
ARROW_PARQUET=ON \
75+
ARROW_PLASMA=OFF \
76+
ARROW_USE_ASAN=OFF \
77+
ARROW_USE_UBSAN=OFF \
78+
ARROW_NO_DEPRECATED_API=ON \
79+
ARROW_INSTALL_NAME_RPATH=OFF \
80+
ARROW_WITH_BZ2=OFF \
81+
ARROW_WITH_ZSTD=OFF \
82+
ARROW_R_DEV=TRUE
83+
84+
# Ensure parallel R package installation
85+
RUN printf "options(Ncpus = parallel::detectCores(), repos = 'https://demo.rstudiopm.com/all/__linux__/bionic/latest')\n" >> /etc/R/Rprofile.site
86+
# Also ensure parallel compilation of each individual package
87+
RUN printf "MAKEFLAGS=-j8\n" >> /usr/lib/R/etc/Makeconf
88+
89+
CMD ["/bin/bash", "-c", "/arrow/ci/docker_build_cpp.sh && \
90+
/arrow/ci/docker_build_r_sanitizer.sh"]

r/src/Makevars.in

+4-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@
1616
# under the License.
1717

1818
PKG_CPPFLAGS=@cflags@
19-
PKG_CXXFLAGS=$(CXX_VISIBILITY)
19+
# `-fvisibility=hidden` does not play well with UBSAN:
20+
# https://bugs.llvm.org/show_bug.cgi?id=39191
21+
# https://www.mail-archive.com/gcc-bugs@gcc.gnu.org/msg534862.html
22+
# PKG_CXXFLAGS=$(CXX_VISIBILITY)
2023
CXX_STD=CXX11
2124
PKG_LIBS=@libs@

r/src/array_to_vector.cpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@ class Converter_Promotion : public Converter {
457457
}
458458
};
459459

460-
template <typename value_type>
460+
template <typename value_type, typename unit_type = TimeType>
461461
class Converter_Time : public Converter {
462462
public:
463463
explicit Converter_Time(const ArrayVector& arrays) : Converter(arrays) {}
@@ -485,7 +485,7 @@ class Converter_Time : public Converter {
485485

486486
private:
487487
int TimeUnit_multiplier(const std::shared_ptr<Array>& array) const {
488-
switch (static_cast<TimeType*>(array->type().get())->unit()) {
488+
switch (static_cast<unit_type*>(array->type().get())->unit()) {
489489
case TimeUnit::SECOND:
490490
return 1;
491491
case TimeUnit::MILLI:
@@ -501,10 +501,10 @@ class Converter_Time : public Converter {
501501
};
502502

503503
template <typename value_type>
504-
class Converter_Timestamp : public Converter_Time<value_type> {
504+
class Converter_Timestamp : public Converter_Time<value_type, TimestampType> {
505505
public:
506506
explicit Converter_Timestamp(const ArrayVector& arrays)
507-
: Converter_Time<value_type>(arrays) {}
507+
: Converter_Time<value_type, TimestampType>(arrays) {}
508508

509509
SEXP Allocate(R_xlen_t n) const {
510510
Rcpp::NumericVector data(no_init(n));

r/tools/ubsan.supp

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
vptr:include/c++/8/bits/shared_ptr_base.h

0 commit comments

Comments
 (0)