From 1db5be5ab6b315a8420377e8f196c22cd413d4f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 24 Feb 2025 20:27:23 +0100 Subject: [PATCH 01/22] GH-25025: [C++] Move non core compute kernels into separate shared library --- cpp/src/arrow/ArrowComputeConfig.cmake.in | 38 +++++++++ cpp/src/arrow/CMakeLists.txt | 62 +++++++++------ cpp/src/arrow/acero/CMakeLists.txt | 4 +- cpp/src/arrow/acero/aggregate_node_test.cc | 6 ++ cpp/src/arrow/acero/asof_join_node_test.cc | 6 ++ cpp/src/arrow/acero/hash_aggregate_test.cc | 6 ++ cpp/src/arrow/acero/hash_join_node_test.cc | 6 ++ cpp/src/arrow/acero/order_by_node_test.cc | 6 ++ cpp/src/arrow/acero/plan_test.cc | 6 ++ cpp/src/arrow/arrow-compute.pc.in | 27 +++++++ cpp/src/arrow/compute/CMakeLists.txt | 7 ++ cpp/src/arrow/compute/expression_test.cc | 5 ++ .../arrow/compute/kernels/aggregate_test.cc | 5 ++ cpp/src/arrow/compute/kernels/registry.cc | 79 +++++++++++++++++++ cpp/src/arrow/compute/kernels/registry.h | 41 ++++++++++ .../compute/kernels/scalar_arithmetic_test.cc | 5 ++ .../compute/kernels/scalar_boolean_test.cc | 4 + .../compute/kernels/scalar_if_else_test.cc | 4 + .../compute/kernels/scalar_random_test.cc | 4 + .../compute/kernels/scalar_temporal_test.cc | 4 + .../compute/kernels/test_util_internal.h | 10 +++ .../kernels/vector_cumulative_ops_test.cc | 4 + .../compute/kernels/vector_selection_test.cc | 4 + .../arrow/compute/kernels/vector_sort_test.cc | 4 + .../compute/kernels/vector_swizzle_test.cc | 4 + cpp/src/arrow/compute/registry.cc | 43 +--------- cpp/src/arrow/dataset/dataset_test.cc | 8 ++ cpp/src/arrow/dataset/discovery_test.cc | 7 ++ cpp/src/arrow/dataset/file_csv_test.cc | 6 ++ cpp/src/arrow/dataset/file_ipc_test.cc | 6 ++ cpp/src/arrow/dataset/file_json_test.cc | 7 ++ cpp/src/arrow/dataset/file_orc_test.cc | 6 ++ .../dataset/file_parquet_encryption_test.cc | 8 ++ cpp/src/arrow/dataset/file_parquet_test.cc | 6 ++ cpp/src/arrow/dataset/file_test.cc | 6 ++ 
cpp/src/arrow/dataset/partition_test.cc | 6 ++ cpp/src/arrow/dataset/scanner_test.cc | 6 ++ .../arrow/engine/substrait/function_test.cc | 6 ++ cpp/src/arrow/flight/sql/acero_test.cc | 6 ++ 39 files changed, 412 insertions(+), 66 deletions(-) create mode 100644 cpp/src/arrow/ArrowComputeConfig.cmake.in create mode 100644 cpp/src/arrow/arrow-compute.pc.in create mode 100644 cpp/src/arrow/compute/kernels/registry.cc create mode 100644 cpp/src/arrow/compute/kernels/registry.h diff --git a/cpp/src/arrow/ArrowComputeConfig.cmake.in b/cpp/src/arrow/ArrowComputeConfig.cmake.in new file mode 100644 index 0000000000000..63b6e11723127 --- /dev/null +++ b/cpp/src/arrow/ArrowComputeConfig.cmake.in @@ -0,0 +1,38 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# This config sets the following variables in your project:: +# +# ArrowCompute_FOUND - true if Arrow Compute found on the system +# +# This config sets the following targets in your project:: +# +# ArrowCompute::arrow_compute_shared - for linked as shared library if shared library is built +# ArrowCompute::arrow_compute_static - for linked as static library if static library is built + +@PACKAGE_INIT@ + +include(CMakeFindDependencyMacro) +find_dependency(Arrow) + +include("${CMAKE_CURRENT_LIST_DIR}/ArrowComputeTargets.cmake") + +arrow_keep_backward_compatibility(ArrowCompute arrow_compute) + +check_required_components(ArrowCompute) + +arrow_show_details(ArrowCompute ARROW_COMPUTE) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index b9b8785cbc80a..010ba6c8752be 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -742,6 +742,7 @@ set(ARROW_COMPUTE_SRCS compute/kernels/scalar_cast_temporal.cc compute/kernels/util_internal.cc compute/kernels/vector_hash.cc + compute/kernels/vector_run_end_encode.cc compute/kernels/vector_selection.cc compute/kernels/vector_selection_filter_internal.cc compute/kernels/vector_selection_internal.cc @@ -750,13 +751,14 @@ set(ARROW_COMPUTE_SRCS if(ARROW_COMPUTE) # Include the remaining kernels list(APPEND - ARROW_COMPUTE_SRCS + ARROW_COMPUTE_LIB_SRCS compute/kernels/aggregate_basic.cc compute/kernels/aggregate_mode.cc compute/kernels/aggregate_quantile.cc compute/kernels/aggregate_tdigest.cc compute/kernels/aggregate_var_std.cc compute/kernels/hash_aggregate.cc + compute/kernels/registry.cc compute/kernels/scalar_arithmetic.cc compute/kernels/scalar_boolean.cc compute/kernels/scalar_compare.cc @@ -776,7 +778,6 @@ if(ARROW_COMPUTE) compute/kernels/vector_pairwise.cc compute/kernels/vector_rank.cc compute/kernels/vector_replace.cc - compute/kernels/vector_run_end_encode.cc compute/kernels/vector_select_k.cc compute/kernels/vector_sort.cc compute/kernels/vector_swizzle.cc @@ -791,40 
+792,53 @@ if(ARROW_COMPUTE) compute/util.cc compute/util_internal.cc) - append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/kernels/aggregate_basic_avx2.cc) - append_runtime_avx512_src(ARROW_COMPUTE_SRCS compute/kernels/aggregate_basic_avx512.cc) - append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_internal_avx2.cc) - append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_internal_avx2.cc) - append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/compare_internal_avx2.cc) - append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/encode_internal_avx2.cc) - append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/util_avx2.cc) + append_runtime_avx2_src(ARROW_COMPUTE_LIB_SRCS compute/kernels/aggregate_basic_avx2.cc) + append_runtime_avx512_src(ARROW_COMPUTE_LIB_SRCS + compute/kernels/aggregate_basic_avx512.cc) + append_runtime_avx2_src(ARROW_COMPUTE_LIB_SRCS compute/key_hash_internal_avx2.cc) + append_runtime_avx2_bmi2_src(ARROW_COMPUTE_LIB_SRCS compute/key_map_internal_avx2.cc) + append_runtime_avx2_src(ARROW_COMPUTE_LIB_SRCS compute/row/compare_internal_avx2.cc) + append_runtime_avx2_src(ARROW_COMPUTE_LIB_SRCS compute/row/encode_internal_avx2.cc) + append_runtime_avx2_bmi2_src(ARROW_COMPUTE_LIB_SRCS compute/util_avx2.cc) + set(ARROW_COMPUTE_LINK_LIBS Boost::headers ${ARROW_XSIMD} ${ARROW_OPENTELEMETRY_LIBS}) + if(ARROW_WITH_RE2) + list(APPEND ARROW_COMPUTE_LINK_LIBS re2::re2) + endif() + if(ARROW_WITH_UTF8PROC) + list(APPEND ARROW_COMPUTE_LINK_LIBS utf8proc::utf8proc) + endif() + add_arrow_lib(arrow_compute + CMAKE_PACKAGE_NAME + ArrowCompute + PKG_CONFIG_NAME + arrow-compute + SHARED_PRIVATE_LINK_LIBS + ${ARROW_COMPUTE_LINK_LIBS} # TODO: This has to be added conditionally if ARROW_USE_XSIMD + ${ARROW_OPENTELEMETRY_LIBS} + OUTPUTS + ARROW_COMPUTE_LIBRARIES + SOURCES + ${ARROW_COMPUTE_LIB_SRCS} + SHARED_LINK_FLAGS + ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt + ) endif() arrow_add_object_library(ARROW_COMPUTE 
${ARROW_COMPUTE_SRCS}) if(ARROW_USE_BOOST) - foreach(ARROW_COMPUTE_TARGET ${ARROW_COMPUTE_TARGETS}) - target_link_libraries(${ARROW_COMPUTE_TARGET} PRIVATE Boost::headers) - endforeach() + target_link_libraries(arrow_compute PRIVATE Boost::headers) endif() if(ARROW_USE_XSIMD) - foreach(ARROW_COMPUTE_TARGET ${ARROW_COMPUTE_TARGETS}) - target_link_libraries(${ARROW_COMPUTE_TARGET} PRIVATE ${ARROW_XSIMD}) - endforeach() + target_link_libraries(arrow_compute PRIVATE ${ARROW_XSIMD}) endif() if(ARROW_WITH_OPENTELEMETRY) - foreach(ARROW_COMPUTE_TARGET ${ARROW_COMPUTE_TARGETS}) - target_link_libraries(${ARROW_COMPUTE_TARGET} PRIVATE ${ARROW_OPENTELEMETRY_LIBS}) - endforeach() + target_link_libraries(arrow_compute PRIVATE ${ARROW_OPENTELEMETRY_LIBS}) endif() if(ARROW_WITH_RE2) - foreach(ARROW_COMPUTE_TARGET ${ARROW_COMPUTE_TARGETS}) - target_link_libraries(${ARROW_COMPUTE_TARGET} PRIVATE re2::re2) - endforeach() + target_link_libraries(arrow_compute PRIVATE re2::re2) endif() if(ARROW_WITH_UTF8PROC) - foreach(ARROW_COMPUTE_TARGET ${ARROW_COMPUTE_TARGETS}) - target_link_libraries(${ARROW_COMPUTE_TARGET} PRIVATE utf8proc::utf8proc) - endforeach() + target_link_libraries(arrow_compute PRIVATE utf8proc::utf8proc) endif() if(ARROW_FILESYSTEM) diff --git a/cpp/src/arrow/acero/CMakeLists.txt b/cpp/src/arrow/acero/CMakeLists.txt index e6aa0560dfa80..5c1613d233a21 100644 --- a/cpp/src/arrow/acero/CMakeLists.txt +++ b/cpp/src/arrow/acero/CMakeLists.txt @@ -66,8 +66,8 @@ endif() list(APPEND ARROW_ACERO_STATIC_INSTALL_INTERFACE_LIBS Arrow::arrow_static) list(APPEND ARROW_ACERO_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared) -list(APPEND ARROW_ACERO_STATIC_LINK_LIBS arrow_static) -list(APPEND ARROW_ACERO_SHARED_LINK_LIBS arrow_shared) +list(APPEND ARROW_ACERO_STATIC_LINK_LIBS arrow_static arrow_compute_static) +list(APPEND ARROW_ACERO_SHARED_LINK_LIBS arrow_shared arrow_compute_shared) add_arrow_lib(arrow_acero CMAKE_PACKAGE_NAME diff --git a/cpp/src/arrow/acero/aggregate_node_test.cc 
b/cpp/src/arrow/acero/aggregate_node_test.cc index f980496d527d1..9083c40767122 100644 --- a/cpp/src/arrow/acero/aggregate_node_test.cc +++ b/cpp/src/arrow/acero/aggregate_node_test.cc @@ -24,6 +24,7 @@ #include "arrow/acero/test_util_internal.h" #include "arrow/compute/api_aggregate.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/compute/test_util_internal.h" #include "arrow/result.h" #include "arrow/table.h" @@ -33,8 +34,13 @@ namespace arrow { +using compute::ComputeKernelEnvironment; using compute::ExecBatchFromJSON; +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + namespace acero { Result> TableGroupBy( diff --git a/cpp/src/arrow/acero/asof_join_node_test.cc b/cpp/src/arrow/acero/asof_join_node_test.cc index c726ac7c821a7..7e38d61f3dccc 100644 --- a/cpp/src/arrow/acero/asof_join_node_test.cc +++ b/cpp/src/arrow/acero/asof_join_node_test.cc @@ -42,6 +42,7 @@ #include "arrow/api.h" #include "arrow/compute/api_scalar.h" #include "arrow/compute/cast.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/compute/row/row_encoder_internal.h" #include "arrow/compute/test_util_internal.h" #include "arrow/testing/gtest_util.h" @@ -67,11 +68,16 @@ using testing::UnorderedElementsAreArray; namespace arrow { using compute::Cast; +using compute::ComputeKernelEnvironment; using compute::Divide; using compute::ExecBatchFromJSON; using compute::Multiply; using compute::Subtract; +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + namespace acero { bool is_temporal_primitive(Type::type type_id) { diff --git a/cpp/src/arrow/acero/hash_aggregate_test.cc b/cpp/src/arrow/acero/hash_aggregate_test.cc index 7f4b6dd75272f..fc4f4a973ee4f 100644 --- a/cpp/src/arrow/acero/hash_aggregate_test.cc +++ b/cpp/src/arrow/acero/hash_aggregate_test.cc @@ 
-40,6 +40,7 @@ #include "arrow/compute/exec_internal.h" #include "arrow/compute/kernels/aggregate_internal.h" #include "arrow/compute/kernels/codegen_internal.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/compute/registry.h" #include "arrow/compute/row/grouper.h" #include "arrow/table.h" @@ -71,6 +72,7 @@ using internal::ToChars; using compute::ArgShape; using compute::CallFunction; +using compute::ComputeKernelEnvironment; using compute::CountOptions; using compute::default_exec_context; using compute::ExecBatchFromJSON; @@ -88,6 +90,10 @@ using compute::TDigestOptions; using compute::ValidateOutput; using compute::VarianceOptions; +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + namespace acero { TEST(AggregateSchema, NoKeys) { diff --git a/cpp/src/arrow/acero/hash_join_node_test.cc b/cpp/src/arrow/acero/hash_join_node_test.cc index 654fd59c45d5a..cc8659c4f241e 100644 --- a/cpp/src/arrow/acero/hash_join_node_test.cc +++ b/cpp/src/arrow/acero/hash_join_node_test.cc @@ -26,6 +26,7 @@ #include "arrow/acero/test_util_internal.h" #include "arrow/acero/util.h" #include "arrow/api.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/compute/light_array_internal.h" #include "arrow/compute/row/row_encoder_internal.h" #include "arrow/compute/test_util_internal.h" @@ -47,6 +48,7 @@ using arrow::random::kSeedMax; using arrow::random::RandomArrayGenerator; using compute::and_; using compute::call; +using compute::ComputeKernelEnvironment; using compute::default_exec_context; using compute::ExecBatchBuilder; using compute::ExecBatchFromJSON; @@ -57,6 +59,10 @@ using compute::SortKey; using compute::Take; using compute::internal::RowEncoder; +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + namespace acero { BatchesWithSchema 
GenerateBatchesFromString( diff --git a/cpp/src/arrow/acero/order_by_node_test.cc b/cpp/src/arrow/acero/order_by_node_test.cc index 37e6862ed0f52..cc824a2f16aa9 100644 --- a/cpp/src/arrow/acero/order_by_node_test.cc +++ b/cpp/src/arrow/acero/order_by_node_test.cc @@ -22,6 +22,7 @@ #include "arrow/acero/exec_plan.h" #include "arrow/acero/options.h" #include "arrow/acero/test_nodes.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/table.h" #include "arrow/testing/generator.h" #include "arrow/testing/gtest_util.h" @@ -32,9 +33,14 @@ namespace arrow { using internal::checked_pointer_cast; +using compute::ComputeKernelEnvironment; using compute::SortKey; using compute::SortOrder; +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + namespace acero { // Sorting is slow, don't use too many rows diff --git a/cpp/src/arrow/acero/plan_test.cc b/cpp/src/arrow/acero/plan_test.cc index 61ab09f6674d9..830e5fb780c28 100644 --- a/cpp/src/arrow/acero/plan_test.cc +++ b/cpp/src/arrow/acero/plan_test.cc @@ -27,6 +27,7 @@ #include "arrow/acero/util.h" #include "arrow/compute/exec.h" #include "arrow/compute/expression.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/compute/test_util_internal.h" #include "arrow/io/util_internal.h" #include "arrow/record_batch.h" @@ -54,6 +55,7 @@ namespace arrow { using compute::ArgShape; using compute::call; +using compute::ComputeKernelEnvironment; using compute::CountOptions; using compute::ExecBatchFromJSON; using compute::field_ref; @@ -63,6 +65,10 @@ using compute::SortOrder; using compute::Take; using compute::TDigestOptions; +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + namespace acero { TEST(ExecPlanConstruction, Empty) { diff --git a/cpp/src/arrow/arrow-compute.pc.in 
b/cpp/src/arrow/arrow-compute.pc.in new file mode 100644 index 0000000000000..8344b3e9a240b --- /dev/null +++ b/cpp/src/arrow/arrow-compute.pc.in @@ -0,0 +1,27 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +prefix=@CMAKE_INSTALL_PREFIX@ +includedir=@ARROW_PKG_CONFIG_INCLUDEDIR@ +libdir=@ARROW_PKG_CONFIG_LIBDIR@ + +Name: Apache Arrow Compute Kernels +Description: Apache Arrow's Compute Kernels. 
+Version: @ARROW_VERSION@ +Requires: arrow +Libs: -L${libdir} -larrow_compute +Cflags.private: -DARROW_COMPUTE_STATIC diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt index 6deb2cbad8cb3..685e096e2bc34 100644 --- a/cpp/src/arrow/compute/CMakeLists.txt +++ b/cpp/src/arrow/compute/CMakeLists.txt @@ -27,6 +27,11 @@ endif() # # Unit tests # +if(ARROW_TEST_LINKAGE STREQUAL "static") + set(ARROW_COMPUTE_TEST_LINK_LIBS arrow_compute_static ${ARROW_TEST_STATIC_LINK_LIBS}) +else() + set(ARROW_COMPUTE_TEST_LINK_LIBS arrow_compute_shared ${ARROW_TEST_SHARED_LINK_LIBS}) +endif() # Define arrow_compute_testing object library for common test files if(ARROW_TESTING) @@ -86,6 +91,8 @@ function(ADD_ARROW_COMPUTE_TEST REL_TEST_NAME) ${PREFIX} LABELS ${LABELS} + STATIC_LINK_LIBS + ${ARROW_COMPUTE_TEST_LINK_LIBS} ${ARG_UNPARSED_ARGUMENTS}) endfunction() diff --git a/cpp/src/arrow/compute/expression_test.cc b/cpp/src/arrow/compute/expression_test.cc index 0b7e8a9c23b13..641d1732b1286 100644 --- a/cpp/src/arrow/compute/expression_test.cc +++ b/cpp/src/arrow/compute/expression_test.cc @@ -30,6 +30,7 @@ #include "arrow/array/builder_primitive.h" #include "arrow/compute/expression_internal.h" #include "arrow/compute/function_internal.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/compute/registry.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" @@ -47,6 +48,10 @@ using internal::checked_pointer_cast; namespace compute { +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + const std::shared_ptr kBoringSchema = schema({ field("bool", boolean()), field("i8", int8()), diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc index e6ad915fd5667..fb47fb9aff21d 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_test.cc +++ 
b/cpp/src/arrow/compute/kernels/aggregate_test.cc @@ -32,6 +32,7 @@ #include "arrow/compute/api_vector.h" #include "arrow/compute/cast.h" #include "arrow/compute/kernels/aggregate_internal.h" +#include "arrow/compute/kernels/registry.h" #include "arrow/compute/kernels/test_util_internal.h" #include "arrow/compute/registry.h" #include "arrow/type.h" @@ -55,6 +56,10 @@ namespace compute { using internal::FindAccumulatorType; +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + // // Sum // diff --git a/cpp/src/arrow/compute/kernels/registry.cc b/cpp/src/arrow/compute/kernels/registry.cc new file mode 100644 index 0000000000000..aeeac092a64be --- /dev/null +++ b/cpp/src/arrow/compute/kernels/registry.cc @@ -0,0 +1,95 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+#include "arrow/compute/registry.h" +#include "arrow/compute/kernels/registry.h" + +#include +#include +#include +#include +#include + +#include "arrow/compute/function.h" +#include "arrow/compute/function_internal.h" +#include "arrow/compute/registry_internal.h" +#include "arrow/status.h" +#include "arrow/util/config.h" // For ARROW_COMPUTE +#include "arrow/util/logging.h" + +namespace arrow { +namespace compute { +namespace internal { + +namespace { + +// Registers the additional kernels provided by libarrow_compute with the +// registry returned by GetFunctionRegistry(). +Status DoRegisterComputeKernels() { + auto registry = GetFunctionRegistry(); + + // Scalar functions + RegisterScalarArithmetic(registry); + RegisterScalarBoolean(registry); + RegisterScalarComparison(registry); + RegisterScalarIfElse(registry); + RegisterScalarNested(registry); + RegisterScalarRandom(registry); // Nullary + RegisterScalarRoundArithmetic(registry); + RegisterScalarSetLookup(registry); + RegisterScalarStringAscii(registry); + RegisterScalarStringUtf8(registry); + RegisterScalarTemporalBinary(registry); + RegisterScalarTemporalUnary(registry); + RegisterScalarValidity(registry); + + // Vector functions + RegisterVectorArraySort(registry); + RegisterVectorCumulativeSum(registry); + RegisterVectorNested(registry); + RegisterVectorRank(registry); + RegisterVectorReplace(registry); + RegisterVectorSelectK(registry); + RegisterVectorSort(registry); + RegisterVectorPairwise(registry); + RegisterVectorSwizzle(registry); + + // Aggregate functions + RegisterHashAggregateBasic(registry); + RegisterScalarAggregateBasic(registry); + RegisterScalarAggregateMode(registry); + RegisterScalarAggregateQuantile(registry); + RegisterScalarAggregateTDigest(registry); + RegisterScalarAggregateVariance(registry); + + return Status::OK(); +} + +} // namespace + +// Registers the libarrow_compute kernels the first time it is called; every +// later call only returns the Status recorded by that first attempt. Several +// callers may request registration in one process (e.g. one gtest Environment +// per test source file), and re-adding an already registered function name is +// expected to be rejected by the registry (NOTE(review): confirm AddFunction). +Status RegisterComputeKernels() { + // A function-local static is initialized exactly once, in a thread-safe way. + static const Status registration_status = DoRegisterComputeKernels(); + return registration_status; +} +} // namespace internal +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/registry.h b/cpp/src/arrow/compute/kernels/registry.h new file mode 100644 index 0000000000000..ba099317a3ac1 --- /dev/null +++ 
b/cpp/src/arrow/compute/kernels/registry.h @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/compute/registry.h" + +#include +#include +#include +#include +#include + +#include "arrow/compute/function.h" +#include "arrow/compute/function_internal.h" +#include "arrow/compute/registry_internal.h" +#include "arrow/status.h" +#include "arrow/util/config.h" // For ARROW_COMPUTE +#include "arrow/util/logging.h" +// TODO: Review includes + +namespace arrow { +namespace compute { +namespace internal { +// Registers the additional compute kernels of libarrow_compute with the +// registry returned by GetFunctionRegistry(). This must be called before +// using any of those kernels. +// +// TODO: This must be public, not internal +Status RegisterComputeKernels(); + +} // namespace internal +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc index 1162dad855da8..bb126d1cff7b2 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic_test.cc @@ -44,6 +44,11 @@ namespace arrow { namespace compute { + +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + namespace { // 
2.718281828459045090795598298427648842334747314453125 diff --git a/cpp/src/arrow/compute/kernels/scalar_boolean_test.cc b/cpp/src/arrow/compute/kernels/scalar_boolean_test.cc index a8d7cab5f5ad9..4808c2a2a57d2 100644 --- a/cpp/src/arrow/compute/kernels/scalar_boolean_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_boolean_test.cc @@ -34,6 +34,10 @@ using internal::checked_pointer_cast; namespace compute { +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + void CheckBooleanScalarArrayBinary(std::string func_name, Datum array) { for (std::shared_ptr scalar : {std::make_shared(), std::make_shared(true), diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index 76ad19f3c4833..9580176afed7e 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -34,6 +34,10 @@ using internal::checked_pointer_cast; namespace compute { +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + // Helper that combines a dictionary and the value type so it can // later be used with DictArrayFromJSON struct JsonDict { diff --git a/cpp/src/arrow/compute/kernels/scalar_random_test.cc b/cpp/src/arrow/compute/kernels/scalar_random_test.cc index ff90d0c332ab4..afb6514b87465 100644 --- a/cpp/src/arrow/compute/kernels/scalar_random_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_random_test.cc @@ -28,6 +28,10 @@ using internal::ThreadPool; namespace compute { +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + namespace { void TestRandomWithOptions(int64_t length, const RandomOptions& random_options) { diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc 
b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc index 6f92036f55b44..a0fa89dcb21b4 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_test.cc @@ -38,6 +38,10 @@ using internal::StringFormatter; namespace compute { +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + class ScalarTemporalTest : public ::testing::Test { public: const char* date32s = diff --git a/cpp/src/arrow/compute/kernels/test_util_internal.h b/cpp/src/arrow/compute/kernels/test_util_internal.h index e3a27ab9addbc..a4f2cb439fa0d 100644 --- a/cpp/src/arrow/compute/kernels/test_util_internal.h +++ b/cpp/src/arrow/compute/kernels/test_util_internal.h @@ -29,6 +29,7 @@ #include "arrow/compute/api_scalar.h" #include "arrow/compute/cast.h" #include "arrow/compute/kernel.h" +#include "arrow/compute/kernels/registry.h" #include "arrow/compute/test_util_internal.h" #include "arrow/datum.h" #include "arrow/memory_pool.h" @@ -50,6 +51,16 @@ namespace compute { using DatumVector = std::vector; +// gtest environment whose SetUp() registers the compute kernels to the +// FunctionRegistry. This must be done before using the compute kernels, so +// test files install it with ::testing::AddGlobalTestEnvironment(). 
+class ComputeKernelEnvironment : public ::testing::Environment { + public: + void SetUp() override { + ASSERT_OK(arrow::compute::internal::RegisterComputeKernels()); + } +}; + template std::shared_ptr _MakeArray(const std::shared_ptr& type, const std::vector& values, diff --git a/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc b/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc index 53c28032b8261..eb435662d5f4e 100644 --- a/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_cumulative_ops_test.cc @@ -37,6 +37,10 @@ namespace arrow { namespace compute { +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + static const std::vector kCumulativeFunctionNames{ "cumulative_sum", "cumulative_sum_checked", "cumulative_prod", "cumulative_prod_checked", "cumulative_min", "cumulative_max", diff --git a/cpp/src/arrow/compute/kernels/vector_selection_test.cc b/cpp/src/arrow/compute/kernels/vector_selection_test.cc index 5fa2d6824dc17..d6e7f4d79c834 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_test.cc @@ -45,6 +45,10 @@ using std::string_view; namespace compute { +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + namespace { template diff --git a/cpp/src/arrow/compute/kernels/vector_sort_test.cc b/cpp/src/arrow/compute/kernels/vector_sort_test.cc index 2b592cd1a9260..9a44911699e3d 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort_test.cc @@ -45,6 +45,10 @@ using internal::checked_pointer_cast; namespace compute { +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + #ifdef ARROW_VALGRIND using 
RealArrowTypes = ::testing::Types; diff --git a/cpp/src/arrow/compute/kernels/vector_swizzle_test.cc b/cpp/src/arrow/compute/kernels/vector_swizzle_test.cc index 0879955ec49ae..fef488c828e6d 100644 --- a/cpp/src/arrow/compute/kernels/vector_swizzle_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_swizzle_test.cc @@ -28,6 +28,10 @@ namespace arrow::compute { +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + namespace { using SmallSignedIntegerTypes = ::testing::Types; diff --git a/cpp/src/arrow/compute/registry.cc b/cpp/src/arrow/compute/registry.cc index ef9f3c7e1fbf5..147b7336d96c8 100644 --- a/cpp/src/arrow/compute/registry.cc +++ b/cpp/src/arrow/compute/registry.cc @@ -288,50 +288,13 @@ static std::unique_ptr CreateBuiltInRegistry() { RegisterVectorHash(registry.get()); RegisterVectorSelection(registry.get()); + RegisterVectorRunEndEncode(registry.get()); + RegisterVectorRunEndDecode(registry.get()); + RegisterScalarOptions(registry.get()); RegisterVectorOptions(registry.get()); RegisterAggregateOptions(registry.get()); -#ifdef ARROW_COMPUTE - // Register additional kernels - - // Scalar functions - RegisterScalarArithmetic(registry.get()); - RegisterScalarBoolean(registry.get()); - RegisterScalarComparison(registry.get()); - RegisterScalarIfElse(registry.get()); - RegisterScalarNested(registry.get()); - RegisterScalarRandom(registry.get()); // Nullary - RegisterScalarRoundArithmetic(registry.get()); - RegisterScalarSetLookup(registry.get()); - RegisterScalarStringAscii(registry.get()); - RegisterScalarStringUtf8(registry.get()); - RegisterScalarTemporalBinary(registry.get()); - RegisterScalarTemporalUnary(registry.get()); - RegisterScalarValidity(registry.get()); - - // Vector functions - RegisterVectorArraySort(registry.get()); - RegisterVectorCumulativeSum(registry.get()); - RegisterVectorNested(registry.get()); - RegisterVectorRank(registry.get()); - 
RegisterVectorReplace(registry.get()); - RegisterVectorSelectK(registry.get()); - RegisterVectorSort(registry.get()); - RegisterVectorRunEndEncode(registry.get()); - RegisterVectorRunEndDecode(registry.get()); - RegisterVectorPairwise(registry.get()); - RegisterVectorSwizzle(registry.get()); - - // Aggregate functions - RegisterHashAggregateBasic(registry.get()); - RegisterScalarAggregateBasic(registry.get()); - RegisterScalarAggregateMode(registry.get()); - RegisterScalarAggregateQuantile(registry.get()); - RegisterScalarAggregateTDigest(registry.get()); - RegisterScalarAggregateVariance(registry.get()); -#endif - return registry; } diff --git a/cpp/src/arrow/dataset/dataset_test.cc b/cpp/src/arrow/dataset/dataset_test.cc index cd23429bf5ef4..13a17eaa508d2 100644 --- a/cpp/src/arrow/dataset/dataset_test.cc +++ b/cpp/src/arrow/dataset/dataset_test.cc @@ -19,6 +19,7 @@ #include +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/dataset/dataset_internal.h" #include "arrow/dataset/discovery.h" #include "arrow/dataset/partition.h" @@ -29,6 +30,13 @@ #include "arrow/testing/generator.h" namespace arrow { + +using compute::ComputeKernelEnvironment; + +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + namespace dataset { class TestInMemoryFragment : public DatasetFixtureMixin {}; diff --git a/cpp/src/arrow/dataset/discovery_test.cc b/cpp/src/arrow/dataset/discovery_test.cc index 981146b7999ef..928b0f41e5252 100644 --- a/cpp/src/arrow/dataset/discovery_test.cc +++ b/cpp/src/arrow/dataset/discovery_test.cc @@ -23,6 +23,7 @@ #include #include +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/dataset/partition.h" #include "arrow/dataset/test_util_internal.h" #include "arrow/filesystem/test_util.h" @@ -33,6 +34,12 @@ using testing::SizeIs; namespace arrow { + +using compute::ComputeKernelEnvironment; + +// Register the compute kernels 
+::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); namespace dataset { void AssertSchemasAre(std::vector> actual, diff --git a/cpp/src/arrow/dataset/file_csv_test.cc b/cpp/src/arrow/dataset/file_csv_test.cc index e8e5838e6f93a..e0a60fb2d8085 100644 --- a/cpp/src/arrow/dataset/file_csv_test.cc +++ b/cpp/src/arrow/dataset/file_csv_test.cc @@ -22,6 +22,7 @@ #include #include "arrow/acero/exec_plan.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/csv/writer.h" #include "arrow/dataset/dataset_internal.h" #include "arrow/dataset/file_base.h" @@ -39,6 +40,11 @@ #include "arrow/util/config.h" namespace arrow { +using compute::ComputeKernelEnvironment; + +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); namespace dataset { class CsvFormatHelper { diff --git a/cpp/src/arrow/dataset/file_ipc_test.cc b/cpp/src/arrow/dataset/file_ipc_test.cc index 370c6d9782008..2f8351938458b 100644 --- a/cpp/src/arrow/dataset/file_ipc_test.cc +++ b/cpp/src/arrow/dataset/file_ipc_test.cc @@ -21,6 +21,7 @@ #include #include +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/dataset/dataset_internal.h" #include "arrow/dataset/discovery.h" #include "arrow/dataset/file_base.h" @@ -37,8 +38,13 @@ namespace arrow { +using compute::ComputeKernelEnvironment; using internal::checked_pointer_cast; +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + namespace dataset { class IpcFormatHelper { diff --git a/cpp/src/arrow/dataset/file_json_test.cc b/cpp/src/arrow/dataset/file_json_test.cc index 9626e8a5509df..31824a939aa99 100644 --- a/cpp/src/arrow/dataset/file_json_test.cc +++ b/cpp/src/arrow/dataset/file_json_test.cc @@ -17,6 +17,7 @@ #include "arrow/dataset/file_json.h" +#include 
"arrow/compute/kernels/test_util_internal.h" #include "arrow/dataset/plan.h" #include "arrow/dataset/test_util_internal.h" #include "arrow/filesystem/mockfs.h" @@ -32,6 +33,12 @@ namespace arrow { using internal::checked_cast; +using compute::ComputeKernelEnvironment; + +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + namespace dataset { namespace rj = arrow::rapidjson; diff --git a/cpp/src/arrow/dataset/file_orc_test.cc b/cpp/src/arrow/dataset/file_orc_test.cc index 17be015de516e..4d695fa8a326c 100644 --- a/cpp/src/arrow/dataset/file_orc_test.cc +++ b/cpp/src/arrow/dataset/file_orc_test.cc @@ -21,6 +21,7 @@ #include #include "arrow/adapters/orc/adapter.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/dataset/dataset_internal.h" #include "arrow/dataset/discovery.h" #include "arrow/dataset/file_base.h" @@ -33,6 +34,11 @@ #include "arrow/testing/util.h" namespace arrow { +using compute::ComputeKernelEnvironment; + +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); namespace dataset { class OrcFormatHelper { diff --git a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc index 0287d593d12d3..86a9942b9a859 100644 --- a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc +++ b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc @@ -20,6 +20,7 @@ #include "gtest/gtest.h" #include "arrow/array.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/dataset/dataset.h" #include "arrow/dataset/file_base.h" #include "arrow/dataset/file_parquet.h" @@ -49,6 +50,13 @@ constexpr std::string_view kBaseDir = ""; using arrow::internal::checked_pointer_cast; namespace arrow { + +using compute::ComputeKernelEnvironment; + +// Register the compute kernels +::testing::Environment* 
compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + namespace dataset { // Base class to test writing and reading encrypted dataset. diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc index c97ae520d8e1d..40d705f806903 100644 --- a/cpp/src/arrow/dataset/file_parquet_test.cc +++ b/cpp/src/arrow/dataset/file_parquet_test.cc @@ -23,6 +23,7 @@ #include #include "arrow/compute/api_scalar.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/dataset/dataset_internal.h" #include "arrow/dataset/parquet_encryption_config.h" #include "arrow/dataset/test_util_internal.h" @@ -47,6 +48,11 @@ namespace arrow { +using compute::ComputeKernelEnvironment; + +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); using internal::checked_cast; using internal::checked_pointer_cast; diff --git a/cpp/src/arrow/dataset/file_test.cc b/cpp/src/arrow/dataset/file_test.cc index 5d6068557f047..952b0b05753fe 100644 --- a/cpp/src/arrow/dataset/file_test.cc +++ b/cpp/src/arrow/dataset/file_test.cc @@ -27,6 +27,7 @@ #include "arrow/acero/exec_plan.h" #include "arrow/acero/test_util_internal.h" #include "arrow/array/array_primitive.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/compute/test_util_internal.h" #include "arrow/dataset/api.h" #include "arrow/dataset/partition.h" @@ -42,6 +43,11 @@ namespace cp = arrow::compute; namespace arrow { +using compute::ComputeKernelEnvironment; + +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); using compute::ExecBatchFromJSON; using internal::TemporaryDir; diff --git a/cpp/src/arrow/dataset/partition_test.cc b/cpp/src/arrow/dataset/partition_test.cc index 9f0bd7c0be040..7ec1888dd4a10 100644 --- a/cpp/src/arrow/dataset/partition_test.cc 
+++ b/cpp/src/arrow/dataset/partition_test.cc @@ -29,6 +29,7 @@ #include "arrow/compute/api_scalar.h" #include "arrow/compute/api_vector.h" #include "arrow/compute/cast.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/dataset/dataset.h" #include "arrow/dataset/file_ipc.h" #include "arrow/dataset/test_util_internal.h" @@ -40,6 +41,11 @@ #include "arrow/util/uri.h" namespace arrow { +using compute::ComputeKernelEnvironment; + +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); using compute::Cast; diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index a45847b49def0..a8cd5e8c42545 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -30,6 +30,7 @@ #include "arrow/compute/api_vector.h" #include "arrow/compute/cast.h" #include "arrow/compute/expression_internal.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/dataset/dataset_internal.h" #include "arrow/dataset/plan.h" #include "arrow/dataset/test_util_internal.h" @@ -55,9 +56,14 @@ using testing::UnorderedElementsAreArray; namespace arrow { +using compute::ComputeKernelEnvironment; using internal::GetCpuThreadPool; using internal::Iota; +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); + namespace dataset { // The basic evolution strategy doesn't really need any info from the dataset diff --git a/cpp/src/arrow/engine/substrait/function_test.cc b/cpp/src/arrow/engine/substrait/function_test.cc index f0d2583e815ae..888ce85ab7fb9 100644 --- a/cpp/src/arrow/engine/substrait/function_test.cc +++ b/cpp/src/arrow/engine/substrait/function_test.cc @@ -34,6 +34,7 @@ #include "arrow/array/builder_binary.h" #include "arrow/compute/api_vector.h" #include "arrow/compute/cast.h" +#include 
"arrow/compute/kernels/test_util_internal.h" #include "arrow/datum.h" #include "arrow/engine/substrait/extension_set.h" #include "arrow/engine/substrait/options.h" @@ -48,6 +49,11 @@ #include "arrow/type_fwd.h" namespace arrow { +using compute::ComputeKernelEnvironment; + +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); namespace engine { struct FunctionTestCase { diff --git a/cpp/src/arrow/flight/sql/acero_test.cc b/cpp/src/arrow/flight/sql/acero_test.cc index a8298f57b9d30..df9fab6cb386f 100644 --- a/cpp/src/arrow/flight/sql/acero_test.cc +++ b/cpp/src/arrow/flight/sql/acero_test.cc @@ -24,6 +24,7 @@ #include #include "arrow/array.h" +#include "arrow/compute/kernels/test_util_internal.h" #include "arrow/engine/substrait/util.h" #include "arrow/flight/server.h" #include "arrow/flight/sql/client.h" @@ -38,6 +39,11 @@ #include "arrow/util/checked_cast.h" namespace arrow { +using compute::ComputeKernelEnvironment; + +// Register the compute kernels +::testing::Environment* compute_kernels_env = + ::testing::AddGlobalTestEnvironment(new ComputeKernelEnvironment); namespace flight { namespace sql { From 1864b2efa98390e2e95636bf44364dba341584e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 25 Feb 2025 10:09:06 +0100 Subject: [PATCH 02/22] Fix build for some benchmarks and examples --- cpp/examples/arrow/CMakeLists.txt | 8 +++++++- cpp/examples/arrow/compute_and_write_csv_example.cc | 2 ++ cpp/examples/arrow/join_example.cc | 2 ++ cpp/src/arrow/compute/row/CMakeLists.txt | 5 +++++ 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/cpp/examples/arrow/CMakeLists.txt b/cpp/examples/arrow/CMakeLists.txt index a5b69a5d8b4ad..468f59cedd491 100644 --- a/cpp/examples/arrow/CMakeLists.txt +++ b/cpp/examples/arrow/CMakeLists.txt @@ -42,7 +42,13 @@ if(ARROW_SUBSTRAIT) endif() if(ARROW_COMPUTE AND ARROW_CSV) - 
add_arrow_example(compute_and_write_csv_example) + if(ARROW_BUILD_SHARED) + set(COMPUTE_KERNELES_LINK_LIBS arrow_compute_shared) + else() + set(COMPUTE_KERNELES_LINK_LIBS arrow_compute_static) + endif() + add_arrow_example(compute_and_write_csv_example EXTRA_LINK_LIBS + ${COMPUTE_KERNELES_LINK_LIBS}) endif() if(ARROW_FLIGHT) diff --git a/cpp/examples/arrow/compute_and_write_csv_example.cc b/cpp/examples/arrow/compute_and_write_csv_example.cc index 7e0f6cdf1ce16..19cd023c5af3b 100644 --- a/cpp/examples/arrow/compute_and_write_csv_example.cc +++ b/cpp/examples/arrow/compute_and_write_csv_example.cc @@ -22,6 +22,7 @@ #include #include #include +#include "arrow/compute/kernels/registry.h" #include #include @@ -41,6 +42,7 @@ // in the current directory arrow::Status RunMain(int argc, char** argv) { + ARROW_RETURN_NOT_OK(arrow::compute::internal::RegisterComputeKernels()); // Make Arrays arrow::NumericBuilder int64_builder; arrow::BooleanBuilder boolean_builder; diff --git a/cpp/examples/arrow/join_example.cc b/cpp/examples/arrow/join_example.cc index c1c6e5e82ff11..e52fd0cc5ccc3 100644 --- a/cpp/examples/arrow/join_example.cc +++ b/cpp/examples/arrow/join_example.cc @@ -23,6 +23,7 @@ #include #include "arrow/acero/exec_plan.h" #include "arrow/compute/expression.h" +#include "arrow/compute/kernels/registry.h" #include #include @@ -82,6 +83,7 @@ arrow::Result> CreateDataSetFromCSVData } arrow::Status DoHashJoin() { + ARROW_RETURN_NOT_OK(arrow::compute::internal::RegisterComputeKernels()); arrow::dataset::internal::Initialize(); ARROW_ASSIGN_OR_RAISE(auto l_dataset, CreateDataSetFromCSVData(true)); diff --git a/cpp/src/arrow/compute/row/CMakeLists.txt b/cpp/src/arrow/compute/row/CMakeLists.txt index ef03c767f974e..45fce65e57e34 100644 --- a/cpp/src/arrow/compute/row/CMakeLists.txt +++ b/cpp/src/arrow/compute/row/CMakeLists.txt @@ -21,3 +21,8 @@ arrow_install_all_headers("arrow/compute/row") add_arrow_benchmark(grouper_benchmark PREFIX "arrow-compute") 
+if(ARROW_BUILD_STATIC) + target_link_libraries(arrow-compute-grouper-benchmark PUBLIC arrow_compute_static) +else() + target_link_libraries(arrow-compute-grouper-benchmark PUBLIC arrow_compute_shared) +endif() From 12c3d15755159c5a27e0c3d63ba4db8abec231f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 25 Feb 2025 11:04:13 +0100 Subject: [PATCH 03/22] Only link arrow_compute to benchmark if we are building benchmarks --- cpp/examples/arrow/CMakeLists.txt | 6 +++--- cpp/src/arrow/compute/row/CMakeLists.txt | 14 +++++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/cpp/examples/arrow/CMakeLists.txt b/cpp/examples/arrow/CMakeLists.txt index 468f59cedd491..80de0b93192e6 100644 --- a/cpp/examples/arrow/CMakeLists.txt +++ b/cpp/examples/arrow/CMakeLists.txt @@ -43,12 +43,12 @@ endif() if(ARROW_COMPUTE AND ARROW_CSV) if(ARROW_BUILD_SHARED) - set(COMPUTE_KERNELES_LINK_LIBS arrow_compute_shared) + set(COMPUTE_KERNELS_LINK_LIBS arrow_compute_shared) else() - set(COMPUTE_KERNELES_LINK_LIBS arrow_compute_static) + set(COMPUTE_KERNELS_LINK_LIBS arrow_compute_static) endif() add_arrow_example(compute_and_write_csv_example EXTRA_LINK_LIBS - ${COMPUTE_KERNELES_LINK_LIBS}) + ${COMPUTE_KERNELS_LINK_LIBS}) endif() if(ARROW_FLIGHT) diff --git a/cpp/src/arrow/compute/row/CMakeLists.txt b/cpp/src/arrow/compute/row/CMakeLists.txt index 45fce65e57e34..11e622bcb9a12 100644 --- a/cpp/src/arrow/compute/row/CMakeLists.txt +++ b/cpp/src/arrow/compute/row/CMakeLists.txt @@ -20,9 +20,13 @@ arrow_install_all_headers("arrow/compute/row") -add_arrow_benchmark(grouper_benchmark PREFIX "arrow-compute") -if(ARROW_BUILD_STATIC) - target_link_libraries(arrow-compute-grouper-benchmark PUBLIC arrow_compute_static) -else() - target_link_libraries(arrow-compute-grouper-benchmark PUBLIC arrow_compute_shared) +if(ARROW_BUILD_BENCHMARKS) + add_arrow_benchmark(grouper_benchmark PREFIX "arrow-compute") + if(ARROW_COMPUTE) + if(ARROW_BUILD_STATIC) + 
target_link_libraries(arrow-compute-grouper-benchmark PUBLIC arrow_compute_static) + else() + target_link_libraries(arrow-compute-grouper-benchmark PUBLIC arrow_compute_shared) + endif() + endif() endif() From edb0005fccee155af2c3ea47d363d83c8930f6fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 25 Feb 2025 11:40:13 +0100 Subject: [PATCH 04/22] Rename arrow_compute target to arrow_compute_core --- cpp/src/arrow/CMakeLists.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 010ba6c8752be..29bc4ba3b6f24 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -824,21 +824,21 @@ if(ARROW_COMPUTE) ) endif() -arrow_add_object_library(ARROW_COMPUTE ${ARROW_COMPUTE_SRCS}) +arrow_add_object_library(ARROW_COMPUTE_CORE ${ARROW_COMPUTE_SRCS}) if(ARROW_USE_BOOST) - target_link_libraries(arrow_compute PRIVATE Boost::headers) + target_link_libraries(arrow_compute_core PRIVATE Boost::headers) endif() if(ARROW_USE_XSIMD) - target_link_libraries(arrow_compute PRIVATE ${ARROW_XSIMD}) + target_link_libraries(arrow_compute_core PRIVATE ${ARROW_XSIMD}) endif() if(ARROW_WITH_OPENTELEMETRY) - target_link_libraries(arrow_compute PRIVATE ${ARROW_OPENTELEMETRY_LIBS}) + target_link_libraries(arrow_compute_core PRIVATE ${ARROW_OPENTELEMETRY_LIBS}) endif() if(ARROW_WITH_RE2) - target_link_libraries(arrow_compute PRIVATE re2::re2) + target_link_libraries(arrow_compute_core PRIVATE re2::re2) endif() if(ARROW_WITH_UTF8PROC) - target_link_libraries(arrow_compute PRIVATE utf8proc::utf8proc) + target_link_libraries(arrow_compute_core PRIVATE utf8proc::utf8proc) endif() if(ARROW_FILESYSTEM) @@ -1046,7 +1046,7 @@ add_arrow_lib(arrow ${ARROW_SHARED_LINK_FLAGS} SHARED_PRIVATE_LINK_LIBS ${ARROW_ARRAY_TARGET_SHARED} - ${ARROW_COMPUTE_TARGET_SHARED} + ${ARROW_COMPUTE_CORE_TARGET_SHARED} ${ARROW_CSV_TARGET_SHARED} ${ARROW_FILESYSTEM_TARGET_SHARED} 
${ARROW_INTEGRATION_TARGET_SHARED} @@ -1062,7 +1062,7 @@ add_arrow_lib(arrow ${ARROW_SYSTEM_LINK_LIBS} STATIC_LINK_LIBS ${ARROW_ARRAY_TARGET_STATIC} - ${ARROW_COMPUTE_TARGET_STATIC} + ${ARROW_COMPUTE_CORE_TARGET_STATIC} ${ARROW_CSV_TARGET_STATIC} ${ARROW_FILESYSTEM_TARGET_STATIC} ${ARROW_INTEGRATION_TARGET_STATIC} From bcce3da171f041a2be1a597d2d2cec4d91d8c3ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 25 Feb 2025 11:53:20 +0100 Subject: [PATCH 05/22] Try fixing arrow_compute_core target for Windows --- cpp/src/arrow/CMakeLists.txt | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 29bc4ba3b6f24..200d7738c9602 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -800,6 +800,7 @@ if(ARROW_COMPUTE) append_runtime_avx2_src(ARROW_COMPUTE_LIB_SRCS compute/row/compare_internal_avx2.cc) append_runtime_avx2_src(ARROW_COMPUTE_LIB_SRCS compute/row/encode_internal_avx2.cc) append_runtime_avx2_bmi2_src(ARROW_COMPUTE_LIB_SRCS compute/util_avx2.cc) + # TODO: Some of those should be added conditionally set(ARROW_COMPUTE_LINK_LIBS Boost::headers ${ARROW_XSIMD} ${ARROW_OPENTELEMETRY_LIBS}) if(ARROW_WITH_RE2) list(APPEND ARROW_COMPUTE_LINK_LIBS re2::re2) @@ -813,7 +814,7 @@ if(ARROW_COMPUTE) PKG_CONFIG_NAME arrow-compute SHARED_PRIVATE_LINK_LIBS - ${ARROW_COMPUTE_LINK_LIBS} # TODO: This has to be added conditionally if ARROW_USE_XSIMD + ${ARROW_COMPUTE_LINK_LIBS} ${ARROW_OPENTELEMETRY_LIBS} OUTPUTS ARROW_COMPUTE_LIBRARIES @@ -825,20 +826,33 @@ if(ARROW_COMPUTE) endif() arrow_add_object_library(ARROW_COMPUTE_CORE ${ARROW_COMPUTE_SRCS}) +# TODO: Review whether the following (Boost, xsimd, opentelemetry, re2 and utf8proc) are required +# for the core compute library. 
if(ARROW_USE_BOOST) - target_link_libraries(arrow_compute_core PRIVATE Boost::headers) + foreach(ARROW_COMPUTE_CORE_TARGET ${ARROW_COMPUTE_CORE_TARGETS}) + target_link_libraries(${ARROW_COMPUTE_CORE_TARGET} PRIVATE Boost::headers) + endforeach() endif() if(ARROW_USE_XSIMD) - target_link_libraries(arrow_compute_core PRIVATE ${ARROW_XSIMD}) + foreach(ARROW_COMPUTE_CORE_TARGET ${ARROW_COMPUTE_CORE_TARGETS}) + target_link_libraries(${ARROW_COMPUTE_CORE_TARGET} PRIVATE ${ARROW_XSIMD}) + endforeach() endif() if(ARROW_WITH_OPENTELEMETRY) - target_link_libraries(arrow_compute_core PRIVATE ${ARROW_OPENTELEMETRY_LIBS}) + foreach(ARROW_COMPUTE_CORE_TARGET ${ARROW_COMPUTE_CORE_TARGETS}) + target_link_libraries(${ARROW_COMPUTE_CORE_TARGET} + PRIVATE ${ARROW_OPENTELEMETRY_LIBS}) + endforeach() endif() if(ARROW_WITH_RE2) - target_link_libraries(arrow_compute_core PRIVATE re2::re2) + foreach(ARROW_COMPUTE_CORE_TARGET ${ARROW_COMPUTE_CORE_TARGETS}) + target_link_libraries(${ARROW_COMPUTE_CORE_TARGET} PRIVATE re2::re2) + endforeach() endif() if(ARROW_WITH_UTF8PROC) - target_link_libraries(arrow_compute_core PRIVATE utf8proc::utf8proc) + foreach(ARROW_COMPUTE_CORE_TARGET ${ARROW_COMPUTE_CORE_TARGETS}) + target_link_libraries(${ARROW_COMPUTE_CORE_TARGET} PRIVATE utf8proc::utf8proc) + endforeach() endif() if(ARROW_FILESYSTEM) From 80d83d73dbfeedaf923b60d8c5f01a870407b062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 25 Feb 2025 12:51:03 +0100 Subject: [PATCH 06/22] Remove ARROW_EXPORT from arrow_compute in order to fix inconsistent linkage on Windows --- cpp/src/arrow/compute/row/compare_internal.h | 2 +- cpp/src/arrow/compute/row/encode_internal.h | 2 +- cpp/src/arrow/compute/row/grouper.h | 6 +-- cpp/src/arrow/compute/row/grouper_internal.h | 2 +- .../arrow/compute/row/row_encoder_internal.h | 12 ++--- cpp/src/arrow/compute/row/row_internal.h | 4 +- cpp/src/arrow/compute/util.h | 52 ++++++++----------- cpp/src/arrow/compute/util_internal.h | 2 +- 8 files 
changed, 37 insertions(+), 45 deletions(-) diff --git a/cpp/src/arrow/compute/row/compare_internal.h b/cpp/src/arrow/compute/row/compare_internal.h index 29d7f859e59ee..2e62f224f9506 100644 --- a/cpp/src/arrow/compute/row/compare_internal.h +++ b/cpp/src/arrow/compute/row/compare_internal.h @@ -30,7 +30,7 @@ namespace arrow { namespace compute { -class ARROW_EXPORT KeyCompare { +class KeyCompare { public: // Clarify the max temp stack usage for CompareColumnsToRows, which might be necessary // for the caller to be aware of (possibly at compile time) to reserve enough stack size diff --git a/cpp/src/arrow/compute/row/encode_internal.h b/cpp/src/arrow/compute/row/encode_internal.h index 5ad82e0c8e749..75bf40d3d6999 100644 --- a/cpp/src/arrow/compute/row/encode_internal.h +++ b/cpp/src/arrow/compute/row/encode_internal.h @@ -44,7 +44,7 @@ namespace compute { /// be accessed together, as in the case of hash table key. /// /// Does not support nested types -class ARROW_EXPORT RowTableEncoder { +class RowTableEncoder { public: void Init(const std::vector& cols, int row_alignment, int string_alignment); diff --git a/cpp/src/arrow/compute/row/grouper.h b/cpp/src/arrow/compute/row/grouper.h index 345bc62924241..3db34750e3688 100644 --- a/cpp/src/arrow/compute/row/grouper.h +++ b/cpp/src/arrow/compute/row/grouper.h @@ -36,7 +36,7 @@ namespace compute { /// same segment key within a given batch. When a segment group span cross batches, it /// will have multiple segments. A segment never spans cross batches. The segment data /// structure only makes sense when used along with a exec batch. 
-struct ARROW_EXPORT Segment { +struct Segment { /// \brief the offset into the batch where the segment starts int64_t offset; /// \brief the length of the segment @@ -74,7 +74,7 @@ inline bool operator!=(const Segment& segment1, const Segment& segment2) { /// /// If the next call to the segmenter starts with `A A` then that segment would set the /// "extends" flag, which indicates whether the segment continues the last open batch. -class ARROW_EXPORT RowSegmenter { +class RowSegmenter { public: virtual ~RowSegmenter() = default; @@ -106,7 +106,7 @@ class ARROW_EXPORT RowSegmenter { }; /// Consumes batches of keys and yields batches of the group ids. -class ARROW_EXPORT Grouper { +class Grouper { public: virtual ~Grouper() = default; diff --git a/cpp/src/arrow/compute/row/grouper_internal.h b/cpp/src/arrow/compute/row/grouper_internal.h index eb3dfe8ba1654..1926854b941e3 100644 --- a/cpp/src/arrow/compute/row/grouper_internal.h +++ b/cpp/src/arrow/compute/row/grouper_internal.h @@ -20,7 +20,7 @@ namespace arrow { namespace compute { -ARROW_EXPORT Result> MakeAnyKeysSegmenter( +Result> MakeAnyKeysSegmenter( const std::vector& key_types, ExecContext* ctx); } // namespace compute diff --git a/cpp/src/arrow/compute/row/row_encoder_internal.h b/cpp/src/arrow/compute/row/row_encoder_internal.h index c3275283d5a66..7234efcaf3d1c 100644 --- a/cpp/src/arrow/compute/row/row_encoder_internal.h +++ b/cpp/src/arrow/compute/row/row_encoder_internal.h @@ -29,7 +29,7 @@ using internal::checked_cast; namespace compute { namespace internal { -struct ARROW_EXPORT KeyEncoder { +struct KeyEncoder { // the first byte of an encoded key is used to indicate nullity static constexpr bool kExtraByteForNull = true; @@ -85,7 +85,7 @@ struct ARROW_EXPORT KeyEncoder { } }; -struct ARROW_EXPORT BooleanKeyEncoder : KeyEncoder { +struct BooleanKeyEncoder : KeyEncoder { static constexpr int kByteWidth = 1; void AddLength(const ExecValue& data, int64_t batch_length, int32_t* lengths) override; @@ 
-101,7 +101,7 @@ struct ARROW_EXPORT BooleanKeyEncoder : KeyEncoder { MemoryPool* pool) override; }; -struct ARROW_EXPORT FixedWidthKeyEncoder : KeyEncoder { +struct FixedWidthKeyEncoder : KeyEncoder { explicit FixedWidthKeyEncoder(std::shared_ptr type) : type_(std::move(type)), byte_width_(checked_cast(*type_).bit_width() / 8) {} @@ -122,7 +122,7 @@ struct ARROW_EXPORT FixedWidthKeyEncoder : KeyEncoder { const int byte_width_; }; -struct ARROW_EXPORT DictionaryKeyEncoder : FixedWidthKeyEncoder { +struct DictionaryKeyEncoder : FixedWidthKeyEncoder { DictionaryKeyEncoder(std::shared_ptr type, MemoryPool* pool) : FixedWidthKeyEncoder(std::move(type)), pool_(pool) {} @@ -251,7 +251,7 @@ struct VarLengthKeyEncoder : KeyEncoder { std::shared_ptr type_; }; -struct ARROW_EXPORT NullKeyEncoder : KeyEncoder { +struct NullKeyEncoder : KeyEncoder { void AddLength(const ExecValue&, int64_t batch_length, int32_t* lengths) override {} void AddLengthNull(int32_t* length) override {} @@ -331,7 +331,7 @@ struct ARROW_EXPORT NullKeyEncoder : KeyEncoder { /// # Row Encoding /// /// The row format is the concatenation of the encodings of each column. 
-class ARROW_EXPORT RowEncoder { +class RowEncoder { public: static constexpr int kRowIdForNulls() { return -1; } diff --git a/cpp/src/arrow/compute/row/row_internal.h b/cpp/src/arrow/compute/row/row_internal.h index 0919773a2281b..bb236558f0e58 100644 --- a/cpp/src/arrow/compute/row/row_internal.h +++ b/cpp/src/arrow/compute/row/row_internal.h @@ -29,7 +29,7 @@ namespace arrow { namespace compute { /// Description of the data stored in a RowTable -struct ARROW_EXPORT RowTableMetadata { +struct RowTableMetadata { using offset_type = int64_t; /// \brief True if there are no variable length columns in the table @@ -170,7 +170,7 @@ struct ARROW_EXPORT RowTableMetadata { /// Can store both fixed-size data types and variable-length data types /// /// The row table is not safe -class ARROW_EXPORT RowTableImpl { +class RowTableImpl { public: using offset_type = RowTableMetadata::offset_type; diff --git a/cpp/src/arrow/compute/util.h b/cpp/src/arrow/compute/util.h index 1aaff43e10e1f..21873c6a1aa25 100644 --- a/cpp/src/arrow/compute/util.h +++ b/cpp/src/arrow/compute/util.h @@ -66,49 +66,41 @@ class MiniBatch { namespace bit_util { -ARROW_EXPORT void bits_to_indexes(int bit_to_search, int64_t hardware_flags, - const int num_bits, const uint8_t* bits, - int* num_indexes, uint16_t* indexes, - int bit_offset = 0); +void bits_to_indexes(int bit_to_search, int64_t hardware_flags, const int num_bits, + const uint8_t* bits, int* num_indexes, uint16_t* indexes, + int bit_offset = 0); -ARROW_EXPORT void bits_filter_indexes(int bit_to_search, int64_t hardware_flags, - const int num_bits, const uint8_t* bits, - const uint16_t* input_indexes, int* num_indexes, - uint16_t* indexes, int bit_offset = 0); +void bits_filter_indexes(int bit_to_search, int64_t hardware_flags, const int num_bits, + const uint8_t* bits, const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes, int bit_offset = 0); // Input and output indexes may be pointing to the same data (in-place filtering). 
-ARROW_EXPORT void bits_split_indexes(int64_t hardware_flags, const int num_bits, - const uint8_t* bits, int* num_indexes_bit0, - uint16_t* indexes_bit0, uint16_t* indexes_bit1, - int bit_offset = 0); +void bits_split_indexes(int64_t hardware_flags, const int num_bits, const uint8_t* bits, + int* num_indexes_bit0, uint16_t* indexes_bit0, + uint16_t* indexes_bit1, int bit_offset = 0); // Bit 1 is replaced with byte 0xFF. -ARROW_EXPORT void bits_to_bytes(int64_t hardware_flags, const int num_bits, - const uint8_t* bits, uint8_t* bytes, int bit_offset = 0); +void bits_to_bytes(int64_t hardware_flags, const int num_bits, const uint8_t* bits, + uint8_t* bytes, int bit_offset = 0); // Return highest bit of each byte. -ARROW_EXPORT void bytes_to_bits(int64_t hardware_flags, const int num_bits, - const uint8_t* bytes, uint8_t* bits, int bit_offset = 0); +void bytes_to_bits(int64_t hardware_flags, const int num_bits, const uint8_t* bytes, + uint8_t* bits, int bit_offset = 0); -ARROW_EXPORT bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, - uint32_t num_bytes); +bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, uint32_t num_bytes); #if defined(ARROW_HAVE_RUNTIME_AVX2) && defined(ARROW_HAVE_RUNTIME_BMI2) // The functions below use BMI2 instructions, be careful before calling! 
namespace avx2 { -ARROW_EXPORT void bits_filter_indexes_avx2(int bit_to_search, const int num_bits, - const uint8_t* bits, - const uint16_t* input_indexes, - int* num_indexes, uint16_t* indexes); -ARROW_EXPORT void bits_to_indexes_avx2(int bit_to_search, const int num_bits, - const uint8_t* bits, int* num_indexes, - uint16_t* indexes, uint16_t base_index = 0); -ARROW_EXPORT void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, - uint8_t* bytes); -ARROW_EXPORT void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, - uint8_t* bits); -ARROW_EXPORT bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes); +void bits_filter_indexes_avx2(int bit_to_search, const int num_bits, const uint8_t* bits, + const uint16_t* input_indexes, int* num_indexes, + uint16_t* indexes); +void bits_to_indexes_avx2(int bit_to_search, const int num_bits, const uint8_t* bits, + int* num_indexes, uint16_t* indexes, uint16_t base_index = 0); +void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, uint8_t* bytes); +void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, uint8_t* bits); +bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes); } // namespace avx2 #endif diff --git a/cpp/src/arrow/compute/util_internal.h b/cpp/src/arrow/compute/util_internal.h index 5e5b15a5ff600..44831bd5938c0 100644 --- a/cpp/src/arrow/compute/util_internal.h +++ b/cpp/src/arrow/compute/util_internal.h @@ -34,7 +34,7 @@ void CheckAlignment(const void* ptr) { /// Temporary vectors should resemble allocating temporary variables on the stack /// but in the context of vectorized processing where we need to store a vector of /// temporaries instead of a single value. 
-class ARROW_EXPORT TempVectorStack { +class TempVectorStack { template friend class TempVectorHolder; From 1d1bcca00c28cb820d7555ae9d9f69fa28f13858 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 25 Feb 2025 13:06:11 +0100 Subject: [PATCH 07/22] Remove ARROW_EXPORT from arrow_compute in order to fix inconsistent linkage on Windows 2 --- cpp/src/arrow/compute/key_map_internal.h | 2 +- cpp/src/arrow/compute/light_array_internal.h | 28 ++++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/compute/key_map_internal.h b/cpp/src/arrow/compute/key_map_internal.h index 8423134cb3269..da23a1498a9d9 100644 --- a/cpp/src/arrow/compute/key_map_internal.h +++ b/cpp/src/arrow/compute/key_map_internal.h @@ -37,7 +37,7 @@ namespace compute { // slots, stamps) and operations provided by this class is given in the document: // arrow/compute/exec/doc/key_map.md. // -class ARROW_EXPORT SwissTable { +class SwissTable { friend class SwissTableMerge; public: diff --git a/cpp/src/arrow/compute/light_array_internal.h b/cpp/src/arrow/compute/light_array_internal.h index 60f1a6a21e264..59dd978678ee2 100644 --- a/cpp/src/arrow/compute/light_array_internal.h +++ b/cpp/src/arrow/compute/light_array_internal.h @@ -53,7 +53,7 @@ struct LightContext { /// and no children. 
/// /// This metadata object is a zero-allocation analogue of arrow::DataType -struct ARROW_EXPORT KeyColumnMetadata { +struct KeyColumnMetadata { KeyColumnMetadata() = default; KeyColumnMetadata(bool is_fixed_length_in, uint32_t fixed_length_in, bool is_null_type_in = false) @@ -81,7 +81,7 @@ struct ARROW_EXPORT KeyColumnMetadata { /// A "key" column is a non-nested, non-union column \see KeyColumnMetadata /// /// This metadata object is a zero-allocation analogue of arrow::ArrayData -class ARROW_EXPORT KeyColumnArray { +class KeyColumnArray { public: /// \brief Create an uninitialized KeyColumnArray KeyColumnArray() = default; @@ -218,7 +218,7 @@ class ARROW_EXPORT KeyColumnArray { /// /// This should only be called on "key" columns. Calling this with /// a non-key column will return Status::TypeError. -ARROW_EXPORT Result ColumnMetadataFromDataType( +Result ColumnMetadataFromDataType( const std::shared_ptr& type); /// \brief Create KeyColumnArray from ArrayData @@ -228,7 +228,7 @@ ARROW_EXPORT Result ColumnMetadataFromDataType( /// /// The caller should ensure this is only called on "key" columns. /// \see ColumnMetadataFromDataType for details -ARROW_EXPORT Result ColumnArrayFromArrayData( +Result ColumnArrayFromArrayData( const std::shared_ptr& array_data, int64_t start_row, int64_t num_rows); /// \brief Create KeyColumnArray from ArrayData and KeyColumnMetadata @@ -238,7 +238,7 @@ ARROW_EXPORT Result ColumnArrayFromArrayData( /// /// The caller should ensure this is only called on "key" columns. 
/// \see ColumnMetadataFromDataType for details -ARROW_EXPORT KeyColumnArray ColumnArrayFromArrayDataAndMetadata( +KeyColumnArray ColumnArrayFromArrayDataAndMetadata( const std::shared_ptr& array_data, const KeyColumnMetadata& metadata, int64_t start_row, int64_t num_rows); @@ -248,8 +248,8 @@ ARROW_EXPORT KeyColumnArray ColumnArrayFromArrayDataAndMetadata( /// /// All columns in `batch` must be eligible "key" columns and have an array shape /// \see ColumnMetadataFromDataType for more details -ARROW_EXPORT Status ColumnMetadatasFromExecBatch( - const ExecBatch& batch, std::vector* column_metadatas); +Status ColumnMetadatasFromExecBatch(const ExecBatch& batch, + std::vector* column_metadatas); /// \brief Create KeyColumnArray instances from a slice of an ExecBatch /// @@ -257,9 +257,9 @@ ARROW_EXPORT Status ColumnMetadatasFromExecBatch( /// /// All columns in `batch` must be eligible "key" columns and have an array shape /// \see ColumnArrayFromArrayData for more details -ARROW_EXPORT Status ColumnArraysFromExecBatch(const ExecBatch& batch, int64_t start_row, - int64_t num_rows, - std::vector* column_arrays); +Status ColumnArraysFromExecBatch(const ExecBatch& batch, int64_t start_row, + int64_t num_rows, + std::vector* column_arrays); /// \brief Create KeyColumnArray instances from an ExecBatch /// @@ -267,8 +267,8 @@ ARROW_EXPORT Status ColumnArraysFromExecBatch(const ExecBatch& batch, int64_t st /// /// All columns in `batch` must be eligible "key" columns and have an array shape /// \see ColumnArrayFromArrayData for more details -ARROW_EXPORT Status ColumnArraysFromExecBatch(const ExecBatch& batch, - std::vector* column_arrays); +Status ColumnArraysFromExecBatch(const ExecBatch& batch, + std::vector* column_arrays); /// A lightweight resizable array for "key" columns /// @@ -276,7 +276,7 @@ ARROW_EXPORT Status ColumnArraysFromExecBatch(const ExecBatch& batch, /// /// Resizing is handled by arrow::ResizableBuffer and a doubling approach is /// used so that 
resizes will always grow up to the next power of 2 -class ARROW_EXPORT ResizableArrayData { +class ResizableArrayData { public: /// \brief Create an uninitialized instance /// @@ -372,7 +372,7 @@ class ARROW_EXPORT ResizableArrayData { /// \brief A builder to concatenate batches of data into a larger batch /// /// Will only store num_rows_max() rows -class ARROW_EXPORT ExecBatchBuilder { +class ExecBatchBuilder { public: /// \brief Add rows from `source` into `target` column /// From bde6fb7b0956b7958a78f60f6f2eca9707b233ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 25 Feb 2025 13:14:41 +0100 Subject: [PATCH 08/22] Remove ARROW_EXPORT from arrow_compute in order to fix inconsistent linkage on Windows 3 --- cpp/src/arrow/compute/key_hash_internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/compute/key_hash_internal.h b/cpp/src/arrow/compute/key_hash_internal.h index 769f3b2145e29..507e545ff7be5 100644 --- a/cpp/src/arrow/compute/key_hash_internal.h +++ b/cpp/src/arrow/compute/key_hash_internal.h @@ -34,7 +34,7 @@ enum class BloomFilterBuildStrategy; // Implementations are based on xxh3 32-bit algorithm description from: // https://github.com/Cyan4973/xxHash/blob/dev/doc/xxhash_spec.md // -class ARROW_EXPORT Hashing32 { +class Hashing32 { friend class TestVectorHash; template friend void TestBloomLargeHashHelper(int64_t, int64_t, const std::vector&, @@ -157,7 +157,7 @@ class ARROW_EXPORT Hashing32 { #endif }; -class ARROW_EXPORT Hashing64 { +class Hashing64 { friend class TestVectorHash; template friend void TestBloomLargeHashHelper(int64_t, int64_t, const std::vector&, From b61f2e598bce02168edbf19374a879f6dcfa648f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 25 Feb 2025 14:09:24 +0100 Subject: [PATCH 09/22] Link arrow_compute with libarrow --- cpp/src/arrow/CMakeLists.txt | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) diff 
--git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 200d7738c9602..391543f1ecf95 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -801,21 +801,44 @@ if(ARROW_COMPUTE) append_runtime_avx2_src(ARROW_COMPUTE_LIB_SRCS compute/row/encode_internal_avx2.cc) append_runtime_avx2_bmi2_src(ARROW_COMPUTE_LIB_SRCS compute/util_avx2.cc) # TODO: Some of those should be added conditionally - set(ARROW_COMPUTE_LINK_LIBS Boost::headers ${ARROW_XSIMD} ${ARROW_OPENTELEMETRY_LIBS}) + set(ARROW_COMPUTED_SHARED_PRIVATE_LINK_LIBS Boost::headers ${ARROW_XSIMD} + ${ARROW_OPENTELEMETRY_LIBS}) + set(ARROW_COMPUTE_STATIC_LINK_LIBS Boost::headers ${ARROW_XSIMD} + ${ARROW_OPENTELEMETRY_LIBS}) if(ARROW_WITH_RE2) - list(APPEND ARROW_COMPUTE_LINK_LIBS re2::re2) + list(APPEND ARROW_COMPUTED_SHARED_PRIVATE_LINK_LIBS re2::re2) + list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS re2::re2) endif() if(ARROW_WITH_UTF8PROC) - list(APPEND ARROW_COMPUTE_LINK_LIBS utf8proc::utf8proc) + list(APPEND ARROW_COMPUTED_SHARED_PRIVATE_LINK_LIBS utf8proc::utf8proc) + list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS re2::re2) endif() + + set(ARROW_COMPUTE_SHARED_LINK_LIBS) + set(ARROW_COMPUTE_STATIC_LINK_LIBS) + set(ARROW_COMPUTE_STATIC_INSTALL_INTERFACE_LIBS) + set(ARROW_COMPUTE_SHARED_INSTALL_INTERFACE_LIBS) + + list(APPEND ARROW_COMPUTE_STATIC_INSTALL_INTERFACE_LIBS Arrow::arrow_static) + list(APPEND ARROW_COMPUTE_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared) + list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS arrow_static) + list(APPEND ARROW_COMPUTE_SHARED_LINK_LIBS arrow_shared) + add_arrow_lib(arrow_compute CMAKE_PACKAGE_NAME ArrowCompute PKG_CONFIG_NAME arrow-compute + SHARED_LINK_LIBS + ${ARROW_COMPUTE_SHARED_LINK_LIBS} SHARED_PRIVATE_LINK_LIBS - ${ARROW_COMPUTE_LINK_LIBS} - ${ARROW_OPENTELEMETRY_LIBS} + ${ARROW_COMPUTED_SHARED_PRIVATE_LINK_LIBS} + SHARED_INSTALL_INTERFACE_LIBS + ${ARROW_COMPUTE_SHARED_INSTALL_INTERFACE_LIBS} + STATIC_LINK_LIBS + 
${ARROW_COMPUTE_STATIC_LINK_LIBS} + STATIC_INSTALL_INTERFACE_LIBS + ${ARROW_COMPUTE_STATIC_INSTALL_INTERFACE_LIBS} OUTPUTS ARROW_COMPUTE_LIBRARIES SOURCES From b2b7e76b4b593e8e843feda159f08eb4a5a4355c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 25 Feb 2025 15:40:20 +0100 Subject: [PATCH 10/22] Some code reorganization and add ARROW_EXPORT to required function --- cpp/src/arrow/CMakeLists.txt | 4 ++-- cpp/src/arrow/type.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 391543f1ecf95..0da29aaeaec37 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -729,7 +729,6 @@ set(ARROW_COMPUTE_SRCS compute/kernel.cc compute/ordering.cc compute/registry.cc - compute/kernels/chunked_internal.cc compute/kernels/codegen_internal.cc compute/kernels/ree_util_internal.cc compute/kernels/scalar_cast_boolean.cc @@ -740,7 +739,6 @@ set(ARROW_COMPUTE_SRCS compute/kernels/scalar_cast_numeric.cc compute/kernels/scalar_cast_string.cc compute/kernels/scalar_cast_temporal.cc - compute/kernels/util_internal.cc compute/kernels/vector_hash.cc compute/kernels/vector_run_end_encode.cc compute/kernels/vector_selection.cc @@ -757,6 +755,7 @@ if(ARROW_COMPUTE) compute/kernels/aggregate_quantile.cc compute/kernels/aggregate_tdigest.cc compute/kernels/aggregate_var_std.cc + compute/kernels/chunked_internal.cc compute/kernels/hash_aggregate.cc compute/kernels/registry.cc compute/kernels/scalar_arithmetic.cc @@ -772,6 +771,7 @@ if(ARROW_COMPUTE) compute/kernels/scalar_temporal_binary.cc compute/kernels/scalar_temporal_unary.cc compute/kernels/scalar_validity.cc + compute/kernels/util_internal.cc compute/kernels/vector_array_sort.cc compute/kernels/vector_cumulative_ops.cc compute/kernels/vector_nested.cc diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 6b4f2c9f37f66..d7864406dc34d 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ 
-292,6 +292,7 @@ std::ostream& operator<<(std::ostream& os, const TypeHolder& type); /// - if a `PhysicalType` alias exists in the concrete type class, return /// an instance of `PhysicalType`. /// - otherwise, return the input type itself. +ARROW_EXPORT std::shared_ptr GetPhysicalType(const std::shared_ptr& type); /// \brief Base class for all fixed-width data types From b2da5105b400e3e534fcd9495268e6b75a40024f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 25 Feb 2025 15:56:57 +0100 Subject: [PATCH 11/22] Add duplicated codegen_internal to arrow_compute --- cpp/src/arrow/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 0da29aaeaec37..147759979ddd5 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -755,6 +755,7 @@ if(ARROW_COMPUTE) compute/kernels/aggregate_quantile.cc compute/kernels/aggregate_tdigest.cc compute/kernels/aggregate_var_std.cc + compute/kernels/codegen_internal.cc # This is wrong but I am testing something compute/kernels/chunked_internal.cc compute/kernels/hash_aggregate.cc compute/kernels/registry.cc From 88c709b4e15a2578b4795bb89122352981403b56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 25 Feb 2025 16:34:17 +0100 Subject: [PATCH 12/22] Remove some more ARROW_EXPORT (this will have to be reverted) --- cpp/src/arrow/compute/kernels/codegen_internal.h | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h index 2a492f581f53b..f0dba993d5752 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.h +++ b/cpp/src/arrow/compute/kernels/codegen_internal.h @@ -1386,41 +1386,29 @@ ArrayKernelExec GenerateDecimal(detail::GetTypeId get_id) { // END of kernel generator-dispatchers // ---------------------------------------------------------------------- // BEGIN of 
DispatchBest helpers -ARROW_EXPORT void EnsureDictionaryDecoded(std::vector* types); -ARROW_EXPORT void EnsureDictionaryDecoded(TypeHolder* begin, size_t count); -ARROW_EXPORT void ReplaceNullWithOtherType(std::vector* types); -ARROW_EXPORT void ReplaceNullWithOtherType(TypeHolder* begin, size_t count); -ARROW_EXPORT void ReplaceTypes(const TypeHolder& replacement, std::vector* types); -ARROW_EXPORT void ReplaceTypes(const TypeHolder& replacement, TypeHolder* types, size_t count); -ARROW_EXPORT void ReplaceTemporalTypes(TimeUnit::type unit, std::vector* types); -ARROW_EXPORT TypeHolder CommonNumeric(const std::vector& types); -ARROW_EXPORT TypeHolder CommonNumeric(const TypeHolder* begin, size_t count); -ARROW_EXPORT TypeHolder CommonTemporal(const TypeHolder* begin, size_t count); -ARROW_EXPORT bool CommonTemporalResolution(const TypeHolder* begin, size_t count, TimeUnit::type* finest_unit); -ARROW_EXPORT TypeHolder CommonBinary(const TypeHolder* begin, size_t count); /// How to promote decimal precision/scale in CastBinaryDecimalArgs. @@ -1433,18 +1421,14 @@ enum class DecimalPromotion : uint8_t { /// Given two arguments, at least one of which is decimal, promote all /// to not necessarily identical types, but types which are compatible /// for the given operator (add/multiply/divide). -ARROW_EXPORT Status CastBinaryDecimalArgs(DecimalPromotion promotion, std::vector* types); /// Given one or more arguments, at least one of which is decimal, /// promote all to an identical type. 
-ARROW_EXPORT Status CastDecimalArgs(TypeHolder* begin, size_t count); -ARROW_EXPORT bool HasDecimal(const std::vector& types); -ARROW_EXPORT void PromoteIntegerForDurationArithmetic(std::vector* types); // END of DispatchBest helpers From 794a8cafb59be850c2264fc3db3d83330a560a6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 25 Feb 2025 19:15:44 +0100 Subject: [PATCH 13/22] Add ArrowCompute dependency to ArrowAcero and interface libs --- cpp/src/arrow/acero/ArrowAceroConfig.cmake.in | 1 + cpp/src/arrow/acero/CMakeLists.txt | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/acero/ArrowAceroConfig.cmake.in b/cpp/src/arrow/acero/ArrowAceroConfig.cmake.in index 66aa2b4078c7f..b0cf1290959d9 100644 --- a/cpp/src/arrow/acero/ArrowAceroConfig.cmake.in +++ b/cpp/src/arrow/acero/ArrowAceroConfig.cmake.in @@ -28,6 +28,7 @@ include(CMakeFindDependencyMacro) find_dependency(Arrow) +find_dependency(ArrowCompute) include("${CMAKE_CURRENT_LIST_DIR}/ArrowAceroTargets.cmake") diff --git a/cpp/src/arrow/acero/CMakeLists.txt b/cpp/src/arrow/acero/CMakeLists.txt index 5c1613d233a21..15c0740f2ddd5 100644 --- a/cpp/src/arrow/acero/CMakeLists.txt +++ b/cpp/src/arrow/acero/CMakeLists.txt @@ -64,8 +64,10 @@ if(ARROW_WITH_OPENTELEMETRY) list(APPEND ARROW_ACERO_STATIC_LINK_LIBS ${ARROW_OPENTELEMETRY_LIBS}) endif() -list(APPEND ARROW_ACERO_STATIC_INSTALL_INTERFACE_LIBS Arrow::arrow_static) -list(APPEND ARROW_ACERO_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared) +list(APPEND ARROW_ACERO_STATIC_INSTALL_INTERFACE_LIBS Arrow::arrow_static + ArrowCompute::arrow_compute_static) +list(APPEND ARROW_ACERO_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared + ArrowCompute::arrow_compute_shared) list(APPEND ARROW_ACERO_STATIC_LINK_LIBS arrow_static arrow_compute_static) list(APPEND ARROW_ACERO_SHARED_LINK_LIBS arrow_shared arrow_compute_shared) From 11a3feae585058a5bc292e91aaf226f29b1b7ab7 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 26 Feb 2025 13:12:25 +0100 Subject: [PATCH 14/22] Define ARROW_COMPUTE_EXPORT and add it to some of the deleted ARROW_EXPORT. Fix a typo too --- cpp/src/arrow/CMakeLists.txt | 13 ++++-- cpp/src/arrow/compute/key_hash_internal.h | 5 +- cpp/src/arrow/compute/key_map_internal.h | 3 +- cpp/src/arrow/compute/light_array_internal.h | 29 ++++++------ cpp/src/arrow/compute/row/compare_internal.h | 3 +- cpp/src/arrow/compute/row/encode_internal.h | 3 +- cpp/src/arrow/compute/row/grouper.h | 7 +-- cpp/src/arrow/compute/row/grouper_internal.h | 2 +- cpp/src/arrow/compute/visibility.h | 48 ++++++++++++++++++++ 9 files changed, 85 insertions(+), 28 deletions(-) create mode 100644 cpp/src/arrow/compute/visibility.h diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 147759979ddd5..0f55fa32a19a7 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -802,16 +802,16 @@ if(ARROW_COMPUTE) append_runtime_avx2_src(ARROW_COMPUTE_LIB_SRCS compute/row/encode_internal_avx2.cc) append_runtime_avx2_bmi2_src(ARROW_COMPUTE_LIB_SRCS compute/util_avx2.cc) # TODO: Some of those should be added conditionally - set(ARROW_COMPUTED_SHARED_PRIVATE_LINK_LIBS Boost::headers ${ARROW_XSIMD} - ${ARROW_OPENTELEMETRY_LIBS}) + set(ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS Boost::headers ${ARROW_XSIMD} + ${ARROW_OPENTELEMETRY_LIBS}) set(ARROW_COMPUTE_STATIC_LINK_LIBS Boost::headers ${ARROW_XSIMD} ${ARROW_OPENTELEMETRY_LIBS}) if(ARROW_WITH_RE2) - list(APPEND ARROW_COMPUTED_SHARED_PRIVATE_LINK_LIBS re2::re2) + list(APPEND ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS re2::re2) list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS re2::re2) endif() if(ARROW_WITH_UTF8PROC) - list(APPEND ARROW_COMPUTED_SHARED_PRIVATE_LINK_LIBS utf8proc::utf8proc) + list(APPEND ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS utf8proc::utf8proc) list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS re2::re2) endif() @@ -833,7 +833,7 @@ if(ARROW_COMPUTE) SHARED_LINK_LIBS 
${ARROW_COMPUTE_SHARED_LINK_LIBS} SHARED_PRIVATE_LINK_LIBS - ${ARROW_COMPUTED_SHARED_PRIVATE_LINK_LIBS} + ${ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS} SHARED_INSTALL_INTERFACE_LIBS ${ARROW_COMPUTE_SHARED_INSTALL_INTERFACE_LIBS} STATIC_LINK_LIBS @@ -847,6 +847,9 @@ if(ARROW_COMPUTE) SHARED_LINK_FLAGS ${ARROW_VERSION_SCRIPT_FLAGS} # Defined in cpp/arrow/CMakeLists.txt ) + foreach(LIB_TARGET ${ARROW_COMPUTE_LIBRARIES}) + target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_COMPUTE_EXPORTING) + endforeach() endif() arrow_add_object_library(ARROW_COMPUTE_CORE ${ARROW_COMPUTE_SRCS}) diff --git a/cpp/src/arrow/compute/key_hash_internal.h b/cpp/src/arrow/compute/key_hash_internal.h index 507e545ff7be5..d141603ce0f6a 100644 --- a/cpp/src/arrow/compute/key_hash_internal.h +++ b/cpp/src/arrow/compute/key_hash_internal.h @@ -21,6 +21,7 @@ #include "arrow/compute/light_array_internal.h" #include "arrow/compute/util.h" +#include "arrow/compute/visibility.h" #include "arrow/util/simd.h" namespace arrow { @@ -34,7 +35,7 @@ enum class BloomFilterBuildStrategy; // Implementations are based on xxh3 32-bit algorithm description from: // https://github.com/Cyan4973/xxHash/blob/dev/doc/xxhash_spec.md // -class Hashing32 { +class ARROW_COMPUTE_EXPORT Hashing32 { friend class TestVectorHash; template friend void TestBloomLargeHashHelper(int64_t, int64_t, const std::vector&, @@ -157,7 +158,7 @@ class Hashing32 { #endif }; -class Hashing64 { +class ARROW_COMPUTE_EXPORT Hashing64 { friend class TestVectorHash; template friend void TestBloomLargeHashHelper(int64_t, int64_t, const std::vector&, diff --git a/cpp/src/arrow/compute/key_map_internal.h b/cpp/src/arrow/compute/key_map_internal.h index da23a1498a9d9..36ff24a9b3ddc 100644 --- a/cpp/src/arrow/compute/key_map_internal.h +++ b/cpp/src/arrow/compute/key_map_internal.h @@ -22,6 +22,7 @@ #include "arrow/compute/util.h" #include "arrow/compute/util_internal.h" +#include "arrow/compute/visibility.h" #include "arrow/result.h" #include 
"arrow/status.h" #include "arrow/type_fwd.h" @@ -37,7 +38,7 @@ namespace compute { // slots, stamps) and operations provided by this class is given in the document: // arrow/compute/exec/doc/key_map.md. // -class SwissTable { +class ARROW_COMPUTE_EXPORT SwissTable { friend class SwissTableMerge; public: diff --git a/cpp/src/arrow/compute/light_array_internal.h b/cpp/src/arrow/compute/light_array_internal.h index 59dd978678ee2..557d410862b17 100644 --- a/cpp/src/arrow/compute/light_array_internal.h +++ b/cpp/src/arrow/compute/light_array_internal.h @@ -23,6 +23,7 @@ #include "arrow/compute/exec.h" #include "arrow/compute/util.h" #include "arrow/compute/util_internal.h" +#include "arrow/compute/visibility.h" #include "arrow/type.h" #include "arrow/util/cpu_info.h" #include "arrow/util/logging.h" @@ -53,7 +54,7 @@ struct LightContext { /// and no children. /// /// This metadata object is a zero-allocation analogue of arrow::DataType -struct KeyColumnMetadata { +struct ARROW_COMPUTE_EXPORT KeyColumnMetadata { KeyColumnMetadata() = default; KeyColumnMetadata(bool is_fixed_length_in, uint32_t fixed_length_in, bool is_null_type_in = false) @@ -81,7 +82,7 @@ struct KeyColumnMetadata { /// A "key" column is a non-nested, non-union column \see KeyColumnMetadata /// /// This metadata object is a zero-allocation analogue of arrow::ArrayData -class KeyColumnArray { +class ARROW_COMPUTE_EXPORT KeyColumnArray { public: /// \brief Create an uninitialized KeyColumnArray KeyColumnArray() = default; @@ -218,7 +219,7 @@ class KeyColumnArray { /// /// This should only be called on "key" columns. Calling this with /// a non-key column will return Status::TypeError. -Result ColumnMetadataFromDataType( +ARROW_COMPUTE_EXPORT Result ColumnMetadataFromDataType( const std::shared_ptr& type); /// \brief Create KeyColumnArray from ArrayData @@ -228,7 +229,7 @@ Result ColumnMetadataFromDataType( /// /// The caller should ensure this is only called on "key" columns. 
/// \see ColumnMetadataFromDataType for details -Result ColumnArrayFromArrayData( +ARROW_COMPUTE_EXPORT Result ColumnArrayFromArrayData( const std::shared_ptr& array_data, int64_t start_row, int64_t num_rows); /// \brief Create KeyColumnArray from ArrayData and KeyColumnMetadata @@ -238,7 +239,7 @@ Result ColumnArrayFromArrayData( /// /// The caller should ensure this is only called on "key" columns. /// \see ColumnMetadataFromDataType for details -KeyColumnArray ColumnArrayFromArrayDataAndMetadata( +ARROW_COMPUTE_EXPORT KeyColumnArray ColumnArrayFromArrayDataAndMetadata( const std::shared_ptr& array_data, const KeyColumnMetadata& metadata, int64_t start_row, int64_t num_rows); @@ -248,8 +249,8 @@ KeyColumnArray ColumnArrayFromArrayDataAndMetadata( /// /// All columns in `batch` must be eligible "key" columns and have an array shape /// \see ColumnMetadataFromDataType for more details -Status ColumnMetadatasFromExecBatch(const ExecBatch& batch, - std::vector* column_metadatas); +ARROW_COMPUTE_EXPORT Status ColumnMetadatasFromExecBatch( + const ExecBatch& batch, std::vector* column_metadatas); /// \brief Create KeyColumnArray instances from a slice of an ExecBatch /// @@ -257,9 +258,9 @@ Status ColumnMetadatasFromExecBatch(const ExecBatch& batch, /// /// All columns in `batch` must be eligible "key" columns and have an array shape /// \see ColumnArrayFromArrayData for more details -Status ColumnArraysFromExecBatch(const ExecBatch& batch, int64_t start_row, - int64_t num_rows, - std::vector* column_arrays); +ARROW_COMPUTE_EXPORT Status +ColumnArraysFromExecBatch(const ExecBatch& batch, int64_t start_row, int64_t num_rows, + std::vector* column_arrays); /// \brief Create KeyColumnArray instances from an ExecBatch /// @@ -267,8 +268,8 @@ Status ColumnArraysFromExecBatch(const ExecBatch& batch, int64_t start_row, /// /// All columns in `batch` must be eligible "key" columns and have an array shape /// \see ColumnArrayFromArrayData for more details -Status 
ColumnArraysFromExecBatch(const ExecBatch& batch, - std::vector* column_arrays); +ARROW_COMPUTE_EXPORT Status ColumnArraysFromExecBatch( + const ExecBatch& batch, std::vector* column_arrays); /// A lightweight resizable array for "key" columns /// @@ -276,7 +277,7 @@ Status ColumnArraysFromExecBatch(const ExecBatch& batch, /// /// Resizing is handled by arrow::ResizableBuffer and a doubling approach is /// used so that resizes will always grow up to the next power of 2 -class ResizableArrayData { +class ARROW_COMPUTE_EXPORT ResizableArrayData { public: /// \brief Create an uninitialized instance /// @@ -372,7 +373,7 @@ class ResizableArrayData { /// \brief A builder to concatenate batches of data into a larger batch /// /// Will only store num_rows_max() rows -class ExecBatchBuilder { +class ARROW_COMPUTE_EXPORT ExecBatchBuilder { public: /// \brief Add rows from `source` into `target` column /// diff --git a/cpp/src/arrow/compute/row/compare_internal.h b/cpp/src/arrow/compute/row/compare_internal.h index 2e62f224f9506..fc581c32c3bf1 100644 --- a/cpp/src/arrow/compute/row/compare_internal.h +++ b/cpp/src/arrow/compute/row/compare_internal.h @@ -23,6 +23,7 @@ #include "arrow/compute/row/encode_internal.h" #include "arrow/compute/row/row_internal.h" #include "arrow/compute/util.h" +#include "arrow/compute/visibility.h" #include "arrow/memory_pool.h" #include "arrow/result.h" #include "arrow/status.h" @@ -30,7 +31,7 @@ namespace arrow { namespace compute { -class KeyCompare { +class ARROW_EXPORT KeyCompare { public: // Clarify the max temp stack usage for CompareColumnsToRows, which might be necessary // for the caller to be aware of (possibly at compile time) to reserve enough stack size diff --git a/cpp/src/arrow/compute/row/encode_internal.h b/cpp/src/arrow/compute/row/encode_internal.h index 75bf40d3d6999..bc60478cb0b53 100644 --- a/cpp/src/arrow/compute/row/encode_internal.h +++ b/cpp/src/arrow/compute/row/encode_internal.h @@ -26,6 +26,7 @@ #include 
"arrow/compute/light_array_internal.h" #include "arrow/compute/row/row_internal.h" #include "arrow/compute/util.h" +#include "arrow/compute/visibility.h" #include "arrow/memory_pool.h" #include "arrow/result.h" #include "arrow/status.h" @@ -44,7 +45,7 @@ namespace compute { /// be accessed together, as in the case of hash table key. /// /// Does not support nested types -class RowTableEncoder { +class ARROW_EXPORT RowTableEncoder { public: void Init(const std::vector& cols, int row_alignment, int string_alignment); diff --git a/cpp/src/arrow/compute/row/grouper.h b/cpp/src/arrow/compute/row/grouper.h index 3db34750e3688..9cb70181fb0f6 100644 --- a/cpp/src/arrow/compute/row/grouper.h +++ b/cpp/src/arrow/compute/row/grouper.h @@ -21,6 +21,7 @@ #include #include "arrow/compute/kernel.h" +#include "arrow/compute/visibility.h" #include "arrow/datum.h" #include "arrow/result.h" #include "arrow/util/visibility.h" @@ -36,7 +37,7 @@ namespace compute { /// same segment key within a given batch. When a segment group span cross batches, it /// will have multiple segments. A segment never spans cross batches. The segment data /// structure only makes sense when used along with a exec batch. -struct Segment { +struct ARROW_EXPORT Segment { /// \brief the offset into the batch where the segment starts int64_t offset; /// \brief the length of the segment @@ -74,7 +75,7 @@ inline bool operator!=(const Segment& segment1, const Segment& segment2) { /// /// If the next call to the segmenter starts with `A A` then that segment would set the /// "extends" flag, which indicates whether the segment continues the last open batch. -class RowSegmenter { +class ARROW_EXPORT RowSegmenter { public: virtual ~RowSegmenter() = default; @@ -106,7 +107,7 @@ class RowSegmenter { }; /// Consumes batches of keys and yields batches of the group ids. 
-class Grouper { +class ARROW_EXPORT Grouper { public: virtual ~Grouper() = default; diff --git a/cpp/src/arrow/compute/row/grouper_internal.h b/cpp/src/arrow/compute/row/grouper_internal.h index 1926854b941e3..bce9ea1d3d5ea 100644 --- a/cpp/src/arrow/compute/row/grouper_internal.h +++ b/cpp/src/arrow/compute/row/grouper_internal.h @@ -20,7 +20,7 @@ namespace arrow { namespace compute { -Result> MakeAnyKeysSegmenter( +ARROW_COMPUTE_EXPORT Result> MakeAnyKeysSegmenter( const std::vector& key_types, ExecContext* ctx); } // namespace compute diff --git a/cpp/src/arrow/compute/visibility.h b/cpp/src/arrow/compute/visibility.h new file mode 100644 index 0000000000000..c220eac99cd60 --- /dev/null +++ b/cpp/src/arrow/compute/visibility.h @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#if defined(_WIN32) || defined(__CYGWIN__) +# if defined(_MSC_VER) +# pragma warning(push) +# pragma warning(disable : 4251) +# else +# pragma GCC diagnostic ignored "-Wattributes" +# endif + +# ifdef ARROW_COMPUTE_STATIC +# define ARROW_COMPUTE_EXPORT +# elif defined(ARROW_COMPUTE_EXPORTING) +# define ARROW_COMPUTE_EXPORT __declspec(dllexport) +# else +# define ARROW_COMPUTE_EXPORT __declspec(dllimport) +# endif + +# define ARROW_COMPUTE_NO_EXPORT +#else // Not Windows +# ifndef ARROW_COMPUTE_EXPORT +# define ARROW_COMPUTE_EXPORT __attribute__((visibility("default"))) +# endif +# ifndef ARROW_COMPUTE_NO_EXPORT +# define ARROW_COMPUTE_NO_EXPORT __attribute__((visibility("hidden"))) +# endif +#endif // Not-Windows + +#if defined(_MSC_VER) +# pragma warning(pop) +#endif From f76d3fb7b84e2abeab23eaa38fb5e072dd78a9eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 26 Feb 2025 15:07:46 +0100 Subject: [PATCH 15/22] Fix wrongly added ARROW_EXPORT for ARROW_COMPUTE_EXPORT --- cpp/src/arrow/compute/row/compare_internal.h | 2 +- cpp/src/arrow/compute/row/encode_internal.h | 2 +- cpp/src/arrow/compute/row/grouper.h | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/compute/row/compare_internal.h b/cpp/src/arrow/compute/row/compare_internal.h index fc581c32c3bf1..264ef69b39f29 100644 --- a/cpp/src/arrow/compute/row/compare_internal.h +++ b/cpp/src/arrow/compute/row/compare_internal.h @@ -31,7 +31,7 @@ namespace arrow { namespace compute { -class ARROW_EXPORT KeyCompare { +class ARROW_COMPUTE_EXPORT KeyCompare { public: // Clarify the max temp stack usage for CompareColumnsToRows, which might be necessary // for the caller to be aware of (possibly at compile time) to reserve enough stack size diff --git a/cpp/src/arrow/compute/row/encode_internal.h b/cpp/src/arrow/compute/row/encode_internal.h index bc60478cb0b53..6bfb87e6f8486 100644 --- a/cpp/src/arrow/compute/row/encode_internal.h +++ 
b/cpp/src/arrow/compute/row/encode_internal.h @@ -45,7 +45,7 @@ namespace compute { /// be accessed together, as in the case of hash table key. /// /// Does not support nested types -class ARROW_EXPORT RowTableEncoder { +class ARROW_COMPUTE_EXPORT RowTableEncoder { public: void Init(const std::vector& cols, int row_alignment, int string_alignment); diff --git a/cpp/src/arrow/compute/row/grouper.h b/cpp/src/arrow/compute/row/grouper.h index 9cb70181fb0f6..6333097a6e8da 100644 --- a/cpp/src/arrow/compute/row/grouper.h +++ b/cpp/src/arrow/compute/row/grouper.h @@ -37,7 +37,7 @@ namespace compute { /// same segment key within a given batch. When a segment group span cross batches, it /// will have multiple segments. A segment never spans cross batches. The segment data /// structure only makes sense when used along with a exec batch. -struct ARROW_EXPORT Segment { +struct ARROW_COMPUTE_EXPORT Segment { /// \brief the offset into the batch where the segment starts int64_t offset; /// \brief the length of the segment @@ -75,7 +75,7 @@ inline bool operator!=(const Segment& segment1, const Segment& segment2) { /// /// If the next call to the segmenter starts with `A A` then that segment would set the /// "extends" flag, which indicates whether the segment continues the last open batch. -class ARROW_EXPORT RowSegmenter { +class ARROW_COMPUTE_EXPORT RowSegmenter { public: virtual ~RowSegmenter() = default; @@ -107,7 +107,7 @@ class ARROW_EXPORT RowSegmenter { }; /// Consumes batches of keys and yields batches of the group ids. 
-class ARROW_EXPORT Grouper { +class ARROW_COMPUTE_EXPORT Grouper { public: virtual ~Grouper() = default; From b0122dc56887f29cd41a560bc8e8edde26731e3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 26 Feb 2025 15:29:56 +0100 Subject: [PATCH 16/22] Add some more missing ARROW_COMPUTE_EXPORT --- .../arrow/compute/row/row_encoder_internal.h | 13 +++-- cpp/src/arrow/compute/row/row_internal.h | 5 +- cpp/src/arrow/compute/util.h | 58 ++++++++++++------- cpp/src/arrow/compute/util_internal.h | 3 +- 4 files changed, 48 insertions(+), 31 deletions(-) diff --git a/cpp/src/arrow/compute/row/row_encoder_internal.h b/cpp/src/arrow/compute/row/row_encoder_internal.h index 7234efcaf3d1c..b961e45a526b2 100644 --- a/cpp/src/arrow/compute/row/row_encoder_internal.h +++ b/cpp/src/arrow/compute/row/row_encoder_internal.h @@ -20,6 +20,7 @@ #include #include "arrow/compute/kernels/codegen_internal.h" +#include "arrow/compute/visibility.h" #include "arrow/visit_data_inline.h" namespace arrow { @@ -29,7 +30,7 @@ using internal::checked_cast; namespace compute { namespace internal { -struct KeyEncoder { +struct ARROW_COMPUTE_EXPORT KeyEncoder { // the first byte of an encoded key is used to indicate nullity static constexpr bool kExtraByteForNull = true; @@ -85,7 +86,7 @@ struct KeyEncoder { } }; -struct BooleanKeyEncoder : KeyEncoder { +struct ARROW_COMPUTE_EXPORT BooleanKeyEncoder : KeyEncoder { static constexpr int kByteWidth = 1; void AddLength(const ExecValue& data, int64_t batch_length, int32_t* lengths) override; @@ -101,7 +102,7 @@ struct BooleanKeyEncoder : KeyEncoder { MemoryPool* pool) override; }; -struct FixedWidthKeyEncoder : KeyEncoder { +struct ARROW_COMPUTE_EXPORT FixedWidthKeyEncoder : KeyEncoder { explicit FixedWidthKeyEncoder(std::shared_ptr type) : type_(std::move(type)), byte_width_(checked_cast(*type_).bit_width() / 8) {} @@ -122,7 +123,7 @@ struct FixedWidthKeyEncoder : KeyEncoder { const int byte_width_; }; -struct DictionaryKeyEncoder 
: FixedWidthKeyEncoder { +struct ARROW_COMPUTE_EXPORT DictionaryKeyEncoder : FixedWidthKeyEncoder { DictionaryKeyEncoder(std::shared_ptr type, MemoryPool* pool) : FixedWidthKeyEncoder(std::move(type)), pool_(pool) {} @@ -251,7 +252,7 @@ struct VarLengthKeyEncoder : KeyEncoder { std::shared_ptr type_; }; -struct NullKeyEncoder : KeyEncoder { +struct ARROW_COMPUTE_EXPORT NullKeyEncoder : KeyEncoder { void AddLength(const ExecValue&, int64_t batch_length, int32_t* lengths) override {} void AddLengthNull(int32_t* length) override {} @@ -331,7 +332,7 @@ struct NullKeyEncoder : KeyEncoder { /// # Row Encoding /// /// The row format is the concatenation of the encodings of each column. -class RowEncoder { +class ARROW_COMPUTE_EXPORT RowEncoder { public: static constexpr int kRowIdForNulls() { return -1; } diff --git a/cpp/src/arrow/compute/row/row_internal.h b/cpp/src/arrow/compute/row/row_internal.h index bb236558f0e58..219fcbc51f4d8 100644 --- a/cpp/src/arrow/compute/row/row_internal.h +++ b/cpp/src/arrow/compute/row/row_internal.h @@ -21,6 +21,7 @@ #include "arrow/buffer.h" #include "arrow/compute/light_array_internal.h" +#include "arrow/compute/visibility.h" #include "arrow/memory_pool.h" #include "arrow/status.h" #include "arrow/util/logging.h" @@ -29,7 +30,7 @@ namespace arrow { namespace compute { /// Description of the data stored in a RowTable -struct RowTableMetadata { +struct ARROW_COMPUTE_EXPORT RowTableMetadata { using offset_type = int64_t; /// \brief True if there are no variable length columns in the table @@ -170,7 +171,7 @@ struct RowTableMetadata { /// Can store both fixed-size data types and variable-length data types /// /// The row table is not safe -class RowTableImpl { +class ARROW_COMPUTE_EXPORT RowTableImpl { public: using offset_type = RowTableMetadata::offset_type; diff --git a/cpp/src/arrow/compute/util.h b/cpp/src/arrow/compute/util.h index 21873c6a1aa25..ad541e182a479 100644 --- a/cpp/src/arrow/compute/util.h +++ 
b/cpp/src/arrow/compute/util.h @@ -26,6 +26,7 @@ #include "arrow/compute/expression.h" #include "arrow/compute/type_fwd.h" +#include "arrow/compute/visibility.h" #include "arrow/result.h" #include "arrow/util/cpu_info.h" #include "arrow/util/simd.h" @@ -66,41 +67,54 @@ class MiniBatch { namespace bit_util { -void bits_to_indexes(int bit_to_search, int64_t hardware_flags, const int num_bits, - const uint8_t* bits, int* num_indexes, uint16_t* indexes, - int bit_offset = 0); +ARROW_COMPUTE_EXPORT void bits_to_indexes(int bit_to_search, int64_t hardware_flags, + const int num_bits, const uint8_t* bits, + int* num_indexes, uint16_t* indexes, + int bit_offset = 0); -void bits_filter_indexes(int bit_to_search, int64_t hardware_flags, const int num_bits, - const uint8_t* bits, const uint16_t* input_indexes, - int* num_indexes, uint16_t* indexes, int bit_offset = 0); +ARROW_COMPUTE_EXPORT void bits_filter_indexes(int bit_to_search, int64_t hardware_flags, + const int num_bits, const uint8_t* bits, + const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes, + int bit_offset = 0); // Input and output indexes may be pointing to the same data (in-place filtering). -void bits_split_indexes(int64_t hardware_flags, const int num_bits, const uint8_t* bits, - int* num_indexes_bit0, uint16_t* indexes_bit0, - uint16_t* indexes_bit1, int bit_offset = 0); +ARROW_COMPUTE_EXPORT void bits_split_indexes(int64_t hardware_flags, const int num_bits, + const uint8_t* bits, int* num_indexes_bit0, + uint16_t* indexes_bit0, + uint16_t* indexes_bit1, int bit_offset = 0); // Bit 1 is replaced with byte 0xFF. -void bits_to_bytes(int64_t hardware_flags, const int num_bits, const uint8_t* bits, - uint8_t* bytes, int bit_offset = 0); +ARROW_COMPUTE_EXPORT void bits_to_bytes(int64_t hardware_flags, const int num_bits, + const uint8_t* bits, uint8_t* bytes, + int bit_offset = 0); // Return highest bit of each byte. 
-void bytes_to_bits(int64_t hardware_flags, const int num_bits, const uint8_t* bytes, - uint8_t* bits, int bit_offset = 0); +ARROW_COMPUTE_EXPORT void bytes_to_bits(int64_t hardware_flags, const int num_bits, + const uint8_t* bytes, uint8_t* bits, + int bit_offset = 0); -bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, uint32_t num_bytes); +ARROW_COMPUTE_EXPORT bool are_all_bytes_zero(int64_t hardware_flags, const uint8_t* bytes, + uint32_t num_bytes); #if defined(ARROW_HAVE_RUNTIME_AVX2) && defined(ARROW_HAVE_RUNTIME_BMI2) // The functions below use BMI2 instructions, be careful before calling! namespace avx2 { -void bits_filter_indexes_avx2(int bit_to_search, const int num_bits, const uint8_t* bits, - const uint16_t* input_indexes, int* num_indexes, - uint16_t* indexes); -void bits_to_indexes_avx2(int bit_to_search, const int num_bits, const uint8_t* bits, - int* num_indexes, uint16_t* indexes, uint16_t base_index = 0); -void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, uint8_t* bytes); -void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, uint8_t* bits); -bool are_all_bytes_zero_avx2(const uint8_t* bytes, uint32_t num_bytes); +ARROW_COMPUTE_EXPORT void bits_filter_indexes_avx2(int bit_to_search, const int num_bits, + const uint8_t* bits, + const uint16_t* input_indexes, + int* num_indexes, uint16_t* indexes); +ARROW_COMPUTE_EXPORT void bits_to_indexes_avx2(int bit_to_search, const int num_bits, + const uint8_t* bits, int* num_indexes, + uint16_t* indexes, + uint16_t base_index = 0); +ARROW_COMPUTE_EXPORT void bits_to_bytes_avx2(const int num_bits, const uint8_t* bits, + uint8_t* bytes); +ARROW_COMPUTE_EXPORT void bytes_to_bits_avx2(const int num_bits, const uint8_t* bytes, + uint8_t* bits); +ARROW_COMPUTE_EXPORT bool are_all_bytes_zero_avx2(const uint8_t* bytes, + uint32_t num_bytes); } // namespace avx2 #endif diff --git a/cpp/src/arrow/compute/util_internal.h b/cpp/src/arrow/compute/util_internal.h index 
44831bd5938c0..301fd4939b457 100644 --- a/cpp/src/arrow/compute/util_internal.h +++ b/cpp/src/arrow/compute/util_internal.h @@ -17,6 +17,7 @@ #pragma once +#include "arrow/compute/visibility.h" #include "arrow/status.h" #include "arrow/type_fwd.h" #include "arrow/util/logging.h" @@ -34,7 +35,7 @@ void CheckAlignment(const void* ptr) { /// Temporary vectors should resemble allocating temporary variables on the stack /// but in the context of vectorized processing where we need to store a vector of /// temporaries instead of a single value. -class TempVectorStack { +class ARROW_COMPUTE_EXPORT TempVectorStack { template friend class TempVectorHolder; From d6a78287e03579cb0c7f28ec333af461f849ee8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 26 Feb 2025 16:32:11 +0100 Subject: [PATCH 17/22] Export symbols for kernel registration and move outside of internal namespace --- .../arrow/compute_and_write_csv_example.cc | 2 +- cpp/examples/arrow/join_example.cc | 2 +- cpp/src/arrow/compute/kernels/registry.cc | 58 +++++++++---------- cpp/src/arrow/compute/kernels/registry.h | 18 +----- .../compute/kernels/test_util_internal.h | 2 +- 5 files changed, 34 insertions(+), 48 deletions(-) diff --git a/cpp/examples/arrow/compute_and_write_csv_example.cc b/cpp/examples/arrow/compute_and_write_csv_example.cc index 19cd023c5af3b..129cc5f43d20b 100644 --- a/cpp/examples/arrow/compute_and_write_csv_example.cc +++ b/cpp/examples/arrow/compute_and_write_csv_example.cc @@ -42,7 +42,7 @@ // in the current directory arrow::Status RunMain(int argc, char** argv) { - ARROW_RETURN_NOT_OK(arrow::compute::internal::RegisterComputeKernels()); + ARROW_RETURN_NOT_OK(arrow::compute::RegisterComputeKernels()); // Make Arrays arrow::NumericBuilder int64_builder; arrow::BooleanBuilder boolean_builder; diff --git a/cpp/examples/arrow/join_example.cc b/cpp/examples/arrow/join_example.cc index e52fd0cc5ccc3..416210bd2d539 100644 --- a/cpp/examples/arrow/join_example.cc +++ 
b/cpp/examples/arrow/join_example.cc @@ -83,7 +83,7 @@ arrow::Result> CreateDataSetFromCSVData } arrow::Status DoHashJoin() { - ARROW_RETURN_NOT_OK(arrow::compute::internal::RegisterComputeKernels()); + ARROW_RETURN_NOT_OK(arrow::compute::RegisterComputeKernels()); arrow::dataset::internal::Initialize(); ARROW_ASSIGN_OR_RAISE(auto l_dataset, CreateDataSetFromCSVData(true)); diff --git a/cpp/src/arrow/compute/kernels/registry.cc b/cpp/src/arrow/compute/kernels/registry.cc index aeeac092a64be..f03fa821703f0 100644 --- a/cpp/src/arrow/compute/kernels/registry.cc +++ b/cpp/src/arrow/compute/kernels/registry.cc @@ -32,48 +32,46 @@ namespace arrow { namespace compute { -namespace internal { Status RegisterComputeKernels() { auto registry = GetFunctionRegistry(); // Register additional kernels on libarrow_compute // Scalar functions - RegisterScalarArithmetic(registry); - RegisterScalarBoolean(registry); - RegisterScalarComparison(registry); - RegisterScalarIfElse(registry); - RegisterScalarNested(registry); - RegisterScalarRandom(registry); // Nullary - RegisterScalarRoundArithmetic(registry); - RegisterScalarSetLookup(registry); - RegisterScalarStringAscii(registry); - RegisterScalarStringUtf8(registry); - RegisterScalarTemporalBinary(registry); - RegisterScalarTemporalUnary(registry); - RegisterScalarValidity(registry); + internal::RegisterScalarArithmetic(registry); + internal::RegisterScalarBoolean(registry); + internal::RegisterScalarComparison(registry); + internal::RegisterScalarIfElse(registry); + internal::RegisterScalarNested(registry); + internal::RegisterScalarRandom(registry); // Nullary + internal::RegisterScalarRoundArithmetic(registry); + internal::RegisterScalarSetLookup(registry); + internal::RegisterScalarStringAscii(registry); + internal::RegisterScalarStringUtf8(registry); + internal::RegisterScalarTemporalBinary(registry); + internal::RegisterScalarTemporalUnary(registry); + internal::RegisterScalarValidity(registry); // Vector functions - 
RegisterVectorArraySort(registry); - RegisterVectorCumulativeSum(registry); - RegisterVectorNested(registry); - RegisterVectorRank(registry); - RegisterVectorReplace(registry); - RegisterVectorSelectK(registry); - RegisterVectorSort(registry); - RegisterVectorPairwise(registry); - RegisterVectorSwizzle(registry); + internal::RegisterVectorArraySort(registry); + internal::RegisterVectorCumulativeSum(registry); + internal::RegisterVectorNested(registry); + internal::RegisterVectorRank(registry); + internal::RegisterVectorReplace(registry); + internal::RegisterVectorSelectK(registry); + internal::RegisterVectorSort(registry); + internal::RegisterVectorPairwise(registry); + internal::RegisterVectorSwizzle(registry); // Aggregate functions - RegisterHashAggregateBasic(registry); - RegisterScalarAggregateBasic(registry); - RegisterScalarAggregateMode(registry); - RegisterScalarAggregateQuantile(registry); - RegisterScalarAggregateTDigest(registry); - RegisterScalarAggregateVariance(registry); + internal::RegisterHashAggregateBasic(registry); + internal::RegisterScalarAggregateBasic(registry); + internal::RegisterScalarAggregateMode(registry); + internal::RegisterScalarAggregateQuantile(registry); + internal::RegisterScalarAggregateTDigest(registry); + internal::RegisterScalarAggregateVariance(registry); return Status::OK(); } -} // namespace internal } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/registry.h b/cpp/src/arrow/compute/kernels/registry.h index ba099317a3ac1..0c211b8b13f34 100644 --- a/cpp/src/arrow/compute/kernels/registry.h +++ b/cpp/src/arrow/compute/kernels/registry.h @@ -16,26 +16,14 @@ // under the License. 
#include "arrow/compute/registry.h" -#include -#include -#include -#include -#include - -#include "arrow/compute/function.h" -#include "arrow/compute/function_internal.h" -#include "arrow/compute/registry_internal.h" +#include "arrow/compute/visibility.h" #include "arrow/status.h" -#include "arrow/util/config.h" // For ARROW_COMPUTE -#include "arrow/util/logging.h" // TODO: Review includes namespace arrow { namespace compute { -namespace internal { -// This must be public, not internal -Status RegisterComputeKernels(); -} // namespace internal +ARROW_COMPUTE_EXPORT Status RegisterComputeKernels(); + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/test_util_internal.h b/cpp/src/arrow/compute/kernels/test_util_internal.h index a4f2cb439fa0d..d9ac7d5d3ef32 100644 --- a/cpp/src/arrow/compute/kernels/test_util_internal.h +++ b/cpp/src/arrow/compute/kernels/test_util_internal.h @@ -57,7 +57,7 @@ class ComputeKernelEnvironment : public ::testing::Environment { // register them to the FunctionRegistry. 
ComputeKernelEnvironment() : ::testing::Environment() {} - void SetUp() override { ASSERT_OK(arrow::compute::internal::RegisterComputeKernels()); } + void SetUp() override { ASSERT_OK(arrow::compute::RegisterComputeKernels()); } }; template From bc6839e832afaa07caf26c2a8d1615b1c0feb647 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 27 Feb 2025 13:13:54 +0100 Subject: [PATCH 18/22] Move problematic function to compute/codegen_internal instead of compute/kernels/codegen_internal --- cpp/src/arrow/CMakeLists.txt | 2 +- cpp/src/arrow/compute/codegen_internal.cc | 34 +++++++++++++++++++ cpp/src/arrow/compute/codegen_internal.h | 32 +++++++++++++++++ .../arrow/compute/kernels/codegen_internal.cc | 4 --- 4 files changed, 67 insertions(+), 5 deletions(-) create mode 100644 cpp/src/arrow/compute/codegen_internal.cc create mode 100644 cpp/src/arrow/compute/codegen_internal.h diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 0f55fa32a19a7..5abb4b13b114c 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -722,6 +722,7 @@ set(ARROW_COMPUTE_SRCS compute/api_scalar.cc compute/api_vector.cc compute/cast.cc + compute/codegen_internal.cc compute/exec.cc compute/expression.cc compute/function.cc @@ -729,7 +730,6 @@ set(ARROW_COMPUTE_SRCS compute/kernel.cc compute/ordering.cc compute/registry.cc - compute/kernels/codegen_internal.cc compute/kernels/ree_util_internal.cc compute/kernels/scalar_cast_boolean.cc compute/kernels/scalar_cast_dictionary.cc diff --git a/cpp/src/arrow/compute/codegen_internal.cc b/cpp/src/arrow/compute/codegen_internal.cc new file mode 100644 index 0000000000000..ec6214dffc404 --- /dev/null +++ b/cpp/src/arrow/compute/codegen_internal.cc @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/compute/codegen_internal.h" + +#include "arrow/compute/api_vector.h" +#include "arrow/result.h" +#include "arrow/type_fwd.h" + +namespace arrow { +namespace compute { +namespace internal { + +Result FirstType(KernelContext*, const std::vector& types) { + return types.front(); +} + +} // namespace internal +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/codegen_internal.h b/cpp/src/arrow/compute/codegen_internal.h new file mode 100644 index 0000000000000..506eedc0c82c4 --- /dev/null +++ b/cpp/src/arrow/compute/codegen_internal.h @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/compute/api_vector.h" +#include "arrow/result.h" +#include "arrow/type_fwd.h" + +namespace arrow { +namespace compute { +namespace internal { +// ---------------------------------------------------------------------- +// Reusable type resolvers + +Result FirstType(KernelContext*, const std::vector& types); + +} // namespace internal +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.cc b/cpp/src/arrow/compute/kernels/codegen_internal.cc index 0fd9cae7a8d71..8d3d93d2b0254 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.cc +++ b/cpp/src/arrow/compute/kernels/codegen_internal.cc @@ -49,10 +49,6 @@ const std::vector>& ExampleParametricTypes() { return example_parametric_types; } -Result FirstType(KernelContext*, const std::vector& types) { - return types.front(); -} - Result LastType(KernelContext*, const std::vector& types) { return types.back(); } From 2d26f3db8593fcc2bfa08c4b650f3b7a59c05070 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 27 Feb 2025 14:17:41 +0100 Subject: [PATCH 19/22] Add ARROW_EXPORT ot FirstType --- cpp/src/arrow/compute/codegen_internal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/compute/codegen_internal.h b/cpp/src/arrow/compute/codegen_internal.h index 506eedc0c82c4..3e059e0918e54 100644 --- a/cpp/src/arrow/compute/codegen_internal.h +++ b/cpp/src/arrow/compute/codegen_internal.h @@ -25,7 +25,8 @@ namespace internal { // ---------------------------------------------------------------------- // Reusable type resolvers -Result FirstType(KernelContext*, const std::vector& types); +ARROW_EXPORT Result FirstType(KernelContext*, + const std::vector& types); } // namespace internal } // namespace compute From de11ab2e5a59c69ad52a28fbe74d803eab046d3f Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 27 Feb 2025 14:57:54 +0100 Subject: [PATCH 20/22] Remove duplicated symbol, expose new required symbols --- cpp/src/arrow/compute/kernels/aggregate_basic.cc | 1 + cpp/src/arrow/compute/kernels/codegen_internal.h | 16 ++++++++-------- cpp/src/arrow/compute/kernels/hash_aggregate.cc | 1 + .../arrow/compute/kernels/scalar_arithmetic.cc | 1 + cpp/src/arrow/compute/kernels/scalar_if_else.cc | 1 + cpp/src/arrow/compute/kernels/scalar_round.cc | 1 + .../compute/kernels/scalar_temporal_unary.cc | 1 + cpp/src/arrow/compute/kernels/vector_hash.cc | 1 + cpp/src/arrow/compute/kernels/vector_replace.cc | 1 + .../compute/kernels/vector_selection_internal.cc | 1 + 10 files changed, 17 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc index 68b1ac7c03ca8..1d3008a3bd743 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc @@ -16,6 +16,7 @@ // under the License. 
#include "arrow/compute/api_aggregate.h" +#include "arrow/compute/codegen_internal.h" #include "arrow/compute/kernels/aggregate_basic_internal.h" #include "arrow/compute/kernels/aggregate_internal.h" #include "arrow/compute/kernels/common_internal.h" diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h index f0dba993d5752..878981a274505 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.h +++ b/cpp/src/arrow/compute/kernels/codegen_internal.h @@ -473,8 +473,6 @@ static void VisitTwoArrayValuesInline(const ArraySpan& arr0, const ArraySpan& ar // ---------------------------------------------------------------------- // Reusable type resolvers - -Result FirstType(KernelContext*, const std::vector& types); Result LastType(KernelContext*, const std::vector& types); Result ListValuesType(KernelContext* ctx, const std::vector& types); @@ -1398,16 +1396,17 @@ void ReplaceTypes(const TypeHolder& replacement, std::vector* types) void ReplaceTypes(const TypeHolder& replacement, TypeHolder* types, size_t count); -void ReplaceTemporalTypes(TimeUnit::type unit, std::vector* types); +ARROW_COMPUTE_EXPORT void ReplaceTemporalTypes(TimeUnit::type unit, + std::vector* types); TypeHolder CommonNumeric(const std::vector& types); TypeHolder CommonNumeric(const TypeHolder* begin, size_t count); -TypeHolder CommonTemporal(const TypeHolder* begin, size_t count); +ARROW_COMPUTE_EXPORT TypeHolder CommonTemporal(const TypeHolder* begin, size_t count); -bool CommonTemporalResolution(const TypeHolder* begin, size_t count, - TimeUnit::type* finest_unit); +ARROW_COMPUTE_EXPORT bool CommonTemporalResolution(const TypeHolder* begin, size_t count, + TimeUnit::type* finest_unit); TypeHolder CommonBinary(const TypeHolder* begin, size_t count); @@ -1421,11 +1420,12 @@ enum class DecimalPromotion : uint8_t { /// Given two arguments, at least one of which is decimal, promote all /// to not necessarily identical types, but types which 
are compatible /// for the given operator (add/multiply/divide). -Status CastBinaryDecimalArgs(DecimalPromotion promotion, std::vector* types); +ARROW_COMPUTE_EXPORT Status CastBinaryDecimalArgs(DecimalPromotion promotion, + std::vector* types); /// Given one or more arguments, at least one of which is decimal, /// promote all to an identical type. -Status CastDecimalArgs(TypeHolder* begin, size_t count); +ARROW_COMPUTE_EXPORT Status CastDecimalArgs(TypeHolder* begin, size_t count); bool HasDecimal(const std::vector& types); diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc index 21b7bd9bf6632..381e32ab7ecbb 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc @@ -29,6 +29,7 @@ #include "arrow/buffer_builder.h" #include "arrow/compute/api_aggregate.h" #include "arrow/compute/api_vector.h" +#include "arrow/compute/codegen_internal.h" #include "arrow/compute/kernel.h" #include "arrow/compute/kernels/aggregate_internal.h" #include "arrow/compute/kernels/aggregate_var_std_internal.h" diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc index c13dae573a3d9..c0eb435b4765a 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc @@ -25,6 +25,7 @@ #include "arrow/compare.h" #include "arrow/compute/api_scalar.h" #include "arrow/compute/cast.h" +#include "arrow/compute/codegen_internal.h" #include "arrow/compute/kernels/base_arithmetic_internal.h" #include "arrow/compute/kernels/codegen_internal.h" #include "arrow/compute/kernels/common_internal.h" diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index 6368ef525ff9c..5c79df4a48090 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -21,6 +21,7 @@ 
#include "arrow/array/builder_time.h" #include "arrow/array/builder_union.h" #include "arrow/compute/api.h" +#include "arrow/compute/codegen_internal.h" #include "arrow/compute/kernels/codegen_internal.h" #include "arrow/compute/kernels/copy_data_internal.h" #include "arrow/result.h" diff --git a/cpp/src/arrow/compute/kernels/scalar_round.cc b/cpp/src/arrow/compute/kernels/scalar_round.cc index 98ebc8422cdd5..302ab0d55ce52 100644 --- a/cpp/src/arrow/compute/kernels/scalar_round.cc +++ b/cpp/src/arrow/compute/kernels/scalar_round.cc @@ -25,6 +25,7 @@ #include "arrow/compare.h" #include "arrow/compute/api_scalar.h" #include "arrow/compute/cast.h" +#include "arrow/compute/codegen_internal.h" #include "arrow/compute/kernel.h" #include "arrow/compute/kernels/base_arithmetic_internal.h" #include "arrow/compute/kernels/codegen_internal.h" diff --git a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc index 35b1deb3cda58..091bb914141a8 100644 --- a/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc +++ b/cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc @@ -21,6 +21,7 @@ #include "arrow/builder.h" #include "arrow/compute/api_scalar.h" +#include "arrow/compute/codegen_internal.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/compute/kernels/temporal_internal.h" #include "arrow/util/checked_cast.h" diff --git a/cpp/src/arrow/compute/kernels/vector_hash.cc b/cpp/src/arrow/compute/kernels/vector_hash.cc index 5067298858132..ce678e0eee673 100644 --- a/cpp/src/arrow/compute/kernels/vector_hash.cc +++ b/cpp/src/arrow/compute/kernels/vector_hash.cc @@ -29,6 +29,7 @@ #include "arrow/buffer.h" #include "arrow/compute/api_vector.h" #include "arrow/compute/cast.h" +#include "arrow/compute/codegen_internal.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/result.h" #include "arrow/util/hashing.h" diff --git a/cpp/src/arrow/compute/kernels/vector_replace.cc 
b/cpp/src/arrow/compute/kernels/vector_replace.cc index 35448b95956d6..9a13748986b25 100644 --- a/cpp/src/arrow/compute/kernels/vector_replace.cc +++ b/cpp/src/arrow/compute/kernels/vector_replace.cc @@ -16,6 +16,7 @@ // under the License. #include "arrow/compute/api_scalar.h" +#include "arrow/compute/codegen_internal.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/compute/kernels/copy_data_internal.h" #include "arrow/compute/kernels/util_internal.h" diff --git a/cpp/src/arrow/compute/kernels/vector_selection_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_internal.cc index 7189d42850e79..5c98ba01cf31b 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_internal.cc @@ -27,6 +27,7 @@ #include "arrow/buffer_builder.h" #include "arrow/chunked_array.h" #include "arrow/compute/api_vector.h" +#include "arrow/compute/codegen_internal.h" #include "arrow/compute/function.h" #include "arrow/compute/kernel.h" #include "arrow/compute/kernels/codegen_internal.h" From 7240ca253191e6309e8335d0c190100f193bd4fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Thu, 27 Feb 2025 14:59:04 +0100 Subject: [PATCH 21/22] Add missing header --- cpp/src/arrow/compute/kernels/codegen_internal.h | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h index 878981a274505..9bfbd18e70eae 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.h +++ b/cpp/src/arrow/compute/kernels/codegen_internal.h @@ -31,6 +31,7 @@ #include "arrow/buffer.h" #include "arrow/buffer_builder.h" #include "arrow/compute/kernel.h" +#include "arrow/compute/visibility.h" #include "arrow/datum.h" #include "arrow/result.h" #include "arrow/scalar.h" From 98b46082009932b78e1c715cc8f96417408bf5ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 28 Feb 2025 15:11:31 +0100 
Subject: [PATCH 22/22] Fix linked libs for arrow_compute --- cpp/src/arrow/CMakeLists.txt | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 5abb4b13b114c..0358e3628f481 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -801,20 +801,8 @@ if(ARROW_COMPUTE) append_runtime_avx2_src(ARROW_COMPUTE_LIB_SRCS compute/row/compare_internal_avx2.cc) append_runtime_avx2_src(ARROW_COMPUTE_LIB_SRCS compute/row/encode_internal_avx2.cc) append_runtime_avx2_bmi2_src(ARROW_COMPUTE_LIB_SRCS compute/util_avx2.cc) - # TODO: Some of those should be added conditionally - set(ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS Boost::headers ${ARROW_XSIMD} - ${ARROW_OPENTELEMETRY_LIBS}) - set(ARROW_COMPUTE_STATIC_LINK_LIBS Boost::headers ${ARROW_XSIMD} - ${ARROW_OPENTELEMETRY_LIBS}) - if(ARROW_WITH_RE2) - list(APPEND ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS re2::re2) - list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS re2::re2) - endif() - if(ARROW_WITH_UTF8PROC) - list(APPEND ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS utf8proc::utf8proc) - list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS re2::re2) - endif() + set(ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS) set(ARROW_COMPUTE_SHARED_LINK_LIBS) set(ARROW_COMPUTE_STATIC_LINK_LIBS) set(ARROW_COMPUTE_STATIC_INSTALL_INTERFACE_LIBS) @@ -825,6 +813,27 @@ if(ARROW_COMPUTE) list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS arrow_static) list(APPEND ARROW_COMPUTE_SHARED_LINK_LIBS arrow_shared) + if(ARROW_USE_BOOST) + list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS Boost::headers) + list(APPEND ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS Boost::headers) + endif() + if(ARROW_USE_XSIMD) + list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS ${ARROW_XSIMD}) + list(APPEND ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS ${ARROW_XSIMD}) + endif() + if(ARROW_WITH_OPENTELEMETRY) + list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS ${ARROW_OPENTELEMETRY_LIBS}) + list(APPEND 
ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS ${ARROW_OPENTELEMETRY_LIBS}) + endif() + if(ARROW_WITH_RE2) + list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS re2::re2) + list(APPEND ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS re2::re2) + endif() + if(ARROW_WITH_UTF8PROC) + list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS utf8proc::utf8proc) + list(APPEND ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS utf8proc::utf8proc) + endif() + add_arrow_lib(arrow_compute CMAKE_PACKAGE_NAME ArrowCompute