Skip to content

Commit

Permalink
feat(fuzzer): Allow functions to be tested only with sorted input in …
Browse files Browse the repository at this point in the history
…aggregation fuzzer (#12392)

Summary:
Pull Request resolved: #12392

The tdigest_agg() function needs to be tested only with input of determined order so that Velox result is comparable with Presto's. This is because different ordering of input values affects the content in TDigest. Therefore, this diff makes it possible to specify a set of functions to be tested only with ordered inputs in aggregation fuzzer. Aggregation fuzzer will then use `order by arg0, arg1, ...` inside the aggregaiton function calls of this function.

Reviewed By: natashasehgal

Differential Revision: D69886108

fbshipit-source-id: 304d13ae41cbc14df330a02cc2a775c731929637
  • Loading branch information
kagamiori authored and facebook-github-bot committed Feb 25, 2025
1 parent 71a1d78 commit 4787a99
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 13 deletions.
41 changes: 28 additions & 13 deletions velox/exec/fuzzer/AggregationFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class AggregationFuzzer : public AggregationFuzzerBase {
AggregationFuzzer(
AggregateFunctionSignatureMap signatureMap,
size_t seed,
const std::unordered_set<std::string>& functionsRequireSortedInput,
const std::unordered_map<std::string, std::shared_ptr<ResultVerifier>>&
customVerificationFunctions,
const std::unordered_map<std::string, std::shared_ptr<InputGenerator>>&
Expand Down Expand Up @@ -183,14 +184,18 @@ class AggregationFuzzer : public AggregationFuzzerBase {
}
}

bool mustSortInput(const CallableSignature& signature) const;

Stats stats_;
const std::unordered_map<std::string, DataSpec> functionDataSpec_;
const std::unordered_set<std::string> functionsRequireSortedInput_;
};
} // namespace

void aggregateFuzzer(
AggregateFunctionSignatureMap signatureMap,
size_t seed,
const std::unordered_set<std::string>& functionsRequireSortedInput,
const std::unordered_map<std::string, std::shared_ptr<ResultVerifier>>&
customVerificationFunctions,
const std::unordered_map<std::string, std::shared_ptr<InputGenerator>>&
Expand All @@ -205,6 +210,7 @@ void aggregateFuzzer(
auto aggregationFuzzer = AggregationFuzzer(
std::move(signatureMap),
seed,
functionsRequireSortedInput,
customVerificationFunctions,
customInputGenerators,
functionDataSpec,
Expand All @@ -222,6 +228,7 @@ namespace {
AggregationFuzzer::AggregationFuzzer(
AggregateFunctionSignatureMap signatureMap,
size_t seed,
const std::unordered_set<std::string>& functionsRequireSortedInput,
const std::unordered_map<std::string, std::shared_ptr<ResultVerifier>>&
customVerificationFunctions,
const std::unordered_map<std::string, std::shared_ptr<InputGenerator>>&
Expand All @@ -233,7 +240,8 @@ AggregationFuzzer::AggregationFuzzer(
bool orderableGroupKeys,
std::unique_ptr<ReferenceQueryRunner> referenceQueryRunner)
: AggregationFuzzerBase{seed, customVerificationFunctions, customInputGenerators, timestampPrecision, queryConfigs, hiveConfigs, orderableGroupKeys, std::move(referenceQueryRunner)},
functionDataSpec_{functionDataSpec} {
functionDataSpec_{functionDataSpec},
functionsRequireSortedInput_{functionsRequireSortedInput} {
VELOX_CHECK(!signatureMap.empty(), "No function signatures available.");

if (persistAndRunOnce_ && reproPersistPath_.empty()) {
Expand Down Expand Up @@ -309,6 +317,11 @@ bool supportsDistinctInputs(
return arg->isComparable();
}

bool AggregationFuzzer::mustSortInput(
const CallableSignature& signature) const {
return functionsRequireSortedInput_.count(signature.name) > 0;
}

void AggregationFuzzer::go() {
VELOX_CHECK(
FLAGS_steps > 0 || FLAGS_duration_sec > 0,
Expand Down Expand Up @@ -338,6 +351,13 @@ void AggregationFuzzer::go() {
} else {
// Pick a random signature.
auto signatureWithStats = pickSignature();
auto signature = signatureWithStats.first;
if (mustSortInput(signature) &&
!(FLAGS_enable_sorted_aggregations && canSortInputs(signature))) {
continue;
}
signatureWithStats.second.numRuns++;
stats_.functionNames.insert(signature.name);

if (functionDataSpec_.count(signatureWithStats.first.name) > 0) {
vectorOptions.dataSpec =
Expand All @@ -347,19 +367,10 @@ void AggregationFuzzer::go() {
vectorOptions.dataSpec = {true, true};
}
vectorFuzzer_.setOptions(vectorOptions);
signatureWithStats.second.numRuns++;

auto signature = signatureWithStats.first;
stats_.functionNames.insert(signature.name);

const bool customVerification =
customVerificationFunctions_.count(signature.name) != 0;

std::vector<TypePtr> argTypes = signature.args;
std::vector<std::string> argNames = makeNames(argTypes.size());

const bool sortedInputs = FLAGS_enable_sorted_aggregations &&
canSortInputs(signature) && vectorFuzzer_.coinToss(0.2);
const bool sortedInputs = mustSortInput(signature) ||
(FLAGS_enable_sorted_aggregations && canSortInputs(signature) &&
vectorFuzzer_.coinToss(0.2));

// Exclude approx_xxx aggregations since their verifiers may not be able
// to verify the results. The approx_percentile verifier would discard
Expand All @@ -372,6 +383,8 @@ void AggregationFuzzer::go() {
supportsDistinctInputs(signature, orderableGroupKeys_) &&
vectorFuzzer_.coinToss(0.2);

std::vector<TypePtr> argTypes = signature.args;
std::vector<std::string> argNames = makeNames(argTypes.size());
auto call = makeFunctionCall(
signature.name, argNames, sortedInputs, distinctInputs);

Expand All @@ -398,6 +411,8 @@ void AggregationFuzzer::go() {

logVectors(input);

const bool customVerification =
customVerificationFunctions_.count(signature.name) != 0;
std::shared_ptr<ResultVerifier> customVerifier;
if (customVerification) {
customVerifier = customVerificationFunctions_.at(signature.name);
Expand Down
1 change: 1 addition & 0 deletions velox/exec/fuzzer/AggregationFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ namespace facebook::velox::exec::test {
void aggregateFuzzer(
AggregateFunctionSignatureMap signatureMap,
size_t seed,
const std::unordered_set<std::string>& functionsRequireSortedInput,
const std::unordered_map<std::string, std::shared_ptr<ResultVerifier>>&
orderDependentFunctions,
const std::unordered_map<std::string, std::shared_ptr<InputGenerator>>&
Expand Down
3 changes: 3 additions & 0 deletions velox/exec/fuzzer/AggregationFuzzerOptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ struct AggregationFuzzerOptions {
/// Set of functions to not test.
std::unordered_set<std::string> skipFunctions;

/// Set of functions that should only be tested with sorted input.
std::unordered_set<std::string> functionsRequireSortedInput;

/// Set of functions whose results are non-deterministic. These can be
/// order-dependent functions whose results depend on the order of input
/// rows, or functions that return complex-typed results containing
Expand Down
1 change: 1 addition & 0 deletions velox/exec/fuzzer/AggregationFuzzerRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ class AggregationFuzzerRunner {
facebook::velox::exec::test::aggregateFuzzer(
filteredSignatures,
seed,
options.functionsRequireSortedInput,
options.customVerificationFunctions,
options.customInputGenerators,
aggregationFunctionDataSpecs,
Expand Down
5 changes: 5 additions & 0 deletions velox/functions/prestosql/fuzzer/AggregationFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,10 @@ int main(int argc, char** argv) {
"any_value",
};

static const std::unordered_set<std::string> functionsRequireSortedInput = {
"tdigest_agg",
};

using facebook::velox::exec::test::ApproxDistinctResultVerifier;
using facebook::velox::exec::test::ApproxPercentileResultVerifier;
using facebook::velox::exec::test::ArbitraryResultVerifier;
Expand Down Expand Up @@ -195,6 +199,7 @@ int main(int argc, char** argv) {
Options options;
options.onlyFunctions = FLAGS_only;
options.skipFunctions = skipFunctions;
options.functionsRequireSortedInput = functionsRequireSortedInput;
options.customVerificationFunctions = customVerificationFunctions;
options.customInputGenerators =
facebook::velox::exec::test::getCustomInputGenerators();
Expand Down

0 comments on commit 4787a99

Please sign in to comment.