Skip to content

Commit

Permalink
feat(fuzzer): Add input generator for json_parse in expression fuzzer
Browse files Browse the repository at this point in the history
Summary:
Make the expression fuzzer generate input vectors of valid JSON strings for the
json_parse function. To test corner cases, the JSON strings may be randomly
truncated or have a space character inserted into them.

Differential Revision: D67820571
  • Loading branch information
kagamiori authored and facebook-github-bot committed Jan 4, 2025
1 parent d75e3fb commit 3c0d260
Show file tree
Hide file tree
Showing 8 changed files with 102 additions and 14 deletions.
1 change: 0 additions & 1 deletion velox/exec/fuzzer/PrestoQueryRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,6 @@ bool PrestoQueryRunner::isSupported(const exec::FunctionSignature& signature) {
usesTypeName(signature, "interval year to month") ||
usesTypeName(signature, "hugeint") ||
usesTypeName(signature, "hyperloglog") ||
usesInputTypeName(signature, "json") ||
usesInputTypeName(signature, "ipaddress") ||
usesInputTypeName(signature, "ipprefix"));
}
Expand Down
35 changes: 34 additions & 1 deletion velox/expression/fuzzer/ExpressionFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "velox/expression/SimpleFunctionRegistry.h"
#include "velox/expression/fuzzer/ArgumentTypeFuzzer.h"
#include "velox/expression/fuzzer/ExpressionFuzzer.h"
#include "velox/vector/fuzzer/ConstrainedGenerators.h"

namespace facebook::velox::fuzzer {

Expand Down Expand Up @@ -499,7 +500,9 @@ ExpressionFuzzer::ExpressionFuzzer(
const std::shared_ptr<VectorFuzzer>& vectorFuzzer,
const std::optional<ExpressionFuzzer::Options>& options,
const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>&
argGenerators)
argGenerators,
const std::unordered_map<std::string, ArgsOverrideFuncPtr>&
argsOverrideFuncs)
: options_(options.value_or(Options())),
vectorFuzzer_(vectorFuzzer),
state{rng_, std::max(1, options_.maxLevelOfNesting)},
Expand Down Expand Up @@ -664,6 +667,9 @@ ExpressionFuzzer::ExpressionFuzzer(
// Register function override (for cases where we want to restrict the types
// or parameters we pass to functions).
registerFuncOverride(&ExpressionFuzzer::generateSwitchArgs, "switch");
for (const auto& [name, func] : argsOverrideFuncs) {
registerFuncOverride(func, name);
}
}

bool ExpressionFuzzer::isSupportedSignature(
Expand Down Expand Up @@ -788,6 +794,7 @@ core::TypedExprPtr ExpressionFuzzer::generateArgColumn(const TypePtr& arg) {
state.inputRowTypes_.emplace_back(arg);
state.inputRowNames_.emplace_back(
fmt::format("c{}", state.inputRowTypes_.size() - 1));
state.customInputGenerators_.emplace_back(nullptr);
listOfCandidateCols.push_back(state.inputRowNames_.back());
return std::make_shared<core::FieldAccessTypedExpr>(
arg, state.inputRowNames_.back());
Expand Down Expand Up @@ -957,6 +964,31 @@ std::vector<core::TypedExprPtr> ExpressionFuzzer::generateSwitchArgs(
return inputExpressions;
}

std::vector<core::TypedExprPtr> ExpressionFuzzer::generateJsonParseArg(
const CallableSignature& input) {
VELOX_CHECK_EQ(input.args.size(), 1);
std::vector<core::TypedExprPtr> inputExpressions;

state.inputRowTypes_.emplace_back(input.args[0]);
state.inputRowNames_.emplace_back(
fmt::format("c{}", state.inputRowTypes_.size() - 1));

const auto representedType = vectorFuzzer_->randType(3);
const auto seed = rand<uint32_t>(rng_);
const auto nullRatio = vectorFuzzer_->getOptions().nullRatio;
state.customInputGenerators_.emplace_back(
std::make_shared<fuzzer::JsonInputGenerator>(
seed,
input.args[0],
nullRatio,
fuzzer::getRandomInputGenerator(seed, representedType, nullRatio),
true));

inputExpressions.push_back(std::make_shared<core::FieldAccessTypedExpr>(
input.args[0], state.inputRowNames_.back()));
return inputExpressions;
}

ExpressionFuzzer::FuzzedExpressionData ExpressionFuzzer::fuzzExpressions(
const RowTypePtr& outType) {
state.reset();
Expand All @@ -970,6 +1002,7 @@ ExpressionFuzzer::FuzzedExpressionData ExpressionFuzzer::fuzzExpressions(
return {
std::move(expressions),
ROW(std::move(state.inputRowNames_), std::move(state.inputRowTypes_)),
std::move(state.customInputGenerators_),
std::move(state.expressionStats_)};
}

Expand Down
21 changes: 20 additions & 1 deletion velox/expression/fuzzer/ExpressionFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ using facebook::velox::exec::test::ExprTransformer;
// A tool that can be used to generate random expressions.
class ExpressionFuzzer {
public:
// Pointer to an ExpressionFuzzer member function that takes the
// CallableSignature of a function and returns a vector of typed expressions.
// The constructor accepts a map from function name to such a pointer and
// registers each entry as a function override.
using ArgsOverrideFuncPtr = std::vector<core::TypedExprPtr> (
facebook::velox::fuzzer::ExpressionFuzzer::*)(const CallableSignature&);

struct Options {
// The maximum number of variadic arguments fuzzer will generate for
// functions that accept variadic arguments. Fuzzer will generate up to
Expand Down Expand Up @@ -120,7 +123,9 @@ class ExpressionFuzzer {
const std::shared_ptr<VectorFuzzer>& vectorFuzzer,
const std::optional<ExpressionFuzzer::Options>& options = std::nullopt,
const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>&
argGenerators = {});
argGenerators = {},
const std::unordered_map<std::string, ArgsOverrideFuncPtr>&
argsOverrideFuncs = {});

template <typename TFunc>
void registerFuncOverride(TFunc func, const std::string& name);
Expand All @@ -132,6 +137,12 @@ class ExpressionFuzzer {
// The input vector type that is expected by the generated expressions.
RowTypePtr inputType;

// Custom input generators for input vectors. The generator at index i
// corresponds to the i-th field in inputType. If customInputGenerators[i]
// doesn't exist or is nullptr, then no custom input generator is used for
// the i-th field.
std::vector<std::shared_ptr<AbstractInputGenerator>> customInputGenerators;

// Count how many times each expression has been selected in expressions.
std::unordered_map<std::string, size_t> selectionStats;
};
Expand Down Expand Up @@ -208,6 +219,11 @@ class ExpressionFuzzer {

RowTypePtr fuzzRowReturnType(size_t size, char prefix = 'p');

/// Specialization for the "json_parse" function. It makes json_parse take a
/// column argument that contains valid JSON strings with random variations.
std::vector<core::TypedExprPtr> generateJsonParseArg(
const CallableSignature& input);

private:
bool isSupportedSignature(const exec::FunctionSignature& signature);

Expand Down Expand Up @@ -403,6 +419,7 @@ class ExpressionFuzzer {
typeToColumnNames_.clear();
expressionBank_.reset();
expressionStats_.clear();
customInputGenerators_.clear();
}

State(FuzzerGenerator& rng, int maxLevelOfNesting)
Expand All @@ -417,6 +434,8 @@ class ExpressionFuzzer {
/// expressions consume.
std::vector<TypePtr> inputRowTypes_;
std::vector<std::string> inputRowNames_;
/// Contains the custom input generators for the input vectors.
std::vector<std::shared_ptr<AbstractInputGenerator>> customInputGenerators_;

// Count how many times each function has been selected.
std::unordered_map<std::string, size_t> expressionStats_;
Expand Down
7 changes: 7 additions & 0 deletions velox/expression/fuzzer/ExpressionFuzzerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include "velox/exec/fuzzer/PrestoQueryRunner.h"
#include "velox/expression/fuzzer/ArgGenerator.h"
#include "velox/expression/fuzzer/ExpressionFuzzer.h"
#include "velox/expression/fuzzer/FuzzerRunner.h"
#include "velox/functions/prestosql/fuzzer/DivideArgGenerator.h"
#include "velox/functions/prestosql/fuzzer/FloorAndRoundArgGenerator.h"
Expand Down Expand Up @@ -51,6 +52,7 @@ DEFINE_uint32(
using namespace facebook::velox::exec::test;
using facebook::velox::exec::test::PrestoQueryRunner;
using facebook::velox::fuzzer::ArgGenerator;
using facebook::velox::fuzzer::ExpressionFuzzer;
using facebook::velox::fuzzer::FuzzerRunner;
using facebook::velox::test::ReferenceQueryRunner;

Expand Down Expand Up @@ -121,6 +123,10 @@ int main(int argc, char** argv) {
{"map_keys", std::make_shared<SortArrayTransformer>()},
{"map_values", std::make_shared<SortArrayTransformer>()}};

std::unordered_map<std::string, ExpressionFuzzer::ArgsOverrideFuncPtr>
argsOverrideFuncs = {
{"json_parse", &ExpressionFuzzer::generateJsonParseArg}};

std::shared_ptr<facebook::velox::memory::MemoryPool> rootPool{
facebook::velox::memory::memoryManager()->addRootPool()};
std::shared_ptr<ReferenceQueryRunner> referenceQueryRunner{nullptr};
Expand All @@ -139,5 +145,6 @@ int main(int argc, char** argv) {
{{"session_timezone", "America/Los_Angeles"},
{"adjust_timestamp_to_session_timezone", "true"}},
argGenerators,
argsOverrideFuncs,
referenceQueryRunner);
}
27 changes: 19 additions & 8 deletions velox/expression/fuzzer/ExpressionFuzzerVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,10 @@ ExpressionFuzzerVerifier::ExpressionFuzzerVerifier(
size_t initialSeed,
const ExpressionFuzzerVerifier::Options& options,
const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>&
argGenerators)
argGenerators,
const std::unordered_map<
std::string,
ExpressionFuzzer::ArgsOverrideFuncPtr>& argsOverrideFuncs)
: options_(options),
queryCtx_(core::QueryCtx::create(
nullptr,
Expand All @@ -77,7 +80,8 @@ ExpressionFuzzerVerifier::ExpressionFuzzerVerifier(
initialSeed,
vectorFuzzer_,
options_.expressionFuzzerOptions,
argGenerators),
argGenerators,
argsOverrideFuncs),
referenceQueryRunner_{
options_.expressionFuzzerOptions.referenceQueryRunner} {
filesystems::registerLocalFileSystem();
Expand All @@ -101,7 +105,9 @@ ExpressionFuzzerVerifier::ExpressionFuzzerVerifier(
std::pair<std::vector<InputTestCase>, InputRowMetadata>
ExpressionFuzzerVerifier::generateInput(
const RowTypePtr& rowType,
VectorFuzzer& vectorFuzzer) {
VectorFuzzer& vectorFuzzer,
const std::vector<std::shared_ptr<AbstractInputGenerator>>&
inputGenerators) {
// Randomly pick to generate one or two input rows.
std::vector<InputTestCase> inputs;
int numInputs = vectorFuzzer.coinToss(0.5) ? 1 : 2;
Expand All @@ -123,19 +129,24 @@ ExpressionFuzzerVerifier::generateInput(
std::vector<VectorPtr> children;
children.reserve(rowType->size() + 1);
for (auto i = 0; i < rowType->size(); ++i) {
const auto& inputGenerator =
inputGenerators.size() > i ? inputGenerators[i] : nullptr;
if (std::binary_search(
metadata.columnsToWrapInCommonDictionary.begin(),
metadata.columnsToWrapInCommonDictionary.end(),
i)) {
// These will be wrapped in common dictionary later.
if (vectorFuzzer.getOptions().allowConstantVector &&
vectorFuzzer.coinToss(0.2)) {
children.push_back(vectorFuzzer.fuzzConstant(rowType->childAt(i)));
children.push_back(
vectorFuzzer.fuzzConstant(rowType->childAt(i), inputGenerator));
} else {
children.push_back(vectorFuzzer.fuzzFlat(rowType->childAt(i)));
children.push_back(
vectorFuzzer.fuzzFlat(rowType->childAt(i), inputGenerator));
}
} else {
children.push_back(vectorFuzzer.fuzz(rowType->childAt(i)));
children.push_back(
vectorFuzzer.fuzz(rowType->childAt(i), inputGenerator));
}
}

Expand Down Expand Up @@ -377,7 +388,7 @@ void ExpressionFuzzerVerifier::go() {
// set.
int numExpressionTrees = boost::random::uniform_int_distribution<int>(
1, options_.maxExpressionTreesPerStep)(rng_);
auto [expressions, inputType, selectionStats] =
auto [expressions, inputType, inputGenerators, selectionStats] =
expressionFuzzer_.fuzzExpressions(numExpressionTrees);
// Project a row number column in the output to enable epsilon-comparison
// for floating-point columns and make investigation of failures easier.
Expand All @@ -391,7 +402,7 @@ void ExpressionFuzzerVerifier::go() {
std::vector<core::TypedExprPtr> plans = std::move(expressions);

auto [inputTestCases, inputRowMetadata] =
generateInput(inputType, *vectorFuzzer_);
generateInput(inputType, *vectorFuzzer_, inputGenerators);

auto resultVectors = generateResultVectors(plans);
std::vector<fuzzer::ResultOrError> results;
Expand Down
9 changes: 7 additions & 2 deletions velox/expression/fuzzer/ExpressionFuzzerVerifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,10 @@ class ExpressionFuzzerVerifier {
size_t initialSeed,
const Options& options,
const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>&
argGenerators);
argGenerators,
const std::unordered_map<
std::string,
ExpressionFuzzer::ArgsOverrideFuncPtr>& argsOverrideFuncs);

// This function starts the test that is performed by the
// ExpressionFuzzerVerifier which is generating random expressions and
Expand Down Expand Up @@ -173,7 +176,9 @@ class ExpressionFuzzerVerifier {
// 4. Appends a row number column to the input row vector.
std::pair<std::vector<InputTestCase>, InputRowMetadata> generateInput(
const RowTypePtr& rowType,
VectorFuzzer& vectorFuzzer);
VectorFuzzer& vectorFuzzer,
const std::vector<std::shared_ptr<AbstractInputGenerator>>&
inputGenerators);

/// Randomize initial result vector data to test for correct null and data
/// setting in functions.
Expand Down
10 changes: 9 additions & 1 deletion velox/expression/fuzzer/FuzzerRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -236,13 +236,17 @@ int FuzzerRunner::run(
const std::unordered_map<std::string, std::string>& queryConfigs,
const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>&
argGenerators,
const std::unordered_map<
std::string,
ExpressionFuzzer::ArgsOverrideFuncPtr>& argsOverrideFuncs,
std::shared_ptr<exec::test::ReferenceQueryRunner> referenceQueryRunner) {
runFromGtest(
seed,
skipFunctions,
exprTransformers,
queryConfigs,
argGenerators,
argsOverrideFuncs,
referenceQueryRunner);
return RUN_ALL_TESTS();
}
Expand All @@ -256,6 +260,9 @@ void FuzzerRunner::runFromGtest(
const std::unordered_map<std::string, std::string>& queryConfigs,
const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>&
argGenerators,
const std::unordered_map<
std::string,
ExpressionFuzzer::ArgsOverrideFuncPtr>& argsOverrideFuncs,
std::shared_ptr<exec::test::ReferenceQueryRunner> referenceQueryRunner) {
if (!memory::MemoryManager::testInstance()) {
memory::MemoryManager::testingSetInstance({});
Expand All @@ -266,7 +273,8 @@ void FuzzerRunner::runFromGtest(
seed,
getExpressionFuzzerVerifierOptions(
skipFunctions, exprTransformers, queryConfigs, referenceQueryRunner),
argGenerators)
argGenerators,
argsOverrideFuncs)
.go();
}
} // namespace facebook::velox::fuzzer
6 changes: 6 additions & 0 deletions velox/expression/fuzzer/FuzzerRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ class FuzzerRunner {
const std::unordered_map<std::string, std::string>& queryConfigs,
const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>&
argGenerators,
const std::unordered_map<
std::string,
ExpressionFuzzer::ArgsOverrideFuncPtr>& argsOverrideFuncs,
std::shared_ptr<exec::test::ReferenceQueryRunner> referenceQueryRunner);

static void runFromGtest(
Expand All @@ -52,6 +55,9 @@ class FuzzerRunner {
const std::unordered_map<std::string, std::string>& queryConfigs,
const std::unordered_map<std::string, std::shared_ptr<ArgGenerator>>&
argGenerators,
const std::unordered_map<
std::string,
ExpressionFuzzer::ArgsOverrideFuncPtr>& argsOverrideFuncs,
std::shared_ptr<exec::test::ReferenceQueryRunner> referenceQueryRunner);
};

Expand Down

0 comments on commit 3c0d260

Please sign in to comment.