Skip to content

Commit

Permalink
feat(fuzzer): Support custom input generator in VectorFuzzer (#11466)
Browse files Browse the repository at this point in the history
Summary:


Custom types, such as JSON, often require custom logic to generate valid values. To support such custom data generation in the expression fuzzer, this diff makes two changes:
1. Require a custom type to provide a custom input generator that is automatically used when VectorFuzzer generates vectors of this type. The custom type can provide a nullptr, in which case VectorFuzzer generates random data in the old way.
2. Allow users of VectorFuzzer to provide a custom input generator to the API calls. (This will be needed for custom input generation for non-custom types in expression fuzzer, such as cdf functions that require some arguments to be positive numbers).

Differential Revision: D65576377
  • Loading branch information
kagamiori authored and facebook-github-bot committed Jan 4, 2025
1 parent 8b3b55a commit d75e3fb
Show file tree
Hide file tree
Showing 19 changed files with 324 additions and 72 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ class TypeFactories : public CustomTypeFactories {
return nullptr;
}

/// Returns the custom input generator for this type. This factory provides
/// none: returning nullptr lets VectorFuzzer fall back to its regular random
/// data generation. The parameter is intentionally unnamed because it is not
/// used.
AbstractInputGeneratorPtr getInputGenerator(
    const InputGeneratorConfig& /*config*/) const override {
  return nullptr;
}

private:
TypePtr type_;
};
Expand Down
5 changes: 5 additions & 0 deletions velox/expression/tests/CustomTypeTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ class FancyIntTypeFactories : public CustomTypeFactories {
/// Casting is not supported for this custom type, so this reports the call as
/// unsupported via VELOX_UNSUPPORTED() instead of returning an operator.
exec::CastOperatorPtr getCastOperator() const override {
VELOX_UNSUPPORTED();
}

/// Returns the custom input generator for this type. This factory provides
/// none: returning nullptr lets VectorFuzzer fall back to its regular random
/// data generation. The parameter is intentionally unnamed because it is not
/// used.
AbstractInputGeneratorPtr getInputGenerator(
    const InputGeneratorConfig& /*config*/) const override {
  return nullptr;
}
};

class ToFancyIntFunction : public exec::VectorFunction {
Expand Down
5 changes: 5 additions & 0 deletions velox/functions/prestosql/types/HyperLogLogType.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ class HyperLogLogTypeFactories : public CustomTypeFactories {
/// Returns nullptr: this type supplies no dedicated custom cast operator.
exec::CastOperatorPtr getCastOperator() const override {
return nullptr;
}

/// Returns the custom input generator for this type. This factory provides
/// none: returning nullptr lets VectorFuzzer fall back to its regular random
/// data generation. The parameter is intentionally unnamed because it is not
/// used.
AbstractInputGeneratorPtr getInputGenerator(
    const InputGeneratorConfig& /*config*/) const override {
  return nullptr;
}
};

void registerHyperLogLogType();
Expand Down
5 changes: 5 additions & 0 deletions velox/functions/prestosql/types/IPAddressType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,11 @@ class IPAddressTypeFactories : public CustomTypeFactories {
/// Returns the cast operator for this type: a newly allocated
/// IPAddressCastOperator on every call.
exec::CastOperatorPtr getCastOperator() const override {
return std::make_shared<IPAddressCastOperator>();
}

/// Returns the custom input generator for this type. This factory provides
/// none: returning nullptr lets VectorFuzzer fall back to its regular random
/// data generation. The parameter is intentionally unnamed because it is not
/// used.
AbstractInputGeneratorPtr getInputGenerator(
    const InputGeneratorConfig& /*config*/) const override {
  return nullptr;
}
};

} // namespace
Expand Down
5 changes: 5 additions & 0 deletions velox/functions/prestosql/types/IPPrefixType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,11 @@ class IPPrefixTypeFactories : public CustomTypeFactories {
/// Returns the cast operator for this type: a newly allocated
/// IPPrefixCastOperator on every call.
exec::CastOperatorPtr getCastOperator() const override {
return std::make_shared<IPPrefixCastOperator>();
}

/// Returns the custom input generator for this type. This factory provides
/// none: returning nullptr lets VectorFuzzer fall back to its regular random
/// data generation. The parameter is intentionally unnamed because it is not
/// used.
AbstractInputGeneratorPtr getInputGenerator(
    const InputGeneratorConfig& /*config*/) const override {
  return nullptr;
}
};

} // namespace
Expand Down
12 changes: 12 additions & 0 deletions velox/functions/prestosql/types/JsonType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "velox/functions/prestosql/json/SIMDJsonUtil.h"
#include "velox/type/Conversions.h"
#include "velox/type/Type.h"
#include "velox/vector/fuzzer/ConstrainedGenerators.h"

namespace facebook::velox {

Expand Down Expand Up @@ -1288,6 +1289,17 @@ class JsonTypeFactories : public CustomTypeFactories {
/// Returns the cast operator for this type: a newly allocated
/// JsonCastOperator on every call.
exec::CastOperatorPtr getCastOperator() const override {
return std::make_shared<JsonCastOperator>();
}

/// Builds the fuzzer input generator for the JSON type: a
/// fuzzer::JsonInputGenerator configured with the seed and null ratio from
/// 'config', wrapping a random input generator for config.representedType_.
AbstractInputGeneratorPtr getInputGenerator(
    const InputGeneratorConfig& config) const override {
  // Random generator for the type of data that the JSON values represent; it
  // shares the seed and null ratio of the outer JSON generator.
  auto representedGenerator = fuzzer::getRandomInputGenerator(
      config.seed_, config.representedType_, config.nullRatio_);

  // NOTE(review): the meaning of the trailing 'false' flag is defined by the
  // JsonInputGenerator constructor, which is not visible here -- confirm.
  return std::make_shared<fuzzer::JsonInputGenerator>(
      config.seed_,
      JSON(),
      config.nullRatio_,
      std::move(representedGenerator),
      false);
}
};

} // namespace
Expand Down
5 changes: 5 additions & 0 deletions velox/functions/prestosql/types/TimestampWithTimeZoneType.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,11 @@ class TimestampWithTimeZoneTypeFactories : public CustomTypeFactories {
/// Returns the cast operator obtained from
/// TimestampWithTimeZoneCastOperator::get() (presumably a shared instance --
/// confirm against that class).
exec::CastOperatorPtr getCastOperator() const override {
return TimestampWithTimeZoneCastOperator::get();
}

/// Returns the custom input generator for this type. This factory provides
/// none: returning nullptr lets VectorFuzzer fall back to its regular random
/// data generation. The parameter is intentionally unnamed because it is not
/// used.
AbstractInputGeneratorPtr getInputGenerator(
    const InputGeneratorConfig& /*config*/) const override {
  return nullptr;
}
};

void registerTimestampWithTimeZoneType();
Expand Down
5 changes: 5 additions & 0 deletions velox/functions/prestosql/types/UuidType.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ class UuidTypeFactories : public CustomTypeFactories {
/// Returns the cast operator for this type: a newly allocated
/// UuidCastOperator on every call.
exec::CastOperatorPtr getCastOperator() const override {
return std::make_shared<UuidCastOperator>();
}

/// Returns the custom input generator for this type. This factory provides
/// none: returning nullptr lets VectorFuzzer fall back to its regular random
/// data generation. The parameter is intentionally unnamed because it is not
/// used.
AbstractInputGeneratorPtr getInputGenerator(
    const InputGeneratorConfig& /*config*/) const override {
  return nullptr;
}
};

} // namespace
Expand Down
5 changes: 5 additions & 0 deletions velox/type/OpaqueCustomTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ class OpaqueCustomTypeRegister {
/// Casting is not supported for this custom type, so this reports the call as
/// unsupported via VELOX_UNSUPPORTED() instead of returning an operator.
exec::CastOperatorPtr getCastOperator() const override {
VELOX_UNSUPPORTED();
}

/// Returns the custom input generator for this type. This factory provides
/// none: returning nullptr lets VectorFuzzer fall back to its regular random
/// data generation. The parameter is intentionally unnamed because it is not
/// used.
AbstractInputGeneratorPtr getInputGenerator(
    const InputGeneratorConfig& /*config*/) const override {
  return nullptr;
}
};
};
} // namespace facebook::velox
11 changes: 11 additions & 0 deletions velox/type/Type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,17 @@ exec::CastOperatorPtr getCustomTypeCastOperator(const std::string& name) {
return nullptr;
}

/// Looks up the factories registered for the custom type 'name' and asks them
/// for an input generator built from 'config'. Returns nullptr when no
/// factories are found for 'name'; the factories themselves may also return
/// nullptr.
AbstractInputGeneratorPtr getCustomTypeInputGenerator(
    const std::string& name,
    const InputGeneratorConfig& config) {
  auto typeFactories = getTypeFactories(name);
  if (!typeFactories) {
    // No factories found for this name.
    return nullptr;
  }
  return typeFactories->getInputGenerator(config);
}

void toTypeSql(const TypePtr& type, std::ostream& out) {
switch (type->kind()) {
case TypeKind::ARRAY:
Expand Down
54 changes: 54 additions & 0 deletions velox/type/Type.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#pragma once

#include <folly/CPortability.h>
#include <folly/Random.h>
#include <folly/Range.h>
#include <folly/dynamic.h>

Expand Down Expand Up @@ -2003,6 +2004,22 @@ class CastOperator;
using CastOperatorPtr = std::shared_ptr<const CastOperator>;
} // namespace exec

/// Forward declaration.
class variant;
class AbstractInputGenerator;

using AbstractInputGeneratorPtr = std::shared_ptr<AbstractInputGenerator>;
using FuzzerGenerator = folly::detail::DefaultGenerator;

/// Settings passed to CustomTypeFactories::getInputGenerator() when an input
/// generator is requested for a custom type.
struct InputGeneratorConfig {
  /// Seed forwarded to the input generator's random number generator.
  size_t seed_;

  /// Null ratio forwarded to the input generator (presumably the fraction of
  /// generated values that should be null -- confirm with the generators).
  double nullRatio_;

  /// Type of data represented by JSON. This config should be ignored by
  /// non-JSON input generators.
  ///
  /// Held by value rather than by const reference: a reference member would
  /// dangle as soon as the config is copied, stored, or outlives the TypePtr
  /// it was initialized from. Aggregate initialization from an existing
  /// TypePtr keeps working unchanged.
  TypePtr representedType_;
};

/// Associates custom types with their custom operators to be the payload in
/// the custom type registry.
class CustomTypeFactories {
Expand All @@ -2017,6 +2034,38 @@ class CustomTypeFactories {
/// return a nullptr. If a custom type does not support castings, throw an
/// exception.
virtual exec::CastOperatorPtr getCastOperator() const = 0;

/// Returns the input generator that produces values of this custom type,
/// built from 'config'. Implementations may return nullptr, in which case
/// VectorFuzzer generates random data without a custom generator.
virtual AbstractInputGeneratorPtr getInputGenerator(
const InputGeneratorConfig& config) const = 0;
};

/// Base class of input generators. A generator is tied to one type and
/// produces values through generate(), which subclasses must implement.
class AbstractInputGenerator {
 public:
  /// @param seed Seed applied to the random number generator rng_.
  /// @param type Type reported by type().
  /// @param next Another generator whose ownership is taken over; how it is
  /// used is left to subclasses.
  /// @param nullRatio Stored for subclasses (presumably the fraction of
  /// generated values that should be null -- confirm with subclasses).
  AbstractInputGenerator(
      size_t seed,
      const TypePtr& type,
      std::unique_ptr<AbstractInputGenerator>&& next,
      double nullRatio)
      : type_(type), next_(std::move(next)), nullRatio_(nullRatio) {
    rng_.seed(seed);
  }

  virtual ~AbstractInputGenerator() = default;

  /// Produces the next value.
  virtual variant generate() = 0;

  /// Returns the type given at construction.
  TypePtr type() const {
    return type_;
  }

 protected:
  // Random number generator, seeded in the constructor.
  FuzzerGenerator rng_;

  // Type passed to the constructor.
  TypePtr type_;

  // Generator passed to the constructor; owned by this object.
  std::unique_ptr<AbstractInputGenerator> next_;

  // Null ratio passed to the constructor.
  double nullRatio_;
};

/// Adds custom type to the registry if it doesn't exist already. No-op if
Expand Down Expand Up @@ -2083,6 +2132,11 @@ bool unregisterCustomType(const std::string& name);
/// does not have a dedicated custom cast operator.
exec::CastOperatorPtr getCustomTypeCastOperator(const std::string& name);

/// Returns the input generator for the custom type with the specified name.
AbstractInputGeneratorPtr getCustomTypeInputGenerator(
const std::string& name,
const InputGeneratorConfig& config);

// Allows us to transparently use folly::toAppend(), folly::join(), etc.
template <class TString>
void toAppend(
Expand Down
5 changes: 5 additions & 0 deletions velox/type/parser/tests/TypeParserTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ class TypeFactories : public CustomTypeFactories {
return nullptr;
}

/// Returns the custom input generator for this type. This factory provides
/// none: returning nullptr lets VectorFuzzer fall back to its regular random
/// data generation. The parameter is intentionally unnamed because it is not
/// used.
AbstractInputGeneratorPtr getInputGenerator(
    const InputGeneratorConfig& /*config*/) const override {
  return nullptr;
}

private:
TypePtr type_;
};
Expand Down
8 changes: 7 additions & 1 deletion velox/vector/fuzzer/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,13 @@ target_link_libraries(
add_library(velox_vector_fuzzer GeneratorSpec.cpp VectorFuzzer.cpp)

target_link_libraries(
velox_vector_fuzzer velox_type velox_vector velox_vector_fuzzer_util)
velox_vector_fuzzer
velox_type
velox_vector
velox_vector_fuzzer_util
velox_presto_types
velox_fuzzer_constrained_input_generators)

if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
target_compile_options(velox_vector_fuzzer
PRIVATE -Wno-deprecated-declarations)
Expand Down
10 changes: 0 additions & 10 deletions velox/vector/fuzzer/ConstrainedGenerators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,6 @@

namespace facebook::velox::fuzzer {

// AbstractInputGenerator
// Out-of-line constructor: stores 'type' and 'nullRatio', takes ownership of
// 'next', and seeds the random number generator rng_ with 'seed'.
AbstractInputGenerator::AbstractInputGenerator(
size_t seed,
const TypePtr& type,
std::unique_ptr<AbstractInputGenerator>&& next,
double nullRatio)
: type_{type}, next_{std::move(next)}, nullRatio_{nullRatio} {
rng_.seed(seed);
}

// NotEqualConstrainedGenerator
variant NotEqualConstrainedGenerator::generate() {
variant value;
Expand Down
26 changes: 0 additions & 26 deletions velox/vector/fuzzer/ConstrainedGenerators.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,32 +28,6 @@ namespace facebook::velox::fuzzer {

using facebook::velox::variant;

/// Base class of input generators. A generator is tied to one type and
/// produces values through generate(), which subclasses must implement.
class AbstractInputGenerator {
public:
/// Declared here and defined out of line. Takes the seed, the type reported
/// by type(), another generator to own ('next'), and a null ratio.
AbstractInputGenerator(
size_t seed,
const TypePtr& type,
std::unique_ptr<AbstractInputGenerator>&& next,
double nullRatio);

virtual ~AbstractInputGenerator() = default;

/// Produces the next value.
virtual variant generate() = 0;

/// Returns the type held in type_.
TypePtr type() const {
return type_;
}

protected:
// Random number generator available to subclasses.
FuzzerGenerator rng_;

// Type returned by type().
TypePtr type_;

// Another generator owned by this object; its use is left to subclasses.
std::unique_ptr<AbstractInputGenerator> next_;

// Null ratio available to subclasses.
double nullRatio_;
};

std::unique_ptr<AbstractInputGenerator>
getRandomInputGenerator(size_t seed, const TypePtr& type, double nullRatio);

Expand Down
Loading

0 comments on commit d75e3fb

Please sign in to comment.