diff --git a/velox/expression/signature_parser/tests/ParseTypeSignatureTest.cpp b/velox/expression/signature_parser/tests/ParseTypeSignatureTest.cpp index 2f4c3c105c46..85a7546bad72 100644 --- a/velox/expression/signature_parser/tests/ParseTypeSignatureTest.cpp +++ b/velox/expression/signature_parser/tests/ParseTypeSignatureTest.cpp @@ -64,6 +64,11 @@ class TypeFactories : public CustomTypeFactories { return nullptr; } + AbstractInputGeneratorPtr getInputGenerator( + const InputGeneratorConfig& config) const override { + return nullptr; + } + private: TypePtr type_; }; diff --git a/velox/expression/tests/CustomTypeTest.cpp b/velox/expression/tests/CustomTypeTest.cpp index 97afc1f6c79d..c0770ec08858 100644 --- a/velox/expression/tests/CustomTypeTest.cpp +++ b/velox/expression/tests/CustomTypeTest.cpp @@ -62,6 +62,11 @@ class FancyIntTypeFactories : public CustomTypeFactories { exec::CastOperatorPtr getCastOperator() const override { VELOX_UNSUPPORTED(); } + + AbstractInputGeneratorPtr getInputGenerator( + const InputGeneratorConfig& config) const override { + return nullptr; + } }; class ToFancyIntFunction : public exec::VectorFunction { diff --git a/velox/functions/prestosql/types/HyperLogLogType.h b/velox/functions/prestosql/types/HyperLogLogType.h index 6680a3fd2f12..3755914dc96d 100644 --- a/velox/functions/prestosql/types/HyperLogLogType.h +++ b/velox/functions/prestosql/types/HyperLogLogType.h @@ -81,6 +81,11 @@ class HyperLogLogTypeFactories : public CustomTypeFactories { exec::CastOperatorPtr getCastOperator() const override { return nullptr; } + + AbstractInputGeneratorPtr getInputGenerator( + const InputGeneratorConfig& config) const override { + return nullptr; + } }; void registerHyperLogLogType(); diff --git a/velox/functions/prestosql/types/IPAddressType.cpp b/velox/functions/prestosql/types/IPAddressType.cpp index f1cb2223f76a..681f361bf402 100644 --- a/velox/functions/prestosql/types/IPAddressType.cpp +++ b/velox/functions/prestosql/types/IPAddressType.cpp @@ -265,6 +265,11 @@ class IPAddressTypeFactories : public CustomTypeFactories { exec::CastOperatorPtr getCastOperator() const override { return std::make_shared(); } + + AbstractInputGeneratorPtr getInputGenerator( + const InputGeneratorConfig& config) const override { + return nullptr; + } }; } // namespace diff --git a/velox/functions/prestosql/types/IPPrefixType.cpp b/velox/functions/prestosql/types/IPPrefixType.cpp index 8963044f5a63..18b969a9c81e 100644 --- a/velox/functions/prestosql/types/IPPrefixType.cpp +++ b/velox/functions/prestosql/types/IPPrefixType.cpp @@ -190,6 +190,11 @@ class IPPrefixTypeFactories : public CustomTypeFactories { exec::CastOperatorPtr getCastOperator() const override { return std::make_shared(); } + + AbstractInputGeneratorPtr getInputGenerator( + const InputGeneratorConfig& config) const override { + return nullptr; + } }; } // namespace diff --git a/velox/functions/prestosql/types/JsonType.cpp b/velox/functions/prestosql/types/JsonType.cpp index d2fd99bc11bc..97e6cf5691bb 100644 --- a/velox/functions/prestosql/types/JsonType.cpp +++ b/velox/functions/prestosql/types/JsonType.cpp @@ -35,6 +35,7 @@ #include "velox/functions/prestosql/json/SIMDJsonUtil.h" #include "velox/type/Conversions.h" #include "velox/type/Type.h" +#include "velox/vector/fuzzer/ConstrainedGenerators.h" namespace facebook::velox { @@ -1288,6 +1289,17 @@ class JsonTypeFactories : public CustomTypeFactories { exec::CastOperatorPtr getCastOperator() const override { return std::make_shared(); } + + AbstractInputGeneratorPtr getInputGenerator( + const InputGeneratorConfig& config) const override { + return std::make_shared( + config.seed_, + JSON(), + config.nullRatio_, + fuzzer::getRandomInputGenerator( + config.seed_, config.representedType_, config.nullRatio_), + false); + } }; } // namespace diff --git a/velox/functions/prestosql/types/TimestampWithTimeZoneType.h b/velox/functions/prestosql/types/TimestampWithTimeZoneType.h index e3f1f035fb18..6ca5cdec6a4e 100644 --- a/velox/functions/prestosql/types/TimestampWithTimeZoneType.h +++ b/velox/functions/prestosql/types/TimestampWithTimeZoneType.h @@ -159,6 +159,11 @@ class TimestampWithTimeZoneTypeFactories : public CustomTypeFactories { exec::CastOperatorPtr getCastOperator() const override { return TimestampWithTimeZoneCastOperator::get(); } + + AbstractInputGeneratorPtr getInputGenerator( + const InputGeneratorConfig& config) const override { + return nullptr; + } }; void registerTimestampWithTimeZoneType(); diff --git a/velox/functions/prestosql/types/UuidType.cpp b/velox/functions/prestosql/types/UuidType.cpp index b97d5f3d5735..9879c7f526ed 100644 --- a/velox/functions/prestosql/types/UuidType.cpp +++ b/velox/functions/prestosql/types/UuidType.cpp @@ -147,6 +147,11 @@ class UuidTypeFactories : public CustomTypeFactories { exec::CastOperatorPtr getCastOperator() const override { return std::make_shared(); } + + AbstractInputGeneratorPtr getInputGenerator( + const InputGeneratorConfig& config) const override { + return nullptr; + } }; } // namespace diff --git a/velox/type/OpaqueCustomTypes.h b/velox/type/OpaqueCustomTypes.h index 6450c1c47c88..03bffcc6a4e4 100644 --- a/velox/type/OpaqueCustomTypes.h +++ b/velox/type/OpaqueCustomTypes.h @@ -94,6 +94,11 @@ class OpaqueCustomTypeRegister { exec::CastOperatorPtr getCastOperator() const override { VELOX_UNSUPPORTED(); } + + AbstractInputGeneratorPtr getInputGenerator( + const InputGeneratorConfig& config) const override { + return nullptr; + } }; }; } // namespace facebook::velox diff --git a/velox/type/Type.cpp b/velox/type/Type.cpp index 36a9449d75f7..e3c29ef175bd 100644 --- a/velox/type/Type.cpp +++ b/velox/type/Type.cpp @@ -977,6 +977,17 @@ exec::CastOperatorPtr getCustomTypeCastOperator(const std::string& name) { return nullptr; } +AbstractInputGeneratorPtr getCustomTypeInputGenerator( + const std::string& name, + const InputGeneratorConfig& config) { + auto factories = getTypeFactories(name); + if (factories) { + return factories->getInputGenerator(config); + } + + return nullptr; +} + void toTypeSql(const TypePtr& type, std::ostream& out) { switch (type->kind()) { case TypeKind::ARRAY: diff --git a/velox/type/Type.h b/velox/type/Type.h index 31cddb2629ad..a5a6f3967b27 100644 --- a/velox/type/Type.h +++ b/velox/type/Type.h @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -2003,6 +2004,22 @@ class CastOperator; using CastOperatorPtr = std::shared_ptr; } // namespace exec +/// Forward declaration. +class variant; +class AbstractInputGenerator; + +using AbstractInputGeneratorPtr = std::shared_ptr; +using FuzzerGenerator = folly::detail::DefaultGenerator; + +struct InputGeneratorConfig { + size_t seed_; + double nullRatio_; + + // Type of data represented by JSON. This config should be ignored by non-JSON + // input generators. + const TypePtr& representedType_; +}; + /// Associates custom types with their custom operators to be the payload in /// the custom type registry. class CustomTypeFactories { @@ -2017,6 +2034,38 @@ class CustomTypeFactories { /// return a nullptr. If a custom type does not support castings, throw an /// exception. virtual exec::CastOperatorPtr getCastOperator() const = 0; + + virtual AbstractInputGeneratorPtr getInputGenerator( + const InputGeneratorConfig& config) const = 0; +}; + +class AbstractInputGenerator { + public: + AbstractInputGenerator( + size_t seed, + const TypePtr& type, + std::unique_ptr&& next, + double nullRatio) + : type_{type}, next_{std::move(next)}, nullRatio_{nullRatio} { + rng_.seed(seed); + } + + virtual ~AbstractInputGenerator() = default; + + virtual variant generate() = 0; + + TypePtr type() const { + return type_; + } + + protected: + FuzzerGenerator rng_; + + TypePtr type_; + + std::unique_ptr next_; + + double nullRatio_; }; /// Adds custom type to the registry if it doesn't exist already. No-op if @@ -2083,6 +2132,11 @@ bool unregisterCustomType(const std::string& name); /// does not have a dedicated custom cast operator. exec::CastOperatorPtr getCustomTypeCastOperator(const std::string& name); +/// Returns the input generator for the custom type with the specified name. +AbstractInputGeneratorPtr getCustomTypeInputGenerator( + const std::string& name, + const InputGeneratorConfig& config); + // Allows us to transparently use folly::toAppend(), folly::join(), etc. template void toAppend( diff --git a/velox/type/parser/tests/TypeParserTest.cpp b/velox/type/parser/tests/TypeParserTest.cpp index 87af58afe19a..06569ea136a7 100644 --- a/velox/type/parser/tests/TypeParserTest.cpp +++ b/velox/type/parser/tests/TypeParserTest.cpp @@ -54,6 +54,11 @@ class TypeFactories : public CustomTypeFactories { return nullptr; } + AbstractInputGeneratorPtr getInputGenerator( + const InputGeneratorConfig& config) const override { + return nullptr; + } + private: TypePtr type_; }; diff --git a/velox/vector/fuzzer/CMakeLists.txt b/velox/vector/fuzzer/CMakeLists.txt index 36964ee7f5a1..a6aeeaebc4dd 100644 --- a/velox/vector/fuzzer/CMakeLists.txt +++ b/velox/vector/fuzzer/CMakeLists.txt @@ -20,7 +20,13 @@ target_link_libraries( add_library(velox_vector_fuzzer GeneratorSpec.cpp VectorFuzzer.cpp) target_link_libraries( - velox_vector_fuzzer velox_type velox_vector velox_vector_fuzzer_util) + velox_vector_fuzzer + velox_type + velox_vector + velox_vector_fuzzer_util + velox_presto_types + velox_fuzzer_constrained_input_generators) + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") target_compile_options(velox_vector_fuzzer PRIVATE -Wno-deprecated-declarations) diff --git a/velox/vector/fuzzer/ConstrainedGenerators.cpp b/velox/vector/fuzzer/ConstrainedGenerators.cpp index a623d4e34eba..ad91807e8225 100644 --- a/velox/vector/fuzzer/ConstrainedGenerators.cpp +++ b/velox/vector/fuzzer/ConstrainedGenerators.cpp @@ -22,16 +22,6 @@ namespace facebook::velox::fuzzer { -// AbstractInputGenerator -AbstractInputGenerator::AbstractInputGenerator( - size_t seed, - const TypePtr& type, - std::unique_ptr&& next, - double nullRatio) - : type_{type}, next_{std::move(next)}, nullRatio_{nullRatio} { - rng_.seed(seed); -} - // NotEqualConstrainedGenerator variant NotEqualConstrainedGenerator::generate() { variant value; diff --git a/velox/vector/fuzzer/ConstrainedGenerators.h b/velox/vector/fuzzer/ConstrainedGenerators.h index 58d9582ab201..fdddd777b5f2 100644 --- a/velox/vector/fuzzer/ConstrainedGenerators.h +++ b/velox/vector/fuzzer/ConstrainedGenerators.h @@ -28,32 +28,6 @@ namespace facebook::velox::fuzzer { using facebook::velox::variant; -class AbstractInputGenerator { - public: - AbstractInputGenerator( - size_t seed, - const TypePtr& type, - std::unique_ptr&& next, - double nullRatio); - - virtual ~AbstractInputGenerator() = default; - - virtual variant generate() = 0; - - TypePtr type() const { - return type_; - } - - protected: - FuzzerGenerator rng_; - - TypePtr type_; - - std::unique_ptr next_; - - double nullRatio_; -}; - std::unique_ptr getRandomInputGenerator(size_t seed, const TypePtr& type, double nullRatio); diff --git a/velox/vector/fuzzer/VectorFuzzer.cpp b/velox/vector/fuzzer/VectorFuzzer.cpp index 509b4398aea9..0980ae4624b5 100644 --- a/velox/vector/fuzzer/VectorFuzzer.cpp +++ b/velox/vector/fuzzer/VectorFuzzer.cpp @@ -28,6 +28,7 @@ #include "velox/vector/FlatVector.h" #include "velox/vector/NullsBuilder.h" #include "velox/vector/VectorTypeUtils.h" +#include "velox/vector/fuzzer/ConstrainedVectorGenerator.h" #include "velox/vector/fuzzer/Utils.h" namespace facebook::velox { @@ -117,7 +118,13 @@ VectorPtr fuzzConstantPrimitiveImpl( const TypePtr& type, vector_size_t size, FuzzerGenerator& rng, - const VectorFuzzer::Options& opts) { + const VectorFuzzer::Options& opts, + const AbstractInputGeneratorPtr& customGenerator) { + if (customGenerator) { + return fuzzer::ConstrainedVectorGenerator::generateConstant( + customGenerator, size, pool); + } + using TCpp = typename TypeTraits::NativeType; if constexpr (std::is_same_v) { std::wstring_convert, char16_t> converter; @@ -226,23 +233,34 @@ bool hasNestedDictionaryLayers(const VectorPtr& baseVector) { } // namespace -VectorPtr VectorFuzzer::fuzzNotNull(const TypePtr& type) { - return fuzzNotNull(type, opts_.vectorSize); +VectorPtr VectorFuzzer::fuzzNotNull( + const TypePtr& type, + const AbstractInputGeneratorPtr& customGenerator) { + return fuzzNotNull(type, opts_.vectorSize, customGenerator); } -VectorPtr VectorFuzzer::fuzzNotNull(const TypePtr& type, vector_size_t size) { +VectorPtr VectorFuzzer::fuzzNotNull( + const TypePtr& type, + vector_size_t size, + const AbstractInputGeneratorPtr& customGenerator) { ScopedOptions restorer(this); opts_.nullRatio = 0; - return fuzz(type, size); + return fuzz(type, size, customGenerator); } -VectorPtr VectorFuzzer::fuzz(const TypePtr& type) { - return fuzz(type, opts_.vectorSize); +VectorPtr VectorFuzzer::fuzz( + const TypePtr& type, + const AbstractInputGeneratorPtr& customGenerator) { + return fuzz(type, opts_.vectorSize, customGenerator); } -VectorPtr VectorFuzzer::fuzz(const TypePtr& type, vector_size_t size) { +VectorPtr VectorFuzzer::fuzz( + const TypePtr& type, + vector_size_t size, + const AbstractInputGeneratorPtr& customGenerator) { VectorPtr vector; vector_size_t vectorSize = size; + auto inputGenerator = customGenerator; bool usingLazyVector = opts_.allowLazyVector && coinToss(0.1); // Lazy Vectors cannot be sliced, so we skip this if using lazy wrapping. @@ -251,15 +269,22 @@ VectorPtr VectorFuzzer::fuzz(const TypePtr& type, vector_size_t size) { vectorSize += rand(rng_) % 8; } + if (!inputGenerator && getCustomType(type->name())) { + InputGeneratorConfig config{ + rand(rng_), opts_.nullRatio, randType(3)}; + inputGenerator = getCustomTypeInputGenerator(type->name(), config); + } + // 20% chance of adding a constant vector. if (opts_.allowConstantVector && coinToss(0.2)) { - vector = fuzzConstant(type, vectorSize); - } else if (type->isPrimitiveType()) { - vector = fuzzFlatPrimitive(type, vectorSize); + vector = fuzzConstant(type, vectorSize, inputGenerator); } else if (type->isOpaque()) { vector = fuzzFlatOpaque(type, vectorSize); + } else if (inputGenerator) { + vector = fuzzFlat(type, vectorSize, inputGenerator); } else { - vector = fuzzComplex(type, vectorSize); + vector = type->isPrimitiveType() ? fuzzFlatPrimitive(type, vectorSize) + : fuzzComplex(type, vectorSize); } if (vectorSize > size) { @@ -292,11 +317,16 @@ VectorPtr VectorFuzzer::fuzz(const GeneratorSpec& generatorSpec) { return generatorSpec.generateData(rng_, pool_, opts_.vectorSize); } -VectorPtr VectorFuzzer::fuzzConstant(const TypePtr& type) { - return fuzzConstant(type, opts_.vectorSize); +VectorPtr VectorFuzzer::fuzzConstant( + const TypePtr& type, + const AbstractInputGeneratorPtr& customGenerator) { + return fuzzConstant(type, opts_.vectorSize, customGenerator); } -VectorPtr VectorFuzzer::fuzzConstant(const TypePtr& type, vector_size_t size) { +VectorPtr VectorFuzzer::fuzzConstant( + const TypePtr& type, + vector_size_t size, + const AbstractInputGeneratorPtr& customGenerator) { // For constants, there are two possible cases: // - generate a regular constant vector (only for primitive types). // - generate a random vector and wrap it using a constant vector. @@ -315,7 +345,8 @@ VectorPtr VectorFuzzer::fuzzConstant(const TypePtr& type, vector_size_t size) { type, size, rng_, - opts_); + opts_, + customGenerator); } } @@ -336,13 +367,16 @@ VectorPtr VectorFuzzer::fuzzConstant(const TypePtr& type, vector_size_t size) { opts_.maxConstantContainerSize.value(), opts_.containerLength); opts_.complexElementsMaxSize = std::min( opts_.maxConstantContainerSize.value(), opts_.complexElementsMaxSize); + // TODO: incorporate fuzzer options into customGenerator. } return BaseVector::wrapInConstant( - size, constantIndex, fuzz(type, innerVectorSize)); + size, constantIndex, fuzz(type, innerVectorSize, customGenerator)); } -VectorPtr VectorFuzzer::fuzzFlat(const TypePtr& type) { - return fuzzFlat(type, opts_.vectorSize); +VectorPtr VectorFuzzer::fuzzFlat( + const TypePtr& type, + const AbstractInputGeneratorPtr& customGenerator) { + return fuzzFlat(type, opts_.vectorSize, customGenerator); } VectorPtr VectorFuzzer::fuzzFlatNotNull(const TypePtr& type) { @@ -357,7 +391,15 @@ VectorPtr VectorFuzzer::fuzzFlatNotNull( return fuzzFlat(type, size); } -VectorPtr VectorFuzzer::fuzzFlat(const TypePtr& type, vector_size_t size) { +VectorPtr VectorFuzzer::fuzzFlat( + const TypePtr& type, + vector_size_t size, + const AbstractInputGeneratorPtr& customGenerator) { + if (customGenerator) { + return fuzzer::ConstrainedVectorGenerator::generateFlat( + customGenerator, size, pool_); + } + // Primitive types. if (type->isPrimitiveType()) { return fuzzFlatPrimitive(type, size); @@ -635,8 +677,10 @@ MapVectorPtr VectorFuzzer::fuzzMap( values); } -RowVectorPtr VectorFuzzer::fuzzInputRow(const RowTypePtr& rowType) { - return fuzzRow(rowType, opts_.vectorSize, false); +RowVectorPtr VectorFuzzer::fuzzInputRow( + const RowTypePtr& rowType, + const std::vector& inputGenerators) { + return fuzzRow(rowType, opts_.vectorSize, false, inputGenerators); } RowVectorPtr VectorFuzzer::fuzzInputFlatRow(const RowTypePtr& rowType) { @@ -693,14 +737,18 @@ RowVectorPtr VectorFuzzer::fuzzRow(const RowTypePtr& rowType) { RowVectorPtr VectorFuzzer::fuzzRow( const RowTypePtr& rowType, vector_size_t size, - bool allowTopLevelNulls) { + bool allowTopLevelNulls, + const std::vector& inputGenerators) { std::vector children; children.reserve(rowType->size()); for (auto i = 0; i < rowType->size(); ++i) { + const auto& inputGenerator = + inputGenerators.size() > i ? inputGenerators[i] : nullptr; children.push_back( - opts_.containerHasNulls ? fuzz(rowType->childAt(i), size) - : fuzzNotNull(rowType->childAt(i), size)); + opts_.containerHasNulls + ? fuzz(rowType->childAt(i), size, inputGenerator) + : fuzzNotNull(rowType->childAt(i), size, inputGenerator)); } return std::make_shared( diff --git a/velox/vector/fuzzer/VectorFuzzer.h b/velox/vector/fuzzer/VectorFuzzer.h index 67736f081595..47ca4a8ce700 100644 --- a/velox/vector/fuzzer/VectorFuzzer.h +++ b/velox/vector/fuzzer/VectorFuzzer.h @@ -166,8 +166,13 @@ class VectorFuzzer { /// Returns a "fuzzed" vector, containing randomized data, nulls, and indices /// vector (dictionary). Returns a vector containing `opts_.vectorSize` or /// `size` elements. - VectorPtr fuzz(const TypePtr& type); - VectorPtr fuzz(const TypePtr& type, vector_size_t size); + VectorPtr fuzz( + const TypePtr& type, + const AbstractInputGeneratorPtr& customGenerator = nullptr); + VectorPtr fuzz( + const TypePtr& type, + vector_size_t size, + const AbstractInputGeneratorPtr& customGenerator = nullptr); /// Returns a "fuzzed" vector containing randomized data customized according /// to generatorSpec. @@ -175,14 +180,24 @@ class VectorFuzzer { /// Same as above, but returns a vector without nulls (regardless of the value /// of opts.nullRatio). - VectorPtr fuzzNotNull(const TypePtr& type); - VectorPtr fuzzNotNull(const TypePtr& type, vector_size_t size); + VectorPtr fuzzNotNull( + const TypePtr& type, + const AbstractInputGeneratorPtr& customGenerator = nullptr); + VectorPtr fuzzNotNull( + const TypePtr& type, + vector_size_t size, + const AbstractInputGeneratorPtr& customGenerator = nullptr); /// Returns a flat vector or a complex vector with flat children with /// randomized data and nulls. Returns a vector containing `opts_.vectorSize` /// or `size` elements. - VectorPtr fuzzFlat(const TypePtr& type); - VectorPtr fuzzFlat(const TypePtr& type, vector_size_t size); + VectorPtr fuzzFlat( + const TypePtr& type, + const AbstractInputGeneratorPtr& customGenerator = nullptr); + VectorPtr fuzzFlat( + const TypePtr& type, + vector_size_t size, + const AbstractInputGeneratorPtr& customGenerator = nullptr); /// Same as above, but returns a vector without nulls (regardless of the value /// of opts.nullRatio). @@ -196,8 +211,13 @@ class VectorFuzzer { /// Returns a random constant vector (which could be a null constant). Returns /// a vector with size set to `opts_.vectorSize` or 'size'. - VectorPtr fuzzConstant(const TypePtr& type); - VectorPtr fuzzConstant(const TypePtr& type, vector_size_t size); + VectorPtr fuzzConstant( + const TypePtr& type, + const AbstractInputGeneratorPtr& customGenerator = nullptr); + VectorPtr fuzzConstant( + const TypePtr& type, + vector_size_t size, + const AbstractInputGeneratorPtr& customGenerator = nullptr); /// Wraps `vector` using a randomized indices vector, returning a /// DictionaryVector which has same number of indices as the underlying @@ -233,7 +253,8 @@ class VectorFuzzer { RowVectorPtr fuzzRow( const RowTypePtr& rowType, vector_size_t size, - bool allowTopLevelNulls = true); + bool allowTopLevelNulls = true, + const std::vector& inputGenerators = {}); /// Returns a RowVector based on the provided vectors, fuzzing its top-level /// null buffer. @@ -248,7 +269,9 @@ class VectorFuzzer { /// Same as the function above, but never return nulls for the top-level row /// elements. - RowVectorPtr fuzzInputRow(const RowTypePtr& rowType); + RowVectorPtr fuzzInputRow( + const RowTypePtr& rowType, + const std::vector& inputGenerators = {}); /// Same as the function above, but all generated vectors are flat, i.e. no /// constant or dictionary-encoded vectors at any level. diff --git a/velox/vector/fuzzer/tests/CMakeLists.txt b/velox/vector/fuzzer/tests/CMakeLists.txt index bee3271a5cb2..39951c5f5b67 100644 --- a/velox/vector/fuzzer/tests/CMakeLists.txt +++ b/velox/vector/fuzzer/tests/CMakeLists.txt @@ -18,6 +18,8 @@ add_test(velox_vector_fuzzer_test velox_vector_fuzzer_test) target_link_libraries( velox_vector_fuzzer_test velox_vector_fuzzer + velox_presto_types + velox_fuzzer_constrained_input_generators velox_memory GTest::gtest GTest::gtest_main diff --git a/velox/vector/fuzzer/tests/VectorFuzzerTest.cpp b/velox/vector/fuzzer/tests/VectorFuzzerTest.cpp index f67ca1af45cc..5b499b753d6e 100644 --- a/velox/vector/fuzzer/tests/VectorFuzzerTest.cpp +++ b/velox/vector/fuzzer/tests/VectorFuzzerTest.cpp @@ -19,18 +19,25 @@ #include #include "velox/common/memory/Memory.h" +#include "velox/functions/prestosql/types/JsonType.h" #include "velox/type/TypeEncodingUtil.h" #include "velox/vector/DictionaryVector.h" +#include "velox/vector/fuzzer/ConstrainedGenerators.h" #include "velox/vector/fuzzer/VectorFuzzer.h" using namespace facebook::velox; namespace { +using facebook::velox::fuzzer::JsonInputGenerator; +using facebook::velox::fuzzer::RandomInputGenerator; +using facebook::velox::fuzzer::SetConstrainedGenerator; + class VectorFuzzerTest : public testing::Test { public: static void SetUpTestCase() { memory::MemoryManager::testingSetInstance({}); + registerJsonType(); } memory::MemoryPool* pool() const { @@ -984,4 +991,84 @@ TEST_F(VectorFuzzerTest, randTypeByWidth) { ASSERT_GE(approximateTypeEncodingwidth(type), width); } } + +TEST_F(VectorFuzzerTest, json) { + VectorFuzzer::Options opts; + VectorFuzzer fuzzer(opts, pool()); + + const uint32_t kSize = 10; + for (auto i = 0; i < 10; ++i) { + auto result = fuzzer.fuzz(JSON(), kSize); + EXPECT_TRUE(result != nullptr); + EXPECT_TRUE(isJsonType(result->type())); + EXPECT_EQ(result->size(), kSize); + + DecodedVector decoded; + decoded.decode(*result, SelectivityVector(kSize)); + folly::dynamic json; + folly::json::serialization_opts opts; + opts.allow_non_string_keys = true; + opts.allow_nan_inf = true; + for (auto j = 0; j < kSize; ++j) { + if (decoded.isNullAt(j)) { + continue; + } + std::string value = decoded.valueAt(j); + try { + json = folly::parseJson(value, opts); + } catch (...) { + EXPECT_TRUE(false); + } + } + } +} + +TEST_F(VectorFuzzerTest, jsonConstrained) { + VectorFuzzer::Options opts; + VectorFuzzer fuzzer(opts, pool()); + + const TypePtr type = ARRAY(ROW({BIGINT()})); + std::shared_ptr generator = + std::make_shared( + 0, + JSON(), + 0.2, + std::make_unique>(0, type, 0.3)); + + const uint32_t kSize = 1000; + const auto& jsonOpts = generator->serializationOptions(); + DecodedVector decoded; + for (auto i = 0; i < 10; ++i) { + auto vector = fuzzer.fuzz(JSON(), kSize, generator); + VELOX_CHECK_NOT_NULL(vector); + VELOX_CHECK_EQ(vector->type()->kind(), TypeKind::VARCHAR); + decoded.decode(*vector, SelectivityVector(kSize)); + for (auto j = 0; j < kSize; ++j) { + if (decoded.isNullAt(j)) { + continue; + } + std::string value = decoded.valueAt(j); + folly::dynamic json; + EXPECT_NO_THROW(json = folly::parseJson(value, jsonOpts)); + EXPECT_TRUE(json.isNull() || json.isArray()); + } + } +} + +TEST_F(VectorFuzzerTest, setConstrained) { + VectorFuzzer::Options opts; + VectorFuzzer fuzzer(opts, pool()); + + std::shared_ptr generator = + std::make_shared( + 0, VARCHAR(), std::vector{variant("a"), variant("b")}); + const uint32_t kSize = 1000; + auto vector = fuzzer.fuzz(VARCHAR(), kSize, generator); + + DecodedVector decoded(*vector, SelectivityVector(kSize)); + for (auto i = 0; i < kSize; ++i) { + std::string value = decoded.valueAt(i); + EXPECT_TRUE(value == "a" || value == "b"); + } +} } // namespace