diff --git a/config/plugin_config b/config/plugin_config index e3ac0f1d046..7ed45eed8e7 100644 --- a/config/plugin_config +++ b/config/plugin_config @@ -88,4 +88,5 @@ connector-web3j connector-milvus connector-activemq connector-sls ---end-- \ No newline at end of file +connector-cdc-opengauss +--end-- diff --git a/docs/en/connector-v2/sink/Kafka.md b/docs/en/connector-v2/sink/Kafka.md index 6447faa159a..0846c60a730 100644 --- a/docs/en/connector-v2/sink/Kafka.md +++ b/docs/en/connector-v2/sink/Kafka.md @@ -43,6 +43,9 @@ They can be downloaded via install-plugin.sh or from the Maven central repositor | format | String | No | json | Data format. The default format is json. Optional text format, canal_json, debezium_json, ogg_json and avro.If you use json or text format. The default field separator is ", ". If you customize the delimiter, add the "field_delimiter" option.If you use canal format, please refer to [canal-json](../formats/canal-json.md) for details.If you use debezium format, please refer to [debezium-json](../formats/debezium-json.md) for details. | | field_delimiter | String | No | , | Customize the field delimiter for data format. | | common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](../sink-common-options.md) for details | +| protobuf_message_name | String | No | - | Effective when the format is set to protobuf, specifies the Message name | +| protobuf_schema | String | No | - | Effective when the format is set to protobuf, specifies the Schema definition | + ## Parameter Interpretation @@ -213,3 +216,56 @@ sink { } ``` + +### Protobuf Configuration + +Set the `format` to `protobuf` and configure the `protobuf` data structure using the `protobuf_message_name` and `protobuf_schema` parameters. + +Example Usage: + +```hocon +sink { + kafka { + topic = "test_protobuf_topic_fake_source" + bootstrap.servers = "kafkaCluster:9092" + format = protobuf + kafka.request.timeout.ms = 60000 + kafka.config = { + acks = "all" + request.timeout.ms = 60000 + buffer.memory = 33554432 + } + protobuf_message_name = Person + protobuf_schema = """ + syntax = "proto3"; + + package org.apache.seatunnel.format.protobuf; + + option java_outer_classname = "ProtobufE2E"; + + message Person { + int32 c_int32 = 1; + int64 c_int64 = 2; + float c_float = 3; + double c_double = 4; + bool c_bool = 5; + string c_string = 6; + bytes c_bytes = 7; + + message Address { + string street = 1; + string city = 2; + string state = 3; + string zip = 4; + } + + Address address = 8; + + map attributes = 9; + + repeated string phone_numbers = 10; + } + """ + } +} +``` diff --git a/docs/en/connector-v2/source/Jdbc.md b/docs/en/connector-v2/source/Jdbc.md index 31257d85b12..44a8a7f3dff 100644 --- a/docs/en/connector-v2/source/Jdbc.md +++ b/docs/en/connector-v2/source/Jdbc.md @@ -39,7 +39,7 @@ supports query SQL and can achieve projection effect. 
## Options -| name | type | required | default value | description | +| name | type | required | default value | description | |--------------------------------------------|---------|----------|-----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:postgresql://localhost/test | | driver | String | Yes | - | The jdbc class name used to connect to the remote data source, if you use MySQL the value is `com.mysql.cj.jdbc.Driver`. | @@ -52,6 +52,7 @@ supports query SQL and can achieve projection effect. | partition_upper_bound | Long | No | - | The partition_column max value for scan, if not set SeaTunnel will query database get max value. | | partition_lower_bound | Long | No | - | The partition_column min value for scan, if not set SeaTunnel will query database get min value. | | partition_num | Int | No | job parallelism | Not recommended for use, The correct approach is to control the number of split through `split.size`
How many splits do we need to split into, only support positive integer. default value is job parallelism. | +| decimal_type_narrowing | Boolean | No | true | Decimal type narrowing, if true, the decimal type will be narrowed to the int or long type if without loss of precision. Only support for Oracle at now. Please refer to `decimal_type_narrowing` below | | use_select_count | Boolean | No | false | Use select count for table count rather then other methods in dynamic chunk split stage. This is currently only available for jdbc-oracle.In this scenario, select count directly is used when it is faster to update statistics using sql from analysis table | | skip_analyze | Boolean | No | false | Skip the analysis of table count in dynamic chunk split stage. This is currently only available for jdbc-oracle.In this scenario, you schedule analysis table sql to update related table statistics periodically or your table data does not change frequently | | fetch_size | Int | No | 0 | For queries that return a large number of objects, you can configure the row fetch size used in the query to improve performance by reducing the number database hits required to satisfy the selection criteria. Zero means use jdbc default value. | @@ -66,6 +67,28 @@ supports query SQL and can achieve projection effect. | split.inverse-sampling.rate | Int | No | 1000 | The inverse of the sampling rate used in the sample sharding strategy. For example, if this value is set to 1000, it means a 1/1000 sampling rate is applied during the sampling process. This option provides flexibility in controlling the granularity of the sampling, thus affecting the final number of shards. It's especially useful when dealing with very large datasets where a lower sampling rate is preferred. The default value is 1000. | | common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details. | +### decimal_type_narrowing + +Decimal type narrowing, if true, the decimal type will be narrowed to the int or long type if without loss of precision. Only support for Oracle at now. + +eg: + +decimal_type_narrowing = true + +| Oracle | SeaTunnel | +|---------------|-----------| +| NUMBER(1, 0) | Boolean | +| NUMBER(6, 0) | INT | +| NUMBER(10, 0) | BIGINT | + +decimal_type_narrowing = false + +| Oracle | SeaTunnel | +|---------------|----------------| +| NUMBER(1, 0) | Decimal(1, 0) | +| NUMBER(6, 0) | Decimal(6, 0) | +| NUMBER(10, 0) | Decimal(10, 0) | + ## Parallel Reader The JDBC Source connector supports parallel reading of data from tables. SeaTunnel will use certain rules to split the data in the table, which will be handed over to readers for reading. The number of readers is determined by the `parallelism` option. diff --git a/docs/en/connector-v2/source/Opengauss-CDC.md b/docs/en/connector-v2/source/Opengauss-CDC.md new file mode 100644 index 00000000000..81691ea1ff4 --- /dev/null +++ b/docs/en/connector-v2/source/Opengauss-CDC.md @@ -0,0 +1,170 @@ +# Opengauss CDC + +> Opengauss CDC source connector + +## Support Those Engines + +> SeaTunnel Zeta
+> Flink
+ +## Key features + +- [ ] [batch](../../concept/connector-v2-features.md) +- [x] [stream](../../concept/connector-v2-features.md) +- [x] [exactly-once](../../concept/connector-v2-features.md) +- [ ] [column projection](../../concept/connector-v2-features.md) +- [x] [parallelism](../../concept/connector-v2-features.md) +- [x] [support user-defined split](../../concept/connector-v2-features.md) + +## Description + +The Opengauss CDC connector allows reading snapshot data and incremental data from the Opengauss database. This document +describes how to set up the Opengauss CDC connector to run SQL queries against Opengauss databases. + +## Using steps + +> Here are the steps to enable CDC (Change Data Capture) in Opengauss: + +1. Ensure that wal_level is set to logical; you can modify the configuration directly with the following SQL commands: + +```sql +ALTER SYSTEM SET wal_level TO 'logical'; +SELECT pg_reload_conf(); +``` + +2. Change the REPLICA policy of the specified table to FULL + +```sql +ALTER TABLE your_table_name REPLICA IDENTITY FULL; +``` + +If you have multiple tables, you can use the result of this SQL to change the REPLICA policy of all tables to FULL: + +```sql +select 'ALTER TABLE ' || schemaname || '.' || tablename || ' REPLICA IDENTITY FULL;' from pg_tables where schemaname = 'YourTableSchema' +``` + +## Data Type Mapping + +| Opengauss Data type                                                                       | SeaTunnel Data type                                                                                                                              | +|-----------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------| +| BOOL
| BOOLEAN | +| BYTEA
| BYTES | +| INT2
SMALLSERIAL
INT4
SERIAL
| INT | +| INT8
BIGSERIAL
| BIGINT | +| FLOAT4
| FLOAT | +| FLOAT8
| DOUBLE | +| NUMERIC(Get the designated column's specified column size>0) | DECIMAL(Get the designated column's specified column size,Gets the number of digits in the specified column to the right of the decimal point) | +| NUMERIC(Get the designated column's specified column size<0) | DECIMAL(38, 18) | +| BPCHAR
CHARACTER
VARCHAR
TEXT
GEOMETRY
GEOGRAPHY
JSON
JSONB | STRING | +| TIMESTAMP
| TIMESTAMP | +| TIME
| TIME | +| DATE
| DATE | +| OTHER DATA TYPES | NOT SUPPORTED YET | + +## Source Options + +| Name | Type | Required | Default | Description | +|------------------------------------------------|----------|----------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| base-url | String | Yes | - | The URL of the JDBC connection. Refer to a case: `jdbc:postgresql://localhost:5432/postgres_cdc?loggerLevel=OFF`. | +| username | String | Yes | - | Username of the database to use when connecting to the database server. | +| password | String | Yes | - | Password to use when connecting to the database server. | +| database-names | List | No | - | Database name of the database to monitor. | +| table-names | List | Yes | - | Table name of the database to monitor. The table name needs to include the database name, for example: `database_name.table_name` | +| table-names-config | List | No | - | Table config list. for example: [{"table": "db1.schema1.table1","primaryKeys":["key1"]}] | +| startup.mode | Enum | No | INITIAL | Optional startup mode for Opengauss CDC consumer, valid enumerations are `initial`, `earliest`, `latest` and `specific`.
`initial`: Synchronize historical data at startup, and then synchronize incremental data.
`earliest`: Start from the earliest possible offset.
`latest`: Start from the latest offset.
`specific`: Startup from user-supplied specific offsets. | +| snapshot.split.size | Integer | No | 8096 | The split size (number of rows) of table snapshot, captured tables are split into multiple splits when read the snapshot of table. | +| snapshot.fetch.size | Integer | No | 1024 | The maximum fetch size for per poll when read table snapshot. | +| slot.name | String | No | - | The name of the Opengauss logical decoding slot that was created for streaming changes from a particular plug-in for a particular database/schema. The server uses this slot to stream events to the connector that you are configuring. Default is seatunnel. | +| decoding.plugin.name | String | No | pgoutput | The name of the Postgres logical decoding plug-in installed on the server,Supported values are decoderbufs, wal2json, wal2json_rds, wal2json_streaming,wal2json_rds_streaming and pgoutput. | +| server-time-zone | String | No | UTC | The session time zone in database server. If not set, then ZoneId.systemDefault() is used to determine the server time zone. | +| connect.timeout.ms | Duration | No | 30000 | The maximum time that the connector should wait after trying to connect to the database server before timing out. | +| connect.max-retries | Integer | No | 3 | The max retry times that the connector should retry to build database server connection. | +| connection.pool.size | Integer | No | 20 | The jdbc connection pool size. | +| chunk-key.even-distribution.factor.upper-bound | Double | No | 100 | The upper bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be less than or equal to this upper bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is greater, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 100.0. | +| chunk-key.even-distribution.factor.lower-bound | Double | No | 0.05 | The lower bound of the chunk key distribution factor. This factor is used to determine whether the table data is evenly distributed. If the distribution factor is calculated to be greater than or equal to this lower bound (i.e., (MAX(id) - MIN(id) + 1) / row count), the table chunks would be optimized for even distribution. Otherwise, if the distribution factor is less, the table will be considered as unevenly distributed and the sampling-based sharding strategy will be used if the estimated shard count exceeds the value specified by `sample-sharding.threshold`. The default value is 0.05. | +| sample-sharding.threshold | Integer | No | 1000 | This configuration specifies the threshold of estimated shard count to trigger the sample sharding strategy. When the distribution factor is outside the bounds specified by `chunk-key.even-distribution.factor.upper-bound` and `chunk-key.even-distribution.factor.lower-bound`, and the estimated shard count (calculated as approximate row count / chunk size) exceeds this threshold, the sample sharding strategy will be used. This can help to handle large datasets more efficiently. The default value is 1000 shards. | +| inverse-sampling.rate | Integer | No | 1000 | The inverse of the sampling rate used in the sample sharding strategy. 
For example, if this value is set to 1000, it means a 1/1000 sampling rate is applied during the sampling process. This option provides flexibility in controlling the granularity of the sampling, thus affecting the final number of shards. It's especially useful when dealing with very large datasets where a lower sampling rate is preferred. The default value is 1000. | +| exactly_once | Boolean | No | false | Enable exactly once semantic. | +| format | Enum | No | DEFAULT | Optional output format for Opengauss CDC, valid enumerations are `DEFAULT`, `COMPATIBLE_DEBEZIUM_JSON`. | +| debezium | Config | No | - | Pass-through [Debezium's properties](https://github.com/debezium/debezium/blob/v1.9.8.Final/documentation/modules/ROOT/pages/connectors/postgresql.adoc#connector-configuration-properties) to Debezium Embedded Engine which is used to capture data changes from Opengauss server. | +| common-options | | no | - | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details | + +## Task Example + +### Simple + +> Support multi-table reading + +``` + +env { + # You can set engine configuration here + execution.parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 + read_limit.bytes_per_second=7000000 + read_limit.rows_per_second=400 +} + +source { + Opengauss-CDC { + result_table_name = "customers_opengauss_cdc" + username = "gaussdb" + password = "openGauss@123" + database-names = ["opengauss_cdc"] + schema-names = ["inventory"] + table-names = ["opengauss_cdc.inventory.opengauss_cdc_table_1","opengauss_cdc.inventory.opengauss_cdc_table_2"] + base-url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc" + decoding.plugin.name = "pgoutput" + } +} + +transform { + +} + +sink { + jdbc { + source_table_name = "customers_opengauss_cdc" + url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc" + driver = "org.postgresql.Driver" + user = "dailai" + password = "openGauss@123" + + compatible_mode="postgresLow" + generate_sink_sql = true + # You need to configure both database and table + database = "opengauss_cdc" + schema = "inventory" + tablePrefix = "sink_" + primary_keys = ["id"] + } +} + +``` + +### Support custom primary key for table + +``` +source { + Opengauss-CDC { + result_table_name = "customers_opengauss_cdc" + username = "gaussdb" + password = "openGauss@123" + database-names = ["opengauss_cdc"] + schema-names = ["inventory"] + table-names = ["opengauss_cdc.inventory.full_types_no_primary_key"] + base-url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc?loggerLevel=OFF" + decoding.plugin.name = "pgoutput" + exactly_once = true + table-names-config = [ + { + table = "opengauss_cdc.inventory.full_types_no_primary_key" + primaryKeys = ["id"] + } + ] + } +} +``` + diff --git a/docs/en/connector-v2/source/PostgreSQL.md b/docs/en/connector-v2/source/PostgreSQL.md index 59d89c89771..101902d3618 100644 --- a/docs/en/connector-v2/source/PostgreSQL.md +++ b/docs/en/connector-v2/source/PostgreSQL.md @@ -74,24 +74,9 @@ Read external data source data through JDBC. 
## Options -| Name | Type | Required | Default | Description | -|------------------------------|------------|----------|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:postgresql://localhost:5432/test | -| driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
if you use PostgreSQL the value is `org.postgresql.Driver`. | -| user | String | No | - | Connection instance user name | -| password | String | No | - | Connection instance password | -| query | String | Yes | - | Query statement | -| connection_check_timeout_sec | Int | No | 30 | The time in seconds to wait for the database operation used to validate the connection to complete | -| partition_column | String | No | - | The column name for parallelism's partition, only support numeric type,Only support numeric type primary key, and only can config one column. | -| partition_lower_bound | BigDecimal | No | - | The partition_column min value for scan, if not set SeaTunnel will query database get min value. | -| partition_upper_bound | BigDecimal | No | - | The partition_column max value for scan, if not set SeaTunnel will query database get max value. | -| partition_num | Int | No | job parallelism | The number of partition count, only support positive integer. default value is job parallelism | -| fetch_size | Int | No | 0 | For queries that return a large number of objects,you can configure
the row fetch size used in the query toimprove performance by
reducing the number database hits required to satisfy the selection criteria.
Zero means use jdbc default value. | -| properties | Map | No | - | Additional connection configuration parameters,when properties and URL have the same parameters, the priority is determined by the
specific implementation of the driver. For example, in MySQL, properties take precedence over the URL. | - -| Name | Type | Required | Default | Description | +| Name | Type | Required | Default | Description | |--------------------------------------------|------------|----------|-----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:mysql://localhost:3306:3306/test | +| url | String | Yes | - | The URL of the JDBC connection. Refer to a case: jdbc:postgresql://localhost:5432/test | | driver | String | Yes | - | The jdbc class name used to connect to the remote data source,
if you use MySQL the value is `com.mysql.cj.jdbc.Driver`. | | user | String | No | - | Connection instance user name | | password | String | No | - | Connection instance password | diff --git a/docs/en/connector-v2/source/Tablestore.md b/docs/en/connector-v2/source/Tablestore.md new file mode 100644 index 00000000000..8e0d1aeebc7 --- /dev/null +++ b/docs/en/connector-v2/source/Tablestore.md @@ -0,0 +1,102 @@ +# Tablestore + +> Tablestore source connector + +## Description + +Read data from Alicloud Tablestoreļ¼Œsupport full and CDC. + + +## Key features + +- [ ] [batch](../../concept/connector-v2-features.md) +- [X] [stream](../../concept/connector-v2-features.md) +- [ ] [exactly-once](../../concept/connector-v2-features.md) +- [ ] [column projection](../../concept/connector-v2-features.md) +- [ ] [parallelism](../../concept/connector-v2-features.md) +- [ ] [support user-defined split](../../concept/connector-v2-features.md) + +## Options + +| name | type | required | default value | +|-----------------------|--------|----------|---------------| +| end_point | string | yes | - | +| instance_name | string | yes | - | +| access_key_id | string | yes | - | +| access_key_secret | string | yes | - | +| table | string | yes | - | +| primary_keys | array | yes | - | +| schema | config | yes | - | + + +### end_point [string] + +The endpoint of Tablestore. + +### instance_name [string] + +The intance name of Tablestore. + +### access_key_id [string] + +The access id of Tablestore. + +### access_key_secret [string] + +The access secret of Tablestore. + +### table [string] + +The table name of Tablestore. + +### primary_keys [array] + +The primarky key of table,just add a unique primary key. + +### schema [Config] + + + +## Example + +```bash +env { + parallelism = 1 + job.mode = "STREAMING" +} + +source { + # This is a example source plugin **only for test and demonstrate the feature source plugin** + Tablestore { + end_point = "https://****.cn-zhangjiakou.tablestore.aliyuncs.com" + instance_name = "****" + access_key_id="***************2Ag5" + access_key_secret="***********2Dok" + table="test" + primary_keys=["id"] + schema={ + fields { + id = string + name = string + } + } + } +} + + +sink { + MongoDB{ + uri = "mongodb://localhost:27017" + database = "test" + collection = "test" + primary-key = ["id"] + schema = { + fields { + id = string + name = string + } + } + } +} +``` + diff --git a/docs/en/connector-v2/source/kafka.md b/docs/en/connector-v2/source/kafka.md index 42106fc7d5f..e419a218001 100644 --- a/docs/en/connector-v2/source/kafka.md +++ b/docs/en/connector-v2/source/kafka.md @@ -32,24 +32,26 @@ They can be downloaded via install-plugin.sh or from the Maven central repositor ## Source Options -| Name | Type | Required | Default | Description | -|-------------------------------------|-----------------------------------------------------------------------------|----------|--------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| topic | String | Yes | - | Topic name(s) to read data from when the table is used as source. 
It also supports topic list for source by separating topic by comma like 'topic-1,topic-2'. | -| table_list | Map | No | - | Topic list config You can configure only one `table_list` and one `topic` at the same time | -| bootstrap.servers | String | Yes | - | Comma separated list of Kafka brokers. | -| pattern | Boolean | No | false | If `pattern` is set to `true`,the regular expression for a pattern of topic names to read from. All topics in clients with names that match the specified regular expression will be subscribed by the consumer. | -| consumer.group | String | No | SeaTunnel-Consumer-Group | `Kafka consumer group id`, used to distinguish different consumer groups. | -| commit_on_checkpoint | Boolean | No | true | If true the consumer's offset will be periodically committed in the background. | -| kafka.config | Map | No | - | In addition to the above necessary parameters that must be specified by the `Kafka consumer` client, users can also specify multiple `consumer` client non-mandatory parameters, covering [all consumer parameters specified in the official Kafka document](https://kafka.apache.org/documentation.html#consumerconfigs). | -| schema | Config | No | - | The structure of the data, including field names and field types. | -| format | String | No | json | Data format. The default format is json. Optional text format, canal_json, debezium_json, ogg_json and avro.If you use json or text format. The default field separator is ", ". If you customize the delimiter, add the "field_delimiter" option.If you use canal format, please refer to [canal-json](../formats/canal-json.md) for details.If you use debezium format, please refer to [debezium-json](../formats/debezium-json.md) for details. | -| format_error_handle_way | String | No | fail | The processing method of data format error. The default value is fail, and the optional value is (fail, skip). When fail is selected, data format error will block and an exception will be thrown. When skip is selected, data format error will skip this line data. | -| field_delimiter | String | No | , | Customize the field delimiter for data format. | -| start_mode | StartMode[earliest],[group_offsets],[latest],[specific_offsets],[timestamp] | No | group_offsets | The initial consumption pattern of consumers. | -| start_mode.offsets | Config | No | - | The offset required for consumption mode to be specific_offsets. | -| start_mode.timestamp | Long | No | - | The time required for consumption mode to be "timestamp". | -| partition-discovery.interval-millis | Long | No | -1 | The interval for dynamically discovering topics and partitions. | -| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details | +| Name | Type | Required | Default | Description | +|-------------------------------------|-----------------------------------------------------------------------------|----------|--------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| topic | String | Yes | - | Topic name(s) to read data from when the table is used as source. 
It also supports topic list for source by separating topic by comma like 'topic-1,topic-2'. | +| table_list | Map | No | - | Topic list config You can configure only one `table_list` and one `topic` at the same time | +| bootstrap.servers | String | Yes | - | Comma separated list of Kafka brokers. | +| pattern | Boolean | No | false | If `pattern` is set to `true`,the regular expression for a pattern of topic names to read from. All topics in clients with names that match the specified regular expression will be subscribed by the consumer. | +| consumer.group | String | No | SeaTunnel-Consumer-Group | `Kafka consumer group id`, used to distinguish different consumer groups. | +| commit_on_checkpoint | Boolean | No | true | If true the consumer's offset will be periodically committed in the background. | +| kafka.config | Map | No | - | In addition to the above necessary parameters that must be specified by the `Kafka consumer` client, users can also specify multiple `consumer` client non-mandatory parameters, covering [all consumer parameters specified in the official Kafka document](https://kafka.apache.org/documentation.html#consumerconfigs). | +| schema | Config | No | - | The structure of the data, including field names and field types. | +| format | String | No | json | Data format. The default format is json. Optional text format, canal_json, debezium_json, ogg_json, avro and protobuf. If you use json or text format. The default field separator is ", ". If you customize the delimiter, add the "field_delimiter" option.If you use canal format, please refer to [canal-json](../formats/canal-json.md) for details.If you use debezium format, please refer to [debezium-json](../formats/debezium-json.md) for details. | +| format_error_handle_way | String | No | fail | The processing method of data format error. The default value is fail, and the optional value is (fail, skip). When fail is selected, data format error will block and an exception will be thrown. When skip is selected, data format error will skip this line data. | +| field_delimiter | String | No | , | Customize the field delimiter for data format. | +| start_mode | StartMode[earliest],[group_offsets],[latest],[specific_offsets],[timestamp] | No | group_offsets | The initial consumption pattern of consumers. | +| start_mode.offsets | Config | No | - | The offset required for consumption mode to be specific_offsets. | +| start_mode.timestamp | Long | No | - | The time required for consumption mode to be "timestamp". | +| partition-discovery.interval-millis | Long | No | -1 | The interval for dynamically discovering topics and partitions. 
| +| common-options | | No | - | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details | +| protobuf_message_name | String | No | - | Effective when the format is set to protobuf, specifies the Message name | +| protobuf_schema | String | No | - | Effective when the format is set to protobuf, specifies the Schema definition | ## Task Example @@ -242,3 +244,47 @@ sink { } ``` + + +```hocon +source { + Kafka { + topic = "test_protobuf_topic_fake_source" + format = protobuf + protobuf_message_name = Person + protobuf_schema = """ + syntax = "proto3"; + + package org.apache.seatunnel.format.protobuf; + + option java_outer_classname = "ProtobufE2E"; + + message Person { + int32 c_int32 = 1; + int64 c_int64 = 2; + float c_float = 3; + double c_double = 4; + bool c_bool = 5; + string c_string = 6; + bytes c_bytes = 7; + + message Address { + string street = 1; + string city = 2; + string state = 3; + string zip = 4; + } + + Address address = 8; + + map attributes = 9; + + repeated string phone_numbers = 10; + } + """ + bootstrap.servers = "kafkaCluster:9092" + start_mode = "earliest" + result_table_name = "kafka_table" + } +} +``` \ No newline at end of file diff --git a/docs/en/seatunnel-engine/checkpoint-storage.md b/docs/en/seatunnel-engine/checkpoint-storage.md index f5dd44e3af6..7027f8067fb 100644 --- a/docs/en/seatunnel-engine/checkpoint-storage.md +++ b/docs/en/seatunnel-engine/checkpoint-storage.md @@ -67,7 +67,6 @@ seatunnel: fs.oss.accessKeyId: your-access-key fs.oss.accessKeySecret: your-secret-key fs.oss.endpoint: endpoint address - fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider ``` For additional reading on the Hadoop Credential Provider API, you can see: [Credential Provider API](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CredentialProviderAPI.html). 
diff --git a/docs/zh/connector-v2/sink/Kafka.md b/docs/zh/connector-v2/sink/Kafka.md index 45117a962d6..283b59c533a 100644 --- a/docs/zh/connector-v2/sink/Kafka.md +++ b/docs/zh/connector-v2/sink/Kafka.md @@ -30,19 +30,21 @@ ## ꎄꔶå™Ø选锹 -| 名ē§° | ē±»åž‹ | ę˜Æå¦éœ€č¦ | é»˜č®¤å€¼ | ꏏčæ° | -|----------------------|--------|------|------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| topic | String | ę˜Æ | - | 当č”Øē”Øä½œęŽ„ę”¶å™Øę—¶ļ¼Œtopic 名ē§°ę˜Æč¦å†™å…„ę•°ę®ēš„ topic | -| bootstrap.servers | String | ę˜Æ | - | Kafka brokers ä½æē”Ø逗号分隔 | -| kafka.config | Map | 否 | - | 除äŗ†äøŠčæ° Kafka Producer å®¢ęˆ·ē«Æåæ…é”»ęŒ‡å®šēš„å‚ę•°å¤–ļ¼Œē”Øꈷčæ˜åÆ仄äøŗ Producer å®¢ęˆ·ē«ÆęŒ‡å®šå¤šäøŖ非å¼ŗåˆ¶å‚ę•°ļ¼Œę¶µē›– [Kafkaå®˜ę–¹ę–‡ę”£äø­ęŒ‡å®šēš„ę‰€ęœ‰ē”Ÿäŗ§č€…å‚ę•°](https://kafka.apache.org/documentation.html#producerconfigs) | -| semantics | String | 否 | NON | åÆ仄选ꋩēš„čƭ义ę˜Æ EXACTLY_ONCE/AT_LEAST_ONCE/NONļ¼Œé»˜č®¤ NON怂 | -| partition_key_fields | Array | 否 | - | 配ē½®å­—ꮵē”Ø作 kafka 궈ęÆēš„key | -| partition | Int | 否 | - | åÆä»„ęŒ‡å®šåˆ†åŒŗļ¼Œę‰€ęœ‰ę¶ˆęÆéƒ½ä¼šå‘é€åˆ°ę­¤åˆ†åŒŗ | -| assign_partitions | Array | 否 | - | åÆä»„ę ¹ę®ę¶ˆęÆēš„内容决定发送å“ŖäøŖ分åŒŗ,čÆ„å‚ę•°ēš„作ē”Øę˜Æ分发äæ”ęÆ | -| transaction_prefix | String | 否 | - | å¦‚ęžœčÆ­ä¹‰ęŒ‡å®šäøŗEXACTLY_ONCEļ¼Œē”Ÿäŗ§č€…å°†ęŠŠę‰€ęœ‰ę¶ˆęÆ写兄äø€äøŖ Kafka äŗ‹åŠ”äø­ļ¼Œkafka 通čæ‡äøåŒēš„ transactionId ę„åŒŗ分äøåŒēš„äŗ‹åŠ”怂čÆ„å‚ę•°ę˜Ækafka transactionIdēš„前ē¼€ļ¼Œē”®äæäøåŒēš„作äøšä½æē”ØäøåŒēš„前ē¼€ | -| format | String | 否 | json | ę•°ę®ę ¼å¼ć€‚é»˜č®¤ę ¼å¼ę˜Æjson怂åÆé€‰ę–‡ęœ¬ę ¼å¼ļ¼Œcanal-json态debezium-json 和 avroć€‚å¦‚ęžœä½æē”Ø json ęˆ–ę–‡ęœ¬ę ¼å¼ć€‚é»˜č®¤å­—ę®µåˆ†éš”ē¬¦ę˜Æ`,`ć€‚å¦‚ęžœč‡Ŗ定义分隔ē¬¦ļ¼ŒčÆ·ę·»åŠ `field_delimiter`é€‰é”¹ć€‚å¦‚ęžœä½æē”Øcanalę ¼å¼ļ¼ŒčÆ·å‚č€ƒ[canal-json](../formats/canal-json.md)ć€‚å¦‚ęžœä½æē”Ødebeziumę ¼å¼ļ¼ŒčƷ参阅 [debezium-json](../formats/debezium-json.md) äŗ†č§£čƦē»†äæ”ęÆ | -| field_delimiter | String | 否 | , | č‡Ŗå®šä¹‰ę•°ę®ę ¼å¼ēš„å­—ę®µåˆ†éš”ē¬¦ | -| common-options | | 否 | - | Sinkę’ä»¶åøøē”Øå‚ę•°ļ¼ŒčÆ·å‚č€ƒ [Sinkåøøē”Ø选锹 ](../sink-common-options.md) äŗ†č§£čÆ¦ęƒ… | +| 名ē§° | ē±»åž‹ | ę˜Æå¦éœ€č¦ | é»˜č®¤å€¼ | ꏏčæ° | +|----------------------|--------|------|------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| topic | String | ę˜Æ | - | 当č”Øē”Øä½œęŽ„ę”¶å™Øę—¶ļ¼Œtopic 名ē§°ę˜Æč¦å†™å…„ę•°ę®ēš„ topic | +| bootstrap.servers | String | ę˜Æ | - | Kafka brokers ä½æē”Ø逗号分隔 | +| kafka.config | Map | 否 | - | 除äŗ†äøŠčæ° Kafka Producer å®¢ęˆ·ē«Æåæ…é”»ęŒ‡å®šēš„å‚ę•°å¤–ļ¼Œē”Øꈷčæ˜åÆ仄äøŗ Producer å®¢ęˆ·ē«ÆęŒ‡å®šå¤šäøŖ非å¼ŗåˆ¶å‚ę•°ļ¼Œę¶µē›– [Kafkaå®˜ę–¹ę–‡ę”£äø­ęŒ‡å®šēš„ę‰€ęœ‰ē”Ÿäŗ§č€…å‚ę•°](https://kafka.apache.org/documentation.html#producerconfigs) | +| semantics | String | 否 | NON | åÆ仄选ꋩēš„čƭ义ę˜Æ EXACTLY_ONCE/AT_LEAST_ONCE/NONļ¼Œé»˜č®¤ NON怂 | +| partition_key_fields | Array | 否 | - | 配ē½®å­—ꮵē”Ø作 kafka 궈ęÆēš„key | +| partition | Int | 否 | - | åÆä»„ęŒ‡å®šåˆ†åŒŗļ¼Œę‰€ęœ‰ę¶ˆęÆéƒ½ä¼šå‘é€åˆ°ę­¤åˆ†åŒŗ | +| assign_partitions | Array | 否 | - | åÆä»„ę ¹ę®ę¶ˆęÆēš„内容决定发送å“ŖäøŖ分åŒŗ,čÆ„å‚ę•°ēš„作ē”Øę˜Æ分发äæ”ęÆ | +| transaction_prefix | String | 否 | - | å¦‚ęžœčÆ­ä¹‰ęŒ‡å®šäøŗEXACTLY_ONCEļ¼Œē”Ÿäŗ§č€…å°†ęŠŠę‰€ęœ‰ę¶ˆęÆ写兄äø€äøŖ 
Kafka äŗ‹åŠ”äø­ļ¼Œkafka 通čæ‡äøåŒēš„ transactionId ę„åŒŗ分äøåŒēš„äŗ‹åŠ”怂čÆ„å‚ę•°ę˜Ækafka transactionIdēš„前ē¼€ļ¼Œē”®äæäøåŒēš„作äøšä½æē”ØäøåŒēš„前ē¼€ | +| format | String | 否 | json | ę•°ę®ę ¼å¼ć€‚é»˜č®¤ę ¼å¼ę˜Æjson怂åÆé€‰ę–‡ęœ¬ę ¼å¼ļ¼Œcanal-json态debezium-json 态 avro 和 protobufć€‚å¦‚ęžœä½æē”Ø json ęˆ–ę–‡ęœ¬ę ¼å¼ć€‚é»˜č®¤å­—ę®µåˆ†éš”ē¬¦ę˜Æ`,`ć€‚å¦‚ęžœč‡Ŗ定义分隔ē¬¦ļ¼ŒčÆ·ę·»åŠ `field_delimiter`é€‰é”¹ć€‚å¦‚ęžœä½æē”Øcanalę ¼å¼ļ¼ŒčÆ·å‚č€ƒ[canal-json](../formats/canal-json.md)ć€‚å¦‚ęžœä½æē”Ødebeziumę ¼å¼ļ¼ŒčƷ参阅 [debezium-json](../formats/debezium-json.md) äŗ†č§£čƦē»†äæ”ęÆ | +| field_delimiter | String | 否 | , | č‡Ŗå®šä¹‰ę•°ę®ę ¼å¼ēš„å­—ę®µåˆ†éš”ē¬¦ | +| common-options | | 否 | - | Sinkę’ä»¶åøøē”Øå‚ę•°ļ¼ŒčÆ·å‚č€ƒ [Sinkåøøē”Ø选锹 ](../sink-common-options.md) äŗ†č§£čÆ¦ęƒ… | +|protobuf_message_name|String|否|-| format配ē½®äøŗprotobufę—¶ē”Ÿę•ˆļ¼Œå–Message名ē§° | +|protobuf_schema|String|否|-| format配ē½®äøŗprotobufę—¶ē”Ÿę•ˆå–Schema名ē§° | ## å‚ę•°č§£é‡Š @@ -194,3 +196,56 @@ sink { } ``` +### Protobuf配ē½® + +`format` č®¾ē½®äøŗ `protobuf`ļ¼Œé…ē½®`protobuf`ę•°ę®ē»“ęž„ļ¼Œ`protobuf_message_name`和`protobuf_schema`å‚ę•° + +ä½æē”Øę ·ä¾‹ļ¼š + +```hocon +sink { + kafka { + topic = "test_protobuf_topic_fake_source" + bootstrap.servers = "kafkaCluster:9092" + format = protobuf + kafka.request.timeout.ms = 60000 + kafka.config = { + acks = "all" + request.timeout.ms = 60000 + buffer.memory = 33554432 + } + protobuf_message_name = Person + protobuf_schema = """ + syntax = "proto3"; + + package org.apache.seatunnel.format.protobuf; + + option java_outer_classname = "ProtobufE2E"; + + message Person { + int32 c_int32 = 1; + int64 c_int64 = 2; + float c_float = 3; + double c_double = 4; + bool c_bool = 5; + string c_string = 6; + bytes c_bytes = 7; + + message Address { + string street = 1; + string city = 2; + string state = 3; + string zip = 4; + } + + Address address = 8; + + map attributes = 9; + + repeated string phone_numbers = 10; + } + """ + } +} +``` + diff --git a/docs/zh/connector-v2/source/Kafka.md b/docs/zh/connector-v2/source/Kafka.md new file mode 100644 index 00000000000..63cb174bb9c --- /dev/null +++ b/docs/zh/connector-v2/source/Kafka.md @@ -0,0 +1,288 @@ +# Kafka + +> Kafka ęŗčæžęŽ„å™Ø + +## ę”ÆęŒä»„äø‹å¼•ę“Ž + +> Spark
+> Flink
+> SeaTunnel Zeta
+ +## äø»č¦åŠŸčƒ½ + +- [x] [ę‰¹å¤„ē†](../../concept/connector-v2-features.md) +- [x] [ęµå¤„ē†](../../concept/connector-v2-features.md) +- [x] [ē²¾ē”®äø€ę¬”](../../concept/connector-v2-features.md) +- [ ] [åˆ—ęŠ•å½±](../../concept/connector-v2-features.md) +- [x] [å¹¶č”Œåŗ¦](../../concept/connector-v2-features.md) +- [ ] [ę”Æꌁē”Øęˆ·å®šä¹‰ę‹†åˆ†](../../concept/connector-v2-features.md) + +## ꏏčæ° + +ē”ØäŗŽ Apache Kafka ēš„ęŗčæžęŽ„å™Ø怂 + +## ę”Æꌁēš„ę•°ę®ęŗäæ”ęÆ + +ä½æē”Ø Kafka čæžęŽ„å™Øéœ€č¦ä»„äø‹ä¾čµ–é”¹ć€‚ +åÆ仄通čæ‡ install-plugin.sh äø‹č½½ęˆ–从 Maven äø­å¤®ä»“åŗ“čŽ·å–ć€‚ + +| ę•°ę®ęŗ | ę”Æꌁēš„ē‰ˆęœ¬ | Maven äø‹č½½é“¾ęŽ„ | +| ------------ | ------------- | ----------------------------------------------------------------------------------------------- | +| Kafka | 通ē”Øē‰ˆęœ¬ | [äø‹č½½](https://mvnrepository.com/artifact/org.apache.seatunnel/seatunnel-connectors-v2/connector-kafka) | + +## ęŗé€‰é”¹ + +| 名ē§° | ē±»åž‹ | ę˜Æ否åæ…唫 | é»˜č®¤å€¼ | ꏏčæ° | +| ----------------------------------- | ----------------------------------------- | -------- | -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| topic | String | ę˜Æ | - | ä½æē”Øč”Ø作äøŗę•°ę®ęŗę—¶č¦čÆ»å–ę•°ę®ēš„äø»é¢˜åē§°ć€‚它也ę”Æꌁ通čæ‡é€—号分隔ēš„多äøŖäø»é¢˜åˆ—č”Øļ¼Œä¾‹å¦‚ 'topic-1,topic-2'怂 | +| table_list | Map | 否 | - | äø»é¢˜åˆ—č”Ø配ē½®ļ¼Œä½ åÆä»„åŒę—¶é…ē½®äø€äøŖ `table_list` 和äø€äøŖ `topic`怂 | +| bootstrap.servers | String | ę˜Æ | - | 逗号分隔ēš„ Kafka brokers 列č”Ø怂 | +| pattern | Boolean | 否 | false | å¦‚ęžœ `pattern` č®¾ē½®äøŗ `true`ļ¼Œåˆ™ä¼šä½æē”ØęŒ‡å®šēš„ę­£åˆ™č”Øč¾¾å¼åŒ¹é…å¹¶č®¢é˜…äø»é¢˜ć€‚ | +| consumer.group | String | 否 | SeaTunnel-Consumer-Group | `Kafka ę¶ˆč“¹č€…ē»„ ID`ļ¼Œē”ØäŗŽåŒŗ分äøåŒēš„ę¶ˆč“¹č€…ē»„怂 | +| commit_on_checkpoint | Boolean | 否 | true | å¦‚ęžœäøŗ trueļ¼Œę¶ˆč“¹č€…ēš„偏ē§»é‡å°†ä¼šå®šęœŸåœØåŽå°ęäŗ¤ć€‚ | +| kafka.config | Map | 否 | - | 除äŗ†äøŠčæ°åæ…č¦å‚ę•°å¤–ļ¼Œē”Øꈷčæ˜åÆä»„ęŒ‡å®šå¤šäøŖ非å¼ŗ制ēš„ę¶ˆč“¹č€…å®¢ęˆ·ē«Æå‚ę•°ļ¼Œč¦†ē›– [Kafka å®˜ę–¹ę–‡ę”£](https://kafka.apache.org/documentation.html#consumerconfigs) äø­ęŒ‡å®šēš„ę‰€ęœ‰ę¶ˆč“¹č€…å‚ę•°ć€‚ | +| schema | Config | 否 | - | ę•°ę®ē»“ęž„ļ¼ŒåŒ…ę‹¬å­—ę®µåē§°å’Œå­—ꮵē±»åž‹ć€‚ | +| format | String | 否 | json | ę•°ę®ę ¼å¼ć€‚é»˜č®¤ę ¼å¼äøŗ json怂åÆé€‰ę ¼å¼åŒ…ę‹¬ text, canal_json, debezium_json, ogg_json, avro 和 protobufć€‚é»˜č®¤å­—ę®µåˆ†éš”ē¬¦äøŗ ", "ć€‚å¦‚ęžœč‡Ŗ定义分隔ē¬¦ļ¼Œę·»åŠ  "field_delimiter" é€‰é”¹ć€‚å¦‚ęžœä½æē”Ø canal ę ¼å¼ļ¼ŒčÆ·å‚č€ƒ [canal-json](../formats/canal-json.md) äŗ†č§£čƦē»†äæ”ęÆć€‚å¦‚ęžœä½æē”Ø debezium ę ¼å¼ļ¼ŒčÆ·å‚č€ƒ [debezium-json](../formats/debezium-json.md)怂 | +| format_error_handle_way | String | 否 | fail | ę•°ę®ę ¼å¼é”™čÆÆēš„处ē†ę–¹å¼ć€‚é»˜č®¤å€¼äøŗ failļ¼ŒåÆ选值äøŗ fail 和 skipć€‚å½“é€‰ę‹© fail ę—¶ļ¼Œę•°ę®ę ¼å¼é”™čÆÆå°†é˜»å”žå¹¶ęŠ›å‡ŗ异åøøć€‚å½“é€‰ę‹© skip ę—¶ļ¼Œę•°ę®ę ¼å¼é”™čÆÆå°†č·³čæ‡ę­¤č”Œę•°ę®ć€‚ | +| field_delimiter | String | 否 | , | č‡Ŗå®šä¹‰ę•°ę®ę ¼å¼ēš„å­—ę®µåˆ†éš”ē¬¦ć€‚ | +| start_mode | StartMode[earliest],[group_offsets] | 否 | group_offsets | ę¶ˆč“¹č€…ēš„åˆå§‹ę¶ˆč“¹ęØ”å¼ć€‚ | +| start_mode.offsets | Config | 否 | - | ē”ØäŗŽ specific_offsets ę¶ˆč“¹ęؔ式ēš„偏ē§»é‡ć€‚ | +| start_mode.timestamp | Long | 否 | - | ē”ØäŗŽ "timestamp" 
ę¶ˆč“¹ęؔ式ēš„ꗶ闓怂 | +| partition-discovery.interval-millis | Long | 否 | -1 | åŠØę€å‘ēŽ°äø»é¢˜å’Œåˆ†åŒŗēš„é—“隔ꗶ闓怂 | +| common-options | | 否 | - | ęŗę’件ēš„åøøč§å‚ę•°ļ¼ŒčÆ¦ęƒ…čÆ·å‚č€ƒ [Source Common Options](../source-common-options.md)怂 | +| protobuf_message_name | String | 否 | - | å½“ę ¼å¼č®¾ē½®äøŗ protobuf ę—¶ęœ‰ę•ˆļ¼ŒęŒ‡å®šę¶ˆęÆ名ē§°ć€‚ | +| protobuf_schema | String | 否 | - | å½“ę ¼å¼č®¾ē½®äøŗ protobuf ę—¶ęœ‰ę•ˆļ¼ŒęŒ‡å®š Schema å®šä¹‰ć€‚ | + +## 任劔ē¤ŗ例 + +### ē®€å•ē¤ŗ例 + +> ę­¤ē¤ŗ例čƻ取 Kafka ēš„ topic_1态topic_2 和 topic_3 ēš„ę•°ę®å¹¶å°†å…¶ę‰“å°åˆ°å®¢ęˆ·ē«Æć€‚å¦‚ęžœå°šęœŖå®‰č£…å’ŒéƒØē½² SeaTunnelļ¼ŒčÆ·ęŒ‰ē…§ [å®‰č£…ęŒ‡å—](../../start-v2/locally/deployment.md) čæ›č”Œå®‰č£…å’ŒéƒØē½²ć€‚ē„¶åŽļ¼ŒęŒ‰ē…§ [åæ«é€Ÿå¼€å§‹](../../start-v2/locally/quick-start-seatunnel-engine.md) čæč”Œę­¤ä»»åŠ”怂 + +```hocon +# 定义čæč”ŒēŽÆ境 +env { + parallelism = 2 + job.mode = "BATCH" +} +source { + Kafka { + schema = { + fields { + name = "string" + age = "int" + } + } + format = text + field_delimiter = "#" + topic = "topic_1,topic_2,topic_3" + bootstrap.servers = "localhost:9092" + kafka.config = { + client.id = client_1 + max.poll.records = 500 + auto.offset.reset = "earliest" + enable.auto.commit = "false" + } + } +} +sink { + Console {} +} +``` + +### ę­£åˆ™č”Øč¾¾å¼äø»é¢˜ + +```hocon +source { + Kafka { + topic = ".*seatunnel*." + pattern = "true" + bootstrap.servers = "localhost:9092" + consumer.group = "seatunnel_group" + } +} +``` + +### AWS MSK SASL/SCRAM + +将仄äø‹ `${username}` 和 `${password}` ę›æę¢äøŗ AWS MSK äø­ēš„配ē½®å€¼ć€‚ + +```hocon +source { + Kafka { + topic = "seatunnel" + bootstrap.servers = "xx.amazonaws.com.cn:9096,xxx.amazonaws.com.cn:9096,xxxx.amazonaws.com.cn:9096" + consumer.group = "seatunnel_group" + kafka.config = { + security.protocol=SASL_SSL + sasl.mechanism=SCRAM-SHA-512 + sasl.jaas.config="org.apache.kafka.common.security.scram.ScramLoginModule required username=\"username\" password=\"password\";" + } + } +} +``` + +### AWS MSK IAM + +从 [ę­¤å¤„](https://github.com/aws/aws-msk-iam-auth/releases) äø‹č½½ `aws-msk-iam-auth-1.1.5.jar` å¹¶å°†å…¶ę”¾åœØ `$SEATUNNEL_HOME/plugin/kafka/lib` ē›®å½•äø‹ć€‚ + +ē”®äæ IAM ē­–ē•„äø­åŒ…含 `"kafka-cluster:Connect"` ꝃ限ļ¼Œå¦‚äø‹ę‰€ē¤ŗļ¼š + +```hocon +"Effect": "Allow", +"Action": [ + "kafka-cluster:Connect", + "kafka-cluster:AlterCluster", + "kafka-cluster:DescribeCluster" +], +``` + +ęŗé…ē½®ē¤ŗ例ļ¼š + +```hocon +source { + Kafka { + topic = "seatunnel" + bootstrap.servers = "xx.amazonaws.com.cn:9098,xxx.amazonaws.com.cn:9098,xxxx.amazonaws.com.cn:9098" + consumer.group = "seatunnel_group" + kafka.config = { + security.protocol=SASL_SSL + sasl.mechanism=AWS_MSK_IAM + sasl.jaas.config="software.amazon.msk.auth.iam.IAMLoginModule required;" + sasl.client.callback.handler.class="software.amazon.msk.auth.iam.IAMClientCallbackHandler" + } + } +} +``` + +### Kerberos 认čƁē¤ŗ例 + +ęŗé…ē½®ē¤ŗ例ļ¼š + +```hocon +source { + Kafka { + topic = "seatunnel" + bootstrap.servers = "127.0.0.1:9092" + consumer.group = "seatunnel_group" + kafka.config = { + security.protocol=SASL_PLAINTEXT + sasl.kerberos.service.name=kafka + sasl.mechanism=GSSAPI + java.security.krb5.conf="/etc/krb5.conf" + sasl.jaas.config="com.sun.security.auth.module.Krb5LoginModule required \n useKeyTab=true \n storeKey=true \n keyTab=\"/path/to/xxx.keytab\" \n principal=\"user@xxx.com\";" + } + } +} +``` + +### 多 Kafka ęŗē¤ŗ例 + +> ę ¹ę®äøåŒēš„ Kafka äø»é¢˜å’Œę ¼å¼č§£ęžę•°ę®ļ¼Œå¹¶åŸŗäŗŽ ID ę‰§č”Œ upsert ę“ä½œć€‚ + +```hocon +env { + execution.parallelism = 1 + job.mode = "BATCH" 
+} + +source { + Kafka { + + + bootstrap.servers = "kafka_e2e:9092" + table_list = [ + { + topic = "^test-ogg-sou.*" + pattern = "true" + consumer.group = "ogg_multi_group" + start_mode = earliest + schema = { + fields { + id = "int" + name = "string" + description = "string" + weight = "string" + } + }, + format = ogg_json + }, + { + topic = "test-cdc_mds" + start_mode = earliest + schema = { + fields { + id = "int" + name = "string" + description = "string" + weight = "string" + } + }, + format = canal_json + } + ] + } +} + +sink { + Jdbc { + driver = org.postgresql.Driver + url = "jdbc:postgresql://postgresql:5432/test?loggerLevel=OFF" + user = test + password = test + generate_sink_sql = true + database = test + table = public.sink + primary_keys = ["id"] + } +} +``` + +### Protobuf配ē½® + +`format` č®¾ē½®äøŗ `protobuf`ļ¼Œé…ē½®`protobuf`ę•°ę®ē»“ęž„ļ¼Œ`protobuf_message_name`和`protobuf_schema`å‚ę•° + +ä½æē”Øę ·ä¾‹ļ¼š + +```hocon +source { + Kafka { + topic = "test_protobuf_topic_fake_source" + format = protobuf + protobuf_message_name = Person + protobuf_schema = """ + syntax = "proto3"; + + package org.apache.seatunnel.format.protobuf; + + option java_outer_classname = "ProtobufE2E"; + + message Person { + int32 c_int32 = 1; + int64 c_int64 = 2; + float c_float = 3; + double c_double = 4; + bool c_bool = 5; + string c_string = 6; + bytes c_bytes = 7; + + message Address { + string street = 1; + string city = 2; + string state = 3; + string zip = 4; + } + + Address address = 8; + + map attributes = 9; + + repeated string phone_numbers = 10; + } + """ + bootstrap.servers = "kafkaCluster:9092" + start_mode = "earliest" + result_table_name = "kafka_table" + } +} +``` \ No newline at end of file diff --git a/docs/zh/connector-v2/source/Opengauss-CDC.md b/docs/zh/connector-v2/source/Opengauss-CDC.md new file mode 100644 index 00000000000..83da40b363e --- /dev/null +++ b/docs/zh/connector-v2/source/Opengauss-CDC.md @@ -0,0 +1,169 @@ +# Opengauss CDC + +> Opengauss CDCęŗčæžęŽ„å™Ø + +## ę”Æꌁčæ™äŗ›å¼•ę“Ž + +> SeaTunnel Zeta
+> Flink
+ +## äø»č¦åŠŸčƒ½ + +- [ ] [ę‰¹å¤„ē†](../../concept/connector-v2-features.md) +- [x] [ęµå¤„ē†](../../concept/connector-v2-features.md) +- [x] [ē²¾ē”®äø€ę¬”](../../concept/connector-v2-features.md) +- [ ] [åˆ—ęŠ•å½±](../../concept/connector-v2-features.md) +- [x] [å¹¶č”Œåŗ¦](../../concept/connector-v2-features.md) +- [x] [ę”Æꌁē”Øęˆ·å®šä¹‰ēš„ę‹†åˆ†](../../concept/connector-v2-features.md) + +## ꏏčæ° + +Opengauss CDCčæžęŽ„å™Ø允č®ø从Opengaussę•°ę®åŗ“čƻ取åæ«ē…§ę•°ę®å’Œå¢žé‡ę•°ę®ć€‚čæ™äøŖę–‡ę”£ęčæ°å¦‚ä½•č®¾ē½®Opengauss CDCčæžęŽ„å™Ø仄åœØOpengauss databaseäø­čæč”ŒSQLęŸ„čÆ¢ć€‚ + +## ä½æē”Øę­„éŖ¤ + +> čæ™é‡Œę˜ÆåÆē”ØOpengauss CDCēš„ę­„éŖ¤: + +1. ē”®äæwal_levelč¢«č®¾ē½®äøŗlogical, ä½ åÆ仄ē›“ꎄä½æē”ØSQLå‘½ä»¤ę„äæ®ę”¹čæ™äøŖ配ē½®: + +```sql +ALTER SYSTEM SET wal_level TO 'logical'; +SELECT pg_reload_conf(); +``` + +2. ę”¹å˜ęŒ‡å®šč”Øēš„REPLICAē­–ē•„äøŗFULL + +```sql +ALTER TABLE your_table_name REPLICA IDENTITY FULL; +``` + +å¦‚ęžœä½ ęœ‰å¾ˆå¤šč”Øļ¼Œä½ åÆ仄ä½æē”Øäø‹é¢SQLēš„ē»“ęžœé›†ę„ę”¹å˜ę‰€ęœ‰č”Øēš„REPLICAē­–ē•„ + +```sql +select 'ALTER TABLE ' || schemaname || '.' || tablename || ' REPLICA IDENTITY FULL;' from pg_tables where schemaname = 'YourTableSchema' +``` + +## ę•°ę®ē±»åž‹ę˜ å°„ + +| Opengauss Data type | SeaTunnel Data type | +|-----------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------| +| BOOL
| BOOLEAN | +| BYTEA
| BYTES | +| INT2
SMALLSERIAL
INT4
SERIAL
| INT | +| INT8
BIGSERIAL
| BIGINT | +| FLOAT4
| FLOAT | +| FLOAT8
| DOUBLE | +| NUMERIC(Get the designated column's specified column size>0) | DECIMAL(Get the designated column's specified column size,Gets the number of digits in the specified column to the right of the decimal point) | +| NUMERIC(Get the designated column's specified column size<0) | DECIMAL(38, 18) | +| BPCHAR
CHARACTER
VARCHAR
TEXT
GEOMETRY
GEOGRAPHY
JSON
JSONB | STRING | +| TIMESTAMP
| TIMESTAMP | +| TIME
| TIME | +| DATE
| DATE | +| OTHER DATA TYPES | NOT SUPPORTED YET | + +## ęŗē«ÆåÆ选锹 + +| Name | Type | Required | Default | Description | +|------------------------------------------------|------|----------|----------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| base-url | 字ē¬¦äø² | ę˜Æ | - | JDBCčæžęŽ„ēš„URL. å‚č€ƒ: `jdbc:postgresql://localhost:5432/postgres_cdc?loggerLevel=OFF`. | +| username | 字ē¬¦äø² | ę˜Æ | - | čæžęŽ„ę•°ę®åŗ“ēš„ē”Øęˆ·å | +| password | 字ē¬¦äø² | ę˜Æ | - | čæžęŽ„ę•°ę®åŗ“ēš„åƆē  | +| database-names | 列č”Ø | 否 | - | ē›‘ꎧēš„ę•°ę®åŗ“名ē§° | +| table-names | 列č”Ø | ę˜Æ | - | ē›‘ꎧēš„ę•°ę®č”Ø名ē§°. č”Øåéœ€č¦åŒ…å«ę•°ę®åŗ“名ē§°, 例如: `database_name.table_name` | +| table-names-config | 列č”Ø | 否 | - | č”Ø配ē½®ēš„列č”Ø集合. 例如: [{"table": "db1.schema1.table1","primaryKeys":["key1"]}] | +| startup.mode | Ꞛäø¾ | 否 | INITIAL | Opengauss CDCę¶ˆč“¹č€…ēš„åÆ选åÆåŠØęؔ式, ꜉ꕈēš„ęžšäø¾ę˜Æ`initial`, `earliest`, `latest` and `specific`.
`initial`: åÆåŠØę—¶åŒę­„åŽ†å²ę•°ę®ļ¼Œē„¶åŽåŒę­„å¢žé‡ę•°ę®
`earliest`: 从åÆčƒ½ēš„ęœ€ę—©åē§»é‡åÆåŠØ
`latest`: ä»Žęœ€čæ‘ēš„偏ē§»é‡åÆåŠØ
`specific`: 从ē”Øęˆ·ęŒ‡å®šēš„偏ē§»é‡åÆåŠØ | +| snapshot.split.size | ę•“åž‹ | 否 | 8096 | č”Øåæ«ē…§ēš„分割大小ļ¼ˆč”Œę•°ļ¼‰ļ¼ŒåœØčƻ取č”Øēš„åæ«ē…§ę—¶ļ¼Œę•čŽ·ēš„č”Øč¢«åˆ†å‰²ęˆå¤šäøŖsplit | +| snapshot.fetch.size | ę•“åž‹ | 否 | 1024 | čƻ取č”Øåæ«ē…§ę—¶ļ¼ŒęÆę¬”č½®čÆ¢ēš„ęœ€å¤§čƻ取大小 | +| slot.name | 字ē¬¦äø² | 否 | - | Opengaussé€»č¾‘č§£ē ę’ę§½ēš„名ē§°ļ¼ŒčÆ„ę’ę§½ę˜Æäøŗē‰¹å®šę•°ę®åŗ“/ęؔ式ēš„ē‰¹å®šę’件ēš„ęµå¼ę›“ę”¹č€Œåˆ›å»ŗēš„ć€‚ęœåŠ”å™Øä½æē”Øę­¤ę’ę§½å°†äŗ‹ä»¶ęµä¼ č¾“åˆ°ę­£åœØ配ē½®ēš„čæžęŽ„å™Øć€‚é»˜č®¤å€¼äøŗseatunnel | +| decoding.plugin.name | 字ē¬¦äø² | 否 | pgoutput | å®‰č£…åœØęœåŠ”å™ØäøŠēš„Postgresé€»č¾‘č§£ē ę’件ēš„名ē§°ļ¼Œę”Æꌁēš„值ę˜Ædecoderbufs态wal2json态wal2json_rds态wal2json_streaming态wal2json_rds_streaming和pgoutput | +| server-time-zone | 字ē¬¦äø² | 否 | UTC | ę•°ę®åŗ“ęœåŠ”å™Øäø­ēš„会čÆę—¶åŒŗć€‚å¦‚ęžœę²”ęœ‰č®¾ē½®ļ¼Œåˆ™ä½æē”ØZoneId.systemDefault()ę„ē”®å®šęœåŠ”å™Øēš„ę—¶åŒŗ | +| connect.timeout.ms | ꗶ闓闓隔 | 否 | 30000 | åœØ尝čƕčæžęŽ„ę•°ę®åŗ“ęœåŠ”å™Ø之后ļ¼ŒčæžęŽ„å™ØåœØč¶…ę—¶ä¹‹å‰åŗ”čÆ„ē­‰å¾…ēš„ęœ€å¤§ę—¶é—“ | +| connect.max-retries | ę•“åž‹ | 否 | 3 | čæžęŽ„å™ØåœØå»ŗē«‹ę•°ę®åŗ“ęœåŠ”å™ØčæžęŽ„ę—¶åŗ”čƄ重čƕēš„ęœ€å¤§ę¬”ꕰ | +| connection.pool.size | ę•“åž‹ | 否 | 20 | jdbcčæžęŽ„ę± ēš„大小 | +| chunk-key.even-distribution.factor.upper-bound | åŒęµ®ē‚¹åž‹ | 否 | 100 | chunkēš„key分åøƒå› å­ēš„äøŠē•Œć€‚čƄ因子ē”ØäŗŽē”®å®šč”Øę•°ę®ę˜Æ否均匀分åøƒć€‚å¦‚ęžœåˆ†åøƒå› å­č¢«č®”ē®—äøŗ小äŗŽęˆ–ē­‰äŗŽčæ™äøŖäøŠē•Œ(即(MAX(id) - MIN(id) + 1) /č”Œę•°)ļ¼Œč”Øēš„ę‰€ęœ‰chunkå°†č¢«ä¼˜åŒ–ä»„č¾¾åˆ°å‡åŒ€åˆ†åøƒć€‚否则ļ¼Œå¦‚ęžœåˆ†åøƒå› å­ę›“大ļ¼Œåˆ™č®¤äøŗč”Ø分åøƒäøå‡åŒ€ļ¼Œå¦‚ęžœä¼°č®”ēš„分ē‰‡ę•°é‡č¶…čæ‡`sample-sharding.threshold`ęŒ‡å®šēš„值ļ¼Œåˆ™å°†ä½æē”ØåŸŗäŗŽé‡‡ę ·ēš„分ē‰‡ē­–ē•„ć€‚é»˜č®¤å€¼äøŗ100.0怂 | +| chunk-key.even-distribution.factor.lower-bound | åŒęµ®ē‚¹åž‹ | 否 | 0.05 | chunkēš„key分åøƒå› å­ēš„äø‹ē•Œć€‚čƄ因子ē”ØäŗŽē”®å®šč”Øę•°ę®ę˜Æ否均匀分åøƒć€‚å¦‚ęžœåˆ†åøƒå› å­ēš„č®”ē®—ē»“ęžœå¤§äŗŽęˆ–ē­‰äŗŽčæ™äøŖäø‹ē•Œ(即(MAX(id) - MIN(id) + 1) /č”Œę•°)ļ¼Œé‚£ä¹ˆč”Øēš„ę‰€ęœ‰å—å°†č¢«ä¼˜åŒ–ä»„č¾¾åˆ°å‡åŒ€åˆ†åøƒć€‚否则ļ¼Œå¦‚ęžœåˆ†åøƒå› å­č¾ƒå°ļ¼Œåˆ™č®¤äøŗč”Ø分åøƒäøå‡åŒ€ļ¼Œå¦‚ęžœä¼°č®”ēš„分ē‰‡ę•°é‡č¶…čæ‡`sample-sharding.threshold`ęŒ‡å®šēš„值ļ¼Œåˆ™ä½æē”ØåŸŗäŗŽé‡‡ę ·ēš„分ē‰‡ē­–ē•„怂ē¼ŗēœå€¼äøŗ0.05怂 | +| sample-sharding.threshold | ę•“åž‹ | 否 | 1000 | ꭤ配ē½®ęŒ‡å®šäŗ†ē”ØäŗŽč§¦å‘é‡‡ę ·åˆ†ē‰‡ē­–ē•„ēš„ä¼°č®”分ē‰‡ę•°ēš„é˜ˆå€¼ć€‚å½“åˆ†åøƒå› å­č¶…å‡ŗäŗ†ē”±`chunk-key.even-distribution.factor.upper-bound `和`chunk-key.even-distribution.factor.lower-bound`ļ¼Œå¹¶äø”ä¼°č®”ēš„分ē‰‡č®”ꕰ(仄čæ‘ä¼¼ēš„č”Œę•°/å—å¤§å°č®”ē®—)超čæ‡ę­¤é˜ˆå€¼ļ¼Œåˆ™å°†ä½æē”Øę ·ęœ¬åˆ†ē‰‡ē­–ē•„怂čæ™ęœ‰åŠ©äŗŽę›“ęœ‰ę•ˆåœ°å¤„ē†å¤§åž‹ę•°ę®é›†ć€‚é»˜č®¤å€¼äøŗ1000äøŖ分ē‰‡ć€‚ | +| inverse-sampling.rate | ę•“åž‹ | 否 | 1000 | é‡‡ę ·åˆ†ē‰‡ē­–ē•„äø­ä½æē”Øēš„采꠷ēŽ‡ēš„å€’ę•°ć€‚ä¾‹å¦‚ļ¼Œå¦‚ęžœčÆ„å€¼č®¾ē½®äøŗ1000ļ¼Œåˆ™ę„å‘³ē€åœØ采꠷čæ‡ē؋äø­åŗ”ē”Øäŗ†1/1000ēš„采꠷ēŽ‡ć€‚čÆ„é€‰é”¹ęä¾›äŗ†ęŽ§åˆ¶é‡‡ę ·ē²’åŗ¦ēš„ēµę“»ę€§ļ¼Œä»Žč€Œå½±å“ęœ€ē»ˆēš„分ē‰‡ę•°é‡ć€‚当处ē†éžåøø大ēš„ę•°ę®é›†ę—¶ļ¼Œå®ƒē‰¹åˆ«ęœ‰ē”Øļ¼Œå…¶äø­é¦–é€‰č¾ƒä½Žēš„采꠷ēŽ‡ć€‚ē¼ŗēœå€¼äøŗ1000怂 | +| exactly_once | åøƒå°” | 否 | false | åÆē”Øexactly oncečƭ义 | +| format | Ꞛäø¾ | 否 | DEFAULT | Opengauss CDCåÆ选ēš„č¾“å‡ŗę ¼å¼, ꜉ꕈēš„ęžšäø¾ę˜Æ`DEFAULT`, `COMPATIBLE_DEBEZIUM_JSON`. 
| +| debezium | 配ē½® | 否 | - | 将 [Debeziumēš„å±žę€§](https://github.com/debezium/debezium/blob/v1.9.8.Final/documentation/modules/ROOT/pages/connectors/postgresql.adoc#connector-configuration-properties) 传递到DebeziumåµŒå…„å¼å¼•ę“Žļ¼ŒčÆ„å¼•ę“Žē”ØäŗŽę•čŽ·ę„č‡ŖOpengaussęœåŠ”ēš„ę•°ę®ę›“ę”¹ | +| common-options | | 否 | - | ęŗē ę’件通ē”Øå‚ę•°, čÆ·å‚č€ƒ[Source Common Options](../source-common-options.md)čŽ·å–čÆ¦ęƒ… | + +## 任劔ē¤ŗ例 + +### ē®€å• + +> ę”ÆęŒå¤šč”ØčÆ» + +``` + +env { + # You can set engine configuration here + execution.parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 + read_limit.bytes_per_second=7000000 + read_limit.rows_per_second=400 +} + +source { + Opengauss-CDC { + result_table_name = "customers_opengauss_cdc" + username = "gaussdb" + password = "openGauss@123" + database-names = ["opengauss_cdc"] + schema-names = ["inventory"] + table-names = ["opengauss_cdc.inventory.opengauss_cdc_table_1","opengauss_cdc.inventory.opengauss_cdc_table_2"] + base-url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc" + decoding.plugin.name = "pgoutput" + } +} + +transform { + +} + +sink { + jdbc { + source_table_name = "customers_opengauss_cdc" + url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc" + driver = "org.postgresql.Driver" + user = "dailai" + password = "openGauss@123" + + compatible_mode="postgresLow" + generate_sink_sql = true + # You need to configure both database and table + database = "opengauss_cdc" + schema = "inventory" + tablePrefix = "sink_" + primary_keys = ["id"] + } +} + +``` + +### ę”Æꌁč‡Ŗ定义äø»é”® + +``` +source { + Opengauss-CDC { + result_table_name = "customers_opengauss_cdc" + username = "gaussdb" + password = "openGauss@123" + database-names = ["opengauss_cdc"] + schema-names = ["inventory"] + table-names = ["opengauss_cdc.inventory.full_types_no_primary_key"] + base-url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc?loggerLevel=OFF" + decoding.plugin.name = "pgoutput" + exactly_once = true + table-names-config = [ + { + table = "opengauss_cdc.inventory.full_types_no_primary_key" + primaryKeys = ["id"] + } + ] + } +} +``` + diff --git a/docs/zh/seatunnel-engine/checkpoint-storage.md b/docs/zh/seatunnel-engine/checkpoint-storage.md index 7dd26ca11f0..86165d5d3be 100644 --- a/docs/zh/seatunnel-engine/checkpoint-storage.md +++ b/docs/zh/seatunnel-engine/checkpoint-storage.md @@ -65,7 +65,6 @@ seatunnel: fs.oss.accessKeyId: your-access-key fs.oss.accessKeySecret: your-secret-key fs.oss.endpoint: endpoint address - fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider ``` ęœ‰å…³Hadoop Credential Provider APIēš„ꛓ多äæ”ęÆļ¼ŒčÆ·å‚č§: [Credential Provider API](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/CredentialProviderAPI.html). 
diff --git a/plugin-mapping.properties b/plugin-mapping.properties index a74b9e1223e..ece3bd0c77c 100644 --- a/plugin-mapping.properties +++ b/plugin-mapping.properties @@ -85,6 +85,7 @@ seatunnel.sink.InfluxDB = connector-influxdb seatunnel.source.GoogleSheets = connector-google-sheets seatunnel.sink.GoogleFirestore = connector-google-firestore seatunnel.sink.Tablestore = connector-tablestore +seatunnel.source.Tablestore = connector-tablestore seatunnel.source.Lemlist = connector-http-lemlist seatunnel.source.Klaviyo = connector-http-klaviyo seatunnel.sink.Slack = connector-slack @@ -131,6 +132,7 @@ seatunnel.source.Milvus = connector-milvus seatunnel.sink.Milvus = connector-milvus seatunnel.sink.ActiveMQ = connector-activemq seatunnel.source.Sls = connector-sls +seatunnel.source.Opengauss-CDC = connector-cdc-opengauss seatunnel.transform.Sql = seatunnel-transforms-v2 seatunnel.transform.FieldMapper = seatunnel-transforms-v2 @@ -142,3 +144,4 @@ seatunnel.transform.Split = seatunnel-transforms-v2 seatunnel.transform.Copy = seatunnel-transforms-v2 seatunnel.transform.DynamicCompile = seatunnel-transforms-v2 seatunnel.transform.LLM = seatunnel-transforms-v2 + diff --git a/release-note.md b/release-note.md index 32067c22dfa..53f5e10cc65 100644 --- a/release-note.md +++ b/release-note.md @@ -87,6 +87,7 @@ - [zeta] dynamically replace the value of the variable at runtime (#4950) - [Zeta] Add from_unixtime function (#5462) - [zeta] Fix CDC task restore throw NPE (#5507) +- [Zeta] Fix a checkpoint storage document with OSS (#7507) ### E2E diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSink.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSink.java index 923ecff8b88..3f7f7fa9c6a 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSink.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSink.java @@ -64,6 +64,7 @@ public String getPluginName() { public SinkWriter createWriter( SinkWriter.Context context) throws IOException { Map> writers = new HashMap<>(); + Map sinkWritersContext = new HashMap<>(); for (int i = 0; i < replicaNum; i++) { for (String tableIdentifier : sinks.keySet()) { SeaTunnelSink sink = sinks.get(tableIdentifier); @@ -71,15 +72,18 @@ public SinkWriter createWri writers.put( SinkIdentifier.of(tableIdentifier, index), sink.createWriter(new SinkContextProxy(index, context))); + sinkWritersContext.put(SinkIdentifier.of(tableIdentifier, index), context); } } - return new MultiTableSinkWriter(writers, replicaNum); + return new MultiTableSinkWriter(writers, replicaNum, sinkWritersContext); } @Override public SinkWriter restoreWriter( SinkWriter.Context context, List states) throws IOException { Map> writers = new HashMap<>(); + Map sinkWritersContext = new HashMap<>(); + for (int i = 0; i < replicaNum; i++) { for (String tableIdentifier : sinks.keySet()) { SeaTunnelSink sink = sinks.get(tableIdentifier); @@ -102,9 +106,10 @@ public SinkWriter restoreWr sinkIdentifier, sink.restoreWriter(new SinkContextProxy(index, context), state)); } + sinkWritersContext.put(SinkIdentifier.of(tableIdentifier, index), context); } } - return new MultiTableSinkWriter(writers, replicaNum); + return new MultiTableSinkWriter(writers, replicaNum, sinkWritersContext); } @Override diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkWriter.java 
b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkWriter.java index 3c73435fafb..38234e220c5 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkWriter.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableSinkWriter.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.api.sink.MultiTableResourceManager; import org.apache.seatunnel.api.sink.SinkWriter; import org.apache.seatunnel.api.sink.SupportMultiTableSinkWriter; +import org.apache.seatunnel.api.sink.event.WriterCloseEvent; import org.apache.seatunnel.api.table.event.SchemaChangeEvent; import org.apache.seatunnel.api.table.type.SeaTunnelRow; @@ -45,6 +46,7 @@ public class MultiTableSinkWriter implements SinkWriter { private final Map> sinkWriters; + private final Map sinkWritersContext; private final Map> sinkPrimaryKeys = new HashMap<>(); private final List>> sinkWritersWithIndex; private final List runnable = new ArrayList<>(); @@ -55,8 +57,11 @@ public class MultiTableSinkWriter private volatile boolean submitted = false; public MultiTableSinkWriter( - Map> sinkWriters, int queueSize) { + Map> sinkWriters, + int queueSize, + Map sinkWritersContext) { this.sinkWriters = sinkWriters; + this.sinkWritersContext = sinkWritersContext; AtomicInteger cnt = new AtomicInteger(0); executorService = Executors.newFixedThreadPool( @@ -84,6 +89,7 @@ public MultiTableSinkWriter( entry.getKey().getTableIdentifier(), entry.getValue()); sinkIdentifierMap.put(entry.getKey(), entry.getValue()); }); + sinkWritersWithIndex.add(sinkIdentifierMap); blockingQueues.add(queue); MultiTableWriterRunnable r = new MultiTableWriterRunnable(tableIdWriterMap, queue); @@ -267,26 +273,34 @@ public void abortPrepare() { @Override public void close() throws IOException { - Throwable firstE = null; + // The variables used in lambda expressions should be final or valid final, so they are + // modified to arrays + final Throwable[] firstE = {null}; try { checkQueueRemain(); } catch (Exception e) { - firstE = e; + firstE[0] = e; } executorService.shutdownNow(); for (int i = 0; i < sinkWritersWithIndex.size(); i++) { synchronized (runnable.get(i)) { - for (SinkWriter sinkWriter : - sinkWritersWithIndex.get(i).values()) { - try { - sinkWriter.close(); - } catch (Throwable e) { - if (firstE == null) { - firstE = e; - } - log.error("close error", e); - } - } + Map> sinkIdentifierSinkWriterMap = + sinkWritersWithIndex.get(i); + sinkIdentifierSinkWriterMap.forEach( + (identifier, sinkWriter) -> { + try { + sinkWriter.close(); + sinkWritersContext + .get(identifier) + .getEventListener() + .onEvent(new WriterCloseEvent()); + } catch (Throwable e) { + if (firstE[0] == null) { + firstE[0] = e; + } + log.error("close error", e); + } + }); } } try { @@ -296,8 +310,8 @@ public void close() throws IOException { } catch (Throwable e) { log.error("close resourceManager error", e); } - if (firstE != null) { - throw new RuntimeException(firstE); + if (firstE[0] != null) { + throw new RuntimeException(firstE[0]); } } diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-opengauss/pom.xml b/seatunnel-connectors-v2/connector-cdc/connector-cdc-opengauss/pom.xml new file mode 100644 index 00000000000..098c60370d4 --- /dev/null +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-opengauss/pom.xml @@ -0,0 +1,91 @@ + + + + 4.0.0 + + org.apache.seatunnel + connector-cdc + ${revision} + + + connector-cdc-opengauss + SeaTunnel : Connectors V2 : CDC : Opengauss + 
+ + 5.1.0 + + + + + org.opengauss + opengauss-jdbc + ${opengauss.version} + + + + org.apache.seatunnel + connector-cdc-postgres + ${project.version} + + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + + + + shade + + package + + false + true + false + false + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + org.postgresql + ${seatunnel.shade.package}.org.postgresql + + + + + + + + + + diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-opengauss/src/main/java/io/debezium/connector/postgresql/connection/PostgresConnection.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-opengauss/src/main/java/io/debezium/connector/postgresql/connection/PostgresConnection.java new file mode 100644 index 00000000000..57c393acfaf --- /dev/null +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-opengauss/src/main/java/io/debezium/connector/postgresql/connection/PostgresConnection.java @@ -0,0 +1,815 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.debezium.connector.postgresql.connection; + +import org.apache.kafka.connect.errors.ConnectException; + +import org.postgresql.core.BaseConnection; +import org.postgresql.jdbc.PgConnection; +import org.postgresql.jdbc.TimestampUtils; +import org.postgresql.replication.LogSequenceNumber; +import org.postgresql.util.PGmoney; +import org.postgresql.util.PSQLState; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import io.debezium.DebeziumException; +import io.debezium.annotation.VisibleForTesting; +import io.debezium.config.Configuration; +import io.debezium.connector.postgresql.PgOid; +import io.debezium.connector.postgresql.PostgresConnectorConfig; +import io.debezium.connector.postgresql.PostgresSchema; +import io.debezium.connector.postgresql.PostgresType; +import io.debezium.connector.postgresql.PostgresValueConverter; +import io.debezium.connector.postgresql.TypeRegistry; +import io.debezium.connector.postgresql.spi.SlotState; +import io.debezium.data.SpecialValueDecimal; +import io.debezium.jdbc.JdbcConfiguration; +import io.debezium.jdbc.JdbcConnection; +import io.debezium.relational.Column; +import io.debezium.relational.ColumnEditor; +import io.debezium.relational.Table; +import io.debezium.relational.TableId; +import io.debezium.relational.Tables; +import io.debezium.schema.DatabaseSchema; +import io.debezium.util.Clock; +import io.debezium.util.Metronome; + +import java.nio.charset.Charset; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Statement; +import java.time.Duration; +import java.util.Objects; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Copied from Debezium 1.9.8.Final. 
{@link JdbcConnection} connection extension used for connecting + * to Postgres instances. + * + *

Line 616 : skip validateServerVersion because the version based pg of opengauss is below 9.4 + */ +public class PostgresConnection extends JdbcConnection { + + public static final String CONNECTION_STREAMING = "Debezium Streaming"; + public static final String CONNECTION_SLOT_INFO = "Debezium Slot Info"; + public static final String CONNECTION_DROP_SLOT = "Debezium Drop Slot"; + public static final String CONNECTION_VALIDATE_CONNECTION = "Debezium Validate Connection"; + public static final String CONNECTION_HEARTBEAT = "Debezium Heartbeat"; + public static final String CONNECTION_GENERAL = "Debezium General"; + + private static Logger LOGGER = LoggerFactory.getLogger(PostgresConnection.class); + + private static final String URL_PATTERN = + "jdbc:postgresql://${" + + JdbcConfiguration.HOSTNAME + + "}:${" + + JdbcConfiguration.PORT + + "}/${" + + JdbcConfiguration.DATABASE + + "}"; + protected static final ConnectionFactory FACTORY = + JdbcConnection.patternBasedFactory( + URL_PATTERN, + org.postgresql.Driver.class.getName(), + PostgresConnection.class.getClassLoader(), + JdbcConfiguration.PORT.withDefault( + PostgresConnectorConfig.PORT.defaultValueAsString())); + + /** + * Obtaining a replication slot may fail if there's a pending transaction. We're retrying to get + * a slot for 30 min. + */ + private static final int MAX_ATTEMPTS_FOR_OBTAINING_REPLICATION_SLOT = 900; + + private static final Duration PAUSE_BETWEEN_REPLICATION_SLOT_RETRIEVAL_ATTEMPTS = + Duration.ofSeconds(2); + + private final TypeRegistry typeRegistry; + private final PostgresDefaultValueConverter defaultValueConverter; + + /** + * Creates a Postgres connection using the supplied configuration. If necessary this connection + * is able to resolve data type mappings. Such a connection requires a {@link + * PostgresValueConverter}, and will provide its own {@link TypeRegistry}. Usually only one such + * connection per connector is needed. + * + * @param config {@link Configuration} instance, may not be null. + * @param valueConverterBuilder supplies a configured {@link PostgresValueConverter} for a given + * {@link TypeRegistry} + * @param connectionUsage a symbolic name of the connection to be tracked in monitoring tools + */ + public PostgresConnection( + JdbcConfiguration config, + PostgresValueConverterBuilder valueConverterBuilder, + String connectionUsage) { + super( + addDefaultSettings(config, connectionUsage), + FACTORY, + PostgresConnection::validateServerVersion, + null, + "\"", + "\""); + + if (Objects.isNull(valueConverterBuilder)) { + this.typeRegistry = null; + this.defaultValueConverter = null; + } else { + this.typeRegistry = new TypeRegistry(this); + + final PostgresValueConverter valueConverter = + valueConverterBuilder.build(this.typeRegistry); + this.defaultValueConverter = + new PostgresDefaultValueConverter(valueConverter, this.getTimestampUtils()); + } + } + + /** + * Create a Postgres connection using the supplied configuration and {@link TypeRegistry} + * + * @param config {@link Configuration} instance, may not be null. 
+ * @param typeRegistry an existing/already-primed {@link TypeRegistry} instance + * @param connectionUsage a symbolic name of the connection to be tracked in monitoring tools + */ + public PostgresConnection( + PostgresConnectorConfig config, TypeRegistry typeRegistry, String connectionUsage) { + super( + addDefaultSettings(config.getJdbcConfig(), connectionUsage), + FACTORY, + PostgresConnection::validateServerVersion, + null, + "\"", + "\""); + if (Objects.isNull(typeRegistry)) { + this.typeRegistry = null; + this.defaultValueConverter = null; + } else { + this.typeRegistry = typeRegistry; + final PostgresValueConverter valueConverter = + PostgresValueConverter.of(config, this.getDatabaseCharset(), typeRegistry); + this.defaultValueConverter = + new PostgresDefaultValueConverter(valueConverter, this.getTimestampUtils()); + } + } + + /** + * Creates a Postgres connection using the supplied configuration. The connector is the regular + * one without datatype resolution capabilities. + * + * @param config {@link Configuration} instance, may not be null. + * @param connectionUsage a symbolic name of the connection to be tracked in monitoring tools + */ + public PostgresConnection(JdbcConfiguration config, String connectionUsage) { + this(config, null, connectionUsage); + } + + static JdbcConfiguration addDefaultSettings( + JdbcConfiguration configuration, String connectionUsage) { + // we require Postgres 9.4 as the minimum server version since that's where logical + // replication was first introduced + return JdbcConfiguration.adapt( + configuration + .edit() + .with("assumeMinServerVersion", "9.4") + .with("ApplicationName", connectionUsage) + .build()); + } + + /** + * Returns a JDBC connection string for the current configuration. + * + * @return a {@code String} where the variables in {@code urlPattern} are replaced with values + * from the configuration + */ + public String connectionString() { + return connectionString(URL_PATTERN); + } + + /** + * Prints out information about the REPLICA IDENTITY status of a table. This in turn determines + * how much information is available for UPDATE and DELETE operations for logical replication. + * + * @param tableId the identifier of the table + * @return the replica identity information; never null + * @throws SQLException if there is a problem obtaining the replica identity information for the + * given table + */ + public ServerInfo.ReplicaIdentity readReplicaIdentityInfo(TableId tableId) throws SQLException { + String statement = + "SELECT relreplident FROM pg_catalog.pg_class c " + + "LEFT JOIN pg_catalog.pg_namespace n ON c.relnamespace=n.oid " + + "WHERE n.nspname=? and c.relname=?"; + String schema = + tableId.schema() != null && tableId.schema().length() > 0 + ? 
tableId.schema() + : "public"; + StringBuilder replIdentity = new StringBuilder(); + prepareQuery( + statement, + stmt -> { + stmt.setString(1, schema); + stmt.setString(2, tableId.table()); + }, + rs -> { + if (rs.next()) { + replIdentity.append(rs.getString(1)); + } else { + LOGGER.warn( + "Cannot determine REPLICA IDENTITY information for table '{}'", + tableId); + } + }); + return ServerInfo.ReplicaIdentity.parseFromDB(replIdentity.toString()); + } + + /** + * Returns the current state of the replication slot + * + * @param slotName the name of the slot + * @param pluginName the name of the plugin used for the desired slot + * @return the {@link SlotState} or null, if no slot state is found + * @throws SQLException + */ + public SlotState getReplicationSlotState(String slotName, String pluginName) + throws SQLException { + ServerInfo.ReplicationSlot slot; + try { + slot = readReplicationSlotInfo(slotName, pluginName); + if (slot.equals(ServerInfo.ReplicationSlot.INVALID)) { + return null; + } else { + return slot.asSlotState(); + } + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new ConnectException( + "Interrupted while waiting for valid replication slot info", e); + } + } + + /** + * Fetches the state of a replication stage given a slot name and plugin name + * + * @param slotName the name of the slot + * @param pluginName the name of the plugin used for the desired slot + * @return the {@link ServerInfo.ReplicationSlot} object or a {@link + * ServerInfo.ReplicationSlot#INVALID} if the slot is not valid + * @throws SQLException is thrown by the underlying JDBC + */ + private ServerInfo.ReplicationSlot fetchReplicationSlotInfo(String slotName, String pluginName) + throws SQLException { + final String database = database(); + final ServerInfo.ReplicationSlot slot = + queryForSlot( + slotName, + database, + pluginName, + rs -> { + if (rs.next()) { + boolean active = rs.getBoolean("active"); + final Lsn confirmedFlushedLsn = + parseConfirmedFlushLsn(slotName, pluginName, database, rs); + if (confirmedFlushedLsn == null) { + return null; + } + Lsn restartLsn = + parseRestartLsn(slotName, pluginName, database, rs); + if (restartLsn == null) { + return null; + } + final Long xmin = rs.getLong("catalog_xmin"); + return new ServerInfo.ReplicationSlot( + active, confirmedFlushedLsn, restartLsn, xmin); + } else { + LOGGER.debug( + "No replication slot '{}' is present for plugin '{}' and database '{}'", + slotName, + pluginName, + database); + return ServerInfo.ReplicationSlot.INVALID; + } + }); + return slot; + } + + /** + * Fetches a replication slot, repeating the query until either the slot is created or until the + * max number of attempts has been reached + * + *

To fetch the slot without the retries, use the {@link + * PostgresConnection#fetchReplicationSlotInfo} call + * + * @param slotName the slot name + * @param pluginName the name of the plugin + * @return the {@link ServerInfo.ReplicationSlot} object or a {@link + * ServerInfo.ReplicationSlot#INVALID} if the slot is not valid + * @throws SQLException is thrown by the underyling jdbc driver + * @throws InterruptedException is thrown if we don't return an answer within the set number of + * retries + */ + @VisibleForTesting + ServerInfo.ReplicationSlot readReplicationSlotInfo(String slotName, String pluginName) + throws SQLException, InterruptedException { + final String database = database(); + final Metronome metronome = + Metronome.parker(PAUSE_BETWEEN_REPLICATION_SLOT_RETRIEVAL_ATTEMPTS, Clock.SYSTEM); + + for (int attempt = 1; attempt <= MAX_ATTEMPTS_FOR_OBTAINING_REPLICATION_SLOT; attempt++) { + final ServerInfo.ReplicationSlot slot = fetchReplicationSlotInfo(slotName, pluginName); + if (slot != null) { + LOGGER.info("Obtained valid replication slot {}", slot); + return slot; + } + LOGGER.warn( + "Cannot obtain valid replication slot '{}' for plugin '{}' and database '{}' [during attempt {} out of {}, concurrent tx probably blocks taking snapshot.", + slotName, + pluginName, + database, + attempt, + MAX_ATTEMPTS_FOR_OBTAINING_REPLICATION_SLOT); + metronome.pause(); + } + + throw new ConnectException( + "Unable to obtain valid replication slot. " + + "Make sure there are no long-running transactions running in parallel as they may hinder the allocation of the replication slot when starting this connector"); + } + + protected ServerInfo.ReplicationSlot queryForSlot( + String slotName, + String database, + String pluginName, + ResultSetMapper map) + throws SQLException { + return prepareQueryAndMap( + "select * from pg_replication_slots where slot_name = ? and database = ? and plugin = ?", + statement -> { + statement.setString(1, slotName); + statement.setString(2, database); + statement.setString(3, pluginName); + }, + map); + } + + /** + * Obtains the LSN to resume streaming from. On PG 9.5 there is no confirmed_flushed_lsn yet, so + * restart_lsn will be read instead. This may result in more records to be re-read after a + * restart. 
+ */ + private Lsn parseConfirmedFlushLsn( + String slotName, String pluginName, String database, ResultSet rs) { + Lsn confirmedFlushedLsn = null; + + try { + confirmedFlushedLsn = + tryParseLsn(slotName, pluginName, database, rs, "confirmed_flush_lsn"); + } catch (SQLException e) { + LOGGER.info("unable to find confirmed_flushed_lsn, falling back to restart_lsn"); + try { + confirmedFlushedLsn = + tryParseLsn(slotName, pluginName, database, rs, "restart_lsn"); + } catch (SQLException e2) { + throw new ConnectException( + "Neither confirmed_flush_lsn nor restart_lsn could be found"); + } + } + + return confirmedFlushedLsn; + } + + private Lsn parseRestartLsn(String slotName, String pluginName, String database, ResultSet rs) { + Lsn restartLsn = null; + try { + restartLsn = tryParseLsn(slotName, pluginName, database, rs, "restart_lsn"); + } catch (SQLException e) { + throw new ConnectException("restart_lsn could be found"); + } + + return restartLsn; + } + + private Lsn tryParseLsn( + String slotName, String pluginName, String database, ResultSet rs, String column) + throws ConnectException, SQLException { + Lsn lsn = null; + + String lsnStr = rs.getString(column); + if (lsnStr == null) { + return null; + } + try { + lsn = Lsn.valueOf(lsnStr); + } catch (Exception e) { + throw new ConnectException( + "Value " + + column + + " in the pg_replication_slots table for slot = '" + + slotName + + "', plugin = '" + + pluginName + + "', database = '" + + database + + "' is not valid. This is an abnormal situation and the database status should be checked."); + } + if (!lsn.isValid()) { + throw new ConnectException("Invalid LSN returned from database"); + } + return lsn; + } + + /** + * Drops a replication slot that was created on the DB + * + * @param slotName the name of the replication slot, may not be null + * @return {@code true} if the slot was dropped, {@code false} otherwise + */ + public boolean dropReplicationSlot(String slotName) { + final int ATTEMPTS = 3; + for (int i = 0; i < ATTEMPTS; i++) { + try { + execute("select pg_drop_replication_slot('" + slotName + "')"); + return true; + } catch (SQLException e) { + // slot is active + if (PSQLState.OBJECT_IN_USE.getState().equals(e.getSQLState())) { + if (i < ATTEMPTS - 1) { + LOGGER.debug( + "Cannot drop replication slot '{}' because it's still in use", + slotName); + } else { + LOGGER.warn( + "Cannot drop replication slot '{}' because it's still in use", + slotName); + return false; + } + } else if (PSQLState.UNDEFINED_OBJECT.getState().equals(e.getSQLState())) { + LOGGER.debug("Replication slot {} has already been dropped", slotName); + return false; + } else { + LOGGER.error("Unexpected error while attempting to drop replication slot", e); + return false; + } + } + try { + Metronome.parker(Duration.ofSeconds(1), Clock.system()).pause(); + } catch (InterruptedException e) { + } + } + return false; + } + + /** + * Drops the debezium publication that was created. 
+ * + * @param publicationName the publication name, may not be null + * @return {@code true} if the publication was dropped, {@code false} otherwise + */ + public boolean dropPublication(String publicationName) { + try { + LOGGER.debug("Dropping publication '{}'", publicationName); + execute("DROP PUBLICATION " + publicationName); + return true; + } catch (SQLException e) { + if (PSQLState.UNDEFINED_OBJECT.getState().equals(e.getSQLState())) { + LOGGER.debug("Publication {} has already been dropped", publicationName); + } else { + LOGGER.error("Unexpected error while attempting to drop publication", e); + } + return false; + } + } + + @Override + public synchronized void close() { + try { + super.close(); + } catch (SQLException e) { + LOGGER.error("Unexpected error while closing Postgres connection", e); + } + } + + /** + * Returns the PG id of the current active transaction + * + * @return a PG transaction identifier, or null if no tx is active + * @throws SQLException if anything fails. + */ + public Long currentTransactionId() throws SQLException { + AtomicLong txId = new AtomicLong(0); + query( + "select (case pg_is_in_recovery() when 't' then 0 else txid_current() end) AS pg_current_txid", + rs -> { + if (rs.next()) { + txId.compareAndSet(0, rs.getLong(1)); + } + }); + long value = txId.get(); + return value > 0 ? value : null; + } + + /** + * Returns the current position in the server tx log. + * + * @return a long value, never negative + * @throws SQLException if anything unexpected fails. + */ + public long currentXLogLocation() throws SQLException { + AtomicLong result = new AtomicLong(0); + int majorVersion = connection().getMetaData().getDatabaseMajorVersion(); + query( + majorVersion >= 10 + ? "select (case pg_is_in_recovery() when 't' then pg_last_wal_receive_lsn() else pg_current_wal_lsn() end) AS pg_current_wal_lsn" + : "select * from pg_current_xlog_location()", + rs -> { + if (!rs.next()) { + throw new IllegalStateException( + "there should always be a valid xlog position"); + } + result.compareAndSet(0, LogSequenceNumber.valueOf(rs.getString(1)).asLong()); + }); + return result.get(); + } + + /** + * Returns information about the PG server to which this instance is connected. 
+ * + * @return a {@link ServerInfo} instance, never {@code null} + * @throws SQLException if anything fails + */ + public ServerInfo serverInfo() throws SQLException { + ServerInfo serverInfo = new ServerInfo(); + query( + "SELECT version(), current_user, current_database()", + rs -> { + if (rs.next()) { + serverInfo + .withServer(rs.getString(1)) + .withUsername(rs.getString(2)) + .withDatabase(rs.getString(3)); + } + }); + String username = serverInfo.username(); + if (username != null) { + query( + "SELECT oid, rolname, rolsuper, rolinherit, rolcreaterole, rolcreatedb, rolcanlogin, rolreplication FROM pg_roles " + + "WHERE pg_has_role('" + + username + + "', oid, 'member')", + rs -> { + while (rs.next()) { + String roleInfo = + "superuser: " + + rs.getBoolean(3) + + ", replication: " + + rs.getBoolean(8) + + ", inherit: " + + rs.getBoolean(4) + + ", create role: " + + rs.getBoolean(5) + + ", create db: " + + rs.getBoolean(6) + + ", can log in: " + + rs.getBoolean(7); + String roleName = rs.getString(2); + serverInfo.addRole(roleName, roleInfo); + } + }); + } + return serverInfo; + } + + public Charset getDatabaseCharset() { + try { + return Charset.forName(((BaseConnection) connection()).getEncoding().name()); + } catch (SQLException e) { + throw new DebeziumException("Couldn't obtain encoding for database " + database(), e); + } + } + + public TimestampUtils getTimestampUtils() { + try { + return ((PgConnection) this.connection()).getTimestampUtils(); + } catch (SQLException e) { + throw new DebeziumException( + "Couldn't get timestamp utils from underlying connection", e); + } + } + + private static void validateServerVersion(Statement statement) throws SQLException {} + + @Override + public String quotedColumnIdString(String columnName) { + if (columnName.contains("\"")) { + columnName = columnName.replaceAll("\"", "\"\""); + } + + return super.quotedColumnIdString(columnName); + } + + @Override + protected int resolveNativeType(String typeName) { + return getTypeRegistry().get(typeName).getRootType().getOid(); + } + + @Override + protected int resolveJdbcType(int metadataJdbcType, int nativeType) { + // Special care needs to be taken for columns that use user-defined domain type data types + // where resolution of the column's JDBC type needs to be that of the root type instead of + // the actual column to properly influence schema building and value conversion. 
+ return getTypeRegistry().get(nativeType).getRootType().getJdbcId(); + } + + @Override + protected Optional readTableColumn( + ResultSet columnMetadata, TableId tableId, Tables.ColumnNameFilter columnFilter) + throws SQLException { + return doReadTableColumn(columnMetadata, tableId, columnFilter); + } + + public Optional readColumnForDecoder( + ResultSet columnMetadata, TableId tableId, Tables.ColumnNameFilter columnNameFilter) + throws SQLException { + return doReadTableColumn(columnMetadata, tableId, columnNameFilter) + .map(ColumnEditor::create); + } + + private Optional doReadTableColumn( + ResultSet columnMetadata, TableId tableId, Tables.ColumnNameFilter columnFilter) + throws SQLException { + final String columnName = columnMetadata.getString(4); + if (columnFilter == null + || columnFilter.matches( + tableId.catalog(), tableId.schema(), tableId.table(), columnName)) { + final ColumnEditor column = Column.editor().name(columnName); + column.type(columnMetadata.getString(6)); + + // first source the length/scale from the column metadata provided by the driver + // this may be overridden below if the column type is a user-defined domain type + column.length(columnMetadata.getInt(7)); + if (columnMetadata.getObject(9) != null) { + column.scale(columnMetadata.getInt(9)); + } + + column.optional(isNullable(columnMetadata.getInt(11))); + column.position(columnMetadata.getInt(17)); + column.autoIncremented("YES".equalsIgnoreCase(columnMetadata.getString(23))); + + String autogenerated = null; + try { + autogenerated = columnMetadata.getString(24); + } catch (SQLException e) { + // ignore, some drivers don't have this index - e.g. Postgres + } + column.generated("YES".equalsIgnoreCase(autogenerated)); + + // Lookup the column type from the TypeRegistry + // For all types, we need to set the Native and Jdbc types by using the root-type + final PostgresType nativeType = getTypeRegistry().get(column.typeName()); + column.nativeType(nativeType.getRootType().getOid()); + column.jdbcType(nativeType.getRootType().getJdbcId()); + + // For domain types, the postgres driver is unable to traverse a nested unbounded + // hierarchy of types and report the right length/scale of a given type. We use + // the TypeRegistry to accomplish this since it is capable of traversing the type + // hierarchy upward to resolve length/scale regardless of hierarchy depth. 
+ if (TypeRegistry.DOMAIN_TYPE == nativeType.getJdbcId()) { + column.length(nativeType.getDefaultLength()); + column.scale(nativeType.getDefaultScale()); + } + + final String defaultValueExpression = columnMetadata.getString(13); + if (defaultValueExpression != null + && getDefaultValueConverter().supportConversion(column.typeName())) { + column.defaultValueExpression(defaultValueExpression); + } + + return Optional.of(column); + } + + return Optional.empty(); + } + + public PostgresDefaultValueConverter getDefaultValueConverter() { + Objects.requireNonNull( + defaultValueConverter, "Connection does not provide default value converter"); + return defaultValueConverter; + } + + public TypeRegistry getTypeRegistry() { + Objects.requireNonNull(typeRegistry, "Connection does not provide type registry"); + return typeRegistry; + } + + @Override + public > Object getColumnValue( + ResultSet rs, int columnIndex, Column column, Table table, T schema) + throws SQLException { + try { + final ResultSetMetaData metaData = rs.getMetaData(); + final String columnTypeName = metaData.getColumnTypeName(columnIndex); + final PostgresType type = + ((PostgresSchema) schema).getTypeRegistry().get(columnTypeName); + + LOGGER.trace("Type of incoming data is: {}", type.getOid()); + LOGGER.trace("ColumnTypeName is: {}", columnTypeName); + LOGGER.trace("Type is: {}", type); + + if (type.isArrayType()) { + return rs.getArray(columnIndex); + } + + switch (type.getOid()) { + case PgOid.MONEY: + // TODO author=Horia Chiorean date=14/11/2016 description=workaround for + // https://github.com/pgjdbc/pgjdbc/issues/100 + final String sMoney = rs.getString(columnIndex); + if (sMoney == null) { + return sMoney; + } + if (sMoney.startsWith("-")) { + // PGmoney expects negative values to be provided in the format of + // "($XXXXX.YY)" + final String negativeMoney = "(" + sMoney.substring(1) + ")"; + return new PGmoney(negativeMoney).val; + } + return new PGmoney(sMoney).val; + case PgOid.BIT: + return rs.getString(columnIndex); + case PgOid.NUMERIC: + final String s = rs.getString(columnIndex); + if (s == null) { + return s; + } + + Optional value = PostgresValueConverter.toSpecialValue(s); + return value.isPresent() + ? value.get() + : new SpecialValueDecimal(rs.getBigDecimal(columnIndex)); + case PgOid.TIME: + // To handle time 24:00:00 supported by TIME columns, read the column as a + // string. + case PgOid.TIMETZ: + // In order to guarantee that we resolve TIMETZ columns with proper microsecond + // precision, + // read the column as a string instead and then re-parse inside the converter. + return rs.getString(columnIndex); + default: + Object x = rs.getObject(columnIndex); + if (x != null) { + LOGGER.trace( + "rs getobject returns class: {}; rs getObject value is: {}", + x.getClass(), + x); + } + return x; + } + } catch (SQLException e) { + // not a known type + return super.getColumnValue(rs, columnIndex, column, table, schema); + } + } + + @Override + protected String[] supportedTableTypes() { + return new String[] {"VIEW", "MATERIALIZED VIEW", "TABLE", "PARTITIONED TABLE"}; + } + + @Override + protected boolean isTableType(String tableType) { + return "TABLE".equals(tableType) || "PARTITIONED TABLE".equals(tableType); + } + + /** + * Retrieves all {@code TableId}s in a given database catalog, including partitioned tables. 
+ * + * @param catalogName the catalog/database name + * @return set of all table ids for existing table objects + * @throws SQLException if a database exception occurred + */ + public Set getAllTableIds(String catalogName) throws SQLException { + return readTableNames(catalogName, null, null, new String[] {"TABLE", "PARTITIONED TABLE"}); + } + + @FunctionalInterface + public interface PostgresValueConverterBuilder { + PostgresValueConverter build(TypeRegistry registry); + } +} diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-opengauss/src/main/java/io/debezium/connector/postgresql/connection/PostgresReplicationConnection.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-opengauss/src/main/java/io/debezium/connector/postgresql/connection/PostgresReplicationConnection.java new file mode 100644 index 00000000000..c69a63c7136 --- /dev/null +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-opengauss/src/main/java/io/debezium/connector/postgresql/connection/PostgresReplicationConnection.java @@ -0,0 +1,928 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.debezium.connector.postgresql.connection; + +import org.apache.kafka.connect.errors.ConnectException; + +import org.postgresql.core.BaseConnection; +import org.postgresql.core.ServerVersion; +import org.postgresql.replication.PGReplicationStream; +import org.postgresql.replication.fluent.logical.ChainedLogicalStreamBuilder; +import org.postgresql.util.PSQLException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import io.debezium.DebeziumException; +import io.debezium.connector.postgresql.PostgresConnectorConfig; +import io.debezium.connector.postgresql.PostgresSchema; +import io.debezium.connector.postgresql.TypeRegistry; +import io.debezium.connector.postgresql.spi.SlotCreationResult; +import io.debezium.jdbc.JdbcConfiguration; +import io.debezium.jdbc.JdbcConnection; +import io.debezium.jdbc.JdbcConnectionException; +import io.debezium.relational.RelationalTableFilters; +import io.debezium.relational.TableId; +import io.debezium.util.Clock; +import io.debezium.util.Metronome; + +import java.nio.ByteBuffer; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.SQLWarning; +import java.sql.Statement; +import java.time.Duration; +import java.util.HashSet; +import java.util.LinkedHashSet; +import java.util.Optional; +import java.util.Properties; +import java.util.Set; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.BiFunction; +import java.util.function.Function; +import java.util.stream.Collectors; + +import static java.lang.Math.toIntExact; + +/** + * Copied from Debezium 1.9.8.Final. 
Implementation of a {@link ReplicationConnection} for + * Postgresql. Note that replication connections in PG cannot execute regular statements but only a + * limited number of replication-related commands. + * + *

Line 179 : Modify the method named initPublication so that it uses a regular - i.e. not a + * replication - connection, to avoid the I/O error + * + *

Line 440: Modify the method named createReplicationSlot, which adds logic to create the slot + * if it doesn't exist + */ +public class PostgresReplicationConnection extends JdbcConnection implements ReplicationConnection { + + private static Logger LOGGER = LoggerFactory.getLogger(PostgresReplicationConnection.class); + + private final String slotName; + private final String publicationName; + private final RelationalTableFilters tableFilter; + private final PostgresConnectorConfig.AutoCreateMode publicationAutocreateMode; + private final PostgresConnectorConfig.LogicalDecoder plugin; + private final boolean dropSlotOnClose; + private final PostgresConnectorConfig connectorConfig; + private final Duration statusUpdateInterval; + private final MessageDecoder messageDecoder; + private final PostgresConnection jdbcConnection; + private final TypeRegistry typeRegistry; + private final Properties streamParams; + + private Lsn defaultStartingPos; + private SlotCreationResult slotCreationInfo; + private boolean hasInitedSlot; + + /** + * Creates a new replication connection with the given params. + * + * @param config the JDBC configuration for the connection; may not be null + * @param slotName the name of the DB slot for logical replication; may not be null + * @param publicationName the name of the DB publication for logical replication; may not be + * null + * @param tableFilter the tables to watch of the DB publication for logical replication; may not + * be null + * @param publicationAutocreateMode the mode for publication autocreation; may not be null + * @param plugin decoder matching the server side plug-in used for streaming changes; may not be + * null + * @param dropSlotOnClose whether the replication slot should be dropped once the connection is + * closed + * @param statusUpdateInterval the interval at which the replication connection should + * periodically send status + * @param doSnapshot whether the connector is doing snapshot + * @param jdbcConnection general PostgreSQL JDBC connection + * @param typeRegistry registry with PostgreSQL types + * @param streamParams additional parameters to pass to the replication stream + * @param schema the schema; must not be null + *

updates to the server + */ + private PostgresReplicationConnection( + PostgresConnectorConfig config, + String slotName, + String publicationName, + RelationalTableFilters tableFilter, + PostgresConnectorConfig.AutoCreateMode publicationAutocreateMode, + PostgresConnectorConfig.LogicalDecoder plugin, + boolean dropSlotOnClose, + boolean doSnapshot, + Duration statusUpdateInterval, + PostgresConnection jdbcConnection, + TypeRegistry typeRegistry, + Properties streamParams, + PostgresSchema schema) { + super( + addDefaultSettings(config.getJdbcConfig()), + PostgresConnection.FACTORY, + null, + null, + "\"", + "\""); + + this.connectorConfig = config; + this.slotName = slotName; + this.publicationName = publicationName; + this.tableFilter = tableFilter; + this.publicationAutocreateMode = publicationAutocreateMode; + this.plugin = plugin; + this.dropSlotOnClose = dropSlotOnClose; + this.statusUpdateInterval = statusUpdateInterval; + this.messageDecoder = + plugin.messageDecoder(new MessageDecoderContext(config, schema), jdbcConnection); + this.jdbcConnection = jdbcConnection; + this.typeRegistry = typeRegistry; + this.streamParams = streamParams; + this.slotCreationInfo = null; + this.hasInitedSlot = false; + } + + private static JdbcConfiguration addDefaultSettings(JdbcConfiguration configuration) { + // first copy the parent's default settings... + // then set some additional replication specific settings + return JdbcConfiguration.adapt( + PostgresConnection.addDefaultSettings( + configuration, PostgresConnection.CONNECTION_STREAMING) + .edit() + .with("replication", "database") + .with( + "preferQueryMode", + "simple") // replication protocol only supports simple query mode + .build()); + } + + private ServerInfo.ReplicationSlot getSlotInfo() throws SQLException, InterruptedException { + try (PostgresConnection connection = + new PostgresConnection( + connectorConfig.getJdbcConfig(), PostgresConnection.CONNECTION_SLOT_INFO)) { + return connection.readReplicationSlotInfo(slotName, plugin.getPostgresPluginName()); + } + } + + protected void initPublication() { + String tableFilterString = null; + if (PostgresConnectorConfig.LogicalDecoder.PGOUTPUT.equals(plugin)) { + LOGGER.info("Initializing PgOutput logical decoder publication"); + try { + PostgresConnection conn = jdbcConnection; + // Unless the autocommit is disabled the SELECT publication query will stay running + conn.setAutoCommit(false); + + String selectPublication = + String.format( + "SELECT COUNT(1) FROM pg_publication WHERE pubname = '%s'", + publicationName); + conn.query( + selectPublication, + rs -> { + if (rs.next()) { + Long count = rs.getLong(1); + // Close eagerly as the transaction might stay running + if (count == 0L) { + LOGGER.info( + "Creating new publication '{}' for plugin '{}'", + publicationName, + plugin); + switch (publicationAutocreateMode) { + case DISABLED: + throw new ConnectException( + "Publication autocreation is disabled, please create one and restart the connector."); + case ALL_TABLES: + String createPublicationStmt = + String.format( + "CREATE PUBLICATION %s FOR ALL TABLES;", + publicationName); + LOGGER.info( + "Creating Publication with statement '{}'", + createPublicationStmt); + // Publication doesn't exist, create it. 
+ conn.executeWithoutCommitting(createPublicationStmt); + break; + case FILTERED: + createOrUpdatePublicationModeFilterted( + tableFilterString, conn, false); + break; + } + } else { + switch (publicationAutocreateMode) { + case FILTERED: + createOrUpdatePublicationModeFilterted( + tableFilterString, conn, true); + break; + default: + LOGGER.trace( + "A logical publication named '{}' for plugin '{}' and database '{}' is already active on the server " + + "and will be used by the plugin", + publicationName, + plugin, + database()); + } + } + } + }); + conn.commit(); + conn.setAutoCommit(true); + } catch (SQLException e) { + throw new JdbcConnectionException(e); + } + } + } + + private void createOrUpdatePublicationModeFilterted( + String tableFilterString, PostgresConnection conn, boolean isUpdate) { + String createOrUpdatePublicationStmt; + try { + Set tablesToCapture = determineCapturedTables(); + tableFilterString = + tablesToCapture.stream() + .map(TableId::toDoubleQuotedString) + .collect(Collectors.joining(", ")); + if (tableFilterString.isEmpty()) { + throw new DebeziumException( + String.format( + "No table filters found for filtered publication %s", + publicationName)); + } + createOrUpdatePublicationStmt = + isUpdate + ? String.format( + "ALTER PUBLICATION %s SET TABLE %s;", + publicationName, tableFilterString) + : String.format( + "CREATE PUBLICATION %s FOR TABLE %s;", + publicationName, tableFilterString); + LOGGER.info( + isUpdate + ? "Updating Publication with statement '{}'" + : "Creating Publication with statement '{}'", + createOrUpdatePublicationStmt); + conn.execute(createOrUpdatePublicationStmt); + } catch (Exception e) { + throw new ConnectException( + String.format( + "Unable to %s filtered publication %s for %s", + isUpdate ? 
"update" : "create", publicationName, tableFilterString), + e); + } + } + + private Set determineCapturedTables() throws Exception { + Set allTableIds = jdbcConnection.getAllTableIds(connectorConfig.databaseName()); + + Set capturedTables = new HashSet<>(); + + for (TableId tableId : allTableIds) { + if (tableFilter.dataCollectionFilter().isIncluded(tableId)) { + LOGGER.trace("Adding table {} to the list of captured tables", tableId); + capturedTables.add(tableId); + } else { + LOGGER.trace( + "Ignoring table {} as it's not included in the filter configuration", + tableId); + } + } + + return capturedTables.stream() + .sorted() + .collect(Collectors.toCollection(LinkedHashSet::new)); + } + + protected void initReplicationSlot() throws SQLException, InterruptedException { + ServerInfo.ReplicationSlot slotInfo = getSlotInfo(); + + boolean shouldCreateSlot = ServerInfo.ReplicationSlot.INVALID == slotInfo; + try { + // there's no info for this plugin and slot so create a new slot + if (shouldCreateSlot) { + this.createReplicationSlot(); + } + + // replication connection does not support parsing of SQL statements so we need to + // create + // the connection without executing on connect statements - see JDBC opt + // preferQueryMode=simple + pgConnection(); + final String identifySystemStatement = "IDENTIFY_SYSTEM"; + LOGGER.debug( + "running '{}' to validate replication connection", identifySystemStatement); + final Lsn xlogStart = + queryAndMap( + identifySystemStatement, + rs -> { + if (!rs.next()) { + throw new IllegalStateException( + "The DB connection is not a valid replication connection"); + } + String xlogpos = rs.getString("xlogpos"); + LOGGER.debug("received latest xlogpos '{}'", xlogpos); + return Lsn.valueOf(xlogpos); + }); + + if (slotCreationInfo != null) { + this.defaultStartingPos = slotCreationInfo.startLsn(); + } else if (shouldCreateSlot || !slotInfo.hasValidFlushedLsn()) { + // this is a new slot or we weren't able to read a valid flush LSN pos, so we always + // start from the xlog pos that was reported + this.defaultStartingPos = xlogStart; + } else { + Lsn latestFlushedLsn = slotInfo.latestFlushedLsn(); + this.defaultStartingPos = + latestFlushedLsn.compareTo(xlogStart) < 0 ? latestFlushedLsn : xlogStart; + if (LOGGER.isDebugEnabled()) { + LOGGER.debug("found previous flushed LSN '{}'", latestFlushedLsn); + } + } + hasInitedSlot = true; + } catch (SQLException e) { + throw new JdbcConnectionException(e); + } + } + + // Temporary replication slots is a new feature of PostgreSQL 10 + private boolean useTemporarySlot() throws SQLException { + // Temporary replication slots cannot be used due to connection restart + // when finding WAL position + // return dropSlotOnClose && pgConnection().haveMinimumServerVersion(ServerVersion.v10); + return false; + } + + /** + * creating a replication connection and starting to stream involves a few steps: 1. we create + * the connection and ensure that a. the slot exists b. the slot isn't currently being used 2. + * we query to get our potential start position in the slot (lsn) 3. we try and start streaming, + * depending on our options (such as in wal2json) this may fail, which can result in the + * connection being killed and we need to start the process over if we are using a temporary + * slot 4. actually start the streamer + * + *

This method takes care of all of these and this method queries for a default starting + * position If you know where you are starting from you should call {@link #startStreaming(Lsn, + * WalPositionLocator)}, this method delegates to that method + * + * @return + * @throws SQLException + * @throws InterruptedException + */ + @Override + public ReplicationStream startStreaming(WalPositionLocator walPosition) + throws SQLException, InterruptedException { + return startStreaming(null, walPosition); + } + + @Override + public ReplicationStream startStreaming(Lsn offset, WalPositionLocator walPosition) + throws SQLException, InterruptedException { + initConnection(); + + connect(); + if (offset == null || !offset.isValid()) { + offset = defaultStartingPos; + } + Lsn lsn = offset; + if (LOGGER.isDebugEnabled()) { + LOGGER.debug("starting streaming from LSN '{}'", lsn); + } + + final int maxRetries = connectorConfig.maxRetries(); + final Duration delay = connectorConfig.retryDelay(); + int tryCount = 0; + while (true) { + try { + return createReplicationStream(lsn, walPosition); + } catch (Exception e) { + String message = "Failed to start replication stream at " + lsn; + if (++tryCount > maxRetries) { + if (e.getMessage().matches(".*replication slot .* is active.*")) { + message += + "; when setting up multiple connectors for the same database host, please make sure to use a distinct replication slot name for each."; + } + throw new DebeziumException(message, e); + } else { + LOGGER.warn( + message + ", waiting for {} ms and retrying, attempt number {} over {}", + delay, + tryCount, + maxRetries); + final Metronome metronome = Metronome.sleeper(delay, Clock.SYSTEM); + metronome.pause(); + } + } + } + } + + @Override + public void initConnection() throws SQLException, InterruptedException { + // See https://www.postgresql.org/docs/current/logical-replication-quick-setup.html + // For pgoutput specifically, the publication must be created before the slot. + initPublication(); + if (!hasInitedSlot) { + initReplicationSlot(); + } + } + + @Override + public Optional createReplicationSlot() throws SQLException { + // note that some of these options are only supported in Postgres 9.4+, additionally + // the options are not yet exported by the jdbc api wrapper, therefore, we just do + // this ourselves but eventually this should be moved back to the jdbc API + // see https://github.com/pgjdbc/pgjdbc/issues/1305 + ServerInfo.ReplicationSlot slotInfo; + try { + slotInfo = getSlotInfo(); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + + boolean shouldCreateSlot = ServerInfo.ReplicationSlot.INVALID == slotInfo; + + if (shouldCreateSlot) { + LOGGER.debug("Creating new replication slot '{}' for plugin '{}'", slotName, plugin); + String tempPart = ""; + // Exported snapshots are supported in Postgres 9.4+ + boolean canExportSnapshot = pgConnection().haveMinimumServerVersion(ServerVersion.v9_4); + if ((dropSlotOnClose) && !canExportSnapshot) { + LOGGER.warn( + "A slot marked as temporary or with an exported snapshot was created, " + + "but not on a supported version of Postgres, ignoring!"); + } + if (useTemporarySlot()) { + tempPart = "TEMPORARY"; + } + + // See https://www.postgresql.org/docs/current/logical-replication-quick-setup.html + // For pgoutput specifically, the publication must be created prior to the slot. 
+ initPublication(); + + try (Statement stmt = pgConnection().createStatement()) { + String createCommand = + String.format( + "CREATE_REPLICATION_SLOT \"%s\" %s LOGICAL %s", + slotName, tempPart, plugin.getPostgresPluginName()); + LOGGER.info("Creating replication slot with command {}", createCommand); + stmt.execute(createCommand); + // when we are in Postgres 9.4+, we can parse the slot creation info, + // otherwise, it returns nothing + if (canExportSnapshot) { + this.slotCreationInfo = parseSlotCreation(stmt.getResultSet()); + } + } + } + return Optional.ofNullable(slotCreationInfo); + } + + protected BaseConnection pgConnection() throws SQLException { + return (BaseConnection) connection(false); + } + + private SlotCreationResult parseSlotCreation(ResultSet rs) { + try { + if (rs.next()) { + String slotName = rs.getString("slot_name"); + String startPoint = rs.getString("consistent_point"); + String snapName = rs.getString("snapshot_name"); + String pluginName = rs.getString("output_plugin"); + + return new SlotCreationResult(slotName, startPoint, snapName, pluginName); + } else { + throw new ConnectException("No replication slot found"); + } + } catch (SQLException ex) { + throw new ConnectException("Unable to parse create_replication_slot response", ex); + } + } + + private ReplicationStream createReplicationStream( + final Lsn startLsn, WalPositionLocator walPosition) + throws SQLException, InterruptedException { + PGReplicationStream s; + + try { + try { + s = + startPgReplicationStream( + startLsn, + plugin.forceRds() + ? messageDecoder::optionsWithoutMetadata + : messageDecoder::optionsWithMetadata); + messageDecoder.setContainsMetadata(plugin.forceRds() ? false : true); + } catch (PSQLException e) { + LOGGER.debug( + "Could not register for streaming, retrying without optional options", e); + + // re-init the slot after a failed start of slot, as this + // may have closed the slot + if (useTemporarySlot()) { + initReplicationSlot(); + } + + s = + startPgReplicationStream( + startLsn, + plugin.forceRds() + ? messageDecoder::optionsWithoutMetadata + : messageDecoder::optionsWithMetadata); + messageDecoder.setContainsMetadata(plugin.forceRds() ? false : true); + } + } catch (PSQLException e) { + if (e.getMessage().matches("(?s)ERROR: option .* is unknown.*")) { + // It is possible we are connecting to an old wal2json plug-in + LOGGER.warn( + "Could not register for streaming with metadata in messages, falling back to messages without metadata"); + + // re-init the slot after a failed start of slot, as this + // may have closed the slot + if (useTemporarySlot()) { + initReplicationSlot(); + } + + s = startPgReplicationStream(startLsn, messageDecoder::optionsWithoutMetadata); + messageDecoder.setContainsMetadata(false); + } else if (e.getMessage() + .matches("(?s)ERROR: requested WAL segment .* has already been removed.*")) { + LOGGER.error("Cannot rewind to last processed WAL position", e); + throw new ConnectException( + "The offset to start reading from has been removed from the database write-ahead log. 
Create a new snapshot and consider setting of PostgreSQL parameter wal_keep_segments = 0."); + } else { + throw e; + } + } + + final PGReplicationStream stream = s; + + return new ReplicationStream() { + + private static final int CHECK_WARNINGS_AFTER_COUNT = 100; + private int warningCheckCounter = CHECK_WARNINGS_AFTER_COUNT; + private ExecutorService keepAliveExecutor = null; + private AtomicBoolean keepAliveRunning; + private final Metronome metronome = + Metronome.sleeper(statusUpdateInterval, Clock.SYSTEM); + + // make sure this is volatile since multiple threads may be interested in this value + private volatile Lsn lastReceivedLsn; + + @Override + public void read(ReplicationMessageProcessor processor) + throws SQLException, InterruptedException { + processWarnings(false); + ByteBuffer read = stream.read(); + final Lsn lastReceiveLsn = Lsn.valueOf(stream.getLastReceiveLSN()); + LOGGER.trace( + "Streaming requested from LSN {}, received LSN {}", + startLsn, + lastReceiveLsn); + if (messageDecoder.shouldMessageBeSkipped( + read, lastReceiveLsn, startLsn, walPosition)) { + return; + } + deserializeMessages(read, processor); + } + + @Override + public boolean readPending(ReplicationMessageProcessor processor) + throws SQLException, InterruptedException { + processWarnings(false); + ByteBuffer read = stream.readPending(); + final Lsn lastReceiveLsn = Lsn.valueOf(stream.getLastReceiveLSN()); + LOGGER.trace( + "Streaming requested from LSN {}, received LSN {}", + startLsn, + lastReceiveLsn); + + if (read == null) { + return false; + } + + if (messageDecoder.shouldMessageBeSkipped( + read, lastReceiveLsn, startLsn, walPosition)) { + return true; + } + + deserializeMessages(read, processor); + + return true; + } + + private void deserializeMessages( + ByteBuffer buffer, ReplicationMessageProcessor processor) + throws SQLException, InterruptedException { + lastReceivedLsn = Lsn.valueOf(stream.getLastReceiveLSN()); + LOGGER.trace("Received message at LSN {}", lastReceivedLsn); + messageDecoder.processMessage(buffer, processor, typeRegistry); + } + + @Override + public void close() throws SQLException { + processWarnings(true); + stream.close(); + } + + @Override + public void flushLsn(Lsn lsn) throws SQLException { + doFlushLsn(lsn); + } + + private void doFlushLsn(Lsn lsn) throws SQLException { + stream.setFlushedLSN(lsn.asLogSequenceNumber()); + stream.setAppliedLSN(lsn.asLogSequenceNumber()); + + stream.forceUpdateStatus(); + } + + @Override + public Lsn lastReceivedLsn() { + return lastReceivedLsn; + } + + @Override + public void startKeepAlive(ExecutorService service) { + if (keepAliveExecutor == null) { + keepAliveExecutor = service; + keepAliveRunning = new AtomicBoolean(true); + keepAliveExecutor.submit( + () -> { + while (keepAliveRunning.get()) { + try { + LOGGER.trace( + "Forcing status update with replication stream"); + stream.forceUpdateStatus(); + metronome.pause(); + } catch (Exception exp) { + throw new RuntimeException( + "received unexpected exception will perform keep alive", + exp); + } + } + }); + } + } + + @Override + public void stopKeepAlive() { + if (keepAliveExecutor != null) { + keepAliveRunning.set(false); + keepAliveExecutor.shutdownNow(); + keepAliveExecutor = null; + } + } + + private void processWarnings(final boolean forced) throws SQLException { + if (--warningCheckCounter == 0 || forced) { + warningCheckCounter = CHECK_WARNINGS_AFTER_COUNT; + for (SQLWarning w = connection().getWarnings(); + w != null; + w = w.getNextWarning()) { + LOGGER.debug( + 
"Server-side message: '{}', state = {}, code = {}", + w.getMessage(), + w.getSQLState(), + w.getErrorCode()); + } + connection().clearWarnings(); + } + } + + @Override + public Lsn startLsn() { + return startLsn; + } + }; + } + + private PGReplicationStream startPgReplicationStream( + final Lsn lsn, + BiFunction< + ChainedLogicalStreamBuilder, + Function, + ChainedLogicalStreamBuilder> + configurator) + throws SQLException { + assert lsn != null; + ChainedLogicalStreamBuilder streamBuilder = + pgConnection() + .getReplicationAPI() + .replicationStream() + .logical() + .withSlotName("\"" + slotName + "\"") + .withStartPosition(lsn.asLogSequenceNumber()) + .withSlotOptions(streamParams); + streamBuilder = configurator.apply(streamBuilder, this::hasMinimumVersion); + + if (statusUpdateInterval != null && statusUpdateInterval.toMillis() > 0) { + streamBuilder.withStatusInterval( + toIntExact(statusUpdateInterval.toMillis()), TimeUnit.MILLISECONDS); + } + + PGReplicationStream stream = streamBuilder.start(); + + // TODO DBZ-508 get rid of this + // Needed by tests when connections are opened and closed in a fast sequence + try { + Thread.sleep(10); + } catch (Exception e) { + } + stream.forceUpdateStatus(); + return stream; + } + + private Boolean hasMinimumVersion(int version) { + try { + return pgConnection().haveMinimumServerVersion(version); + } catch (SQLException e) { + throw new DebeziumException(e); + } + } + + @Override + public synchronized void close() { + close(true); + } + + public synchronized void close(boolean dropSlot) { + try { + LOGGER.debug("Closing message decoder"); + messageDecoder.close(); + } catch (Throwable e) { + LOGGER.error("Unexpected error while closing message decoder", e); + } + + try { + LOGGER.debug("Closing replication connection"); + super.close(); + } catch (Throwable e) { + LOGGER.error("Unexpected error while closing Postgres connection", e); + } + if (dropSlotOnClose && dropSlot) { + // we're dropping the replication slot via a regular - i.e. 
not a replication - + // connection + try (PostgresConnection connection = + new PostgresConnection( + connectorConfig.getJdbcConfig(), + PostgresConnection.CONNECTION_DROP_SLOT)) { + connection.dropReplicationSlot(slotName); + connection.dropPublication(publicationName); + } catch (Throwable e) { + LOGGER.error("Unexpected error while dropping replication slot", e); + } + } + } + + @Override + public void reconnect() throws SQLException { + close(false); + // Don't re-execute initial commands on reconnection + connection(false); + } + + protected static class ReplicationConnectionBuilder implements Builder { + + private final PostgresConnectorConfig config; + private String slotName = DEFAULT_SLOT_NAME; + private String publicationName = DEFAULT_PUBLICATION_NAME; + private RelationalTableFilters tableFilter; + private PostgresConnectorConfig.AutoCreateMode publicationAutocreateMode = + PostgresConnectorConfig.AutoCreateMode.ALL_TABLES; + private PostgresConnectorConfig.LogicalDecoder plugin = + PostgresConnectorConfig.LogicalDecoder.DECODERBUFS; + private boolean dropSlotOnClose = DEFAULT_DROP_SLOT_ON_CLOSE; + private Duration statusUpdateIntervalVal; + private boolean doSnapshot; + private TypeRegistry typeRegistry; + private PostgresSchema schema; + private Properties slotStreamParams = new Properties(); + private PostgresConnection jdbcConnection; + + protected ReplicationConnectionBuilder(PostgresConnectorConfig config) { + assert config != null; + this.config = config; + } + + @Override + public ReplicationConnectionBuilder withSlot(final String slotName) { + assert slotName != null; + this.slotName = slotName; + return this; + } + + @Override + public Builder withPublication(String publicationName) { + assert publicationName != null; + this.publicationName = publicationName; + return this; + } + + @Override + public Builder withTableFilter(RelationalTableFilters tableFilter) { + assert tableFilter != null; + this.tableFilter = tableFilter; + return this; + } + + @Override + public Builder withPublicationAutocreateMode( + PostgresConnectorConfig.AutoCreateMode publicationAutocreateMode) { + assert publicationName != null; + this.publicationAutocreateMode = publicationAutocreateMode; + return this; + } + + @Override + public ReplicationConnectionBuilder withPlugin( + final PostgresConnectorConfig.LogicalDecoder plugin) { + assert plugin != null; + this.plugin = plugin; + return this; + } + + @Override + public ReplicationConnectionBuilder dropSlotOnClose(final boolean dropSlotOnClose) { + this.dropSlotOnClose = dropSlotOnClose; + return this; + } + + @Override + public ReplicationConnectionBuilder streamParams(final String slotStreamParams) { + if (slotStreamParams != null && !slotStreamParams.isEmpty()) { + this.slotStreamParams = new Properties(); + String[] paramsWithValues = slotStreamParams.split(";"); + for (String paramsWithValue : paramsWithValues) { + String[] paramAndValue = paramsWithValue.split("="); + if (paramAndValue.length == 2) { + this.slotStreamParams.setProperty(paramAndValue[0], paramAndValue[1]); + } else { + LOGGER.warn( + "The following STREAM_PARAMS value is invalid: {}", + paramsWithValue); + } + } + } + return this; + } + + @Override + public ReplicationConnectionBuilder statusUpdateInterval( + final Duration statusUpdateInterval) { + this.statusUpdateIntervalVal = statusUpdateInterval; + return this; + } + + @Override + public Builder doSnapshot(boolean doSnapshot) { + this.doSnapshot = doSnapshot; + return this; + } + + @Override + public Builder 
jdbcMetadataConnection(PostgresConnection jdbcConnection) { + this.jdbcConnection = jdbcConnection; + return this; + } + + @Override + public ReplicationConnection build() { + assert plugin != null : "Decoding plugin name is not set"; + return new PostgresReplicationConnection( + config, + slotName, + publicationName, + tableFilter, + publicationAutocreateMode, + plugin, + dropSlotOnClose, + doSnapshot, + statusUpdateIntervalVal, + jdbcConnection, + typeRegistry, + slotStreamParams, + schema); + } + + @Override + public Builder withTypeRegistry(TypeRegistry typeRegistry) { + this.typeRegistry = typeRegistry; + return this; + } + + @Override + public Builder withSchema(PostgresSchema schema) { + this.schema = schema; + return this; + } + } +} diff --git a/seatunnel-connectors-v2/connector-cdc/connector-cdc-opengauss/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/opengauss/OpengaussIncrementalSourceFactory.java b/seatunnel-connectors-v2/connector-cdc/connector-cdc-opengauss/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/opengauss/OpengaussIncrementalSourceFactory.java new file mode 100644 index 00000000000..e9f552db6c0 --- /dev/null +++ b/seatunnel-connectors-v2/connector-cdc/connector-cdc-opengauss/src/main/java/org/apache/seatunnel/connectors/seatunnel/cdc/opengauss/OpengaussIncrementalSourceFactory.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.cdc.opengauss; + +import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.source.SeaTunnelSource; +import org.apache.seatunnel.api.source.SourceSplit; +import org.apache.seatunnel.api.table.catalog.CatalogOptions; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.CatalogTableUtil; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.connector.TableSource; +import org.apache.seatunnel.api.table.factory.Factory; +import org.apache.seatunnel.api.table.factory.TableSourceFactory; +import org.apache.seatunnel.api.table.factory.TableSourceFactoryContext; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.connectors.cdc.base.config.JdbcSourceTableConfig; +import org.apache.seatunnel.connectors.cdc.base.option.JdbcSourceOptions; +import org.apache.seatunnel.connectors.cdc.base.option.StartupMode; +import org.apache.seatunnel.connectors.cdc.base.utils.CatalogTableUtils; +import org.apache.seatunnel.connectors.seatunnel.cdc.postgres.option.PostgresOptions; +import org.apache.seatunnel.connectors.seatunnel.cdc.postgres.source.PostgresIncrementalSource; +import org.apache.seatunnel.connectors.seatunnel.cdc.postgres.source.PostgresSourceOptions; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.JdbcCatalogOptions; + +import com.google.auto.service.AutoService; + +import java.io.Serializable; +import java.util.List; +import java.util.Optional; + +@AutoService(Factory.class) +public class OpengaussIncrementalSourceFactory implements TableSourceFactory { + private static final String IDENTIFIER = "Opengauss-CDC"; + + @Override + public String factoryIdentifier() { + return IDENTIFIER; + } + + @Override + public OptionRule optionRule() { + return JdbcSourceOptions.getBaseRule() + .required( + JdbcSourceOptions.USERNAME, + JdbcSourceOptions.PASSWORD, + JdbcCatalogOptions.BASE_URL) + .exclusive(CatalogOptions.TABLE_NAMES, CatalogOptions.TABLE_PATTERN) + .optional( + JdbcSourceOptions.DATABASE_NAMES, + JdbcSourceOptions.SERVER_TIME_ZONE, + JdbcSourceOptions.CONNECT_TIMEOUT_MS, + JdbcSourceOptions.CONNECT_MAX_RETRIES, + JdbcSourceOptions.CONNECTION_POOL_SIZE, + PostgresOptions.DECODING_PLUGIN_NAME, + PostgresOptions.SLOT_NAME, + JdbcSourceOptions.CHUNK_KEY_EVEN_DISTRIBUTION_FACTOR_LOWER_BOUND, + JdbcSourceOptions.CHUNK_KEY_EVEN_DISTRIBUTION_FACTOR_UPPER_BOUND, + JdbcSourceOptions.SAMPLE_SHARDING_THRESHOLD, + JdbcSourceOptions.TABLE_NAMES_CONFIG) + .optional(PostgresSourceOptions.STARTUP_MODE, PostgresSourceOptions.STOP_MODE) + .conditional( + PostgresSourceOptions.STARTUP_MODE, + StartupMode.INITIAL, + JdbcSourceOptions.EXACTLY_ONCE) + .build(); + } + + @Override + public Class getSourceClass() { + return PostgresIncrementalSource.class; + } + + @Override + public + TableSource createSource(TableSourceFactoryContext context) { + return () -> { + List catalogTables = + CatalogTableUtil.getCatalogTables( + "Postgres", context.getOptions(), context.getClassLoader()); + Optional> tableConfigs = + context.getOptions().getOptional(JdbcSourceOptions.TABLE_NAMES_CONFIG); + if (tableConfigs.isPresent()) { + catalogTables = + CatalogTableUtils.mergeCatalogTableConfig( + catalogTables, tableConfigs.get(), s -> TablePath.of(s, true)); + } + SeaTunnelDataType dataType = + 
CatalogTableUtil.convertToMultipleRowType(catalogTables); + return (SeaTunnelSource) + new PostgresIncrementalSource<>(context.getOptions(), dataType, catalogTables); + }; + } +} diff --git a/seatunnel-connectors-v2/connector-cdc/pom.xml b/seatunnel-connectors-v2/connector-cdc/pom.xml index 44916d35caa..a422f6406d3 100644 --- a/seatunnel-connectors-v2/connector-cdc/pom.xml +++ b/seatunnel-connectors-v2/connector-cdc/pom.xml @@ -36,6 +36,7 @@ connector-cdc-mongodb connector-cdc-postgres connector-cdc-oracle + connector-cdc-opengauss diff --git a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriter.java b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriter.java index 4c9e6f47605..d83e8b5c96b 100644 --- a/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriter.java +++ b/seatunnel-connectors-v2/connector-console/src/main/java/org/apache/seatunnel/connectors/seatunnel/console/sink/ConsoleSinkWriter.java @@ -19,7 +19,6 @@ import org.apache.seatunnel.api.sink.SinkWriter; import org.apache.seatunnel.api.sink.SupportMultiTableSinkWriter; -import org.apache.seatunnel.api.sink.event.WriterCloseEvent; import org.apache.seatunnel.api.table.event.SchemaChangeEvent; import org.apache.seatunnel.api.table.event.handler.DataTypeChangeEventDispatcher; import org.apache.seatunnel.api.table.event.handler.DataTypeChangeEventHandler; @@ -99,9 +98,7 @@ public void write(SeaTunnelRow element) { } @Override - public void close() { - context.getEventListener().onEvent(new WriterCloseEvent()); - } + public void close() {} private String fieldsInfo(SeaTunnelRowType seaTunnelRowType) { String[] fieldsInfo = new String[seaTunnelRowType.getTotalFields()]; diff --git a/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/source/FakeSourceReader.java b/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/source/FakeSourceReader.java index 95758cb971e..063ece63d2e 100644 --- a/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/source/FakeSourceReader.java +++ b/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/source/FakeSourceReader.java @@ -20,8 +20,6 @@ import org.apache.seatunnel.api.source.Boundedness; import org.apache.seatunnel.api.source.Collector; import org.apache.seatunnel.api.source.SourceReader; -import org.apache.seatunnel.api.source.event.ReaderCloseEvent; -import org.apache.seatunnel.api.source.event.ReaderOpenEvent; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.connectors.seatunnel.fake.config.FakeConfig; import org.apache.seatunnel.connectors.seatunnel.fake.config.MultipleTableFakeSourceConfig; @@ -73,14 +71,10 @@ public FakeSourceReader( } @Override - public void open() { - context.getEventListener().onEvent(new ReaderOpenEvent()); - } + public void open() {} @Override - public void close() { - context.getEventListener().onEvent(new ReaderCloseEvent()); - } + public void close() {} @Override @SuppressWarnings("MagicNumber") diff --git a/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/source/FakeSourceSplitEnumerator.java 
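Reviewer note on the new Opengauss-CDC factory above: since OpengaussIncrementalSourceFactory only declares the option rule and then delegates to PostgresIncrementalSource, a configuration sketch may be easier to review than the rule itself. The snippet below is a minimal, hypothetical job fragment: host, credentials, slot and table names are placeholders, and the option keys (base-url, database-names, table-names, decoding.plugin.name, slot.name) are assumed to keep the Postgres-CDC spellings that this factory reuses.

```hocon
# Minimal sketch of the new Opengauss-CDC source; all values are placeholders
# and the key spellings are assumed to match the reused Postgres-CDC options.
source {
  Opengauss-CDC {
    username = "gaussdb"
    password = "example_password"
    base-url = "jdbc:postgresql://opengauss-host:5432/opengauss_cdc"
    database-names = ["opengauss_cdc"]
    table-names = ["opengauss_cdc.inventory.source_table"]
    decoding.plugin.name = "pgoutput"
    slot.name = "seatunnel_slot"
  }
}
```

Because createSource returns a PostgresIncrementalSource, the remaining Postgres-CDC options declared in the rule (startup and stop mode, split tuning, exactly-once) should behave the same way against openGauss; this sketch is not a substitute for dedicated connector documentation.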
b/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/source/FakeSourceSplitEnumerator.java index ecd6d509149..311e8183766 100644 --- a/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/source/FakeSourceSplitEnumerator.java +++ b/seatunnel-connectors-v2/connector-fake/src/main/java/org/apache/seatunnel/connectors/seatunnel/fake/source/FakeSourceSplitEnumerator.java @@ -18,8 +18,6 @@ package org.apache.seatunnel.connectors.seatunnel.fake.source; import org.apache.seatunnel.api.source.SourceSplitEnumerator; -import org.apache.seatunnel.api.source.event.EnumeratorCloseEvent; -import org.apache.seatunnel.api.source.event.EnumeratorOpenEvent; import org.apache.seatunnel.connectors.seatunnel.fake.config.FakeConfig; import org.apache.seatunnel.connectors.seatunnel.fake.config.MultipleTableFakeSourceConfig; import org.apache.seatunnel.connectors.seatunnel.fake.state.FakeSourceState; @@ -58,9 +56,7 @@ public FakeSourceSplitEnumerator( } @Override - public void open() { - enumeratorContext.getEventListener().onEvent(new EnumeratorOpenEvent()); - } + public void open() {} @Override public void run() throws Exception { @@ -69,9 +65,7 @@ public void run() throws Exception { } @Override - public void close() throws IOException { - enumeratorContext.getEventListener().onEvent(new EnumeratorCloseEvent()); - } + public void close() throws IOException {} @Override public void addSplitsBack(List splits, int subtaskId) { diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/JdbcCatalogOptions.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/JdbcCatalogOptions.java index c2f2405ee00..c412ca92186 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/JdbcCatalogOptions.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/JdbcCatalogOptions.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.api.configuration.Option; import org.apache.seatunnel.api.configuration.Options; import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions; public interface JdbcCatalogOptions { Option BASE_URL = @@ -59,7 +60,10 @@ public interface JdbcCatalogOptions { + "For example, when using OceanBase database, you need to set it to 'mysql' or 'oracle'."); OptionRule.Builder BASE_RULE = - OptionRule.builder().required(BASE_URL).required(USERNAME, PASSWORD).optional(SCHEMA); + OptionRule.builder() + .required(BASE_URL) + .required(USERNAME, PASSWORD) + .optional(SCHEMA, JdbcOptions.DECIMAL_TYPE_NARROWING); Option TABLE_PREFIX = Options.key("tablePrefix") diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java index 5aa6dcd874d..ccbbfb509dd 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalog.java @@ -25,6 +25,7 @@ import 
org.apache.seatunnel.common.utils.JdbcUrlUtil; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.AbstractJdbcCatalog; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.utils.CatalogUtils; +import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oracle.OracleTypeConverter; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oracle.OracleTypeMapper; @@ -71,13 +72,32 @@ public class OracleCatalog extends AbstractJdbcCatalog { + "ORDER BY \n" + " cols.column_id \n"; + private boolean decimalTypeNarrowing; + public OracleCatalog( String catalogName, String username, String pwd, JdbcUrlUtil.UrlInfo urlInfo, String defaultSchema) { + this( + catalogName, + username, + pwd, + urlInfo, + defaultSchema, + JdbcOptions.DECIMAL_TYPE_NARROWING.defaultValue()); + } + + public OracleCatalog( + String catalogName, + String username, + String pwd, + JdbcUrlUtil.UrlInfo urlInfo, + String defaultSchema, + boolean decimalTypeNarrowing) { super(catalogName, username, pwd, urlInfo, defaultSchema); + this.decimalTypeNarrowing = decimalTypeNarrowing; } @Override @@ -162,7 +182,7 @@ protected Column buildColumn(ResultSet resultSet) throws SQLException { .defaultValue(defaultValue) .comment(columnComment) .build(); - return OracleTypeConverter.INSTANCE.convert(typeDefine); + return new OracleTypeConverter(decimalTypeNarrowing).convert(typeDefine); } @Override @@ -183,7 +203,8 @@ private List listTables() { @Override public CatalogTable getTable(String sqlQuery) throws SQLException { Connection defaultConnection = getConnection(defaultUrl); - return CatalogUtils.getCatalogTable(defaultConnection, sqlQuery, new OracleTypeMapper()); + return CatalogUtils.getCatalogTable( + defaultConnection, sqlQuery, new OracleTypeMapper(decimalTypeNarrowing)); } @Override diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogFactory.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogFactory.java index 7c90c79347a..2b51d976212 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogFactory.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/oracle/OracleCatalogFactory.java @@ -25,6 +25,7 @@ import org.apache.seatunnel.api.table.factory.Factory; import org.apache.seatunnel.common.utils.JdbcUrlUtil; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.JdbcCatalogOptions; +import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.DatabaseIdentifier; import com.google.auto.service.AutoService; @@ -52,7 +53,8 @@ public Catalog createCatalog(String catalogName, ReadonlyConfig options) { options.get(JdbcCatalogOptions.USERNAME), options.get(JdbcCatalogOptions.PASSWORD), urlInfo, - options.get(JdbcCatalogOptions.SCHEMA)); + options.get(JdbcCatalogOptions.SCHEMA), + options.get(JdbcOptions.DECIMAL_TYPE_NARROWING)); } @Override diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcConnectionConfig.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcConnectionConfig.java 
index dc379bb38a5..053ab71a413 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcConnectionConfig.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcConnectionConfig.java @@ -43,6 +43,8 @@ public class JdbcConnectionConfig implements Serializable { public String xaDataSourceClassName; + public boolean decimalTypeNarrowing = JdbcOptions.DECIMAL_TYPE_NARROWING.defaultValue(); + public int maxCommitAttempts = JdbcOptions.MAX_COMMIT_ATTEMPTS.defaultValue(); public int transactionTimeoutSec = JdbcOptions.TRANSACTION_TIMEOUT_SEC.defaultValue(); @@ -81,6 +83,8 @@ public static JdbcConnectionConfig of(ReadonlyConfig config) { config.getOptional(JdbcOptions.USER).ifPresent(builder::username); config.getOptional(JdbcOptions.PASSWORD).ifPresent(builder::password); config.getOptional(JdbcOptions.PROPERTIES).ifPresent(builder::properties); + config.getOptional(JdbcOptions.DECIMAL_TYPE_NARROWING) + .ifPresent(builder::decimalTypeNarrowing); return builder.build(); } @@ -108,6 +112,10 @@ public int getMaxRetries() { return maxRetries; } + public boolean isDecimalTypeNarrowing() { + return decimalTypeNarrowing; + } + public Optional getUsername() { return Optional.ofNullable(username); } @@ -153,6 +161,7 @@ public static final class Builder { private boolean autoCommit = JdbcOptions.AUTO_COMMIT.defaultValue(); private int batchSize = JdbcOptions.BATCH_SIZE.defaultValue(); private String xaDataSourceClassName; + private boolean decimalTypeNarrowing = JdbcOptions.DECIMAL_TYPE_NARROWING.defaultValue(); private int maxCommitAttempts = JdbcOptions.MAX_COMMIT_ATTEMPTS.defaultValue(); private int transactionTimeoutSec = JdbcOptions.TRANSACTION_TIMEOUT_SEC.defaultValue(); private Map properties; @@ -183,6 +192,11 @@ public Builder connectionCheckTimeoutSeconds(int connectionCheckTimeoutSeconds) return this; } + public Builder decimalTypeNarrowing(boolean decimalTypeNarrowing) { + this.decimalTypeNarrowing = decimalTypeNarrowing; + return this; + } + public Builder maxRetries(int maxRetries) { this.maxRetries = maxRetries; return this; @@ -267,6 +281,7 @@ public JdbcConnectionConfig build() { jdbcConnectionConfig.transactionTimeoutSec = this.transactionTimeoutSec; jdbcConnectionConfig.maxCommitAttempts = this.maxCommitAttempts; jdbcConnectionConfig.xaDataSourceClassName = this.xaDataSourceClassName; + jdbcConnectionConfig.decimalTypeNarrowing = this.decimalTypeNarrowing; jdbcConnectionConfig.useKerberos = this.useKerberos; jdbcConnectionConfig.kerberosPrincipal = this.kerberosPrincipal; jdbcConnectionConfig.kerberosKeytabPath = this.kerberosKeytabPath; diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcOptions.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcOptions.java index 7f0ec48f365..976650456b0 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcOptions.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcOptions.java @@ -101,6 +101,13 @@ public interface JdbcOptions { .defaultValue(false) .withDescription("generate sql using the database table"); + Option DECIMAL_TYPE_NARROWING = + Options.key("decimal_type_narrowing") + .booleanType() + .defaultValue(true) + .withDescription( + 
"decimal type narrowing, if true, the decimal type will be narrowed to the int or long type if without loss of precision. Only support for Oracle at now."); + Option XA_DATA_SOURCE_CLASS_NAME = Options.key("xa_data_source_class_name") .stringType() diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceConfig.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceConfig.java index 74c78013183..09cc92d70e8 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceConfig.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/config/JdbcSourceConfig.java @@ -42,6 +42,7 @@ public class JdbcSourceConfig implements Serializable { private double splitEvenDistributionFactorLowerBound; private int splitSampleShardingThreshold; private int splitInverseSamplingRate; + private boolean decimalTypeNarrowing; public static JdbcSourceConfig of(ReadonlyConfig config) { JdbcSourceConfig.Builder builder = JdbcSourceConfig.builder(); @@ -53,7 +54,7 @@ public static JdbcSourceConfig of(ReadonlyConfig config) { boolean isOldVersion = config.getOptional(JdbcOptions.QUERY).isPresent() && config.getOptional(JdbcOptions.PARTITION_COLUMN).isPresent(); - builder.useDynamicSplitter(isOldVersion ? false : true); + builder.useDynamicSplitter(!isOldVersion); builder.splitSize(config.get(JdbcSourceOptions.SPLIT_SIZE)); builder.splitEvenDistributionFactorUpperBound( @@ -64,6 +65,8 @@ public static JdbcSourceConfig of(ReadonlyConfig config) { config.get(JdbcSourceOptions.SPLIT_SAMPLE_SHARDING_THRESHOLD)); builder.splitInverseSamplingRate(config.get(JdbcSourceOptions.SPLIT_INVERSE_SAMPLING_RATE)); + builder.decimalTypeNarrowing(config.get(JdbcOptions.DECIMAL_TYPE_NARROWING)); + config.getOptional(JdbcSourceOptions.WHERE_CONDITION) .ifPresent( whereConditionClause -> { diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverter.java index d359f3fef0d..023fa949cf1 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverter.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverter.java @@ -86,6 +86,16 @@ public class OracleTypeConverter implements TypeConverter { public static final long BYTES_4GB = (long) Math.pow(2, 32); public static final OracleTypeConverter INSTANCE = new OracleTypeConverter(); + private final boolean decimalTypeNarrowing; + + public OracleTypeConverter() { + this(true); + } + + public OracleTypeConverter(boolean decimalTypeNarrowing) { + this.decimalTypeNarrowing = decimalTypeNarrowing; + } + @Override public String identifier() { return DatabaseIdentifier.ORACLE; @@ -119,12 +129,14 @@ public Column convert(BasicTypeDefine typeDefine) { if (scale <= 0) { int newPrecision = (int) (precision - scale); - if (newPrecision == 1) { - builder.dataType(BasicType.BOOLEAN_TYPE); - } else if (newPrecision <= 9) { - builder.dataType(BasicType.INT_TYPE); - } else if (newPrecision <= 18) { - 
builder.dataType(BasicType.LONG_TYPE); + if (newPrecision <= 18 && decimalTypeNarrowing) { + if (newPrecision == 1) { + builder.dataType(BasicType.BOOLEAN_TYPE); + } else if (newPrecision <= 9) { + builder.dataType(BasicType.INT_TYPE); + } else { + builder.dataType(BasicType.LONG_TYPE); + } } else if (newPrecision < 38) { builder.dataType(new DecimalType(newPrecision, 0)); builder.columnLength((long) newPrecision); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeMapper.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeMapper.java index ce5ef8af889..bbdd19af8eb 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeMapper.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeMapper.java @@ -20,6 +20,7 @@ import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.api.table.converter.BasicTypeDefine; import org.apache.seatunnel.connectors.seatunnel.common.source.TypeDefineUtils; +import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectTypeMapper; import lombok.extern.slf4j.Slf4j; @@ -31,9 +32,19 @@ @Slf4j public class OracleTypeMapper implements JdbcDialectTypeMapper { + private final boolean decimalTypeNarrowing; + + public OracleTypeMapper() { + this(JdbcOptions.DECIMAL_TYPE_NARROWING.defaultValue()); + } + + public OracleTypeMapper(boolean decimalTypeNarrowing) { + this.decimalTypeNarrowing = decimalTypeNarrowing; + } + @Override public Column mappingColumn(BasicTypeDefine typeDefine) { - return OracleTypeConverter.INSTANCE.convert(typeDefine); + return new OracleTypeConverter(decimalTypeNarrowing).convert(typeDefine); } @Override diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java index 860131041a9..1fa379acb4c 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/utils/JdbcCatalogUtils.java @@ -34,6 +34,7 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.JdbcCatalogOptions; import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.utils.CatalogUtils; import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcConnectionConfig; +import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcOptions; import org.apache.seatunnel.connectors.seatunnel.jdbc.config.JdbcSourceTableConfig; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.connection.JdbcConnectionProvider; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect; @@ -395,6 +396,8 @@ private static ReadonlyConfig extractCatalogConfig(JdbcConnectionConfig config) .ifPresent(val -> catalogConfig.put(JdbcCatalogOptions.PASSWORD.key(), val)); Optional.ofNullable(config.getCompatibleMode()) .ifPresent(val -> catalogConfig.put(JdbcCatalogOptions.COMPATIBLE_MODE.key(), val)); + 
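Reviewer note before the updated OracleTypeConverterTest below: the new decimal_type_narrowing option is now threaded from JdbcOptions through JdbcSourceConfig, JdbcConnectionConfig, the Oracle catalog and OracleTypeConverter. A configuration-level sketch of how a user would toggle it may help; the connection details below are placeholders, and only the decimal_type_narrowing key comes from this change. With the default (true), Oracle NUMBER(p,0) columns with p = 1, p <= 9 and p <= 18 narrow to Boolean, Int and Long respectively; with false they stay DECIMAL(p,0), which is what the tests below assert.

```hocon
# Hypothetical Jdbc source reading from Oracle with decimal narrowing disabled;
# url/driver/user/password/query are placeholder values.
source {
  Jdbc {
    url = "jdbc:oracle:thin:@localhost:1521/ORCLCDB"
    driver = "oracle.jdbc.OracleDriver"
    user = "example_user"
    password = "example_password"
    query = "SELECT ID, AMOUNT FROM EXAMPLE_TABLE"
    decimal_type_narrowing = false  # keep NUMBER(p,0), p <= 18, as DECIMAL(p,0)
  }
}
```

The catalog path picks up the same flag via JdbcCatalogOptions and JdbcCatalogUtils, so catalog-derived and query-derived schemas should narrow consistently.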
catalogConfig.put( + JdbcOptions.DECIMAL_TYPE_NARROWING.key(), config.isDecimalTypeNarrowing()); return ReadonlyConfig.fromMap(catalogConfig); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverterTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverterTest.java index 26238bad303..d4a8defddab 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverterTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/oracle/OracleTypeConverterTest.java @@ -36,12 +36,14 @@ public class OracleTypeConverterTest { + private static final OracleTypeConverter INSTANCE = new OracleTypeConverter(); + @Test public void testConvertUnsupported() { BasicTypeDefine typeDefine = BasicTypeDefine.builder().name("test").columnType("aaa").dataType("aaa").build(); try { - OracleTypeConverter.INSTANCE.convert(typeDefine); + INSTANCE.convert(typeDefine); Assertions.fail(); } catch (SeaTunnelRuntimeException e) { // ignore @@ -50,6 +52,113 @@ public void testConvertUnsupported() { } } + @Test + public void testConvertNumberWithoutDecimalTypeNarrowing() { + OracleTypeConverter converter = new OracleTypeConverter(false); + + BasicTypeDefine typeDefine = + BasicTypeDefine.builder() + .name("test") + .columnType("number") + .dataType("number") + .build(); + Column column = converter.convert(typeDefine); + + Assertions.assertEquals(typeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(38, 18), column.getDataType()); + Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + + typeDefine = + BasicTypeDefine.builder() + .name("test") + .columnType("number(38,127)") + .dataType("number") + .precision(38L) + .scale(127) + .build(); + column = converter.convert(typeDefine); + Assertions.assertEquals(typeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(38, 18), column.getDataType()); + Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + + typeDefine = + BasicTypeDefine.builder() + .name("test") + .columnType("number") + .dataType("number") + .scale(0) + .build(); + column = converter.convert(typeDefine); + Assertions.assertEquals(typeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); + Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + + typeDefine = + BasicTypeDefine.builder() + .name("test") + .columnType("number(1,0)") + .dataType("number") + .precision(1L) + .scale(0) + .build(); + column = converter.convert(typeDefine); + Assertions.assertEquals(typeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(1, 0), column.getDataType()); + Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + + typeDefine = + BasicTypeDefine.builder() + .name("test") + .columnType("number(8,0)") + .dataType("number") + .precision(8L) + .scale(0) + .build(); + column = converter.convert(typeDefine); + Assertions.assertEquals(typeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(8, 0), column.getDataType()); + Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + + typeDefine = + 
BasicTypeDefine.builder() + .name("test") + .columnType("number(18,0)") + .dataType("number") + .precision(18L) + .scale(0) + .build(); + column = converter.convert(typeDefine); + Assertions.assertEquals(typeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(18, 0), column.getDataType()); + Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + + typeDefine = + BasicTypeDefine.builder() + .name("test") + .columnType("number(38,0)") + .dataType("number") + .precision(38L) + .scale(0) + .build(); + column = converter.convert(typeDefine); + Assertions.assertEquals(typeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); + Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + + typeDefine = + BasicTypeDefine.builder() + .name("test") + .columnType("number(39,0)") + .dataType("number") + .precision(39L) + .scale(0) + .build(); + column = converter.convert(typeDefine); + Assertions.assertEquals(typeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); + Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + } + @Test public void testConvertInteger() { BasicTypeDefine typeDefine = @@ -58,10 +167,24 @@ public void testConvertInteger() { .columnType("integer") .dataType("integer") .build(); - Column column = OracleTypeConverter.INSTANCE.convert(typeDefine); + Column column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); + + // generated by int/smallint type in oracle create table sql + BasicTypeDefine numberTypeDefine = + BasicTypeDefine.builder() + .name("test") + .columnType("number") + .dataType("number") + .precision(null) + .scale(0) + .build(); + column = INSTANCE.convert(numberTypeDefine); + Assertions.assertEquals(numberTypeDefine.getName(), column.getName()); + Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); + Assertions.assertEquals(numberTypeDefine.getColumnType(), column.getSourceType()); } @Test @@ -72,7 +195,7 @@ public void testConvertNumber() { .columnType("number") .dataType("number") .build(); - Column column = OracleTypeConverter.INSTANCE.convert(typeDefine); + Column column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(38, 18), column.getDataType()); @@ -86,7 +209,7 @@ public void testConvertNumber() { .precision(38L) .scale(127) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(38, 18), column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -98,7 +221,7 @@ public void testConvertNumber() { .dataType("number") .scale(0) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -111,7 +234,7 @@ public void testConvertNumber() { .precision(1L) .scale(0) .build(); - column = 
OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.BOOLEAN_TYPE, column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -124,7 +247,7 @@ public void testConvertNumber() { .precision(8L) .scale(0) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.INT_TYPE, column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -137,7 +260,7 @@ public void testConvertNumber() { .precision(18L) .scale(0) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.LONG_TYPE, column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -150,7 +273,7 @@ public void testConvertNumber() { .precision(38L) .scale(0) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -163,7 +286,7 @@ public void testConvertNumber() { .precision(39L) .scale(0) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -177,7 +300,7 @@ public void testConvertFloat() { .columnType("float") .dataType("float") .build(); - Column column = OracleTypeConverter.INSTANCE.convert(typeDefine); + Column column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(38, 18), column.getDataType()); @@ -189,7 +312,7 @@ public void testConvertFloat() { .columnType("binary_float") .dataType("binary_float") .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.FLOAT_TYPE, column.getDataType()); @@ -197,7 +320,7 @@ public void testConvertFloat() { typeDefine = BasicTypeDefine.builder().name("test").columnType("real").dataType("real").build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.FLOAT_TYPE, column.getDataType()); @@ -212,7 +335,7 @@ public void testConvertDouble() { .columnType("binary_double") .dataType("binary_double") .build(); - Column column = OracleTypeConverter.INSTANCE.convert(typeDefine); + Column column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.DOUBLE_TYPE, column.getDataType()); @@ -228,7 +351,7 @@ public void testConvertChar() { .dataType("char") .length(1L) .build(); - Column column = OracleTypeConverter.INSTANCE.convert(typeDefine); + Column 
column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -242,7 +365,7 @@ public void testConvertChar() { .dataType("nchar") .length(1L) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -256,7 +379,7 @@ public void testConvertChar() { .dataType("varchar") .length(1L) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -270,7 +393,7 @@ public void testConvertChar() { .dataType("varchar2") .length(1L) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -284,7 +407,7 @@ public void testConvertChar() { .dataType("nvarchar2") .length(1L) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -297,7 +420,7 @@ public void testConvertChar() { .columnType("rowid") .dataType("rowid") .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -311,7 +434,7 @@ public void testConvertChar() { .dataType("xmltype") .length(1L) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -325,7 +448,7 @@ public void testConvertChar() { .dataType("sys.xmltype") .length(1L) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -339,7 +462,7 @@ public void testConvertChar() { .dataType("long") .length(1L) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -348,7 +471,7 @@ public void testConvertChar() { typeDefine = BasicTypeDefine.builder().name("test").columnType("clob").dataType("clob").build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, column.getDataType()); @@ -361,7 +484,7 @@ public void testConvertChar() { .columnType("nclob") .dataType("nclob") .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.STRING_TYPE, 
column.getDataType()); @@ -373,7 +496,7 @@ public void testConvertChar() { public void testConvertBytes() { BasicTypeDefine typeDefine = BasicTypeDefine.builder().name("test").columnType("blob").dataType("blob").build(); - Column column = OracleTypeConverter.INSTANCE.convert(typeDefine); + Column column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(PrimitiveByteArrayType.INSTANCE, column.getDataType()); @@ -382,7 +505,7 @@ public void testConvertBytes() { typeDefine = BasicTypeDefine.builder().name("test").columnType("raw").dataType("raw").build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(PrimitiveByteArrayType.INSTANCE, column.getDataType()); @@ -396,7 +519,7 @@ public void testConvertBytes() { .dataType("raw") .length(10L) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(PrimitiveByteArrayType.INSTANCE, column.getDataType()); @@ -409,7 +532,7 @@ public void testConvertBytes() { .columnType("long raw") .dataType("long raw") .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(PrimitiveByteArrayType.INSTANCE, column.getDataType()); @@ -421,7 +544,7 @@ public void testConvertBytes() { public void testConvertDatetime() { BasicTypeDefine typeDefine = BasicTypeDefine.builder().name("test").columnType("date").dataType("date").build(); - Column column = OracleTypeConverter.INSTANCE.convert(typeDefine); + Column column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(LocalTimeType.LOCAL_DATE_TIME_TYPE, column.getDataType()); @@ -435,7 +558,7 @@ public void testConvertDatetime() { .dataType("timestamp") .scale(6) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(LocalTimeType.LOCAL_DATE_TIME_TYPE, column.getDataType()); @@ -449,7 +572,7 @@ public void testConvertDatetime() { .dataType("timestamp with time zone") .scale(6) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(LocalTimeType.LOCAL_DATE_TIME_TYPE, column.getDataType()); @@ -463,7 +586,7 @@ public void testConvertDatetime() { .dataType("timestamp with local time zone") .scale(6) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(LocalTimeType.LOCAL_DATE_TIME_TYPE, column.getDataType()); @@ -482,7 +605,7 @@ public void testReconvertUnsupported() { null, null); try { - OracleTypeConverter.INSTANCE.reconvert(column); + INSTANCE.reconvert(column); Assertions.fail(); } catch (SeaTunnelRuntimeException e) { // ignore @@ -496,7 +619,7 @@ public void testReconvertBoolean() { Column column = PhysicalColumn.builder().name("test").dataType(BasicType.BOOLEAN_TYPE).build(); - BasicTypeDefine typeDefine = 
OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( String.format("%s(%s)", OracleTypeConverter.ORACLE_NUMBER, 1), @@ -509,7 +632,7 @@ public void testReconvertBoolean() { public void testReconvertByte() { Column column = PhysicalColumn.builder().name("test").dataType(BasicType.BYTE_TYPE).build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals(OracleTypeConverter.ORACLE_INTEGER, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_INTEGER, typeDefine.getDataType()); @@ -520,7 +643,7 @@ public void testReconvertShort() { Column column = PhysicalColumn.builder().name("test").dataType(BasicType.SHORT_TYPE).build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals(OracleTypeConverter.ORACLE_INTEGER, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_INTEGER, typeDefine.getDataType()); @@ -530,7 +653,7 @@ public void testReconvertShort() { public void testReconvertInt() { Column column = PhysicalColumn.builder().name("test").dataType(BasicType.INT_TYPE).build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals(OracleTypeConverter.ORACLE_INTEGER, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_INTEGER, typeDefine.getDataType()); @@ -540,7 +663,7 @@ public void testReconvertInt() { public void testReconvertLong() { Column column = PhysicalColumn.builder().name("test").dataType(BasicType.LONG_TYPE).build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals(OracleTypeConverter.ORACLE_INTEGER, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_INTEGER, typeDefine.getDataType()); @@ -551,7 +674,7 @@ public void testReconvertFloat() { Column column = PhysicalColumn.builder().name("test").dataType(BasicType.FLOAT_TYPE).build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( OracleTypeConverter.ORACLE_BINARY_FLOAT, typeDefine.getColumnType()); @@ -563,7 +686,7 @@ public void testReconvertDouble() { Column column = PhysicalColumn.builder().name("test").dataType(BasicType.DOUBLE_TYPE).build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( OracleTypeConverter.ORACLE_BINARY_DOUBLE, typeDefine.getColumnType()); @@ -575,7 +698,7 @@ public void testReconvertDecimal() { Column column = PhysicalColumn.builder().name("test").dataType(new DecimalType(0, 0)).build(); - BasicTypeDefine typeDefine = 
OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( String.format( @@ -588,7 +711,7 @@ public void testReconvertDecimal() { column = PhysicalColumn.builder().name("test").dataType(new DecimalType(10, 2)).build(); - typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( String.format("%s(%s,%s)", OracleTypeConverter.ORACLE_NUMBER, 10, 2), @@ -605,7 +728,7 @@ public void testReconvertBytes() { .columnLength(null) .build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals(OracleTypeConverter.ORACLE_BLOB, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_BLOB, typeDefine.getDataType()); @@ -617,7 +740,7 @@ public void testReconvertBytes() { .columnLength(2000L) .build(); - typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( String.format("%s(%s)", OracleTypeConverter.ORACLE_RAW, column.getColumnLength()), @@ -631,7 +754,7 @@ public void testReconvertBytes() { .columnLength(BYTES_2GB) .build(); - typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals(OracleTypeConverter.ORACLE_BLOB, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_BLOB, typeDefine.getDataType()); @@ -643,7 +766,7 @@ public void testReconvertBytes() { .columnLength(BYTES_2GB + 1) .build(); - typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals(OracleTypeConverter.ORACLE_BLOB, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_BLOB, typeDefine.getDataType()); @@ -658,7 +781,7 @@ public void testReconvertString() { .columnLength(null) .build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals("VARCHAR2(4000)", typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_VARCHAR2, typeDefine.getDataType()); @@ -670,7 +793,7 @@ public void testReconvertString() { .columnLength(2000L) .build(); - typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( String.format( @@ -685,7 +808,7 @@ public void testReconvertString() { .columnLength(4000L) .build(); - typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( String.format( @@ -700,7 +823,7 @@ public void testReconvertString() { .columnLength(40001L) .build(); - typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + typeDefine = INSTANCE.reconvert(column); 
Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals(OracleTypeConverter.ORACLE_CLOB, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_CLOB, typeDefine.getDataType()); @@ -714,7 +837,7 @@ public void testReconvertDate() { .dataType(LocalTimeType.LOCAL_DATE_TYPE) .build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals(OracleTypeConverter.ORACLE_DATE, typeDefine.getColumnType()); Assertions.assertEquals(OracleTypeConverter.ORACLE_DATE, typeDefine.getDataType()); @@ -728,7 +851,7 @@ public void testReconvertDatetime() { .dataType(LocalTimeType.LOCAL_DATE_TIME_TYPE) .build(); - BasicTypeDefine typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + BasicTypeDefine typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( OracleTypeConverter.ORACLE_TIMESTAMP_WITH_LOCAL_TIME_ZONE, @@ -744,7 +867,7 @@ public void testReconvertDatetime() { .scale(3) .build(); - typeDefine = OracleTypeConverter.INSTANCE.reconvert(column); + typeDefine = INSTANCE.reconvert(column); Assertions.assertEquals(column.getName(), typeDefine.getName()); Assertions.assertEquals( String.format("TIMESTAMP(%s) WITH LOCAL TIME ZONE", column.getScale()), @@ -765,7 +888,7 @@ public void testNumberWithNegativeScale() { .precision(38L) .scale(-1) .build(); - Column column = OracleTypeConverter.INSTANCE.convert(typeDefine); + Column column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(38, 0), column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -778,7 +901,7 @@ public void testNumberWithNegativeScale() { .precision(5L) .scale(-2) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.INT_TYPE, column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -791,7 +914,7 @@ public void testNumberWithNegativeScale() { .precision(9L) .scale(-2) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(BasicType.LONG_TYPE, column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); @@ -804,7 +927,7 @@ public void testNumberWithNegativeScale() { .precision(14L) .scale(-11) .build(); - column = OracleTypeConverter.INSTANCE.convert(typeDefine); + column = INSTANCE.convert(typeDefine); Assertions.assertEquals(typeDefine.getName(), column.getName()); Assertions.assertEquals(new DecimalType(25, 0), column.getDataType()); Assertions.assertEquals(typeDefine.getColumnType(), column.getSourceType()); diff --git a/seatunnel-connectors-v2/connector-kafka/pom.xml b/seatunnel-connectors-v2/connector-kafka/pom.xml index 3251de5a8e7..9ffff7dbc0c 100644 --- a/seatunnel-connectors-v2/connector-kafka/pom.xml +++ b/seatunnel-connectors-v2/connector-kafka/pom.xml @@ -77,6 +77,11 @@ seatunnel-format-avro ${project.version} + + org.apache.seatunnel + seatunnel-format-protobuf + ${project.version} + diff --git 
a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/config/Config.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/config/Config.java index 6d6f43dc4c2..a907c9bc212 100644 --- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/config/Config.java +++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/config/Config.java @@ -177,10 +177,24 @@ public class Config { .defaultValue(KafkaSemantics.NON) .withDescription( "Semantics that can be chosen EXACTLY_ONCE/AT_LEAST_ONCE/NON, default NON."); + public static final Option>> TABLE_LIST = Options.key("table_list") .type(new TypeReference>>() {}) .noDefaultValue() .withDescription( "Topic list config. You can configure only one `table_list` or one `topic` at the same time"); + + public static final Option PROTOBUF_SCHEMA = + Options.key("protobuf_schema") + .stringType() + .noDefaultValue() + .withDescription( + "The protobuf schema definition, used to serialize and parse protobuf data."); + + public static final Option PROTOBUF_MESSAGE_NAME = + Options.key("protobuf_message_name") + .stringType() + .noDefaultValue() + .withDescription("The protobuf message name, used to locate the message type within the protobuf schema."); } diff --git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/config/MessageFormat.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/config/MessageFormat.java index f02cebcbe35..a877c76c37d 100644 --- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/config/MessageFormat.java +++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/config/MessageFormat.java @@ -26,5 +26,6 @@ public enum MessageFormat { COMPATIBLE_KAFKA_CONNECT_JSON, OGG_JSON, AVRO, - MAXWELL_JSON + MAXWELL_JSON, + PROTOBUF } diff --git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/serialize/DefaultSeaTunnelRowSerializer.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/serialize/DefaultSeaTunnelRowSerializer.java index d4a77e74b93..2fb251571f1 100644 --- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/serialize/DefaultSeaTunnelRowSerializer.java +++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/serialize/DefaultSeaTunnelRowSerializer.java @@ -17,6 +17,7 @@ package org.apache.seatunnel.connectors.seatunnel.kafka.serialize; +import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.serialization.SerializationSchema; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; @@ -33,6 +34,7 @@ import org.apache.seatunnel.format.json.exception.SeaTunnelJsonFormatException; import org.apache.seatunnel.format.json.maxwell.MaxWellJsonSerializationSchema; import org.apache.seatunnel.format.json.ogg.OggJsonSerializationSchema; +import org.apache.seatunnel.format.protobuf.ProtobufSerializationSchema; import org.apache.seatunnel.format.text.TextSerializationSchema; import org.apache.kafka.clients.producer.ProducerRecord; @@ -46,6 +48,9 @@ import 
java.util.regex.Matcher; import java.util.regex.Pattern; +import static org.apache.seatunnel.connectors.seatunnel.kafka.config.Config.PROTOBUF_MESSAGE_NAME; +import static org.apache.seatunnel.connectors.seatunnel.kafka.config.Config.PROTOBUF_SCHEMA; + @RequiredArgsConstructor public class DefaultSeaTunnelRowSerializer implements SeaTunnelRowSerializer { private final Function topicExtractor; @@ -67,13 +72,17 @@ public ProducerRecord serializeRow(SeaTunnelRow row) { } public static DefaultSeaTunnelRowSerializer create( - String topic, SeaTunnelRowType rowType, MessageFormat format, String delimiter) { + String topic, + SeaTunnelRowType rowType, + MessageFormat format, + String delimiter, + ReadonlyConfig pluginConfig) { return new DefaultSeaTunnelRowSerializer( topicExtractor(topic, rowType, format), partitionExtractor(null), timestampExtractor(), - keyExtractor(null, rowType, format, delimiter), - valueExtractor(rowType, format, delimiter), + keyExtractor(null, rowType, format, delimiter, pluginConfig), + valueExtractor(rowType, format, delimiter, pluginConfig), headersExtractor()); } @@ -82,13 +91,14 @@ public static DefaultSeaTunnelRowSerializer create( Integer partition, SeaTunnelRowType rowType, MessageFormat format, - String delimiter) { + String delimiter, + ReadonlyConfig pluginConfig) { return new DefaultSeaTunnelRowSerializer( topicExtractor(topic, rowType, format), partitionExtractor(partition), timestampExtractor(), - keyExtractor(null, rowType, format, delimiter), - valueExtractor(rowType, format, delimiter), + keyExtractor(null, rowType, format, delimiter, pluginConfig), + valueExtractor(rowType, format, delimiter, pluginConfig), headersExtractor()); } @@ -97,13 +107,14 @@ public static DefaultSeaTunnelRowSerializer create( List keyFields, SeaTunnelRowType rowType, MessageFormat format, - String delimiter) { + String delimiter, + ReadonlyConfig pluginConfig) { return new DefaultSeaTunnelRowSerializer( topicExtractor(topic, rowType, format), partitionExtractor(null), timestampExtractor(), - keyExtractor(keyFields, rowType, format, delimiter), - valueExtractor(rowType, format, delimiter), + keyExtractor(keyFields, rowType, format, delimiter, pluginConfig), + valueExtractor(rowType, format, delimiter, pluginConfig), headersExtractor()); } @@ -157,7 +168,8 @@ private static Function keyExtractor( List keyFields, SeaTunnelRowType rowType, MessageFormat format, - String delimiter) { + String delimiter, + ReadonlyConfig pluginConfig) { if (MessageFormat.COMPATIBLE_DEBEZIUM_JSON.equals(format)) { CompatibleDebeziumJsonSerializationSchema serializationSchema = new CompatibleDebeziumJsonSerializationSchema(rowType, true); @@ -172,14 +184,17 @@ private static Function keyExtractor( Function keyRowExtractor = createKeyRowExtractor(keyType, rowType); SerializationSchema serializationSchema = - createSerializationSchema(keyType, format, delimiter, true); + createSerializationSchema(keyType, format, delimiter, true, pluginConfig); return row -> serializationSchema.serialize(keyRowExtractor.apply(row)); } private static Function valueExtractor( - SeaTunnelRowType rowType, MessageFormat format, String delimiter) { + SeaTunnelRowType rowType, + MessageFormat format, + String delimiter, + ReadonlyConfig pluginConfig) { SerializationSchema serializationSchema = - createSerializationSchema(rowType, format, delimiter, false); + createSerializationSchema(rowType, format, delimiter, false, pluginConfig); return row -> serializationSchema.serialize(row); } @@ -212,7 +227,11 @@ private static Function 
createKeyRowExtractor( } private static SerializationSchema createSerializationSchema( - SeaTunnelRowType rowType, MessageFormat format, String delimiter, boolean isKey) { + SeaTunnelRowType rowType, + MessageFormat format, + String delimiter, + boolean isKey, + ReadonlyConfig pluginConfig) { switch (format) { case JSON: return new JsonSerializationSchema(rowType); @@ -233,6 +252,11 @@ private static SerializationSchema createSerializationSchema( return new CompatibleDebeziumJsonSerializationSchema(rowType, isKey); case AVRO: return new AvroSerializationSchema(rowType); + case PROTOBUF: + String protobufMessageName = pluginConfig.get(PROTOBUF_MESSAGE_NAME); + String protobufSchema = pluginConfig.get(PROTOBUF_SCHEMA); + return new ProtobufSerializationSchema( + rowType, protobufMessageName, protobufSchema); default: throw new SeaTunnelJsonFormatException( CommonErrorCodeDeprecated.UNSUPPORTED_DATA_TYPE, diff --git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaSinkWriter.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaSinkWriter.java index 44676f29d4c..6639a34a0bd 100644 --- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaSinkWriter.java +++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaSinkWriter.java @@ -188,15 +188,21 @@ private SeaTunnelRowSerializer getSerializer( getPartitionKeyFields(pluginConfig, seaTunnelRowType), seaTunnelRowType, messageFormat, - delimiter); + delimiter, + pluginConfig); } if (pluginConfig.get(PARTITION) != null) { return DefaultSeaTunnelRowSerializer.create( - topic, pluginConfig.get(PARTITION), seaTunnelRowType, messageFormat, delimiter); + topic, + pluginConfig.get(PARTITION), + seaTunnelRowType, + messageFormat, + delimiter, + pluginConfig); } // By default, all partitions are sent randomly return DefaultSeaTunnelRowSerializer.create( - topic, Arrays.asList(), seaTunnelRowType, messageFormat, delimiter); + topic, Arrays.asList(), seaTunnelRowType, messageFormat, delimiter, pluginConfig); } private KafkaSemantics getKafkaSemantics(ReadonlyConfig pluginConfig) { diff --git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceConfig.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceConfig.java index 1c782ca6ab7..935f8d3b849 100644 --- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceConfig.java +++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/source/KafkaSourceConfig.java @@ -44,6 +44,7 @@ import org.apache.seatunnel.format.json.exception.SeaTunnelJsonFormatException; import org.apache.seatunnel.format.json.maxwell.MaxWellJsonDeserializationSchema; import org.apache.seatunnel.format.json.ogg.OggJsonDeserializationSchema; +import org.apache.seatunnel.format.protobuf.ProtobufDeserializationSchema; import org.apache.seatunnel.format.text.TextDeserializationSchema; import org.apache.seatunnel.format.text.constant.TextFormatConstant; @@ -71,6 +72,8 @@ import static org.apache.seatunnel.connectors.seatunnel.kafka.config.Config.KEY_PARTITION_DISCOVERY_INTERVAL_MILLIS; import static 
org.apache.seatunnel.connectors.seatunnel.kafka.config.Config.MESSAGE_FORMAT_ERROR_HANDLE_WAY_OPTION; import static org.apache.seatunnel.connectors.seatunnel.kafka.config.Config.PATTERN; +import static org.apache.seatunnel.connectors.seatunnel.kafka.config.Config.PROTOBUF_MESSAGE_NAME; +import static org.apache.seatunnel.connectors.seatunnel.kafka.config.Config.PROTOBUF_SCHEMA; import static org.apache.seatunnel.connectors.seatunnel.kafka.config.Config.START_MODE; import static org.apache.seatunnel.connectors.seatunnel.kafka.config.Config.START_MODE_OFFSETS; import static org.apache.seatunnel.connectors.seatunnel.kafka.config.Config.START_MODE_TIMESTAMP; @@ -216,7 +219,15 @@ private CatalogTable createCatalogTable(ReadonlyConfig readonlyConfig) { return CatalogTable.of( TableIdentifier.of("", tablePath), tableSchema, - Collections.emptyMap(), + new HashMap() { + { + Optional.ofNullable(readonlyConfig.get(PROTOBUF_MESSAGE_NAME)) + .ifPresent(value -> put(PROTOBUF_MESSAGE_NAME.key(), value)); + + Optional.ofNullable(readonlyConfig.get(PROTOBUF_SCHEMA)) + .ifPresent(value -> put(PROTOBUF_SCHEMA.key(), value)); + } + }, Collections.emptyList(), null); } @@ -225,6 +236,8 @@ private DeserializationSchema createDeserializationSchema( CatalogTable catalogTable, ReadonlyConfig readonlyConfig) { SeaTunnelRowType seaTunnelRowType = catalogTable.getSeaTunnelRowType(); + MessageFormat format = readonlyConfig.get(FORMAT); + if (!readonlyConfig.getOptional(TableSchemaOptions.SCHEMA).isPresent()) { return TextDeserializationSchema.builder() .seaTunnelRowType(seaTunnelRowType) @@ -233,7 +246,6 @@ private DeserializationSchema createDeserializationSchema( .build(); } - MessageFormat format = readonlyConfig.get(FORMAT); switch (format) { case JSON: return new JsonDeserializationSchema(catalogTable, false, false); @@ -270,6 +282,8 @@ private DeserializationSchema createDeserializationSchema( return new DebeziumJsonDeserializationSchema(catalogTable, true, includeSchema); case AVRO: return new AvroDeserializationSchema(catalogTable); + case PROTOBUF: + return new ProtobufDeserializationSchema(catalogTable); default: throw new SeaTunnelJsonFormatException( CommonErrorCodeDeprecated.UNSUPPORTED_DATA_TYPE, diff --git a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/config/TablestoreOptions.java b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/config/TablestoreOptions.java index 7b2aa6bae67..be121818932 100644 --- a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/config/TablestoreOptions.java +++ b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/config/TablestoreOptions.java @@ -19,11 +19,14 @@ import org.apache.seatunnel.shade.com.typesafe.config.Config; +import org.apache.seatunnel.api.configuration.ReadonlyConfig; + import lombok.AllArgsConstructor; import lombok.Data; import java.io.Serializable; import java.util.List; +import java.util.Map; import static org.apache.seatunnel.connectors.seatunnel.tablestore.config.TablestoreConfig.BATCH_SIZE; @@ -45,6 +48,8 @@ public class TablestoreOptions implements Serializable { public int batchSize = Integer.parseInt(BATCH_SIZE.defaultValue()); + public TablestoreOptions() {} + public TablestoreOptions(Config config) { this.endpoint = config.getString(TablestoreConfig.END_POINT.key()); this.instanceName = 
config.getString(TablestoreConfig.INSTANCE_NAME.key()); @@ -57,4 +62,18 @@ public TablestoreOptions(Config config) { this.batchSize = config.getInt(BATCH_SIZE.key()); } } + + public static TablestoreOptions of(ReadonlyConfig config) { + Map map = config.getSourceMap(); + TablestoreOptions tablestoreOptions = new TablestoreOptions(); + tablestoreOptions.setEndpoint(config.get(TablestoreConfig.END_POINT)); + tablestoreOptions.setInstanceName(config.get(TablestoreConfig.INSTANCE_NAME)); + tablestoreOptions.setAccessKeyId(config.get(TablestoreConfig.ACCESS_KEY_ID)); + tablestoreOptions.setAccessKeySecret(config.get(TablestoreConfig.ACCESS_KEY_SECRET)); + tablestoreOptions.setTable(config.get(TablestoreConfig.TABLE)); + List keys = (List) map.get(TablestoreConfig.PRIMARY_KEYS.key()); + + tablestoreOptions.setPrimaryKeys(keys); + return tablestoreOptions; + } } diff --git a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/serialize/DefaultSeaTunnelRowDeserializer.java b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/serialize/DefaultSeaTunnelRowDeserializer.java new file mode 100644 index 00000000000..9bdb060a49d --- /dev/null +++ b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/serialize/DefaultSeaTunnelRowDeserializer.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.seatunnel.connectors.seatunnel.tablestore.serialize; + +import org.apache.seatunnel.api.table.type.SeaTunnelRow; + +import com.alicloud.openservices.tablestore.model.StreamRecord; + +import java.util.ArrayList; +import java.util.List; + +public class DefaultSeaTunnelRowDeserializer implements SeaTunnelRowDeserializer { + + @Override + public SeaTunnelRow deserialize(StreamRecord r) { + List fields = new ArrayList<>(); + r.getColumns() + .forEach( + k -> { + fields.add(k.getColumn().getValue()); + }); + return new SeaTunnelRow(fields.toArray()); + } +} diff --git a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/serialize/SeaTunnelRowDeserializer.java b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/serialize/SeaTunnelRowDeserializer.java new file mode 100644 index 00000000000..44a2560693f --- /dev/null +++ b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/serialize/SeaTunnelRowDeserializer.java @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.seatunnel.connectors.seatunnel.tablestore.serialize; + +import org.apache.seatunnel.api.table.type.SeaTunnelRow; + +import com.alicloud.openservices.tablestore.model.StreamRecord; + +public interface SeaTunnelRowDeserializer { + + SeaTunnelRow deserialize(StreamRecord streamRecord); +} diff --git a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDBSource.java b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDBSource.java new file mode 100644 index 00000000000..85c0062ed32 --- /dev/null +++ b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDBSource.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.seatunnel.connectors.seatunnel.tablestore.source; + +import org.apache.seatunnel.api.common.JobContext; +import org.apache.seatunnel.api.configuration.ReadonlyConfig; +import org.apache.seatunnel.api.source.Boundedness; +import org.apache.seatunnel.api.source.SeaTunnelSource; +import org.apache.seatunnel.api.source.SourceReader; +import org.apache.seatunnel.api.source.SourceReader.Context; +import org.apache.seatunnel.api.source.SourceSplitEnumerator; +import org.apache.seatunnel.api.source.SupportColumnProjection; +import org.apache.seatunnel.api.source.SupportParallelism; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.CatalogTableUtil; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.common.constants.JobMode; +import org.apache.seatunnel.connectors.seatunnel.tablestore.config.TablestoreOptions; + +import lombok.extern.slf4j.Slf4j; + +import java.util.List; + +@Slf4j +public class TableStoreDBSource + implements SeaTunnelSource, + SupportParallelism, + SupportColumnProjection { + + private TablestoreOptions tablestoreOptions; + private SeaTunnelRowType typeInfo; + private JobContext jobContext; + + @Override + public String getPluginName() { + return "Tablestore"; + } + + @Override + public List getProducedCatalogTables() { + return SeaTunnelSource.super.getProducedCatalogTables(); + } + + public TableStoreDBSource(ReadonlyConfig config) { + this.tablestoreOptions = TablestoreOptions.of(config); + CatalogTableUtil.buildWithConfig(config); + this.typeInfo = CatalogTableUtil.buildWithConfig(config).getSeaTunnelRowType(); + } + + @Override + public Boundedness getBoundedness() { + return JobMode.BATCH.equals(jobContext.getJobMode()) + ? Boundedness.BOUNDED + : Boundedness.UNBOUNDED; + } + + @Override + public SourceReader createReader(Context readerContext) + throws Exception { + return new TableStoreDBSourceReader(readerContext, tablestoreOptions, typeInfo); + } + + @Override + public SourceSplitEnumerator createEnumerator( + org.apache.seatunnel.api.source.SourceSplitEnumerator.Context + enumeratorContext) + throws Exception { + return new TableStoreDBSourceSplitEnumerator(enumeratorContext, tablestoreOptions); + } + + @Override + public SourceSplitEnumerator + restoreEnumerator( + org.apache.seatunnel.api.source.SourceSplitEnumerator.Context< + TableStoreDBSourceSplit> + enumeratorContext, + TableStoreDBSourceState checkpointState) + throws Exception { + return new TableStoreDBSourceSplitEnumerator( + enumeratorContext, tablestoreOptions, checkpointState); + } + + @Override + public void setJobContext(JobContext jobContext) { + this.jobContext = jobContext; + } +} diff --git a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDBSourceReader.java b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDBSourceReader.java new file mode 100644 index 00000000000..eefd4aae031 --- /dev/null +++ b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDBSourceReader.java @@ -0,0 +1,175 @@ +package org.apache.seatunnel.connectors.seatunnel.tablestore.source; +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +import org.apache.seatunnel.api.source.Collector; +import org.apache.seatunnel.api.source.SourceReader; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.connectors.seatunnel.tablestore.config.TablestoreOptions; + +import com.alicloud.openservices.tablestore.SyncClient; +import com.alicloud.openservices.tablestore.TunnelClient; +import com.alicloud.openservices.tablestore.model.tunnel.CreateTunnelRequest; +import com.alicloud.openservices.tablestore.model.tunnel.CreateTunnelResponse; +import com.alicloud.openservices.tablestore.model.tunnel.DeleteTunnelRequest; +import com.alicloud.openservices.tablestore.model.tunnel.DeleteTunnelResponse; +import com.alicloud.openservices.tablestore.model.tunnel.DescribeTunnelRequest; +import com.alicloud.openservices.tablestore.model.tunnel.DescribeTunnelResponse; +import com.alicloud.openservices.tablestore.model.tunnel.TunnelType; +import com.alicloud.openservices.tablestore.tunnel.worker.TunnelWorker; +import com.alicloud.openservices.tablestore.tunnel.worker.TunnelWorkerConfig; +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; +import java.util.Queue; +import java.util.concurrent.ConcurrentLinkedDeque; + +@Slf4j +public class TableStoreDBSourceReader + implements SourceReader { + + protected SourceReader.Context context; + protected TablestoreOptions tablestoreOptions; + protected SeaTunnelRowType seaTunnelRowType; + Queue pendingSplits = new ConcurrentLinkedDeque<>(); + private SyncClient client; + private volatile boolean noMoreSplit; + private TunnelClient tunnelClient; + + public TableStoreDBSourceReader( + SourceReader.Context context, + TablestoreOptions options, + SeaTunnelRowType seaTunnelRowType) { + + this.context = context; + this.tablestoreOptions = options; + this.seaTunnelRowType = seaTunnelRowType; + } + + @Override + public void open() throws Exception { + client = + new SyncClient( + tablestoreOptions.getEndpoint(), + tablestoreOptions.getAccessKeyId(), + tablestoreOptions.getAccessKeySecret(), + tablestoreOptions.getInstanceName()); + tunnelClient = + new TunnelClient( + tablestoreOptions.getEndpoint(), + tablestoreOptions.getAccessKeyId(), + tablestoreOptions.getAccessKeySecret(), + tablestoreOptions.getInstanceName()); + } + + @Override + public void close() throws IOException { + tunnelClient.shutdown(); + client.shutdown(); + } + + @Override + public void pollNext(Collector output) throws Exception { + synchronized (output.getCheckpointLock()) { + TableStoreDBSourceSplit split = pendingSplits.poll(); + if (Objects.nonNull(split)) { + read(split, output); + } + /*if (split == null) { + log.info( + "TableStore Source Reader [{}] waiting for splits", + 
context.getIndexOfSubtask()); + }*/ + if (noMoreSplit) { + // signal to the source that we have reached the end of the data. + log.info("Closed the bounded tablestore source"); + context.signalNoMoreElement(); + Thread.sleep(2000L); + } else { + Thread.sleep(1000L); + } + } + } + + private void read(TableStoreDBSourceSplit split, Collector output) { + String tunnelId = getTunnel(split); + TableStoreProcessor processor = + new TableStoreProcessor(split.getTableName(), split.getPrimaryKey(), output); + TunnelWorkerConfig workerConfig = new TunnelWorkerConfig(processor); + TunnelWorker worker = new TunnelWorker(tunnelId, tunnelClient, workerConfig); + try { + worker.connectAndWorking(); + } catch (Exception e) { + log.error("Start OTS tunnel failed.", e); + worker.shutdown(); + } + } + + public String getTunnel(TableStoreDBSourceSplit split) { + deleteTunnel(split); + String tunnelId = null; + String tunnelName = split.getTableName() + "_migration2aws_tunnel4" + split.getSplitId(); + + try { + DescribeTunnelRequest drequest = new DescribeTunnelRequest(split.getTableName(), tunnelName); + DescribeTunnelResponse dresp = tunnelClient.describeTunnel(drequest); + tunnelId = dresp.getTunnelInfo().getTunnelId(); + } catch (Exception be) { + CreateTunnelRequest crequest = + new CreateTunnelRequest( + split.getTableName(), tunnelName, TunnelType.valueOf("BaseAndStream")); + CreateTunnelResponse cresp = tunnelClient.createTunnel(crequest); + tunnelId = cresp.getTunnelId(); + } + log.info("Tunnel id: " + tunnelId); + return tunnelId; + } + + public void deleteTunnel(TableStoreDBSourceSplit split) { + String tunnelName = split.getTableName() + "_migration2aws_tunnel4" + split.getSplitId(); + try { + DeleteTunnelRequest drequest = + new DeleteTunnelRequest(split.getTableName(), tunnelName); + DeleteTunnelResponse dresp = tunnelClient.deleteTunnel(drequest); + log.info("Tunnel has been deleted: " + dresp.toString()); + } catch (Exception be) { + log.warn("Tunnel deletion skipped because it was not found: " + tunnelName); + } + } + + @Override + public List snapshotState(long checkpointId) throws Exception { + return new ArrayList<>(pendingSplits); + } + + @Override + public void addSplits(List splits) { + this.pendingSplits.addAll(splits); + } + + @Override + public void handleNoMoreSplits() { + log.info("Reader [{}] received noMoreSplit event.", context.getIndexOfSubtask()); + noMoreSplit = true; + } + + @Override + public void notifyCheckpointComplete(long checkpointId) throws Exception {} +} diff --git a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDBSourceSplit.java b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDBSourceSplit.java new file mode 100644 index 00000000000..24328b0a6f9 --- /dev/null +++ b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDBSourceSplit.java @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.seatunnel.connectors.seatunnel.tablestore.source; + +import org.apache.seatunnel.api.source.SourceSplit; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.Setter; + +@AllArgsConstructor +@Getter +@Setter +public class TableStoreDBSourceSplit implements SourceSplit { + + private Integer splitId; + private String tableName; + private String primaryKey; + + @Override + public String splitId() { + return splitId.toString(); + } +} diff --git a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDBSourceSplitEnumerator.java b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDBSourceSplitEnumerator.java new file mode 100644 index 00000000000..3dd58b7e69b --- /dev/null +++ b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDBSourceSplitEnumerator.java @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.seatunnel.connectors.seatunnel.tablestore.source; + +import org.apache.seatunnel.api.source.SourceSplitEnumerator; +import org.apache.seatunnel.connectors.seatunnel.tablestore.config.TablestoreOptions; + +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +@Slf4j +public class TableStoreDBSourceSplitEnumerator + implements SourceSplitEnumerator { + + private final SourceSplitEnumerator.Context enumeratorContext; + private final Map> pendingSplits; + private final TablestoreOptions tablestoreOptions; + + private final Object stateLock = new Object(); + private volatile boolean shouldEnumerate; + + /** + * @param enumeratorContext + * @param tablestoreOptions + */ + public TableStoreDBSourceSplitEnumerator( + Context enumeratorContext, + TablestoreOptions tablestoreOptions) { + this(enumeratorContext, tablestoreOptions, null); + } + + public TableStoreDBSourceSplitEnumerator( + Context enumeratorContext, + TablestoreOptions tablestoreOptions, + TableStoreDBSourceState sourceState) { + this.enumeratorContext = enumeratorContext; + this.tablestoreOptions = tablestoreOptions; + this.pendingSplits = new HashMap<>(); + this.shouldEnumerate = sourceState == null; + if (sourceState != null) { + this.shouldEnumerate = sourceState.isShouldEnumerate(); + this.pendingSplits.putAll(sourceState.getPendingSplits()); + } + } + + @Override + public void open() {} + + @Override + public void run() throws Exception { + Set readers = enumeratorContext.registeredReaders(); + if (shouldEnumerate) { + Set newSplits = getTableStoreDBSourceSplit(); + synchronized (stateLock) { + addPendingSplit(newSplits); + shouldEnumerate = false; + } + assignSplit(readers); + } + } + + private void assignSplit(Set readers) { + for (int reader : readers) { + List assignmentForReader = pendingSplits.remove(reader); + if (assignmentForReader != null && !assignmentForReader.isEmpty()) { + log.info("Assign splits {} to reader {}", assignmentForReader, reader); + try { + enumeratorContext.assignSplit(reader, assignmentForReader); + } catch (Exception e) { + log.error( + "Failed to assign splits {} to reader {}", + assignmentForReader, + reader, + e); + pendingSplits.put(reader, assignmentForReader); + } + } + } + } + + private Set getTableStoreDBSourceSplit() { + + Set allSplit = new HashSet<>(); + String tables = tablestoreOptions.getTable(); + String[] tableArr = tables.split(","); + for (int i = 0; i < tableArr.length; i++) { + allSplit.add( + new TableStoreDBSourceSplit( + i, tableArr[i], tablestoreOptions.getPrimaryKeys().get(i))); + } + return allSplit; + } + + private void addPendingSplit(Collection splits) { + int readerCount = enumeratorContext.currentParallelism(); + for (TableStoreDBSourceSplit split : splits) { + int ownerReader = split.getSplitId() % readerCount; + pendingSplits.computeIfAbsent(ownerReader, k -> new ArrayList<>()).add(split); + } + } + + @Override + public void close() throws IOException { + // TODO Auto-generated method stub + throw new UnsupportedOperationException("Unimplemented method 'close'"); + } + + @Override + public void addSplitsBack(List splits, int subtaskId) { + log.debug("Add back splits {} to tablestore.", splits); + if (!splits.isEmpty()) { + addPendingSplit(splits); + assignSplit(Collections.singleton(subtaskId)); + 
enumeratorContext.signalNoMoreSplits(subtaskId); + } + } + + @Override + public int currentUnassignedSplitSize() { + return pendingSplits.size(); + } + + @Override + public void handleSplitRequest(int subtaskId) {} + + @Override + public void registerReader(int subtaskId) { + log.debug("Register reader {} to TablestoreSplitEnumerator.", subtaskId); + if (!pendingSplits.isEmpty()) { + assignSplit(Collections.singleton(subtaskId)); + } + } + + @Override + public TableStoreDBSourceState snapshotState(long checkpointId) throws Exception { + synchronized (stateLock) { + return new TableStoreDBSourceState(shouldEnumerate, pendingSplits); + } + } + + @Override + public void notifyCheckpointComplete(long checkpointId) throws Exception {} +} diff --git a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDBSourceState.java b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDBSourceState.java new file mode 100644 index 00000000000..05a73a63101 --- /dev/null +++ b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDBSourceState.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.seatunnel.connectors.seatunnel.tablestore.source; + +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.Setter; + +import java.io.Serializable; +import java.util.List; +import java.util.Map; + +@Getter +@Setter +@AllArgsConstructor +public class TableStoreDBSourceState implements Serializable { + + private boolean shouldEnumerate; + private Map> pendingSplits; +} diff --git a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDbSourceFactory.java b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDbSourceFactory.java new file mode 100644 index 00000000000..f93ae4bfe32 --- /dev/null +++ b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreDbSourceFactory.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.seatunnel.connectors.seatunnel.tablestore.source; + +import org.apache.seatunnel.api.configuration.util.OptionRule; +import org.apache.seatunnel.api.source.SeaTunnelSource; +import org.apache.seatunnel.api.source.SourceSplit; +import org.apache.seatunnel.api.table.connector.TableSource; +import org.apache.seatunnel.api.table.factory.Factory; +import org.apache.seatunnel.api.table.factory.TableSourceFactory; +import org.apache.seatunnel.api.table.factory.TableSourceFactoryContext; +import org.apache.seatunnel.connectors.seatunnel.tablestore.config.TablestoreConfig; + +import com.google.auto.service.AutoService; + +import java.io.Serializable; + +@AutoService(Factory.class) +public class TableStoreDbSourceFactory implements TableSourceFactory { + + @Override + public String factoryIdentifier() { + return "Tablestore"; + } + + @Override + public OptionRule optionRule() { + return OptionRule.builder() + .required( + TablestoreConfig.END_POINT, + TablestoreConfig.INSTANCE_NAME, + TablestoreConfig.ACCESS_KEY_ID, + TablestoreConfig.ACCESS_KEY_SECRET, + TablestoreConfig.TABLE, + TablestoreConfig.PRIMARY_KEYS) + .build(); + } + + @Override + public + TableSource createSource(TableSourceFactoryContext context) { + return () -> + (SeaTunnelSource) new TableStoreDBSource(context.getOptions()); + } + + @Override + public Class getSourceClass() { + return TableStoreDBSource.class; + } +} diff --git a/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreProcessor.java b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreProcessor.java new file mode 100644 index 00000000000..ba5334a85eb --- /dev/null +++ b/seatunnel-connectors-v2/connector-tablestore/src/main/java/org/apache/seatunnel/connectors/seatunnel/tablestore/source/TableStoreProcessor.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.seatunnel.connectors.seatunnel.tablestore.source; + +import org.apache.seatunnel.api.source.Collector; +import org.apache.seatunnel.api.table.type.RowKind; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.connectors.seatunnel.tablestore.serialize.SeaTunnelRowDeserializer; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.alicloud.openservices.tablestore.model.StreamRecord; +import com.alicloud.openservices.tablestore.tunnel.worker.IChannelProcessor; +import com.alicloud.openservices.tablestore.tunnel.worker.ProcessRecordsInput; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class TableStoreProcessor implements IChannelProcessor { + private String tableName = null; + private String primaryKey = null; + private Collector output = null; + protected SeaTunnelRowDeserializer seaTunnelRowDeserializer; + private static final Logger log = LoggerFactory.getLogger(TableStoreProcessor.class); + + public TableStoreProcessor( + String tableName, String primaryKey, Collector output) { + this.tableName = tableName; + this.primaryKey = primaryKey; + this.output = output; + } + + @Override + public void process(ProcessRecordsInput input) { + log.info("Processing Tablestore stream records for table: " + tableName); + + log.info( + String.format( + "Process %d records, NextToken: %s", + input.getRecords().size(), input.getNextToken())); + + for (StreamRecord r : input.getRecords()) { + try { + List fields = new ArrayList<>(); + Arrays.stream(r.getPrimaryKey().getPrimaryKeyColumns()) + .forEach( + k -> { + fields.add(k.getValue().toString()); + }); + r.getColumns() + .forEach( + k -> { + fields.add(k.getColumn().getValue().toString()); + }); + SeaTunnelRow row = new SeaTunnelRow(fields.toArray()); + row.setTableId(tableName); + switch ((r.getRecordType())) { + case PUT: + row.setRowKind(RowKind.INSERT); + break; + case UPDATE: + row.setRowKind(RowKind.UPDATE_AFTER); + break; + case DELETE: + row.setRowKind(RowKind.DELETE); + break; + } + output.collect(row); + } catch (Exception e) { + log.error("Failed to send record to target: " + r.toString(), e); + } + } + } + + @Override + public void shutdown() { + log.info("Processor shutdown due to completion for table: " + tableName); + } +} diff --git a/seatunnel-dist/pom.xml b/seatunnel-dist/pom.xml index fb6935f0894..a633b64a1dd 100644 --- a/seatunnel-dist/pom.xml +++ b/seatunnel-dist/pom.xml @@ -518,6 +518,12 @@ ${project.version} provided + + org.apache.seatunnel + connector-cdc-opengauss + ${project.version} + provided + org.apache.seatunnel connector-tdengine diff --git a/seatunnel-dist/release-docs/LICENSE b/seatunnel-dist/release-docs/LICENSE index f5d9bebbe54..5cc3074d417 100644 --- a/seatunnel-dist/release-docs/LICENSE +++ b/seatunnel-dist/release-docs/LICENSE @@ -267,6 +267,7 @@ The text of each license is the standard Apache 2.0 license.
(Apache-2.0) woodstox-core (com.fasterxml.woodstox:woodstox-core:5.0.3 - https://github.com/FasterXML/woodstox) (Apache-2.0) jcip-annotations (com.github.stephenc.jcip:jcip-annotations:1.0-1 - https://github.com/stephenc/jcip-annotations) (Apache-2.0) gson (com.google.code.gson:gson:2.2.4 - https://github.com/google/gson) + (Apache-2.0) gson (com.google.code.gson:gson:2.8.9 - https://github.com/google/gson) (Apache-2.0) nimbus-jose-jwt (com.nimbusds:nimbus-jose-jwt:7.9 - https://bitbucket.org/connect2id/nimbus-jose-jwt) (Apache-2.0) beanutils (commons-beanutils:commons-beanutils:1.9.4 - https://commons.apache.org/proper/commons-beanutils/) (Apache-2.0) commons-cli (commons-cli:commons-cli:1.2 - https://commons.apache.org/proper/commons-cli/) @@ -318,8 +319,10 @@ The text of each license is the standard Apache 2.0 license. (The Apache Software License, Version 2.0) hazelcast (com.hazelcast:hazelcast:5.1 - https://github.com/hazelcast/hazelcast) (Apache-2.0) disruptor (com.lmax:disruptor:3.4.4 https://lmax-exchange.github.io/disruptor/) (Apache-2.0) error_prone_annotations (com.google.errorprone:error_prone_annotations:2.2.0 https://mvnrepository.com/artifact/com.google.errorprone/error_prone_annotations/2.2.0) + (Apache-2.0) error_prone_annotations (com.google.errorprone:error_prone_annotations:2.18.0 https://mvnrepository.com/artifact/com.google.errorprone/error_prone_annotations/2.18.0) (Apache-2.0) failureaccess (com.google.guava:failureaccess:1.0 https://mvnrepository.com/artifact/com.google.guava/failureaccess/1.0) (Apache-2.0) j2objc-annotations (com.google.j2objc:j2objc-annotations:1.1 https://mvnrepository.com/artifact/com.google.j2objc/j2objc-annotations/1.1) + (Apache-2.0) j2objc-annotations (com.google.j2objc:j2objc-annotations:2.8 https://mvnrepository.com/artifact/com.google.j2objc/j2objc-annotations/2.8) (Apache-2.0) listenablefuture (com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava https://mvnrepository.com/artifact/com.google.guava/listenablefuture/9999.0-empty-to-avoid-conflict-with-guava) (Apache-2.0) accessors-smart (net.minidev:accessors-smart:2.4.7 - https://mvnrepository.com/artifact/net.minidev/accessors-smart) (Apache-2.0) json-smart (net.minidev:json-smart:2.4.7 - https://mvnrepository.com/artifact/net.minidev/json-smart) diff --git a/seatunnel-dist/release-docs/licenses/LICENSE-protoc-jar.txt b/seatunnel-dist/release-docs/licenses/LICENSE-protoc-jar.txt new file mode 100644 index 00000000000..ad410e11302 --- /dev/null +++ b/seatunnel-dist/release-docs/licenses/LICENSE-protoc-jar.txt @@ -0,0 +1,201 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/pom.xml new file mode 100644 index 00000000000..f95e5cdb1a0 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/pom.xml @@ -0,0 +1,75 @@ + + + + 4.0.0 + + org.apache.seatunnel + seatunnel-connector-v2-e2e + ${revision} + + + connector-cdc-opengauss-e2e + SeaTunnel : E2E : Connector V2 : CDC Opengauss + + + + + org.apache.seatunnel + connector-jdbc + ${project.version} + pom + import + + + + + + + + org.apache.seatunnel + connector-cdc-opengauss + ${project.version} + test + + + + org.apache.seatunnel + connector-jdbc + ${project.version} + test + + + + org.testcontainers + postgresql + ${testcontainer.version} + test + + + + + org.postgresql + postgresql + 42.5.1 + + + + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/cdc/postgres/OpengaussCDCIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/cdc/postgres/OpengaussCDCIT.java new file mode 100644 index 00000000000..dc80a083a76 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/java/org/apache/seatunnel/connectors/seatunnel/cdc/postgres/OpengaussCDCIT.java @@ -0,0 +1,727 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.cdc.postgres; + +import org.apache.seatunnel.e2e.common.TestResource; +import org.apache.seatunnel.e2e.common.TestSuiteBase; +import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; +import org.apache.seatunnel.e2e.common.container.EngineType; +import org.apache.seatunnel.e2e.common.container.TestContainer; +import org.apache.seatunnel.e2e.common.junit.DisabledOnContainer; +import org.apache.seatunnel.e2e.common.junit.TestContainerExtension; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.TestTemplate; +import org.testcontainers.containers.Container; +import org.testcontainers.containers.GenericContainer; +import org.testcontainers.containers.output.Slf4jLogConsumer; +import org.testcontainers.lifecycle.Startables; +import org.testcontainers.utility.DockerImageName; + +import com.google.common.collect.Lists; +import lombok.extern.slf4j.Slf4j; + +import java.io.IOException; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.awaitility.Awaitility.await; +import static org.awaitility.Awaitility.given; +import static org.junit.Assert.assertNotNull; + +@Slf4j +@DisabledOnContainer( + value = {}, + type = {EngineType.SPARK}, + disabledReason = "Currently SPARK do not support cdc") +public class OpengaussCDCIT extends TestSuiteBase implements TestResource { + private static final int OPENGAUSS_PORT = 5432; + private static final Pattern COMMENT_PATTERN = Pattern.compile("^(.*)--.*$"); + private static final String USERNAME = "gaussdb"; + private static final String PASSWORD = "openGauss@123"; + private static final String OPENGAUSSQL_DATABASE = "opengauss_cdc"; + private static final String OPENGAUSSQL_DEFAULT_DATABASE = "postgres"; + private static final String OPENGAUSS_SCHEMA = "inventory"; + + private static final String SOURCE_TABLE_1 = "opengauss_cdc_table_1"; + private static final String SOURCE_TABLE_2 = "opengauss_cdc_table_2"; + private static final String SOURCE_TABLE_3 = "opengauss_cdc_table_3"; + private static final String SINK_TABLE_1 = "sink_opengauss_cdc_table_1"; + private static final String SINK_TABLE_2 = "sink_opengauss_cdc_table_2"; + private static final String SINK_TABLE_3 = "sink_opengauss_cdc_table_3"; + + private static final String SOURCE_TABLE_NO_PRIMARY_KEY = "full_types_no_primary_key"; + + private static final String OPENGAUSS_HOST = "opengauss_cdc_e2e"; + + protected static final DockerImageName OPENGAUSS_IMAGE = + DockerImageName.parse("opengauss/opengauss:5.0.0") + .asCompatibleSubstituteFor("postgres"); + + private static final String SOURCE_SQL_TEMPLATE = "select * from %s.%s order by id"; + + public static final GenericContainer OPENGAUSS_CONTAINER = + new GenericContainer<>(OPENGAUSS_IMAGE) + .withNetwork(NETWORK) + .withNetworkAliases(OPENGAUSS_HOST) + .withEnv("GS_PASSWORD", PASSWORD) + .withLogConsumer(new Slf4jLogConsumer(log)); + + private 
String driverUrl() { + return "https://repo1.maven.org/maven2/org/postgresql/postgresql/42.5.1/postgresql-42.5.1.jar"; + } + + @TestContainerExtension + protected final ContainerExtendedFactory extendedFactory = + container -> { + Container.ExecResult extraCommands = + container.execInContainer( + "bash", + "-c", + "mkdir -p /tmp/seatunnel/plugins/JDBC/lib && cd /tmp/seatunnel/plugins/JDBC/lib && wget " + + driverUrl()); + Assertions.assertEquals(0, extraCommands.getExitCode(), extraCommands.getStderr()); + }; + + @BeforeAll + @Override + public void startUp() throws Exception { + log.info("The second stage: Starting opengauss containers..."); + OPENGAUSS_CONTAINER.setPortBindings( + Lists.newArrayList(String.format("%s:%s", OPENGAUSS_PORT, OPENGAUSS_PORT))); + Startables.deepStart(Stream.of(OPENGAUSS_CONTAINER)).join(); + log.info("Opengauss Containers are started"); + given().ignoreExceptions() + .await() + .atLeast(100, TimeUnit.MILLISECONDS) + .pollInterval(2, TimeUnit.SECONDS) + .atMost(2, TimeUnit.MINUTES) + .untilAsserted(this::initializeOpengaussSql); + + String[] command1 = { + "/bin/sh", + "-c", + "sed -i 's/^#password_encryption_type = 2/password_encryption_type = 1/' /var/lib/opengauss/data/postgresql.conf" + }; + Container.ExecResult result1 = OPENGAUSS_CONTAINER.execInContainer(command1); + Assertions.assertEquals(0, result1.getExitCode()); + + String[] command2 = { + "/bin/sh", + "-c", + "sed -i 's/host replication gaussdb 0.0.0.0\\/0 md5/host replication gaussdb 0.0.0.0\\/0 sha256/' /var/lib/opengauss/data/pg_hba.conf" + }; + Container.ExecResult result2 = OPENGAUSS_CONTAINER.execInContainer(command2); + Assertions.assertEquals(0, result2.getExitCode()); + String[] command3 = { + "/bin/sh", + "-c", + "echo \"host all dailai 0.0.0.0/0 md5\" >> /var/lib/opengauss/data/pg_hba.conf" + }; + Container.ExecResult result3 = OPENGAUSS_CONTAINER.execInContainer(command3); + Assertions.assertEquals(0, result3.getExitCode()); + + reloadConf(); + + createNewUserForJdbcSink(); + } + + @TestTemplate + public void testOpengaussCdcCheckDataE2e(TestContainer container) { + try { + CompletableFuture.supplyAsync( + () -> { + try { + container.executeJob("/opengausscdc_to_opengauss.conf"); + } catch (Exception e) { + log.error("Commit task exception :" + e.getMessage()); + throw new RuntimeException(e); + } + return null; + }); + await().atMost(60000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> { + Assertions.assertIterableEquals( + query(getQuerySQL(OPENGAUSS_SCHEMA, SOURCE_TABLE_1)), + query(getQuerySQL(OPENGAUSS_SCHEMA, SINK_TABLE_1))); + }); + + // insert update delete + upsertDeleteSourceTable(OPENGAUSS_SCHEMA, SOURCE_TABLE_1); + + // stream stage + await().atMost(60000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> { + Assertions.assertIterableEquals( + query(getQuerySQL(OPENGAUSS_SCHEMA, SOURCE_TABLE_1)), + query(getQuerySQL(OPENGAUSS_SCHEMA, SINK_TABLE_1))); + }); + } finally { + // Clear related content to ensure that multiple operations are not affected + clearTable(OPENGAUSS_SCHEMA, SOURCE_TABLE_1); + clearTable(OPENGAUSS_SCHEMA, SINK_TABLE_1); + } + } + + @TestTemplate + @DisabledOnContainer( + value = {}, + type = {EngineType.SPARK, EngineType.FLINK}, + disabledReason = "Currently SPARK and FLINK do not support multi table") + public void testOpengaussCdcMultiTableE2e(TestContainer container) { + try { + CompletableFuture.supplyAsync( + () -> { + try { + container.executeJob( + "/opengausscdc_to_opengauss_with_multi_table_mode_two_table.conf"); + } catch (Exception e) { + 
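// Log and rethrow so a failed job submission is not silently swallowed by the async task +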
log.error("Commit task exception :" + e.getMessage()); + throw new RuntimeException(e); + } + return null; + }); + + // stream stage + await().atMost(60000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertAll( + () -> + Assertions.assertIterableEquals( + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SOURCE_TABLE_1)), + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SINK_TABLE_1))), + () -> + Assertions.assertIterableEquals( + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SOURCE_TABLE_2)), + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SINK_TABLE_2))))); + + // insert update delete + upsertDeleteSourceTable(OPENGAUSS_SCHEMA, SOURCE_TABLE_1); + upsertDeleteSourceTable(OPENGAUSS_SCHEMA, SOURCE_TABLE_2); + + // stream stage + await().atMost(60000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertAll( + () -> + Assertions.assertIterableEquals( + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SOURCE_TABLE_1)), + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SINK_TABLE_1))), + () -> + Assertions.assertIterableEquals( + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SOURCE_TABLE_2)), + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SINK_TABLE_2))))); + } finally { + // Clear related content to ensure that multiple operations are not affected + clearTable(OPENGAUSS_SCHEMA, SOURCE_TABLE_1); + clearTable(OPENGAUSS_SCHEMA, SINK_TABLE_1); + clearTable(OPENGAUSS_SCHEMA, SOURCE_TABLE_2); + clearTable(OPENGAUSS_SCHEMA, SINK_TABLE_2); + } + } + + @TestTemplate + @DisabledOnContainer( + value = {}, + type = {EngineType.SPARK, EngineType.FLINK}, + disabledReason = "Currently SPARK and FLINK do not support multi table") + public void testMultiTableWithRestore(TestContainer container) + throws IOException, InterruptedException { + try { + CompletableFuture.supplyAsync( + () -> { + try { + return container.executeJob( + "/opengausscdc_to_opengauss_with_multi_table_mode_one_table.conf"); + } catch (Exception e) { + log.error("Commit task exception :" + e.getMessage()); + throw new RuntimeException(e); + } + }); + + // insert update delete + upsertDeleteSourceTable(OPENGAUSS_SCHEMA, SOURCE_TABLE_1); + + // stream stage + await().atMost(60000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertAll( + () -> + Assertions.assertIterableEquals( + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SOURCE_TABLE_1)), + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SINK_TABLE_1))))); + + Pattern jobIdPattern = + Pattern.compile( + ".*Init JobMaster for Job opengausscdc_to_opengauss_with_multi_table_mode_one_table.conf \\(([0-9]*)\\).*", + Pattern.DOTALL); + Matcher matcher = jobIdPattern.matcher(container.getServerLogs()); + String jobId; + if (matcher.matches()) { + jobId = matcher.group(1); + } else { + throw new RuntimeException("Can not find jobId"); + } + + Assertions.assertEquals(0, container.savepointJob(jobId).getExitCode()); + + // Restore job with add a new table + CompletableFuture.supplyAsync( + () -> { + try { + container.restoreJob( + "/opengausscdc_to_opengauss_with_multi_table_mode_two_table.conf", + jobId); + } catch (Exception e) { + log.error("Commit task exception :" + e.getMessage()); + throw new RuntimeException(e); + } + return null; + }); + + upsertDeleteSourceTable(OPENGAUSS_SCHEMA, SOURCE_TABLE_2); + + // stream stage + await().atMost(60000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertAll( + () -> + Assertions.assertIterableEquals( + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SOURCE_TABLE_1)), + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + 
SINK_TABLE_1))), + () -> + Assertions.assertIterableEquals( + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SOURCE_TABLE_2)), + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SINK_TABLE_2))))); + + log.info("****************** container logs start ******************"); + String containerLogs = container.getServerLogs(); + log.info(containerLogs); + // pg cdc logs contain ERROR + // Assertions.assertFalse(containerLogs.contains("ERROR")); + log.info("****************** container logs end ******************"); + } finally { + // Clear related content to ensure that multiple operations are not affected + clearTable(OPENGAUSS_SCHEMA, SOURCE_TABLE_1); + clearTable(OPENGAUSS_SCHEMA, SINK_TABLE_1); + clearTable(OPENGAUSS_SCHEMA, SOURCE_TABLE_2); + clearTable(OPENGAUSS_SCHEMA, SINK_TABLE_2); + } + } + + @TestTemplate + @DisabledOnContainer( + value = {}, + type = {EngineType.SPARK, EngineType.FLINK}, + disabledReason = "Currently SPARK and FLINK do not support multi table") + public void testAddFiledWithRestore(TestContainer container) + throws IOException, InterruptedException { + try { + CompletableFuture.supplyAsync( + () -> { + try { + return container.executeJob( + "/opengausscdc_to_opengauss_test_add_Filed.conf"); + } catch (Exception e) { + log.error("Commit task exception :" + e.getMessage()); + throw new RuntimeException(e); + } + }); + + // stream stage + await().atMost(60000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertAll( + () -> + Assertions.assertIterableEquals( + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SOURCE_TABLE_3)), + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SINK_TABLE_3))))); + + Pattern jobIdPattern = + Pattern.compile( + ".*Init JobMaster for Job opengausscdc_to_opengauss_test_add_Filed.conf \\(([0-9]*)\\).*", + Pattern.DOTALL); + Matcher matcher = jobIdPattern.matcher(container.getServerLogs()); + String jobId; + if (matcher.matches()) { + jobId = matcher.group(1); + } else { + throw new RuntimeException("Can not find jobId"); + } + + Assertions.assertEquals(0, container.savepointJob(jobId).getExitCode()); + + // add filed add insert source table data + addFieldsForTable(OPENGAUSS_SCHEMA, SOURCE_TABLE_3); + addFieldsForTable(OPENGAUSS_SCHEMA, SINK_TABLE_3); + insertSourceTableForAddFields(OPENGAUSS_SCHEMA, SOURCE_TABLE_3); + + // Restore job + CompletableFuture.supplyAsync( + () -> { + try { + container.restoreJob( + "/opengausscdc_to_opengauss_test_add_Filed.conf", jobId); + } catch (Exception e) { + log.error("Commit task exception :" + e.getMessage()); + throw new RuntimeException(e); + } + return null; + }); + + // stream stage + await().atMost(60000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertAll( + () -> + Assertions.assertIterableEquals( + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SOURCE_TABLE_3)), + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SINK_TABLE_3))))); + } finally { + // Clear related content to ensure that multiple operations are not affected + clearTable(OPENGAUSS_SCHEMA, SOURCE_TABLE_3); + clearTable(OPENGAUSS_SCHEMA, SINK_TABLE_3); + } + } + + @TestTemplate + public void testOpengaussCdcCheckDataWithNoPrimaryKey(TestContainer container) + throws Exception { + + try { + CompletableFuture.supplyAsync( + () -> { + try { + container.executeJob( + "/opengausscdc_to_opengauss_with_no_primary_key.conf"); + } catch (Exception e) { + log.error("Commit task exception :" + e.getMessage()); + throw new RuntimeException(e); + } + return null; + }); + + // snapshot stage + await().atMost(60000, 
TimeUnit.MILLISECONDS) + .untilAsserted( + () -> { + Assertions.assertIterableEquals( + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SOURCE_TABLE_NO_PRIMARY_KEY)), + query(getQuerySQL(OPENGAUSS_SCHEMA, SINK_TABLE_1))); + }); + + // insert update delete + upsertDeleteSourceTable(OPENGAUSS_SCHEMA, SOURCE_TABLE_NO_PRIMARY_KEY); + + // stream stage + await().atMost(60000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> { + Assertions.assertIterableEquals( + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SOURCE_TABLE_NO_PRIMARY_KEY)), + query(getQuerySQL(OPENGAUSS_SCHEMA, SINK_TABLE_1))); + }); + } finally { + clearTable(OPENGAUSS_SCHEMA, SOURCE_TABLE_NO_PRIMARY_KEY); + clearTable(OPENGAUSS_SCHEMA, SINK_TABLE_1); + } + } + + @TestTemplate + public void testOpengaussCdcCheckDataWithCustomPrimaryKey(TestContainer container) + throws Exception { + + try { + CompletableFuture.supplyAsync( + () -> { + try { + container.executeJob( + "/opengausscdc_to_opengauss_with_custom_primary_key.conf"); + } catch (Exception e) { + log.error("Commit task exception :" + e.getMessage()); + throw new RuntimeException(e); + } + return null; + }); + + // snapshot stage + await().atMost(60000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> { + Assertions.assertIterableEquals( + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SOURCE_TABLE_NO_PRIMARY_KEY)), + query(getQuerySQL(OPENGAUSS_SCHEMA, SINK_TABLE_1))); + }); + + // insert update delete + upsertDeleteSourceTable(OPENGAUSS_SCHEMA, SOURCE_TABLE_NO_PRIMARY_KEY); + + // stream stage + await().atMost(60000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> { + Assertions.assertIterableEquals( + query( + getQuerySQL( + OPENGAUSS_SCHEMA, + SOURCE_TABLE_NO_PRIMARY_KEY)), + query(getQuerySQL(OPENGAUSS_SCHEMA, SINK_TABLE_1))); + }); + } finally { + clearTable(OPENGAUSS_SCHEMA, SOURCE_TABLE_NO_PRIMARY_KEY); + clearTable(OPENGAUSS_SCHEMA, SINK_TABLE_1); + } + } + + private void addFieldsForTable(String database, String tableName) { + executeSql("ALTER TABLE " + database + "." + tableName + " ADD COLUMN f_big BIGINT"); + } + + private void insertSourceTableForAddFields(String database, String tableName) { + executeSql( + "INSERT INTO " + + database + + "." + + tableName + + " VALUES (2, '2', 32767, 65535, 2147483647);"); + } + + private void clearTable(String database, String tableName) { + executeSql("truncate table " + database + "." + tableName); + } + + private void upsertDeleteSourceTable(String database, String tableName) { + + executeSql( + "INSERT INTO " + + database + + "." + + tableName + + " VALUES (2, '2', 32767, 65535, 2147483647, 5.5, 6.6, 123.12345, 404.4443, true,\n" + + " 'Hello World', 'a', 'abc', 'abcd..xyz', '2020-07-17 18:00:22.123', '2020-07-17 18:00:22.123456',\n" + + " '2020-07-17', '18:00:22', 500);"); + + executeSql( + "INSERT INTO " + + database + + "." + + tableName + + " VALUES (3, '2', 32767, 65535, 2147483647, 5.5, 6.6, 123.12345, 404.4443, true,\n" + + " 'Hello World', 'a', 'abc', 'abcd..xyz', '2020-07-17 18:00:22.123', '2020-07-17 18:00:22.123456',\n" + + " '2020-07-17', '18:00:22', 500);"); + + executeSql("DELETE FROM " + database + "." + tableName + " where id = 2;"); + + executeSql("UPDATE " + database + "." 
+ tableName + " SET f_big = 10000 where id = 3;"); + } + + private void executeSql(String sql) { + try (Connection connection = getJdbcConnection(OPENGAUSSQL_DATABASE); + Statement statement = connection.createStatement()) { + statement.execute("SET search_path TO inventory;"); + statement.execute(sql); + } catch (SQLException e) { + throw new RuntimeException(e); + } + } + + private String getQuerySQL(String database, String tableName) { + return String.format(SOURCE_SQL_TEMPLATE, database, tableName); + } + + private List> query(String sql) { + try (Connection connection = getJdbcConnection(OPENGAUSSQL_DATABASE)) { + ResultSet resultSet = connection.createStatement().executeQuery(sql); + List> result = new ArrayList<>(); + int columnCount = resultSet.getMetaData().getColumnCount(); + while (resultSet.next()) { + ArrayList objects = new ArrayList<>(); + for (int i = 1; i <= columnCount; i++) { + Object object = resultSet.getObject(i); + if (object instanceof byte[]) { + byte[] bytes = (byte[]) object; + object = new String(bytes, StandardCharsets.UTF_8); + } + objects.add(object); + } + log.debug( + String.format( + "Print opengauss-CDC query, sql: %s, data: %s", sql, objects)); + result.add(objects); + } + return result; + } catch (SQLException e) { + throw new RuntimeException(e); + } + } + + protected void createNewUserForJdbcSink() throws Exception { + try (Connection connection = getJdbcConnection(OPENGAUSSQL_DATABASE); + Statement stmt = connection.createStatement()) { + // create a user for jdbc sink + stmt.execute("CREATE USER dailai WITH PASSWORD 'openGauss@123';"); + stmt.execute("GRANT ALL PRIVILEGES TO dailai;"); + } + } + + protected void reloadConf() throws Exception { + try (Connection connection = getJdbcConnection(OPENGAUSSQL_DATABASE); + Statement stmt = connection.createStatement()) { + stmt.execute("select pg_reload_conf();"); + } + } + + protected void initializeOpengaussSql() throws Exception { + try (Connection connection = getJdbcConnection(OPENGAUSSQL_DEFAULT_DATABASE); + Statement stmt = connection.createStatement()) { + stmt.execute("create database " + OPENGAUSSQL_DATABASE); + } + final String ddlFile = String.format("ddl/%s.sql", "inventory"); + final URL ddlTestFile = OpengaussCDCIT.class.getClassLoader().getResource(ddlFile); + assertNotNull("Cannot locate " + ddlFile, ddlTestFile); + try (Connection connection = getJdbcConnection(OPENGAUSSQL_DATABASE); + Statement statement = connection.createStatement()) { + final List statements = + Arrays.stream( + Files.readAllLines(Paths.get(ddlTestFile.toURI())).stream() + .map(String::trim) + .filter(x -> !x.startsWith("--") && !x.isEmpty()) + .map( + x -> { + final Matcher m = + COMMENT_PATTERN.matcher(x); + return m.matches() ? 
m.group(1) : x; + }) + .collect(Collectors.joining("\n")) + .split(";\n")) + .collect(Collectors.toList()); + for (String stmt : statements) { + statement.execute(stmt); + } + } + } + + private Connection getJdbcConnection(String dbName) throws SQLException { + return DriverManager.getConnection( + "jdbc:postgresql://" + + OPENGAUSS_CONTAINER.getHost() + + ":" + + OPENGAUSS_CONTAINER.getMappedPort(OPENGAUSS_PORT) + + "/" + + dbName, + USERNAME, + PASSWORD); + } + + @AfterAll + @Override + public void tearDown() throws Exception { + if (OPENGAUSS_CONTAINER != null) { + OPENGAUSS_CONTAINER.close(); + } + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/ddl/inventory.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/ddl/inventory.sql new file mode 100644 index 00000000000..5d4564041b7 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/ddl/inventory.sql @@ -0,0 +1,199 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+-- + +-- ---------------------------------------------------------------------------------------------------------------- +-- DATABASE: opengauss_cdc +-- ---------------------------------------------------------------------------------------------------------------- +-- Create and populate our products using a single insert with many rows +DROP SCHEMA IF EXISTS inventory CASCADE; +CREATE SCHEMA inventory; +SET search_path TO inventory; + +CREATE TABLE opengauss_cdc_table_1 +( + id INTEGER NOT NULL, + f_bytea BYTEA, + f_small SMALLINT, + f_int INTEGER, + f_big BIGINT, + f_real REAL, + f_double_precision DOUBLE PRECISION, + f_numeric NUMERIC(10, 5), + f_decimal DECIMAL(10, 1), + f_boolean BOOLEAN, + f_text TEXT, + f_char CHAR, + f_character CHARACTER(3), + f_character_varying CHARACTER VARYING(20), + f_timestamp3 TIMESTAMP(3), + f_timestamp6 TIMESTAMP(6), + f_date DATE, + f_time TIME(0), + f_default_numeric NUMERIC, + PRIMARY KEY (id) +); + +CREATE TABLE opengauss_cdc_table_2 +( + id INTEGER NOT NULL, + f_bytea BYTEA, + f_small SMALLINT, + f_int INTEGER, + f_big BIGINT, + f_real REAL, + f_double_precision DOUBLE PRECISION, + f_numeric NUMERIC(10, 5), + f_decimal DECIMAL(10, 1), + f_boolean BOOLEAN, + f_text TEXT, + f_char CHAR, + f_character CHARACTER(3), + f_character_varying CHARACTER VARYING(20), + f_timestamp3 TIMESTAMP(3), + f_timestamp6 TIMESTAMP(6), + f_date DATE, + f_time TIME(0), + f_default_numeric NUMERIC, + PRIMARY KEY (id) +); + +CREATE TABLE sink_opengauss_cdc_table_1 +( + id INTEGER NOT NULL, + f_bytea BYTEA, + f_small SMALLINT, + f_int INTEGER, + f_big BIGINT, + f_real REAL, + f_double_precision DOUBLE PRECISION, + f_numeric NUMERIC(10, 5), + f_decimal DECIMAL(10, 1), + f_boolean BOOLEAN, + f_text TEXT, + f_char CHAR, + f_character CHARACTER(3), + f_character_varying CHARACTER VARYING(20), + f_timestamp3 TIMESTAMP(3), + f_timestamp6 TIMESTAMP(6), + f_date DATE, + f_time TIME(0), + f_default_numeric NUMERIC, + PRIMARY KEY (id) +); + +CREATE TABLE sink_opengauss_cdc_table_2 +( + id INTEGER NOT NULL, + f_bytea BYTEA, + f_small SMALLINT, + f_int INTEGER, + f_big BIGINT, + f_real REAL, + f_double_precision DOUBLE PRECISION, + f_numeric NUMERIC(10, 5), + f_decimal DECIMAL(10, 1), + f_boolean BOOLEAN, + f_text TEXT, + f_char CHAR, + f_character CHARACTER(3), + f_character_varying CHARACTER VARYING(20), + f_timestamp3 TIMESTAMP(3), + f_timestamp6 TIMESTAMP(6), + f_date DATE, + f_time TIME(0), + f_default_numeric NUMERIC, + PRIMARY KEY (id) +); + +CREATE TABLE full_types_no_primary_key +( + id INTEGER NOT NULL, + f_bytea BYTEA, + f_small SMALLINT, + f_int INTEGER, + f_big BIGINT, + f_real REAL, + f_double_precision DOUBLE PRECISION, + f_numeric NUMERIC(10, 5), + f_decimal DECIMAL(10, 1), + f_boolean BOOLEAN, + f_text TEXT, + f_char CHAR, + f_character CHARACTER(3), + f_character_varying CHARACTER VARYING(20), + f_timestamp3 TIMESTAMP(3), + f_timestamp6 TIMESTAMP(6), + f_date DATE, + f_time TIME(0), + f_default_numeric NUMERIC +); + +CREATE TABLE opengauss_cdc_table_3 +( + id INTEGER NOT NULL, + f_bytea BYTEA, + f_small SMALLINT, + f_int INTEGER, + PRIMARY KEY (id) +); + +CREATE TABLE sink_opengauss_cdc_table_3 +( + id INTEGER NOT NULL, + f_bytea BYTEA, + f_small SMALLINT, + f_int INTEGER, + PRIMARY KEY (id) +); + +ALTER TABLE opengauss_cdc_table_1 + REPLICA IDENTITY FULL; + +ALTER TABLE opengauss_cdc_table_2 + REPLICA IDENTITY FULL; + +ALTER TABLE opengauss_cdc_table_3 + REPLICA IDENTITY FULL; + +ALTER TABLE sink_opengauss_cdc_table_1 + REPLICA IDENTITY FULL; + +ALTER TABLE 
sink_opengauss_cdc_table_2 + REPLICA IDENTITY FULL; + +ALTER TABLE full_types_no_primary_key + REPLICA IDENTITY FULL; + +INSERT INTO opengauss_cdc_table_1 +VALUES (1, '2', 32767, 65535, 2147483647, 5.5, 6.6, 123.12345, 404.4443, true, + 'Hello World', 'a', 'abc', 'abcd..xyz', '2020-07-17 18:00:22.123', '2020-07-17 18:00:22.123456', + '2020-07-17', '18:00:22', 500); + +INSERT INTO opengauss_cdc_table_2 +VALUES (1, '2', 32767, 65535, 2147483647, 5.5, 6.6, 123.12345, 404.4443, true, + 'Hello World', 'a', 'abc', 'abcd..xyz', '2020-07-17 18:00:22.123', '2020-07-17 18:00:22.123456', + '2020-07-17', '18:00:22', 500); + +INSERT INTO opengauss_cdc_table_3 +VALUES (1, '2', 32767, 65535); + +INSERT INTO full_types_no_primary_key +VALUES (1, '2', 32767, 65535, 2147483647, 5.5, 6.6, 123.12345, 404.4443, true, + 'Hello World', 'a', 'abc', 'abcd..xyz', '2020-07-17 18:00:22.123', '2020-07-17 18:00:22.123456', + '2020-07-17', '18:00:22', 500); + + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss.conf new file mode 100644 index 00000000000..c1227226362 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss.conf @@ -0,0 +1,62 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
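+# Streams opengauss_cdc_table_1 from Opengauss-CDC into sink_opengauss_cdc_table_1 through the JDBC sink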
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + # You can set engine configuration here + execution.parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 + read_limit.bytes_per_second=7000000 + read_limit.rows_per_second=400 +} + +source { + Opengauss-CDC { + result_table_name = "customers_opengauss_cdc" + username = "gaussdb" + password = "openGauss@123" + database-names = ["opengauss_cdc"] + schema-names = ["inventory"] + table-names = ["opengauss_cdc.inventory.opengauss_cdc_table_1"] + base-url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc?loggerLevel=OFF" + decoding.plugin.name = "pgoutput" + } +} + +transform { + +} + +sink { + jdbc { + source_table_name = "customers_opengauss_cdc" + url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc?loggerLevel=OFF" + driver = "org.postgresql.Driver" + user = "dailai" + password = "openGauss@123" + + compatible_mode="postgresLow" + generate_sink_sql = true + # You need to configure both database and table + database = opengauss_cdc + table = inventory.sink_opengauss_cdc_table_1 + primary_keys = ["id"] + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss_test_add_Filed.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss_test_add_Filed.conf new file mode 100644 index 00000000000..3c925f55311 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss_test_add_Filed.conf @@ -0,0 +1,62 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
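+# Single-table job on opengauss_cdc_table_3, used by the add-field savepoint/restore test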
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + # You can set engine configuration here + execution.parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 + read_limit.bytes_per_second=7000000 + read_limit.rows_per_second=400 +} + +source { + Opengauss-CDC { + result_table_name = "customers_opengauss_cdc" + username = "gaussdb" + password = "openGauss@123" + database-names = ["opengauss_cdc"] + schema-names = ["inventory"] + table-names = ["opengauss_cdc.inventory.opengauss_cdc_table_3"] + base-url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc?loggerLevel=OFF" + decoding.plugin.name = "pgoutput" + } +} + +transform { + +} + +sink { + jdbc { + source_table_name = "customers_opengauss_cdc" + url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc?loggerLevel=OFF" + driver = "org.postgresql.Driver" + user = "dailai" + password = "openGauss@123" + + compatible_mode="postgresLow" + generate_sink_sql = true + # You need to configure both database and table + database = opengauss_cdc + table = inventory.sink_opengauss_cdc_table_3 + primary_keys = ["id"] + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss_with_custom_primary_key.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss_with_custom_primary_key.conf new file mode 100644 index 00000000000..d916cb5e4e4 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss_with_custom_primary_key.conf @@ -0,0 +1,69 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
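+# Reads full_types_no_primary_key with a custom primary key (id) configured via table-names-config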
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + # You can set engine configuration here + execution.parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 + read_limit.bytes_per_second=7000000 + read_limit.rows_per_second=400 +} + +source { + Opengauss-CDC { + result_table_name = "customers_opengauss_cdc" + username = "gaussdb" + password = "openGauss@123" + database-names = ["opengauss_cdc"] + schema-names = ["inventory"] + table-names = ["opengauss_cdc.inventory.full_types_no_primary_key"] + base-url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc?loggerLevel=OFF" + decoding.plugin.name = "pgoutput" + exactly_once = true + table-names-config = [ + { + table = "opengauss_cdc.inventory.full_types_no_primary_key" + primaryKeys = ["id"] + } + ] + } +} + +transform { + +} + +sink { + jdbc { + source_table_name = "customers_opengauss_cdc" + url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc?loggerLevel=OFF" + driver = "org.postgresql.Driver" + user = "dailai" + password = "openGauss@123" + + compatible_mode="postgresLow" + generate_sink_sql = true + # You need to configure both database and table + database = opengauss_cdc + table = inventory.sink_opengauss_cdc_table_1 + primary_keys = ["id"] + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss_with_multi_table_mode_one_table.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss_with_multi_table_mode_one_table.conf new file mode 100644 index 00000000000..f75d7dc97d8 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss_with_multi_table_mode_one_table.conf @@ -0,0 +1,63 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
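+# Multi-table mode with one source table (opengauss_cdc_table_1); sink tables are generated with the sink_ prefix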
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + # You can set engine configuration here + execution.parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 + read_limit.bytes_per_second=7000000 + read_limit.rows_per_second=400 +} + +source { + Opengauss-CDC { + result_table_name = "customers_opengauss_cdc" + username = "gaussdb" + password = "openGauss@123" + database-names = ["opengauss_cdc"] + schema-names = ["inventory"] + table-names = ["opengauss_cdc.inventory.opengauss_cdc_table_1"] + base-url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc?loggerLevel=OFF" + decoding.plugin.name = "pgoutput" + } +} + +transform { + +} + +sink { + jdbc { + source_table_name = "customers_opengauss_cdc" + url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc?loggerLevel=OFF" + driver = "org.postgresql.Driver" + user = "dailai" + password = "openGauss@123" + + compatible_mode="postgresLow" + generate_sink_sql = true + # You need to configure both database and table + database = "opengauss_cdc" + schema = "inventory" + tablePrefix = "sink_" + primary_keys = ["id"] + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss_with_multi_table_mode_two_table.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss_with_multi_table_mode_two_table.conf new file mode 100644 index 00000000000..d5ac1000012 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss_with_multi_table_mode_two_table.conf @@ -0,0 +1,63 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
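+# Multi-table mode with two source tables (opengauss_cdc_table_1, opengauss_cdc_table_2); sink tables use the sink_ prefix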
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + # You can set engine configuration here + execution.parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 + read_limit.bytes_per_second=7000000 + read_limit.rows_per_second=400 +} + +source { + Opengauss-CDC { + result_table_name = "customers_opengauss_cdc" + username = "gaussdb" + password = "openGauss@123" + database-names = ["opengauss_cdc"] + schema-names = ["inventory"] + table-names = ["opengauss_cdc.inventory.opengauss_cdc_table_1","opengauss_cdc.inventory.opengauss_cdc_table_2"] + base-url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc?loggerLevel=OFF" + decoding.plugin.name = "pgoutput" + } +} + +transform { + +} + +sink { + jdbc { + source_table_name = "customers_opengauss_cdc" + url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc?loggerLevel=OFF" + driver = "org.postgresql.Driver" + user = "dailai" + password = "openGauss@123" + + compatible_mode="postgresLow" + generate_sink_sql = true + # You need to configure both database and table + database = "opengauss_cdc" + schema = "inventory" + tablePrefix = "sink_" + primary_keys = ["id"] + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss_with_no_primary_key.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss_with_no_primary_key.conf new file mode 100644 index 00000000000..5decfb10606 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-cdc-opengauss-e2e/src/test/resources/opengausscdc_to_opengauss_with_no_primary_key.conf @@ -0,0 +1,63 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
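+# Reads full_types_no_primary_key (no primary key, exactly_once = false) into sink_opengauss_cdc_table_1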
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + # You can set engine configuration here + execution.parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 + read_limit.bytes_per_second=7000000 + read_limit.rows_per_second=400 +} + +source { + Opengauss-CDC { + result_table_name = "customers_opengauss_cdc" + username = "gaussdb" + password = "openGauss@123" + database-names = ["opengauss_cdc"] + schema-names = ["inventory"] + base-url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc?loggerLevel=OFF" + decoding.plugin.name = "pgoutput" + table-names = ["opengauss_cdc.inventory.full_types_no_primary_key"] + exactly_once = false + } +} + +transform { + +} + +sink { + jdbc { + source_table_name = "customers_opengauss_cdc" + url = "jdbc:postgresql://opengauss_cdc_e2e:5432/opengauss_cdc?loggerLevel=OFF" + driver = "org.postgresql.Driver" + user = "dailai" + password = "openGauss@123" + + compatible_mode="postgresLow" + generate_sink_sql = true + # You need to configure both database and table + database = opengauss_cdc + table = inventory.sink_opengauss_cdc_table_1 + primary_keys = ["id"] + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java index 8b63ee5bf9c..19f7f118f28 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcOracleIT.java @@ -25,11 +25,15 @@ import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialect; import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.oracle.OracleDialect; import org.apache.seatunnel.connectors.seatunnel.jdbc.source.JdbcSourceTable; +import org.apache.seatunnel.e2e.common.container.TestContainer; import org.apache.commons.lang3.tuple.Pair; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestTemplate; +import org.testcontainers.containers.Container; import org.testcontainers.containers.GenericContainer; import org.testcontainers.containers.OracleContainer; import org.testcontainers.containers.output.Slf4jLogConsumer; @@ -77,6 +81,9 @@ public class JdbcOracleIT extends AbstractJdbcIT { + " VARCHAR_10_COL varchar2(10),\n" + " CHAR_10_COL char(10),\n" + " CLOB_COL clob,\n" + + " NUMBER_1 number(1),\n" + + " NUMBER_6 number(6),\n" + + " NUMBER_10 number(10),\n" + " NUMBER_3_SF_2_DP number(3, 2),\n" + " NUMBER_7_SF_N2_DP number(7, -2),\n" + " INTEGER_COL integer,\n" @@ -97,6 +104,9 @@ public class JdbcOracleIT extends AbstractJdbcIT { + " VARCHAR_10_COL varchar2(10),\n" + " CHAR_10_COL char(10),\n" + " CLOB_COL clob,\n" + + " NUMBER_1 number(1),\n" + + " NUMBER_6 number(6),\n" + + " NUMBER_10 number(10),\n" + " NUMBER_3_SF_2_DP number(3, 2),\n" + " NUMBER_7_SF_N2_DP number(7, -2),\n" + " INTEGER_COL integer,\n" @@ -115,6 +125,9 @@ public class JdbcOracleIT extends AbstractJdbcIT { "VARCHAR_10_COL", "CHAR_10_COL", "CLOB_COL", + "NUMBER_1", + "NUMBER_6", + "NUMBER_10", 
"NUMBER_3_SF_2_DP", "NUMBER_7_SF_N2_DP", "INTEGER_COL", @@ -148,6 +161,14 @@ public void testSampleDataFromColumnSuccess() throws Exception { dialect.sampleDataFromColumn(connection, table, "INTEGER_COL", 1, 1024); } + @TestTemplate + public void testOracleWithoutDecimalTypeNarrowing(TestContainer container) throws Exception { + Container.ExecResult execResult = + container.executeJob( + "/jdbc_oracle_source_to_sink_without_decimal_type_narrowing.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + } + @Override JdbcCase getJdbcCase() { Map containerEnv = new HashMap<>(); @@ -209,6 +230,9 @@ Pair> initTestData() { String.format("f%s", i), String.format("f%s", i), String.format("f%s", i), + 1, + i * 10, + i * 1000, BigDecimal.valueOf(1.1), BigDecimal.valueOf(2400), i, diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink.conf index d956894c340..4df8c7b9934 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink.conf @@ -30,7 +30,7 @@ source { url = "jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" user = testUser password = testPassword - query = "SELECT VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL FROM E2E_TABLE_SOURCE" + query = "SELECT VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_1,NUMBER_6,NUMBER_10,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL FROM E2E_TABLE_SOURCE" properties { database.oracle.jdbc.timezoneAsRegion = "false" } @@ -46,7 +46,7 @@ sink { url = "jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" user = testUser password = testPassword - query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)" + query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_1,NUMBER_6,NUMBER_10,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)" properties { database.oracle.jdbc.timezoneAsRegion = "false" } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select1.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select1.conf index 8a0c8310443..1988b488721 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select1.conf +++ 
b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select1.conf @@ -31,7 +31,7 @@ source { user = testUser password = testPassword use_select_count = true - query = "SELECT VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL FROM E2E_TABLE_SOURCE" + query = "SELECT VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_1,NUMBER_6,NUMBER_10,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL FROM E2E_TABLE_SOURCE" properties { database.oracle.jdbc.timezoneAsRegion = "false" } @@ -47,7 +47,7 @@ sink { url = "jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" user = testUser password = testPassword - query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)" + query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_1,NUMBER_6,NUMBER_10,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)" properties { database.oracle.jdbc.timezoneAsRegion = "false" } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select2.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select2.conf index ebebdb55051..4d01da5c72a 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select2.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select2.conf @@ -47,7 +47,7 @@ sink { url = "jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" user = testUser password = testPassword - query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)" + query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_1,NUMBER_6,NUMBER_10,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)" properties { database.oracle.jdbc.timezoneAsRegion = "false" } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select3.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select3.conf index d00ce9b6434..94a850fdd01 100644 --- 
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select3.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_use_select3.conf @@ -48,7 +48,7 @@ sink { url = "jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" user = testUser password = testPassword - query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)" + query = "INSERT INTO E2E_TABLE_SINK (VARCHAR_10_COL,CHAR_10_COL,CLOB_COL,NUMBER_1,NUMBER_6,NUMBER_10,NUMBER_3_SF_2_DP,NUMBER_7_SF_N2_DP,INTEGER_COL,FLOAT_COL,REAL_COL,BINARY_FLOAT_COL,BINARY_DOUBLE_COL,DATE_COL,TIMESTAMP_WITH_3_FRAC_SEC_COL,TIMESTAMP_WITH_LOCAL_TZ,XML_TYPE_COL) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)" properties { database.oracle.jdbc.timezoneAsRegion = "false" } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_without_decimal_type_narrowing.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_without_decimal_type_narrowing.conf new file mode 100644 index 00000000000..58e98f5deff --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_oracle_source_to_sink_without_decimal_type_narrowing.conf @@ -0,0 +1,82 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
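+# Asserts that Oracle NUMBER(1)/NUMBER(6)/NUMBER(10) are read as DECIMAL(1,0)/(6,0)/(10,0) when decimal_type_narrowing = false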
+# + +env { + parallelism = 1 + job.mode = "BATCH" +} + +source { + Jdbc { + driver = oracle.jdbc.driver.OracleDriver + url = "jdbc:oracle:thin:@e2e_oracleDb:1521/TESTUSER" + user = testUser + password = testPassword + decimal_type_narrowing = false + query = "SELECT NUMBER_1,NUMBER_6,NUMBER_10 FROM E2E_TABLE_SOURCE" + properties { + database.oracle.jdbc.timezoneAsRegion = "false" + } + } +} + +sink { + Assert { + rules { + row_rules = [ + { + rule_type = MAX_ROW + rule_value = 20000 + }, + { + rule_type = MIN_ROW + rule_value = 20000 + } + ], + field_rules = [ + { + field_name = NUMBER_1 + field_type = "decimal(1, 0)" + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = NUMBER_6 + field_type = "decimal(6, 0)" + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = NUMBER_10 + field_type = "decimal(10, 0)" + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } + +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java index 6e67aa021d1..ffc97f4dd33 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java @@ -19,9 +19,15 @@ import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.ObjectMapper; import org.apache.seatunnel.shade.com.fasterxml.jackson.databind.node.ObjectNode; +import org.apache.seatunnel.shade.com.typesafe.config.Config; +import org.apache.seatunnel.shade.com.typesafe.config.ConfigFactory; +import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.table.catalog.CatalogTable; import org.apache.seatunnel.api.table.catalog.CatalogTableUtil; +import org.apache.seatunnel.api.table.catalog.PhysicalColumn; +import org.apache.seatunnel.api.table.catalog.TableIdentifier; +import org.apache.seatunnel.api.table.catalog.TableSchema; import org.apache.seatunnel.api.table.type.ArrayType; import org.apache.seatunnel.api.table.type.BasicType; import org.apache.seatunnel.api.table.type.DecimalType; @@ -40,6 +46,7 @@ import org.apache.seatunnel.e2e.common.container.TestContainerId; import org.apache.seatunnel.e2e.common.junit.DisabledOnContainer; import org.apache.seatunnel.format.avro.AvroDeserializationSchema; +import org.apache.seatunnel.format.protobuf.ProtobufDeserializationSchema; import org.apache.seatunnel.format.text.TextSerializationSchema; import org.apache.kafka.clients.admin.AdminClient; @@ -71,8 +78,13 @@ import lombok.extern.slf4j.Slf4j; +import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import java.math.BigDecimal; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Paths; import java.time.Duration; import java.time.LocalDate; import java.time.LocalDateTime; @@ -85,6 +97,7 @@ import java.util.Properties; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; +import java.util.stream.IntStream; import java.util.stream.Stream; @Slf4j @@ -126,7 +139,8 @@ public void startUp() throws Exception { "test_topic_source", SEATUNNEL_ROW_TYPE, DEFAULT_FORMAT, - DEFAULT_FIELD_DELIMITER); + DEFAULT_FIELD_DELIMITER, + null); 
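+                        // Hedged note: the extra trailing argument added to create(...) appears to be the
+                        // ReadonlyConfig that carries the protobuf options (protobuf_message_name / protobuf_schema),
+                        // as used later in testKafkaProtobufToAssert; it can stay null here because startUp()
+                        // produces test data with the default json format, which needs no protobuf configuration.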
generateTestData(serializer::serializeRow, 0, 100); } @@ -259,7 +273,8 @@ public void testSourceKafkaJsonToConsole(TestContainer container) "test_topic_json", SEATUNNEL_ROW_TYPE, DEFAULT_FORMAT, - DEFAULT_FIELD_DELIMITER); + DEFAULT_FIELD_DELIMITER, + null); generateTestData(row -> serializer.serializeRow(row), 0, 100); Container.ExecResult execResult = container.executeJob("/jsonFormatIT/kafka_source_json_to_console.conf"); @@ -274,7 +289,8 @@ public void testSourceKafkaJsonFormatErrorHandleWaySkipToConsole(TestContainer c "test_topic_error_message", SEATUNNEL_ROW_TYPE, DEFAULT_FORMAT, - DEFAULT_FIELD_DELIMITER); + DEFAULT_FIELD_DELIMITER, + null); generateTestData(row -> serializer.serializeRow(row), 0, 100); Container.ExecResult execResult = container.executeJob( @@ -290,7 +306,8 @@ public void testSourceKafkaJsonFormatErrorHandleWayFailToConsole(TestContainer c "test_topic_error_message", SEATUNNEL_ROW_TYPE, DEFAULT_FORMAT, - DEFAULT_FIELD_DELIMITER); + DEFAULT_FIELD_DELIMITER, + null); generateTestData(row -> serializer.serializeRow(row), 0, 100); Container.ExecResult execResult = container.executeJob( @@ -314,7 +331,8 @@ public void testSourceKafkaStartConfig(TestContainer container) "test_topic_group", SEATUNNEL_ROW_TYPE, DEFAULT_FORMAT, - DEFAULT_FIELD_DELIMITER); + DEFAULT_FIELD_DELIMITER, + null); generateTestData(row -> serializer.serializeRow(row), 100, 150); testKafkaGroupOffsetsToConsole(container); } @@ -331,7 +349,8 @@ public void testSourceKafkaStartConfigWithCommitOffset(TestContainer container) "test_topic_group_with_commit_offset", SEATUNNEL_ROW_TYPE, DEFAULT_FORMAT, - DEFAULT_FIELD_DELIMITER); + DEFAULT_FIELD_DELIMITER, + null); generateTestData(row -> serializer.serializeRow(row), 0, 100); testKafkaGroupOffsetsToConsoleWithCommitOffset(container); } @@ -456,7 +475,8 @@ public void testKafkaAvroToAssert(TestContainer container) "test_avro_topic", SEATUNNEL_ROW_TYPE, MessageFormat.AVRO, - DEFAULT_FIELD_DELIMITER); + DEFAULT_FIELD_DELIMITER, + null); int start = 0; int end = 100; generateTestData(row -> serializer.serializeRow(row), start, end); @@ -504,6 +524,260 @@ public void testKafkaAvroToAssert(TestContainer container) }); } + @TestTemplate + public void testFakeSourceToKafkaProtobufFormat(TestContainer container) + throws IOException, InterruptedException, URISyntaxException { + + // Execute the job and verify the exit code + Container.ExecResult execResult = + container.executeJob("/protobuf/fake_to_kafka_protobuf.conf"); + Assertions.assertEquals(0, execResult.getExitCode(), execResult.getStderr()); + + // Define the SeaTunnelRowType for the address field + SeaTunnelRowType addressType = + new SeaTunnelRowType( + new String[] {"city", "state", "street"}, + new SeaTunnelDataType[] { + BasicType.STRING_TYPE, BasicType.STRING_TYPE, BasicType.STRING_TYPE + }); + + // Define the SeaTunnelRowType for the main schema + SeaTunnelRowType seaTunnelRowType = + new SeaTunnelRowType( + new String[] { + "c_int32", + "c_int64", + "c_float", + "c_double", + "c_bool", + "c_string", + "c_bytes", + "Address", + "attributes", + "phone_numbers" + }, + new SeaTunnelDataType[] { + BasicType.INT_TYPE, + BasicType.LONG_TYPE, + BasicType.FLOAT_TYPE, + BasicType.DOUBLE_TYPE, + BasicType.BOOLEAN_TYPE, + BasicType.STRING_TYPE, + PrimitiveByteArrayType.INSTANCE, + addressType, + new MapType<>(BasicType.STRING_TYPE, BasicType.FLOAT_TYPE), + ArrayType.STRING_ARRAY_TYPE + }); + + // Parse the configuration file + String path = getTestConfigFile("/protobuf/fake_to_kafka_protobuf.conf"); + Config 
config = ConfigFactory.parseFile(new File(path)); + Config sinkConfig = config.getConfigList("sink").get(0); + + // Prepare the schema properties + Map schemaProperties = new HashMap<>(); + schemaProperties.put( + "protobuf_message_name", sinkConfig.getString("protobuf_message_name")); + schemaProperties.put("protobuf_schema", sinkConfig.getString("protobuf_schema")); + + // Build the table schema based on SeaTunnelRowType + TableSchema schema = + TableSchema.builder() + .columns( + Arrays.asList( + IntStream.range(0, seaTunnelRowType.getTotalFields()) + .mapToObj( + i -> + PhysicalColumn.of( + seaTunnelRowType + .getFieldName(i), + seaTunnelRowType + .getFieldType(i), + 0, + true, + null, + null)) + .toArray(PhysicalColumn[]::new))) + .build(); + + // Create the catalog table + CatalogTable catalogTable = + CatalogTable.of( + TableIdentifier.of("", "", "", "test"), + schema, + schemaProperties, + Collections.emptyList(), + "It is converted from RowType and only has column information."); + + // Initialize the Protobuf deserialization schema + ProtobufDeserializationSchema deserializationSchema = + new ProtobufDeserializationSchema(catalogTable); + + // Retrieve and verify Kafka rows + List kafkaRows = + getKafkaSTRow( + "test_protobuf_topic_fake_source", + value -> { + try { + return deserializationSchema.deserialize(value); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + Assertions.assertEquals(16, kafkaRows.size()); + + // Validate the contents of each row + kafkaRows.forEach( + row -> { + Assertions.assertInstanceOf(Integer.class, row.getField(0)); + Assertions.assertInstanceOf(Long.class, row.getField(1)); + Assertions.assertInstanceOf(Float.class, row.getField(2)); + Assertions.assertInstanceOf(Double.class, row.getField(3)); + Assertions.assertInstanceOf(Boolean.class, row.getField(4)); + Assertions.assertInstanceOf(String.class, row.getField(5)); + Assertions.assertInstanceOf(byte[].class, row.getField(6)); + Assertions.assertInstanceOf(SeaTunnelRow.class, row.getField(7)); + Assertions.assertInstanceOf(Map.class, row.getField(8)); + Assertions.assertInstanceOf(String[].class, row.getField(9)); + }); + } + + @TestTemplate + public void testKafkaProtobufToAssert(TestContainer container) + throws IOException, InterruptedException, URISyntaxException { + + String confFile = "/protobuf/kafka_protobuf_to_assert.conf"; + String path = getTestConfigFile(confFile); + Config config = ConfigFactory.parseFile(new File(path)); + Config sinkConfig = config.getConfigList("source").get(0); + ReadonlyConfig readonlyConfig = ReadonlyConfig.fromConfig(sinkConfig); + SeaTunnelRowType seaTunnelRowType = buildSeaTunnelRowType(); + + // Prepare schema properties + Map schemaProperties = new HashMap<>(); + schemaProperties.put( + "protobuf_message_name", sinkConfig.getString("protobuf_message_name")); + schemaProperties.put("protobuf_schema", sinkConfig.getString("protobuf_schema")); + + // Build the table schema + TableSchema schema = + TableSchema.builder() + .columns( + Arrays.asList( + IntStream.range(0, seaTunnelRowType.getTotalFields()) + .mapToObj( + i -> + PhysicalColumn.of( + seaTunnelRowType + .getFieldName(i), + seaTunnelRowType + .getFieldType(i), + 0, + true, + null, + null)) + .toArray(PhysicalColumn[]::new))) + .build(); + + // Create catalog table + CatalogTable catalogTable = + CatalogTable.of( + TableIdentifier.of("", "", "", "test"), + schema, + schemaProperties, + Collections.emptyList(), + "It is converted from RowType and only has column information."); + 
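+        // The schemaProperties passed as catalog table options above are what ProtobufDeserializationSchema
+        // reads back (protobuf_message_name and protobuf_schema) to compile the descriptor before deserializing.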
+ // Initialize the Protobuf deserialization schema + ProtobufDeserializationSchema deserializationSchema = + new ProtobufDeserializationSchema(catalogTable); + + // Create serializer + DefaultSeaTunnelRowSerializer serializer = + DefaultSeaTunnelRowSerializer.create( + "test_protobuf_topic_fake_source", + seaTunnelRowType, + MessageFormat.PROTOBUF, + DEFAULT_FIELD_DELIMITER, + readonlyConfig); + + // Produce records to Kafka + IntStream.range(0, 20) + .forEach( + i -> { + try { + SeaTunnelRow originalRow = buildSeaTunnelRow(); + ProducerRecord producerRecord = + serializer.serializeRow(originalRow); + producer.send(producerRecord).get(); + } catch (InterruptedException | ExecutionException e) { + throw new RuntimeException("Error sending Kafka message", e); + } + }); + + producer.flush(); + + // Execute the job and validate + Container.ExecResult execResult = container.executeJob(confFile); + Assertions.assertEquals(0, execResult.getExitCode(), execResult.getStderr()); + + // Retrieve and verify Kafka rows + List kafkaSTRow = + getKafkaSTRow( + "test_protobuf_topic_fake_source", + value -> { + try { + return deserializationSchema.deserialize(value); + } catch (IOException e) { + throw new RuntimeException("Error deserializing Kafka message", e); + } + }); + + // Prepare expected values for assertions + SeaTunnelRow expectedAddress = new SeaTunnelRow(3); + expectedAddress.setField(0, "city_value"); + expectedAddress.setField(1, "state_value"); + expectedAddress.setField(2, "street_value"); + + Map expectedAttributesMap = new HashMap<>(); + expectedAttributesMap.put("k1", 0.1F); + expectedAttributesMap.put("k2", 2.3F); + + String[] expectedPhoneNumbers = {"1", "2"}; + + // Assertions + Assertions.assertEquals(20, kafkaSTRow.size()); + kafkaSTRow.forEach( + row -> { + Assertions.assertAll( + "Verify row fields", + () -> Assertions.assertEquals(123, (int) row.getField(0)), + () -> Assertions.assertEquals(123123123123L, (long) row.getField(1)), + () -> Assertions.assertEquals(0.123f, (float) row.getField(2)), + () -> Assertions.assertEquals(0.123d, (double) row.getField(3)), + () -> Assertions.assertFalse((boolean) row.getField(4)), + () -> Assertions.assertEquals("test data", row.getField(5).toString()), + () -> + Assertions.assertArrayEquals( + new byte[] {1, 2, 3}, (byte[]) row.getField(6)), + () -> Assertions.assertEquals(expectedAddress, row.getField(7)), + () -> Assertions.assertEquals(expectedAttributesMap, row.getField(8)), + () -> + Assertions.assertArrayEquals( + expectedPhoneNumbers, (String[]) row.getField(9))); + }); + } + + public static String getTestConfigFile(String configFile) + throws FileNotFoundException, URISyntaxException { + URL resource = KafkaIT.class.getResource(configFile); + if (resource == null) { + throw new FileNotFoundException("Can't find config file: " + configFile); + } + return Paths.get(resource.toURI()).toString(); + } + public void testKafkaLatestToConsole(TestContainer container) throws IOException, InterruptedException { Container.ExecResult execResult = @@ -759,4 +1033,68 @@ interface ProducerRecordConverter { interface ConsumerRecordConverter { SeaTunnelRow convert(byte[] value); } + + private SeaTunnelRow buildSeaTunnelRow() { + SeaTunnelRow seaTunnelRow = new SeaTunnelRow(10); + + Map attributesMap = new HashMap<>(); + attributesMap.put("k1", 0.1F); + attributesMap.put("k2", 2.3F); + + String[] phoneNumbers = {"1", "2"}; + byte[] byteVal = {1, 2, 3}; + + SeaTunnelRow address = new SeaTunnelRow(3); + address.setField(0, "city_value"); + 
address.setField(1, "state_value"); + address.setField(2, "street_value"); + + seaTunnelRow.setField(0, 123); + seaTunnelRow.setField(1, 123123123123L); + seaTunnelRow.setField(2, 0.123f); + seaTunnelRow.setField(3, 0.123d); + seaTunnelRow.setField(4, false); + seaTunnelRow.setField(5, "test data"); + seaTunnelRow.setField(6, byteVal); + seaTunnelRow.setField(7, address); + seaTunnelRow.setField(8, attributesMap); + seaTunnelRow.setField(9, phoneNumbers); + + return seaTunnelRow; + } + + private SeaTunnelRowType buildSeaTunnelRowType() { + SeaTunnelRowType addressType = + new SeaTunnelRowType( + new String[] {"city", "state", "street"}, + new SeaTunnelDataType[] { + BasicType.STRING_TYPE, BasicType.STRING_TYPE, BasicType.STRING_TYPE + }); + + return new SeaTunnelRowType( + new String[] { + "c_int32", + "c_int64", + "c_float", + "c_double", + "c_bool", + "c_string", + "c_bytes", + "Address", + "attributes", + "phone_numbers" + }, + new SeaTunnelDataType[] { + BasicType.INT_TYPE, + BasicType.LONG_TYPE, + BasicType.FLOAT_TYPE, + BasicType.DOUBLE_TYPE, + BasicType.BOOLEAN_TYPE, + BasicType.STRING_TYPE, + PrimitiveByteArrayType.INSTANCE, + addressType, + new MapType<>(BasicType.STRING_TYPE, BasicType.FLOAT_TYPE), + ArrayType.STRING_ARRAY_TYPE + }); + } } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/protobuf/fake_to_kafka_protobuf.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/protobuf/fake_to_kafka_protobuf.conf new file mode 100644 index 00000000000..29a9985629b --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/protobuf/fake_to_kafka_protobuf.conf @@ -0,0 +1,100 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + parallelism = 1 + job.mode = "BATCH" + + # spark config + spark.executor.instances = 1 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local + +} +source { + FakeSource { + parallelism = 1 + result_table_name = "fake" + row.num = 16 + schema = { + fields { + c_int32 = int + c_int64 = long + c_float = float + c_double = double + c_bool = boolean + c_string = string + c_bytes = bytes + + Address { + city = string + state = string + street = string + } + attributes = "map" + phone_numbers = "array" + } + } + } +} + +sink { + kafka { + topic = "test_protobuf_topic_fake_source" + bootstrap.servers = "kafkaCluster:9092" + format = protobuf + kafka.request.timeout.ms = 60000 +# semantics = EXACTLY_ONCE + kafka.config = { + acks = "all" + request.timeout.ms = 60000 + buffer.memory = 33554432 + } + protobuf_message_name = Person + protobuf_schema = """ + syntax = "proto3"; + + package org.apache.seatunnel.format.protobuf; + + option java_outer_classname = "ProtobufE2E"; + + message Person { + int32 c_int32 = 1; + int64 c_int64 = 2; + float c_float = 3; + double c_double = 4; + bool c_bool = 5; + string c_string = 6; + bytes c_bytes = 7; + + message Address { + string street = 1; + string city = 2; + string state = 3; + string zip = 4; + } + + Address address = 8; + + map attributes = 9; + + repeated string phone_numbers = 10; + } + """ + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/protobuf/kafka_protobuf_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/protobuf/kafka_protobuf_to_assert.conf new file mode 100644 index 00000000000..3375cd11d77 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/protobuf/kafka_protobuf_to_assert.conf @@ -0,0 +1,177 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +env { + parallelism = 1 + job.mode = "BATCH" + spark.app.name = "SeaTunnel" + spark.executor.instances = 1 + spark.executor.cores = 1 + spark.executor.memory = "1g" + spark.master = local +} + +source { + Kafka { + topic = "test_protobuf_topic_fake_source" + format = protobuf + protobuf_message_name = Person + protobuf_schema = """ + syntax = "proto3"; + + package org.apache.seatunnel.format.protobuf; + + option java_outer_classname = "ProtobufE2E"; + + message Person { + int32 c_int32 = 1; + int64 c_int64 = 2; + float c_float = 3; + double c_double = 4; + bool c_bool = 5; + string c_string = 6; + bytes c_bytes = 7; + + message Address { + string street = 1; + string city = 2; + string state = 3; + string zip = 4; + } + + Address address = 8; + + map attributes = 9; + + repeated string phone_numbers = 10; + } + """ + schema = { + fields { + c_int32 = int + c_int64 = long + c_float = float + c_double = double + c_bool = boolean + c_string = string + c_bytes = bytes + + Address { + city = string + state = string + street = string + } + attributes = "map" + phone_numbers = "array" + } + } + bootstrap.servers = "kafkaCluster:9092" + start_mode = "earliest" + result_table_name = "kafka_table" + } +} + +sink { + Assert { + source_table_name = "kafka_table" + rules = { + field_rules = [ + { + field_name = c_int32 + field_type = int + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_int64 + field_type = long + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_float + field_type = float + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_double + field_type = double + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_bool + field_type = boolean + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_string + field_type = string + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = c_bytes + field_type = bytes + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = attributes + field_type = "map" + field_value = [ + { + rule_type = NOT_NULL + } + ] + }, + { + field_name = phone_numbers + field_type = array + field_value = [ + { + rule_type = NOT_NULL + } + ] + } + ] + } + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml index 4933ab02057..db52e440050 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/pom.xml @@ -78,6 +78,7 @@ connector-activemq-e2e connector-sls-e2e connector-email-e2e + connector-cdc-opengauss-e2e diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-console-seatunnel-e2e/src/test/java/org/apache/seatunnel/engine/e2e/console/FakeSourceToConsoleWithEventReportIT.java b/seatunnel-e2e/seatunnel-engine-e2e/connector-console-seatunnel-e2e/src/test/java/org/apache/seatunnel/engine/e2e/console/FakeSourceToConsoleWithEventReportIT.java index 8389cb3c058..8e45bbf9de5 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-console-seatunnel-e2e/src/test/java/org/apache/seatunnel/engine/e2e/console/FakeSourceToConsoleWithEventReportIT.java +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-console-seatunnel-e2e/src/test/java/org/apache/seatunnel/engine/e2e/console/FakeSourceToConsoleWithEventReportIT.java @@ -109,7 +109,7 @@ public void testEventReport() throws IOException, InterruptedException { arrayNode.elements().forEachRemaining(jsonNode -> 
events.add(jsonNode)); } } - Assertions.assertEquals(8, events.size()); + Assertions.assertEquals(10, events.size()); Set eventTypes = events.stream().map(e -> e.get("eventType").asText()).collect(Collectors.toSet()); Assertions.assertTrue( diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceIT.java b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceIT.java index ac29b4bf355..f48ca3f1817 100644 --- a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceIT.java +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/ClusterFaultToleranceIT.java @@ -992,7 +992,6 @@ public void testStreamJobRestoreFromOssInAllNodeDown() throws Exception { + " fs.oss.endpoint: " + OSS_ENDPOINT + "\n" - + " fs.oss.credentials.provider: org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider\n" + " properties:\n" + " hazelcast.invocation.max.retry.count: 200\n" + " hazelcast.tcp.join.port.try.count: 30\n" diff --git a/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/resourceIsolation/WorkerTagClusterTest.java b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/resourceIsolation/WorkerTagClusterTest.java new file mode 100644 index 00000000000..63736a90ae5 --- /dev/null +++ b/seatunnel-e2e/seatunnel-engine-e2e/connector-seatunnel-e2e-base/src/test/java/org/apache/seatunnel/engine/e2e/resourceIsolation/WorkerTagClusterTest.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.seatunnel.engine.e2e.resourceIsolation; + +import org.apache.seatunnel.engine.common.config.ConfigProvider; +import org.apache.seatunnel.engine.common.config.SeaTunnelConfig; +import org.apache.seatunnel.engine.e2e.TestUtils; +import org.apache.seatunnel.engine.server.SeaTunnelServer; +import org.apache.seatunnel.engine.server.SeaTunnelServerStarter; +import org.apache.seatunnel.engine.server.resourcemanager.ResourceManager; + +import org.awaitility.Awaitility; +import org.awaitility.core.ThrowingRunnable; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import com.hazelcast.config.Config; +import com.hazelcast.instance.impl.HazelcastInstanceImpl; +import com.hazelcast.spi.impl.NodeEngineImpl; +import lombok.extern.slf4j.Slf4j; + +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +@Slf4j +public class WorkerTagClusterTest { + + HazelcastInstanceImpl masterNode1 = null; + HazelcastInstanceImpl workerNode1 = null; + String testClusterName = "WorkerTagClusterTest"; + + @BeforeEach + public void before() { + SeaTunnelConfig masterNode1Config = getSeaTunnelConfig(testClusterName); + SeaTunnelConfig workerNode1Config = getSeaTunnelConfig(testClusterName); + masterNode1 = SeaTunnelServerStarter.createMasterHazelcastInstance(masterNode1Config); + workerNode1 = SeaTunnelServerStarter.createWorkerHazelcastInstance(workerNode1Config); + } + + @AfterEach + void afterClass() { + if (masterNode1 != null) { + masterNode1.shutdown(); + } + if (workerNode1 != null) { + workerNode1.shutdown(); + } + } + + @Test + public void testTagMatch() throws Exception { + Map tag = new HashMap<>(); + tag.put("group", "platform"); + tag.put("team", "team1"); + testTagFilter(tag, 1); + } + + @Test + public void testTagMatch2() throws Exception { + testTagFilter(null, 1); + } + + @Test + public void testTagNotMatch() throws Exception { + Map tag = new HashMap<>(); + tag.put("group", "platform"); + tag.put("team", "team1111111"); + testTagFilter(tag, 0); + } + + @Test + public void testTagNotMatch2() throws Exception { + testTagFilter(new HashMap<>(), 1); + } + + public void testTagFilter(Map tagFilter, int expectedWorkerCount) + throws Exception { + // waiting all node added to cluster + Awaitility.await() + .atMost(10000, TimeUnit.MILLISECONDS) + .untilAsserted( + new ThrowingRunnable() { + @Override + public void run() throws Throwable { + Thread.sleep(2000); + // check master and worker node + Assertions.assertEquals( + 2, masterNode1.getCluster().getMembers().size()); + NodeEngineImpl nodeEngine = masterNode1.node.nodeEngine; + SeaTunnelServer server = + nodeEngine.getService(SeaTunnelServer.SERVICE_NAME); + ResourceManager resourceManager = + server.getCoordinatorService().getResourceManager(); + // if tag matched, then worker count is 1 else 0 + int workerCount = resourceManager.workerCount(tagFilter); + Assertions.assertEquals(expectedWorkerCount, workerCount); + } + }); + } + + private static SeaTunnelConfig getSeaTunnelConfig(String testClusterName) { + Config hazelcastConfig = Config.loadFromString(getHazelcastConfig()); + hazelcastConfig.setClusterName(TestUtils.getClusterName(testClusterName)); + SeaTunnelConfig seaTunnelConfig = ConfigProvider.locateAndGetSeaTunnelConfig(); + seaTunnelConfig.setHazelcastConfig(hazelcastConfig); + return seaTunnelConfig; + } + + protected static String getHazelcastConfig() { + return "hazelcast:\n" + 
+ " cluster-name: seatunnel\n" + + " network:\n" + + " rest-api:\n" + + " enabled: true\n" + + " endpoint-groups:\n" + + " CLUSTER_WRITE:\n" + + " enabled: true\n" + + " join:\n" + + " tcp-ip:\n" + + " enabled: true\n" + + " member-list:\n" + + " - localhost\n" + + " port:\n" + + " auto-increment: true\n" + + " port-count: 100\n" + + " port: 5801\n" + + "\n" + + " properties:\n" + + " hazelcast.invocation.max.retry.count: 200\n" + + " hazelcast.tcp.join.port.try.count: 30\n" + + " hazelcast.invocation.retry.pause.millis: 2000\n" + + " hazelcast.slow.operation.detector.stacktrace.logging.enabled: true\n" + + " hazelcast.logging.type: log4j2\n" + + " hazelcast.operation.generic.thread.count: 200\n" + + " member-attributes:\n" + + " group:\n" + + " type: string\n" + + " value: platform\n" + + " team:\n" + + " type: string\n" + + " value: team1"; + } +} diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/worker/WorkerProfile.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/worker/WorkerProfile.java index 0d0f8c8054b..f357a690da9 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/worker/WorkerProfile.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/resourcemanager/worker/WorkerProfile.java @@ -85,6 +85,7 @@ public void writeData(ObjectDataOutput out) throws IOException { out.writeObject(unassignedSlot); } out.writeBoolean(dynamicSlot); + out.writeObject(attributes); } @Override @@ -103,5 +104,6 @@ public void readData(ObjectDataInput in) throws IOException { unassignedSlots[i] = in.readObject(); } dynamicSlot = in.readBoolean(); + attributes = in.readObject(); } } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSplitEnumeratorTask.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSplitEnumeratorTask.java index d1fc333ade2..8004068ce68 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSplitEnumeratorTask.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/SourceSplitEnumeratorTask.java @@ -22,6 +22,8 @@ import org.apache.seatunnel.api.source.SourceEvent; import org.apache.seatunnel.api.source.SourceSplit; import org.apache.seatunnel.api.source.SourceSplitEnumerator; +import org.apache.seatunnel.api.source.event.EnumeratorCloseEvent; +import org.apache.seatunnel.api.source.event.EnumeratorOpenEvent; import org.apache.seatunnel.engine.core.dag.actions.SourceAction; import org.apache.seatunnel.engine.core.job.ConnectorJarIdentifier; import org.apache.seatunnel.engine.server.checkpoint.ActionStateKey; @@ -121,6 +123,7 @@ public void close() throws IOException { super.close(); if (enumerator != null) { enumerator.close(); + enumeratorContext.getEventListener().onEvent(new EnumeratorCloseEvent()); } progress.done(); } @@ -309,6 +312,7 @@ private void stateProcess() throws Exception { if (startCalled && readerRegisterComplete) { currState = STARTING; enumerator.open(); + enumeratorContext.getEventListener().onEvent(new EnumeratorOpenEvent()); } else { Thread.sleep(100); } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SinkFlowLifeCycle.java 
b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SinkFlowLifeCycle.java index cacaa75aaef..3234560fe4b 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SinkFlowLifeCycle.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SinkFlowLifeCycle.java @@ -24,6 +24,7 @@ import org.apache.seatunnel.api.sink.SinkCommitter; import org.apache.seatunnel.api.sink.SinkWriter; import org.apache.seatunnel.api.sink.SupportResourceShare; +import org.apache.seatunnel.api.sink.event.WriterCloseEvent; import org.apache.seatunnel.api.sink.multitablesink.MultiTableSink; import org.apache.seatunnel.api.table.catalog.TablePath; import org.apache.seatunnel.api.table.event.SchemaChangeEvent; @@ -69,6 +70,7 @@ public class SinkFlowLifeCycle sinkAction; private SinkWriter writer; + private SinkWriter.Context writerContext; private transient Optional> commitInfoSerializer; private transient Optional> writerStateSerializer; @@ -150,6 +152,7 @@ private Address getCommitterTaskAddress() throws ExecutionException, Interrupted public void close() throws IOException { super.close(); writer.close(); + writerContext.getEventListener().onEvent(new WriterCloseEvent()); try { if (resourceManager != null) { resourceManager.close(); @@ -283,19 +286,11 @@ public void restoreState(List actionStateList) throws Except .deserialize(bytes))) .collect(Collectors.toList()); } + this.writerContext = new SinkWriterContext(indexID, metricsContext, eventListener); if (states.isEmpty()) { - this.writer = - sinkAction - .getSink() - .createWriter( - new SinkWriterContext(indexID, metricsContext, eventListener)); + this.writer = sinkAction.getSink().createWriter(writerContext); } else { - this.writer = - sinkAction - .getSink() - .restoreWriter( - new SinkWriterContext(indexID, metricsContext, eventListener), - states); + this.writer = sinkAction.getSink().restoreWriter(writerContext, states); } if (this.writer instanceof SupportResourceShare) { resourceManager = diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SourceFlowLifeCycle.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SourceFlowLifeCycle.java index ca137b3e069..6c596da0c33 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SourceFlowLifeCycle.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/task/flow/SourceFlowLifeCycle.java @@ -23,6 +23,8 @@ import org.apache.seatunnel.api.source.SourceEvent; import org.apache.seatunnel.api.source.SourceReader; import org.apache.seatunnel.api.source.SourceSplit; +import org.apache.seatunnel.api.source.event.ReaderCloseEvent; +import org.apache.seatunnel.api.source.event.ReaderOpenEvent; import org.apache.seatunnel.api.table.type.Record; import org.apache.seatunnel.engine.core.checkpoint.CheckpointType; import org.apache.seatunnel.engine.core.checkpoint.InternalCheckpointListener; @@ -83,6 +85,7 @@ public class SourceFlowLifeCycle extends ActionFl private final MetricsContext metricsContext; private final EventListener eventListener; + private SourceReader.Context context; private final AtomicReference schemaChangePhase = new AtomicReference<>(); @@ -111,21 +114,20 @@ public void setCollector(SeaTunnelSourceCollector collector) { @Override public 
void init() throws Exception { this.splitSerializer = sourceAction.getSource().getSplitSerializer(); - this.reader = - sourceAction - .getSource() - .createReader( - new SourceReaderContext( - indexID, - sourceAction.getSource().getBoundedness(), - this, - metricsContext, - eventListener)); + this.context = + new SourceReaderContext( + indexID, + sourceAction.getSource().getBoundedness(), + this, + metricsContext, + eventListener); + this.reader = sourceAction.getSource().createReader(context); this.enumeratorTaskAddress = getEnumeratorTaskAddress(); } @Override public void open() throws Exception { + context.getEventListener().onEvent(new ReaderOpenEvent()); reader.open(); register(); } @@ -140,6 +142,7 @@ private Address getEnumeratorTaskAddress() throws ExecutionException, Interrupte @Override public void close() throws IOException { + context.getEventListener().onEvent(new ReaderCloseEvent()); reader.close(); super.close(); } diff --git a/seatunnel-engine/seatunnel-engine-storage/checkpoint-storage-plugins/checkpoint-storage-hdfs/src/main/java/org/apache/seatunnel/engine/checkpoint/storage/hdfs/HdfsStorageFactory.java b/seatunnel-engine/seatunnel-engine-storage/checkpoint-storage-plugins/checkpoint-storage-hdfs/src/main/java/org/apache/seatunnel/engine/checkpoint/storage/hdfs/HdfsStorageFactory.java index 213f458b9ee..eec10aeabe1 100644 --- a/seatunnel-engine/seatunnel-engine-storage/checkpoint-storage-plugins/checkpoint-storage-hdfs/src/main/java/org/apache/seatunnel/engine/checkpoint/storage/hdfs/HdfsStorageFactory.java +++ b/seatunnel-engine/seatunnel-engine-storage/checkpoint-storage-plugins/checkpoint-storage-hdfs/src/main/java/org/apache/seatunnel/engine/checkpoint/storage/hdfs/HdfsStorageFactory.java @@ -58,7 +58,6 @@ * fs.oss.accessKeySecret = "your script key" * fs.oss.endpoint = "such as: oss-cn-hangzhou.aliyuncs.com" * oss.bucket= "oss://your bucket" - * fs.oss.credentials.provider = "org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider" * */ @AutoService(CheckpointStorageFactory.class) diff --git a/seatunnel-engine/seatunnel-engine-storage/checkpoint-storage-plugins/checkpoint-storage-hdfs/src/test/java/org/apache/seatunnel/engine/checkpoint/storage/hdfs/OssFileCheckpointTest.java b/seatunnel-engine/seatunnel-engine-storage/checkpoint-storage-plugins/checkpoint-storage-hdfs/src/test/java/org/apache/seatunnel/engine/checkpoint/storage/hdfs/OssFileCheckpointTest.java index 3d7299c266f..9e6ef2409b9 100644 --- a/seatunnel-engine/seatunnel-engine-storage/checkpoint-storage-plugins/checkpoint-storage-hdfs/src/test/java/org/apache/seatunnel/engine/checkpoint/storage/hdfs/OssFileCheckpointTest.java +++ b/seatunnel-engine/seatunnel-engine-storage/checkpoint-storage-plugins/checkpoint-storage-hdfs/src/test/java/org/apache/seatunnel/engine/checkpoint/storage/hdfs/OssFileCheckpointTest.java @@ -40,9 +40,6 @@ public static void setup() throws CheckpointStorageException { config.put("fs.oss.accessKeySecret", "your access key secret"); config.put("fs.oss.endpoint", "oss-cn-hangzhou.aliyuncs.com"); config.put("oss.bucket", "oss://seatunnel-test/"); - config.put( - "fs.oss.credentials.provider", - "org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider"); STORAGE = new HdfsStorage(config); initStorageData(); } diff --git a/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-file/src/test/java/org/apache/seatunnel/engine/imap/storage/file/IMapFileOSSStorageTest.java 
b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-file/src/test/java/org/apache/seatunnel/engine/imap/storage/file/IMapFileOSSStorageTest.java index dd92b65e762..c642670016c 100644 --- a/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-file/src/test/java/org/apache/seatunnel/engine/imap/storage/file/IMapFileOSSStorageTest.java +++ b/seatunnel-engine/seatunnel-engine-storage/imap-storage-plugins/imap-storage-file/src/test/java/org/apache/seatunnel/engine/imap/storage/file/IMapFileOSSStorageTest.java @@ -68,9 +68,6 @@ public class IMapFileOSSStorageTest { CONF.set("fs.oss.accessKeyId", OSS_ACCESS_KEY_ID); CONF.set("fs.oss.accessKeySecret", OSS_ACCESS_KEY_SECRET); CONF.set("fs.oss.impl", "org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem"); - CONF.set( - "fs.oss.credentials.provider", - "org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider"); STORAGE = new IMapFileStorage(); Map properties = new HashMap<>(); @@ -81,9 +78,6 @@ public class IMapFileOSSStorageTest { properties.put("fs.oss.accessKeyId", OSS_ACCESS_KEY_ID); properties.put("fs.oss.accessKeySecret", OSS_ACCESS_KEY_SECRET); properties.put("fs.oss.impl", "org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem"); - properties.put( - "fs.oss.credentials.provider", - "org.apache.hadoop.fs.aliyun.oss.AliyunCredentialsProvider"); properties.put(FileConstants.FileInitProperties.BUSINESS_KEY, BUSINESS); properties.put(FileConstants.FileInitProperties.NAMESPACE_KEY, NAMESPACE); properties.put(FileConstants.FileInitProperties.CLUSTER_NAME, CLUSTER_NAME); diff --git a/seatunnel-formats/pom.xml b/seatunnel-formats/pom.xml index a330e9b0e05..c66c6c8eafa 100644 --- a/seatunnel-formats/pom.xml +++ b/seatunnel-formats/pom.xml @@ -32,6 +32,7 @@ seatunnel-format-compatible-debezium-json seatunnel-format-compatible-connect-json seatunnel-format-avro + seatunnel-format-protobuf diff --git a/seatunnel-formats/seatunnel-format-protobuf/pom.xml b/seatunnel-formats/seatunnel-format-protobuf/pom.xml new file mode 100644 index 00000000000..8f8f7030f64 --- /dev/null +++ b/seatunnel-formats/seatunnel-format-protobuf/pom.xml @@ -0,0 +1,59 @@ + + + + 4.0.0 + + org.apache.seatunnel + seatunnel-formats + ${revision} + + + seatunnel-format-protobuf + SeaTunnel : Formats : Protobuf + + + 8 + 8 + UTF-8 + 3.25.3 + 3.11.4 + + + + + org.apache.seatunnel + seatunnel-api + ${project.version} + provided + + + com.google.protobuf + protobuf-java + ${protobuf.version} + + + com.google.protobuf + protobuf-java-util + ${protobuf.version} + + + com.github.os72 + protoc-jar + ${protoc.jar.version} + + + diff --git a/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/CompileDescriptor.java b/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/CompileDescriptor.java new file mode 100644 index 00000000000..17a14bc84f7 --- /dev/null +++ b/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/CompileDescriptor.java @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.format.protobuf; + +import org.apache.seatunnel.common.utils.FileUtils; +import org.apache.seatunnel.format.protobuf.exception.ProtobufFormatErrorCode; +import org.apache.seatunnel.format.protobuf.exception.SeaTunnelProtobufFormatException; + +import com.github.os72.protocjar.Protoc; +import com.google.protobuf.DescriptorProtos; +import com.google.protobuf.Descriptors; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.List; + +public class CompileDescriptor { + + public static Descriptors.Descriptor compileDescriptorTempFile( + String protoContent, String messageName) + throws IOException, InterruptedException, Descriptors.DescriptorValidationException { + // Because Protobuf can only be dynamically parsed through the descriptor file, the file + // needs to be compiled and generated. The following method is used here to solve the + // problem: generate a temporary directory and compile .proto into a descriptor temporary + // file. The temporary file and directory are deleted after the JVM runs. + File tmpDir = createTempDirectory(); + File protoFile = createProtoFile(tmpDir, protoContent); + String targetDescPath = compileProtoToDescriptor(tmpDir, protoFile); + + try (FileInputStream fis = new FileInputStream(targetDescPath)) { + DescriptorProtos.FileDescriptorSet descriptorSet = + DescriptorProtos.FileDescriptorSet.parseFrom(fis); + Descriptors.FileDescriptor[] descriptorsArray = buildFileDescriptors(descriptorSet); + return descriptorsArray[0].findMessageTypeByName(messageName); + } finally { + tmpDir.delete(); + protoFile.delete(); + new File(targetDescPath).delete(); + } + } + + private static File createTempDirectory() throws IOException { + File tmpDir = File.createTempFile("tmp_protobuf_", "_proto"); + tmpDir.delete(); + tmpDir.mkdirs(); + tmpDir.deleteOnExit(); + return tmpDir; + } + + private static File createProtoFile(File tmpDir, String protoContent) throws IOException { + File protoFile = new File(tmpDir, ".proto"); + protoFile.deleteOnExit(); + FileUtils.writeStringToFile(protoFile.getPath(), protoContent); + return protoFile; + } + + private static String compileProtoToDescriptor(File tmpDir, File protoFile) + throws IOException, InterruptedException { + String targetDesc = tmpDir + "/.desc"; + new File(targetDesc).deleteOnExit(); + + int exitCode = + Protoc.runProtoc( + new String[] { + "--proto_path=" + protoFile.getParent(), + "--descriptor_set_out=" + targetDesc, + protoFile.getPath() + }); + + if (exitCode != 0) { + throw new SeaTunnelProtobufFormatException( + ProtobufFormatErrorCode.DESCRIPTOR_CONVERT_FAILED, + "Protoc compile error, exit code: " + exitCode); + } + return targetDesc; + } + + private static Descriptors.FileDescriptor[] buildFileDescriptors( + DescriptorProtos.FileDescriptorSet descriptorSet) + throws Descriptors.DescriptorValidationException { + List fileDescriptors = descriptorSet.getFileList(); + Descriptors.FileDescriptor[] descriptorsArray = + new Descriptors.FileDescriptor[fileDescriptors.size()]; + for (int i = 0; i < fileDescriptors.size(); 
i++) { + descriptorsArray[i] = + Descriptors.FileDescriptor.buildFrom( + fileDescriptors.get(i), new Descriptors.FileDescriptor[] {}); + } + return descriptorsArray; + } +} diff --git a/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/ProtobufDeserializationSchema.java b/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/ProtobufDeserializationSchema.java new file mode 100644 index 00000000000..34e79bc7ff1 --- /dev/null +++ b/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/ProtobufDeserializationSchema.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.format.protobuf; + +import org.apache.seatunnel.api.serialization.DeserializationSchema; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; + +import com.google.protobuf.Descriptors; +import com.google.protobuf.DynamicMessage; + +import java.io.IOException; +import java.util.Optional; + +public class ProtobufDeserializationSchema implements DeserializationSchema { + private static final long serialVersionUID = -7907358485475741366L; + + private final SeaTunnelRowType rowType; + private final ProtobufToRowConverter converter; + private final CatalogTable catalogTable; + private final String protoContent; + private final String messageName; + + public ProtobufDeserializationSchema(CatalogTable catalogTable) { + this.catalogTable = catalogTable; + this.rowType = catalogTable.getSeaTunnelRowType(); + this.messageName = catalogTable.getOptions().get("protobuf_message_name"); + this.protoContent = catalogTable.getOptions().get("protobuf_schema"); + this.converter = new ProtobufToRowConverter(protoContent, messageName); + } + + @Override + public SeaTunnelRow deserialize(byte[] message) throws IOException { + Descriptors.Descriptor descriptor = this.converter.getDescriptor(); + DynamicMessage dynamicMessage = DynamicMessage.parseFrom(descriptor, message); + SeaTunnelRow seaTunnelRow = this.converter.converter(descriptor, dynamicMessage, rowType); + Optional tablePath = + Optional.ofNullable(catalogTable).map(CatalogTable::getTablePath); + if (tablePath.isPresent()) { + seaTunnelRow.setTableId(tablePath.toString()); + } + return seaTunnelRow; + } + + @Override + public SeaTunnelDataType getProducedType() { + return this.rowType; + } +} diff --git 
a/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/ProtobufSerializationSchema.java b/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/ProtobufSerializationSchema.java new file mode 100644 index 00000000000..b733c542dc2 --- /dev/null +++ b/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/ProtobufSerializationSchema.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.format.protobuf; + +import org.apache.seatunnel.api.serialization.SerializationSchema; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; + +import com.google.protobuf.Descriptors; + +import java.io.IOException; + +public class ProtobufSerializationSchema implements SerializationSchema { + + private static final long serialVersionUID = 4438784443025715370L; + + private final RowToProtobufConverter converter; + + public ProtobufSerializationSchema( + SeaTunnelRowType rowType, String protobufMessageName, String protobufSchema) { + try { + Descriptors.Descriptor descriptor = + CompileDescriptor.compileDescriptorTempFile( + protobufSchema, protobufMessageName); + this.converter = new RowToProtobufConverter(rowType, descriptor); + } catch (IOException | InterruptedException | Descriptors.DescriptorValidationException e) { + throw new RuntimeException(e); + } + } + + @Override + public byte[] serialize(SeaTunnelRow element) { + return converter.convertRowToGenericRecord(element); + } +} diff --git a/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/ProtobufToRowConverter.java b/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/ProtobufToRowConverter.java new file mode 100644 index 00000000000..3f7d01b3ff6 --- /dev/null +++ b/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/ProtobufToRowConverter.java @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.format.protobuf; + +import org.apache.seatunnel.api.table.type.ArrayType; +import org.apache.seatunnel.api.table.type.MapType; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Descriptors; +import com.google.protobuf.DynamicMessage; + +import java.io.IOException; +import java.io.Serializable; +import java.lang.reflect.Array; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class ProtobufToRowConverter implements Serializable { + private static final long serialVersionUID = 8177020083886379563L; + + private Descriptors.Descriptor descriptor = null; + private String protoContent; + private String messageName; + + public ProtobufToRowConverter(String protoContent, String messageName) { + this.protoContent = protoContent; + this.messageName = messageName; + } + + public Descriptors.Descriptor getDescriptor() { + if (descriptor == null) { + try { + descriptor = createDescriptor(); + } catch (IOException + | Descriptors.DescriptorValidationException + | InterruptedException e) { + throw new RuntimeException(e); + } + } + return descriptor; + } + + private Descriptors.Descriptor createDescriptor() + throws IOException, InterruptedException, Descriptors.DescriptorValidationException { + + return CompileDescriptor.compileDescriptorTempFile(protoContent, messageName); + } + + public SeaTunnelRow converter( + Descriptors.Descriptor descriptor, + DynamicMessage dynamicMessage, + SeaTunnelRowType rowType) { + String[] fieldNames = rowType.getFieldNames(); + Object[] values = new Object[fieldNames.length]; + for (int i = 0; i < fieldNames.length; i++) { + Descriptors.FieldDescriptor fieldByName = descriptor.findFieldByName(fieldNames[i]); + if (fieldByName == null && descriptor.findNestedTypeByName(fieldNames[i]) == null) { + values[i] = null; + } else { + values[i] = + convertField( + descriptor, + dynamicMessage, + rowType.getFieldType(i), + fieldByName == null ? 
null : dynamicMessage.getField(fieldByName), + fieldNames[i]); + } + } + return new SeaTunnelRow(values); + } + + private Object convertField( + Descriptors.Descriptor descriptor, + DynamicMessage dynamicMessage, + SeaTunnelDataType dataType, + Object val, + String fieldName) { + switch (dataType.getSqlType()) { + case STRING: + return val.toString(); + case BOOLEAN: + case INT: + case BIGINT: + case FLOAT: + case DOUBLE: + case NULL: + case DATE: + case DECIMAL: + case TIMESTAMP: + return val; + case BYTES: + return ((ByteString) val).toByteArray(); + case SMALLINT: + return ((Integer) val).shortValue(); + case TINYINT: + Class typeClass = dataType.getTypeClass(); + if (typeClass == Byte.class) { + Integer integer = (Integer) val; + return integer.byteValue(); + } + return val; + case MAP: + MapType mapType = (MapType) dataType; + Map res = + ((List) val) + .stream() + .collect( + Collectors.toMap( + dm -> + convertField( + descriptor, + dm, + mapType.getKeyType(), + getFieldValue(dm, "key"), + null), + dm -> + convertField( + descriptor, + dm, + mapType.getValueType(), + getFieldValue(dm, "value"), + null))); + + return res; + case ROW: + Descriptors.Descriptor nestedTypeByName = + descriptor.findNestedTypeByName(fieldName); + DynamicMessage s = + (DynamicMessage) + dynamicMessage.getField( + descriptor.findFieldByName(fieldName.toLowerCase())); + return converter(nestedTypeByName, s, (SeaTunnelRowType) dataType); + case ARRAY: + SeaTunnelDataType basicType = ((ArrayType) dataType).getElementType(); + List list = (List) val; + return convertArray(list, basicType); + default: + String errorMsg = + String.format( + "SeaTunnel avro format is not supported for this data type [%s]", + dataType.getSqlType()); + throw new RuntimeException(errorMsg); + } + } + + private Object getFieldValue(DynamicMessage dm, String fieldName) { + return dm.getAllFields().entrySet().stream() + .filter(entry -> entry.getKey().getName().equals(fieldName)) + .map(Map.Entry::getValue) + .findFirst() + .orElse(null); + } + + protected Object convertArray(List val, SeaTunnelDataType dataType) { + if (val == null) { + return null; + } + int length = val.size(); + Object instance = Array.newInstance(dataType.getTypeClass(), length); + for (int i = 0; i < val.size(); i++) { + Array.set(instance, i, convertField(null, null, dataType, val.get(i), null)); + } + return instance; + } +} diff --git a/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/RowToProtobufConverter.java b/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/RowToProtobufConverter.java new file mode 100644 index 00000000000..a6a75aaee3f --- /dev/null +++ b/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/RowToProtobufConverter.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.format.protobuf; + +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; +import org.apache.seatunnel.format.protobuf.exception.ProtobufFormatErrorCode; +import org.apache.seatunnel.format.protobuf.exception.SeaTunnelProtobufFormatException; + +import com.google.protobuf.ByteString; +import com.google.protobuf.Descriptors; +import com.google.protobuf.DynamicMessage; + +import java.io.Serializable; +import java.util.Arrays; +import java.util.Map; + +public class RowToProtobufConverter implements Serializable { + + private static final long serialVersionUID = -576124379280229724L; + private final Descriptors.Descriptor descriptor; + private final SeaTunnelRowType rowType; + + public RowToProtobufConverter(SeaTunnelRowType rowType, Descriptors.Descriptor descriptor) { + this.rowType = rowType; + this.descriptor = descriptor; + } + + public byte[] convertRowToGenericRecord(SeaTunnelRow element) { + DynamicMessage.Builder builder = DynamicMessage.newBuilder(descriptor); + String[] fieldNames = rowType.getFieldNames(); + + for (int i = 0; i < fieldNames.length; i++) { + String fieldName = rowType.getFieldName(i); + Object value = element.getField(i); + Object resolvedValue = + resolveObject(fieldName, value, rowType.getFieldType(i), builder); + if (resolvedValue != null) { + if (resolvedValue instanceof byte[]) { + resolvedValue = ByteString.copyFrom((byte[]) resolvedValue); + } + builder.setField( + descriptor.findFieldByName(fieldName.toLowerCase()), resolvedValue); + } + } + + return builder.build().toByteArray(); + } + + private Object resolveObject( + String fieldName, + Object data, + SeaTunnelDataType seaTunnelDataType, + DynamicMessage.Builder builder) { + if (data == null) { + return null; + } + + switch (seaTunnelDataType.getSqlType()) { + case STRING: + case SMALLINT: + case INT: + case BIGINT: + case FLOAT: + case DOUBLE: + case BOOLEAN: + case DECIMAL: + case DATE: + case TIMESTAMP: + case BYTES: + return data; + case TINYINT: + if (data instanceof Byte) { + return Byte.toUnsignedInt((Byte) data); + } + return data; + case MAP: + return handleMapType(fieldName, data, seaTunnelDataType, builder); + case ARRAY: + return Arrays.asList((Object[]) data); + case ROW: + return handleRowType(fieldName, data, seaTunnelDataType); + default: + throw new SeaTunnelProtobufFormatException( + ProtobufFormatErrorCode.UNSUPPORTED_DATA_TYPE, + String.format( + "SeaTunnel protobuf format is not supported for this data type [%s]", + seaTunnelDataType.getSqlType())); + } + } + + private Object handleMapType( + String fieldName, + Object data, + SeaTunnelDataType seaTunnelDataType, + DynamicMessage.Builder builder) { + Descriptors.Descriptor mapEntryDescriptor = + descriptor.findFieldByName(fieldName).getMessageType(); + + if (data instanceof Map) { + Map mapData = (Map) data; + mapData.forEach( + (key, value) -> { + DynamicMessage mapEntry = + DynamicMessage.newBuilder(mapEntryDescriptor) + 
.setField(mapEntryDescriptor.findFieldByName("key"), key) + .setField( + mapEntryDescriptor.findFieldByName("value"), value) + .build(); + builder.addRepeatedField(descriptor.findFieldByName(fieldName), mapEntry); + }); + } + + return null; + } + + private Object handleRowType( + String fieldName, Object data, SeaTunnelDataType seaTunnelDataType) { + SeaTunnelRow seaTunnelRow = (SeaTunnelRow) data; + SeaTunnelDataType[] fieldTypes = ((SeaTunnelRowType) seaTunnelDataType).getFieldTypes(); + String[] fieldNames = ((SeaTunnelRowType) seaTunnelDataType).getFieldNames(); + Descriptors.Descriptor nestedTypeDescriptor = descriptor.findNestedTypeByName(fieldName); + DynamicMessage.Builder nestedBuilder = DynamicMessage.newBuilder(nestedTypeDescriptor); + + for (int i = 0; i < fieldNames.length; i++) { + Object resolvedValue = + resolveObject( + fieldNames[i], seaTunnelRow.getField(i), fieldTypes[i], nestedBuilder); + nestedBuilder.setField( + nestedTypeDescriptor.findFieldByName(fieldNames[i]), resolvedValue); + } + + return nestedBuilder.build(); + } +} diff --git a/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/exception/ProtobufFormatErrorCode.java b/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/exception/ProtobufFormatErrorCode.java new file mode 100644 index 00000000000..e43125a41a1 --- /dev/null +++ b/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/exception/ProtobufFormatErrorCode.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.format.protobuf.exception; + +import org.apache.seatunnel.common.exception.SeaTunnelErrorCode; + +public enum ProtobufFormatErrorCode implements SeaTunnelErrorCode { + DESCRIPTOR_CONVERT_FAILED("PROTOBUF-01", "Protobuf descriptor conversion failed."), + UNSUPPORTED_DATA_TYPE("PROTOBUF-02", "Unsupported data type."); + + private final String code; + private final String description; + + ProtobufFormatErrorCode(String code, String description) { + this.code = code; + this.description = description; + } + + @Override + public String getCode() { + return code; + } + + @Override + public String getDescription() { + return description; + } +} diff --git a/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/exception/SeaTunnelProtobufFormatException.java b/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/exception/SeaTunnelProtobufFormatException.java new file mode 100644 index 00000000000..30f6dfd0312 --- /dev/null +++ b/seatunnel-formats/seatunnel-format-protobuf/src/main/java/org/apache/seatunnel/format/protobuf/exception/SeaTunnelProtobufFormatException.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.format.protobuf.exception; + +import org.apache.seatunnel.common.exception.SeaTunnelErrorCode; +import org.apache.seatunnel.common.exception.SeaTunnelRuntimeException; + +public class SeaTunnelProtobufFormatException extends SeaTunnelRuntimeException { + + public SeaTunnelProtobufFormatException( + SeaTunnelErrorCode seaTunnelErrorCode, String errorMessage) { + super(seaTunnelErrorCode, errorMessage); + } +} diff --git a/seatunnel-formats/seatunnel-format-protobuf/src/test/java/org/apache/seatunnel/format/protobuf/ProtobufConverterTest.java b/seatunnel-formats/seatunnel-format-protobuf/src/test/java/org/apache/seatunnel/format/protobuf/ProtobufConverterTest.java new file mode 100644 index 00000000000..e3d6dd35a2d --- /dev/null +++ b/seatunnel-formats/seatunnel-format-protobuf/src/test/java/org/apache/seatunnel/format/protobuf/ProtobufConverterTest.java @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.seatunnel.format.protobuf; + +import org.apache.seatunnel.api.table.type.ArrayType; +import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.api.table.type.MapType; +import org.apache.seatunnel.api.table.type.PrimitiveByteArrayType; +import org.apache.seatunnel.api.table.type.SeaTunnelDataType; +import org.apache.seatunnel.api.table.type.SeaTunnelRow; +import org.apache.seatunnel.api.table.type.SeaTunnelRowType; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import com.google.protobuf.Descriptors; +import com.google.protobuf.DynamicMessage; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +class ProtobufConverterTest { + + private SeaTunnelRow buildSeaTunnelRow() { + SeaTunnelRow seaTunnelRow = new SeaTunnelRow(10); + + Map attributesMap = new HashMap<>(); + attributesMap.put("k1", 0.1F); + attributesMap.put("k2", 2.3F); + + String[] phoneNumbers = {"1", "2"}; + byte[] byteVal = {1, 2, 3}; + + SeaTunnelRow address = new SeaTunnelRow(3); + address.setField(0, "city_value"); + address.setField(1, "state_value"); + address.setField(2, "street_value"); + + seaTunnelRow.setField(0, 123); + seaTunnelRow.setField(1, 123123123123L); + seaTunnelRow.setField(2, 0.123f); + seaTunnelRow.setField(3, 0.123d); + seaTunnelRow.setField(4, false); + seaTunnelRow.setField(5, "test data"); + seaTunnelRow.setField(6, byteVal); + seaTunnelRow.setField(7, address); + seaTunnelRow.setField(8, attributesMap); + seaTunnelRow.setField(9, phoneNumbers); + + return seaTunnelRow; + } + + private SeaTunnelRowType buildSeaTunnelRowType() { + SeaTunnelRowType addressType = + new SeaTunnelRowType( + new String[] {"city", "state", "street"}, + new SeaTunnelDataType[] { + BasicType.STRING_TYPE, BasicType.STRING_TYPE, BasicType.STRING_TYPE + }); + + return new SeaTunnelRowType( + new String[] { + "c_int32", + "c_int64", + "c_float", + "c_double", + "c_bool", + "c_string", + "c_bytes", + "Address", + "attributes", + "phone_numbers" + }, + new SeaTunnelDataType[] { + BasicType.INT_TYPE, + BasicType.LONG_TYPE, + BasicType.FLOAT_TYPE, + BasicType.DOUBLE_TYPE, + BasicType.BOOLEAN_TYPE, + BasicType.STRING_TYPE, + PrimitiveByteArrayType.INSTANCE, + addressType, + new MapType<>(BasicType.STRING_TYPE, BasicType.FLOAT_TYPE), + ArrayType.STRING_ARRAY_TYPE + }); + } + + @Test + public void testConverter() + throws Descriptors.DescriptorValidationException, IOException, InterruptedException { + SeaTunnelRowType rowType = buildSeaTunnelRowType(); + SeaTunnelRow originalRow = buildSeaTunnelRow(); + + String protoContent = + "syntax = \"proto3\";\n" + + "\n" + + "package org.apache.seatunnel.format.protobuf;\n" + + "\n" + + "option java_outer_classname = \"ProtobufE2E\";\n" + + "\n" + + "message Person {\n" + + " int32 c_int32 = 1;\n" + + " int64 c_int64 = 2;\n" + + " float c_float = 3;\n" + + " double c_double = 4;\n" + + " bool c_bool = 5;\n" + + " string c_string = 6;\n" + + " bytes c_bytes = 7;\n" + + "\n" + + " message Address {\n" + + " string street = 1;\n" + + " string city = 2;\n" + + " string state 
= 3;\n" + + " string zip = 4;\n" + + " }\n" + + "\n" + + " Address address = 8;\n" + + "\n" + + " map attributes = 9;\n" + + "\n" + + " repeated string phone_numbers = 10;\n" + + "}"; + + String messageName = "Person"; + Descriptors.Descriptor descriptor = + CompileDescriptor.compileDescriptorTempFile(protoContent, messageName); + + RowToProtobufConverter rowToProtobufConverter = + new RowToProtobufConverter(rowType, descriptor); + byte[] protobufMessage = rowToProtobufConverter.convertRowToGenericRecord(originalRow); + + ProtobufToRowConverter protobufToRowConverter = + new ProtobufToRowConverter(protoContent, messageName); + DynamicMessage dynamicMessage = DynamicMessage.parseFrom(descriptor, protobufMessage); + SeaTunnelRow convertedRow = + protobufToRowConverter.converter(descriptor, dynamicMessage, rowType); + + Assertions.assertEquals(originalRow, convertedRow); + } +} diff --git a/seatunnel-translation/seatunnel-translation-base/src/main/java/org/apache/seatunnel/translation/source/CoordinatedSource.java b/seatunnel-translation/seatunnel-translation-base/src/main/java/org/apache/seatunnel/translation/source/CoordinatedSource.java index 11b240dd993..4e5d864369f 100644 --- a/seatunnel-translation/seatunnel-translation-base/src/main/java/org/apache/seatunnel/translation/source/CoordinatedSource.java +++ b/seatunnel-translation/seatunnel-translation-base/src/main/java/org/apache/seatunnel/translation/source/CoordinatedSource.java @@ -24,6 +24,10 @@ import org.apache.seatunnel.api.source.SourceReader; import org.apache.seatunnel.api.source.SourceSplit; import org.apache.seatunnel.api.source.SourceSplitEnumerator; +import org.apache.seatunnel.api.source.event.EnumeratorCloseEvent; +import org.apache.seatunnel.api.source.event.EnumeratorOpenEvent; +import org.apache.seatunnel.api.source.event.ReaderCloseEvent; +import org.apache.seatunnel.api.source.event.ReaderOpenEvent; import org.apache.seatunnel.translation.util.ThreadPoolExecutorFactory; import lombok.extern.slf4j.Slf4j; @@ -136,6 +140,7 @@ public void open() throws Exception { ThreadPoolExecutorFactory.createScheduledThreadPoolExecutor( parallelism, "parallel-split-enumerator-executor"); splitEnumerator.open(); + coordinatedEnumeratorContext.getEventListener().onEvent(new EnumeratorOpenEvent()); restoredSplitStateMap.forEach( (subtaskId, splits) -> { splitEnumerator.addSplitsBack(splits, subtaskId); @@ -147,6 +152,10 @@ public void open() throws Exception { entry -> { try { entry.getValue().open(); + readerContextMap + .get(entry.getKey()) + .getEventListener() + .onEvent(new ReaderOpenEvent()); splitEnumerator.registerReader(entry.getKey()); } catch (Exception e) { throw new RuntimeException(e); @@ -203,6 +212,7 @@ public void close() throws IOException { for (Map.Entry> entry : readerMap.entrySet()) { readerRunningMap.get(entry.getKey()).set(false); entry.getValue().close(); + readerContextMap.get(entry.getKey()).getEventListener().onEvent(new ReaderCloseEvent()); } if (executorService != null) { @@ -211,6 +221,7 @@ public void close() throws IOException { try (SourceSplitEnumerator closed = splitEnumerator) { // just close the resources + coordinatedEnumeratorContext.getEventListener().onEvent(new EnumeratorCloseEvent()); } } diff --git a/seatunnel-translation/seatunnel-translation-base/src/main/java/org/apache/seatunnel/translation/source/ParallelSource.java b/seatunnel-translation/seatunnel-translation-base/src/main/java/org/apache/seatunnel/translation/source/ParallelSource.java index 4cc1bfd1418..ed794a5b6cb 100644 --- 
a/seatunnel-translation/seatunnel-translation-base/src/main/java/org/apache/seatunnel/translation/source/ParallelSource.java +++ b/seatunnel-translation/seatunnel-translation-base/src/main/java/org/apache/seatunnel/translation/source/ParallelSource.java @@ -23,6 +23,10 @@ import org.apache.seatunnel.api.source.SourceReader; import org.apache.seatunnel.api.source.SourceSplit; import org.apache.seatunnel.api.source.SourceSplitEnumerator; +import org.apache.seatunnel.api.source.event.EnumeratorCloseEvent; +import org.apache.seatunnel.api.source.event.EnumeratorOpenEvent; +import org.apache.seatunnel.api.source.event.ReaderCloseEvent; +import org.apache.seatunnel.api.source.event.ReaderOpenEvent; import org.apache.seatunnel.translation.util.ThreadPoolExecutorFactory; import org.slf4j.Logger; @@ -115,7 +119,9 @@ public void open() throws Exception { splitEnumerator.addSplitsBack(restoredSplitState, subtaskId); } reader.open(); + readerContext.getEventListener().onEvent(new ReaderOpenEvent()); parallelEnumeratorContext.register(); + parallelEnumeratorContext.getEventListener().onEvent(new EnumeratorOpenEvent()); splitEnumerator.registerReader(subtaskId); } @@ -170,6 +176,8 @@ public void close() throws IOException { if (reader != null) { LOG.debug("Close the data reader for the Apache SeaTunnel source."); reader.close(); + readerContext.getEventListener().onEvent(new ReaderCloseEvent()); + parallelEnumeratorContext.getEventListener().onEvent(new EnumeratorCloseEvent()); } } diff --git a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/sink/FlinkSink.java b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/sink/FlinkSink.java index 4a720e347b2..2ebbcba4f91 100644 --- a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/sink/FlinkSink.java +++ b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/sink/FlinkSink.java @@ -66,10 +66,7 @@ public SinkWriter, FlinkWriterState> if (states == null || states.isEmpty()) { return new FlinkSinkWriter<>( - sink.createWriter(stContext), - 1, - catalogTable.getSeaTunnelRowType(), - stContext.getMetricsContext()); + sink.createWriter(stContext), 1, catalogTable.getSeaTunnelRowType(), stContext); } else { List restoredState = states.stream().map(FlinkWriterState::getState).collect(Collectors.toList()); @@ -77,7 +74,7 @@ public SinkWriter, FlinkWriterState> sink.restoreWriter(stContext, restoredState), states.get(0).getCheckpointId() + 1, catalogTable.getSeaTunnelRowType(), - stContext.getMetricsContext()); + stContext); } } diff --git a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/sink/FlinkSinkWriter.java b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/sink/FlinkSinkWriter.java index 725bf606f93..8de831aee17 100644 --- a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/sink/FlinkSinkWriter.java +++ 
b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/sink/FlinkSinkWriter.java @@ -23,6 +23,7 @@ import org.apache.seatunnel.api.common.metrics.MetricsContext; import org.apache.seatunnel.api.sink.MultiTableResourceManager; import org.apache.seatunnel.api.sink.SupportResourceShare; +import org.apache.seatunnel.api.sink.event.WriterCloseEvent; import org.apache.seatunnel.api.table.type.SeaTunnelDataType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; @@ -53,6 +54,8 @@ public class FlinkSinkWriter private final org.apache.seatunnel.api.sink.SinkWriter sinkWriter; + private final org.apache.seatunnel.api.sink.SinkWriter.Context context; + private final Counter sinkWriteCount; private final Counter sinkWriteBytes; @@ -67,9 +70,11 @@ public class FlinkSinkWriter org.apache.seatunnel.api.sink.SinkWriter sinkWriter, long checkpointId, SeaTunnelDataType dataType, - MetricsContext metricsContext) { + org.apache.seatunnel.api.sink.SinkWriter.Context context) { + this.context = context; this.sinkWriter = sinkWriter; this.checkpointId = checkpointId; + MetricsContext metricsContext = context.getMetricsContext(); this.sinkWriteCount = metricsContext.counter(MetricNames.SINK_WRITE_COUNT); this.sinkWriteBytes = metricsContext.counter(MetricNames.SINK_WRITE_BYTES); this.sinkWriterQPS = metricsContext.meter(MetricNames.SINK_WRITE_QPS); @@ -118,6 +123,7 @@ public List> snapshotState() throws IOException { @Override public void close() throws Exception { sinkWriter.close(); + context.getEventListener().onEvent(new WriterCloseEvent()); try { if (resourceManager != null) { resourceManager.close(); diff --git a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkSourceEnumerator.java b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkSourceEnumerator.java index e457d69f27c..7d8052bfd18 100644 --- a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkSourceEnumerator.java +++ b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkSourceEnumerator.java @@ -19,6 +19,8 @@ import org.apache.seatunnel.api.source.SourceSplit; import org.apache.seatunnel.api.source.SourceSplitEnumerator; +import org.apache.seatunnel.api.source.event.EnumeratorCloseEvent; +import org.apache.seatunnel.api.source.event.EnumeratorOpenEvent; import org.apache.flink.api.connector.source.SourceEvent; import org.apache.flink.api.connector.source.SplitEnumerator; @@ -49,6 +51,7 @@ public class FlinkSourceEnumerator private final SplitEnumeratorContext> enumeratorContext; + private final SourceSplitEnumerator.Context context; private final int parallelism; private final Object lock = new Object(); @@ -62,12 +65,14 @@ public FlinkSourceEnumerator( SplitEnumeratorContext> enumContext) { this.sourceSplitEnumerator = enumerator; this.enumeratorContext = enumContext; + this.context = new FlinkSourceSplitEnumeratorContext<>(enumeratorContext); this.parallelism = enumeratorContext.currentParallelism(); } @Override public void start() { sourceSplitEnumerator.open(); + context.getEventListener().onEvent(new EnumeratorOpenEvent()); } @Override @@ -106,6 +111,7 @@ public 
EnumStateT snapshotState(long checkpointId) throws Exception { @Override public void close() throws IOException { sourceSplitEnumerator.close(); + context.getEventListener().onEvent(new EnumeratorCloseEvent()); } @Override diff --git a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkSourceReader.java b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkSourceReader.java index c2f9cde5005..fb1dc85174e 100644 --- a/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkSourceReader.java +++ b/seatunnel-translation/seatunnel-translation-flink/seatunnel-translation-flink-common/src/main/java/org/apache/seatunnel/translation/flink/source/FlinkSourceReader.java @@ -20,6 +20,8 @@ import org.apache.seatunnel.shade.com.typesafe.config.Config; import org.apache.seatunnel.api.source.SourceSplit; +import org.apache.seatunnel.api.source.event.ReaderCloseEvent; +import org.apache.seatunnel.api.source.event.ReaderOpenEvent; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.flink.api.connector.source.ReaderOutput; @@ -66,6 +68,7 @@ public FlinkSourceReader( public void start() { try { sourceReader.open(); + context.getEventListener().onEvent(new ReaderOpenEvent()); } catch (Exception e) { throw new RuntimeException(e); } @@ -121,6 +124,7 @@ public void handleSourceEvents(SourceEvent sourceEvent) { @Override public void close() throws Exception { sourceReader.close(); + context.getEventListener().onEvent(new ReaderCloseEvent()); } @Override diff --git a/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-2.4/src/main/java/org/apache/seatunnel/translation/spark/sink/writer/SparkDataWriter.java b/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-2.4/src/main/java/org/apache/seatunnel/translation/spark/sink/writer/SparkDataWriter.java index 434b1ef9799..a9eac500629 100644 --- a/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-2.4/src/main/java/org/apache/seatunnel/translation/spark/sink/writer/SparkDataWriter.java +++ b/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-2.4/src/main/java/org/apache/seatunnel/translation/spark/sink/writer/SparkDataWriter.java @@ -21,6 +21,7 @@ import org.apache.seatunnel.api.sink.SinkCommitter; import org.apache.seatunnel.api.sink.SinkWriter; import org.apache.seatunnel.api.sink.SupportResourceShare; +import org.apache.seatunnel.api.sink.event.WriterCloseEvent; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.translation.spark.execution.MultiTableManager; @@ -47,16 +48,19 @@ public class SparkDataWriter implements DataWriter sinkWriter, @Nullable SinkCommitter sinkCommitter, MultiTableManager multiTableManager, - long epochId) { + long epochId, + org.apache.seatunnel.api.sink.SinkWriter.Context context) { this.sinkWriter = sinkWriter; this.sinkCommitter = sinkCommitter; this.epochId = epochId == 0 ? 
1 : epochId; this.multiTableManager = multiTableManager; + this.context = context; initResourceManger(); } @@ -97,6 +101,7 @@ public WriterCommitMessage commit() throws IOException { new SparkWriterCommitMessage<>(latestCommitInfoT); cleanCommitInfo(); sinkWriter.close(); + context.getEventListener().onEvent(new WriterCloseEvent()); try { if (resourceManager != null) { resourceManager.close(); diff --git a/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-2.4/src/main/java/org/apache/seatunnel/translation/spark/sink/writer/SparkDataWriterFactory.java b/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-2.4/src/main/java/org/apache/seatunnel/translation/spark/sink/writer/SparkDataWriterFactory.java index 3a646f3aca2..b684654103a 100644 --- a/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-2.4/src/main/java/org/apache/seatunnel/translation/spark/sink/writer/SparkDataWriterFactory.java +++ b/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-2.4/src/main/java/org/apache/seatunnel/translation/spark/sink/writer/SparkDataWriterFactory.java @@ -63,6 +63,6 @@ public DataWriter createDataWriter(int partitionId, long taskId, lo throw new RuntimeException("Failed to create SinkCommitter.", e); } return new SparkDataWriter<>( - writer, committer, new MultiTableManager(catalogTables), epochId); + writer, committer, new MultiTableManager(catalogTables), epochId, context); } } diff --git a/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-3.3/src/main/java/org/apache/seatunnel/translation/spark/sink/write/SeaTunnelSparkDataWriter.java b/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-3.3/src/main/java/org/apache/seatunnel/translation/spark/sink/write/SeaTunnelSparkDataWriter.java index 59f931e38f1..c2c24aa9147 100644 --- a/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-3.3/src/main/java/org/apache/seatunnel/translation/spark/sink/write/SeaTunnelSparkDataWriter.java +++ b/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-3.3/src/main/java/org/apache/seatunnel/translation/spark/sink/write/SeaTunnelSparkDataWriter.java @@ -21,6 +21,7 @@ import org.apache.seatunnel.api.sink.SinkCommitter; import org.apache.seatunnel.api.sink.SinkWriter; import org.apache.seatunnel.api.sink.SupportResourceShare; +import org.apache.seatunnel.api.sink.event.WriterCloseEvent; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.translation.spark.execution.MultiTableManager; @@ -47,16 +48,19 @@ public class SeaTunnelSparkDataWriter implements DataWriter protected volatile MultiTableResourceManager resourceManager; private final MultiTableManager multiTableManager; + private final SinkWriter.Context context; public SeaTunnelSparkDataWriter( SinkWriter sinkWriter, @Nullable SinkCommitter sinkCommitter, MultiTableManager multiTableManager, - long epochId) { + long epochId, + SinkWriter.Context context) { this.sinkWriter = sinkWriter; this.sinkCommitter = sinkCommitter; this.multiTableManager = multiTableManager; this.epochId = epochId == 0 ? 
1 : epochId; + this.context = context; initResourceManger(); } @@ -89,6 +93,7 @@ public WriterCommitMessage commit() throws IOException { new SeaTunnelSparkWriterCommitMessage<>(latestCommitInfoT); cleanCommitInfo(); sinkWriter.close(); + context.getEventListener().onEvent(new WriterCloseEvent()); try { if (resourceManager != null) { resourceManager.close(); diff --git a/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-3.3/src/main/java/org/apache/seatunnel/translation/spark/sink/write/SeaTunnelSparkDataWriterFactory.java b/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-3.3/src/main/java/org/apache/seatunnel/translation/spark/sink/write/SeaTunnelSparkDataWriterFactory.java index b83787cac1e..255a9cd339f 100644 --- a/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-3.3/src/main/java/org/apache/seatunnel/translation/spark/sink/write/SeaTunnelSparkDataWriterFactory.java +++ b/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-3.3/src/main/java/org/apache/seatunnel/translation/spark/sink/write/SeaTunnelSparkDataWriterFactory.java @@ -64,7 +64,7 @@ public DataWriter createWriter(int partitionId, long taskId) { throw new RuntimeException("Failed to create SinkCommitter.", e); } return new SeaTunnelSparkDataWriter<>( - writer, committer, new MultiTableManager(catalogTables), 0); + writer, committer, new MultiTableManager(catalogTables), 0, context); } @Override diff --git a/tools/dependencies/known-dependencies.txt b/tools/dependencies/known-dependencies.txt index 7f95aaf01c4..c2f431836b6 100755 --- a/tools/dependencies/known-dependencies.txt +++ b/tools/dependencies/known-dependencies.txt @@ -46,4 +46,10 @@ accessors-smart-2.4.7.jar asm-9.1.jar avro-1.11.1.jar groovy-4.0.16.jar -seatunnel-janino-2.3.8-SNAPSHOT-optional.jar \ No newline at end of file +seatunnel-janino-2.3.8-SNAPSHOT-optional.jar +protobuf-java-util-3.25.3.jar +protobuf-java-3.25.3.jar +protoc-jar-3.11.4.jar +error_prone_annotations-2.18.0.jar +gson-2.8.9.jar +j2objc-annotations-2.8.jar
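
For reviewers who want to exercise the new format module outside of a full pipeline, the sketch below distills the round trip covered by ProtobufConverterTest: compile a schema string into a descriptor, serialize a SeaTunnelRow with RowToProtobufConverter, then read the bytes back with ProtobufToRowConverter. The simplified two-field Person schema and row type here are illustrative stand-ins, not part of the patch.

```java
import org.apache.seatunnel.api.table.type.BasicType;
import org.apache.seatunnel.api.table.type.SeaTunnelDataType;
import org.apache.seatunnel.api.table.type.SeaTunnelRow;
import org.apache.seatunnel.api.table.type.SeaTunnelRowType;
import org.apache.seatunnel.format.protobuf.CompileDescriptor;
import org.apache.seatunnel.format.protobuf.ProtobufToRowConverter;
import org.apache.seatunnel.format.protobuf.RowToProtobufConverter;

import com.google.protobuf.Descriptors;
import com.google.protobuf.DynamicMessage;

public class ProtobufRoundTripSketch {
    public static void main(String[] args) throws Exception {
        // Minimal schema; in the connector this arrives via protobuf_schema / protobuf_message_name.
        String protoContent =
                "syntax = \"proto3\";\n"
                        + "message Person {\n"
                        + "  int32 c_int32 = 1;\n"
                        + "  string c_string = 2;\n"
                        + "}";
        String messageName = "Person";

        SeaTunnelRowType rowType =
                new SeaTunnelRowType(
                        new String[] {"c_int32", "c_string"},
                        new SeaTunnelDataType[] {BasicType.INT_TYPE, BasicType.STRING_TYPE});

        SeaTunnelRow row = new SeaTunnelRow(2);
        row.setField(0, 42);
        row.setField(1, "hello");

        // Compile the schema text into a protobuf descriptor, as the test does.
        Descriptors.Descriptor descriptor =
                CompileDescriptor.compileDescriptorTempFile(protoContent, messageName);

        // Row -> protobuf bytes.
        byte[] bytes =
                new RowToProtobufConverter(rowType, descriptor).convertRowToGenericRecord(row);

        // Protobuf bytes -> row.
        DynamicMessage message = DynamicMessage.parseFrom(descriptor, bytes);
        SeaTunnelRow back =
                new ProtobufToRowConverter(protoContent, messageName)
                        .converter(descriptor, message, rowType);

        System.out.println(back.equals(row)); // expected: true
    }
}
```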
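
The translation-layer changes above only emit lifecycle events; how they are observed depends on the EventListener wired into the reader, enumerator, and writer contexts. The sketch below is a rough illustration under the assumption that an Event/EventListener pair with a single onEvent callback lives in org.apache.seatunnel.api.event; the exact interface and registration mechanism are not shown in this diff, so treat the package and types as assumptions to verify.

```java
// Hypothetical listener sketch: assumes org.apache.seatunnel.api.event.Event and
// org.apache.seatunnel.api.event.EventListener with a single onEvent(Event) method.
import org.apache.seatunnel.api.event.Event;
import org.apache.seatunnel.api.event.EventListener;

public class LoggingLifecycleListener implements EventListener {
    @Override
    public void onEvent(Event event) {
        // The patch fires ReaderOpenEvent/ReaderCloseEvent, EnumeratorOpenEvent/
        // EnumeratorCloseEvent and WriterCloseEvent through context.getEventListener().
        System.out.println("lifecycle event: " + event.getClass().getSimpleName());
    }
}
```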