Unverified Commit d09efa36 authored by ZackYoung, committed by GitHub

Optimize cdc SQLSinkBuilder and KafkaSinkBuilder: switch from filter to process (#806)

* Optimize cdc SQLSinkBuilder and KafkaSinkBuilder: switch from filter to process

* change "_" to "."

* change "db" to config.getSchemaFieldName()

* change "tableMap key" to table.getSchemaTableName()
parent ebe5b6af
......@@ -37,6 +37,7 @@ import org.apache.flink.table.operations.ModifyOperation;
import org.apache.flink.table.types.logical.*;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -110,7 +111,13 @@ public abstract class AbstractSinkBuilder {
}
});
}
protected DataStream<Map> shunt(
SingleOutputStreamOperator<Map> processOperator,
Table table,
OutputTag<Map> tag) {
return processOperator.getSideOutput(tag);
}
protected DataStream<RowData> buildRowData(
SingleOutputStreamOperator<Map> filterOperator,
List<String> columnNameList,
......
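The new AbstractSinkBuilder#shunt helper above is a thin wrapper around SingleOutputStreamOperator#getSideOutput: the concrete builders tag records inside a ProcessFunction, and shunt retrieves the per-table stream afterwards. A minimal, self-contained sketch of that side-output pattern (hypothetical tag name and element types, not the project's code):

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

public class SideOutputSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Anonymous subclass so Flink can capture the tag's element type.
        final OutputTag<String> evenTag = new OutputTag<String>("even") {};

        SingleOutputStreamOperator<String> processed = env
                .fromElements(1, 2, 3, 4)
                .process(new ProcessFunction<Integer, String>() {
                    @Override
                    public void processElement(Integer value, Context ctx, Collector<String> out) {
                        if (value % 2 == 0) {
                            ctx.output(evenTag, "even:" + value); // tagged side output
                        } else {
                            out.collect("odd:" + value);          // main output
                        }
                    }
                });

        // Equivalent of AbstractSinkBuilder#shunt: pull one tagged stream back out.
        DataStream<String> evens = processed.getSideOutput(evenTag);
        evens.print();
        env.execute("side-output-sketch");
    }
}

The anonymous subclass in new OutputTag<String>("even") {} matters: it lets Flink capture the tag's generic element type, which is why the builders below create their tags the same way.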
......@@ -20,31 +20,32 @@
package com.dlink.cdc.kafka;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
/**
* MysqlCDCBuilder
*
......@@ -89,44 +90,47 @@ public class KafkaSinkBuilder extends AbstractSinkBuilder implements SinkBuilder
CustomTableEnvironment customTableEnvironment,
DataStreamSource<String> dataStreamSource) {
if (Asserts.isNotNullString(config.getSink().get("topic"))) {
dataStreamSource.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
config.getSink().get("topic"),
new SimpleStringSchema()));
dataStreamSource.addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
config.getSink().get("topic"),
new SimpleStringSchema()));
} else {
Map<Table, OutputTag<String>> tagMap = new HashMap<>();
Map<String, Table> tableMap = new HashMap<>();
ObjectMapper objectMapper = new ObjectMapper();
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x,Map.class)).returns(Map.class);
final List<Schema> schemaList = config.getSchemaList();
final String schemaFieldName = config.getSchemaFieldName();
if (Asserts.isNotNullCollection(schemaList)) {
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(new MapFunction<String, Map>() {
@Override
public Map map(String value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.readValue(value, Map.class);
}
});
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
final String tableName = table.getName();
final String schemaName = table.getSchema();
SingleOutputStreamOperator<Map> filterOperator = mapOperator.filter(new FilterFunction<Map>() {
@Override
public boolean filter(Map value) throws Exception {
LinkedHashMap source = (LinkedHashMap) value.get("source");
return tableName.equals(source.get("table").toString())
&& schemaName.equals(source.get(schemaFieldName).toString());
}
});
SingleOutputStreamOperator<String> stringOperator = filterOperator.map(new MapFunction<Map, String>() {
@Override
public String map(Map value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.writeValueAsString(value);
}
});
stringOperator.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
getSinkTableName(table),
new SimpleStringSchema()));
String sinkTableName = getSinkTableName(table);
OutputTag<String> outputTag = new OutputTag<String>(sinkTableName) {
};
tagMap.put(table, outputTag);
tableMap.put(table.getSchemaTableName(), table);
}
}
SingleOutputStreamOperator<String> process = mapOperator.process(new ProcessFunction<Map, String>() {
@Override
public void processElement(Map map, ProcessFunction<Map, String>.Context ctx, Collector<String> out) throws Exception {
LinkedHashMap source = (LinkedHashMap) map.get("source");
try {
String result = objectMapper.writeValueAsString(map);
Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
OutputTag<String> outputTag = tagMap.get(table);
ctx.output(outputTag, result);
} catch (Exception e) {
out.collect(objectMapper.writeValueAsString(map));
}
}
});
tagMap.forEach((k, v) -> {
String topic = getSinkTableName(k);
process.getSideOutput(v).rebalance().addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
topic, new SimpleStringSchema())).name(topic);
});
}
}
return dataStreamSource;
......
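In the old KafkaSinkBuilder every table attached its own FilterFunction and map-to-String step (each map call constructing a fresh ObjectMapper) to the parsed stream, so each record was evaluated once per table. The rewritten builder parses once, derives a "<schema>.<table>" key from the record's source block, and routes the record to that table's OutputTag inside a single ProcessFunction; records whose table is unknown fall through to the main output, and each side output is then wired to its own FlinkKafkaProducer named after the sink topic. A minimal sketch of the routing step, where the "db" field stands in for config.getSchemaFieldName() and print() stands in for the Kafka sink:

import java.util.HashMap;
import java.util.Map;

import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

public class TableRoutingSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // One tag per sink table; the real builder derives these from config.getSchemaList().
        final Map<String, OutputTag<String>> tags = new HashMap<>();
        tags.put("db.orders", new OutputTag<String>("db.orders") {});
        tags.put("db.users", new OutputTag<String>("db.users") {});

        SingleOutputStreamOperator<String> routed = env
                .fromElements(
                        "{\"op\":\"c\",\"source\":{\"db\":\"db\",\"table\":\"orders\"}}",
                        "{\"op\":\"u\",\"source\":{\"db\":\"db\",\"table\":\"users\"}}")
                .process(new ProcessFunction<String, String>() {
                    private transient ObjectMapper mapper;

                    @Override
                    public void processElement(String json, Context ctx, Collector<String> out) throws Exception {
                        if (mapper == null) {
                            mapper = new ObjectMapper();
                        }
                        Map<?, ?> value = mapper.readValue(json, Map.class);
                        Map<?, ?> source = (Map<?, ?>) value.get("source");
                        // "db" stands in for the configured schema field name.
                        String key = source.get("db") + "." + source.get("table");
                        OutputTag<String> tag = tags.get(key);
                        if (tag != null) {
                            ctx.output(tag, json);   // routed to that table's topic
                        } else {
                            out.collect(json);       // unknown table: main output
                        }
                    }
                });

        // In the real builder each side output gets its own FlinkKafkaProducer; print() is a stand-in.
        routed.getSideOutput(tags.get("db.orders")).print();
        env.execute("table-routing-sketch");
    }
}

This is also why the commit's follow-up tweaks change the key separator to "." and key tableMap by table.getSchemaTableName(): the lookup key and the map key must agree exactly.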
......@@ -20,15 +20,28 @@
package com.dlink.cdc.sql;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import com.dlink.utils.FlinkBaseUtil;
import com.dlink.utils.JSONUtil;
import com.dlink.utils.LogUtil;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.dag.Transformation;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.operations.ModifyOperation;
import org.apache.flink.table.operations.Operation;
......@@ -37,27 +50,14 @@ import org.apache.flink.table.types.utils.TypeConversions;
import org.apache.flink.types.Row;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import javax.xml.bind.DatatypeConverter;
import java.io.Serializable;
import java.math.BigDecimal;
import java.time.Instant;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import com.dlink.utils.FlinkBaseUtil;
import com.dlink.utils.JSONUtil;
import com.dlink.utils.LogUtil;
import java.util.*;
/**
* SQLSinkBuilder
......@@ -83,10 +83,10 @@ public class SQLSinkBuilder extends AbstractSinkBuilder implements SinkBuilder,
}
private DataStream<Row> buildRow(
SingleOutputStreamOperator<Map> filterOperator,
List<String> columnNameList,
List<LogicalType> columnTypeList,
String schemaTableName) {
DataStream<Map> filterOperator,
List<String> columnNameList,
List<LogicalType> columnTypeList,
String schemaTableName) {
final String[] columnNames = columnNameList.toArray(new String[columnNameList.size()]);
final LogicalType[] columnTypes = columnTypeList.toArray(new LogicalType[columnTypeList.size()]);
......@@ -94,49 +94,49 @@ public class SQLSinkBuilder extends AbstractSinkBuilder implements SinkBuilder,
RowTypeInfo rowTypeInfo = new RowTypeInfo(typeInformations, columnNames);
return filterOperator
.flatMap(new FlatMapFunction<Map, Row>() {
@Override
public void flatMap(Map value, Collector<Row> out) throws Exception {
try {
switch (value.get("op").toString()) {
case "r":
case "c":
Row irow = Row.ofKind(RowKind.INSERT);
Map idata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
irow.setField(i, convertValue(idata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(irow);
break;
case "d":
Row drow = Row.ofKind(RowKind.DELETE);
Map ddata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
drow.setField(i, convertValue(ddata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(drow);
break;
case "u":
Row ubrow = Row.ofKind(RowKind.UPDATE_BEFORE);
Map ubdata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
ubrow.setField(i, convertValue(ubdata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(ubrow);
Row uarow = Row.ofKind(RowKind.UPDATE_AFTER);
Map uadata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
uarow.setField(i, convertValue(uadata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(uarow);
break;
.flatMap(new FlatMapFunction<Map, Row>() {
@Override
public void flatMap(Map value, Collector<Row> out) throws Exception {
try {
switch (value.get("op").toString()) {
case "r":
case "c":
Row irow = Row.ofKind(RowKind.INSERT);
Map idata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
irow.setField(i, convertValue(idata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(irow);
break;
case "d":
Row drow = Row.ofKind(RowKind.DELETE);
Map ddata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
drow.setField(i, convertValue(ddata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(drow);
break;
case "u":
Row ubrow = Row.ofKind(RowKind.UPDATE_BEFORE);
Map ubdata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
ubrow.setField(i, convertValue(ubdata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(ubrow);
Row uarow = Row.ofKind(RowKind.UPDATE_AFTER);
Map uadata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
uarow.setField(i, convertValue(uadata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(uarow);
break;
}
} catch (Exception e) {
logger.error("SchameTable: {} - Row: {} - Exception: {}", schemaTableName, JSONUtil.toJsonString(value), e.getCause().getMessage());
throw e;
}
} catch (Exception e) {
logger.error("SchameTable: {} - Row: {} - Exception: {}", schemaTableName, JSONUtil.toJsonString(value), e.getCause().getMessage());
throw e;
}
}
}, rowTypeInfo);
}, rowTypeInfo);
}
private void addTableSink(
......@@ -189,29 +189,56 @@ public class SQLSinkBuilder extends AbstractSinkBuilder implements SinkBuilder,
CustomTableEnvironment customTableEnvironment,
DataStreamSource<String> dataStreamSource) {
final List<Schema> schemaList = config.getSchemaList();
final String schemaFieldName = config.getSchemaFieldName();
if (Asserts.isNotNullCollection(schemaList)) {
SingleOutputStreamOperator<Map> mapOperator = deserialize(dataStreamSource);
logger.info("Build deserialize successful...");
Map<Table, OutputTag<Map>> tagMap = new HashMap<>();
Map<String, Table> tableMap = new HashMap<>();
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
final String schemaTableName = table.getSchemaTableName();
String sinkTableName = getSinkTableName(table);
OutputTag<Map> outputTag = new OutputTag<Map>(sinkTableName) {
};
tagMap.put(table, outputTag);
tableMap.put(table.getSchemaTableName(), table);
}
}
final String schemaFieldName = config.getSchemaFieldName();
ObjectMapper objectMapper = new ObjectMapper();
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x,Map.class)).returns(Map.class);
SingleOutputStreamOperator<Map> processOperator = mapOperator.process(new ProcessFunction<Map, Map>() {
@Override
public void processElement(Map map, ProcessFunction<Map, Map>.Context ctx, Collector<Map> out) throws Exception {
LinkedHashMap source = (LinkedHashMap) map.get("source");
try {
SingleOutputStreamOperator<Map> filterOperator = shunt(mapOperator, table, schemaFieldName);
logger.info("Build " + schemaTableName + " shunt successful...");
List<String> columnNameList = new ArrayList<>();
List<LogicalType> columnTypeList = new ArrayList<>();
buildColumn(columnNameList, columnTypeList, table.getColumns());
DataStream<Row> rowDataDataStream = buildRow(filterOperator, columnNameList, columnTypeList, schemaTableName);
logger.info("Build " + schemaTableName + " flatMap successful...");
logger.info("Start build " + schemaTableName + " sink...");
addTableSink(customTableEnvironment, rowDataDataStream, table, columnNameList);
Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
OutputTag<Map> outputTag = tagMap.get(table);
ctx.output(outputTag, map);
} catch (Exception e) {
logger.error("Build " + schemaTableName + " cdc sync failed...");
logger.error(LogUtil.getError(e));
out.collect(map);
}
}
}
});
tagMap.forEach((table,tag) -> {
final String schemaTableName = table.getSchemaTableName();
try {
DataStream<Map> filterOperator = shunt(processOperator, table, tag);
logger.info("Build " + schemaTableName + " shunt successful...");
List<String> columnNameList = new ArrayList<>();
List<LogicalType> columnTypeList = new ArrayList<>();
buildColumn(columnNameList, columnTypeList, table.getColumns());
DataStream<Row> rowDataDataStream = buildRow(filterOperator, columnNameList, columnTypeList, schemaTableName).rebalance();
logger.info("Build " + schemaTableName + " flatMap successful...");
logger.info("Start build " + schemaTableName + " sink...");
addTableSink(customTableEnvironment, rowDataDataStream, table, columnNameList);
} catch (Exception e) {
logger.error("Build " + schemaTableName + " cdc sync failed...");
logger.error(LogUtil.getError(e));
}
});
List<Transformation<?>> trans = customTableEnvironment.getPlanner().translate(modifyOperations);
for (Transformation<?> item : trans) {
env.addOperator(item);
......
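The SQLSinkBuilder diff above keeps the same op handling it had before: buildRow turns each Debezium-style change record into one or two Flink Rows, taking INSERT data from the "after" image, DELETE data from the "before" image, and splitting an update into an UPDATE_BEFORE/UPDATE_AFTER pair. A hedged summary of that op-to-RowKind mapping as a standalone helper (illustration only, not project code):

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.apache.flink.types.RowKind;

final class OpToRowKindSketch {
    // Maps a Debezium-style op code to the RowKind(s) emitted by buildRow.
    static List<RowKind> kindsFor(String op) {
        switch (op) {
            case "r": // snapshot read
            case "c": // insert
                return Collections.singletonList(RowKind.INSERT);
            case "d": // delete, built from the "before" image
                return Collections.singletonList(RowKind.DELETE);
            case "u": // update is split into a retract pair: before, then after
                return Arrays.asList(RowKind.UPDATE_BEFORE, RowKind.UPDATE_AFTER);
            default:  // unknown ops emit nothing
                return Collections.emptyList();
        }
    }
}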
......@@ -37,6 +37,7 @@ import org.apache.flink.table.operations.ModifyOperation;
import org.apache.flink.table.types.logical.*;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -110,7 +111,13 @@ public abstract class AbstractSinkBuilder {
}
});
}
protected DataStream<Map> shunt(
SingleOutputStreamOperator<Map> processOperator,
Table table,
OutputTag<Map> tag) {
return processOperator.getSideOutput(tag);
}
protected DataStream<RowData> buildRowData(
SingleOutputStreamOperator<Map> filterOperator,
List<String> columnNameList,
......
......@@ -20,31 +20,32 @@
package com.dlink.cdc.kafka;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
/**
* MysqlCDCBuilder
*
......@@ -89,44 +90,48 @@ public class KafkaSinkBuilder extends AbstractSinkBuilder implements SinkBuilder
CustomTableEnvironment customTableEnvironment,
DataStreamSource<String> dataStreamSource) {
if (Asserts.isNotNullString(config.getSink().get("topic"))) {
dataStreamSource.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
config.getSink().get("topic"),
new SimpleStringSchema()));
dataStreamSource.addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
config.getSink().get("topic"),
new SimpleStringSchema()));
} else {
Map<Table, OutputTag<String>> tagMap = new HashMap<>();
Map<String, Table> tableMap = new HashMap<>();
ObjectMapper objectMapper = new ObjectMapper();
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x,Map.class)).returns(Map.class);
final List<Schema> schemaList = config.getSchemaList();
final String schemaFieldName = config.getSchemaFieldName();
if (Asserts.isNotNullCollection(schemaList)) {
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(new MapFunction<String, Map>() {
@Override
public Map map(String value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.readValue(value, Map.class);
}
});
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
final String tableName = table.getName();
final String schemaName = table.getSchema();
SingleOutputStreamOperator<Map> filterOperator = mapOperator.filter(new FilterFunction<Map>() {
@Override
public boolean filter(Map value) throws Exception {
LinkedHashMap source = (LinkedHashMap) value.get("source");
return tableName.equals(source.get("table").toString())
&& schemaName.equals(source.get(schemaFieldName).toString());
}
});
SingleOutputStreamOperator<String> stringOperator = filterOperator.map(new MapFunction<Map, String>() {
@Override
public String map(Map value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.writeValueAsString(value);
}
});
stringOperator.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
getSinkTableName(table),
new SimpleStringSchema()));
String sinkTableName = getSinkTableName(table);
OutputTag<String> outputTag = new OutputTag<String>(sinkTableName) {
};
tagMap.put(table, outputTag);
tableMap.put(table.getSchemaTableName(), table);
}
}
SingleOutputStreamOperator<String> process = mapOperator.process(new ProcessFunction<Map, String>() {
@Override
public void processElement(Map map, ProcessFunction<Map, String>.Context ctx, Collector<String> out) throws Exception {
LinkedHashMap source = (LinkedHashMap) map.get("source");
try {
String result = objectMapper.writeValueAsString(map);
Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
OutputTag<String> outputTag = tagMap.get(table);
ctx.output(outputTag, result);
} catch (Exception e) {
out.collect(objectMapper.writeValueAsString(map));
}
}
});
tagMap.forEach((k, v) -> {
String topic = getSinkTableName(k);
process.getSideOutput(v).rebalance().addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
topic, new SimpleStringSchema())).name(topic);
});
}
}
return dataStreamSource;
......
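A small but easy-to-miss detail in the rewritten builders above: deserialization moves from an anonymous MapFunction that constructed an ObjectMapper per element to a single shared mapper and a lambda, and because a lambda's generic return type is erased, the explicit .returns(Map.class) hint is what lets Flink type the resulting stream. A minimal sketch of that pattern (the JSON payload is made up):

import java.util.Map;

import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class DeserializeSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Jackson's ObjectMapper is serializable, so one instance can be captured by the lambda.
        ObjectMapper objectMapper = new ObjectMapper();

        SingleOutputStreamOperator<Map> mapOperator = env
                .fromElements("{\"op\":\"c\",\"source\":{\"db\":\"db\",\"table\":\"orders\"}}")
                .map(x -> objectMapper.readValue(x, Map.class))
                .returns(Map.class); // required: the lambda's output type is erased

        mapOperator.print();
        env.execute("deserialize-sketch");
    }
}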
......@@ -20,15 +20,28 @@
package com.dlink.cdc.sql;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import com.dlink.utils.FlinkBaseUtil;
import com.dlink.utils.JSONUtil;
import com.dlink.utils.LogUtil;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.dag.Transformation;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.operations.ModifyOperation;
import org.apache.flink.table.operations.Operation;
......@@ -37,27 +50,14 @@ import org.apache.flink.table.types.utils.TypeConversions;
import org.apache.flink.types.Row;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import javax.xml.bind.DatatypeConverter;
import java.io.Serializable;
import java.math.BigDecimal;
import java.time.Instant;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import com.dlink.utils.FlinkBaseUtil;
import com.dlink.utils.JSONUtil;
import com.dlink.utils.LogUtil;
import java.util.*;
/**
* SQLSinkBuilder
......@@ -83,10 +83,10 @@ public class SQLSinkBuilder extends AbstractSinkBuilder implements SinkBuilder,
}
private DataStream<Row> buildRow(
SingleOutputStreamOperator<Map> filterOperator,
List<String> columnNameList,
List<LogicalType> columnTypeList,
String schemaTableName) {
DataStream<Map> filterOperator,
List<String> columnNameList,
List<LogicalType> columnTypeList,
String schemaTableName) {
final String[] columnNames = columnNameList.toArray(new String[columnNameList.size()]);
final LogicalType[] columnTypes = columnTypeList.toArray(new LogicalType[columnTypeList.size()]);
......@@ -94,49 +94,49 @@ public class SQLSinkBuilder extends AbstractSinkBuilder implements SinkBuilder,
RowTypeInfo rowTypeInfo = new RowTypeInfo(typeInformations, columnNames);
return filterOperator
.flatMap(new FlatMapFunction<Map, Row>() {
@Override
public void flatMap(Map value, Collector<Row> out) throws Exception {
try {
switch (value.get("op").toString()) {
case "r":
case "c":
Row irow = Row.ofKind(RowKind.INSERT);
Map idata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
irow.setField(i, convertValue(idata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(irow);
break;
case "d":
Row drow = Row.ofKind(RowKind.DELETE);
Map ddata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
drow.setField(i, convertValue(ddata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(drow);
break;
case "u":
Row ubrow = Row.ofKind(RowKind.UPDATE_BEFORE);
Map ubdata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
ubrow.setField(i, convertValue(ubdata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(ubrow);
Row uarow = Row.ofKind(RowKind.UPDATE_AFTER);
Map uadata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
uarow.setField(i, convertValue(uadata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(uarow);
break;
.flatMap(new FlatMapFunction<Map, Row>() {
@Override
public void flatMap(Map value, Collector<Row> out) throws Exception {
try {
switch (value.get("op").toString()) {
case "r":
case "c":
Row irow = Row.ofKind(RowKind.INSERT);
Map idata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
irow.setField(i, convertValue(idata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(irow);
break;
case "d":
Row drow = Row.ofKind(RowKind.DELETE);
Map ddata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
drow.setField(i, convertValue(ddata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(drow);
break;
case "u":
Row ubrow = Row.ofKind(RowKind.UPDATE_BEFORE);
Map ubdata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
ubrow.setField(i, convertValue(ubdata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(ubrow);
Row uarow = Row.ofKind(RowKind.UPDATE_AFTER);
Map uadata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
uarow.setField(i, convertValue(uadata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(uarow);
break;
}
} catch (Exception e) {
logger.error("SchameTable: {} - Row: {} - Exception: {}", schemaTableName, JSONUtil.toJsonString(value), e.getCause().getMessage());
throw e;
}
} catch (Exception e) {
logger.error("SchameTable: {} - Row: {} - Exception: {}", schemaTableName, JSONUtil.toJsonString(value), e.getCause().getMessage());
throw e;
}
}
}, rowTypeInfo);
}, rowTypeInfo);
}
private void addTableSink(
......@@ -189,29 +189,56 @@ public class SQLSinkBuilder extends AbstractSinkBuilder implements SinkBuilder,
CustomTableEnvironment customTableEnvironment,
DataStreamSource<String> dataStreamSource) {
final List<Schema> schemaList = config.getSchemaList();
final String schemaFieldName = config.getSchemaFieldName();
if (Asserts.isNotNullCollection(schemaList)) {
SingleOutputStreamOperator<Map> mapOperator = deserialize(dataStreamSource);
logger.info("Build deserialize successful...");
Map<Table, OutputTag<Map>> tagMap = new HashMap<>();
Map<String, Table> tableMap = new HashMap<>();
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
final String schemaTableName = table.getSchemaTableName();
String sinkTableName = getSinkTableName(table);
OutputTag<Map> outputTag = new OutputTag<Map>(sinkTableName) {
};
tagMap.put(table, outputTag);
tableMap.put(table.getSchemaTableName(), table);
}
}
final String schemaFieldName = config.getSchemaFieldName();
ObjectMapper objectMapper = new ObjectMapper();
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x,Map.class)).returns(Map.class);
SingleOutputStreamOperator<Map> processOperator = mapOperator.process(new ProcessFunction<Map, Map>() {
@Override
public void processElement(Map map, ProcessFunction<Map, Map>.Context ctx, Collector<Map> out) throws Exception {
LinkedHashMap source = (LinkedHashMap) map.get("source");
try {
SingleOutputStreamOperator<Map> filterOperator = shunt(mapOperator, table, schemaFieldName);
logger.info("Build " + schemaTableName + " shunt successful...");
List<String> columnNameList = new ArrayList<>();
List<LogicalType> columnTypeList = new ArrayList<>();
buildColumn(columnNameList, columnTypeList, table.getColumns());
DataStream<Row> rowDataDataStream = buildRow(filterOperator, columnNameList, columnTypeList, schemaTableName);
logger.info("Build " + schemaTableName + " flatMap successful...");
logger.info("Start build " + schemaTableName + " sink...");
addTableSink(customTableEnvironment, rowDataDataStream, table, columnNameList);
Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
OutputTag<Map> outputTag = tagMap.get(table);
ctx.output(outputTag, map);
} catch (Exception e) {
logger.error("Build " + schemaTableName + " cdc sync failed...");
logger.error(LogUtil.getError(e));
out.collect(map);
}
}
}
});
tagMap.forEach((table,tag) -> {
final String schemaTableName = table.getSchemaTableName();
try {
DataStream<Map> filterOperator = shunt(processOperator, table, tag);
logger.info("Build " + schemaTableName + " shunt successful...");
List<String> columnNameList = new ArrayList<>();
List<LogicalType> columnTypeList = new ArrayList<>();
buildColumn(columnNameList, columnTypeList, table.getColumns());
DataStream<Row> rowDataDataStream = buildRow(filterOperator, columnNameList, columnTypeList, schemaTableName).rebalance();
logger.info("Build " + schemaTableName + " flatMap successful...");
logger.info("Start build " + schemaTableName + " sink...");
addTableSink(customTableEnvironment, rowDataDataStream, table, columnNameList);
} catch (Exception e) {
logger.error("Build " + schemaTableName + " cdc sync failed...");
logger.error(LogUtil.getError(e));
}
});
List<Transformation<?>> trans = customTableEnvironment.getPlanner().translate(modifyOperations);
for (Transformation<?> item : trans) {
env.addOperator(item);
......
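buildRow also has to hand Flink a RowTypeInfo so that the flatMap output is properly typed; as in the hunks above, the builders derive it from the table's LogicalTypes via TypeConversions. A minimal sketch of that conversion with a hypothetical two-column table (id BIGINT, name VARCHAR(255)):

import java.util.Arrays;
import java.util.List;

import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.table.types.logical.BigIntType;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.VarCharType;
import org.apache.flink.table.types.utils.TypeConversions;

final class RowTypeInfoSketch {
    // Mirrors the conversion buildRow performs: logical types -> legacy TypeInformation -> RowTypeInfo.
    static RowTypeInfo rowTypeInfo(List<String> names, List<LogicalType> types) {
        LogicalType[] logicalTypes = types.toArray(new LogicalType[0]);
        TypeInformation<?>[] typeInfos =
                TypeConversions.fromDataTypeToLegacyInfo(TypeConversions.fromLogicalToDataType(logicalTypes));
        return new RowTypeInfo(typeInfos, names.toArray(new String[0]));
    }

    public static void main(String[] args) {
        // Hypothetical column set, not taken from the project.
        RowTypeInfo info = rowTypeInfo(
                Arrays.asList("id", "name"),
                Arrays.asList(new BigIntType(), new VarCharType(255)));
        System.out.println(info);
    }
}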
......@@ -37,6 +37,7 @@ import org.apache.flink.table.operations.ModifyOperation;
import org.apache.flink.table.types.logical.*;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -110,7 +111,13 @@ public abstract class AbstractSinkBuilder {
}
});
}
protected DataStream<Map> shunt(
SingleOutputStreamOperator<Map> processOperator,
Table table,
OutputTag<Map> tag) {
return processOperator.getSideOutput(tag);
}
protected DataStream<RowData> buildRowData(
SingleOutputStreamOperator<Map> filterOperator,
List<String> columnNameList,
......
......@@ -20,31 +20,32 @@
package com.dlink.cdc.kafka;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
/**
* MysqlCDCBuilder
*
......@@ -79,44 +80,48 @@ public class KafkaSinkBuilder extends AbstractSinkBuilder implements SinkBuilder
CustomTableEnvironment customTableEnvironment,
DataStreamSource<String> dataStreamSource) {
if (Asserts.isNotNullString(config.getSink().get("topic"))) {
dataStreamSource.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
config.getSink().get("topic"),
new SimpleStringSchema()));
dataStreamSource.addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
config.getSink().get("topic"),
new SimpleStringSchema()));
} else {
Map<Table, OutputTag<String>> tagMap = new HashMap<>();
Map<String, Table> tableMap = new HashMap<>();
ObjectMapper objectMapper = new ObjectMapper();
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x,Map.class)).returns(Map.class);
final List<Schema> schemaList = config.getSchemaList();
final String schemaFieldName = config.getSchemaFieldName();
if (Asserts.isNotNullCollection(schemaList)) {
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(new MapFunction<String, Map>() {
@Override
public Map map(String value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.readValue(value, Map.class);
}
});
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
final String tableName = table.getName();
final String schemaName = table.getSchema();
SingleOutputStreamOperator<Map> filterOperator = mapOperator.filter(new FilterFunction<Map>() {
@Override
public boolean filter(Map value) throws Exception {
LinkedHashMap source = (LinkedHashMap) value.get("source");
return tableName.equals(source.get("table").toString())
&& schemaName.equals(source.get(schemaFieldName).toString());
}
});
SingleOutputStreamOperator<String> stringOperator = filterOperator.map(new MapFunction<Map, String>() {
@Override
public String map(Map value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.writeValueAsString(value);
}
});
stringOperator.addSink(new FlinkKafkaProducer<String>(config.getSink().get("brokers"),
getSinkTableName(table),
new SimpleStringSchema()));
String sinkTableName = getSinkTableName(table);
OutputTag<String> outputTag = new OutputTag<String>(sinkTableName) {
};
tagMap.put(table, outputTag);
tableMap.put(table.getSchemaTableName(), table);
}
}
SingleOutputStreamOperator<String> process = mapOperator.process(new ProcessFunction<Map, String>() {
@Override
public void processElement(Map map, ProcessFunction<Map, String>.Context ctx, Collector<String> out) throws Exception {
LinkedHashMap source = (LinkedHashMap) map.get("source");
try {
String result = objectMapper.writeValueAsString(map);
Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
OutputTag<String> outputTag = tagMap.get(table);
ctx.output(outputTag, result);
} catch (Exception e) {
out.collect(objectMapper.writeValueAsString(map));
}
}
});
tagMap.forEach((k, v) -> {
String topic = getSinkTableName(k);
process.getSideOutput(v).rebalance().addSink(new FlinkKafkaProducer<>(config.getSink().get("brokers"),
topic, new SimpleStringSchema())).name(topic);
});
}
}
return dataStreamSource;
......
......@@ -20,17 +20,28 @@
package com.dlink.cdc.sql;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import com.dlink.utils.FlinkBaseUtil;
import com.dlink.utils.JSONUtil;
import com.dlink.utils.LogUtil;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.dag.Transformation;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.operations.ModifyOperation;
import org.apache.flink.table.operations.Operation;
......@@ -39,26 +50,14 @@ import org.apache.flink.table.types.utils.TypeConversions;
import org.apache.flink.types.Row;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import javax.xml.bind.DatatypeConverter;
import java.io.Serializable;
import java.math.BigDecimal;
import java.time.Instant;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import javax.xml.bind.DatatypeConverter;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import com.dlink.utils.JSONUtil;
import java.util.*;
/**
* SQLSinkBuilder
......@@ -139,7 +138,62 @@ public class SQLSinkBuilder extends AbstractSinkBuilder implements SinkBuilder,
}
}, rowTypeInfo);
}
private DataStream<Row> buildRow(
DataStream<Map> filterOperator,
List<String> columnNameList,
List<LogicalType> columnTypeList,
String schemaTableName) {
final String[] columnNames = columnNameList.toArray(new String[columnNameList.size()]);
final LogicalType[] columnTypes = columnTypeList.toArray(new LogicalType[columnTypeList.size()]);
TypeInformation<?>[] typeInformations = TypeConversions.fromDataTypeToLegacyInfo(TypeConversions.fromLogicalToDataType(columnTypes));
RowTypeInfo rowTypeInfo = new RowTypeInfo(typeInformations, columnNames);
return filterOperator
.flatMap(new FlatMapFunction<Map, Row>() {
@Override
public void flatMap(Map value, Collector<Row> out) throws Exception {
try {
switch (value.get("op").toString()) {
case "r":
case "c":
Row irow = Row.ofKind(RowKind.INSERT);
Map idata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
irow.setField(i, convertValue(idata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(irow);
break;
case "d":
Row drow = Row.ofKind(RowKind.DELETE);
Map ddata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
drow.setField(i, convertValue(ddata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(drow);
break;
case "u":
Row ubrow = Row.ofKind(RowKind.UPDATE_BEFORE);
Map ubdata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
ubrow.setField(i, convertValue(ubdata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(ubrow);
Row uarow = Row.ofKind(RowKind.UPDATE_AFTER);
Map uadata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
uarow.setField(i, convertValue(uadata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(uarow);
break;
}
} catch (Exception e) {
logger.error("SchameTable: {} - Row: {} - Exception: {}", schemaTableName, JSONUtil.toJsonString(value), e.getCause().getMessage());
throw e;
}
}
}, rowTypeInfo);
}
private void addTableSink(
CustomTableEnvironment customTableEnvironment,
DataStream<Row> rowDataDataStream,
......@@ -190,29 +244,57 @@ public class SQLSinkBuilder extends AbstractSinkBuilder implements SinkBuilder,
CustomTableEnvironment customTableEnvironment,
DataStreamSource<String> dataStreamSource) {
final List<Schema> schemaList = config.getSchemaList();
final String schemaFieldName = config.getSchemaFieldName();
if (Asserts.isNotNullCollection(schemaList)) {
SingleOutputStreamOperator<Map> mapOperator = deserialize(dataStreamSource);
logger.info("Build deserialize successful...");
Map<Table, OutputTag<Map>> tagMap = new HashMap<>();
Map<String, Table> tableMap = new HashMap<>();
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
final String schemaTableName = table.getSchemaTableName();
String sinkTableName = getSinkTableName(table);
OutputTag<Map> outputTag = new OutputTag<Map>(sinkTableName) {
};
tagMap.put(table, outputTag);
tableMap.put(table.getSchemaTableName(), table);
}
}
final String schemaFieldName = config.getSchemaFieldName();
ObjectMapper objectMapper = new ObjectMapper();
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x,Map.class)).returns(Map.class);
SingleOutputStreamOperator<Map> processOperator = mapOperator.process(new ProcessFunction<Map, Map>() {
@Override
public void processElement(Map map, ProcessFunction<Map, Map>.Context ctx, Collector<Map> out) throws Exception {
LinkedHashMap source = (LinkedHashMap) map.get("source");
try {
SingleOutputStreamOperator<Map> filterOperator = shunt(mapOperator, table, schemaFieldName);
logger.info("Build " + schemaTableName + " shunt successful...");
List<String> columnNameList = new ArrayList<>();
List<LogicalType> columnTypeList = new ArrayList<>();
buildColumn(columnNameList, columnTypeList, table.getColumns());
DataStream<Row> rowDataDataStream = buildRow(filterOperator, columnNameList, columnTypeList, schemaTableName);
logger.info("Build " + schemaTableName + " flatMap successful...");
logger.info("Start build " + schemaTableName + " sink...");
addTableSink(customTableEnvironment, rowDataDataStream, table, columnNameList);
Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
OutputTag<Map> outputTag = tagMap.get(table);
ctx.output(outputTag, map);
} catch (Exception e) {
logger.error("Build " + schemaTableName + " cdc sync failed...");
logger.error(LogUtil.getError(e));
out.collect(map);
}
}
}
});
tagMap.forEach((table,tag) -> {
final String schemaTableName = table.getSchemaTableName();
try {
DataStream<Map> filterOperator = shunt(processOperator, table, tag);
logger.info("Build " + schemaTableName + " shunt successful...");
List<String> columnNameList = new ArrayList<>();
List<LogicalType> columnTypeList = new ArrayList<>();
buildColumn(columnNameList, columnTypeList, table.getColumns());
DataStream<Row> rowDataDataStream = buildRow(filterOperator, columnNameList, columnTypeList, schemaTableName).rebalance();
logger.info("Build " + schemaTableName + " flatMap successful...");
logger.info("Start build " + schemaTableName + " sink...");
addTableSink(customTableEnvironment, rowDataDataStream, table, columnNameList);
} catch (Exception e) {
logger.error("Build " + schemaTableName + " cdc sync failed...");
logger.error(LogUtil.getError(e));
}
});
List<Transformation<?>> trans = customTableEnvironment.getPlanner().translate(modifyOperations);
for (Transformation<?> item : trans) {
env.addOperator(item);
......
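After the routing ProcessFunction, build() fans out by iterating the tag map: each table's side output is pulled with shunt, rebalanced, converted to rows, and attached to its own table sink. A minimal sketch of that fan-out loop, using simple string payloads and a named print() sink in place of addTableSink (keys and payloads are made up):

import java.util.HashMap;
import java.util.Map;

import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

public class FanOutSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        final Map<String, OutputTag<String>> tagMap = new HashMap<>();
        tagMap.put("db.orders", new OutputTag<String>("db.orders") {});
        tagMap.put("db.users", new OutputTag<String>("db.users") {});

        SingleOutputStreamOperator<String> process = env
                .fromElements("db.orders|1", "db.users|2", "db.orders|3")
                .process(new ProcessFunction<String, String>() {
                    @Override
                    public void processElement(String value, Context ctx, Collector<String> out) {
                        OutputTag<String> tag = tagMap.get(value.split("\\|")[0]);
                        if (tag != null) {
                            ctx.output(tag, value);
                        } else {
                            out.collect(value);
                        }
                    }
                });

        // One sink per table; rebalance() spreads each table's records across subtasks.
        tagMap.forEach((table, tag) ->
                process.getSideOutput(tag).rebalance().print().name(table));
        env.execute("fan-out-sketch");
    }
}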
......@@ -37,6 +37,7 @@ import org.apache.flink.table.operations.ModifyOperation;
import org.apache.flink.table.types.logical.*;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -110,7 +111,13 @@ public abstract class AbstractSinkBuilder {
}
});
}
protected DataStream<Map> shunt(
SingleOutputStreamOperator<Map> processOperator,
Table table,
OutputTag<Map> tag) {
return processOperator.getSideOutput(tag);
}
protected DataStream<RowData> buildRowData(
SingleOutputStreamOperator<Map> filterOperator,
List<String> columnNameList,
......
......@@ -20,9 +20,16 @@
package com.dlink.cdc.kafka;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
......@@ -30,22 +37,17 @@ import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
/**
* MysqlCDCBuilder
*
......@@ -90,54 +92,60 @@ public class KafkaSinkBuilder extends AbstractSinkBuilder implements SinkBuilder
CustomTableEnvironment customTableEnvironment,
DataStreamSource<String> dataStreamSource) {
if (Asserts.isNotNullString(config.getSink().get("topic"))) {
dataStreamSource.sinkTo(KafkaSink.<String>builder()
.setBootstrapServers(config.getSink().get("brokers"))
.setRecordSerializer(KafkaRecordSerializationSchema.builder()
.setTopic(config.getSink().get("topic"))
.setValueSerializationSchema(new SimpleStringSchema())
.build()
)
.build());
KafkaSink<String> kafkaSink = KafkaSink.<String>builder().setBootstrapServers(config.getSink().get("brokers"))
.setRecordSerializer(KafkaRecordSerializationSchema.builder()
.setTopic(config.getSink().get("topic"))
.setValueSerializationSchema(new SimpleStringSchema())
.build()
)
.setDeliverGuarantee(DeliveryGuarantee.valueOf(env.getCheckpointingMode().name()))
.build();
dataStreamSource.sinkTo(kafkaSink);
} else {
Map<Table, OutputTag<String>> tagMap = new HashMap<>();
Map<String, Table> tableMap = new HashMap<>();
ObjectMapper objectMapper = new ObjectMapper();
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x,Map.class)).returns(Map.class);
final List<Schema> schemaList = config.getSchemaList();
final String schemaFieldName = config.getSchemaFieldName();
if (Asserts.isNotNullCollection(schemaList)) {
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(new MapFunction<String, Map>() {
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
String sinkTableName = getSinkTableName(table);
OutputTag<String> outputTag = new OutputTag<String>(sinkTableName) {
};
tagMap.put(table, outputTag);
tableMap.put(table.getSchemaTableName(), table);
}
}
SingleOutputStreamOperator<String> process = mapOperator.process(new ProcessFunction<Map, String>() {
@Override
public Map map(String value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.readValue(value, Map.class);
public void processElement(Map map, ProcessFunction<Map, String>.Context ctx, Collector<String> out) throws Exception {
LinkedHashMap source = (LinkedHashMap) map.get("source");
try {
String result = objectMapper.writeValueAsString(map);
Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
OutputTag<String> outputTag = tagMap.get(table);
ctx.output(outputTag, result);
} catch (Exception e) {
out.collect(objectMapper.writeValueAsString(map));
}
}
});
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
final String tableName = table.getName();
final String schemaName = table.getSchema();
SingleOutputStreamOperator<Map> filterOperator = mapOperator.filter(new FilterFunction<Map>() {
@Override
public boolean filter(Map value) throws Exception {
LinkedHashMap source = (LinkedHashMap) value.get("source");
return tableName.equals(source.get("table").toString())
&& schemaName.equals(source.get(schemaFieldName).toString());
}
});
SingleOutputStreamOperator<String> stringOperator = filterOperator.map(new MapFunction<Map, String>() {
@Override
public String map(Map value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.writeValueAsString(value);
}
});
stringOperator.sinkTo(KafkaSink.<String>builder()
.setBootstrapServers(config.getSink().get("brokers"))
tagMap.forEach((k, v) -> {
String topic = getSinkTableName(k);
KafkaSink<String> kafkaSink = KafkaSink.<String>builder().setBootstrapServers(config.getSink().get("brokers"))
.setRecordSerializer(KafkaRecordSerializationSchema.builder()
.setTopic(getSinkTableName(table))
.setValueSerializationSchema(new SimpleStringSchema())
.build()
.setTopic(topic)
.setValueSerializationSchema(new SimpleStringSchema())
.build()
)
.build());
}
}
.setDeliverGuarantee(DeliveryGuarantee.valueOf(env.getCheckpointingMode().name()))
.build();
process.getSideOutput(v).rebalance().sinkTo(kafkaSink).name(topic);
});
}
}
return dataStreamSource;
......
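This client module uses the newer KafkaSink API instead of FlinkKafkaProducer, and the rewrite also derives the sink's delivery guarantee from the job's checkpointing mode via DeliveryGuarantee.valueOf(env.getCheckpointingMode().name()); that mapping works because CheckpointingMode's EXACTLY_ONCE and AT_LEAST_ONCE names both exist on DeliveryGuarantee. A minimal sketch of that wiring, assuming a Flink 1.14+ Kafka connector and placeholder broker/topic values:

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class KafkaSinkSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.enableCheckpointing(10_000L, CheckpointingMode.AT_LEAST_ONCE);

        // CheckpointingMode names map one-to-one onto DeliveryGuarantee names.
        DeliveryGuarantee guarantee = DeliveryGuarantee.valueOf(env.getCheckpointingMode().name());

        KafkaSink<String> sink = KafkaSink.<String>builder()
                .setBootstrapServers("localhost:9092")          // placeholder broker
                .setRecordSerializer(KafkaRecordSerializationSchema.builder()
                        .setTopic("example_topic")              // placeholder topic
                        .setValueSerializationSchema(new SimpleStringSchema())
                        .build())
                .setDeliverGuarantee(guarantee)
                .build();

        env.fromElements("{\"op\":\"c\"}").sinkTo(sink);
        env.execute("kafka-sink-sketch");
    }
}

Note that if checkpointing runs in EXACTLY_ONCE mode, the resulting EXACTLY_ONCE guarantee typically also requires a transactional id prefix on the sink; the sketch sidesteps that by checkpointing in AT_LEAST_ONCE mode.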
......@@ -25,10 +25,12 @@ import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.dag.Transformation;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.operations.ModifyOperation;
import org.apache.flink.table.operations.Operation;
......@@ -43,9 +45,7 @@ import java.io.Serializable;
import java.math.BigDecimal;
import java.time.Instant;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.*;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
......@@ -58,6 +58,7 @@ import com.dlink.model.Table;
import com.dlink.utils.FlinkBaseUtil;
import com.dlink.utils.JSONUtil;
import com.dlink.utils.LogUtil;
import org.apache.flink.util.OutputTag;
/**
* SQLSinkBuilder
......@@ -83,10 +84,10 @@ public class SQLSinkBuilder extends AbstractSinkBuilder implements SinkBuilder,
}
private DataStream<Row> buildRow(
SingleOutputStreamOperator<Map> filterOperator,
List<String> columnNameList,
List<LogicalType> columnTypeList,
String schemaTableName) {
DataStream<Map> filterOperator,
List<String> columnNameList,
List<LogicalType> columnTypeList,
String schemaTableName) {
final String[] columnNames = columnNameList.toArray(new String[columnNameList.size()]);
final LogicalType[] columnTypes = columnTypeList.toArray(new LogicalType[columnTypeList.size()]);
......@@ -94,49 +95,49 @@ public class SQLSinkBuilder extends AbstractSinkBuilder implements SinkBuilder,
RowTypeInfo rowTypeInfo = new RowTypeInfo(typeInformations, columnNames);
return filterOperator
.flatMap(new FlatMapFunction<Map, Row>() {
@Override
public void flatMap(Map value, Collector<Row> out) throws Exception {
try {
switch (value.get("op").toString()) {
case "r":
case "c":
Row irow = Row.withPositions(RowKind.INSERT, columnNameList.size());
Map idata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
irow.setField(i, convertValue(idata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(irow);
break;
case "d":
Row drow = Row.withPositions(RowKind.DELETE, columnNameList.size());
Map ddata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
drow.setField(i, convertValue(ddata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(drow);
break;
case "u":
Row ubrow = Row.withPositions(RowKind.UPDATE_BEFORE, columnNameList.size());
Map ubdata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
ubrow.setField(i, convertValue(ubdata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(ubrow);
Row uarow = Row.withPositions(RowKind.UPDATE_AFTER, columnNameList.size());
Map uadata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
uarow.setField(i, convertValue(uadata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(uarow);
break;
.flatMap(new FlatMapFunction<Map, Row>() {
@Override
public void flatMap(Map value, Collector<Row> out) throws Exception {
try {
switch (value.get("op").toString()) {
case "r":
case "c":
Row irow = Row.ofKind(RowKind.INSERT);
Map idata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
irow.setField(i, convertValue(idata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(irow);
break;
case "d":
Row drow = Row.ofKind(RowKind.DELETE);
Map ddata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
drow.setField(i, convertValue(ddata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(drow);
break;
case "u":
Row ubrow = Row.ofKind(RowKind.UPDATE_BEFORE);
Map ubdata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
ubrow.setField(i, convertValue(ubdata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(ubrow);
Row uarow = Row.ofKind(RowKind.UPDATE_AFTER);
Map uadata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
uarow.setField(i, convertValue(uadata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(uarow);
break;
}
} catch (Exception e) {
logger.error("SchameTable: {} - Row: {} - Exception: {}", schemaTableName, JSONUtil.toJsonString(value), e.getCause().getMessage());
throw e;
}
} catch (Exception e) {
logger.error("SchameTable: {} - Row: {} - Exception: {}", schemaTableName, JSONUtil.toJsonString(value), e.getCause().getMessage());
throw e;
}
}
}, rowTypeInfo);
}, rowTypeInfo);
}
private void addTableSink(
......@@ -189,29 +190,56 @@ public class SQLSinkBuilder extends AbstractSinkBuilder implements SinkBuilder,
CustomTableEnvironment customTableEnvironment,
DataStreamSource<String> dataStreamSource) {
final List<Schema> schemaList = config.getSchemaList();
final String schemaFieldName = config.getSchemaFieldName();
if (Asserts.isNotNullCollection(schemaList)) {
SingleOutputStreamOperator<Map> mapOperator = deserialize(dataStreamSource);
logger.info("Build deserialize successful...");
Map<Table, OutputTag<Map>> tagMap = new HashMap<>();
Map<String, Table> tableMap = new HashMap<>();
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
final String schemaTableName = table.getSchemaTableName();
String sinkTableName = getSinkTableName(table);
OutputTag<Map> outputTag = new OutputTag<Map>(sinkTableName) {
};
tagMap.put(table, outputTag);
tableMap.put(table.getSchemaTableName(), table);
}
}
final String schemaFieldName = config.getSchemaFieldName();
ObjectMapper objectMapper = new ObjectMapper();
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x,Map.class)).returns(Map.class);
SingleOutputStreamOperator<Map> processOperator = mapOperator.process(new ProcessFunction<Map, Map>() {
@Override
public void processElement(Map map, ProcessFunction<Map, Map>.Context ctx, Collector<Map> out) throws Exception {
LinkedHashMap source = (LinkedHashMap) map.get("source");
try {
SingleOutputStreamOperator<Map> filterOperator = shunt(mapOperator, table, schemaFieldName);
logger.info("Build " + schemaTableName + " shunt successful...");
List<String> columnNameList = new ArrayList<>();
List<LogicalType> columnTypeList = new ArrayList<>();
buildColumn(columnNameList, columnTypeList, table.getColumns());
DataStream<Row> rowDataDataStream = buildRow(filterOperator, columnNameList, columnTypeList, schemaTableName);
logger.info("Build " + schemaTableName + " flatMap successful...");
logger.info("Start build " + schemaTableName + " sink...");
addTableSink(customTableEnvironment, rowDataDataStream, table, columnNameList);
Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
OutputTag<Map> outputTag = tagMap.get(table);
ctx.output(outputTag, map);
} catch (Exception e) {
logger.error("Build " + schemaTableName + " cdc sync failed...");
logger.error(LogUtil.getError(e));
out.collect(map);
}
}
}
});
tagMap.forEach((table,tag) -> {
final String schemaTableName = table.getSchemaTableName();
try {
DataStream<Map> filterOperator = shunt(processOperator, table, tag);
logger.info("Build " + schemaTableName + " shunt successful...");
List<String> columnNameList = new ArrayList<>();
List<LogicalType> columnTypeList = new ArrayList<>();
buildColumn(columnNameList, columnTypeList, table.getColumns());
DataStream<Row> rowDataDataStream = buildRow(filterOperator, columnNameList, columnTypeList, schemaTableName).rebalance();
logger.info("Build " + schemaTableName + " flatMap successful...");
logger.info("Start build " + schemaTableName + " sink...");
addTableSink(customTableEnvironment, rowDataDataStream, table, columnNameList);
} catch (Exception e) {
logger.error("Build " + schemaTableName + " cdc sync failed...");
logger.error(LogUtil.getError(e));
}
});
List<Transformation<?>> trans = customTableEnvironment.getPlanner().translate(modifyOperations);
for (Transformation<?> item : trans) {
env.addOperator(item);
......
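Reviewer note: the hunk above is the core of the change. The removed per-table filter() pass (the lines that still reference table and schemaTableName inside the loop) is replaced by a single ProcessFunction that routes every record once, keyed by schema.table, to a per-table side output. A generic sketch of that pattern, with illustrative names and a hard-coded "db" source field standing in for config.getSchemaFieldName():

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

import java.util.HashMap;
import java.util.Map;

public final class SideOutputRoutingSketch {

    static Map<String, DataStream<Map>> route(DataStream<Map> events, Iterable<String> tableKeys) {
        // One OutputTag per table; the anonymous subclass ({}) keeps the Map type parameter after erasure.
        Map<String, OutputTag<Map>> tags = new HashMap<>();
        for (String key : tableKeys) {
            tags.put(key, new OutputTag<Map>(key) { });
        }

        SingleOutputStreamOperator<Map> routed = events.process(new ProcessFunction<Map, Map>() {
            @Override
            public void processElement(Map value, Context ctx, Collector<Map> out) {
                Map source = (Map) value.get("source");
                String key = source.get("db") + "." + source.get("table"); // "db" stands in for the configured schema field
                OutputTag<Map> tag = tags.get(key);
                if (tag != null) {
                    ctx.output(tag, value);   // routed exactly once, no per-table filter pass over the stream
                } else {
                    out.collect(value);       // records for unknown tables fall through to the main output
                }
            }
        });

        Map<String, DataStream<Map>> perTable = new HashMap<>();
        tags.forEach((key, tag) -> perTable.put(key, routed.getSideOutput(tag)));
        return perTable;
    }
}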
......@@ -37,6 +37,7 @@ import org.apache.flink.table.operations.ModifyOperation;
import org.apache.flink.table.types.logical.*;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
......@@ -110,7 +111,13 @@ public abstract class AbstractSinkBuilder {
}
});
}
protected DataStream<Map> shunt(
SingleOutputStreamOperator<Map> processOperator,
Table table,
OutputTag<Map> tag) {
return processOperator.getSideOutput(tag);
}
protected DataStream<RowData> buildRowData(
SingleOutputStreamOperator<Map> filterOperator,
List<String> columnNameList,
......
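Reviewer note: the new shunt overload is a thin wrapper over getSideOutput. Two details are easy to miss and are sketched below: the side output has to be read from the operator returned by process() (not from the original mapOperator), using the same OutputTag instance the ProcessFunction emitted to, and the tag has to be created as an anonymous subclass so its element type survives erasure.

import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.util.OutputTag;

import java.util.Map;

public final class ShuntSketch {

    // Mirrors the shunt overload above: the side output is pulled from the operator
    // that produced it, using the tag the ProcessFunction wrote to.
    static DataStream<Map> shunt(SingleOutputStreamOperator<Map> processOperator, OutputTag<Map> tag) {
        return processOperator.getSideOutput(tag);
    }

    static OutputTag<Map> tagFor(String sinkTableName) {
        // The trailing {} creates an anonymous subclass so the Map type parameter
        // survives erasure and Flink can derive the side output's TypeInformation.
        return new OutputTag<Map>(sinkTableName) { };
    }
}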
......@@ -20,9 +20,16 @@
package com.dlink.cdc.kafka;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
......@@ -30,22 +37,17 @@ import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
/**
* MysqlCDCBuilder
*
......@@ -90,54 +92,60 @@ public class KafkaSinkBuilder extends AbstractSinkBuilder implements SinkBuilder
CustomTableEnvironment customTableEnvironment,
DataStreamSource<String> dataStreamSource) {
if (Asserts.isNotNullString(config.getSink().get("topic"))) {
dataStreamSource.sinkTo(KafkaSink.<String>builder()
.setBootstrapServers(config.getSink().get("brokers"))
.setRecordSerializer(KafkaRecordSerializationSchema.builder()
.setTopic(config.getSink().get("topic"))
.setValueSerializationSchema(new SimpleStringSchema())
.build()
)
.build());
KafkaSink<String> kafkaSink = KafkaSink.<String>builder().setBootstrapServers(config.getSink().get("brokers"))
.setRecordSerializer(KafkaRecordSerializationSchema.builder()
.setTopic(config.getSink().get("topic"))
.setValueSerializationSchema(new SimpleStringSchema())
.build()
)
.setDeliverGuarantee(DeliveryGuarantee.valueOf(env.getCheckpointingMode().name()))
.build();
dataStreamSource.sinkTo(kafkaSink);
} else {
Map<Table, OutputTag<String>> tagMap = new HashMap<>();
Map<String, Table> tableMap = new HashMap<>();
ObjectMapper objectMapper = new ObjectMapper();
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x,Map.class)).returns(Map.class);
final List<Schema> schemaList = config.getSchemaList();
final String schemaFieldName = config.getSchemaFieldName();
if (Asserts.isNotNullCollection(schemaList)) {
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(new MapFunction<String, Map>() {
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
String sinkTableName = getSinkTableName(table);
OutputTag<String> outputTag = new OutputTag<String>(sinkTableName) {
};
tagMap.put(table, outputTag);
tableMap.put(table.getSchemaTableName(), table);
}
}
SingleOutputStreamOperator<String> process = mapOperator.process(new ProcessFunction<Map, String>() {
@Override
public Map map(String value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.readValue(value, Map.class);
public void processElement(Map map, ProcessFunction<Map, String>.Context ctx, Collector<String> out) throws Exception {
LinkedHashMap source = (LinkedHashMap) map.get("source");
try {
String result = objectMapper.writeValueAsString(map);
Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
OutputTag<String> outputTag = tagMap.get(table);
ctx.output(outputTag, result);
} catch (Exception e) {
out.collect(objectMapper.writeValueAsString(map));
}
}
});
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
final String tableName = table.getName();
final String schemaName = table.getSchema();
SingleOutputStreamOperator<Map> filterOperator = mapOperator.filter(new FilterFunction<Map>() {
@Override
public boolean filter(Map value) throws Exception {
LinkedHashMap source = (LinkedHashMap) value.get("source");
return tableName.equals(source.get("table").toString())
&& schemaName.equals(source.get(schemaFieldName).toString());
}
});
SingleOutputStreamOperator<String> stringOperator = filterOperator.map(new MapFunction<Map, String>() {
@Override
public String map(Map value) throws Exception {
ObjectMapper objectMapper = new ObjectMapper();
return objectMapper.writeValueAsString(value);
}
});
stringOperator.sinkTo(KafkaSink.<String>builder()
.setBootstrapServers(config.getSink().get("brokers"))
tagMap.forEach((k, v) -> {
String topic = getSinkTableName(k);
KafkaSink<String> kafkaSink = KafkaSink.<String>builder().setBootstrapServers(config.getSink().get("brokers"))
.setRecordSerializer(KafkaRecordSerializationSchema.builder()
.setTopic(getSinkTableName(table))
.setValueSerializationSchema(new SimpleStringSchema())
.build()
.setTopic(topic)
.setValueSerializationSchema(new SimpleStringSchema())
.build()
)
.build());
}
}
.setDeliverGuarantee(DeliveryGuarantee.valueOf(env.getCheckpointingMode().name()))
.build();
process.getSideOutput(v).rebalance().sinkTo(kafkaSink).name(topic);
});
}
}
return dataStreamSource;
......
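Reviewer note: in this module the delivery guarantee is derived with DeliveryGuarantee.valueOf(env.getCheckpointingMode().name()), which works because CheckpointingMode's EXACTLY_ONCE and AT_LEAST_ONCE names match DeliveryGuarantee constants. A minimal sketch of the same construction in isolation; the transactional id prefix is my assumption for the exactly-once case and is not part of this PR:

import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.base.DeliveryGuarantee;
import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema;
import org.apache.flink.connector.kafka.sink.KafkaSink;
import org.apache.flink.streaming.api.CheckpointingMode;

public final class KafkaSinkSketch {

    static KafkaSink<String> build(String brokers, String topic, CheckpointingMode mode) {
        // EXACTLY_ONCE -> EXACTLY_ONCE, AT_LEAST_ONCE -> AT_LEAST_ONCE; NONE is never produced this way.
        DeliveryGuarantee guarantee = DeliveryGuarantee.valueOf(mode.name());
        return KafkaSink.<String>builder()
                .setBootstrapServers(brokers)
                .setRecordSerializer(KafkaRecordSerializationSchema.builder()
                        .setTopic(topic)
                        .setValueSerializationSchema(new SimpleStringSchema())
                        .build())
                .setDeliverGuarantee(guarantee)
                // Assumption (not from this PR): exactly-once delivery also needs a
                // transactional id prefix and checkpointing enabled on the job.
                .setTransactionalIdPrefix("dlink-cdc-" + topic)
                .build();
    }
}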
......@@ -20,15 +20,28 @@
package com.dlink.cdc.sql;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import com.dlink.utils.FlinkBaseUtil;
import com.dlink.utils.JSONUtil;
import com.dlink.utils.LogUtil;
import org.apache.commons.lang3.StringUtils;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.dag.Transformation;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.operations.ModifyOperation;
import org.apache.flink.table.operations.Operation;
......@@ -37,27 +50,14 @@ import org.apache.flink.table.types.utils.TypeConversions;
import org.apache.flink.types.Row;
import org.apache.flink.types.RowKind;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import javax.xml.bind.DatatypeConverter;
import java.io.Serializable;
import java.math.BigDecimal;
import java.time.Instant;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.dlink.assertion.Asserts;
import com.dlink.cdc.AbstractSinkBuilder;
import com.dlink.cdc.CDCBuilder;
import com.dlink.cdc.SinkBuilder;
import com.dlink.executor.CustomTableEnvironment;
import com.dlink.model.FlinkCDCConfig;
import com.dlink.model.Schema;
import com.dlink.model.Table;
import com.dlink.utils.FlinkBaseUtil;
import com.dlink.utils.JSONUtil;
import com.dlink.utils.LogUtil;
import java.util.*;
/**
* SQLSinkBuilder
......@@ -83,10 +83,10 @@ public class SQLSinkBuilder extends AbstractSinkBuilder implements SinkBuilder,
}
private DataStream<Row> buildRow(
SingleOutputStreamOperator<Map> filterOperator,
List<String> columnNameList,
List<LogicalType> columnTypeList,
String schemaTableName) {
DataStream<Map> filterOperator,
List<String> columnNameList,
List<LogicalType> columnTypeList,
String schemaTableName) {
final String[] columnNames = columnNameList.toArray(new String[columnNameList.size()]);
final LogicalType[] columnTypes = columnTypeList.toArray(new LogicalType[columnTypeList.size()]);
......@@ -94,49 +94,49 @@ public class SQLSinkBuilder extends AbstractSinkBuilder implements SinkBuilder,
RowTypeInfo rowTypeInfo = new RowTypeInfo(typeInformations, columnNames);
return filterOperator
.flatMap(new FlatMapFunction<Map, Row>() {
@Override
public void flatMap(Map value, Collector<Row> out) throws Exception {
try {
switch (value.get("op").toString()) {
case "r":
case "c":
Row irow = Row.withPositions(RowKind.INSERT, columnNameList.size());
Map idata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
irow.setField(i, convertValue(idata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(irow);
break;
case "d":
Row drow = Row.withPositions(RowKind.DELETE, columnNameList.size());
Map ddata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
drow.setField(i, convertValue(ddata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(drow);
break;
case "u":
Row ubrow = Row.withPositions(RowKind.UPDATE_BEFORE, columnNameList.size());
Map ubdata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
ubrow.setField(i, convertValue(ubdata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(ubrow);
Row uarow = Row.withPositions(RowKind.UPDATE_AFTER, columnNameList.size());
Map uadata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
uarow.setField(i, convertValue(uadata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(uarow);
break;
.flatMap(new FlatMapFunction<Map, Row>() {
@Override
public void flatMap(Map value, Collector<Row> out) throws Exception {
try {
switch (value.get("op").toString()) {
case "r":
case "c":
Row irow = Row.ofKind(RowKind.INSERT);
Map idata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
irow.setField(i, convertValue(idata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(irow);
break;
case "d":
Row drow = Row.ofKind(RowKind.DELETE);
Map ddata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
drow.setField(i, convertValue(ddata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(drow);
break;
case "u":
Row ubrow = Row.ofKind(RowKind.UPDATE_BEFORE);
Map ubdata = (Map) value.get("before");
for (int i = 0; i < columnNameList.size(); i++) {
ubrow.setField(i, convertValue(ubdata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(ubrow);
Row uarow = Row.ofKind(RowKind.UPDATE_AFTER);
Map uadata = (Map) value.get("after");
for (int i = 0; i < columnNameList.size(); i++) {
uarow.setField(i, convertValue(uadata.get(columnNameList.get(i)), columnTypeList.get(i)));
}
out.collect(uarow);
break;
}
} catch (Exception e) {
logger.error("SchameTable: {} - Row: {} - Exception: {}", schemaTableName, JSONUtil.toJsonString(value), e.getCause().getMessage());
throw e;
}
} catch (Exception e) {
logger.error("SchameTable: {} - Row: {} - Exception: {}", schemaTableName, JSONUtil.toJsonString(value), e.getCause().getMessage());
throw e;
}
}
}, rowTypeInfo);
}, rowTypeInfo);
}
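Reviewer note: two Row factory variants appear across the client modules touched by this PR. Row.withPositions(kind, n), the newer API, pre-allocates n position-based fields so the setField(i, ...) loop always has a slot to write, while Row.ofKind(kind, values...) sizes the row from the values it is given. A tiny standalone illustration, assuming a Flink version that has both factories:

import org.apache.flink.types.Row;
import org.apache.flink.types.RowKind;

public final class RowFactoryDemo {
    public static void main(String[] args) {
        // Explicit arity: three null slots are allocated up front, then filled by position.
        Row byPosition = Row.withPositions(RowKind.INSERT, 3);
        byPosition.setField(0, 1L);
        byPosition.setField(1, "name");
        byPosition.setField(2, true);

        // Arity inferred from the supplied values.
        Row fromValues = Row.ofKind(RowKind.INSERT, 1L, "name", true);

        System.out.println(byPosition.getArity() + " / " + fromValues.getArity()); // prints: 3 / 3
    }
}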
private void addTableSink(
......@@ -189,29 +189,56 @@ public class SQLSinkBuilder extends AbstractSinkBuilder implements SinkBuilder,
CustomTableEnvironment customTableEnvironment,
DataStreamSource<String> dataStreamSource) {
final List<Schema> schemaList = config.getSchemaList();
final String schemaFieldName = config.getSchemaFieldName();
if (Asserts.isNotNullCollection(schemaList)) {
SingleOutputStreamOperator<Map> mapOperator = deserialize(dataStreamSource);
logger.info("Build deserialize successful...");
Map<Table, OutputTag<Map>> tagMap = new HashMap<>();
Map<String, Table> tableMap = new HashMap<>();
for (Schema schema : schemaList) {
for (Table table : schema.getTables()) {
final String schemaTableName = table.getSchemaTableName();
String sinkTableName = getSinkTableName(table);
OutputTag<Map> outputTag = new OutputTag<Map>(sinkTableName) {
};
tagMap.put(table, outputTag);
tableMap.put(table.getSchemaTableName(), table);
}
}
final String schemaFieldName = config.getSchemaFieldName();
ObjectMapper objectMapper = new ObjectMapper();
SingleOutputStreamOperator<Map> mapOperator = dataStreamSource.map(x -> objectMapper.readValue(x,Map.class)).returns(Map.class);
SingleOutputStreamOperator<Map> processOperator = mapOperator.process(new ProcessFunction<Map, Map>() {
@Override
public void processElement(Map map, ProcessFunction<Map, Map>.Context ctx, Collector<Map> out) throws Exception {
LinkedHashMap source = (LinkedHashMap) map.get("source");
try {
SingleOutputStreamOperator<Map> filterOperator = shunt(mapOperator, table, schemaFieldName);
logger.info("Build " + schemaTableName + " shunt successful...");
List<String> columnNameList = new ArrayList<>();
List<LogicalType> columnTypeList = new ArrayList<>();
buildColumn(columnNameList, columnTypeList, table.getColumns());
DataStream<Row> rowDataDataStream = buildRow(filterOperator, columnNameList, columnTypeList, schemaTableName);
logger.info("Build " + schemaTableName + " flatMap successful...");
logger.info("Start build " + schemaTableName + " sink...");
addTableSink(customTableEnvironment, rowDataDataStream, table, columnNameList);
Table table = tableMap.get(source.get(schemaFieldName).toString() + "." + source.get("table").toString());
OutputTag<Map> outputTag = tagMap.get(table);
ctx.output(outputTag, map);
} catch (Exception e) {
logger.error("Build " + schemaTableName + " cdc sync failed...");
logger.error(LogUtil.getError(e));
out.collect(map);
}
}
}
});
tagMap.forEach((table,tag) -> {
final String schemaTableName = table.getSchemaTableName();
try {
DataStream<Map> filterOperator = shunt(processOperator, table, tag);
logger.info("Build " + schemaTableName + " shunt successful...");
List<String> columnNameList = new ArrayList<>();
List<LogicalType> columnTypeList = new ArrayList<>();
buildColumn(columnNameList, columnTypeList, table.getColumns());
DataStream<Row> rowDataDataStream = buildRow(filterOperator, columnNameList, columnTypeList, schemaTableName).rebalance();
logger.info("Build " + schemaTableName + " flatMap successful...");
logger.info("Start build " + schemaTableName + " sink...");
addTableSink(customTableEnvironment, rowDataDataStream, table, columnNameList);
} catch (Exception e) {
logger.error("Build " + schemaTableName + " cdc sync failed...");
logger.error(LogUtil.getError(e));
}
});
List<Transformation<?>> trans = customTableEnvironment.getPlanner().translate(modifyOperations);
for (Transformation<?> item : trans) {
env.addOperator(item);
......
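Reviewer note: before the routing step, each Debezium JSON string is turned into a java.util.Map with a Jackson lambda, and the trailing .returns(Map.class) supplies the output type that Java erases from the lambda, so the downstream process() and flatMap() operators can be typed. The same step in isolation, with illustrative names and the shaded Jackson the diff already imports:

import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;

import java.util.Map;

public final class JsonToMapSketch {

    static SingleOutputStreamOperator<Map> deserialize(DataStream<String> source) {
        ObjectMapper objectMapper = new ObjectMapper();
        return source
                // Jackson turns each change-event JSON payload into a java.util.Map.
                .map(x -> objectMapper.readValue(x, Map.class))
                // Lambdas lose their generic return type to erasure; returns(...) restores it.
                .returns(Map.class);
    }
}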