Flume 监听Avro客户端 输出到Kafka

2018-11-30  本文已影响0人  CNSTT

前言:

本文章适用于在Windows上使用Flume 监听Avro Client,模拟数据库表的增量同步到Kafka中。首先确保你的flume-ng可以启动,跳过个别步骤可自行百度。

1、MySQL创建表:

-- Demo table: 10 seed rows that the Flume client will read at random.
DROP TABLE IF EXISTS `avro`;
CREATE TABLE `avro` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `name` varchar(255) DEFAULT NULL,
  `createdt` datetime DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=11 DEFAULT CHARSET=latin1;

-- Explicit column list and numeric literals for the int `id` column:
-- the original quoted ids as strings ('1'), relying on MySQL's implicit
-- string-to-int conversion, and omitted the column list, which breaks
-- silently if the table definition ever changes column order.
INSERT INTO `avro` (`id`, `name`, `createdt`) VALUES
  (1,  'a', '2018-11-21 04:00:00'),
  (2,  'b', '2018-11-22 05:00:00'),
  (3,  'c', '2018-11-23 06:00:00'),
  (4,  'd', '2018-11-24 07:00:00'),
  (5,  'e', '2018-11-25 08:00:00'),
  (6,  'f', '2018-11-26 09:00:00'),
  (7,  'g', '2018-11-27 10:00:00'),
  (8,  'h', '2018-11-28 11:00:00'),
  (9,  'i', '2018-11-29 12:00:00'),
  (10, 'j', '2018-11-30 13:56:41');

avro表如图


image.png

2、Avro 的官网实例

2.1、创建Flume Avro Client :(Thrift 同理)

可参见Flume官网实例

打开Eclipse 右击
src/main/java 新建一个package org.flume.avro
新建Class MyApp.java

package org.flume.avro;

import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.api.RpcClient;
import org.apache.flume.api.RpcClientFactory;
import org.apache.flume.event.EventBuilder;
import java.nio.charset.Charset;

public class MyApp {

    /**
     * Entry point: connects to a local Flume Avro source and sends five
     * numbered sample events, then shuts the client down.
     */
    public static void main(String[] args) {
        final MyRpcClientFacade facade = new MyRpcClientFacade();
        // Port must match a1.sources.r1.port in avro.conf.
        facade.init("localhost", 41414);

        final String sampleData = "Hello Flume!";
        int sent = 0;
        while (sent < 5) {
            facade.sendDataToFlume(sampleData + " " + sent);
            sent++;
        }
        System.out.println("输出完毕");
        facade.cleanUp();
    }
}
    
/**
 * Thin wrapper around Flume's {@link RpcClient} that hides connection setup
 * and transparently reconnects (with one retry) when an event fails to send.
 */
class MyRpcClientFacade {
      private RpcClient client;
      private String hostname;
      private int port;

      /**
       * Opens the RPC connection to the given Flume Avro source.
       *
       * @param hostname host of the Flume agent's Avro source
       * @param port     port of the Avro source (must match a1.sources.r1.port)
       */
      public void init(String hostname, int port) {
        this.hostname = hostname;
        this.port = port;
        // Creates an Avro client. To talk to a Thrift source instead, use:
        // this.client = RpcClientFactory.getThriftInstance(hostname, port);
        this.client = RpcClientFactory.getDefaultInstance(hostname, port);
      }

      /**
       * Wraps {@code data} in a Flume event and sends it. On delivery failure
       * the client is recreated and the send is retried once — the original
       * version recreated the client but silently dropped the failed event.
       *
       * @param data UTF-8 payload for the event body
       */
      public void sendDataToFlume(String data) {
        Event event = EventBuilder.withBody(data, Charset.forName("UTF-8"));
        try {
          client.append(event);     // send the event
        } catch (EventDeliveryException e) {
          // Clean up, recreate the client, and retry the same event once.
          client.close();
          client = RpcClientFactory.getDefaultInstance(hostname, port);
          try {
            client.append(event);
          } catch (EventDeliveryException retryFailure) {
            // Give up after one retry, but make the data loss visible.
            System.err.println("Dropped event after retry: " + retryFailure);
          }
        }
      }

      /** Closes the RPC connection; safe to call even if init() never ran. */
      public void cleanUp() {
        if (client != null) {
          client.close();
        }
      }
}

2.2、配置conf

Flume的conf目录新建 avro.conf

a1.channels = c1
a1.sources = r1
a1.sinks = k1

# Avro source; the port must match the one used in MyApp.java.
# NOTE: Java properties files have no inline "//" comments — the original
# "41414    //端口..." would be parsed as part of the value and break the port.
a1.sources.r1.type = avro
a1.sources.r1.bind = 0.0.0.0
a1.sources.r1.port = 41414

a1.channels.c1.type = memory

a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.topic = avrosrc
a1.sinks.k1.brokerList = localhost:9092
a1.sinks.k1.requiredAcks = 1
a1.sinks.k1.batchSize = 20

a1.sinks.k1.channel = c1
a1.sources.r1.channels = c1

2.3、输出到Kafka

此处省略Kafka启动步骤,详见链接
新建Kafka Topic avrosrc

kafka-run-class.bat kafka.admin.TopicCommand --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic avrosrc

查看Topic avrosrc(此时为空)

kafka-console-consumer.bat --bootstrap-server localhost:9092 --topic avrosrc --from-beginning

启动flume-ng

D:\com\apache-flume-1.8.0-bin>flume-ng agent -c conf -f conf/avro.conf -n a1 -property "flume.root.logger=INFO,console"

Eclipse 运行 MyApp.java(右键 → Run As → Java Application)
此时观察 Topic 有数据进入


image.png

3、Avro 自定义

每秒随机读取数据库avro表的一条数据,并输出到Kafka,模拟增量数据
修改 MyApp.java

package org.flume.avro;

import java.nio.charset.Charset;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Timestamp;

import org.apache.flume.Event;
import org.apache.flume.EventDeliveryException;
import org.apache.flume.api.RpcClient;
import org.apache.flume.api.RpcClientFactory;
import org.apache.flume.event.EventBuilder;

public class MyApp {

    static final String DB_URL = "jdbc:mysql://localhost:3306/***";  // database name
    static final String USER = "***";    // database user
    static final String PASS = "***";    // database password

    /**
     * Reads one random row (id 1..10) from the {@code avro} table once per
     * second, ten times, and forwards each row to a local Flume Avro source,
     * simulating incremental data arriving in the table.
     */
    public static void main(String[] args) {
        MyRpcClientFacade client = new MyRpcClientFacade();
        client.init("localhost", 41414);  // must match a1.sources.r1.port in avro.conf
        try {
            Class.forName("com.mysql.jdbc.Driver");  // register the JDBC driver
            // try-with-resources closes the statement and connection on every
            // path (the original closed them twice on success and leaked the
            // ResultSet when a query failed).
            // PreparedStatement with a bind parameter replaces the original
            // string-concatenated SQL, preventing SQL injection.
            try (Connection conn = DriverManager.getConnection(DB_URL, USER, PASS);
                 PreparedStatement stmt = conn.prepareStatement("SELECT * FROM avro where id=?")) {
                client.sendDataToFlume("Connect to db");
                for (int i = 0; i < 10; i++) {
                    int index = (int) (Math.random() * 10) + 1;  // random id in 1..10
                    stmt.setInt(1, index);
                    try (ResultSet rs = stmt.executeQuery()) {
                        while (rs.next()) {
                            int id = rs.getInt("id");
                            String name = rs.getString("name");
                            Timestamp createdt = rs.getTimestamp("createdt");
                            System.out.print("ID: " + id);
                            System.out.print(", 名称: " + name);
                            System.out.print(", 创建时间: " + createdt);
                            System.out.print("\n");
                            // Forward the row to Flume.
                            client.sendDataToFlume("id: " + id + ", name: " + name + ", createdt: " + createdt);
                        }
                    }
                    try {
                        Thread.sleep(1000);  // wait one second to simulate incremental load
                    } catch (InterruptedException e) {
                        // Restore the interrupt flag (the original swallowed it)
                        // and stop polling instead of ignoring the interruption.
                        Thread.currentThread().interrupt();
                        break;
                    }
                }
            }
        } catch (SQLException se) {  // JDBC errors
            se.printStackTrace();
        } catch (Exception e) {  // Class.forName and anything else
            e.printStackTrace();
        }
        // Author's note: the Chinese payload arrived garbled on their setup.
        client.sendDataToFlume("avro结束");
        client.sendDataToFlume("avro over");
        System.out.println("avro结束");
        client.cleanUp();
    }
}
    
/**
 * Thin wrapper around Flume's {@link RpcClient} that hides connection setup
 * and transparently reconnects (with one retry) when an event fails to send.
 */
class MyRpcClientFacade {
      private RpcClient client;
      private String hostname;
      private int port;

      /**
       * Opens the RPC connection to the given Flume Avro source.
       *
       * @param hostname host of the Flume agent's Avro source
       * @param port     port of the Avro source (must match a1.sources.r1.port)
       */
      public void init(String hostname, int port) {
        this.hostname = hostname;
        this.port = port;
        // Creates an Avro client. To talk to a Thrift source instead, use:
        // this.client = RpcClientFactory.getThriftInstance(hostname, port);
        this.client = RpcClientFactory.getDefaultInstance(hostname, port);
      }

      /**
       * Wraps {@code data} in a Flume event and sends it. On delivery failure
       * the client is recreated and the send is retried once — the original
       * version recreated the client but silently dropped the failed event.
       *
       * @param data UTF-8 payload for the event body
       */
      public void sendDataToFlume(String data) {
        Event event = EventBuilder.withBody(data, Charset.forName("UTF-8"));
        try {
          client.append(event);     // send the event
        } catch (EventDeliveryException e) {
          // Clean up, recreate the client, and retry the same event once.
          client.close();
          client = RpcClientFactory.getDefaultInstance(hostname, port);
          try {
            client.append(event);
          } catch (EventDeliveryException retryFailure) {
            // Give up after one retry, but make the data loss visible.
            System.err.println("Dropped event after retry: " + retryFailure);
          }
        }
      }

      /** Closes the RPC connection; safe to call even if init() never ran. */
      public void cleanUp() {
        if (client != null) {
          client.close();
        }
      }
}

再次运行 MyApp.java
随机读取表中10条数据(每秒一条),输出到Kafka


image.png

至此完成在Windows环境下使用Flume 监听Avro Client并输出到Kafka中!

谢谢阅读,有帮助的点个❤!

上一篇下一篇

猜你喜欢

热点阅读