Kafka 操作
2018-01-31 本文已影响15人
BlackChen
shell操作kafka
创建主题
bin/kafka-topics.sh --create --zookeeper hadoop0:2181 --replication-factor 2 --partitions 3 --topic topicnewtest1
查看主题信息
bin/kafka-topics.sh --describe --zookeeper hadoop0:2181 --topic topicnewtest1
查看kafka中已经创建的主题列表
bin/kafka-topics.sh --list --zookeeper hadoop0:2181
删除主题(需要在 broker 配置中设置 delete.topic.enable=true,否则主题只会被标记为待删除)
bin/kafka-topics.sh --delete --zookeeper hadoop0:2181 --topic topicnewtest1
增加分区
bin/kafka-topics.sh --alter --zookeeper hadoop0:2181 --topic topicnewtest1 --partitions 5
使用kafka自带的生产者客户端脚本
bin/kafka-console-producer.sh --broker-list hadoop3:9092,hadoop4:9092 --topic topicnewtest1
使用kafka自带的消费者客户端脚本
bin/kafka-console-consumer.sh --zookeeper hadoop0:2181 --from-beginning --topic topicnewtest1
程序操作
Producer
package tskafka;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
import java.util.Properties;
import java.util.Random;
/**
 * Minimal Kafka producer example: sends a few keyed string records to the
 * {@code topicnewtest1} topic and then shuts the producer down.
 */
public class ProducerClient {

    /** Topic the sample records are published to. */
    private static final String TOPIC = "topicnewtest1";

    /** Number of sample records sent per run. */
    private static final int MESSAGE_COUNT = 2;

    /**
     * Configures a string/string producer, sends {@link #MESSAGE_COUNT}
     * records whose key is a random {@code 192.168.1.x} address and whose
     * value is {@code <currentTimeMillis>---<ip>}, then closes the producer.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Properties props = new Properties();
        // Broker list used to bootstrap the connection to the cluster.
        props.put("bootstrap.servers", "hadoop2:9092,hadoop3:9092,hadoop4:9092");
        // acks=1: the leader broker acknowledges a record once it has been
        // written to its local log, without waiting for the followers to
        // replicate it. This trades some durability for throughput.
        props.put("acks", "1");
        // Keys and values are both plain strings.
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        Producer<String, String> producer = new KafkaProducer<String, String>(props);
        try {
            Random rand = new Random();
            for (int i = 0; i < MESSAGE_COUNT; i++) {
                // Random last octet in the range 0..254.
                String ip = "192.168.1." + rand.nextInt(255);
                long runtime = System.currentTimeMillis();
                String msg = runtime + "---" + ip;
                System.out.println("send to kafka->key:" + ip + " value:" + msg);
                producer.send(new ProducerRecord<String, String>(TOPIC, ip, msg));
            }
        } finally {
            // send() is asynchronous; close() waits for buffered records to be
            // delivered. Running it in finally guarantees the producer is
            // released even when send() throws.
            producer.close();
        }
    }
}
Consumer
package tskafka;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
/**
 * Kafka consumer examples for the {@code topicnewtest1} topic: one variant
 * commits offsets manually after a batch has been handled, the other relies
 * on periodic automatic commits.
 */
public class ConsumerClient {

    /** Broker list used to bootstrap the connection to the cluster. */
    private static final String BOOTSTRAP_SERVERS = "hadoop2:9092,hadoop3:9092,hadoop4:9092";

    /** Topic both example consumers subscribe to. */
    private static final String TOPIC = "topicnewtest1";

    /** Deserializer class used for both keys and values. */
    private static final String STRING_DESERIALIZER =
            "org.apache.kafka.common.serialization.StringDeserializer";

    /** Timeout, in milliseconds, passed to every {@code poll} call. */
    private static final long POLL_TIMEOUT_MS = 1000L;

    /**
     * Builds the configuration shared by both example consumers.
     *
     * @param groupId    consumer group id
     * @param autoCommit whether the consumer commits offsets automatically
     * @return a fresh {@link Properties} instance holding the common settings
     */
    private static Properties baseProps(String groupId, boolean autoCommit) {
        Properties props = new Properties();
        props.put("bootstrap.servers", BOOTSTRAP_SERVERS);
        props.put("group.id", groupId);
        props.put("enable.auto.commit", String.valueOf(autoCommit));
        // Where to start when the group has no committed offset:
        //   earliest - start from the oldest available offset
        //   latest   - start from the newest offset
        //   none     - throw if no previous offset is found for the group
        props.put("auto.offset.reset", "earliest");
        props.put("key.deserializer", STRING_DESERIALIZER);
        props.put("value.deserializer", STRING_DESERIALIZER);
        return props;
    }

    /**
     * Consumes records with auto-commit disabled. Records are buffered until
     * at least 10 have been collected; the batch is then "processed"
     * (simulated by a one-second sleep) and the offsets are committed
     * synchronously.
     *
     * <p>Runs until the thread is interrupted during processing or the
     * consumer throws. An interrupted batch is not committed. The consumer
     * is always closed, and the interrupt flag is restored afterwards.
     */
    public static void manualCommintClient() {
        KafkaConsumer<String, String> consumer =
                new KafkaConsumer<String, String>(baseProps("manualcg1", false));
        boolean interrupted = false;
        try {
            consumer.subscribe(Arrays.asList(TOPIC));
            final int minBatchSize = 10;
            List<ConsumerRecord<String, String>> bufferList =
                    new ArrayList<ConsumerRecord<String, String>>();
            while (true) {
                System.out.println("--------------start pull message---------------");
                long starttime = System.currentTimeMillis();
                ConsumerRecords<String, String> records = consumer.poll(POLL_TIMEOUT_MS);
                long endtime = System.currentTimeMillis();
                // Floating-point division: integer division would truncate
                // every sub-second poll to 0.
                double tm = (endtime - starttime) / 1000.0;
                System.out.println("--------------end pull message and times=" + tm + "s -------------");
                for (ConsumerRecord<String, String> record : records) {
                    System.out.printf("partition = %d, offset = %d, key = %s, value = %s%n",
                            record.partition(), record.offset(), record.key(), record.value());
                    bufferList.add(record);
                }
                System.out.println("--------------buffer size->" + bufferList.size());
                if (bufferList.size() >= minBatchSize) {
                    System.out.println("******start deal message******");
                    try {
                        // Stand-in for real batch processing.
                        Thread.sleep(1000);
                    } catch (InterruptedException e) {
                        // Processing did not finish: leave the batch
                        // uncommitted and stop consuming.
                        interrupted = true;
                        break;
                    }
                    System.out.println("manual commint offset start...");
                    consumer.commitSync();
                    bufferList.clear();
                    System.out.println("manual commint offset end...");
                }
            }
        } finally {
            try {
                consumer.close();
            } finally {
                // Restore the flag only after close(), so shutdown itself is
                // not disturbed by the pending interrupt.
                if (interrupted) {
                    Thread.currentThread().interrupt();
                }
            }
        }
    }

    /**
     * Consumes records with auto-commit enabled (commit interval 1000 ms) and
     * prints each record. Runs until the consumer throws; the consumer is
     * closed on the way out.
     */
    public static void autoCommintClient() {
        Properties props = baseProps("newautocgt1", true);
        props.put("auto.commit.interval.ms", "1000");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(props);
        try {
            consumer.subscribe(Arrays.asList(TOPIC));
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(POLL_TIMEOUT_MS);
                for (ConsumerRecord<String, String> record : records) {
                    System.out.printf("partition = %d, offset = %d, key = %s, value = %s%n",
                            record.partition(), record.offset(), record.key(), record.value());
                }
            }
        } finally {
            consumer.close();
        }
    }

    /**
     * Entry point: runs the auto-commit example. Call
     * {@link #manualCommintClient()} instead to try manual offset commits.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        autoCommintClient();
    }
}
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the Kafka producer/consumer examples. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Project coordinates. -->
<groupId>testKafka</groupId>
<artifactId>testKafka</artifactId>
<version>1.0-SNAPSHOT</version>
<dependencies>
<!-- Kafka client library; provides the org.apache.kafka.clients producer and consumer classes used by the examples. -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>0.10.2.0</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<executions>
<!-- Configures the default-compile execution so sources are read as UTF-8. -->
<!-- NOTE(review): no source/target level is set here, so the plugin's default language level applies; confirm it matches the JDK in use. -->
<execution>
<id>default-compile</id>
<phase>compile</phase>
<goals>
<goal>compile</goal>
</goals>
<configuration>
<encoding>UTF-8</encoding>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
注意: 其中 hadoop0、hadoop1 等为主机名。