flume 采用Snappy格式上传到hdfs
2019-10-22 本文已影响0人
至垚
1.安装系统所需的snappy编码包
sudo apt install -y libsnappy1v5 libsnappy-dev
2.准备hadoop所需的native lib包
image.png
类似图片
放的目录为~/projects/hadoop/native
3.编辑环境变量文件写入flume所需lib包
位置是conf/flume-env.sh
内容新增
export FLUME_JAVA_LIBRARY_PATH=~/projects/hadoop/native
4.编辑flume配置文件
# Flume agent "avro_to_hdfs": Avro RPC source -> durable file channel -> HDFS sink
# writing Snappy-compressed text files partitioned by day.
avro_to_hdfs.sources = r1
avro_to_hdfs.sinks = k1
avro_to_hdfs.channels = c1
# Describe/configure the source: listen for Avro RPC on all interfaces, port 5000
avro_to_hdfs.sources.r1.type = avro
avro_to_hdfs.sources.r1.bind = 0.0.0.0
avro_to_hdfs.sources.r1.port = 5000
avro_to_hdfs.sources.r1.threads = 4
avro_to_hdfs.sources.r1.channels = c1
# to hdfs
avro_to_hdfs.sinks.k1.type = hdfs
avro_to_hdfs.sinks.k1.channel = c1
avro_to_hdfs.sinks.k1.hdfs.path = hdfs://NameNode/user/flume/%Y-%m-%d
avro_to_hdfs.sinks.k1.hdfs.writeFormat = Text
# useLocalTimeStamp makes %Y-%m-%d in hdfs.path resolve from the sink host's
# clock, so events do not need a "timestamp" header.
avro_to_hdfs.sinks.k1.hdfs.useLocalTimeStamp = true
#avro_to_hdfs.sinks.k1.hdfs.filePrefix = %{file_key}
avro_to_hdfs.sinks.k1.hdfs.fileSuffix = .snappy
# "codeC" is the official (oddly spelled) Flume property name for the codec.
avro_to_hdfs.sinks.k1.hdfs.codeC = snappy
# CompressedStream is required when hdfs.codeC is set. The original config also
# set fileType = DataStream a few lines earlier; since Java properties are
# last-value-wins the duplicate was dead, and it has been removed here.
avro_to_hdfs.sinks.k1.hdfs.fileType = CompressedStream
# Roll files at ~5 GB or every 6 hours; rollCount = 0 disables count-based rolls.
avro_to_hdfs.sinks.k1.hdfs.rollSize = 5000000000
avro_to_hdfs.sinks.k1.hdfs.rollCount = 0
avro_to_hdfs.sinks.k1.hdfs.rollInterval = 21600
# Round the path timestamp down to 6-hour buckets so files land in stable dirs.
avro_to_hdfs.sinks.k1.hdfs.round = true
avro_to_hdfs.sinks.k1.hdfs.roundValue = 6
avro_to_hdfs.sinks.k1.hdfs.roundUnit = hour
#avro_to_hdfs.sinks.k1.hdfs.proxyUser = hive
# Use a channel which buffers events durably on local disk (file channel).
# NOTE(review): "~" is NOT expanded by Flume/Java properties — these values
# create a literal "~" directory relative to the working directory. Use
# absolute paths in production.
avro_to_hdfs.channels.c1.checkpointDir = ~/projects/flume_data/gth_hdfs/checkpoint
avro_to_hdfs.channels.c1.dataDirs = ~/projects/flume_data/gth_hdfs/data
avro_to_hdfs.channels.c1.type = file
avro_to_hdfs.channels.c1.maxFileSize = 107374182400
avro_to_hdfs.channels.c1.capacity = 10000000
avro_to_hdfs.channels.c1.transactionCapacity = 40000
备注:
如果flume本地没有hdfs环境,需要单独把hadoop-client的jar包放入到flume的lib文件夹中
并将hdfs-site.xml和core-site.xml放入flume的conf文件夹中