使用下面的配置传输 4 GB 的数据需要 3 分钟。有没有办法缩短这个时间?可以达到的最佳时间是多少?我认为问题不在 HDFS 接收器(sink),而是出在源(source)或通道(channel)上。
# Identify the components on agent "memoryagent":
# one source, two sinks and one channel.
memoryagent.sources = tr_source
memoryagent.sinks = tr_sink tr1_sink
memoryagent.channels = tr_channel
# Configure the source: a spooling-directory source that ingests files
# placed in /home/apps/flumetest.
memoryagent.sources.tr_source.type = spooldir
memoryagent.sources.tr_source.spoolDir = /home/apps/flumetest
# Delete each file as soon as it has been completely ingested.
memoryagent.sources.tr_source.deletePolicy = immediate
# Number of events handed to the channel per batch.
memoryagent.sources.tr_source.batchSize = 100000
# Maximum number of characters the line deserializer puts into one event.
memoryagent.sources.tr_source.deserializer.maxLineLength = 999999999
# ---- HDFS sink "tr_sink" ----------------------------------------------
memoryagent.sinks.tr_sink.type = hdfs

# Destination: one directory per day; files are prefixed "sink0".
# The %Y%m%d escapes are resolved from the agent's local clock
# (useLocalTimeStamp) rather than from an event timestamp header.
memoryagent.sinks.tr_sink.hdfs.path = hdfspath/ymd=%Y%m%d
memoryagent.sinks.tr_sink.hdfs.filePrefix = sink0
memoryagent.sinks.tr_sink.hdfs.useLocalTimeStamp = true

# Output format: plain, uncompressed text stream (compression left disabled).
memoryagent.sinks.tr_sink.hdfs.fileType = DataStream
memoryagent.sinks.tr_sink.hdfs.writeFormat = Text
#memoryagent.sinks.tr_sink.hdfs.codeC = snappy

# File rolling: time- and count-based rolling are set to 0 (disabled per the
# Flume HDFS sink docs); only the size threshold below triggers a roll.
memoryagent.sinks.tr_sink.hdfs.rollSize = 1600000000
memoryagent.sinks.tr_sink.hdfs.rollInterval = 0
memoryagent.sinks.tr_sink.hdfs.rollCount = 0

# Throughput / HDFS I/O settings.
memoryagent.sinks.tr_sink.hdfs.batchSize = 100000
memoryagent.sinks.tr_sink.hdfs.threadsPoolSize = 20
memoryagent.sinks.tr_sink.hdfs.callTimeout = 30000
# ---- HDFS sink "tr1_sink" ---------------------------------------------
memoryagent.sinks.tr1_sink.type = hdfs

# Destination: one directory per day; files are prefixed "sink1".
# The %Y%m%d escapes are resolved from the agent's local clock
# (useLocalTimeStamp) rather than from an event timestamp header.
memoryagent.sinks.tr1_sink.hdfs.path = hdfspath/ymd=%Y%m%d
memoryagent.sinks.tr1_sink.hdfs.filePrefix = sink1
memoryagent.sinks.tr1_sink.hdfs.useLocalTimeStamp = true

# Output format: plain, uncompressed text stream (compression left disabled).
memoryagent.sinks.tr1_sink.hdfs.fileType = DataStream
memoryagent.sinks.tr1_sink.hdfs.writeFormat = Text
#memoryagent.sinks.tr1_sink.hdfs.codeC = snappy

# File rolling: time- and count-based rolling are set to 0 (disabled per the
# Flume HDFS sink docs); only the size threshold below triggers a roll.
memoryagent.sinks.tr1_sink.hdfs.rollSize = 1600000000
memoryagent.sinks.tr1_sink.hdfs.rollInterval = 0
memoryagent.sinks.tr1_sink.hdfs.rollCount = 0

# Throughput / HDFS I/O settings.
memoryagent.sinks.tr1_sink.hdfs.batchSize = 100000
memoryagent.sinks.tr1_sink.hdfs.threadsPoolSize = 20
memoryagent.sinks.tr1_sink.hdfs.callTimeout = 30000
# Configure a channel that buffers events in memory
memoryagent.channels.tr_channel.type = memory
# Maximum number of events held in the channel.
memoryagent.channels.tr_channel.capacity = 999999999
# Maximum number of events per put/take transaction; must be at least as
# large as the batchSize of the source and of every sink on this channel.
memoryagent.channels.tr_channel.transactionCapacity = 99999999
# Byte limit for the event bodies held in the channel. The property belongs
# directly to the channel; the previous key "...tr_channel.type.byteCapacity"
# is not read by the memory channel, so the limit was silently left at its
# default.
# NOTE(review): this value is ~150 GiB; confirm the agent's JVM heap (-Xmx)
# is sized accordingly, otherwise lower it.
memoryagent.channels.tr_channel.byteCapacity = 161061273600
# Bind the source and both sinks to the channel.
# Note the key names: a source takes "channels" (plural), while each sink
# takes a single "channel".
memoryagent.sources.tr_source.channels = tr_channel
memoryagent.sinks.tr_sink.channel = tr_channel
memoryagent.sinks.tr1_sink.channel = tr_channel