我们的开发集群中有一个 Kafka broker 宕机了。
我发现这个宕机节点的 Kafka 数据目录比其他节点大得多。
[root@5 ~]# cd /var/local/kafka/
[root@5 kafka]# du --max-depth=1 -ch|sort -hr
100G total
100G ./data
100G .
[root@5 data]# ll
total 24
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-0
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-1
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-10
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-11
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-12
drwxr-xr-x 2 kafka kafka 70 Apr 1 13:26 __consumer_offsets-13
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-14
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-15
drwxr-xr-x 2 kafka kafka 70 Apr 1 13:26 __consumer_offsets-16
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-17
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-18
drwxr-xr-x 2 kafka kafka 70 Apr 1 13:26 __consumer_offsets-19
drwxr-xr-x 2 kafka kafka 70 Apr 1 13:26 __consumer_offsets-2
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-20
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-21
drwxr-xr-x 2 kafka kafka 70 Apr 1 13:26 __consumer_offsets-22
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-23
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-24
drwxr-xr-x 2 kafka kafka 70 Apr 1 13:26 __consumer_offsets-25
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-26
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-27
drwxr-xr-x 2 kafka kafka 70 Apr 1 13:26 __consumer_offsets-28
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-29
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-3
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-30
drwxr-xr-x 2 kafka kafka 70 Apr 1 13:26 __consumer_offsets-31
drwxr-xr-x 2 kafka kafka 70 Apr 1 13:26 __consumer_offsets-32
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-33
drwxr-xr-x 2 kafka kafka 70 Apr 1 13:26 __consumer_offsets-34
drwxr-xr-x 2 kafka kafka 70 Apr 1 13:26 __consumer_offsets-35
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-36
drwxr-xr-x 2 kafka kafka 70 Apr 1 13:26 __consumer_offsets-37
drwxr-xr-x 2 kafka kafka 70 Apr 1 13:26 __consumer_offsets-38
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-39
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-4
drwxr-xr-x 2 kafka kafka 70 Apr 1 13:26 __consumer_offsets-40
drwxr-xr-x 2 kafka kafka 70 Apr 1 13:26 __consumer_offsets-41
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-42
drwxr-xr-x 2 kafka kafka 70 Apr 6 11:25 __consumer_offsets-43
drwxr-xr-x 2 kafka kafka 70 Apr 1 13:26 __consumer_offsets-44
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-45
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-46
drwxr-xr-x 2 kafka kafka 70 Apr 1 13:26 __consumer_offsets-47
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-48
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-49
drwxr-xr-x 2 kafka kafka 70 Apr 6 11:25 __consumer_offsets-5
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-6
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-7
drwxr-xr-x 2 kafka kafka 70 Apr 6 11:25 __consumer_offsets-8
drwxr-xr-x 2 kafka kafka 70 Apr 6 14:00 __consumer_offsets-9
drwxr-xr-x 2 kafka kafka 8192 Apr 6 14:00 appTimeLog-0
-rw-r--r-- 1 kafka kafka 0 Apr 1 11:32 cleaner-offset-checkpoint
-rw-r--r-- 1 kafka kafka 55 Apr 1 11:34 meta.properties
-rw-r--r-- 1 kafka kafka 1218 Apr 6 01:34 recovery-point-offset-checkpoint
-rw-r--r-- 1 kafka kafka 1224 Apr 6 01:35 replication-offset-checkpoint
-rw-r--r-- 1 kafka kafka 0 Apr 6 01:35 replication-offset-checkpoint.tmp
下面是角色日志中的一些报错信息(未按时间升序排列):
...
2017-04-01 11:38:06,357 ERROR kafka.server.ReplicaFetcherThread: [ReplicaFetcherThread-0-95], Error for partition [__consumer_offsets,19] to broker 95:org.apache.kafka.common.errors.NotLeaderForPartitionException: This server is not the leader for that topic-partition.
...
2017-04-01 11:38:30,324 ERROR state.change.logger: Broker 97 received LeaderAndIsrRequest with correlation id 1 from controller 96 epoch 1 for partition [__consumer_offsets,20] but cannot become follower since the new leader 96 is unavailable.
...
2017-04-01 11:38:36,352 ERROR kafka.server.ReplicaFetcherThread: [ReplicaFetcherThread-0-96], Error for partition [__consumer_offsets,16] to broker 96:org.apache.kafka.common.errors.NotLeaderForPartitionException: This server is not the leader for that topic-partition.
...
2017-04-06 11:25:36,079 WARN kafka.log.Log: Found a corrupted index file, /var/local/kafka/data/__consumer_offsets-18/00000000000000000000.index, deleting and rebuilding index...
...
2017-04-01 13:26:41,962 WARN kafka.server.ReplicaFetcherThread: [ReplicaFetcherThread-0-95], Error in fetch kafka.server.ReplicaFetcherThread$FetchRequest@769e1ff5. Possible cause: org.apache.kafka.common.protocol.types.SchemaException: Error reading field 'responses': Error reading array of size 1204063, only 290 bytes available
[root@3 ~]# cd /var/local/kafka/
[root@3 kafka]# du --max-depth=1 -ch|sort -hr
48M total
48M ./data
48M .
[root@4 ~]# cd /var/local/kafka/
[root@4 kafka]# du --max-depth=1 -ch|sort -hr
48M total
48M ./data
48M .
我想知道是什么原因导致了这种情况。
有人有什么想法吗?
感谢。
答案 0 :(得分:1)
这可能是由多种原因造成的,具体取决于您的分区方式和数据处理流程。
例如,数据是按键(key)分配到各分区的,这意味着具有相同键的所有记录都会写入同一个分区并在该分区中处理。