我已经安装了1.zookeeper 2.卡夫卡 3.火花 4.Cassandra ....都在本地Windows 7 ..
现在从eclipse中,我正在执行用Scala编写的Spark Kafka程序,并尝试将数据推送到本地Cassandra表中。但是我不断在下面收到此错误。
错误:-java.lang.IllegalArgumentException:要求失败:任务 尝试0已在处注册 scala.Predef $ .require(Predef.scala:224)在 org.apache.spark.storage.BlockInfoManager.registerTask(BlockInfoManager.scala:155) 在 org.apache.spark.storage.BlockManager.registerTask(BlockManager.scala:727) 在org.apache.spark.scheduler.Task.run(Task.scala:79)在 org.apache.spark.executor.Executor $ TaskRunner.run(Executor.scala:338) 在 java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 在 java.util.concurrent.ThreadPoolExecutor $ Worker.run(ThreadPoolExecutor.java:624) 在java.lang.Thread.run(Thread.java:748)18/08/16 19:21:59警告 TaskSetManager:在阶段0.0中丢失任务0.0(TID 0,localhost,执行程序 驱动程序):java.lang.IllegalArgumentException:要求失败:任务 尝试0已在处注册 scala.Predef $ .require(Predef.scala:224)在 org.apache.spark.storage.BlockInfoManager.registerTask(BlockInfoManager.scala:155) 在 org.apache.spark.storage.BlockManager.registerTask(BlockManager.scala:727) 在org.apache.spark.scheduler.Task.run(Task.scala:79)在 org.apache.spark.executor.Executor $ TaskRunner.run(Executor.scala:338) 在 java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 在 java.util.concurrent.ThreadPoolExecutor $ Worker.run(ThreadPoolExecutor.java:624) 在java.lang.Thread.run(Thread.java:748)
有人对如何解决这个问题有任何想法?? .....
下面是我的代码:-
object KafkaDStreamWordCountConsumer {
def main(args: Array[String]) {
if (args.length < 3) {
System.err.println(s"""
|Usage: DirectKafkaWordCount <brokers> <topics>
| <brokers> is a list of one or more Kafka brokers
| <groupId> is a consumer group name to consume from topics
| <topics> is a list of one or more kafka topics to consume from
|
""".stripMargin)
System.exit(1)
}
val Array(brokers, groupId, topics) = args
// Create context with 2 second batch interval
val sparkConf = new SparkConf().setAppName("DirectKafkaWordCount")
.setMaster("local[4]")
.set("spark.driver.allowMultipleContexts","true")
.set("spark.cassandra.connection.host","127.0.0.1")
/*.set("spark.cassandra.connection.native.port", "9042")
.set("spark.cassandra.connection.rpc.port", "9160")*/
.set("spark.cassandra.connection.timeout_ms", "5000")
.set("spark.cassandra.read.timeout_ms", "200000")
val ssc = new StreamingContext(sparkConf, Seconds(10))
// create a timer that we will use to stop the processing after 60 seconds so we can print some results
val timer = new Thread() {
override def run() {
Thread.sleep(1000 * 30)
ssc.stop()
}
}
val sc = new SparkContext(sparkConf)
val sqlContext = new SQLContext(sc)
import sqlContext.implicits._
// Create direct kafka stream with brokers and topics
val topicsSet = topics.split(",").toSet
val kafkaParams = Map[String, Object](
ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers,
ConsumerConfig.GROUP_ID_CONFIG -> groupId,
ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer],
ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> classOf[StringDeserializer])
val messages = KafkaUtils.createDirectStream[String, String](
ssc,
LocationStrategies.PreferConsistent,
ConsumerStrategies.Subscribe[String, String](topicsSet, kafkaParams))
// Get the lines, split them into words, count the words and print
val lines = messages.map(_.value)
val words = lines.flatMap(_.split(" "))
val wordCounts = words.map(x => (x, 1L))
.reduceByKey(_ + _)
wordCounts.foreachRDD{ rdd =>
// Get the singleton instance of SparkSession
val spark = SparkSession.builder.config(rdd.sparkContext.getConf).getOrCreate()
rdd.map { case ((key, value)) => (key, value) }
rdd.saveToCassandra("demo", "wordcount", SomeColumns("word", "count"))
// import spark.implicits._
// Convert RDD[String] to DataFrame
val wordsDataFrame = rdd.toDF("word","count")
// Create a temporary view
wordsDataFrame.createOrReplaceTempView("wordCounts")
val wordCountsDataFrame = spark.sql("select word, count from wordCounts")
wordCountsDataFrame.show()
}
// Start the computation
ssc.start()
ssc.awaitTermination()
timer.start()
ssc.stop()
}