Spark SQL, RDD foreach: ClassNotFoundException: com.RDDForEach$$anonfun$main$1

Date: 2018-09-12 07:25:27

Tags: scala apache-spark rdd

I am using Spark with Scala to read data from a Hive table named persons, which has a column name. When I call foreach on the RDD, an exception occurs. The error is: Caused by: java.lang.ClassNotFoundException: test.RDDForEach$$anonfun$main$1

What I am trying to do is print each person's name from the Hive table. More generally, I just want to use Spark to fetch data from Hive and print it; any other approach that works would be fine (a minimal alternative sketch follows the code below).

package test

import scala.collection.mutable.ListBuffer
import org.slf4j.LoggerFactory
import com.typesafe.config._
import org.apache.spark.sql._
import org.apache.spark.sql.types._
import org.apache.spark.sql.functions._
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkConf
import scala.reflect.api.materializeTypeTag
import com.mongodb.spark._
import org.bson._
import com.mongodb.spark.config._
import com.github.nscala_time.time.Imports._

object RDDForEach {
  private val log = LoggerFactory.getLogger(this.getClass)
  private val conf = ConfigFactory.load()
  private val databaseName = conf.getString("mongodb.databasename")
  private val collection = conf.getString("mongodb.collection")
  private val mongouri_beehive = conf.getString("mongodb.mongouri_beehive")
  private val mongouri_tushare = conf.getString("mongodb.mongouri_tushare")
  private val mongouri_datamining = conf.getString("mongodb.mongouri_dataming")
  private val jar_location = conf.getString("hdfs.jar_location")
  private val hadoop_user = conf.getString("hadoop.user")

  System.setProperty("HADOOP_USER_NAME", hadoop_user)
  System.setProperty("SPARK_YARN_MODE", "yarn")

  def main(args: Array[String]) {
    val sparkConf = new SparkConf()
      .setAppName("writeAddrMetaData")
      // MongoDB input settings (assumed: mongouri_beehive is the intended input URI)
      .set("spark.mongodb.input.uri", mongouri_beehive)
      .set("spark.mongodb.input.database", databaseName)
      .set("spark.mongodb.input.collection", collection)
      // YARN client mode with modest resources
      .setMaster("yarn-client")
      .set("spark.executor.memory", "1g")
      .set("spark.executor.cores", "1")
      .set("spark.cores.max", "2")
      .set("spark.driver.maxResultSize", "1g")
      .set("spark.driver.memory", "1g")
      // cluster configuration files and the Spark jars location on HDFS
      .set("spark.yarn.dist.files", "src\\main\\resources\\yarn-site.xml,src\\main\\resources\\resource-types.xml")
      .set("spark.yarn.jars", jar_location)
      .set("spark.files", "src\\main\\resources\\hdfs-site.xml,src\\main\\resources\\core-site.xml")

    val builder = SparkSession.builder().config(sparkConf).enableHiveSupport()
    val ss = builder.getOrCreate()
    val sc = ss.sparkContext
    import ss.implicits._

    // read the Hive table and print the name column on the executors
    val df = ss.sql("select name from persons")
    df.rdd.foreach(f => println(f.getString(0)))
  }
}
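
Since any way of printing the names would do, a minimal alternative (a sketch, assuming the result set is small enough to collect to the driver) avoids running a user-defined closure on the executors at all:

    // Sketch: bring the rows back to the driver and print them there,
    // so no anonymous closure class has to be deserialized on the executors.
    ss.sql("select name from persons")
      .collect()
      .foreach(row => println(row.getString(0)))

    // or simply let Spark render the first rows:
    ss.sql("select name from persons").show(20, false)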

The exception thrown by the foreach call in the original code is:

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1599)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1587)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1586)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1586)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1820)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1769)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1758)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:642)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2027)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2048)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2067)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2092)
    at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:921)
    at org.apache.spark.rdd.RDD$$anonfun$foreach$1.apply(RDD.scala:919)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
    at org.apache.spark.rdd.RDD.foreach(RDD.scala:919)
    at delme.RDDForEach$.main(RDDForEach.scala:56)
    at delme.RDDForEach.main(RDDForEach.scala)
Caused by: java.lang.ClassNotFoundException: delme.RDDForEach$$anonfun$main$1
    at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:348)
    at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67)
    at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1866)
    at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1749)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2040)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1571)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2285)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2209)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2067)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1571)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2285)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2209)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2067)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1571)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2285)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2209)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2067)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1571)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:431)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:80)
    at org.apache.spark.scheduler.Task.run(Task.scala:109)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
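
The missing class RDDForEach$$anonfun$main$1 is the anonymous function passed to foreach, so the executors apparently cannot load the application's own compiled classes. One hedged workaround, assuming the project is packaged into a jar (the path below is a placeholder, not taken from the original code), is to register that jar on the SparkConf so it is shipped to the YARN executors:

    // Sketch only: ship the application's own jar to the executors so they
    // can load the $$anonfun closure classes; the jar path is hypothetical.
    val sparkConf = new SparkConf()
      .setAppName("writeAddrMetaData")
      .setMaster("yarn-client")
      .setJars(Seq("target/scala-2.11/rddforeach_2.11-0.1.jar"))

Submitting the application with spark-submit, which distributes the application jar automatically, should have the same effect.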

0 Answers:

No answers yet.