去年,我从 Spark-2.4.0 的官方网站下载了源代码。不幸的是,在运行 Spark Core 的内部测试(基于 SparkFunSuite)时遇到了异常。该测试名为 “Comma separated paths for newAPIHadoopFile/wholeTextFiles/binaryFiles (SPARK-7155)”,位于 core/src/test/scala/org/apache/spark/SparkContextSuite.scala 中,代码从第 338 行开始。
实际上我对这个问题一无所知。我试图建立一个新的项目来复现并解决它,但最终还是失败了。我在想:首先,代码本身应该没有问题,因为它属于 Spark 官方源码;其次,也许是环境存在我没有意识到的问题——我检查了 Java 和 Scala 的版本,分别是 1.8 和 2.11.12,这应该是正常的;第三,我试图根据控制台日志来定位问题,等等,这是什么?
// Regression test for SPARK-7155: comma separated paths must be accepted by
// textFile / hadoopFile / newAPIHadoopFile / wholeTextFiles / binaryFiles.
// NOTE: the original paste used curly quotes and collapsed the `//` comments
// onto one line, which made it uncompilable; restored here verbatim from the
// Spark source, with one fix in the `finally` block (see below).
test("Comma separated paths for newAPIHadoopFile/wholeTextFiles/binaryFiles (SPARK-7155)") {
  // dir1 and dir2 are used for wholeTextFiles and binaryFiles
  val dir1 = Utils.createTempDir()
  val dir2 = Utils.createTempDir()

  val dirpath1 = dir1.getAbsolutePath
  val dirpath2 = dir2.getAbsolutePath

  // file1 and file2 are placed inside dir1, they are also used for
  // textFile, hadoopFile, and newAPIHadoopFile
  // file3, file4 and file5 are placed inside dir2, they are used for
  // textFile, hadoopFile, and newAPIHadoopFile as well
  val file1 = new File(dir1, "part-00000")
  val file2 = new File(dir1, "part-00001")
  val file3 = new File(dir2, "part-00000")
  val file4 = new File(dir2, "part-00001")
  val file5 = new File(dir2, "part-00002")

  val filepath1 = file1.getAbsolutePath
  val filepath2 = file2.getAbsolutePath
  val filepath3 = file3.getAbsolutePath
  val filepath4 = file4.getAbsolutePath
  val filepath5 = file5.getAbsolutePath

  try {
    // Create 5 text files.
    Files.write("someline1 in file1\nsomeline2 in file1\nsomeline3 in file1", file1,
      StandardCharsets.UTF_8)
    Files.write("someline1 in file2\nsomeline2 in file2", file2, StandardCharsets.UTF_8)
    Files.write("someline1 in file3", file3, StandardCharsets.UTF_8)
    Files.write("someline1 in file4\nsomeline2 in file4", file4, StandardCharsets.UTF_8)
    Files.write("someline1 in file2\nsomeline2 in file5", file5, StandardCharsets.UTF_8)

    // If this constructor throws (e.g. an environment problem such as a
    // missing Hadoop native setup), `sc` is never assigned and stays null.
    sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local"))

    // Test textFile, hadoopFile, and newAPIHadoopFile for file1 and file2
    assert(sc.textFile(filepath1 + "," + filepath2).count() == 5L)
    assert(sc.hadoopFile(filepath1 + "," + filepath2,
      classOf[TextInputFormat], classOf[LongWritable], classOf[Text]).count() == 5L)
    assert(sc.newAPIHadoopFile(filepath1 + "," + filepath2,
      classOf[NewTextInputFormat], classOf[LongWritable], classOf[Text]).count() == 5L)

    // Test textFile, hadoopFile, and newAPIHadoopFile for file3, file4, and file5
    assert(sc.textFile(filepath3 + "," + filepath4 + "," + filepath5).count() == 5L)
    assert(sc.hadoopFile(filepath3 + "," + filepath4 + "," + filepath5,
      classOf[TextInputFormat], classOf[LongWritable], classOf[Text]).count() == 5L)
    assert(sc.newAPIHadoopFile(filepath3 + "," + filepath4 + "," + filepath5,
      classOf[NewTextInputFormat], classOf[LongWritable], classOf[Text]).count() == 5L)

    // Test wholeTextFiles, and binaryFiles for dir1 and dir2
    assert(sc.wholeTextFiles(dirpath1 + "," + dirpath2).count() == 5L)
    assert(sc.binaryFiles(dirpath1 + "," + dirpath2).count() == 5L)
  } finally {
    // FIX: guard against `sc` being null. When SparkContext construction
    // fails, an unconditional `sc.stop()` here throws NullPointerException
    // and masks the real failure — which is exactly the bare NPE shown in
    // the console log (SparkContextSuite.scala:394 is this line). With the
    // guard in place, the original exception from `new SparkContext(...)`
    // propagates and reveals the actual environment problem.
    if (sc != null) {
      sc.stop()
    }
  }
}
控制台日志显示如下:
java.lang.NullPointerException was thrown.
java.lang.NullPointerException
at org.apache.spark.SparkContextSuite$$anonfun$29.apply(SparkContextSuite.scala:394)
at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
at org.scalatest.Transformer.apply(Transformer.scala:22)
at org.scalatest.Transformer.apply(Transformer.scala:20)
at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186)
at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:103)
at org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:183)
at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:196)
at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:196)
at org.scalatest.SuperEngine.runTestImpl(Engine.scala:289)
at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:196)
at org.apache.spark.SparkContextSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkContextSuite.scala:40)
at org.scalatest.BeforeAndAfterEach$class.runTest(BeforeAndAfterEach.scala:221)
at org.apache.spark.SparkContextSuite.runTest(SparkContextSuite.scala:40)
at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:229)
at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:229)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:396)
at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:384)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:384)
at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:379)
at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:461)
at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:229)
at org.scalatest.FunSuite.runTests(FunSuite.scala:1560)
at org.scalatest.Suite$class.run(Suite.scala:1147)
at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1560)
at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:233)
at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:233)
at org.scalatest.SuperEngine.runImpl(Engine.scala:521)
at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:233)
at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:52)
at org.scalatest.BeforeAndAfterAll$class.liftedTree1$1(BeforeAndAfterAll.scala:213)
at org.scalatest.BeforeAndAfterAll$class.run(BeforeAndAfterAll.scala:210)
at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:52)
at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45)
at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1340)
at org.scalatest.tools.Runner$$anonfun$doRunRunRunDaDoRunRun$1.apply(Runner.scala:1334)
at scala.collection.immutable.List.foreach(List.scala:392)
at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1334)
at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1011)
at org.scalatest.tools.Runner$$anonfun$runOptionallyWithPassFailReporter$2.apply(Runner.scala:1010)
at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1500)
at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1010)
at org.scalatest.tools.Runner$.run(Runner.scala:850)
at org.scalatest.tools.Runner.run(Runner.scala)
at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:131)
at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:28)
我想找出遇到的问题。我是BJUT的学生,主修大数据。我的老师总是在逼我完成一堆任务,这让我感到很累。