SparkILoop with a Streaming Context gives -> java.lang.ClassNotFoundException: $anonfun$1$$anonfun$apply$1

Date: 2017-06-18 12:59:04

Tags: scala apache-spark spark-streaming read-eval-print-loop

I plan to develop a portal like Zeppelin, where users can write their own map-reduce functions. While going through the Zeppelin code I found the SparkILoop class and decided to use the Scala 2.11 build of spark-repl, but when I try the following code block with SparkILoop:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.repl.SparkILoop

var conf = new SparkConf().setMaster("spark://remote.cluster:7077").setAppName("println")
var sc = new SparkContext(conf)

val output = SparkILoop.run(
  """
    |import org.apache.spark.streaming.StreamingContext
    |import org.apache.spark.streaming.Seconds
    |import spark.implicits._
    |import org.apache.spark._
    |val ssc = new StreamingContext(sc, Seconds(4))
    |val dstream = ssc.socketTextStream("localhost",9000)
    |dstream.foreachRDD(rdd => rdd.foreach(x => println(x)))
    |ssc.start()
    |ssc.awaitTermination()
  """.stripMargin)

println(s"[[[[ $output ]]]]")

I get the following error when an event is sent:

scala> org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 3.0 failed 4 times, most recent failure: Lost task 0.3 in stage 3.0 (TID 74, 192.168.1.22, executor 0): java.lang.ClassNotFoundException: $anonfun$1$$anonfun$apply$1
    at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:348)
    at org.apache.spark.serializer.JavaDeserializationStream$$anon$1.resolveClass(JavaSerializer.scala:67)
    at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1613)
    at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1518)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1774)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
    at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1993)
    at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1918)
    at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1801)
    at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1351)
    at java.io.ObjectInputStream.readObject(ObjectInputStream.java:371)
    at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:75)
    at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:114)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:80)
    at org.apache.spark.scheduler.Task.run(Task.scala:99)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)

1 Answer:

Answer 0 (score: 0)

This is my working Java example, taken from the Zeppelin code. Adding the "-Yrepl-class-based" and "-Yrepl-outdir" arguments solved the problem: with them the REPL compiles each line into a class and writes the class files to a known output directory, which lets the executors resolve the generated $anonfun classes.

package com.custom.test;

import java.io.File;
import java.io.PrintWriter;
import java.lang.reflect.Field;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;

import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.repl.SparkILoop;
import org.apache.spark.sql.SparkSession;

import scala.Console;
import scala.collection.JavaConversions;
import scala.tools.nsc.Settings;
import scala.tools.nsc.interpreter.IMain;
import scala.tools.nsc.interpreter.Results;

public class CustomInterpreter {

    SparkSession session;
    SparkConf conf;
    SparkContext context;
    SparkILoop sparkLoop;
    Object intp;

    public CustomInterpreter(String masterURL) {


        sparkLoop = new SparkILoop((java.io.BufferedReader) null, new PrintWriter(Console.out(), false));

        Settings settings = new Settings();

        LinkedList<String> argList = new LinkedList<>();
        argList.add("-usejavacp");
        argList.add("-Yrepl-class-based");
        argList.add("-Yrepl-outdir");
        argList.add("/Users/rernas/test/ff");
        argList.add("-classpath");

        String classpath = System.getProperty("java.class.path");
        argList.add(classpath);

        scala.collection.immutable.List<String> list = JavaConversions.asScalaBuffer(argList).toList();

        settings.processArguments(list, true);
        sparkLoop.settings_$eq(settings);
        sparkLoop.createInterpreter();
        // `Utils` is the reflection helper (findClass / invokeMethod / invokeStaticMethod)
        // borrowed from the Zeppelin sources; it is not a standard library class.
        intp = Utils.invokeMethod(sparkLoop, "intp");
        Utils.invokeMethod(intp, "setContextClassLoader");
        Utils.invokeMethod(intp, "initializeSynchronous");

        System.out.println("settings.outputDirs().getSingleOutput().get() : " + settings.outputDirs().getSingleOutput().get());

//      Results.Result res = interpret("import java.lang.Integer");
//      res = interpret("val i : Integer = new Integer(2)");
//      res = interpret("val j : Integer = new Integer(3)");
//      res = interpret("val r = i + j");


        conf = new SparkConf().setMaster(masterURL);
        // Must point at the same directory as -Yrepl-outdir so the executors can
        // fetch the classes the REPL generates.
        conf.set("spark.repl.class.outputDir", "/Users/rernas/test/ff");
        conf.set("spark.scheduler.mode", "FAIR");
        Class SparkSession = Utils.findClass("org.apache.spark.sql.SparkSession");
        Object builder = Utils.invokeStaticMethod(SparkSession, "builder");
        Utils.invokeMethod(builder, "config", new Class[] { SparkConf.class }, new Object[] { conf });

        session = (SparkSession) Utils.invokeMethod(builder, "getOrCreate");

        context = (SparkContext) Utils.invokeMethod(session, "sparkContext");



        importCommonSparkPackages();

        bindSparkComponents();

        System.out.println("intp:" + intp);
        System.out.println("session : " + session);
        System.out.println("context : " + context);

    }

    private void bindSparkComponents() {
        // Expose the Java-side SparkContext and SparkConf to the interpreter as `sc` and `conf`.
        interpret("@transient val _binder = new java.util.HashMap[String, Object]()");
        Map<String, Object> binder = (Map<String, Object>) getLastObject();
        binder.put("sc", context);
        binder.put("conf", conf);

         interpret("@transient val sc = "
                  + "_binder.get(\"sc\").asInstanceOf[org.apache.spark.SparkContext]");
         interpret("@transient val conf = "
                  + "_binder.get(\"conf\").asInstanceOf[org.apache.spark.SparkConf]");

    }

    private void importCommonSparkPackages() {
        Results.Result res = interpret("import org.apache.spark._");
        res = interpret("import org.apache.spark.streaming._");
        res = interpret("import org.apache.spark.streaming.StreamingContext._ ");
        res = interpret("import org.apache.spark._");// replace
    }

    public Object getLastObject() {
        IMain.Request r = (IMain.Request) Utils.invokeMethod(intp, "lastRequest");
        if (r == null || r.lineRep() == null) {
            return null;
        }
        Object obj = r.lineRep().call("$result",
            JavaConversions.asScalaBuffer(new LinkedList<>()));
        return obj;
    }

    private Results.Result interpret(String line) {
        return (Results.Result) Utils.invokeMethod(intp, "interpret", new Class[] { String.class },
            new Object[] { line });
    }

    private File createTempDir(String dir) {
        File file = null;

        // try Utils.createTempDir()
        file = (File) Utils.invokeStaticMethod(
            Utils.findClass("org.apache.spark.util.Utils"),
            "createTempDir",
            new Class[]{String.class, String.class},
            new Object[]{dir, "spark"});

        // fallback to old method
        if (file == null) {
            file = (File) Utils.invokeStaticMethod(
                Utils.findClass("org.apache.spark.util.Utils"),
                "createTempDir",
                new Class[]{String.class},
                new Object[]{dir});
        }

        return file;
    }

    public void execute(String... lines) {
        for (String line : lines ){
            interpret(line);
        }
    }

    public static void main(String[] args) {
        CustomInterpreter v = new CustomInterpreter("spark://ilkers-MacBook-Pro.local:7077");
//      CustomInterpreter v = new CustomInterpreter("local");
        v.execute(
                    "import org.apache.spark.streaming.Seconds",
                    "var ssc = new StreamingContext(sc,Seconds(10)) ",
                    "val dstream = ssc.socketTextStream(\"localhost\", 9000)",
//                  "dstream.foreachRDD(rdd => rdd.saveAsObjectFile(\"/Users/rernas/test/ff\"))",
                    "dstream.foreachRDD(rdd => rdd.foreach(x => println(x)))",
//                  "val emptyRDD = sc.parallelize(Seq(\"bisi\"))",
//                  "emptyRDD.foreach(x => println(x))"
                    "ssc.start()",
                    "ssc.awaitTermination()"
                );
    }

}
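
For reference, the same two flags can also be passed to the SparkILoop.run approach from the question. The sketch below is untested and makes two assumptions: that the Spark 2.x repl's SparkILoop companion object exposes the run(code, settings) overload, and that the SparkConf behind `sc` also sets spark.repl.class.outputDir to the same directory as -Yrepl-outdir (as the Java example above does), so the executors can fetch the generated classes. The /tmp/spark-repl-classes path is only a placeholder.

import org.apache.spark.repl.SparkILoop
import scala.tools.nsc.Settings

// Minimal sketch: hand SparkILoop a Settings object carrying the two flags from the answer.
val settings = new Settings()
settings.processArguments(List(
  "-usejavacp",
  "-Yrepl-class-based",      // compile each REPL line into a class
  "-Yrepl-outdir",           // write the generated class files to a fixed directory
  "/tmp/spark-repl-classes"
), true)

// Re-run the streaming snippet from the question through the configured REPL.
val output = SparkILoop.run(
  """
    |import org.apache.spark.streaming.{Seconds, StreamingContext}
    |val ssc = new StreamingContext(sc, Seconds(4))
    |val dstream = ssc.socketTextStream("localhost", 9000)
    |dstream.foreachRDD(rdd => rdd.foreach(x => println(x)))
    |ssc.start()
    |ssc.awaitTermination()
  """.stripMargin, settings)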