Spark SQL: converting a JSON millisecond timestamp to a date format

Time: 2018-11-13 18:36:13

Tags: java apache-spark apache-spark-sql

The schema declares the field's dataType as Timestamp, but the Spark job does not convert the value into the correct format.

Dataset<Row> stream = sparkSession.readStream()
          .format("kafka")
          .option("kafka.bootstrap.servers", kafkaBootstrapServersString)
          .option("subscribe", topic)
//          .option("maxOffsetsPerTrigger", 10000)
          .load();

      Dataset<Row> rawStream = stream
              .selectExpr("CAST(value AS STRING)")
              .select(from_json(col("value"), eventSpecificStructType).as("eventData"))
              .select("eventData.*")
              .filter(col("eventType").equalTo("Test"));

As a result, 1542126896113 is converted to the timestamp 50838-01-28 18:49:111.0.
Is there a way to convert the milliseconds into a datetime format?
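
For context, the schema presumably resembles the following sketch (the field names here are assumptions, not from the post). With the field declared as TimestampType, from_json reads a bare JSON number as seconds since the epoch, which is why a millisecond value like 1542126896113 lands tens of thousands of years in the future:

import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.StructType;

// Hypothetical reconstruction of eventSpecificStructType: with TimestampType,
// from_json interprets a bare JSON number as *seconds* since the epoch, so a
// millisecond value overshoots by a factor of 1000.
StructType eventSpecificStructType = new StructType()
        .add("eventType", DataTypes.StringType)
        .add("eventTimestamp", DataTypes.TimestampType);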

2 answers:

Answer 0: (score: 0)

How about dividing the millisecond value by 1000? Does that produce the value you expect?
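
A minimal sketch of this approach (assuming the field is declared as LongType in the schema and named eventTimestamp; both are assumptions, not from the original post):

import static org.apache.spark.sql.functions.expr;

// Dividing the millisecond epoch by 1000 yields fractional seconds, which
// CAST(... AS TIMESTAMP) interprets correctly while preserving the millisecond
// part (1542126896113 -> 2018-11-13 16:34:56.113 UTC).
Dataset<Row> withEventTime = rawStream.withColumn(
        "eventTime", expr("CAST(eventTimestamp / 1000 AS TIMESTAMP)"));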

Answer 1: (score: 0)

You will have to create a UDF in Java.

import java.sql.Timestamp;
import java.text.ParseException;
import java.text.SimpleDateFormat;

import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

SimpleDateFormat dateFormat = new SimpleDateFormat("....Date time pattern...");
spark.udf().register("timestamp", new UDF1<String, Timestamp>() {
    private static final long serialVersionUID = 1335972766810808134L;

    @Override
    public Timestamp call(String source) {
        try {
            return new Timestamp(dateFormat.parse(source).getTime());
        } catch (ParseException e) {
            e.printStackTrace();
        }
        return null;
    }
}, DataTypes.TimestampType);

Finally:

stream = stream.withColumn("col", callUDF("timestamp", col("col")));
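
Since the raw value in the question is a millisecond epoch number rather than a formatted date string, a variant of the same UDF idea can build the Timestamp directly. This is a sketch; the name millisToTimestamp and the Long input type are assumptions:

import java.sql.Timestamp;

import org.apache.spark.sql.api.java.UDF1;
import org.apache.spark.sql.types.DataTypes;

// java.sql.Timestamp's constructor already takes milliseconds since the
// epoch, so a value like 1542126896113 needs no SimpleDateFormat parsing.
spark.udf().register("millisToTimestamp", new UDF1<Long, Timestamp>() {
    @Override
    public Timestamp call(Long millis) {
        return millis == null ? null : new Timestamp(millis);
    }
}, DataTypes.TimestampType);

stream = stream.withColumn("col", callUDF("millisToTimestamp", col("col")));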