Alpakka - 从S3读取Kryo序列化对象

时间:2017-11-22 08:14:29

标签: scala amazon-s3 akka kryo alpakka

我有存储在S3上的Kryo序列化二进制数据(数千个序列化对象)。

Alpakka 允许将内容读取为 data: Source[ByteString, NotUsed]。但 Kryo 格式不使用分隔符,因此我无法通过 data.via(Framing.delimiter(...)) 将每个序列化对象拆分为单独的 ByteString。

因此,Kryo实际上需要读取数据以了解对象何时结束,并且它看起来不流式友好。

是否有可能以流式方式实现这种场景,从而最终得到 Source[MyObject, NotUsed]?

1 个答案:

答案 0 :(得分:1)

这是一个用于此目的的 GraphStage(图阶段)。它能处理序列化对象跨越两个 ByteString 的情况。当对象很大(不是我的用例)、可能需要占用 Source[ByteString, NotUsed] 中两个以上的 ByteString 时,还需要对它进行改进。

/** Factory for the Kryo decoding flow. */
object KryoReadStage {

  /**
    * Wraps a new [[KryoReadStage]] into a `Flow`.
    *
    * @param kryoSupport passed through to the stage
    * @param `class`     class of the objects to decode
    * @param serializer  serializer passed through to the stage
    * @tparam T type of the decoded objects
    * @return a flow from byte chunks to batches of decoded objects
    */
  def flow[T](kryoSupport: KryoSupport,
              `class`: Class[T],
              serializer: Serializer[_]): Flow[ByteString, immutable.Seq[T], NotUsed] = {
    val stage = new KryoReadStage[T](kryoSupport, `class`, serializer)
    Flow.fromGraph(stage)
  }
}

/**
  * Graph stage that turns a stream of [[ByteString]] chunks into batches of objects decoded with Kryo.
  *
  * On every incoming chunk the stage decodes as many objects as it can and pushes them downstream as
  * one batch (the batch may be empty). Bytes following the last successfully decoded object are kept
  * and prepended to the next chunk.
  *
  * NOTE(review): any `KryoException` raised while reading is treated as "object not complete yet";
  * other decoding problems reported through `KryoException` are therefore not surfaced.
  * NOTE(review): the stage does not override `onUpstreamFinish`, so bytes still buffered when
  * upstream completes are never decoded — confirm that this is intended.
  *
  * @param kryoSupport provides the `Kryo` instance used for decoding
  * @param `class`     class handed to `kryo.readObject`
  * @param serializer  serializer handed to `kryo.readObject`
  * @tparam T type of the decoded objects
  */
final class KryoReadStage[T](kryoSupport: KryoSupport,
                             `class`: Class[T],
                             serializer: Serializer[_])
  extends GraphStage[FlowShape[ByteString, immutable.Seq[T]]] {

  private lazy val in: Inlet[ByteString] = Inlet("KryoReadStage.in")
  private lazy val out: Outlet[immutable.Seq[T]] = Outlet("KryoReadStage.out")

  override def shape: FlowShape[ByteString, immutable.Seq[T]] = FlowShape.of(in, out)

  override def toString: String = "KryoReadStage"

  override def createLogic(inheritedAttributes: Attributes): GraphStageLogic =
    new GraphStageLogic(shape) {

      private val NoBytes: Array[Byte] = Array.empty

      // Bytes received so far that did not yet form a complete object.
      private var carryOver: Array[Byte] = NoBytes

      setHandler(out, new OutHandler {
        // Downstream demand is forwarded upstream as a request for one more chunk.
        override def onPull(): Unit = pull(in)
      })

      setHandler(in, new InHandler {

        override def onPush(): Unit = {
          val chunk = grab(in)
          val bytes =
            if (carryOver.length > 0) ByteString.fromArrayUnsafe(carryOver) ++ chunk
            else chunk

          Managed(new Input(new ByteBufferBackedInputStream(bytes.asByteBuffer))) { input =>
            // Offset in `bytes` just past the last object that was decoded completely.
            var consumed = 0
            val decoded = List.newBuilder[T]

            kryoSupport.withKryo { kryo =>
              var incomplete = false

              while (!incomplete && !input.eof()) {
                val next: Option[T] =
                  try Some(kryo.readObject(input, `class`, serializer))
                  catch { case _: KryoException => None }

                next match {
                  case None =>
                    // Keep everything after the last complete object for the next chunk.
                    carryOver = bytes.drop(consumed).toArray
                    incomplete = true
                  case Some(obj) =>
                    decoded += obj
                    consumed = input.total().toInt
                    carryOver = NoBytes
                }
              }

              push(out, decoded.result())
            }
          }
        }

      })

    }

}

使用示例

// Download the object as a stream of byte chunks, decode each chunk into a batch of objects and
// flatten the batches so that downstream sees individual objects. `mapConcat` flattens the
// in-memory batches directly instead of materialising a sub-stream per batch.
client.download(BucketName, key)
  .via(KryoReadStage.flow(kryoSupport, `class`, serializer))
  .mapConcat(identity)

它使用了下面的一些额外帮助。

ByteBufferBackedInputStream

/**
  * `InputStream` that reads from a `ByteBuffer`.
  *
  * Reading advances the position of the wrapped buffer; the end of the stream is reached when the
  * buffer has no remaining bytes.
  *
  * @param buf buffer to read from
  */
class ByteBufferBackedInputStream(buf: ByteBuffer) extends InputStream {

  /** Returns the next byte as an unsigned value (0-255), or -1 when the buffer is exhausted. */
  override def read: Int = {
    if (!buf.hasRemaining) -1
    else buf.get & 0xFF
  }

  /**
    * Copies up to `len` bytes into `bytes`, starting at index `off`.
    *
    * @return the number of bytes copied, 0 when `len` is 0, or -1 when the buffer is exhausted
    */
  override def read(bytes: Array[Byte], off: Int, len: Int): Int = {
    // InputStream contract: a zero-length read returns 0, even at end of stream.
    if (len == 0) 0
    else if (!buf.hasRemaining) -1
    else {
      val count = Math.min(len, buf.remaining)
      buf.get(bytes, off, count)
      count
    }
  }

}

Managed

/**
  * Loan-pattern helper: runs an operation against a resource and closes the resource afterwards.
  */
object Managed {

  /** Evidence that a `T` can be viewed as an `AutoCloseable`. */
  type AutoCloseableView[T] = T => AutoCloseable

  /**
    * Applies `op` to `resource` and closes `resource` afterwards, whether or not `op` succeeds.
    *
    * If `op` throws and closing then fails as well, the exception thrown by `op` is the one that
    * propagates; the non-fatal close failure is attached to it as a suppressed exception, so the
    * original cause is not lost.
    *
    * @param resource resource to operate on and close
    * @param op       operation to run with the resource
    * @tparam T resource type, viewable as `AutoCloseable`
    * @tparam V result type of `op`
    * @return the value produced by `op`
    */
  def apply[T: AutoCloseableView, V](resource: T)(op: T => V): V = {
    var opFailure: Option[Throwable] = None
    try {
      op(resource)
    } catch {
      case failure: Throwable =>
        // Only recorded so the finally block knows about it; rethrown immediately.
        opFailure = Some(failure)
        throw failure
    } finally {
      opFailure match {
        case None =>
          resource.close()
        case Some(failure) =>
          try resource.close()
          catch {
            // addSuppressed rejects self-suppression, hence the identity guard.
            case scala.util.control.NonFatal(closeFailure) if closeFailure ne failure =>
              failure.addSuppressed(closeFailure)
          }
      }
    }
  }
}

KryoSupport

/**
  * Gives callers access to a `Kryo` instance for the duration of a function call.
  */
trait KryoSupport {
  /**
    * Implementations are expected to apply `f` to a `Kryo` instance and return its result; how the
    * instance is obtained is left to the implementation.
    *
    * @param f function to run with the `Kryo` instance
    * @tparam T result type of `f`
    * @return the value produced by `f`
    */
  def withKryo[T](f: Kryo => T): T
}

/**
  * [[KryoSupport]] that borrows `Kryo` instances from a `KryoPool` built with soft references.
  *
  * @param serializers (class, serializer) pairs registered on every `Kryo` instance the pool
  *                    creates, in addition to `KryoSupport.ScalaSerializers`
  */
class PooledKryoSupport(serializers: (Class[_], Serializer[_])*) extends KryoSupport {

  private val pool = {
    // Creates Kryo instances with all registrations applied.
    val kryoFactory = new KryoFactory() {
      override def create(): Kryo = {
        val instance = new Kryo
        val registrations = KryoSupport.ScalaSerializers ++ serializers

        registrations.foreach {
          case (clazz, serializer) => instance.register(clazz, serializer)
        }

        instance
      }
    }

    val builder = new KryoPool.Builder(kryoFactory)
    builder.softReferences().build()
  }

  /** Runs `f` with a `Kryo` instance handed out by the pool and returns the result of `f`. */
  override def withKryo[T](f: Kryo => T): T = {
    val callback = new KryoCallback[T] {
      override def execute(kryo: Kryo): T = f(kryo)
    }
    pool.run(callback)
  }

}