嗨,我需要帮助:下面两个函数在 while 循环的 if 语句中使用了完全相同的条件,请问该如何优化这段代码?更多细节见:how to use withColumn Spark Dataframe scala with while
/** Builds a Spark UDF that returns the `codcat` of the first broadcast rule matching a row.
  *
  * Rules are tried in array order. A rule matches when each of its `sensop`, `cdopcz`,
  * `libope` and `qualib` fields is either empty or accepts the corresponding input
  * (`sensop` by equality, the others through `Lib.matchCdopcz` / `Lib.matchRule`).
  * When no rule matches, the UDF returns "NOT_CATEGORIZED".
  *
  * The matching predicate is the same one used by `mdp_idregl`.
  *
  * NOTE(review): a null, empty or non-numeric-after-first-character `cdopcz` still throws
  * as soon as a rule with a non-empty `cdopcz` is evaluated for it — confirm whether the
  * input column can contain such values.
  *
  * @param bRef broadcast array of rules to scan
  * @return a UDF over (sensop, cdopcz, lieccp, qualib) yielding the matched `codcat`
  */
def mdp_codcat(bRef: Broadcast[Array[RefRglSDC]]) = udf((sensop: String, cdopcz: String, lieccp: String, qualib: String) => {
  // Dereference the broadcast once per row instead of on every field access.
  val rules = bRef.value

  // Loop-invariant: drop the first character and normalise the rest through an Int
  // round-trip (e.g. "X007" becomes "7"). Kept lazy so it is evaluated at most once and
  // only when a rule actually constrains cdopcz, exactly as the inline expression was.
  lazy val normalizedCdopcz = cdopcz.substring(1).toInt.toString

  rules.find { rule =>
    (rule.sensop.isEmpty || rule.sensop.equals(sensop)) &&
      (rule.cdopcz.isEmpty || Lib.matchCdopcz(normalizedCdopcz, rule.cdopcz)) &&
      (rule.libope.isEmpty || Lib.matchRule(lieccp, rule.libope)) &&
      (rule.qualib.isEmpty || Lib.matchRule(qualib, rule.qualib))
  }.fold("NOT_CATEGORIZED")(_.codcat)
})
第二个函数与第一个相同,但返回 idregl
/** Builds a Spark UDF that returns the `idregl` (as a String) of the first broadcast rule
  * matching a row.
  *
  * Rules are tried in array order. A rule matches when each of its `sensop`, `cdopcz`,
  * `libope` and `qualib` fields is either empty or accepts the corresponding input
  * (`sensop` by equality, the others through `Lib.matchCdopcz` / `Lib.matchRule`).
  * When no rule matches, the UDF returns "-1".
  *
  * The matching predicate is the same one used by `mdp_codcat`.
  *
  * NOTE(review): a null, empty or non-numeric-after-first-character `cdopcz` still throws
  * as soon as a rule with a non-empty `cdopcz` is evaluated for it — confirm whether the
  * input column can contain such values.
  *
  * @param bRef broadcast array of rules to scan
  * @return a UDF over (sensop, cdopcz, lieccp, qualib) yielding the matched rule id
  */
def mdp_idregl(bRef: Broadcast[Array[RefRglSDC]]) = udf((sensop: String, cdopcz: String, lieccp: String, qualib: String) => {
  // Dereference the broadcast once per row instead of on every field access.
  val rules = bRef.value

  // Loop-invariant: drop the first character and normalise the rest through an Int
  // round-trip (e.g. "X007" becomes "7"). Kept lazy so it is evaluated at most once and
  // only when a rule actually constrains cdopcz, exactly as the inline expression was.
  lazy val normalizedCdopcz = cdopcz.substring(1).toInt.toString

  rules.find { rule =>
    (rule.sensop.isEmpty || rule.sensop.equals(sensop)) &&
      (rule.cdopcz.isEmpty || Lib.matchCdopcz(normalizedCdopcz, rule.cdopcz)) &&
      (rule.libope.isEmpty || Lib.matchRule(lieccp, rule.libope)) &&
      (rule.qualib.isEmpty || Lib.matchRule(qualib, rule.qualib))
  }.fold("-1")(_.idregl.toString)
})
我用 withColumn 调用这两个函数:
// Derive column "mdp_codcat" by applying the rule-matching UDF to the four input columns.
// withColumn returns a new DataFrame; `df` itself is not modified.
df.withColumn(
  "mdp_codcat",
  mdp_codcat(bRef)($"signe", $"cdopcz", $"lib_ope", $"qualif_lib_ope")
)
和
// Derive column "mdp_idregl" by applying the rule-matching UDF to the four input columns.
// withColumn returns a new DataFrame; `df` itself is not modified.
df.withColumn(
  "mdp_idregl",
  mdp_idregl(bRef)($"signe", $"cdopcz", $"lib_ope", $"qualif_lib_ope")
)