调整ggplot颜色并增加文本大小(在绘图区域内)

时间:2020-01-21 14:40:38

标签: r ggplot2

我正在使用xgboostExplainer程序包,并使用showWaterfall函数进行绘图。我想使showWaterfall图的输出更加自定义。我首先运行以下命令:

library(xgboost)
library(xgboostExplainer)

# Load the example train/test objects shipped with the xgboost package.
data(agaricus.train, package = "xgboost")
data(agaricus.test, package = "xgboost")
train <- agaricus.train
test <- agaricus.test

# Wrap the feature matrices and labels in xgb.DMatrix objects.
xgb.train.data <- xgb.DMatrix(train$data, label = train$label)
xgb.test.data <- xgb.DMatrix(test$data, label = test$label)

# Fit a 10-round binary logistic model. The argument is spelled `params` in
# full instead of relying on partial matching of `param`.
param <- list(objective = "binary:logistic")
xgb.model <- xgboost(params = param, data = xgb.train.data, nrounds = 10)

# Build the explainer from the training data, then break down every test
# prediction into per-feature contributions.
explained <- buildExplainer(
  xgb.model,
  xgb.train.data,
  type = "binary",
  base_score = 0.5
)
pred.breakdown <- explainPredictions(xgb.model, explained, xgb.test.data)

# Stock waterfall plot for the second row of the test set.
showWaterfall(
  xgb.model,
  explained,
  xgb.test.data,
  test$data,
  2,
  type = "binary"
)

接下来,我从here获取了showWaterfall函数的源代码,并对其中的ggplot部分做了一些修改。

#' Waterfall plot of one xgboost prediction, with custom styling
#'
#' Breaks the prediction for row `idx` into per-feature contributions,
#' prints a textual summary with `cat()`/`print()`, and returns a
#' `waterfalls::waterfall()` plot of the contributions.
#'
#' @param xgb.model Model passed through to `explainPredictions()`.
#' @param explainer Explainer passed through to `explainPredictions()`.
#' @param DMatrix Object that `slice()` and `getinfo()` are applied to
#'   (presumably an `xgb.DMatrix` -- confirm against the caller).
#' @param data.matrix Matrix whose row `idx` supplies the feature values
#'   shown in the axis labels.
#' @param idx Row to explain.
#' @param type `"regression"` plots the raw weights; any other value treats
#'   the weights as log-odds and labels the y axis with probabilities.
#' @param threshold Contributions with absolute value below this are summed
#'   into a single `other` bar.
#' @param limits Passed to `scale_y_continuous(limits = )` in the
#'   non-regression branch.
#' @param rect_text_size Forwarded to `waterfalls::waterfall()`; values
#'   above 1 enlarge the numbers drawn inside the bars.
#' @param sign_colours Optional length-2 vector `c(positive, negative)`.
#'   When `NULL` (default) the waterfalls package's own sign colours are
#'   used; otherwise each bar is filled according to the sign of its value.
#' @return The ggplot object produced by `waterfalls::waterfall()` plus the
#'   extra scales/theme added here.
showWaterfall2 <- function(xgb.model, explainer, DMatrix, data.matrix, idx,
                           type = "binary", threshold = 0.0001,
                           limits = c(NA, NA), rect_text_size = 1,
                           sign_colours = NULL) {
  row_slice <- slice(DMatrix, as.integer(idx))
  breakdown <- explainPredictions(xgb.model, explainer, row_slice)

  # Total weight of the row; for non-regression models it is mapped to a
  # probability with the inverse logit.
  weight <- rowSums(breakdown)
  if (type == "regression") {
    pred <- weight
  } else {
    pred <- 1 / (1 + exp(-weight))
  }

  breakdown_summary <- as.matrix(breakdown)[1, ]
  data_for_label <- data.matrix[idx, ]

  # Order contributions (and the matching feature values) by magnitude.
  i <- order(abs(breakdown_summary), decreasing = TRUE)
  breakdown_summary <- breakdown_summary[i]
  data_for_label <- data_for_label[i]

  # Pull the intercept out so it can be placed first.
  is_intercept <- names(breakdown_summary) == "intercept"
  intercept <- breakdown_summary[is_intercept]
  data_for_label <- data_for_label[!is_intercept]
  breakdown_summary <- breakdown_summary[!is_intercept]

  # Collapse negligible contributions into one "other" entry.
  i_other <- which(abs(breakdown_summary) < threshold)
  other_impact <- 0
  if (length(i_other) > 0) {
    other_impact <- sum(breakdown_summary[i_other])
    names(other_impact) <- "other"
    breakdown_summary <- breakdown_summary[-i_other]
    data_for_label <- data_for_label[-i_other]
  }

  # The "other" bar is only shown when it has a non-zero total.
  has_other <- abs(other_impact) > 0
  if (has_other) {
    breakdown_summary <- c(intercept, breakdown_summary, other_impact)
    data_for_label <- c("", data_for_label, "")
  } else {
    breakdown_summary <- c(intercept, breakdown_summary)
    data_for_label <- c("", data_for_label)
  }
  labels <- paste0(names(breakdown_summary), " = ", data_for_label)
  labels[1] <- "intercept"
  if (has_other) {
    labels[length(labels)] <- "other"
  }

  if (!is.null(getinfo(DMatrix, "label"))) {
    cat("\nActual: ", getinfo(row_slice, "label"))
  }
  cat("\nPrediction: ", pred)
  cat("\nWeight: ", weight)
  cat("\nBreakdown")
  cat("\n")
  print(breakdown_summary)

  # Custom sign colours are implemented by switching off fill_by_sign and
  # handing waterfall() one explicit colour per bar.
  fill_by_sign <- is.null(sign_colours)
  fill_colours <- NULL
  if (!fill_by_sign) {
    stopifnot(length(sign_colours) == 2)
    fill_colours <- unname(
      ifelse(breakdown_summary >= 0, sign_colours[1], sign_colours[2])
    )
  }

  base_plot <- waterfalls::waterfall(
    values = breakdown_summary,
    rect_text_labels = round(breakdown_summary, 2),
    labels = labels,
    total_rect_text = round(weight, 2),
    calc_total = TRUE,
    total_axis_text = "Prediction",
    rect_text_size = rect_text_size,
    fill_colours = fill_colours,
    fill_by_sign = fill_by_sign
  )

  if (type == "regression") {
    base_plot +
      ggplot2::theme(
        axis.text.x = ggplot2::element_text(angle = 45, hjust = 1)
      )
  } else {
    # The bars live on the log-odds scale; breaks are placed at the logit of
    # 2%, 4%, ..., 98% and labelled with the corresponding probability.
    inverse_logit_labels <- function(x) {
      1 / (1 + exp(-x))
    }
    logit <- function(x) {
      log(x / (1 - x))
    }
    ybreaks <- logit(seq(2, 98, 2) / 100)

    base_plot +
      ggplot2::scale_y_continuous(
        labels = inverse_logit_labels,
        breaks = ybreaks,
        limits = limits
      ) +
      ggplot2::scale_color_brewer(palette = "Set1") +
      ggplot2::labs(
        title = "MyModelTitle",
        x = "MyVariables",
        y = "ModelProbabilities"
      ) +
      ggplot2::theme(
        axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
        axis.line.y = ggplot2::element_blank(),
        axis.ticks.y = ggplot2::element_blank(),
        strip.background = ggplot2::element_rect(fill = "darkred"),
        panel.background = ggplot2::element_blank(),
        panel.grid.major = ggplot2::element_blank(),
        panel.grid.minor = ggplot2::element_blank()
      )
  }
}

接下来,我运行修改后的新函数:

# Draw the customised waterfall for the second row of the test set.
showWaterfall2(
  xgb.model = xgb.model,
  explainer = explained,
  DMatrix = xgb.test.data,
  data.matrix = test$data,
  idx = 2,
  type = "binary"
)

我想对函数和ggplot代码进行两个小调整。我修改的函数的唯一部分是以下内容(对应于代码的ggplot部分):

# Excerpt of the plotting expression from the non-regression branch of
# showWaterfall2. It relies on locals defined earlier in that function
# (breakdown_summary, labels, weight, inverse_logit_labels, ybreaks, limits)
# and is not runnable on its own.
waterfalls::waterfall(values = breakdown_summary,
                          rect_text_labels = round(breakdown_summary, 2),
                          labels = labels,
                          total_rect_text = round(weight, 2),
                          calc_total = TRUE,
                          total_axis_text = "Prediction",
                          #fill_colours = c("blue", "red"),
                          #fill_by_sign = FALSE
    )  +
      # Label the y-axis breaks with inverse_logit_labels() instead of the
      # raw values.
      scale_y_continuous(labels = inverse_logit_labels,
                         breaks = ybreaks, limits = limits) +
      scale_color_brewer(palette = "Set1") +
      #scale_fill_manual(values = c('darkblue', 'darkred')) +
      #scale_color_manual(values = c('darkblue', 'darkred')) +
      labs(title = "MyModelTitle",
           x = "MyVariables",
           y = "ModelProbabilities") +
      #coord_flip() +
      #theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
      # Rotate the x labels and strip the y axis line/ticks, panel background
      # and grid lines.
      theme(
        axis.text.x = element_text(angle = 45, hjust = 1),
        #aspect.ratio = 1,
        axis.line.y = element_blank(),
        axis.ticks.y = element_blank(),
       strip.background = element_rect(fill = 'darkred'),
        panel.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank()
      )

如果我取消对#fill_colours = c("blue", "red")和#fill_by_sign = FALSE这两行的注释,则可以自己为这些条手动着色。但是,我喜欢waterfalls包的fill_by_sign = TRUE方式(负值与正值使用不同的颜色)。然而,waterfalls软件包(here)的文档中没有任何地方说明如何更改基色。

如何更改waterfalls/ggplot的基色?另外,如何使文本(条形图中的数字)更大?将text = element_text(size = 20)添加到theme()部分似乎对我不起作用。

1 个答案:

答案 0 :(得分:2)

您可以通过编辑waterfalls包源码(例如 waterfalls_0.1.2.tar.gz)的R子文件夹中的waterfall.R文件,来更改waterfall函数在fill_by_sign = TRUE时使用的基色。

更改该行之后的两行

fill_colours <- ifelse(values >= 0,

到例如

gg_color_hue(6)[4],
gg_color_hue(6)[5])

然后在R中安装修改后的程序包,并再次运行函数。

第一行指定瀑布图中正向变化所用的颜色,第二行指定负向变化所用的颜色。

()中的数字是调色板中颜色的总数。

[]中的数字是所选颜色在调色板中的编号(不能大于颜色总数,否则得到NA)。

更改这些值以获得所需的颜色组合。

使用以下代码预览调色板中的n种颜色:

#' Evenly spaced HCL hues
#'
#' Returns `n` colours whose hues are equally spaced on the colour wheel
#' starting at 15 degrees, with luminance 65 and chroma 100.
#'
#' @param n Number of colours to generate.
#' @return A character vector of `n` hex colour strings (empty for `n = 0`).
gg_color_hue <- function(n) {
  # n + 1 points are generated because 15 and 375 degrees are the same hue;
  # the duplicate endpoint is dropped below.
  hues <- seq(15, 375, length.out = n + 1)
  # seq_len() instead of 1:n so that n = 0 yields no colours rather than one.
  hcl(h = hues, l = 65, c = 100)[seq_len(n)]
}

# Preview the n-colour palette as large filled points; the position of each
# point is its index within gg_color_hue(n).
n <- 6
cols <- gg_color_hue(n)
plot(seq_len(n), pch = 16, col = cols, cex = 5)
相关问题