如何在箱线图中分离异常值和远异常值

时间:2021-03-02 14:59:37

标签: r ggplot2 boxplot outliers

我想把箱线图中的离群值分成“离群值”和“远离群值”两类,并用不同的符号来表示:前者超出箱体(低于第 1 个四分位数或高于第 3 个四分位数)的距离大于四分位距的 1.5 倍,后者大于四分位距的 3.0 倍。我在一篇文献中见过这种画法,但无法在 R 中重现它,也没有找到任何讨论如何进行这种划分的问题。感谢任何能提供帮助的人。我把我的代码留在下面。

structure(list(data = structure(list(month = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L), .Label = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", 
"Aug", "Sep", "Oct", "Nov", "Dec"), class = "factor"), cpue = c(0.646153846, 
10.71428571, 2.666666667, 8.333333333, 1.266666667, 2.666666667, 
5.1, 2.32, 3.2, 5.333333333, 1.6, 5.533333333, 0.977777778, 3.916666667, 
0.725, 3.333333333, 3.03030303, 8.333333333, 4.807692308, 20.16666667, 
4.071428571, 7.03125, 3.676470588, 9.083333333, 1.116666667, 
2.30952381, 34.375, 30, 2.333333333, 4.285714286, 2.257142857, 
6.333333333, 14.26666667, 0.807407407, 0.08, 3.440559441, 0.173333333, 
8.0625, 0.125, 1.75, 1.125, 0.92, 1.712, 0.24, 5.7, 8.571428571, 
21.5, 0.256410256, 11.11111111, 3.071428571, 4, 5.833333333, 
0.4375, 6.233333333, 5.8375, 0.333333333, 18.21818182, 2.1875, 
1.15, 1.883333333, 6.12244898, 1.35, 0.2, 4, 1.92, 0.6, 29.48571429, 
0.291964286, 14.33333333, 0.138888889, 1.68, 0.494117647, 0.432098765, 
0.114285714, 0.291497976, 2.453333333, 0.12, 2.6, 2.7, 0.486, 
0.32, 0.44, 7.2, 0.4, 4.615384615, 1.666666667, 30.28333333, 
16.61904762, 8.333333333, 7.45, 12.82051282, 0.796296296, 2.5, 
3.044444444, 1.666666667, 1.222222222, 2.666666667, 0.88, 10.8, 
0.48, 14.44, 4.875, 0.458333333, 0.14025974, 0.8125, 12.64285714, 
1.53, 0.657777778, 0.75, 0.208333333, 1.185185185, 1.923076923, 
2.25, 5.233333333, 0.242857143, 3.634615385, 9.277777778, 6.282051282, 
1.222222222, 1.233333333, 30.9, 4.733333333, 10.46938776, 1.2, 
0.10989011, 0.288888889, 3.966666667, 0.818045113, 0.552631579, 
0.9, 0.525, 37.5, 5, 3.523809524, 0.083916084, 0.145454545, 5.859649123, 
1.898989899, 1.333333333, 2.4, 1.8, 1, 9.523809524, 0.090909091, 
1.083333333, 5.214285714, 0.666666667, 0.4, 1.6, 6.666666667, 
0.285714286, 0.385714286, 0.25, 0.44, 0.369230769, 1.557894737, 
6.771428571, 0.63, 3.96, 1.76, 0.211764706, 1.56, 4.44, 12.5, 
12.5, 18.46153846, 22.5, 3.611111111, 75, 25.3125, 2.3, 11.875, 
3, 2.34375, 2, 3.705128205, 1.166666667, 3.5, 1.153846154, 10.96875, 
9.259259259, 10.71052632, 7.575757576, 13.88888889, 0.222222222, 
0.701298701, 0.512, 3.88, 7.146666667, 2.861176471, 1.623529412, 
7.371428571, 0.615384615, 0.9, 2.777777778, 3.333333333, 4.444444444, 
4, 7.407407407, 10, 9.583333333, 2.4, 0.3, 80.1, 1, 2.4, 0.833333333, 
1.666666667, 0.25, 0.350877193, 1.12, 1.106666667, 0.425925926, 
5.584615385, 10, 0.583333333, 1.75, 3.636363636, 0.421818182, 
0.071428571, 0.6, 3.076923077, 2, 0.333333333, 1.2, 20, 6.944444444, 
0.470588235, 2.583333333, 2.272727273, 18.28571429, 0.5, 1.5, 
0.113333333, 2.95, 0.727272727, 0.711111111, 0.092592593, 0.579710145, 
0.761904762, 1.228571429, 2.291666667, 9.863636364, 16.78321678, 
3.12, 3.703703704, 3.785714286, 8.571428571, 1.166666667, 0.282051282, 
0.933333333, 2.4, 3.68907563, 1.666666667, 1.538461538, 5.2, 
13.23529412, 10.20408163, 19.77380952, 5.269333333, 0.666666667, 
1.952380952, 4.430769231, 0.787037037, 4.8, 9.024, 4, 3.692307692, 
9.8, 5.7, 6.333333333, 0.823529412, 4.517647059, 1.6, 1.666666667, 
0.214285714, 1, 0.48, 2.035714286, 0.285714286, 3.703703704, 
7, 0.22, 0.266666667, 0.632653061, 10.72820513, 12.79384615, 
6.635294118, 9.272727273, 14.53125, 9.309090909, 1.45520362, 
0.535714286, 2.209150327, 3.2, 2.933333333, 14.75, 6.25, 5.504201681, 
8.888888889, 2.083333333, 7.777777778, 14.03571429, 2.941176471, 
5.208333333)), row.names = c(NA, -305L), class = "data.frame"), 
    layers = list(<environment>, <environment>), scales = <environment>, 
    mapping = structure(list(x = ~month, y = ~cpue), class = "uneval"), 
    theme = structure(list(line = structure(list(colour = "black", 
        size = 0.5, linetype = 1, lineend = "butt", arrow = FALSE, 
        inherit.blank = TRUE), class = c("element_line", "element"
    )), rect = structure(list(fill = "white", colour = "black", 
        size = 0.5, linetype = 1, inherit.blank = TRUE), class = c("element_rect", 
    "element")), text = structure(list(family = "", face = "plain", 
        colour = "black", size = 15, hjust = 0.5, vjust = 0.5, 
        angle = 0, lineheight = 0.9, margin = structure(c(0, 
        0, 0, 0), class = c("margin", "unit"), valid.unit = 8L, unit = "pt"), 
        debug = FALSE, inherit.blank = FALSE), class = c("element_text", 
    "element")), title = NULL, aspect.ratio = NULL, axis.title = NULL, 
        axis.title.x = structure(list(family = NULL, face = NULL, 
            colour = NULL, size = NULL, hjust = NULL, vjust = 1, 
            angle = NULL, lineheight = NULL, margin = structure(c(2.75, 
            0, 0, 0), class = c("margin", "unit"), valid.unit = 8L, unit = "pt"), 
            debug = NULL, inherit.blank = TRUE), class = c("element_text", 
        "element")), axis.title.x.top = structure(list(family = NULL, 
            face = NULL, colour = NULL, size = NULL, hjust = NULL, 
            vjust = 0, angle = NULL, lineheight = NULL, margin = structure(c(0, 
            0, 2.75, 0), class = c("margin", "unit"), valid.unit = 8L, unit = "pt"), 
            debug = NULL, inherit.blank = TRUE), class = c("element_text", 
        "element")), axis.title.x.bottom = NULL, axis.title.y = structure(list(
            family = NULL, face = NULL, colour = NULL, size = NULL, 
            hjust = NULL, vjust = 1, angle = 90, lineheight = NULL, 
            margin = structure(c(0, 2.75, 0, 0), class = c("margin", 
            "unit"), valid.unit = 8L, unit = "pt"), debug = NULL, 
            inherit.blank = TRUE), class = c("element_text", 
        "element")), axis.title.y.left = NULL, axis.title.y.right = structure(list(
            family = NULL, face = NULL, colour = NULL, size = NULL, 
            hjust = NULL, vjust = 0, angle = -90, lineheight = NULL, 
            margin = structure(c(0, 0, 0, 2.75), class = c("margin", 
            "unit"), valid.unit = 8L, unit = "pt"), debug = NULL, 
            inherit.blank = TRUE), class = c("element_text", 
        "element")), axis.text = structure(list(family = NULL, 
            face = NULL, colour = "grey30", size = structure(0.8, class = "rel"), 
            hjust = NULL, vjust = NULL, angle = NULL, lineheight = NULL, 
            margin = NULL, debug = NULL, inherit.blank = TRUE), class = c("element_text", 
        "element")), axis.text.x = structure(list(family = NULL, 
            face = NULL, colour = NULL, size = 16, hjust = NULL, 
            vjust = 1, angle = NULL, lineheight = NULL, margin = structure(c(2.2, 
            0, 0, 0), class = c("margin", "unit"), valid.unit = 8L, unit = "pt"), 
            debug = NULL, inherit.blank = FALSE), class = c("element_text", 
        "element")), axis.text.x.top = structure(list(family = NULL, 
            face = NULL, colour = NULL, size = NULL, hjust = NULL, 
            vjust = 0, angle = NULL, lineheight = NULL, margin = structure(c(0, 
            0, 2.2, 0), class = c("margin", "unit"), valid.unit = 8L, unit = "pt"), 
            debug = NULL, inherit.blank = TRUE), class = c("element_text", 
        "element")), axis.text.x.bottom = NULL, axis.text.y = structure(list(
            family = NULL, face = NULL, colour = NULL, size = 16, 
            hjust = 1, vjust = NULL, angle = NULL, lineheight = NULL, 
            margin = structure(c(0, 2.2, 0, 0), class = c("margin", 
            "unit"), valid.unit = 8L, unit = "pt"), debug = NULL, 
            inherit.blank = FALSE), class = c("element_text", 
        "element")), axis.text.y.left = NULL, axis.text.y.right = structure(list(
            family = NULL, face = NULL, colour = NULL, size = NULL, 
            hjust = 0, vjust = NULL, angle = NULL, lineheight = NULL, 
            margin = structure(c(0, 0, 0, 2.2), class = c("margin", 
            "unit"), valid.unit = 8L, unit = "pt"), debug = NULL, 
            inherit.blank = TRUE), class = c("element_text", 
        "element")), axis.ticks = structure(list(colour = "grey20", 
            size = NULL, linetype = NULL, lineend = NULL, arrow = FALSE, 
            inherit.blank = TRUE), class = c("element_line", 
        "element")), axis.ticks.x = NULL, axis.ticks.x.top = NULL, 
        axis.ticks.x.bottom = NULL, axis.ticks.y = NULL, axis.ticks.y.left = NULL, 
        axis.ticks.y.right = NULL, axis.ticks.length = structure(2.75, class = "unit", valid.unit = 8L, unit = "pt"), 
        axis.ticks.length.x = NULL, axis.ticks.length.x.top = NULL, 
        axis.ticks.length.x.bottom = NULL, axis.ticks.length.y = NULL, 
        axis.ticks.length.y.left = NULL, axis.ticks.length.y.right = NULL, 
        axis.line = structure(list(colour = "black", size = structure(1, class = "rel"), 
            linetype = NULL, lineend = NULL, arrow = FALSE, inherit.blank = TRUE), class = c("element_line", 
        "element")), axis.line.x = NULL, axis.line.x.top = NULL, 
        axis.line.x.bottom = NULL, axis.line.y = NULL, axis.line.y.left = NULL, 
        axis.line.y.right = NULL, legend.background = structure(list(
            fill = NULL, colour = NA, size = NULL, linetype = NULL, 
            inherit.blank = TRUE), class = c("element_rect", 
        "element")), legend.margin = structure(c(5.5, 5.5, 5.5, 
        5.5), class = c("margin", "unit"), valid.unit = 8L, unit = "pt"), 
        legend.spacing = structure(11, class = "unit", valid.unit = 8L, unit = "pt"), 
        legend.spacing.x = NULL, legend.spacing.y = NULL, legend.key = structure(list(), class = c("element_blank", 
        "element")), legend.key.size = structure(1.2, class = "unit", valid.unit = 3L, unit = "lines"), 
        legend.key.height = NULL, legend.key.width = NULL, legend.text = structure(list(
            family = NULL, face = NULL, colour = NULL, size = structure(0.8, class = "rel"), 
            hjust = NULL, vjust = NULL, angle = NULL, lineheight = NULL, 
            margin = NULL, debug = NULL, inherit.blank = TRUE), class = c("element_text", 
        "element")), legend.text.align = NULL, legend.title = structure(list(
            family = NULL, face = NULL, colour = NULL, size = NULL, 
            hjust = 0, vjust = NULL, angle = NULL, lineheight = NULL, 
            margin = NULL, debug = NULL, inherit.blank = TRUE), class = c("element_text", 
        "element")), legend.title.align = NULL, legend.position = "right", 
        legend.direction = NULL, legend.justification = "center", 
        legend.box = NULL, legend.box.just = NULL, legend.box.margin = structure(c(0, 
        0, 0, 0), class = c("margin", "unit"), valid.unit = 1L, unit = "cm"), 
        legend.box.background = structure(list(), class = c("element_blank", 
        "element")), legend.box.spacing = structure(11, class = "unit", valid.unit = 8L, unit = "pt"), 
        panel.background = structure(list(fill = "white", colour = NA, 
            size = NULL, linetype = NULL, inherit.blank = TRUE), class = c("element_rect", 
        "element")), panel.border = structure(list(), class = c("element_blank", 
        "element")), panel.spacing = structure(5.5, class = "unit", valid.unit = 8L, unit = "pt"), 
        panel.spacing.x = NULL, panel.spacing.y = NULL, panel.grid = structure(list(
            colour = "grey92", size = NULL, linetype = NULL, 
            lineend = NULL, arrow = FALSE, inherit.blank = TRUE), class = c("element_line", 
        "element")), panel.grid.major = structure(list(), class = c("element_blank", 
        "element")), panel.grid.minor = structure(list(), class = c("element_blank", 
        "element")), panel.grid.major.x = NULL, panel.grid.major.y = NULL, 
        panel.grid.minor.x = NULL, panel.grid.minor.y = NULL, 
        panel.ontop = FALSE, plot.background = structure(list(
            fill = NULL, colour = "white", size = NULL, linetype = NULL, 
            inherit.blank = TRUE), class = c("element_rect", 
        "element")), plot.title = structure(list(family = NULL, 
            face = "bold.italic", colour = "black", size = 15, 
            hjust = 0, vjust = 1, angle = NULL, lineheight = NULL, 
            margin = structure(c(0, 0, 5.5, 0), class = c("margin", 
            "unit"), valid.unit = 8L, unit = "pt"), debug = NULL, 
            inherit.blank = FALSE), class = c("element_text", 
        "element")), plot.title.position = "panel", plot.subtitle = structure(list(
            family = NULL, face = NULL, colour = NULL, size = NULL, 
            hjust = 0, vjust = 1, angle = NULL, lineheight = NULL, 
            margin = structure(c(0, 0, 5.5, 0), class = c("margin", 
            "unit"), valid.unit = 8L, unit = "pt"), debug = NULL, 
            inherit.blank = TRUE), class = c("element_text", 
        "element")), plot.caption = structure(list(family = NULL, 
            face = NULL, colour = NULL, size = structure(0.8, class = "rel"), 
            hjust = 1, vjust = 1, angle = NULL, lineheight = NULL, 
            margin = structure(c(5.5, 0, 0, 0), class = c("margin", 
            "unit"), valid.unit = 8L, unit = "pt"), debug = NULL, 
            inherit.blank = TRUE), class = c("element_text", 
        "element")), plot.caption.position = "panel", plot.tag = structure(list(
            family = NULL, face = NULL, colour = NULL, size = structure(1.2, class = "rel"), 
            hjust = 0.5, vjust = 0.5, angle = NULL, lineheight = NULL, 
            margin = NULL, debug = NULL, inherit.blank = TRUE), class = c("element_text", 
        "element")), plot.tag.position = "topleft", plot.margin = structure(c(5.5, 
        5.5, 5.5, 5.5), class = c("margin", "unit"), valid.unit = 8L, unit = "pt"), 
        strip.background = structure(list(fill = "white", colour = "black", 
            size = structure(2, class = "rel"), linetype = NULL, 
            inherit.blank = TRUE), class = c("element_rect", 
        "element")), strip.background.x = NULL, strip.background.y = NULL, 
        strip.placement = "inside", strip.text = structure(list(
            family = NULL, face = NULL, colour = "grey10", size = structure(0.8, class = "rel"), 
            hjust = NULL, vjust = NULL, angle = NULL, lineheight = NULL, 
            margin = structure(c(4.4, 4.4, 4.4, 4.4), class = c("margin", 
            "unit"), valid.unit = 8L, unit = "pt"), debug = NULL, 
            inherit.blank = TRUE), class = c("element_text", 
        "element")), strip.text.x = NULL, strip.text.y = structure(list(
            family = NULL, face = NULL, colour = NULL, size = NULL, 
            hjust = NULL, vjust = NULL, angle = -90, lineheight = NULL, 
            margin = NULL, debug = NULL, inherit.blank = TRUE), class = c("element_text", 
        "element")), strip.switch.pad.grid = structure(2.75, class = "unit", valid.unit = 8L, unit = "pt"), 
        strip.switch.pad.wrap = structure(2.75, class = "unit", valid.unit = 8L, unit = "pt"), 
        strip.text.y.left = structure(list(family = NULL, face = NULL, 
            colour = NULL, size = NULL, hjust = NULL, vjust = NULL, 
            angle = 90, lineheight = NULL, margin = NULL, debug = NULL, 
            inherit.blank = TRUE), class = c("element_text", 
        "element"))), class = c("theme", "gg"), complete = TRUE, validate = TRUE), 
    coordinates = <environment>, facet = <environment>, plot_env = <environment>, 
    labels = list(y = "CPUE (kg/fisherman.day)\n", x = "\nMonth", 
        yintercept = "yintercept")), class = c("gg", "ggplot"
))

我想要这样的东西:

enter image description here

1 个答案:

答案 0 :(得分:1)

我没有找到在绘图调用内部直接实现这一点的方法;看起来您需要在绘图之前手动计算每个点距离箱体多少个 IQR(四分位距),然后再把这些点作为散点叠加到图上。用您的数据实现的一种方法是:

library(tidyverse)

# Distance of each value from the box of a boxplot, in units of the IQR.
#
# x: numeric vector (the values of one boxplot group).
# Returns a numeric vector the same length as x: 0 for values between Q1 and
# Q3, (x - Q3) / IQR for values above the box and (Q1 - x) / IQR for values
# below it. NA inputs yield NA.
# Note: if Q1 == Q3 the division is by zero, so the result is not finite.
f <- function(x) {
  # Compute both quartiles in a single call; na.rm = TRUE keeps a missing
  # value in x from aborting quantile(), names = FALSE avoids carrying the
  # "25%"/"75%" names into the result.
  q <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  iqr <- q[2] - q[1]
  # Only one of the two terms can be positive for any given value.
  pmax((x - q[2]) / iqr, 0) + pmax((q[1] - x) / iqr, 0)
}

# Classify every observation by its distance from the box (in IQRs, computed
# separately for each month), then draw the boxes and overlay the points.
# `data` must be a data frame with the columns `month` and `cpue`.
data %>%
  group_by(month) %>%
  # cut() yields three levels: (-Inf,1.5] = not an outlier,
  # (1.5,3] = outlier, (3, Inf] = far outlier.
  mutate(sym = cut(f(cpue), breaks = c(-Inf, 1.5, 3, Inf))) %>%
  ungroup() %>% # grouping is only needed for the per-month quantiles
  ggplot(aes(x = month, y = cpue)) +
  # Suppress the built-in outlier points; they are added manually below.
  geom_boxplot(outlier.shape = NA) +
  # Non-outliers are mapped to shape NA and therefore not drawn;
  # na.rm = TRUE drops them silently instead of emitting a warning.
  geom_point(aes(shape = sym, color = sym), na.rm = TRUE) +
  # One shape per cut() level, in level order.
  scale_shape_manual(name = "IQRs from box", values = c(NA, 1, 2)) +
  scale_color_discrete(name = "IQRs from box")

结果图显示了由颜色和形状标记的异常值

Boxplot with varying outlier symbols