Boxplot:需要捕获所有极端异常值

时间:2018-03-13 15:48:04

标签: r boxplot

我试图在箱线图中捕获我的所有数据。我在Cross Validated中找到了一个很好的例子,但它并不完全适合我,我希望有人可以帮助我。

我的代码是:

# First attempt: a single horizontal boxplot of all of `x` with a custom
# bottom axis.
# NOTE(review): `xlab` is not defined in this snippet; it is presumably a
# numeric vector of tick positions -- confirm against the caller.
boxplot(x, horizontal = TRUE, boxwex = 0.7, axes = FALSE, frame.plot = TRUE)
axis(1, at = xlab, labels = xlab)

# Second attempt: three side-by-side panels (low tail | centre | high tail),
# each with its own horizontal scale.
# Save only the settable parameters: a bare par() also returns read-only
# ones, and restoring those with par(opar) raises warnings.
opar <- par(no.readonly = TRUE)
layout(
  matrix(1:3, nrow = 1, ncol = 3),
  heights = c(1, 1, 1),
  widths = c(1, 6, 1)
)
par(oma = c(5, 4, 0, 0) + 0.1, mar = c(0, 0, 1, 1) + 0.1)

# Left panel: values below -400.
stripchart(
  x[x < -400],
  pch = 1, cex = 1, xlim = c(-1700000, -400), method = "jitter"
)

# Centre panel: values in [-400, 400]. The bounds are inclusive so that
# observations exactly equal to -400 or 400 are not dropped from every panel.
x_centre <- x[abs(x) <= 400]
# boxplot() has no `method` argument, so jitter cannot be requested there.
# Draw the box without its own outlier points and overlay every observation
# as a jittered strip chart instead.
boxplot(
  x_centre,
  horizontal = TRUE, ylim = c(-400, 400), at = 0, boxwex = 0.7, cex = 1,
  outline = FALSE
)
stripchart(
  x_centre,
  pch = 1, cex = 1, at = 0, method = "jitter", add = TRUE
)

# Right panel: values above 400.
stripchart(
  x[x > 400],
  pch = 1, cex = 1, xlim = c(400, 60000), method = "jitter"
)

# Restore the graphics state that was active before the layout was set up.
par(opar)

但是抖动(jitter)在箱线图中不起作用,并且两侧的带状图(stripchart)不应该从0开始。如果我能弄清楚如何粘贴输出的图表,我会把它贴上来。下面是我的数据:

  [1] -1620000.00   -85000.00   -32672.62   -30963.50   -28335.64   -26531.30   -18305.68   -13964.04   -13500.00
 [10]   -13248.48   -10975.05    -7410.00    -6034.32    -5629.00    -5349.09    -5125.00    -4994.45    -4973.72
 [19]    -4404.84    -4063.76    -3632.77    -3118.50    -3056.18    -3000.00    -2774.00    -2699.86    -2541.50
 [28]    -2327.06    -2238.89    -1750.00    -1548.63    -1343.25    -1271.67    -1187.55    -1114.80    -1087.44
 [37]    -1084.59    -1080.00     -977.20     -936.00     -900.00     -896.50     -853.60     -850.00     -792.00
 [46]     -791.44     -773.53     -750.00     -750.00     -710.82     -700.00     -697.68     -678.00     -665.00
 [55]     -620.00     -578.49     -513.96     -500.00     -474.18     -468.51     -412.47     -334.50     -332.50
 [64]     -331.20     -305.32     -300.00     -300.00     -244.04     -239.65     -212.30     -210.00     -203.32
 [73]     -202.15     -199.50     -198.24     -188.64     -177.25     -174.78     -169.80     -168.80     -168.25
 [82]     -166.75     -144.35     -140.00     -129.98     -126.74     -120.33     -120.00     -115.92     -114.99
 [91]     -112.45     -108.00     -106.64     -103.40     -100.00     -100.00      -98.28      -95.68      -89.36
[100]      -87.84      -86.59      -75.68      -72.16      -72.04      -71.13      -65.52      -51.00      -50.00
[109]      -50.00      -44.12      -41.25      -40.00      -35.18      -35.14      -34.41      -33.82      -33.80
[118]      -33.60      -32.98      -30.00      -30.00      -29.13      -28.00      -27.44      -26.46      -26.32
[127]      -25.92      -25.50      -25.06      -25.00      -21.84      -20.00      -19.63      -19.14      -18.64
[136]      -18.60      -18.00      -17.25      -16.72      -16.69      -16.54      -15.50      -15.00      -13.51
[145]      -12.16      -11.78      -11.69      -11.56      -11.26      -10.97      -10.88      -10.84      -10.62
[154]      -10.45      -10.20      -10.00       -9.83       -9.04       -9.00       -8.75       -8.70       -8.50
[163]       -8.28       -8.26       -7.92       -7.88       -7.74       -6.70       -6.44       -6.10       -5.35
[172]       -5.04       -4.84       -4.73       -4.65       -4.50       -4.44       -4.40       -4.34       -4.25
[181]       -4.00       -3.99       -3.98       -3.96       -3.94       -3.70       -3.08       -2.88       -2.85
[190]       -2.75       -2.52       -2.14       -2.06       -2.00       -1.98       -1.96       -1.92       -1.74
[199]       -1.68       -1.50       -1.10       -1.08       -0.89       -0.67       -0.60       -0.50       -0.48
[208]       -0.42       -0.40       -0.30       -0.14       -0.04        0.00        0.00        0.00        0.00
[217]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[226]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[235]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[244]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[253]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[262]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[271]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[280]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[289]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[298]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[307]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[316]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[325]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[334]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[343]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[352]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[361]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[370]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[379]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[388]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[397]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[406]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[415]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[424]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[433]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[442]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[451]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[460]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[469]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[478]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[487]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[496]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[505]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[514]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[523]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[532]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[541]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[550]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[559]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[568]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[577]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[586]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[595]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[604]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[613]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[622]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[631]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[640]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[649]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[658]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[667]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[676]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.00
[685]        0.00        0.00        0.00        0.00        0.00        0.00        0.00        0.20        0.21
[694]        0.40        0.44        0.46        0.46        0.48        0.59        0.70        1.00        1.14
[703]        1.17        1.25        1.28        1.40        1.42        1.60        1.68        2.10        2.10
[712]        2.16        2.32        2.34        2.37        2.52        2.80        2.88        3.50        3.51
[721]        3.99        4.76        5.00        5.63        5.76        5.85        5.85        6.00        6.20
[730]        6.50        7.36        8.07        8.68        9.25        9.67        9.80        9.82       11.02
[739]       14.00       15.00       15.04       15.27       16.60       17.55       17.68       19.50       20.94
[748]       21.81       23.51       23.86       24.57       24.57       25.96       27.36       27.44       27.81
[757]       29.20       29.59       29.72       30.30       38.50       39.77       47.20       47.92       50.00
[766]       50.59       51.00       54.20       65.02       68.00       71.28       75.00       92.80       95.28
[775]      105.29      110.00      126.84      134.04      134.24      140.00      140.58      147.50      148.78
[784]      152.48      173.80      181.37      181.80      185.60      186.90      188.48      201.30      209.50
[793]      215.27      228.64      240.00      243.68      248.08      250.00      250.00      255.58      277.50
[802]      282.00      285.40      290.80      304.39      325.00      327.76      339.80      362.00      372.93
[811]      373.24      380.70      400.00      440.00      450.00      493.74      508.50      510.64      538.20
[820]      551.37      565.00      570.95      612.22      616.00      653.40      665.24      666.75      667.20
[829]      718.23      770.66      825.26      855.79      884.00     1000.00     1064.00     1064.77     1080.00
[838]     1152.00     1159.62     1177.24     1271.27     1495.52     1590.00     1670.00     1739.79     2075.68
[847]     2496.00     3570.00     3648.64     4152.64     4158.00     4556.44     4594.75     5040.00     5099.40
[856]     5150.67     5926.65     5967.81     6110.64     6144.00     6942.20     7350.00     7525.32     8667.90
[865]     9601.02    11557.20    12360.12    14425.70    15000.00    17962.14    27655.72    34709.96    45430.00
[874]    50000.00    57785.00

1 个答案:

答案 0 :(得分:0)

好的,与其继续使用我原来那套繁琐的做法(也免得再拿到一个 Tumbleweed“奖”),我找到了 bdemarest 在2015年发布的一个更好的解决方案,标题为“了解抖动的Boxplot”。我的数据框名为DRP,列名为“Cost_Delta”和“Month”(2017年1月的数据见第一篇帖子),我的解决方案图表可以在 https://i.stack.imgur.com/sSWtr.png 找到。代码如下。

# Jittered box plot of Cost_Delta by Month, with text labels and red arrows
# marking outliers that are annotated rather than shown at their true value.
library(ggplot2)

# Read the data; the code below uses the columns `Month` and `Cost_Delta`.
DRP <- read.table(
  "C:\\Projects\\Mat Group\\DRP\\1000_Item_Data\\RFiles\\Cost Delta\\DRP_CostDelta2.txt",
  header = TRUE
)

# Keep the months in the order they first appear in the file instead of the
# alphabetical order factor() would use by default.
DRP$Month <- as.character(DRP$Month)
DRP$Month <- factor(DRP$Month, levels = unique(DRP$Month))

# Base plot: jittered points with an unfilled box drawn on top. The box
# plot's own outlier markers are hidden because every point is already drawn.
p <- ggplot(DRP, aes(x = Month, y = Cost_Delta)) +
  geom_point(
    aes(fill = Month),
    size = 2, shape = 21, colour = "grey20",
    position = position_jitter(width = 0.2, height = 0.1)
  ) +
  geom_boxplot(outlier.colour = NA, fill = NA, colour = "grey20")

# One row per outlier label: `x` is the position of the month on the discrete
# axis, `y` is where the label is drawn, `label` is the outlier's value.
outlier_labels <- data.frame(
  x = c(
    1,       # January
    2, 2,    # February
    3,       # March
    6, 6,    # August
    7, 7,    # September
    8,       # October
    9,       # November
    10, 10   # December
  ),
  y = c(
    -250000,
    300000, -250000,
    -250000,
    -225000, -250000,
    300000, -250000,
    300000,
    300000,
    300000, -250000
  ),
  label = c(
    "-1,620,000",
    "1,101,786", "-7,020,000",
    "-3,780,000",
    "-484,960", "-540,000",
    "593,960", "-484,960",
    "969,920",
    "2,909,760",
    "1,080,000", "-1,939,000"
  ),
  stringsAsFactors = FALSE
)

# Months that get an arrow pointing down (low outliers) or up (high outliers).
months_low <- c(1, 2, 3, 6, 7, 10)
months_high <- c(2, 7, 8, 9, 10)
n_low <- length(months_low)
n_high <- length(months_high)

# Each arrow is a very short segment; only its closed arrow head is visible.
outlier_arrows <- data.frame(
  x = c(months_low, months_high),
  y = rep(c(-275000, 325000), times = c(n_low, n_high)),
  yend = rep(c(-276000, 326000), times = c(n_low, n_high))
)

p +
  # comma() comes from the scales package, which is never attached here, so
  # it is referenced through its namespace.
  scale_y_continuous(
    labels = scales::comma,
    breaks = seq(-300000, 350000, 50000)
  ) +
  labs(x = "Month-Year", y = "Cost Delta (Demand-DRP Forecast)") +
  # annotate() draws each label/arrow exactly once. A geom_text()/
  # geom_segment() layer with constant positions would inherit the plot data
  # and redraw the same mark once for every row of DRP.
  annotate(
    "text",
    x = outlier_labels$x, y = outlier_labels$y,
    label = outlier_labels$label, size = 3
  ) +
  annotate(
    "segment",
    x = outlier_arrows$x, xend = outlier_arrows$x,
    y = outlier_arrows$y, yend = outlier_arrows$yend,
    arrow = arrow(length = unit(0.3, "cm"), ends = "last", type = "closed"),
    colour = "red"
  )
相关问题