Question

我的数据集包含以下3列：

date client_id sales
01/01/2012 client 1 $1000
02/01/2012 client 1 $900
...
...
12/01/2014 client 1 $1000
01/01/2012 client 2 $300
02/01/2012 client 2 $450
...
..
12/01/2014 client 2 $375

等98个其他客户（每个客户24个月数据点）

我有多个客户（大约100个）……每个客户的数据都是时间序列格式（24个月度数据点）。

如何在R中使用auto.arima自动预测所有100个客户的销售额？是否有类似 by 语句（按客户分组）的选项？还是我必须使用循环？

谢谢

Answer 1

您始终可以使用lapply()：

lapply(tsMat, function(x) {
  # Select an ARIMA model for this series, then forecast from the fitted model.
  fit <- auto.arima(x)
  forecast(fit)
})

下面是一个小例子：

library(forecast)

# Generate some time series: 100 simulated ARMA(1,1) series with 24 monthly
# observations each. The seed is fixed so the example is reproducible.
set.seed(1)
sales <- replicate(
  100,
  arima.sim(n = 24, list(ar = c(0.8), ma = c(-0.2)), sd = sqrt(0.1))
)
dates <- seq(as.Date("2012/1/1"), by = "month", length.out = 24)
df <- data.frame(
  date = rep(dates, 100),
  client_id = rep(1:100, each = 24),
  sales = c(sales)
)

# Reshape to wide format: one row per month, one column per client.
# value.var is given explicitly so dcast() does not have to guess it.
wide <- reshape2::dcast(df, date ~ client_id, value.var = "sales")

# Drop the date column before building the ts object; otherwise ts() coerces
# it to numeric and it is forecast as if it were an extra client series.
tsMat <- ts(wide[, -1], start = 2012, frequency = 12)

# Forecast each client's series with a model selected by auto.arima.
output <- lapply(tsMat, function(x) forecast(auto.arima(x)))

Answer 2

您还可以在 forecast() 调用中使用 `h = <期数>` 来指定要向未来预测的期数：

# Forecast 67 periods ahead for every series in ts.allStates and collect the
# results in a data frame.
Forecast.allStates <- as.data.frame(
  lapply(ts.allStates, function(x) forecast(auto.arima(x), h = 67))
)

Answer 3

另一种选择可能是tsibble和fable：

library(tsibble)
library(fable)
library(dplyr)

# Convert the index to yearmonth first so the tsibble is built with a monthly
# interval, then fit one ARIMA model per client_id and forecast one year ahead.
df %>%
  mutate(date = yearmonth(date)) %>%
  as_tsibble(key = client_id, index = date) %>%
  model(arima = ARIMA(sales)) %>%
  forecast(h = "1 year")
#> # A fable: 1,200 x 5 [1M]
#> # Key:     client_id, .model [100]
#>    client_id .model     date           sales   .mean
#>        <int> <chr>     <mth>          <dist>   <dbl>
#>  1         1 arima  2014 gen N(0.072, 0.089)  0.0718
#>  2         1 arima  2014 feb   N(0.28, 0.11)  0.281 
#>  3         1 arima  2014 mar   N(0.35, 0.12)  0.351 
#>  4         1 arima  2014 apr  N(0.024, 0.12)  0.0242
#>  5         1 arima  2014 mag  N(-0.16, 0.12) -0.162 
#>  6         1 arima  2014 giu  N(0.029, 0.12)  0.0292
#>  7         1 arima  2014 lug   N(0.24, 0.12)  0.243 
#>  8         1 arima  2014 ago   N(0.11, 0.12)  0.110 
#>  9         1 arima  2014 set   N(0.37, 0.12)  0.374 
#> 10         1 arima  2014 ott   N(0.37, 0.12)  0.369 
#> # ... with 1,190 more rows

其中df是：

# Reproducible example data: 100 simulated ARMA(1,1) series of 24 monthly
# observations, stacked into a long data frame with one row per client-month.
set.seed(1)
sales <- replicate(
  100,
  arima.sim(model = list(ar = 0.8, ma = -0.2), n = 24, sd = sqrt(0.1))
)
dates <- seq(as.Date("2012-01-01"), by = "month", length.out = 24)
df <- data.frame(
  date = rep(dates, times = 100),
  client_id = rep(1:100, each = 24),
  sales = as.vector(sales)
)

使用auto.arima预测R中的多个时间序列

3 个答案: