使用雅虎财经数据测试两只股票的协整

时间:2010-12-02 03:29:59

标签: r yahoo zoo quantmod

我正在尝试使用雅虎财经的数据对两只股票进行协整检验。从我读到的资料来看,获取 Yahoo 数据的方法并不复杂。我需要获取两只证券的数据,将它们分别定义为 stk1 和 stk2,并且能够调整所获取数据的时间范围。以下是我目前写出的代码。

library(zoo)            
library(tseries)        

# Download one ticker's daily history from Yahoo Finance and return its
# adjusted close as a zoo series indexed by date.
#
# symbol: ticker symbol as a character string (e.g. "CAT").
# Returns a zoo object built from column 7 (adjusted close) and column 1
# (dates) of the downloaded CSV.
#
# NOTE(review): the a/b/c and d/e/f query parameters presumably encode the
# start and end dates; d=12 should be verified against the API, since the
# month fields may be zero-based -- confirm before relying on the end date.
read_adj_close <- function(symbol) {
  url <- paste0(
    "http://ichart.finance.yahoo.com/table.csv?s=", symbol,
    "&a=8&b=1&c=2009&d=12&e=31&f=2010&g=d&ignore=.csv"
  )
  # FALSE rather than F: F is an ordinary variable that can be reassigned.
  raw <- read.csv(url, stringsAsFactors = FALSE)
  # The first column contains dates as strings; the seventh column contains
  # the adjusted close.
  zoo(raw[, 7], as.Date(raw[, 1]))
}

stk1 <- read_adj_close("CAT")
stk2 <- read_adj_close("DD")

# merge() combines the two zoo objects; all = FALSE keeps only the dates
# present in both series (intersection), all = TRUE would keep the union.
# The resulting columns are named after the arguments: stk1 and stk2.
pair_zoo <- merge(stk1, stk2, all = FALSE)

# Convert to a data frame for lm(). Named pair_df rather than t so the base
# transpose function t() is not shadowed.
pair_df <- as.data.frame(pair_zoo)

# Tell the user what dates are spanned by the data.
cat("Date range is", format(start(pair_zoo)), "to", format(end(pair_zoo)), "\n")

# Regress stk1 on stk2 with no intercept (+ 0); the single slope coefficient
# is used as the hedge ratio.
m <- lm(stk1 ~ stk2 + 0, data = pair_df)
hedge_ratio <- coef(m)[[1]]
cat("Assumed hedge ratio is", hedge_ratio, "\n")

# Spread between the two series after scaling stk2 by the hedge ratio.
sprd <- pair_df$stk1 - hedge_ratio * pair_df$stk2

# Augmented Dickey-Fuller test on the spread with zero lag terms (k = 0).
# NOTE(review): the hedge ratio is estimated from the same data, so the ADF
# p-value is only an approximate cointegration test; tseries::po.test may be
# a better fit -- confirm.
ht <- adf.test(sprd, alternative = "stationary", k = 0)
cat("ADF p-value is", ht$p.value, "\n")

if (ht$p.value < 0.05) {
  cat("The spread is likely mean-reverting\n")
} else {
  cat("The spread is not mean-reverting.\n")
}

有哪些工具可以使这更容易和/或更健壮?

1 个答案:

答案 0 :(得分:2)

quantmod 为 Yahoo(以及其他数据提供商)的数据提供了一个非常好用的接口:

library(quantmod)
library(tseries)

# Fetch price history for both tickers starting 2009-01-01. The result of
# getSymbols() is assigned directly because auto.assign = FALSE is passed.
stk1 <- getSymbols("DD", from = "2009-01-01", auto.assign = FALSE)
stk2 <- getSymbols("CAT", from = "2009-01-01", auto.assign = FALSE)

# Ad() is a quantmod helper; it is applied to each series before merging.
# all = FALSE is passed to merge() when combining the two.
pair <- merge(Ad(stk1), Ad(stk2), all = FALSE)

first_day <- format(start(pair))
last_day <- format(end(pair))
cat("Date range is", first_day, "to", last_day, "\n")

# Build the no-intercept regression formula from the merged column names,
# so the code does not depend on which tickers were requested.
cols <- colnames(pair)
eqn <- as.formula(paste(cols[1], "~ 0 +", cols[2]))

# lm() accepts the merged object directly, so no data.frame conversion is
# done here.
m <- lm(eqn, data = pair)
beta <- coef(m)[1]

cat("Assumed hedge ratio is", beta, "\n")

# Columns are selected by position because their names vary with the
# tickers.
sprd <- pair[, 1] - beta * pair[, 2]

# Augmented Dickey-Fuller test on the spread with zero lag terms (k = 0).
ht <- adf.test(sprd, alternative = "stationary", k = 0)

cat("ADF p-value is", ht$p.value, "\n")

# Choose the message first, then print it once.
verdict <- if (ht$p.value < 0.05) {
  "The spread is likely mean-reverting\n"
} else {
  "The spread is not mean-reverting.\n"
}
cat(verdict)