Friday, March 8, 2013

Cross-platform parallel download of Yahoo Finance data

### Define directories
### Pick the data directory for the current platform, then make it the
### working directory so the relative file names used below resolve there.
currentdir <- switch(.Platform$OS.type,
                     windows = "c:/R/sp500",
                     "~/R/sp500")
setwd(currentdir)

### Download S&P 500 Data to R
### Download stock prices of companies included in S&P 500 index.
### Download from finance.yahoo
### sp500.csv contains a list of nearly all 500 companies
### loop to download all the data for every company in the list.


### Parallel implementation
### package doMC not available on Windows

library(quantmod)
library(tseries)
library(timeDate)
library(foreach)
# Ticker list read from sp500.csv (no header row); the first column holds
# the ticker symbols used by the download loop.
symbols <- read.csv("sp500.csv", header = FALSE, stringsAsFactors = FALSE)
# Number of tickers to download (one per row of the CSV).
nStocks <- nrow(symbols)
# First date of the requested price history.
dateStart <- "1999-12-31"


# doSNOW library also available on LINUX
# efficiency untested

### Parallel download of adjusted closing prices.
###
### 1. Register a foreach backend suited to the platform.
### 2. Download one price series per ticker in parallel and merge the
###    series column-wise into `z`.
### 3. Always release the backend, then save `z` to a platform-specific file.
###
### Reads `symbols`, `nStocks` and `dateStart` defined earlier in the script.

if (.Platform$OS.type == "windows") {

  # WINDOWS: doSNOW backend (doMC is not available on Windows)
  library(doSNOW)
  cl <- makeCluster(8) # number of worker processes to be used
  registerDoSNOW(cl)
  rdata_file <- "sp500.RData"

} else {

  # LINUX: doMC backend
  library(doMC)
  cl <- NULL # no cluster object to stop with doMC
  # Ask the machine for its core count. getDoParWorkers() only reports the
  # currently registered backend, so calling it before registration does not
  # reflect the hardware.
  ncores <- parallel::detectCores()
  if (is.na(ncores)) {
    ncores <- 1L
  }
  registerDoMC(cores = ncores) # number of CPU cores to be used
  rdata_file <- "sp500linux.RData"

}

z <- tryCatch(
  foreach(i = seq_len(nStocks),
          .combine = merge.xts,
          # Worker sessions need these packages for get.hist.quote()/as.xts().
          .packages = c("tseries", "xts"),
          # A ticker whose download fails is dropped instead of aborting
          # the whole run.
          .errorhandling = "remove") %dopar% {
    cat("Downloading ", i, " out of ", nStocks, "\n")
    ticker <- symbols[i, 1]
    x <- get.hist.quote(instrument = ticker,
                        start = dateStart,
                        quote = "AdjClose",
                        retclass = "zoo",
                        quiet = TRUE)
    colnames(x) <- ticker
    ### as.xts(x) is more efficient
    as.xts(x)
  },
  finally = {
    # Release the workers even when the loop fails, and fall back to the
    # sequential backend so later %dopar% calls do not use a stopped cluster.
    if (!is.null(cl)) {
      stopCluster(cl)
    }
    registerDoSEQ()
  }
)

# Report how many tickers made it into the merged result.
n_downloaded <- if (is.null(z)) 0L else NCOL(z)
if (n_downloaded < nStocks) {
  message(nStocks - n_downloaded, " of ", nStocks,
          " symbols could not be downloaded and were skipped")
}

# Save the merged series; `x` only ever exists inside the workers.
save(z, file = rdata_file)

No comments: