Download and Parse DJ/UBS Commodities Indexes

Here is another data downloading and parsing script, this one for the Dow Jones/UBS Commodities Indexes. Compared to the last post, this parser deals with multiple sheets and multiple columns in each sheet. It also constructs monthly series from the daily data, and stores them under a different symbol. Finally, it’s a good example of how a script can be turned into a function, perhaps for use with cron.

Let’s look at the code…

# Download the DJ/UBS Commodities Index history workbook and store every index
# it contains as daily and monthly xts series on disk.
#
# For each data column of the "Excess Return" and "Total Return" sheets the
# function
#   * saves a daily series with "Close" and "Returns" columns to
#     <filesroot>/<symbol>.IDX/<symbol>.IDX.rda,
#   * saves a monthly series with the same columns to
#     <filesroot>/<symbol>.M.IDX/<symbol>.M.IDX.rda,
#   * defines an instrument() for both symbols,
# and finally registers the storage location with setSymbolLookup.FI().
#
# Args:
#   filesroot: Root data directory.  It and its ".incoming" sub-folder (where
#              the spreadsheet is downloaded) are created when missing.
#
# Called for its side effects (download, files on disk, instrument
# definitions, printed progress messages).  Stops with an error when the
# spreadsheet could not be obtained.
download.DJUBS <- function (filesroot = "~/Data/DJUBS") {

  # Load needed packages.  library() stops right away when a package is
  # missing, whereas require() only returns FALSE and lets the failure surface
  # later in a less obvious place.
  library(zoo)
  library(gdata)
  library(FinancialInstrument)
  library(quantmod)
  library(PerformanceAnalytics)  # provides Return.calculate()

  xls_file <- "DJUBS_full_hist.xls"
  xls_url <- "http://www.djindexes.com/mdsidx/downloads/xlspages/ubsci_public/DJUBS_full_hist.xls"
  incoming <- file.path(filesroot, ".incoming")

  # Create the root directory if it doesn't exist (including missing parents)
  if (!file.exists(filesroot))
    dir.create(filesroot, recursive = TRUE, mode = "0777")

  # Create the .incoming directory if it doesn't exist
  if (!file.exists(incoming))
    dir.create(incoming, mode = "0777")

  # Work inside .incoming, but give the caller's working directory back on
  # exit (normal or error).
  old_wd <- setwd(incoming)
  on.exit(setwd(old_wd), add = TRUE)

  # Remove the old file from .incoming
  if (file.exists(xls_file))
    file.remove(xls_file)

  # Download the xls workbook directly from the web site.  download.file()
  # avoids depending on an external wget binary; mode = "wb" keeps the binary
  # workbook intact on every platform.
  print("Downloading excel spreadsheet from DJUBS web site...")
  downloaded <- tryCatch(
    download.file(xls_url, destfile = xls_file, mode = "wb") == 0,
    error = function(e) FALSE
  )
  if (!isTRUE(downloaded))
    unlink(xls_file)  # discard whatever a failed download left behind

  if (!file.exists(xls_file))
    stop(paste("No spreadsheet exists.  Download the spreadsheet to be processed from www.djindexes.com into ", filesroot, "/.incoming", sep=""))

  sheetnames <- c("Excess Return", "Total Return")
  for (sheet in sheetnames) {
    print(paste("Reading", sheet, "sheet... This will take a moment..."))
    x <- read.xls(xls_file, sheet = sheet)

    # Row 2 holds the symbol names (used as column names), row 1 holds the
    # descriptions that are later attached as attributes.
    colnames(x) <- unname(vapply(x[2, ], as.character, character(1)))
    x.attr <- t(as.data.frame(x[1, ], stringsAsFactors = FALSE))
    x <- x[-1:-2, ]

    # Get rid of the last line, which contains the disclaimer
    x <- x[-dim(x)[1], ]

    # Remove blank columns between sections.  A logical mask is used because
    # x[, -which(...)] would drop *every* column when no column is blank.
    blank <- vapply(x, function(column) all(is.na(column)), logical(1))
    x <- x[, !blank, drop = FALSE]

    # Get attributes and labels
    # NOTE(review): this indexing (and categoryNames[, i] below) assumes that
    # x.attr has one column per spreadsheet column and that positions still
    # line up with x after the blank columns were dropped -- confirm against
    # a real workbook.
    categoryNames <- x.attr[, !is.na(x.attr)]
    symbolNames <- paste(make.names(colnames(x)), ".IDX", sep = "")
    symbolNamesMonthly <- paste(make.names(colnames(x)), ".M.IDX", sep = "")
    ISOdates <- as.Date(x[, 1], "%m/%d/%Y")

    # Column 1 is the date; every remaining column is one index.
    # seq_len()[-1] is empty when there is no data column, unlike 2:ncol(x).
    data_cols <- seq_len(ncol(x))[-1]

    # Make sure a directory exists for each symbol, daily and monthly
    for (symbol_dir in c(symbolNames[data_cols], symbolNamesMonthly[data_cols])) {
      dir.create(file.path(filesroot, symbol_dir), showWarnings = FALSE,
                 recursive = FALSE, mode = "0777")
    }

    # Parse the columns into individual price objects
    print("Processing columns as symbols...")
    for (i in data_cols) {
      # Go through as.character() first: when the column was read as a factor,
      # as.numeric() alone returns the internal level codes, not the prices.
      close_values <- as.numeric(as.character(x[, i]))

      # The object names x.xts / x.m.xts are kept on purpose: save() stores
      # the object under its variable name.
      x.xts <- as.xts(close_values, order.by = ISOdates)
      R.xts <- Return.calculate(x.xts)
      x.xts <- cbind(x.xts, R.xts)
      colnames(x.xts) <- c("Close", "Returns")
      xtsAttributes(x.xts) <- list(Description = paste(categoryNames[,i], sheet, "Index"))

      save(x.xts, file = file.path(filesroot, symbolNames[i], paste(symbolNames[i], ".rda", sep = "")))
      print(paste(symbolNames[i],", ",categoryNames[,i], ", ", sheet, sep=""))

      # Describe the metadata for each index
      instrument(symbolNames[i], currency="USD", multiplier=1, tick_size=.01, start_date=head(index(x.xts),1), description=paste(categoryNames[,i], "Index"), data="CR", source="DJUBS", frequency="Daily", assign_i=TRUE)

      # Construct a monthly series from the daily series; column 4 of the
      # to.monthly() result is used as the monthly close.
      x.m.xts <- to.monthly(Cl(x.xts))
      x.m.xts <- cbind(x.m.xts[, 4], Return.calculate(x.m.xts[, 4]))
      colnames(x.m.xts) <- c("Close", "Returns")

      # @ TODO Want to delete the last line off ONLY IF the month is incomplete
      last_daily <- tail(index(x.xts), 1)
      if (last_daily != as.Date(as.yearmon(last_daily), frac = 1)) {
        # That test isn't quite right, but it's close.  It won't work on the
        # first day of a new month when the last business day wasn't the last
        # day of the month.  It will work for the second day.
        x.m.xts <- x.m.xts[-dim(x.m.xts)[1], ]
      }

      # Index is set to last trading day of the month.
      # Reset index to last day of the month to make alignment easier with
      # other monthly series.
      index(x.m.xts) <- as.Date(index(x.m.xts), frac = 1)

      xtsAttributes(x.m.xts) <- list(Description = paste(categoryNames[,i], sheet, "Index"))

      save(x.m.xts, file = file.path(filesroot, symbolNamesMonthly[i], paste(symbolNamesMonthly[i], ".rda", sep = "")))
      print(paste(symbolNamesMonthly[i],", ",categoryNames[,i], ", ", sheet, sep=""))

      # Describe the metadata for each index
      instrument(symbolNamesMonthly[i], currency="USD", multiplier=1, tick_size=.01, start_date=head(index(x.xts),1), description=paste(categoryNames[,i], "Index"), data="CR", source="DJUBS", frequency="Monthly", assign_i=TRUE)
    }
  }

  setSymbolLookup.FI(base_dir=filesroot, split_method='common')
  print( "Now, whenever you log in you need to register the instruments.  This")
  print( "might be a line you put into .Rprofile so that it happens automatically:")
  print( "> require(quantmod) # this requires a dev build after revision 560 or so.")
  print( "> setSymbolLookup.FI(base_dir=filesroot, split_method='common')")
  print( "Now you should be able to type:")
  print( "> getSymbols('DJUBSTR.IDX') ")
}

The data source provides daily data for the indexes, so the script constructs monthly time series for those symbols as well. Once the data is parsed and stored, we can do some analysis. In a fresh R session, I would do the following to prepare the workspace:

# Attach the packages used for retrieval, instrument lookup and charting
library(quantmod)
library(FinancialInstrument)
library(PerformanceAnalytics)

# Register the directory that holds the parsed DJUBS series
setSymbolLookup.FI(
  base_dir = "~/Data/DJUBS",
  split_method = "common"
)

Now I’ll retrieve the data from disk…

# Load the daily DJUBSTR.IDX series into the workspace ...
getSymbols("DJUBSTR.IDX")
# ... and show its most recent rows
tail(DJUBSTR.IDX)

and chart the DJUBS Total Returns Index:

# Performance summary of the daily "Returns" column, on a log-scale wealth index
charts.PerformanceSummary(
  DJUBSTR.IDX[, "Returns"],
  ylog = TRUE,
  wealth.index = TRUE,
  main = "DJUBS Total Returns Index Returns"
)

Or I can use quantmod’s charts:

# Chart the "Close" column of the daily series with quantmod's white theme
chartSeries(
  Cl(DJUBSTR.IDX),
  theme = "white"
)

Those are the daily series, but I want to look at real returns during the last couple of decades so I’ll use the monthly series instead:

# Monthly (.M.IDX) series to load from disk
symbols <- c(
  "DJUBSTR.M.IDX",
  "DJUBS.M.IDX",
  "DJUBSSP.M.IDX"
)
getSymbols(symbols)

The code is available on R-Forge, in the /inst/parsers directory of the FinancialInstrument package.

One thought on “Download and Parse DJ/UBS Commodities Indexes”

Costas says:

August 27, 2012 at 2:56 pm

Hi,

Great idea. However:

Maybe I am doing something wrong but when I issue

getSymbols("DJUBSTR.IDX")
chartSeries(Cl(DJUBSTR.IDX), theme="white")

I get a very funny plot for this index which does not correspond to the actual series… It seems it is joining prices from different parts of the spreadsheet…

Also:

charts.PerformanceSummary(DJUBSTR.IDX[,"Returns"], ylog=TRUE, wealth.index=TRUE, main = "DJUBS Total Returns Index Returns")

returns

harts.PerformanceSummary(DJUBSTR.IDX[,”Returns”], ylog=TRUE, wealth.index=TRUE, main = “DJUBS Total Returns Index Returns”)

> sessionInfo()
R version 2.14.1 (2011-12-22)
Platform: i686-pc-linux-gnu (32-bit)

locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=C LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C

attached base packages:
[1] stats graphics grDevices utils datasets methods base

other attached packages:
[1] FinancialInstrument_1.0 quantmod_0.3-17
[3] TTR_0.21-1 Defaults_1.1-1
[5] gdata_2.11.0 PerformanceAnalytics_1.0.4.4
[7] xts_0.8-6 zoo_1.7-7

loaded via a namespace (and not attached):
[1] grid_2.14.1 gtools_2.7.0 lattice_0.20-0

tradeblotter

"Information isn’t what’s scarce; it’s the willingness to do something with it." — Tyler Cowen