# Rjarshi Guha # 08/17/2006 # require(XML) vot2df <- function(doc, url=FALSE, isString=FALSE) { tlist <- list() tcount <- 1 if (isString) { tn <- tempfile('votmp') con <- file(tn, open='w') cat(doc, file=con) close(con) x <- xmlTreeParse(tn) } else { x <- xmlTreeParse(doc, isURL=url) } root <- xmlRoot(x) tables <- xmlElementsByTagName(root, 'TABLE', recursive=TRUE) for (aTable in tables) { fields <- xmlElementsByTagName(aTable, 'FIELD') fieldnames <- lapply(fields, xmlGetAttr, name='name') fieldtypes <- lapply(fields, xmlGetAttr, name='datatype', default='char') tdata <- xmlElementsByTagName(aTable, 'TABLEDATA', recursive=TRUE) if (length(tdata) == 0) { warn("Looks like I couldn't find a table") } rowdata <- xmlElementsByTagName(tdata[[1]], 'TR') tmp <- list() i <- 1 for (aRow in rowdata) { items <- xmlElementsByTagName(aRow, 'TD') # the TD elements tmp[[i]] <- lapply( items, function(x) { val <- xmlValue(x) if (is.null(val)) val <- NA return(val) }) # value of the TD elements, NA is value missing i <- i+1 } ncol <- unique(sapply(tmp, length)) if (length(ncol) > 1) { warn("A table had non-uniform rows!") } ## make the initial data.frame tmp <- as.data.frame(do.call(rbind, tmp)) ## set the column names if (length(fieldnames) > 0) names(tmp) <- fieldnames ## set column types for (i in 1:length(fieldtypes)) { col <- unlist(tmp[,i]) if (fieldtypes[i] %in% c('double', 'float', 'long', 'short')) tmp[,i] <- as.double(col) else if (fieldtypes[i] == 'int') tmp[,i] <- as.integer(col) else if (fieldtypes[i] == 'char') tmp[,i] <- col else if (fieldtypes[i] == 'boolean') tmp[,i] <- ifelse( col %in% c('T','t','1'), TRUE, FALSE ) } # get the table description desc <- xmlElementsByTagName(aTable, 'DESCRIPTION') if (length(desc)) attr(tmp, 'description') <- xmlValue(desc[[1]]) tlist[[tcount]] <- tmp tcount <- tcount + 1 } if (length(tlist) == 1) { return(tlist[[1]]) } else { return(tlist) } }