# Rajarshi Guha
# 13/05/2005
#
# Implementation of the Fast Correlation Based Filter
# described in
#
# Yu, L. and Liu, H.; Feature Selection
# for High-Dimensional Data: A Fast Correlation Based Filter Solution,
# Proc. 20th Intl. Conf. Mach. Learn. (ICML-2003), Washington DC, 2003
#
# Require the functions in entropy.R which can be obtained from
# http://blue.chem.psu.edu/~rajarshi/code/R/#entropy

source('entropy.R')

# Return the element that follows `fp` in the vector `s`.
#
# s   vector to search
# fp  value to locate in `s` (its first occurrence is used)
#
# Returns s[i + 1] where i is the position of `fp`, or NA when `fp` is the
# last element of `s` or does not occur in `s` at all.
.get.next.elem <- function(s, fp) {
  idx <- match(fp, s)
  # match() gives NA for a missing value, so the "not found" case yields NA
  # instead of failing on a zero-length `if` condition.
  if (is.na(idx) || idx == length(s)) {
    NA
  } else {
    s[idx + 1]
  }
}

# Select a subset of the columns of `x` with the Fast Correlation Based Filter.
#
# x        two-dimensional object whose columns are the candidate features
#          (it is indexed as x[, j] and passed to apply() over columns)
# y        second argument handed to SU() together with each column of `x`
# thresh   columns whose SU() value against `y` is below this are discarded
# verbose  if TRUE, progress information is written to the console
#
# SU() comes from entropy.R; presumably it is the symmetrical uncertainty
# used in the paper cited above -- confirm against entropy.R.
#
# Returns a data.frame with one row per retained column: `idx` is the column
# index in `x` and `SU` is the value of SU() between that column and `y`.
# Stops with an error when no column reaches `thresh`.
fcbf <- function(x, y, thresh, verbose = FALSE) {
  nvar <- ncol(x)

  # Relevance of every column with respect to y.
  suic <- apply(x, 2, function(xx, yy) {
    SU(xx, yy)
  }, y)

  # Keep the columns at or above the threshold, most relevant first.
  keep <- which(suic >= thresh)
  sprime <- seq_len(nvar)[keep]
  sprime <- sprime[sort.list(suic[keep], decreasing = TRUE)]

  if (length(sprime) == 0) {
    stop("No prospective features for this threshold level")
  }

  # Note: this line is printed regardless of `verbose`.
  print(paste('Number of prospective features = ', length(sprime)))

  fp <- sprime[1]
  cnt <- 1
  while (TRUE) {
    if (verbose) {
      cat("Round ")
      cat(cnt, "\n")
      cnt <- cnt + 1
      print(paste('fp round ( |sprime| =', length(sprime), ')', sep = ' '))
    }

    # Compare fp against every column ranked after it in the current list.
    fq <- .get.next.elem(sprime, fp)
    while (!is.na(fq)) {
      fqp <- fq
      su1 <- SU(x[, fp], x[, fq])
      su2 <- SU(x[, fq], y)

      # Pick the successor before fqp is (possibly) removed from the list.
      fq <- .get.next.elem(sprime, fqp)

      # fqp is dropped when it is tied more strongly to fp than to y.
      # NOTE(review): the cited paper appears to use >= here -- confirm
      # before changing; the strict comparison is kept as originally written.
      if (su1 > su2) {
        sprime <- sprime[sprime != fqp]
        if (verbose) {
          cat(" ", su1, " ", su2, " ", "Removed feature ", fqp, "\n")
        }
      }
    }

    fp <- .get.next.elem(sprime, fp)
    if (is.na(fp)) {
      break
    }
  }

  # Report the surviving columns together with their SU() against y.
  if (length(sprime) > 1) {
    suvalues <- apply(x[, sprime], 2, function(xx, yy) {
      SU(xx, yy)
    }, y)
    data.frame(idx = sprime, SU = suvalues)
  } else {
    data.frame(idx = sprime, SU = SU(x[, sprime], y))
  }
}