utils.R

Uploaded by:webmgr

              #Licensed under http://www.gnu.org/licenses/gpl-3.0.html
#author: Robert Koblischke

build_webservice_response <- function(xml, ltfllns=T) {
	if (ltfllns) {
		#xml = paste('', xml, '', sep="");
		xml = paste('', xml, '', sep="");
	} else {
		xml = paste('', xml, '', sep="");
	}
	return(xml)
}

trimStopwords <- function(content, language) {
	require(tm)
	content <-	strsplit(tolower(content),"\\s+")[[1]]
	while (content[1] %in% stopwords(language)) {
		content <- content[2:length(content)]
	}
	while (content[length(content)] %in% stopwords(language)) {
		content <- content[1:(length(content)-1)]
	}
	if (length(content)>0) {
		return(paste(content, collapse=" "))
	} else {
		return("")
	}
}

getVectorDir <- function(spaceId, globalVectors) {
    paste(globalVectors,spaceId,sep="/")
}

getFileUserSentences <- function (user, vectorDir) {
    paste(vectorDir,"/users/", gsub("/","_",user),".RData",sep="")
}

loadSynthesisPdocs <- function(user, spaceId, synthesisSentences, globalVectors) {
    
    #Load existing pdocs
    vectorDir <- getVectorDir(spaceId, globalVectors)
    fileUserSentences <- getFileUserSentences(user, vectorDir)
    
    result <- try({load(fileUserSentences)},T)
    if (class(result)=="try-error" && !exists("pdocs")) {
            rapache_log(paste("Cannot open vector file", fileUserSentences,". Generating a new one."))
            if (!file.exists(vectorDir)) buildFileStructure(vectorDir)
            
            #generate empty matrix with as many columns as space dimensions
            space <- loadSpace(spaceId); #assign space variable globally - not only in function environment
            pdocs <- matrix(nrow=0,ncol=length(space$sk))
    }

    #check for missing pdocs, calculate and save if new
    missing_ids <- which(!synthesisSentences$id %in% rownames(pdocs))

    if (length(missing_ids)>0) {
	rapache_log("Creating and saving new vectors for synthesis sentences")
        rapache_log(paste(missing_ids,collapse=", "))
	if (!exists("space")) space <- loadSpace(spaceId)
	require(lsa)
	# missing_pdocs <- pseudodocs(synthesisSentences$sentence[missing_ids], space, preprocess=T)
	missing_pdocs <- query(synthesisSentences$sentence[missing_ids], rownames(space$tk))
	rownames(missing_pdocs) <- synthesisSentences$id[missing_ids]
	pdocs <- rbind(pdocs, missing_pdocs)
	save(pdocs, file=fileUserSentences)
    }

    pdocs[as.character(synthesisSentences$id),]
}

getLastVersion <- function(con, synthesisId) {
    sql = paste("SELECT MAX(idVersion) as id FROM versions WHERE idSynthesis='",synthesisId,"'",sep="")
    result = dbGetQuery(con, sql)
    last_version = 0
    if(nrow(result)!=0){
        last_version = result$id
    }
    
    last_version
}

getSynthesisSentences <- function(con, synthesisId, version) {
    sql <- sprintf("SELECT linessynthesis_id as id, linessynthesis_sentence as sentence FROM linessynthesis, versions
                        WHERE versions.idSynthesis = '%d' AND linessynthesis.linessynthesis_id = versions.idSentence AND versions.idVersion='%d' ORDER BY linePos",
                        synthesisId,
                        version)
    synthesisSentences = dbGetQuery(con,sql)
    Encoding(synthesisSentences$sentence) <- "latin1"
    
    synthesisSentences
}

getDomainId <- function(con, synthesisId){
    sql = paste("SELECT domain_id AS id FROM work WHERE synthesis_id='", synthesisId, "'",sep="")
    result = dbGetQuery(con,sql)
    
    #maybe needs some error handling?
    result$id
}

getCourseSentences <- function(con, domainId) {
    sql <- sprintf("SELECT linescourse_id as id, linescourse_sentence as sentence FROM linescourse, course WHERE (linescourse.course_id = course.course_id) AND (course.domain_id = '%d') ORDER BY `linescourse`.`course_id`, `linescourse_nb`",
                    domainId)
    courseSentences <- dbGetQuery(con,sql)
    Encoding(courseSentences$sentence) <- "latin1"
    
    courseSentences
}

getFileCourseSentences <- function (domainId, vectorDir) {
    paste(vectorDir,"/domains/",domainId,".RData",sep="")
}

loadCoursePdocs <- function(domainId, spaceId, courseSentences, globalVectors) {
    
    #Load existing pdocs
    vectorDir <- getVectorDir(spaceId, globalVectors)
    fileCourseSentences <- getFileCourseSentences(domainId, vectorDir)
    
    result <- try({load(fileCourseSentences)},T)
    if (class(result)=="try-error" && !exists("pdocs")) {
            rapache_log(paste("Cannot open vector file", fileCourseSentences,". Generating a new one."))
            if (!file.exists(vectorDir)) buildFileStructure(vectorDir)
            
            #generate empty matrix with as many columns as space dimensions
            load("/data/user-data/fwild/public/data/openlearn-space.rda")
	    #space <- loadSpace(spaceId); #assign space variable globally - not only in function environment
            pdocs <- matrix(nrow=0,ncol=length(space$sk))
    }

    #check for missing pdocs, calculate and save if new
    missing_ids <- which(!courseSentences$id %in% rownames(pdocs))

    if (length(missing_ids)>0) {
	rapache_log("Creating and saving new vectors for course sentences")
        rapache_log(paste(missing_ids,collapse=", "))
	if (!exists("space")) space <- loadSpace(spaceId)
	missing_pdocs <- pseudodocs(courseSentences$sentence[missing_ids], space, preprocess=T)
	rownames(missing_pdocs) <- courseSentences$id[missing_ids]
	pdocs <- rbind(pdocs, missing_pdocs)
        rapache_log(fileCourseSentences)
	save(pdocs, file=fileCourseSentences)
        rapache_log("..")
    }

    pdocs[as.character(courseSentences$id),]
}

buildFileStructure <- function(vectorDir) {
    cmd <- paste("mkdir",vectorDir)
    system(cmd)
    cmd <- paste("mkdir",paste(vectorDir,"users",sep="/"))
    system(cmd)
    cmd <- paste("mkdir",paste(vectorDir,"domains",sep="/"))
    system(cmd)
}