# HV_stemming.R

# Uploaded by: acooper

              #Show how words get stemmed and which other words have the same stems
library("tm")
library("RMySQL")
#for manual processing. Overridden when run as a service
entry.words<-"gamification"
source.type<-"b"

if(exists("GET")){
  entry.words<-ifelse(is.null(GET$words),"",GET$words)
  source.type<-ifelse(is.null(GET$source_type),"b",GET$source_type)
}
entry.words<-unlist(strsplit(entry.words," "))
word.list<-list((entry.words))[[1]]
if(length(word.list)>0){
source("/data/user-data/acooper/public/services/private/TMWS_Admin/mySQL_TMWS.R")
cat("

Stemming Analysis

") cat("

word -> stem: {frequent words with the same stem}

") for(i in 1:length(word.list)){ word<-tolower(word.list[i]) stem<-stemDocument(word) other.words<-dbGetQuery(db,paste("SELECT word, cnt from dictionary where stem ='",stem,"'",sep="")) min.cnt<-other.words$cnt[which(other.words$word==word)]/5 selected.words<-other.words$word[other.words$cnt>=min.cnt] cat("

") cat(paste(word,"->",stem,": {",paste(selected.words, collapse=", "),"}")) cat("

") } cat("

The databse is queried for the stem. "Frequent" means a word is at least 20% as frequent as the entered word.

") }