Find associated words within grouping variable(s).
word_cor( text.var, grouping.var = qdapTools::id(text.var), word, r =0.7, values =TRUE, method ="pearson",...)
Arguments
text.var: The text variable (or frequency matrix).
grouping.var: The grouping variables. Default uses each row as a group. Also takes a single grouping variable or a list of 1 or more grouping variables. Unlike other qdap functions, this cannot be NULL.
word: The word(s) vector to find associated words for.
r: The correlation level at which to find associated words. If positive, this is the minimum value; if negative, this is the maximum value. If NULL, a correlation matrix between the supplied words is returned instead.
values: logical. If TRUE returns the named correlates (names are the words). If FALSE only the associated words are returned.
method: A character string indicating which correlation coefficient is to be computed ("pearson", "kendall", or "spearman").
...: Other arguments passed to wfm.
Returns
Returns a vector of associated words, or a correlation matrix if r = NULL.
Note
Note that if a word has no variability in its usage across the grouping variable(s), its standard deviation will be 0, and cor will likely return a warning, as in this example: cor(rep(3, 10), rnorm(10)).
Examples
## Not run:
x <- factor(with(rajSPLIT, paste(act, pad(TOT(tot)), sep ="|")))
word_cor(rajSPLIT$dialogue, x,"romeo",.45)
word_cor(rajSPLIT$dialogue, x,"love",.5)

## Negative correlation
word_cor(rajSPLIT$dialogue, x,"you",-.1)
with(rajSPLIT, word_cor(dialogue, list(person, act),"hate"))

words <- c("hate","i","love","ghost")
with(rajSPLIT, word_cor(dialogue, x, words, r =.5))
with(rajSPLIT, word_cor(dialogue, x, words, r =.4))

## Set `r = NULL` to get matrix between words
with(rajSPLIT, word_cor(dialogue, x, words, r =NULL))

## Plotting
library(tm)
data("crude")
oil_cor1 <- apply_as_df(crude, word_cor, word ="oil", r=.7)
plot(oil_cor1)

oil_cor2 <- apply_as_df(crude, word_cor, word = qcv(texas, oil, money), r=.7)
plot(oil_cor2)
plot(oil_cor2, ncol=2)

oil_cor3 <- apply_as_df(crude, word_cor, word = qcv(texas, oil, money), r=NULL)
plot(oil_cor3)

## Run on multiple times/person/nested
## Split and apply to data sets
## Suggested use of stemming
DATA3 <- split(DATA2, DATA2$person)

## Find correlations between words per turn of talk by person
## Throws multiple warnings because small data set
library(qdapTools)
lapply(DATA3,function(x){ word_cor(x[,"state"], qdapTools::id(x), qcv(computer, i, no, good), r =NULL)})

## Find words correlated per turn of talk by person
## Throws multiple warnings because small data set
lapply(DATA3,function(x){ word_cor(x[,"state"], qdapTools::id(x), qcv(computer, i, no, good))})

## A real example
dat <- pres_debates2012
dat$TOT <- factor(with(dat, paste(time, pad(TOT(tot)), sep ="|")))
dat <- dat[dat$person %in% qcv(OBAMA, ROMNEY),]
dat$person <- factor(dat$person)
dat.split <- with(dat, split(dat, list(person, time)))

wrds <- qcv(america, debt, dollar, people, tax, health)
lapply(dat.split,function(x){ word_cor(x[,"dialogue"], x[,"TOT"], wrds, r=NULL)})

## Supply a matrix (make sure to use `t` on a `wfm` matrix)
worlis <- list( pronouns = c("you","it","it's","we","i'm","i"), negative = qcv(no, dumb, distrust, not, stinks), literacy = qcv(computer, talking, telling))
y <- wfdf(DATA$state, qdapTools::id(DATA, prefix =TRUE))
z <- wfm_combine(y, worlis)

out <- word_cor(t(z), word = c(names(worlis),"else.words"), r =NULL)
out
plot(out)

## Additional plotting/viewing
require(tm)
data("crude")

out1 <- word_cor(t(as.wfm(crude)), word ="oil", r=.7)
vect2df(out1[[1]],"word","cor")
plot(out1)
qheat(vect2df(out1[[1]],"word","cor"), values=TRUE, high="red", digits=2, order.by ="cor", plot=FALSE)+ coord_flip()

out2 <- word_cor(t(as.wfm(crude)), word = c("oil","country"), r=.7)
plot(out2)
## End(Not run)