Compute classification or regression error metrics.
Usage
mmetric(y, x = NULL, metric, D = 0.5, TC = -1, val = NULL, aggregate = "no")
Arguments
y: if there are predictions (!is.null(x)), y should be a numeric vector or factor with the desired target responses (output values).
Otherwise, y should be a list returned by the mining function.
x: the predictions (a numeric vector if task="reg", a matrix if task="prob", or a factor if task="class"); used only if y is not a list.
metric: an R function or a character value (or vector of such values).
Note: if an R function, it should return lower values for better models if it is meant to be used within the search argument of fit and mining (i.e., "<" meaning); a sketch is given after the list of options below.
Valid character options are (">" means "better" if higher value; "<" means "better" if lower value):
ALL -- returns all classification or regression metrics (context dependent, multi-metric).
if vector -- returns all metrics included in the vector, vector elements can be any of the options below (multi-metric).
ACC -- classification accuracy rate, equal to the micro averaged F1 score (classification, ">", [0-100%]).
macroACC -- macro average ACC score, for multiclass tasks (classification, ">", [0-100%]).
weightedACC -- weighted average ACC score, for multiclass tasks (classification, ">", [0-100%]).
CE -- classification error or misclassification error rate (classification, "<", [0-100%]).
MAEO -- mean absolute error for ordinal classification (classification, "<", [0,Inf[).
MSEO -- mean squared error for ordinal classification (classification, "<", [0,Inf[).
KENDALL -- Kendall's coefficient for ordinal classification or (mean if) ranking (classification, ">", [-1;1]). Note: if ranking, y is a matrix and the mean metric is computed.
SPEARMAN -- Mean Spearman's rho coefficient for ranking (classification, ">", [-1;1]). Note: if ranking, y is a matrix and mean metric is computed.
BER -- balanced error rate (classification, "<", [0-100%]).
KAPPA -- kappa index (classification, "<", [0-100%]).
CRAMERV -- Cramer's V (classification, ">", [0,1.0]).
ACCLASS -- classification accuracy rate per class (classification, ">", [0-100%]).
BAL_ACC -- balanced accuracy rate per class (classification, ">", [0-100%]).
TPRATFPR -- the TPR (given a fixed val=FPR, classification "prob", ">", [0,1.0]).
LIFT -- accumulative percent of responses captured (LIFT accumulative curve, classification "prob", list with several components).
ALIFT -- area of the accumulative percent of responses captured (LIFT accumulative curve, classification "prob", ">", [0,1.0]).
NALIFT -- normalized ALIFT (given a fixed val=percentage of examples, classification "prob", ">", [0,1.0]).
ALIFTATPERC -- ALIFT value (given a fixed val=percentage of examples, classification "prob", ">", [0,1.0]).
SAE -- sum absolute error/deviation (regression, "<", [0,Inf[).
MAE -- mean absolute error (regression, "<", [0,Inf[).
MdAE -- median absolute error (regression, "<", [0,Inf[).
GMAE -- geometric mean absolute error (regression, "<", [0,Inf[).
MaxAE -- maximum absolute error (regression, "<", [0,Inf[).
NMAE -- normalized mean absolute error (regression, "<", [0%,Inf[). Note: by default, this metric assumes the range of y as the denominator of NMAE; a different range can be set by setting the optional val argument (see example).
RAE -- relative absolute error (regression, "<", [0%,Inf[).
SSE -- sum squared error (regression, "<", [0,Inf[).
MSE -- mean squared error (regression, "<", [0,Inf[).
MdSE -- median squared error (regression, "<", [0,Inf[).
RMSE -- root mean squared error (regression, "<", [0,Inf[).
GMSE -- geometric mean squared error (regression, "<", [0,Inf[).
R22 -- 2nd variant of the coefficient of determination R^2 (regression, ">", the most general definition, which however can lead to negative values: ]-Inf,1]. In previous rminer versions, this variant was known as "R2").
EV -- explained variance, 1 - var(y-x)/var(y) (regression, ">", ]-Inf,1]).
Q2 -- R^2/SD test error metric, as used by M.J. Embrechts (regression, "<", [0,Inf[).
REC -- Regression Error Characteristic curve (regression, list with several components).
NAREC -- normalized REC area (given a fixed val=tolerance, regression, ">", [0,1.0]).
TOLERANCE -- the tolerance (y-axis value) of a REC curve given a fixed val=tolerance value (regression, ">", [0,1.0]).
TOLERANCEPERC -- the tolerance (y-axis value) of a REC curve given a percentage val value, in terms of the y range (regression, ">", [0,1.0]).
MRAE -- Mean Relative Absolute Error forecasting metric (val should contain the last in-sample/training data value (for random walk) or the full benchmark time series related with the out-of-sample values, regression, "<", [0,Inf[).
MdRAE -- Median Relative Absolute Error forecasting metric (val should contain the last in-sample/training data value (for random walk) or the full benchmark time series, regression, "<", [0,Inf[).
GMRAE -- Geometric Mean Relative Absolute Error forecasting metric (val should contain the last in-sample/training data value (for random walk) or the full benchmark time series, regression, "<", [0,Inf[).
THEILSU2 -- Theil's U2 forecasting metric (val should contain the last in-sample/training data value (for random walk) or the full benchmark time series, regression, "<", [0,Inf[).
MASE -- Mean Absolute Scaled Error (MASE) forecasting metric (val should contain the in-sample or training data time series, regression, "<", [0,Inf[).
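As referenced above, a minimal sketch of a user-defined metric function; the capped absolute error shown here is purely illustrative (not part of rminer), and since it returns lower values for better models ("<" meaning) it could also be plugged into the search argument of fit and mining:
# hypothetical user-defined metric: mean absolute error capped at 1 per example;
# lower values mean better models ("<" meaning):
capmae=function(y,x){ mean(pmin(abs(y-x),1)) }
print(mmetric(c(1,2,3),c(1.1,2.5,5),metric=capmae))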
D: decision threshold (for task="prob", probabilistic classification) within [0,1]. The class is TRUE if prob>D.
TC: target class index or vector of indexes (for multi-class classification tasks), from 1 to Nc, where Nc is the number of classes:
if TC==-1 (the default value), then it is assumed:
if metric is "CONF" -- D is ignored and highest probability class is assumed (if TC\\>0, the metric is computed for positive TC class and D is used).
if metric is "ACC", "CE", "BER", "KAPPA", "CRAMERV", "BRIER", or "AUC" -- the global metric (for all classes) is computed (if TC\\>0, the metric is computed for positive TC class).
if metric is "ACCLASS", "TPR", "TNR", "Precision", "F1", "MCC", "ROC", "BRIERCLASS", "AUCCLASS" -- it returns one result per class (if TC\\>0, it returns negative (e.g. "TPR1") and positive (TC, e.g. "TPR2") result).
if metric is "NAUC", "TPRATFPR", "LIFT", "ALIFT", "NALIFT" or "ALIFTATPERC" -- TC is set to the index of the last class.
val: auxiliary value:
when two or more metrics need different val values, then val should be a vector list, see example.
if numeric or vector -- check the metric argument for specific details of each metric val meaning.
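For instance, a minimal sketch of a vector list val, where each position matches the corresponding metric element:
y=c(1.1,2.2,3.3); x=c(1,2,3)
v=vector("list",length=2)
v[[2]]=0.5 # tolerance for NAREC; the MAE slot stays empty since MAE needs no val
print(mmetric(y,x,c("MAE","NAREC"),val=v))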
aggregate: character with type of aggregation performed when y is a mining list. Valid options are:
no -- returns all metrics for all mining runs. If metric includes "CONF", "ROC", "LIFT" or "REC", it returns a vector list; else if metric includes a single metric, it returns a vector; else it returns a data.frame (runs x metrics).
sum -- sums all run results.
mean -- averages all run results.
note: both "sum" and "mean" only work if only metric=="CONF" is used or if metric does not contain "ROC", "LIFT" or "REC".
Details
Compute classification or regression error metrics:
mmetric -- compute one or more classification/regression metrics given y and x OR a mining list.
metrics -- deprecated function, same as mmetric(y,x,metric="ALL"); included here only for compatibility purposes but will be removed from the package.
Returns
Returns the computed error metric(s):
one value if only one metric is requested (and y is not a mining list);
named vector if 2 or more elements are requested in metric (and y is not a mining list);
list if metric includes "CONF", "ROC", "LIFT" or "REC" (the remaining metrics are stored in field $res; y is not a mining list).
if y is a mining list then there can be several runs, thus:
a vector list of size y$runs is returned if metric includes "CONF", "ROC", "LIFT" or "REC" and aggregate="no";
a data.frame is returned if aggregate="no" and metric does not include "CONF", "ROC", "LIFT" or "REC";
a table is returned if aggregate="sum" or "mean" and metric="CONF";
a vector or numeric value is returned if aggregate="sum" or "mean" and metric is not "CONF".
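A minimal sketch that exercises these return types (y is not a mining list here):
y=factor(c("a","b","b")); x=factor(c("a","b","a"))
print(mmetric(y,x,"ACC")) # one value
print(mmetric(y,x,c("ACC","CE"))) # named vector
r=mmetric(y,x,c("CONF","ACC")) # list: $conf plus the remaining metrics in $res
print(names(r))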
References
To check for more details about rminer and for citation purposes:
P. Cortez.
Data Mining with Neural Networks and Support Vector Machines Using the R/rminer Tool.
In P. Perner (Ed.), Advances in Data Mining - Applications and Theoretical Aspects 10th Industrial Conference on Data Mining (ICDM 2010), Lecture Notes in Artificial Intelligence 6171, pp. 572-583, Berlin, Germany, July, 2010. Springer. ISBN: 978-3-642-14399-1.
P. Cortez.
A tutorial on using the rminer R package for data mining tasks.
Teaching Report, Department of Information Systems, ALGORITMI Research Centre, Engineering School, University of Minho, Guimaraes, Portugal, July 2015.
See Also
fit, predict.fit, mining, mgraph, savemining and Importance.
Examples
### pure binary classification
y=factor(c("a","a","a","a","b","b","b","b"))
x=factor(c("a","a","b","a","b","a","b","a"))
print(mmetric(y,x,"CONF")$conf)
print(mmetric(y,x,metric=c("ACC","TPR","ACCLASS")))
print(mmetric(y,x,"ALL"))

### probabilities binary classification
y=factor(c("a","a","a","a","b","b","b","b"))
px=matrix(nrow=8,ncol=2)
px[,1]=c(1.0,0.9,0.8,0.7,0.6,0.5,0.4,0.3)
px[,2]=1-px[,1]
print(px)
print(mmetric(y,px,"CONF")$conf)
print(mmetric(y,px,"CONF",D=0.5,TC=2)$conf)
print(mmetric(y,px,"CONF",D=0.3,TC=2)$conf)
print(mmetric(y,px,metric="ALL",D=0.3,TC=2))
print(mmetric(y,px,metric=c("ACC","AUC","AUCCLASS","BRIER","BRIERCLASS","CE"),D=0.3,TC=2))
# ACC and confusion matrix:
print(mmetric(y,px,metric=c("ACC","CONF"),D=0.3,TC=2))
# ACC and ROC curve:
print(mmetric(y,px,metric=c("ACC","ROC"),D=0.3,TC=2))
# ACC, ROC and LIFT curve:
print(mmetric(y,px,metric=c("ACC","ROC","LIFT"),D=0.3,TC=2))

### pure multi-class classification
y=c('A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','B','B','B','B','B','B','B','B','B','B','C','C','C','C','C','C','C','C','C','C','C','C','C','C','C','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','E','E','E','E','E')
x=c('A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','A','E','E','E','E','E','D','D','D','D','D','B','B','B','B','B','B','B','B','B','D','C','C','C','C','C','C','C','B','B','B','B','B','C','C','C','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','D','C','C','E','A','A','B','B')
y=factor(y)
x=factor(x)
print(mmetric(y,x,metric="CONF")$conf) # confusion matrix
print(mmetric(y,x,metric="CONF",TC=-1)$conf) # same thing
print(mmetric(y,x,metric="CONF",TC=1)$conf) # for target class TC=1: "A"
mshow=function(y,x,metric) print(round(mmetric(y,x,metric),digits=0))
mshow(y,x,"ALL")
mshow(y,x,c("ACCLASS","BAL_ACC","KAPPA"))
mshow(y,x,c("PRECISION")) # precision
mshow(y,x,c("TPR")) # recall
mshow(y,x,c("F1")) # F1 score
# micro (=ACC), macro and weighted average:
mshow(y,x,c("ACC","macroPRECISION","weightedPRECISION"))
mshow(y,x,c("ACC","macroTPR","weightedTPR"))
mshow(y,x,c("ACC","macroF1","weightedF1"))
mshow(y,x,c("ACC","macroACC","weightedACC"))
# several metrics in a single returned object:
print(mmetric(y,x,metric=c("CONF","macroF1","weightedF1","ACC")))

### probabilities multi-class
y=factor(c("a","a","b","b","c","c"))
px=matrix(nrow=6,ncol=3)
px[,1]=c(1.0,0.7,0.5,0.3,0.1,0.7)
px[,2]=c(0.0,0.2,0.4,0.7,0.3,0.2)
px[,3]=1-px[,1]-px[,2]
print(px)
print(mmetric(y,px,metric="ALL",TC=-1,val=0.1))
print(mmetric(y,px,metric=c("AUC","AUCCLASS","NAUC"),TC=-1,val=0.1))
print(mmetric(y,px,metric=c("AUC","NAUC"),TC=3,val=0.1))
print(mmetric(y,px,metric=c("ACC","ACCLASS"),TC=-1))
print(mmetric(y,px,metric=c("CONF"),TC=3,D=0.5)$conf)
print(mmetric(y,px,metric=c("ACCLASS"),TC=3,D=0.5))
print(mmetric(y,px,metric=c("CONF"),TC=3,D=0.7)$conf)
print(mmetric(y,px,metric=c("ACCLASS"),TC=3,D=0.7))

### ordinal multi-class (example in Ricardo Sousa PhD thesis 2012)
y=ordered(c(rep("a",4),rep("b",6),rep("d",3)),levels=c("a","b","c","d"))
x=ordered(c(rep("c",(4+6)),rep("d",3)),levels=c("a","b","c","d"))
print(mmetric(y,x,metric="CONF")$conf)
print(mmetric(y,x,metric=c("CE","MAEO","MSEO","KENDALL")))
# note: only y needs to be ordered
x=factor(c(rep("b",4),rep("a",6),rep("d",3)),levels=c("a","b","c","d"))
print(mmetric(y,x,metric="CONF")$conf)
print(mmetric(y,x,metric=c("CE","MAEO","MSEO","KENDALL")))
print(mmetric(y,x,metric="ALL"))

### ranking (multi-class)
y=matrix(nrow=1,ncol=12);x=y
# http://www.youtube.com/watch?v=D56dvoVrBBE
y[1,]=1:12
x[1,]=c(2,1,4,3,6,5,8,7,10,9,12,11)
print(mmetric(y,x,metric="KENDALL"))
print(mmetric(y,x,metric="ALL"))
y=matrix(nrow=2,ncol=7);x=y
y[1,]=c(2,6,5,4,3,7,1)
y[2,]=7:1
x[1,]=1:7
x[2,]=1:7
print(mmetric(y,x,metric="ALL"))

### regression examples: y - desired values; x - predictions
y=c(95.01,96.1,97.2,98.0,99.3,99.7);x=95:100
print(mmetric(y,x,"ALL"))
print(mmetric(y,x,"MAE"))
mshow=function(y,x,metric) print(round(mmetric(y,x,metric),digits=2))
mshow(y,x,c("MAE","RMSE","RAE","RSE"))
# getting NMAE:
m=mmetric(y,x,"NMAE")
cat("NMAE:",round(m,digits=2)," (denominator=",diff(range(y)),")\n")
m=mmetric(y,x,"NMAE",val=5) # usage of different range
cat("NMAE:",round(m,digits=2)," (denominator=",5,")\n")
# get REC curve and other measures:
m=mmetric(y,x,c("REC","TOLERANCEPERC","MAE"),val=5)
print(m)
# correlation or similar measures:
mshow(y,x,c("COR","R2","R22","EV")) # ideal is close to 1
mshow(y,x,c("q2","Q2")) # ideal is close to 0
# other measures:
print(mmetric(y,x,c("TOLERANCE","NAREC"),val=0.5)) # if admitted/accepted absolute error is 0.5
print(mmetric(y,x,"TOLERANCEPERC",val=0.05)) # tolerance for a 5% of yrange
# tolerance for fixed 0.1 value and 5% of yrange:
print(mmetric(y,x,c("TOLERANCE","TOLERANCEPERC"),val=c(0.1,0.05)))
print(mmetric(y,x,"THEILSU2",val=94.1)) # val = 1-ahead random walk, c(y,94.1), same as below
print(mmetric(y,x,"THEILSU2",val=c(94.1,y[1:5]))) # val = 1-ahead random walk (previous y values)
print(mmetric(y,x,"MASE",val=c(88.1,89.9,93.2,94.1))) # val = in-samples
val=vector("list",length=4)
val[[2]]=0.5;val[[3]]=94.1;val[[4]]=c(88.1,89.9,93.2,94.1)
print(mmetric(y,x,c("MAE","NAREC","THEILSU2","MASE"),val=val))
# user defined error function example:
# myerror = number of samples with absolute error above 0.1% of y:
myerror=function(y,x){return (sum(abs(y-x)>(0.001*y)))}
print(mmetric(y,x,metric=myerror))
# example that returns a list since "REC" is included:
print(mmetric(y,x,c("MAE","REC","TOLERANCE","EV"),val=1))

### mining, several runs, prob multi-class
## Not run: 
data(iris)
M=mining(Species~.,iris,model="rpart",Runs=2)
R=mmetric(M,metric="CONF",aggregate="no")
print(R[[1]]$conf)
print(R[[2]]$conf)
print(mmetric(M,metric="CONF",aggregate="mean"))
print(mmetric(M,metric="CONF",aggregate="sum"))
print(mmetric(M,metric=c("ACC","ACCLASS"),aggregate="no"))
print(mmetric(M,metric=c("ACC","ACCLASS"),aggregate="mean"))
print(mmetric(M,metric="ALL",aggregate="no"))
print(mmetric(M,metric="ALL",aggregate="mean"))
## End(Not run)

### mining, several runs, regression
## Not run: 
data(sin1reg)
S=sample(1:nrow(sin1reg),40)
M=mining(y~.,data=sin1reg[S,],model="ksvm",search=2^3,Runs=10)
R=mmetric(M,metric="MAE")
print(mmetric(M,metric="MAE",aggregate="mean"))
miR=meanint(R) # mean and t-student confidence intervals
cat("MAE=",round(miR$mean,digits=2),"+-",round(miR$int,digits=2),"\n")
print(mmetric(M,metric=c("MAE","RMSE")))
print(mmetric(M,metric=c("MAE","RMSE"),aggregate="mean"))
R=mmetric(M,metric="REC",aggregate="no")
print(R[[1]]$rec)
print(mmetric(M,metric=c("TOLERANCE","NAREC"),val=0.2))
print(mmetric(M,metric=c("TOLERANCE","NAREC"),val=0.2,aggregate="mean"))
## End(Not run)