Mercurial > forge
changeset 12591:3aeba3530595 octave-forge
[nan] upgrade liblinear to v1.8
author | schloegl |
---|---|
date | Sun, 12 Apr 2015 17:52:15 +0000 |
parents | fae7c16ebcb4 |
children | d03ad555e14e |
files | extra/NaN/src/linear.cpp extra/NaN/src/linear.h extra/NaN/src/predict.c extra/NaN/src/train.c |
diffstat | 4 files changed, 1012 insertions(+), 928 deletions(-) [+] |
line wrap: on
line diff
--- a/extra/NaN/src/linear.cpp Sun Apr 12 15:18:03 2015 +0000 +++ b/extra/NaN/src/linear.cpp Sun Apr 12 17:52:15 2015 +0000 @@ -1,12 +1,11 @@ /* -$Id$ -Copyright (c) 2007-2009 The LIBLINEAR Project. -Copyright (c) 2010 Alois Schloegl <alois.schloegl@gmail.com> +Copyright (c) 2007-2011 The LIBLINEAR Project. +Copyright (c) 2010,2015 Alois Schloegl <alois.schloegl@ist.ac.at> This function is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ -This code was extracted from liblinear-1.51 in Jan 2010 and +This code was extracted from liblinear-1.8 in Apr 2015 and modified for the use with Octave This program is free software; you can redistribute it and/or modify @@ -54,7 +53,7 @@ fflush(stdout); } -void (*liblinear_print_string) (const char *) = &print_string_stdout; +static void (*liblinear_print_string) (const char *) = &print_string_stdout; #if 1 static void info(const char *fmt,...) @@ -107,9 +106,9 @@ for (i=0; i<l; i++) { if (y[i] == 1) - C[i] = prob->W[i] * Cp; + C[i] = Cp; else - C[i] = prob->W[i] * Cn; + C[i] = Cn; } } @@ -266,9 +265,9 @@ for (i=0; i<l; i++) { if (y[i] == 1) - C[i] = prob->W[i] * Cp; + C[i] = Cp; else - C[i] = prob->W[i] * Cn; + C[i] = Cn; } } @@ -418,8 +417,10 @@ // eps is the stopping tolerance // // solution will be put in w +// +// See Appendix of LIBLINEAR paper, Fan et al. (2008) -#define GETI(i) (i) +#define GETI(i) (prob->y[i]) // To support weights for instances, use GETI(i) (i) class Solver_MCSVM_CS @@ -449,16 +450,13 @@ this->prob = prob; this->B = new double[nr_class]; this->G = new double[nr_class]; - this->C = new double[prob->l]; - for(int i = 0; i < prob->l; i++) - this->C[i] = prob->W[i] * weighted_C[prob->y[i]]; + this->C = weighted_C; } Solver_MCSVM_CS::~Solver_MCSVM_CS() { delete[] B; delete[] G; - delete[] C; } int compare_double(const void *a, const void *b) @@ -676,7 +674,7 @@ info("\noptimization finished, #iter = %d\n",iter); if (iter >= max_iter) - info("Warning: reaching max number of iterations\n"); + info("\nWARNING: reaching max number of iterations\n"); // calculate objective value double v = 0; @@ -729,9 +727,11 @@ // eps is the stopping tolerance // // solution will be put in w +// +// See Algorithm 3 of Hsieh et al., ICML 2008 #undef GETI -#define GETI(i) (i) +#define GETI(i) (y[i]+1) // To support weights for instances, use GETI(i) (i) static void solve_l2r_l1l2_svc( @@ -756,25 +756,14 @@ double PGmax_new, PGmin_new; // default solver_type: L2R_L2LOSS_SVC_DUAL - double *diag = new double[l]; - double *upper_bound = new double[l]; - double *C_ = new double[l]; - for(i=0; i<l; i++) - { - if(prob->y[i]>0) - C_[i] = prob->W[i] * Cp; - else - C_[i] = prob->W[i] * Cn; - diag[i] = 0.5/C_[i]; - upper_bound[i] = INF; - } + double diag[3] = {0.5/Cn, 0, 0.5/Cp}; + double upper_bound[3] = {INF, 0, INF}; if(solver_type == L2R_L1LOSS_SVC_DUAL) { - for(i=0; i<l; i++) - { - diag[i] = 0; - upper_bound[i] = C_[i]; - } + diag[0] = 0; + diag[2] = 0; + upper_bound[0] = Cn; + upper_bound[2] = Cp; } for(i=0; i<w_size; i++) @@ -812,7 +801,7 @@ swap(index[i], index[j]); } - for (s=0;s<active_size;s++) + for (s=0; s<active_size; s++) { i = index[s]; G = 0; @@ -918,9 +907,6 @@ info("Objective value = %lf\n",v/2); info("nSV = %d\n",nSV); - delete [] upper_bound; - delete [] diag; - delete [] C_; delete [] QD; delete [] alpha; delete [] y; @@ -928,6 +914,174 @@ } // A coordinate descent algorithm for +// the dual of L2-regularized logistic regression problems +// +// min_\alpha 0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - alpha_i) log (upper_bound_i - alpha_i) , +// s.t. 0 <= alpha_i <= upper_bound_i, +// +// where Qij = yi yj xi^T xj and +// upper_bound_i = Cp if y_i = 1 +// upper_bound_i = Cn if y_i = -1 +// +// Given: +// x, y, Cp, Cn +// eps is the stopping tolerance +// +// solution will be put in w +// +// See Algorithm 5 of Yu et al., MLJ 2010 + +#undef GETI +#define GETI(i) (y[i]+1) +// To support weights for instances, use GETI(i) (i) + +void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn) +{ + int l = prob->l; + int w_size = prob->n; + int i, s, iter = 0; + double *xTx = new double[l]; + int max_iter = 1000; + int *index = new int[l]; + double *alpha = new double[2*l]; // store alpha and C - alpha + schar *y = new schar[l]; + int max_inner_iter = 100; // for inner Newton + double innereps = 1e-2; + double innereps_min = min(1e-8, eps); + double upper_bound[3] = {Cn, 0, Cp}; + + for(i=0; i<w_size; i++) + w[i] = 0; + for(i=0; i<l; i++) + { + if(prob->y[i] > 0) + { + y[i] = +1; + } + else + { + y[i] = -1; + } + alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8); + alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i]; + + xTx[i] = 0; + feature_node *xi = prob->x[i]; + while (xi->index != -1) + { + xTx[i] += (xi->value)*(xi->value); + w[xi->index-1] += y[i]*alpha[2*i]*xi->value; + xi++; + } + index[i] = i; + } + + while (iter < max_iter) + { + for (i=0; i<l; i++) + { + int j = i+rand()%(l-i); + swap(index[i], index[j]); + } + int newton_iter = 0; + double Gmax = 0; + for (s=0; s<l; s++) + { + i = index[s]; + schar yi = y[i]; + double C = upper_bound[GETI(i)]; + double ywTx = 0, xisq = xTx[i]; + feature_node *xi = prob->x[i]; + while (xi->index != -1) + { + ywTx += w[xi->index-1]*xi->value; + xi++; + } + ywTx *= y[i]; + double a = xisq, b = ywTx; + + // Decide to minimize g_1(z) or g_2(z) + int ind1 = 2*i, ind2 = 2*i+1, sign = 1; + if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0) + { + ind1 = 2*i+1; + ind2 = 2*i; + sign = -1; + } + + // g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old) + double alpha_old = alpha[ind1]; + double z = alpha_old; + if(C - z < 0.5 * C) + z = 0.1*z; + double gp = a*(z-alpha_old)+sign*b+log(z/(C-z)); + Gmax = max(Gmax, fabs(gp)); + + // Newton method on the sub-problem + const double eta = 0.1; // xi in the paper + int inner_iter = 0; + while (inner_iter <= max_inner_iter) + { + if(fabs(gp) < innereps) + break; + double gpp = a + C/(C-z)/z; + double tmpz = z - gp/gpp; + if(tmpz <= 0) + z *= eta; + else // tmpz in (0, C) + z = tmpz; + gp = a*(z-alpha_old)+sign*b+log(z/(C-z)); + newton_iter++; + inner_iter++; + } + + if(inner_iter > 0) // update w + { + alpha[ind1] = z; + alpha[ind2] = C-z; + xi = prob->x[i]; + while (xi->index != -1) + { + w[xi->index-1] += sign*(z-alpha_old)*yi*xi->value; + xi++; + } + } + } + + iter++; + if(iter % 10 == 0) + info("."); + + if(Gmax < eps) + break; + + if(newton_iter <= l/10) + innereps = max(innereps_min, 0.1*innereps); + + } + + info("\noptimization finished, #iter = %d\n",iter); + if (iter >= max_iter) + info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n"); + + // calculate objective value + + double v = 0; + for(i=0; i<w_size; i++) + v += w[i] * w[i]; + v *= 0.5; + for(i=0; i<l; i++) + v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1]) + - upper_bound[GETI(i)] * log(upper_bound[GETI(i)]); + info("Objective value = %lf\n", v); + + delete [] xTx; + delete [] alpha; + delete [] y; + delete [] index; +} + +// A coordinate descent algorithm for // L1-regularized L2-loss support vector classification // // min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2, @@ -937,9 +1091,11 @@ // eps is the stopping tolerance // // solution will be put in w +// +// See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008) #undef GETI -#define GETI(i) (i) +#define GETI(i) (y[i]+1) // To support weights for instances, use GETI(i) (i) static void solve_l1r_l2_svc( @@ -956,8 +1112,8 @@ double sigma = 0.01; double d, G_loss, G, H; double Gmax_old = INF; - double Gmax_new; - double Gmax_init = 0.0; + double Gmax_new, Gnorm1_new; + double Gnorm1_init; double d_old, d_diff; double loss_old, loss_new; double appxcond, cond; @@ -968,21 +1124,15 @@ double *xj_sq = new double[w_size]; feature_node *x; - double *C = new double[l]; + double C[3] = {Cn,0,Cp}; for(j=0; j<l; j++) { b[j] = 1; if(prob_col->y[j] > 0) - { y[j] = 1; - C[j] = prob_col->W[j] * Cp; - } else - { y[j] = -1; - C[j] = prob_col->W[j] * Cn; - } } for(j=0; j<w_size; j++) { @@ -1002,7 +1152,8 @@ while(iter < max_iter) { - Gmax_new = 0; + Gmax_new = 0; + Gnorm1_new = 0; for(j=0; j<active_size; j++) { @@ -1058,6 +1209,7 @@ violation = fabs(Gn); Gmax_new = max(Gmax_new, violation); + Gnorm1_new += violation; // obtain Newton direction d if(Gp <= H*w[j]) @@ -1156,12 +1308,12 @@ } if(iter == 0) - Gmax_init = Gmax_new; + Gnorm1_init = Gnorm1_new; iter++; if(iter % 10 == 0) info("."); - if(Gmax_new <= eps*Gmax_init) + if(Gnorm1_new <= eps*Gnorm1_init) { if(active_size == w_size) break; @@ -1206,7 +1358,6 @@ info("Objective value = %lf\n", v); info("#nonzeros/#features = %d/%d\n", nnz, w_size); - delete [] C; delete [] index; delete [] y; delete [] b; @@ -1223,6 +1374,8 @@ // eps is the stopping tolerance // // solution will be put in w +// +// See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008) #undef GETI #define GETI(i) (y[i]+1) @@ -1234,106 +1387,93 @@ { int l = prob_col->l; int w_size = prob_col->n; - int j, s, iter = 0; + int j, s, newton_iter=0, iter=0; + int max_newton_iter = 100; int max_iter = 1000; - int active_size = w_size; int max_num_linesearch = 20; + int active_size; + int QP_active_size; - double x_min = 0; + double nu = 1e-12; + double inner_eps = 1; double sigma = 0.01; - double d, G, H; + double w_norm=0, w_norm_new; + double z, G, H; + double Gnorm1_init; double Gmax_old = INF; - double Gmax_new; - double Gmax_init = 0.0; - double sum1, appxcond1; - double sum2, appxcond2; - double cond; + double Gmax_new, Gnorm1_new; + double QP_Gmax_old = INF; + double QP_Gmax_new, QP_Gnorm1_new; + double delta, negsum_xTd, cond; int *index = new int[w_size]; schar *y = new schar[l]; + double *Hdiag = new double[w_size]; + double *Grad = new double[w_size]; + double *wpd = new double[w_size]; + double *xjneg_sum = new double[w_size]; + double *xTd = new double[l]; double *exp_wTx = new double[l]; double *exp_wTx_new = new double[l]; - double *xj_max = new double[w_size]; - double *C_sum = new double[w_size]; - double *xjneg_sum = new double[w_size]; - double *xjpos_sum = new double[w_size]; + double *tau = new double[l]; + double *D = new double[l]; feature_node *x; - double *C = new double[l]; + double C[3] = {Cn,0,Cp}; for(j=0; j<l; j++) { - exp_wTx[j] = 1; if(prob_col->y[j] > 0) - { y[j] = 1; - C[j] = prob_col->W[j] * Cp; - } else - { y[j] = -1; - C[j] = prob_col->W[j] * Cn; - } + + // assume initial w is 0 + exp_wTx[j] = 1; + tau[j] = C[GETI(j)]*0.5; + D[j] = C[GETI(j)]*0.25; } for(j=0; j<w_size; j++) { w[j] = 0; + wpd[j] = w[j]; index[j] = j; - xj_max[j] = 0; - C_sum[j] = 0; xjneg_sum[j] = 0; - xjpos_sum[j] = 0; x = prob_col->x[j]; while(x->index != -1) { int ind = x->index-1; - double val = x->value; - x_min = min(x_min, val); - xj_max[j] = max(xj_max[j], val); - C_sum[j] += C[GETI(ind)]; if(y[ind] == -1) - xjneg_sum[j] += C[GETI(ind)]*val; - else - xjpos_sum[j] += C[GETI(ind)]*val; + xjneg_sum[j] += C[GETI(ind)]*x->value; x++; } } - while(iter < max_iter) + while(newton_iter < max_newton_iter) { Gmax_new = 0; - - for(j=0; j<active_size; j++) - { - int i = j+rand()%(active_size-j); - swap(index[i], index[j]); - } + Gnorm1_new = 0; + active_size = w_size; for(s=0; s<active_size; s++) { j = index[s]; - sum1 = 0; - sum2 = 0; - H = 0; + Hdiag[j] = nu; + Grad[j] = 0; + double tmp = 0; x = prob_col->x[j]; while(x->index != -1) { int ind = x->index-1; - double exp_wTxind = exp_wTx[ind]; - double tmp1 = x->value/(1+exp_wTxind); - double tmp2 = C[GETI(ind)]*tmp1; - double tmp3 = tmp2*exp_wTxind; - sum2 += tmp2; - sum1 += tmp3; - H += tmp1*tmp3; + Hdiag[j] += x->value*x->value*D[ind]; + tmp += x->value*tau[ind]; x++; } - - G = -sum2 + xjneg_sum[j]; + Grad[j] = -tmp + xjneg_sum[j]; - double Gp = G+1; - double Gn = G-1; + double Gp = Grad[j]+1; + double Gn = Grad[j]-1; double violation = 0; if(w[j] == 0) { @@ -1341,6 +1481,7 @@ violation = -Gp; else if(Gn > 0) violation = Gn; + //outer-level shrinking else if(Gp>Gmax_old/l && Gn<-Gmax_old/l) { active_size--; @@ -1355,125 +1496,210 @@ violation = fabs(Gn); Gmax_new = max(Gmax_new, violation); + Gnorm1_new += violation; + } - // obtain Newton direction d - if(Gp <= H*w[j]) - d = -Gp/H; - else if(Gn >= H*w[j]) - d = -Gn/H; - else - d = -w[j]; + if(newton_iter == 0) + Gnorm1_init = Gnorm1_new; + + if(Gnorm1_new <= eps*Gnorm1_init) + break; + + iter = 0; + QP_Gmax_old = INF; + QP_active_size = active_size; - if(fabs(d) < 1.0e-12) - continue; + for(int i=0; i<l; i++) + xTd[i] = 0; - d = min(max(d,-10.0),10.0); + // optimize QP over wpd + while(iter < max_iter) + { + QP_Gmax_new = 0; + QP_Gnorm1_new = 0; + + for(j=0; j<QP_active_size; j++) + { + int i = j+rand()%(QP_active_size-j); + swap(index[i], index[j]); + } - double delta = fabs(w[j]+d)-fabs(w[j]) + G*d; - int num_linesearch; - for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) + for(s=0; s<QP_active_size; s++) { - cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta; + j = index[s]; + H = Hdiag[j]; - if(x_min >= 0) + x = prob_col->x[j]; + G = Grad[j] + (wpd[j]-w[j])*nu; + while(x->index != -1) { - double tmp = exp(d*xj_max[j]); - appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j]; - appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j]; - if(min(appxcond1,appxcond2) <= 0) + int ind = x->index-1; + G += x->value*D[ind]*xTd[ind]; + x++; + } + + double Gp = G+1; + double Gn = G-1; + double violation = 0; + if(wpd[j] == 0) + { + if(Gp < 0) + violation = -Gp; + else if(Gn > 0) + violation = Gn; + //inner-level shrinking + else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l) { - x = prob_col->x[j]; - while(x->index != -1) - { - exp_wTx[x->index-1] *= exp(d*x->value); - x++; - } - break; + QP_active_size--; + swap(index[s], index[QP_active_size]); + s--; + continue; } } + else if(wpd[j] > 0) + violation = fabs(Gp); + else + violation = fabs(Gn); - cond += d*xjneg_sum[j]; + QP_Gmax_new = max(QP_Gmax_new, violation); + QP_Gnorm1_new += violation; - int i = 0; + // obtain solution of one-variable problem + if(Gp <= H*wpd[j]) + z = -Gp/H; + else if(Gn >= H*wpd[j]) + z = -Gn/H; + else + z = -wpd[j]; + + if(fabs(z) < 1.0e-12) + continue; + z = min(max(z,-10.0),10.0); + + wpd[j] += z; + x = prob_col->x[j]; while(x->index != -1) { int ind = x->index-1; - double exp_dx = exp(d*x->value); - exp_wTx_new[i] = exp_wTx[ind]*exp_dx; - cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i])); - x++; i++; + xTd[ind] += x->value*z; + x++; } + } + + iter++; - if(cond <= 0) - { - int i = 0; - x = prob_col->x[j]; - while(x->index != -1) - { - int ind = x->index-1; - exp_wTx[ind] = exp_wTx_new[i]; - x++; i++; - } + if(QP_Gnorm1_new <= inner_eps*Gnorm1_init) + { + //inner stopping + if(QP_active_size == active_size) break; - } + //active set reactivation else { - d *= 0.5; - delta *= 0.5; + QP_active_size = active_size; + QP_Gmax_old = INF; + continue; } } - w[j] += d; + QP_Gmax_old = QP_Gmax_new; + } + + if(iter >= max_iter) + info("WARNING: reaching max number of inner iterations\n"); - // recompute exp_wTx[] if line search takes too many steps - if(num_linesearch >= max_num_linesearch) + delta = 0; + w_norm_new = 0; + for(j=0; j<w_size; j++) + { + delta += Grad[j]*(wpd[j]-w[j]); + if(wpd[j] != 0) + w_norm_new += fabs(wpd[j]); + } + delta += (w_norm_new-w_norm); + + negsum_xTd = 0; + for(int i=0; i<l; i++) + if(y[i] == -1) + negsum_xTd += C[GETI(i)]*xTd[i]; + + int num_linesearch; + for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++) + { + cond = w_norm_new - w_norm + negsum_xTd - sigma*delta; + + for(int i=0; i<l; i++) { - info("#"); + double exp_xTd = exp(xTd[i]); + exp_wTx_new[i] = exp_wTx[i]*exp_xTd; + cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i])); + } + + if(cond <= 0) + { + w_norm = w_norm_new; + for(j=0; j<w_size; j++) + w[j] = wpd[j]; for(int i=0; i<l; i++) - exp_wTx[i] = 0; - - for(int i=0; i<w_size; i++) { - if(w[i]==0) continue; - x = prob_col->x[i]; - while(x->index != -1) - { - exp_wTx[x->index-1] += w[i]*x->value; - x++; - } + exp_wTx[i] = exp_wTx_new[i]; + double tau_tmp = 1/(1+exp_wTx[i]); + tau[i] = C[GETI(i)]*tau_tmp; + D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp; } - + break; + } + else + { + w_norm_new = 0; + for(j=0; j<w_size; j++) + { + wpd[j] = (w[j]+wpd[j])*0.5; + if(wpd[j] != 0) + w_norm_new += fabs(wpd[j]); + } + delta *= 0.5; + negsum_xTd *= 0.5; for(int i=0; i<l; i++) - exp_wTx[i] = exp(exp_wTx[i]); + xTd[i] *= 0.5; } } - if(iter == 0) - Gmax_init = Gmax_new; - iter++; - if(iter % 10 == 0) - info("."); - - if(Gmax_new <= eps*Gmax_init) + // Recompute some info due to too many line search steps + if(num_linesearch >= max_num_linesearch) { - if(active_size == w_size) - break; - else + for(int i=0; i<l; i++) + exp_wTx[i] = 0; + + for(int i=0; i<w_size; i++) { - active_size = w_size; - info("*"); - Gmax_old = INF; - continue; + if(w[i]==0) continue; + x = prob_col->x[i]; + while(x->index != -1) + { + exp_wTx[x->index-1] += w[i]*x->value; + x++; + } } + + for(int i=0; i<l; i++) + exp_wTx[i] = exp(exp_wTx[i]); } + if(iter == 1) + inner_eps *= 0.25; + + newton_iter++; Gmax_old = Gmax_new; + + info("iter %3d #CD cycles %d\n", newton_iter, iter); } - info("\noptimization finished, #iter = %d\n", iter); - if(iter >= max_iter) - info("\nWARNING: reaching max number of iterations\n"); + info("=========================\n"); + info("optimization finished, #iter = %d\n", newton_iter); + if(newton_iter >= max_newton_iter) + info("WARNING: reaching max number of iterations\n"); // calculate objective value @@ -1494,15 +1720,17 @@ info("Objective value = %lf\n", v); info("#nonzeros/#features = %d/%d\n", nnz, w_size); - delete [] C; delete [] index; delete [] y; + delete [] Hdiag; + delete [] Grad; + delete [] wpd; + delete [] xjneg_sum; + delete [] xTd; delete [] exp_wTx; delete [] exp_wTx_new; - delete [] xj_max; - delete [] C_sum; - delete [] xjneg_sum; - delete [] xjpos_sum; + delete [] tau; + delete [] D; } // transpose matrix X from row format to column format @@ -1518,13 +1746,9 @@ prob_col->n = n; prob_col->y = new int[l]; prob_col->x = new feature_node*[n]; - prob_col->W = new double[l]; for(i=0; i<l; i++) - { prob_col->y[i] = prob->y[i]; - prob_col->W[i] = prob->W[i]; - } for(i=0; i<n+1; i++) col_ptr[i] = 0; @@ -1669,7 +1893,6 @@ solve_l1r_l2_svc(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn); delete [] prob_col.y; delete [] prob_col.x; - delete [] prob_col.W; delete [] x_space; break; } @@ -1681,10 +1904,12 @@ solve_l1r_lr(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn); delete [] prob_col.y; delete [] prob_col.x; - delete [] prob_col.W; delete [] x_space; break; } + case L2R_LR_DUAL: + solve_l2r_lr_dual(prob, w, eps, Cp, Cn); + break; default: fprintf(stderr, "Error: unknown solver_type\n"); break; @@ -1692,40 +1917,10 @@ } // -// Remove zero weighed data as libsvm and some liblinear solvers require C > 0. -// -static void remove_zero_weight(problem *newprob, const problem *prob) -{ - int i; - int l = 0; - for(i=0;i<prob->l;i++) - if(prob->W[i] > 0) l++; - *newprob = *prob; - newprob->l = l; - newprob->x = Malloc(feature_node*,l); - newprob->y = Malloc(int,l); - newprob->W = Malloc(double,l); - - int j = 0; - for(i=0;i<prob->l;i++) - if(prob->W[i] > 0) - { - newprob->x[j] = prob->x[i]; - newprob->y[j] = prob->y[i]; - newprob->W[j] = prob->W[i]; - j++; - } -} - -// // Interface functions // model* train(const problem *prob, const parameter *param) { - problem newprob; - remove_zero_weight(&newprob, prob); - prob = &newprob; - int i,j; int l = prob->l; int n = prob->n; @@ -1763,19 +1958,15 @@ if(param->weight_label[i] == label[j]) break; if(j == nr_class) - fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]); + fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]); else weighted_C[j] *= param->weight[i]; } // constructing the subproblem feature_node **x = Malloc(feature_node *,l); - double *W = Malloc(double,l); for(i=0;i<l;i++) - { x[i] = prob->x[perm[i]]; - W[i] = prob->W[perm[i]]; - } int k; problem sub_prob; @@ -1783,13 +1974,9 @@ sub_prob.n = n; sub_prob.x = Malloc(feature_node *,sub_prob.l); sub_prob.y = Malloc(int,sub_prob.l); - sub_prob.W = Malloc(double,sub_prob.l); for(k=0; k<sub_prob.l; k++) - { sub_prob.x[k] = x[k]; - sub_prob.W[k] = W[k]; - } // multi-class svm by Crammer and Singer if(param->solver_type == MCSVM_CS) @@ -1844,33 +2031,157 @@ } free(x); - free(W); free(label); free(start); free(count); free(perm); free(sub_prob.x); free(sub_prob.y); - free(sub_prob.W); free(weighted_C); - free(newprob.x); - free(newprob.y); - free(newprob.W); return model_; } -void destroy_model(struct model *model_) +void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target) +{ + int i; + int *fold_start = Malloc(int,nr_fold+1); + int l = prob->l; + int *perm = Malloc(int,l); + + for(i=0;i<l;i++) perm[i]=i; + for(i=0;i<l;i++) + { + int j = i+rand()%(l-i); + swap(perm[i],perm[j]); + } + for(i=0;i<=nr_fold;i++) + fold_start[i]=i*l/nr_fold; + + for(i=0;i<nr_fold;i++) + { + int begin = fold_start[i]; + int end = fold_start[i+1]; + int j,k; + struct problem subprob; + + subprob.bias = prob->bias; + subprob.n = prob->n; + subprob.l = l-(end-begin); + subprob.x = Malloc(struct feature_node*,subprob.l); + subprob.y = Malloc(int,subprob.l); + + k=0; + for(j=0;j<begin;j++) + { + subprob.x[k] = prob->x[perm[j]]; + subprob.y[k] = prob->y[perm[j]]; + ++k; + } + for(j=end;j<l;j++) + { + subprob.x[k] = prob->x[perm[j]]; + subprob.y[k] = prob->y[perm[j]]; + ++k; + } + struct model *submodel = train(&subprob,param); + for(j=begin;j<end;j++) + target[perm[j]] = predict(submodel,prob->x[perm[j]]); + free_and_destroy_model(&submodel); + free(subprob.x); + free(subprob.y); + } + free(fold_start); + free(perm); +} + +int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values) { - if(model_->w != NULL) - free(model_->w); - if(model_->label != NULL) - free(model_->label); - free(model_); + int idx; + int n; + if(model_->bias>=0) + n=model_->nr_feature+1; + else + n=model_->nr_feature; + double *w=model_->w; + int nr_class=model_->nr_class; + int i; + int nr_w; + if(nr_class==2 && model_->param.solver_type != MCSVM_CS) + nr_w = 1; + else + nr_w = nr_class; + + const feature_node *lx=x; + for(i=0;i<nr_w;i++) + dec_values[i] = 0; + for(; (idx=lx->index)!=-1; lx++) + { + // the dimension of testing data may exceed that of training + if(idx<=n) + for(i=0;i<nr_w;i++) + dec_values[i] += w[(idx-1)*nr_w+i]*lx->value; + } + + if(nr_class==2) + return (dec_values[0]>0)?model_->label[0]:model_->label[1]; + else + { + int dec_max_idx = 0; + for(i=1;i<nr_class;i++) + { + if(dec_values[i] > dec_values[dec_max_idx]) + dec_max_idx = i; + } + return model_->label[dec_max_idx]; + } +} + +int predict(const model *model_, const feature_node *x) +{ + double *dec_values = Malloc(double, model_->nr_class); + int label=predict_values(model_, x, dec_values); + free(dec_values); + return label; +} + +int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates) +{ + if(check_probability_model(model_)) + { + int i; + int nr_class=model_->nr_class; + int nr_w; + if(nr_class==2) + nr_w = 1; + else + nr_w = nr_class; + + int label=predict_values(model_, x, prob_estimates); + for(i=0;i<nr_w;i++) + prob_estimates[i]=1/(1+exp(-prob_estimates[i])); + + if(nr_class==2) // for binary classification + prob_estimates[1]=1.-prob_estimates[0]; + else + { + double sum=0; + for(i=0; i<nr_class; i++) + sum+=prob_estimates[i]; + + for(i=0; i<nr_class; i++) + prob_estimates[i]=prob_estimates[i]/sum; + } + + return label; + } + else + return 0; } static const char *solver_type_table[]= { - "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC","L2R_L1LOSS_SVC_DUAL","MCSVM_CS", "L1R_L2LOSS_SVC","L1R_LR", NULL + "L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS", + "L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL", NULL }; int save_model(const char *model_file_name, const struct model *model_) @@ -2016,175 +2327,6 @@ return model_; } -int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values) -{ - int idx; - int n; - if(model_->bias>=0) - n=model_->nr_feature+1; - else - n=model_->nr_feature; - double *w=model_->w; - int nr_class=model_->nr_class; - int i; - int nr_w; - if(nr_class==2 && model_->param.solver_type != MCSVM_CS) - nr_w = 1; - else - nr_w = nr_class; - - const feature_node *lx=x; - for(i=0;i<nr_w;i++) - dec_values[i] = 0; - for(; (idx=lx->index)!=-1; lx++) - { - // the dimension of testing data may exceed that of training - if(idx<=n) - for(i=0;i<nr_w;i++) - dec_values[i] += w[(idx-1)*nr_w+i]*lx->value; - } - - if(nr_class==2) - return (dec_values[0]>0)?model_->label[0]:model_->label[1]; - else - { - int dec_max_idx = 0; - for(i=1;i<nr_class;i++) - { - if(dec_values[i] > dec_values[dec_max_idx]) - dec_max_idx = i; - } - return model_->label[dec_max_idx]; - } -} - -int predict(const model *model_, const feature_node *x) -{ - double *dec_values = Malloc(double, model_->nr_class); - int label=predict_values(model_, x, dec_values); - free(dec_values); - return label; -} - -int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates) -{ - if(model_->param.solver_type==L2R_LR) - { - int i; - int nr_class=model_->nr_class; - int nr_w; - if(nr_class==2) - nr_w = 1; - else - nr_w = nr_class; - - int label=predict_values(model_, x, prob_estimates); - for(i=0;i<nr_w;i++) - prob_estimates[i]=1/(1+exp(-prob_estimates[i])); - - if(nr_class==2) // for binary classification - prob_estimates[1]=1.-prob_estimates[0]; - else - { - double sum=0; - for(i=0; i<nr_class; i++) - sum+=prob_estimates[i]; - - for(i=0; i<nr_class; i++) - prob_estimates[i]=prob_estimates[i]/sum; - } - - return label; - } - else - return 0; -} - -void destroy_param(parameter* param) -{ - if(param->weight_label != NULL) - free(param->weight_label); - if(param->weight != NULL) - free(param->weight); -} - -const char *check_parameter(const parameter *param) -{ - if(param->eps <= 0) - return "eps <= 0"; - - if(param->C <= 0) - return "C <= 0"; - - if(param->solver_type != L2R_LR - && param->solver_type != L2R_L2LOSS_SVC_DUAL - && param->solver_type != L2R_L2LOSS_SVC - && param->solver_type != L2R_L1LOSS_SVC_DUAL - && param->solver_type != MCSVM_CS - && param->solver_type != L1R_L2LOSS_SVC - && param->solver_type != L1R_LR) - return "unknown solver type"; - - return NULL; -} - -void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target) -{ - int i; - int *fold_start = Malloc(int,nr_fold+1); - int l = prob->l; - int *perm = Malloc(int,l); - - for(i=0;i<l;i++) perm[i]=i; - for(i=0;i<l;i++) - { - int j = i+rand()%(l-i); - swap(perm[i],perm[j]); - } - for(i=0;i<=nr_fold;i++) - fold_start[i]=i*l/nr_fold; - - for(i=0;i<nr_fold;i++) - { - int begin = fold_start[i]; - int end = fold_start[i+1]; - int j,k; - struct problem subprob; - - subprob.bias = prob->bias; - subprob.n = prob->n; - subprob.l = l-(end-begin); - subprob.x = Malloc(struct feature_node*,subprob.l); - subprob.y = Malloc(int,subprob.l); - subprob.W = Malloc(double,subprob.l); - - k=0; - for(j=0;j<begin;j++) - { - subprob.x[k] = prob->x[perm[j]]; - subprob.y[k] = prob->y[perm[j]]; - subprob.W[k] = prob->W[perm[j]]; - ++k; - } - for(j=end;j<l;j++) - { - subprob.x[k] = prob->x[perm[j]]; - subprob.y[k] = prob->y[perm[j]]; - subprob.W[k] = prob->W[perm[j]]; - ++k; - } - struct model *submodel = train(&subprob,param); - for(j=begin;j<end;j++) - target[perm[j]] = predict(submodel,prob->x[perm[j]]); - destroy_model(submodel); - free(subprob.x); - free(subprob.y); - free(subprob.W); - } - free(fold_start); - free(perm); -} - int get_nr_feature(const model *model_) { return model_->nr_feature; @@ -2202,3 +2344,65 @@ label[i] = model_->label[i]; } +void free_model_content(struct model *model_ptr) +{ + if(model_ptr->w != NULL) + free(model_ptr->w); + if(model_ptr->label != NULL) + free(model_ptr->label); +} + +void free_and_destroy_model(struct model **model_ptr_ptr) +{ + struct model *model_ptr = *model_ptr_ptr; + if(model_ptr != NULL) + { + free_model_content(model_ptr); + free(model_ptr); + } +} + +void destroy_param(parameter* param) +{ + if(param->weight_label != NULL) + free(param->weight_label); + if(param->weight != NULL) + free(param->weight); +} + +const char *check_parameter(const problem *prob, const parameter *param) +{ + if(param->eps <= 0) + return "eps <= 0"; + + if(param->C <= 0) + return "C <= 0"; + + if(param->solver_type != L2R_LR + && param->solver_type != L2R_L2LOSS_SVC_DUAL + && param->solver_type != L2R_L2LOSS_SVC + && param->solver_type != L2R_L1LOSS_SVC_DUAL + && param->solver_type != MCSVM_CS + && param->solver_type != L1R_L2LOSS_SVC + && param->solver_type != L1R_LR + && param->solver_type != L2R_LR_DUAL) + return "unknown solver type"; + + return NULL; +} + +int check_probability_model(const struct model *model_) +{ + return (model_->param.solver_type==L2R_LR || + model_->param.solver_type==L2R_LR_DUAL || + model_->param.solver_type==L1R_LR); +} + +void set_print_string_function(void (*print_func)(const char*)) +{ + if (print_func == NULL) + liblinear_print_string = &print_string_stdout; + else + liblinear_print_string = print_func; +} +
--- a/extra/NaN/src/linear.h Sun Apr 12 15:18:03 2015 +0000 +++ b/extra/NaN/src/linear.h Sun Apr 12 17:52:15 2015 +0000 @@ -1,12 +1,11 @@ /* -$Id$ -Copyright (c) 2007-2009 The LIBLINEAR Project. -Copyright (c) 2010 Alois Schloegl <alois.schloegl@gmail.com> +Copyright (c) 2007-2011 The LIBLINEAR Project. +Copyright (c) 2010,2015 Alois Schloegl <alois.schloegl@ist.ac.at> This function is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ -This code was extracted from liblinear-1.51 in Jan 2010 and +This code was extracted from liblinear-1.8 in Apr 2015 and modified for the use with Octave This program is free software; you can redistribute it and/or modify @@ -43,10 +42,9 @@ int *y; struct feature_node **x; double bias; /* < 0 if no bias term */ - double *W; /* instance weight */ }; -enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR }; /* solver_type */ +enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL }; /* solver_type */ struct parameter { @@ -84,10 +82,13 @@ int get_nr_class(const struct model *model_); void get_labels(const struct model *model_, int* label); -void destroy_model(struct model *model_); +void free_model_content(struct model *model_ptr); +void free_and_destroy_model(struct model **model_ptr_ptr); void destroy_param(struct parameter *param); -const char *check_parameter(const struct parameter *param); -extern void (*liblinear_print_string) (const char *); + +const char *check_parameter(const struct problem *prob, const struct parameter *param); +int check_probability_model(const struct model *model); +void set_print_string_function(void (*print_func) (const char*)); #ifdef __cplusplus }
--- a/extra/NaN/src/predict.c Sun Apr 12 15:18:03 2015 +0000 +++ b/extra/NaN/src/predict.c Sun Apr 12 17:52:15 2015 +0000 @@ -1,12 +1,11 @@ /* -$Id$ -Copyright (c) 2007-2009 The LIBLINEAR Project. -Copyright (c) 2010 Alois Schloegl <alois.schloegl@gmail.com> +Copyright (c) 2007-2011 The LIBLINEAR Project. +Copyright (c) 2010,2015 Alois Schloegl <alois.schloegl@ist.ac.at> This function is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ -This code was extracted from liblinear-1.51 in Jan 2010 and +This code was extracted from liblinear-1.8 in Apr 2015 and modified for the use with Octave This program is free software; you can redistribute it and/or modify @@ -25,304 +24,220 @@ */ #include <stdio.h> +#include <ctype.h> #include <stdlib.h> #include <string.h> +#include <errno.h> #include "linear.h" -#include "mex.h" -#include "linear_model_matlab.h" - -#ifdef tmwtypes_h - #if (MX_API_VER<=0x07020000) - typedef int mwSize; - #endif -#endif - -#define CMD_LEN 2048 - -#define Malloc(type,n) (type *)malloc((n)*sizeof(type)) - -int col_format_flag; - -void read_sparse_instance(const mxArray *prhs, int index, struct feature_node *x, int feature_number, double bias) -{ - int i, j, low, high; - mwIndex *ir, *jc; - double *samples; +struct feature_node *x; +int max_nr_attr = 64; - ir = mxGetIr(prhs); - jc = mxGetJc(prhs); - samples = mxGetPr(prhs); +struct model* model_; +int flag_predict_probability=0; - // each column is one instance - j = 0; - low = (int) jc[index], high = (int) jc[index+1]; - for(i=low; i<high && (int) (ir[i])<feature_number; i++) - { - x[j].index = (int) ir[i]+1; - x[j].value = samples[i]; - j++; - } - if(bias>=0) - { - x[j].index = feature_number+1; - x[j].value = bias; - j++; - } - x[j].index = -1; +void exit_input_error(int line_num) +{ + fprintf(stderr,"Wrong input format at line %d\n", line_num); + exit(1); } -static void fake_answer(mxArray *plhs[]) +static char *line = NULL; +static int max_line_len; + +static char* readline(FILE *input) { - plhs[0] = mxCreateDoubleMatrix(0, 0, mxREAL); - plhs[1] = mxCreateDoubleMatrix(0, 0, mxREAL); - plhs[2] = mxCreateDoubleMatrix(0, 0, mxREAL); + int len; + + if(fgets(line,max_line_len,input) == NULL) + return NULL; + + while(strrchr(line,'\n') == NULL) + { + max_line_len *= 2; + line = (char *) realloc(line,max_line_len); + len = (int) strlen(line); + if(fgets(line+len,max_line_len-len,input) == NULL) + break; + } + return line; } -void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, const int predict_probability_flag) +void do_predict(FILE *input, FILE *output, struct model* model_) { - int label_vector_row_num, label_vector_col_num; - int feature_number, testing_instance_number; - int instance_index; - double *ptr_instance, *ptr_label, *ptr_predict_label; - double *ptr_prob_estimates, *ptr_dec_values, *ptr; - struct feature_node *x; - mxArray *pplhs[1]; // instance sparse matrix in row format - int correct = 0; int total = 0; int nr_class=get_nr_class(model_); - int nr_w; double *prob_estimates=NULL; + int j, n; + int nr_feature=get_nr_feature(model_); + if(model_->bias>=0) + n=nr_feature+1; + else + n=nr_feature; - if(nr_class==2 && model_->param.solver_type!=MCSVM_CS) - nr_w=1; - else - nr_w=nr_class; + if(flag_predict_probability) + { + int *labels; - // prhs[1] = testing instance matrix - feature_number = get_nr_feature(model_); - testing_instance_number = (int) mxGetM(prhs[1]); - if(col_format_flag) - { - feature_number = (int) mxGetM(prhs[1]); - testing_instance_number = (int) mxGetN(prhs[1]); + if(!check_probability_model(model_)) + { + fprintf(stderr, "probability output is only supported for logistic regression\n"); + exit(1); + } + + labels=(int *) malloc(nr_class*sizeof(int)); + get_labels(model_,labels); + prob_estimates = (double *) malloc(nr_class*sizeof(double)); + fprintf(output,"labels"); + for(j=0;j<nr_class;j++) + fprintf(output," %d",labels[j]); + fprintf(output,"\n"); + free(labels); } - label_vector_row_num = (int) mxGetM(prhs[0]); - label_vector_col_num = (int) mxGetN(prhs[0]); - - if(label_vector_row_num!=testing_instance_number) - { - mexPrintf("Length of label vector does not match # of instances.\n"); - fake_answer(plhs); - return; - } - if(label_vector_col_num!=1) + max_line_len = 1024; + line = (char *)malloc(max_line_len*sizeof(char)); + while(readline(input) != NULL) { - mexPrintf("label (1st argument) should be a vector (# of column is 1).\n"); - fake_answer(plhs); - return; - } + int i = 0; + int target_label, predict_label; + char *idx, *val, *label, *endptr; + int inst_max_index = 0; // strtol gives 0 if wrong format + + label = strtok(line," \t\n"); + if(label == NULL) // empty line + exit_input_error(total+1); + + target_label = (int) strtol(label,&endptr,10); + if(endptr == label || *endptr != '\0') + exit_input_error(total+1); + + while(1) + { + if(i>=max_nr_attr-2) // need one more for index = -1 + { + max_nr_attr *= 2; + x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node)); + } + + idx = strtok(NULL,":"); + val = strtok(NULL," \t"); - ptr_instance = mxGetPr(prhs[1]); - ptr_label = mxGetPr(prhs[0]); + if(val == NULL) + break; + errno = 0; + x[i].index = (int) strtol(idx,&endptr,10); + if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index) + exit_input_error(total+1); + else + inst_max_index = x[i].index; + + errno = 0; + x[i].value = strtod(val,&endptr); + if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) + exit_input_error(total+1); - // transpose instance matrix - if(mxIsSparse(prhs[1])) - { - if(col_format_flag) + // feature indices larger than those in training are not used + if(x[i].index <= nr_feature) + ++i; + } + + if(model_->bias>=0) { - pplhs[0] = (mxArray *)prhs[1]; + x[i].index = n; + x[i].value = model_->bias; + i++; + } + x[i].index = -1; + + if(flag_predict_probability) + { + int j; + predict_label = predict_probability(model_,x,prob_estimates); + fprintf(output,"%d",predict_label); + for(j=0;j<model_->nr_class;j++) + fprintf(output," %g",prob_estimates[j]); + fprintf(output,"\n"); } else { - mxArray *pprhs[1]; - pprhs[0] = mxDuplicateArray(prhs[1]); - if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose")) - { - mexPrintf("Error: cannot transpose testing instance matrix\n"); - fake_answer(plhs); - return; - } - } - } - else - mexPrintf("Testing_instance_matrix must be sparse\n"); - - - prob_estimates = Malloc(double, nr_class); - - plhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL); - if(predict_probability_flag) - plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL); - else - plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_w, mxREAL); - - ptr_predict_label = mxGetPr(plhs[0]); - ptr_prob_estimates = mxGetPr(plhs[2]); - ptr_dec_values = mxGetPr(plhs[2]); - x = Malloc(struct feature_node, feature_number+2); - for(instance_index=0;instance_index<testing_instance_number;instance_index++) - { - int i; - double target,v; - - target = ptr_label[instance_index]; - - // prhs[1] and prhs[1]^T are sparse - read_sparse_instance(pplhs[0], instance_index, x, feature_number, model_->bias); - - if(predict_probability_flag) - { - v = predict_probability(model_, x, prob_estimates); - ptr_predict_label[instance_index] = v; - for(i=0;i<nr_class;i++) - ptr_prob_estimates[instance_index + i * testing_instance_number] = prob_estimates[i]; - } - else - { - double *dec_values = Malloc(double, nr_class); - v = predict(model_, x); - ptr_predict_label[instance_index] = v; - - predict_values(model_, x, dec_values); - for(i=0;i<nr_w;i++) - ptr_dec_values[instance_index + i * testing_instance_number] = dec_values[i]; - free(dec_values); + predict_label = predict(model_,x); + fprintf(output,"%d\n",predict_label); } - if(v == target) + if(predict_label == target_label) ++correct; ++total; } - mexPrintf("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total); - - // return accuracy, mean squared error, squared correlation coefficient - plhs[1] = mxCreateDoubleMatrix(1, 1, mxREAL); - ptr = mxGetPr(plhs[1]); - ptr[0] = (double) correct/total*100; - - free(x); - if(prob_estimates != NULL) + printf("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total); + if(flag_predict_probability) free(prob_estimates); } void exit_with_help() { - mexPrintf( - "Usage: [predicted_label, accuracy, decision_values/prob_estimates] = predict(testing_label_vector, testing_instance_matrix, model, 'liblinear_options','col')\n" - "liblinear_options:\n" - "-b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0)\n" - "col:\n" - " if 'col' is setted testing_instance_matrix is parsed in column format, otherwise is in row format" - ); + printf( + "Usage: predict [options] test_file model_file output_file\n" + "options:\n" + "-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0)\n" + ); + exit(1); } -void mexFunction( int nlhs, mxArray *plhs[], - int nrhs, const mxArray *prhs[] ) +int main(int argc, char **argv) { - int prob_estimate_flag = 0; - struct model *model_; - char cmd[CMD_LEN]; - col_format_flag = 0; + FILE *input, *output; + int i; - if(nrhs > 5 || nrhs < 3) + // parse options + for(i=1;i<argc;i++) { - exit_with_help(); - fake_answer(plhs); - return; - } - if(nrhs == 5) - { - mxGetString(prhs[4], cmd, mxGetN(prhs[4])+1); - if(strcmp(cmd, "col") == 0) - { - col_format_flag = 1; + if(argv[i][0] != '-') break; + ++i; + switch(argv[i-1][1]) + { + case 'b': + flag_predict_probability = atoi(argv[i]); + break; + + default: + fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]); + exit_with_help(); + break; } } + if(i>=argc) + exit_with_help(); - if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1])) { - mexPrintf("Error: label vector and instance matrix must be double\n"); - fake_answer(plhs); - return; + input = fopen(argv[i],"r"); + if(input == NULL) + { + fprintf(stderr,"can't open input file %s\n",argv[i]); + exit(1); } - if(mxIsStruct(prhs[2])) + output = fopen(argv[i+2],"w"); + if(output == NULL) { - const char *error_msg; - - // parse options - if(nrhs>=4) - { - int i, argc = 1; - char *argv[CMD_LEN/2]; - - // put options in argv[] - mxGetString(prhs[3], cmd, mxGetN(prhs[3]) + 1); - if((argv[argc] = strtok(cmd, " ")) != NULL) - while((argv[++argc] = strtok(NULL, " ")) != NULL) - ; - - for(i=1;i<argc;i++) - { - if(argv[i][0] != '-') break; - if(++i>=argc) - { - exit_with_help(); - fake_answer(plhs); - return; - } - switch(argv[i-1][1]) - { - case 'b': - prob_estimate_flag = atoi(argv[i]); - break; - default: - mexPrintf("unknown option\n"); - exit_with_help(); - fake_answer(plhs); - return; - } - } - } - - model_ = Malloc(struct model, 1); - error_msg = matlab_matrix_to_model(model_, prhs[2]); - if(error_msg) - { - mexPrintf("Error: can't read model: %s\n", error_msg); - destroy_model(model_); - fake_answer(plhs); - return; - } - - if(prob_estimate_flag) - { - if(model_->param.solver_type!=L2R_LR) - { - mexPrintf("probability output is only supported for logistic regression\n"); - prob_estimate_flag=0; - } - } - - if(mxIsSparse(prhs[1])) - do_predict(plhs, prhs, model_, prob_estimate_flag); - else - { - mexPrintf("Testing_instance_matrix must be sparse\n"); - fake_answer(plhs); - } - - // destroy model_ - destroy_model(model_); - } - else - { - mexPrintf("model file should be a struct array\n"); - fake_answer(plhs); + fprintf(stderr,"can't open output file %s\n",argv[i+2]); + exit(1); } - return; + if((model_=load_model(argv[i+1]))==0) + { + fprintf(stderr,"can't open model file %s\n",argv[i+1]); + exit(1); + } + + x = (struct feature_node *) malloc(max_nr_attr*sizeof(struct feature_node)); + do_predict(input, output, model_); + free_and_destroy_model(&model_); + free(line); + free(x); + fclose(input); + fclose(output); + return 0; } +
--- a/extra/NaN/src/train.c Sun Apr 12 15:18:03 2015 +0000 +++ b/extra/NaN/src/train.c Sun Apr 12 17:52:15 2015 +0000 @@ -1,12 +1,11 @@ /* -$Id$ -Copyright (c) 2007-2009 The LIBLINEAR Project. -Copyright (c) 2010 Alois Schloegl <alois.schloegl@gmail.com> +Copyright (c) 2007-2011 The LIBLINEAR Project. +Copyright (c) 2010,2015 Alois Schloegl <alois.schloegl@ist.ac.at> This function is part of the NaN-toolbox http://pub.ist.ac.at/~schloegl/matlab/NaN/ -This code was extracted from liblinear-1.51 in Jan 2010 and +This code was extracted from liblinear-1.8 in Apr 2015 and modified for the use with Octave This program is free software; you can redistribute it and/or modify @@ -29,93 +28,144 @@ #include <stdlib.h> #include <string.h> #include <ctype.h> +#include <errno.h> #include "linear.h" - -#include "mex.h" -#include "linear_model_matlab.h" - -#ifdef tmwtypes_h - #if (MX_API_VER<=0x07020000) - typedef int mwSize; - typedef int mwIndex; - #endif -#endif - -#define CMD_LEN 2048 #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) #define INF HUGE_VAL -void print_null(const char *s){} - -void (*liblinear_default_print_string) (const char *); +void print_null(const char *s) {} void exit_with_help() { - mexPrintf( - "Usage: model = train(weight_vector, training_label_vector, training_instance_matrix, 'liblinear_options', 'col');\n" - "liblinear_options:\n" + printf( + "Usage: train [options] training_set_file [model_file]\n" + "options:\n" "-s type : set type of solver (default 1)\n" - " 0 -- L2-regularized logistic regression\n" + " 0 -- L2-regularized logistic regression (primal)\n" " 1 -- L2-regularized L2-loss support vector classification (dual)\n" " 2 -- L2-regularized L2-loss support vector classification (primal)\n" " 3 -- L2-regularized L1-loss support vector classification (dual)\n" " 4 -- multi-class support vector classification by Crammer and Singer\n" " 5 -- L1-regularized L2-loss support vector classification\n" " 6 -- L1-regularized logistic regression\n" + " 7 -- L2-regularized logistic regression (dual)\n" "-c cost : set the parameter C (default 1)\n" "-e epsilon : set tolerance of termination criterion\n" " -s 0 and 2\n" " |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n" - " where f is the primal function, (default 0.01)\n" - " -s 1, 3, and 4\n" + " where f is the primal function and pos/neg are # of\n" + " positive/negative data (default 0.01)\n" + " -s 1, 3, 4 and 7\n" " Dual maximal violation <= eps; similar to libsvm (default 0.1)\n" " -s 5 and 6\n" - " |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,\n" + " |f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,\n" " where f is the primal function (default 0.01)\n" "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n" "-wi weight: weights adjust the parameter C of different classes (see README for details)\n" "-v n: n-fold cross validation mode\n" "-q : quiet mode (no outputs)\n" - "col:\n" - " if 'col' is setted, training_instance_matrix is parsed in column format, otherwise is in row format\n" ); + exit(1); +} + +void exit_input_error(int line_num) +{ + fprintf(stderr,"Wrong input format at line %d\n", line_num); + exit(1); } -// liblinear arguments -struct parameter param; // set by parse_command_line -struct problem prob; // set by read_problem -struct model *model_; +static char *line = NULL; +static int max_line_len; + +static char* readline(FILE *input) +{ + int len; + + if(fgets(line,max_line_len,input) == NULL) + return NULL; + + while(strrchr(line,'\n') == NULL) + { + max_line_len *= 2; + line = (char *) realloc(line,max_line_len); + len = (int) strlen(line); + if(fgets(line+len,max_line_len-len,input) == NULL) + break; + } + return line; +} + +void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name); +void read_problem(const char *filename); +void do_cross_validation(); + struct feature_node *x_space; -int cross_validation_flag; -int col_format_flag; +struct parameter param; +struct problem prob; +struct model* model_; +int flag_cross_validation; int nr_fold; double bias; -double do_cross_validation() +int main(int argc, char **argv) +{ + char input_file_name[1024]; + char model_file_name[1024]; + const char *error_msg; + + parse_command_line(argc, argv, input_file_name, model_file_name); + read_problem(input_file_name); + error_msg = check_parameter(&prob,¶m); + + if(error_msg) + { + fprintf(stderr,"Error: %s\n",error_msg); + exit(1); + } + + if(flag_cross_validation) + { + do_cross_validation(); + } + else + { + model_=train(&prob, ¶m); + if(save_model(model_file_name, model_)) + { + fprintf(stderr,"can't save model to file %s\n",model_file_name); + exit(1); + } + free_and_destroy_model(&model_); + } + destroy_param(¶m); + free(prob.y); + free(prob.x); + free(x_space); + free(line); + + return 0; +} + +void do_cross_validation() { int i; int total_correct = 0; - int *target = Malloc(int,prob.l); - double retval = 0.0; + int *target = Malloc(int, prob.l); cross_validation(&prob,¶m,nr_fold,target); for(i=0;i<prob.l;i++) if(target[i] == prob.y[i]) ++total_correct; - mexPrintf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l); - retval = 100.0*total_correct/prob.l; + printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l); free(target); - return retval; } -// nrhs should be 4 -int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name) +void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name) { - int i, argc = 1; - char cmd[CMD_LEN]; - char *argv[CMD_LEN/2]; + int i; + void (*print_func)(const char*) = NULL; // default printing to stdout // default values param.solver_type = L2R_L2LOSS_SVC_DUAL; @@ -124,65 +174,33 @@ param.nr_weight = 0; param.weight_label = NULL; param.weight = NULL; - cross_validation_flag = 0; - col_format_flag = 0; + flag_cross_validation = 0; bias = -1; - // train loaded only once under matlab - if(liblinear_default_print_string == NULL) - liblinear_default_print_string = liblinear_print_string; - else - liblinear_print_string = liblinear_default_print_string; - - if(nrhs <= 2) - return 1; - - if(nrhs == 5) - { - mxGetString(prhs[4], cmd, mxGetN(prhs[4])+1); - if(strcmp(cmd, "col") == 0) - col_format_flag = 1; - } - - // put options in argv[] - if(nrhs > 3) - { - mxGetString(prhs[3], cmd, mxGetN(prhs[3]) + 1); - if((argv[argc] = strtok(cmd, " ")) != NULL) - while((argv[++argc] = strtok(NULL, " ")) != NULL) - ; - } - // parse options for(i=1;i<argc;i++) { if(argv[i][0] != '-') break; - ++i; - if(i>=argc && argv[i-1][1] != 'q') // since option -q has no parameter - return 1; + if(++i>=argc) + exit_with_help(); switch(argv[i-1][1]) { case 's': param.solver_type = atoi(argv[i]); break; + case 'c': param.C = atof(argv[i]); break; + case 'e': param.eps = atof(argv[i]); break; + case 'B': bias = atof(argv[i]); break; - case 'v': - cross_validation_flag = 1; - nr_fold = atoi(argv[i]); - if(nr_fold < 2) - { - mexPrintf("n-fold cross validation: n must >= 2\n"); - return 1; - } - break; + case 'w': ++param.nr_weight; param.weight_label = (int *) realloc(param.weight_label,sizeof(int)*param.nr_weight); @@ -190,212 +208,158 @@ param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]); param.weight[param.nr_weight-1] = atof(argv[i]); break; + + case 'v': + flag_cross_validation = 1; + nr_fold = atoi(argv[i]); + if(nr_fold < 2) + { + fprintf(stderr,"n-fold cross validation: n must >= 2\n"); + exit_with_help(); + } + break; + case 'q': - liblinear_print_string = &print_null; + print_func = &print_null; i--; break; + default: - mexPrintf("unknown option\n"); - return 1; + fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]); + exit_with_help(); + break; } } + set_print_string_function(print_func); + + // determine filenames + if(i>=argc) + exit_with_help(); + + strcpy(input_file_name, argv[i]); + + if(i<argc-1) + strcpy(model_file_name,argv[i+1]); + else + { + char *p = strrchr(argv[i],'/'); + if(p==NULL) + p = argv[i]; + else + ++p; + sprintf(model_file_name,"%s.model",p); + } + if(param.eps == INF) { if(param.solver_type == L2R_LR || param.solver_type == L2R_L2LOSS_SVC) param.eps = 0.01; - else if(param.solver_type == L2R_L2LOSS_SVC_DUAL || param.solver_type == L2R_L1LOSS_SVC_DUAL || param.solver_type == MCSVM_CS) + else if(param.solver_type == L2R_L2LOSS_SVC_DUAL || param.solver_type == L2R_L1LOSS_SVC_DUAL || param.solver_type == MCSVM_CS || param.solver_type == L2R_LR_DUAL) param.eps = 0.1; else if(param.solver_type == L1R_L2LOSS_SVC || param.solver_type == L1R_LR) param.eps = 0.01; } - return 0; -} - -static void fake_answer(mxArray *plhs[]) -{ - plhs[0] = mxCreateDoubleMatrix(0, 0, mxREAL); } -int read_problem_sparse(const mxArray *weight_vec, const mxArray *label_vec, const mxArray *instance_mat) +// read in a problem (in libsvm format) +void read_problem(const char *filename) { - int i, j, k, low, high; - mwIndex *ir, *jc; - int elements, max_index, num_samples, label_vector_row_num, weight_vector_row_num; - double *samples, *labels, *weights; - mxArray *instance_mat_col; // instance sparse matrix in column format - - prob.x = NULL; - prob.y = NULL; - prob.W = NULL; - x_space = NULL; + int max_index, inst_max_index, i; + long int elements, j; + FILE *fp = fopen(filename,"r"); + char *endptr; + char *idx, *val, *label; - if(col_format_flag) - instance_mat_col = (mxArray *)instance_mat; - else + if(fp == NULL) { - // transpose instance matrix - mxArray *prhs[1], *plhs[1]; - prhs[0] = mxDuplicateArray(instance_mat); - if(mexCallMATLAB(1, plhs, 1, prhs, "transpose")) - { - mexPrintf("Error: cannot transpose training instance matrix\n"); - return -1; - } - instance_mat_col = plhs[0]; - mxDestroyArray(prhs[0]); + fprintf(stderr,"can't open input file %s\n",filename); + exit(1); } - // the number of instance - prob.l = (int) mxGetN(instance_mat_col); - weight_vector_row_num = (int) mxGetM(weight_vec); - label_vector_row_num = (int) mxGetM(label_vec); + prob.l = 0; + elements = 0; + max_line_len = 1024; + line = Malloc(char,max_line_len); + while(readline(fp)!=NULL) + { + char *p = strtok(line," \t"); // label - if(weight_vector_row_num == 0) - ;//mexPrintf("Warning: treat each instance with weight 1.0\n"); - else if(weight_vector_row_num!=prob.l) - { - mexPrintf("Length of weight vector does not match # of instances.\n"); - return -1; - } - if(label_vector_row_num!=prob.l) - { - mexPrintf("Length of label vector does not match # of instances.\n"); - return -1; + // features + while(1) + { + p = strtok(NULL," \t"); + if(p == NULL || *p == '\n') // check '\n' as ' ' may be after the last feature + break; + elements++; + } + elements++; // for bias term + prob.l++; } - - // each column is one instance - weights = mxGetPr(weight_vec); - labels = mxGetPr(label_vec); - samples = mxGetPr(instance_mat_col); - ir = mxGetIr(instance_mat_col); - jc = mxGetJc(instance_mat_col); - - num_samples = (int) mxGetNzmax(instance_mat_col); - - elements = num_samples + prob.l*2; - max_index = (int) mxGetM(instance_mat_col); - - prob.y = Malloc(int, prob.l); - prob.W = Malloc(double,prob.l); - prob.x = Malloc(struct feature_node*, prob.l); - x_space = Malloc(struct feature_node, elements); + rewind(fp); prob.bias=bias; - j = 0; + prob.y = Malloc(int,prob.l); + prob.x = Malloc(struct feature_node *,prob.l); + x_space = Malloc(struct feature_node,elements+prob.l); + + max_index = 0; + j=0; for(i=0;i<prob.l;i++) { + inst_max_index = 0; // strtol gives 0 if wrong format + readline(fp); prob.x[i] = &x_space[j]; - prob.y[i] = (int) labels[i]; - prob.W[i] = 1; - if(weight_vector_row_num > 0) - prob.W[i] *= (double) weights[i]; - low = (int) jc[i], high = (int) jc[i+1]; - for(k=low;k<high;k++) + label = strtok(line," \t\n"); + if(label == NULL) // empty line + exit_input_error(i+1); + + prob.y[i] = (int) strtol(label,&endptr,10); + if(endptr == label || *endptr != '\0') + exit_input_error(i+1); + + while(1) { - x_space[j].index = (int) ir[k]+1; - x_space[j].value = samples[k]; - j++; - } - if(prob.bias>=0) - { - x_space[j].index = max_index+1; - x_space[j].value = prob.bias; - j++; + idx = strtok(NULL,":"); + val = strtok(NULL," \t"); + + if(val == NULL) + break; + + errno = 0; + x_space[j].index = (int) strtol(idx,&endptr,10); + if(endptr == idx || errno != 0 || *endptr != '\0' || x_space[j].index <= inst_max_index) + exit_input_error(i+1); + else + inst_max_index = x_space[j].index; + + errno = 0; + x_space[j].value = strtod(val,&endptr); + if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) + exit_input_error(i+1); + + ++j; } + + if(inst_max_index > max_index) + max_index = inst_max_index; + + if(prob.bias >= 0) + x_space[j++].value = prob.bias; + x_space[j++].index = -1; } - if(prob.bias>=0) - prob.n = max_index+1; - else - prob.n = max_index; - - return 0; -} - -// Interface function of matlab -// now assume prhs[0]: label prhs[1]: features -void mexFunction( int nlhs, mxArray *plhs[], - int nrhs, const mxArray *prhs[] ) -{ - const char *error_msg; - // fix random seed to have same results for each run - // (for cross validation) - srand(1); - - // Transform the input Matrix to libsvm format - if(nrhs > 2 && nrhs < 6) + if(prob.bias >= 0) { - int err=0; - - if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1]) || !mxIsDouble(prhs[2])) { - mexPrintf("Error: weight vector, label vector and instance matrix must be double\n"); - fake_answer(plhs); - return; - } - - if(parse_command_line(nrhs, prhs, NULL)) - { - exit_with_help(); - destroy_param(¶m); - fake_answer(plhs); - return; - } - - if(mxIsSparse(prhs[2])) - err = read_problem_sparse(prhs[0], prhs[1], prhs[2]); - else - { - mexPrintf("Training_instance_matrix must be sparse\n"); - destroy_param(¶m); - fake_answer(plhs); - return; - } - - // train's original code - error_msg = check_parameter(¶m); - - if(err || error_msg) - { - if (error_msg != NULL) - mexPrintf("Error: %s\n", error_msg); - destroy_param(¶m); - free(prob.y); - free(prob.x); - free(x_space); - fake_answer(plhs); - return; - } - - if(cross_validation_flag) - { - double *ptr; - plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL); - ptr = mxGetPr(plhs[0]); - ptr[0] = do_cross_validation(); - } - else - { - const char *error_msg; - - model_ = train(&prob, ¶m); - error_msg = model_to_matlab_structure(plhs, model_); - if(error_msg) - mexPrintf("Error: can't convert libsvm model to matrix structure: %s\n", error_msg); - destroy_model(model_); - } - destroy_param(¶m); - free(prob.y); - free(prob.x); - free(prob.W); - free(x_space); + prob.n=max_index+1; + for(i=1;i<prob.l;i++) + (prob.x[i]-2)->index = prob.n; + x_space[j-2].index = prob.n; } else - { - exit_with_help(); - fake_answer(plhs); - return; - } + prob.n=max_index; + + fclose(fp); }