Mercurial > forge

--- a/extra/NaN/src/linear.cpp	Sun Apr 12 15:18:03 2015 +0000
+++ b/extra/NaN/src/linear.cpp	Sun Apr 12 17:52:15 2015 +0000
@@ -1,12 +1,11 @@
 /*

-$Id$
-Copyright (c) 2007-2009 The LIBLINEAR Project.
-Copyright (c) 2010 Alois Schloegl <alois.schloegl@gmail.com>
+Copyright (c) 2007-2011 The LIBLINEAR Project.
+Copyright (c) 2010,2015 Alois Schloegl <alois.schloegl@ist.ac.at>
 This function is part of the NaN-toolbox
 http://pub.ist.ac.at/~schloegl/matlab/NaN/

-This code was extracted from liblinear-1.51 in Jan 2010 and
+This code was extracted from liblinear-1.8 in Apr 2015 and
 modified for the use with Octave

 This program is free software; you can redistribute it and/or modify
@@ -54,7 +53,7 @@
 	fflush(stdout);
 }

-void (*liblinear_print_string) (const char *) = &print_string_stdout;
+static void (*liblinear_print_string) (const char *) = &print_string_stdout;

 #if 1
 static void info(const char *fmt,...)
@@ -107,9 +106,9 @@
 	for (i=0; i<l; i++)
 	{
 		if (y[i] == 1)
-			C[i] = prob->W[i] * Cp;
+			C[i] = Cp;
 		else
-			C[i] = prob->W[i] * Cn;
+			C[i] = Cn;
 	}
 }

@@ -266,9 +265,9 @@
 	for (i=0; i<l; i++)
 	{
 		if (y[i] == 1)
-			C[i] = prob->W[i] * Cp;
+			C[i] = Cp;
 		else
-			C[i] = prob->W[i] * Cn;
+			C[i] = Cn;
 	}
 }

@@ -418,8 +417,10 @@
 // eps is the stopping tolerance
 //
 // solution will be put in w
+//
+// See Appendix of LIBLINEAR paper, Fan et al. (2008)

-#define GETI(i) (i)
+#define GETI(i) (prob->y[i])
 // To support weights for instances, use GETI(i) (i)

 class Solver_MCSVM_CS
@@ -449,16 +450,13 @@
 	this->prob = prob;
 	this->B = new double[nr_class];
 	this->G = new double[nr_class];
-	this->C = new double[prob->l];
-	for(int i = 0; i < prob->l; i++)
-		this->C[i] = prob->W[i] * weighted_C[prob->y[i]];
+	this->C = weighted_C;
 }

 Solver_MCSVM_CS::~Solver_MCSVM_CS()
 {
 	delete[] B;
 	delete[] G;
-	delete[] C;
 }

 int compare_double(const void *a, const void *b)
@@ -676,7 +674,7 @@

 	info("\noptimization finished, #iter = %d\n",iter);
 	if (iter >= max_iter)
-		info("Warning: reaching max number of iterations\n");
+		info("\nWARNING: reaching max number of iterations\n");

 	// calculate objective value
 	double v = 0;
@@ -729,9 +727,11 @@
 // eps is the stopping tolerance
 //
 // solution will be put in w
+//
+// See Algorithm 3 of Hsieh et al., ICML 2008

 #undef GETI
-#define GETI(i) (i)
+#define GETI(i) (y[i]+1)
 // To support weights for instances, use GETI(i) (i)

 static void solve_l2r_l1l2_svc(
@@ -756,25 +756,14 @@
 	double PGmax_new, PGmin_new;

 	// default solver_type: L2R_L2LOSS_SVC_DUAL
-	double *diag = new double[l];
-	double *upper_bound = new double[l];
-	double *C_ = new double[l];
-	for(i=0; i<l; i++)
-	{
-		if(prob->y[i]>0)
-			C_[i] = prob->W[i] * Cp;
-		else
-			C_[i] = prob->W[i] * Cn;
-		diag[i] = 0.5/C_[i];
-		upper_bound[i] = INF;
-	}
+	double diag[3] = {0.5/Cn, 0, 0.5/Cp};
+	double upper_bound[3] = {INF, 0, INF};
 	if(solver_type == L2R_L1LOSS_SVC_DUAL)
 	{
-		for(i=0; i<l; i++)
-		{
-			diag[i] = 0;
-			upper_bound[i] = C_[i];
-		}
+		diag[0] = 0;
+		diag[2] = 0;
+		upper_bound[0] = Cn;
+		upper_bound[2] = Cp;
 	}

 	for(i=0; i<w_size; i++)
@@ -812,7 +801,7 @@
 			swap(index[i], index[j]);
 		}

-		for (s=0;s<active_size;s++)
+		for (s=0; s<active_size; s++)
 		{
 			i = index[s];
 			G = 0;
@@ -918,9 +907,6 @@
 	info("Objective value = %lf\n",v/2);
 	info("nSV = %d\n",nSV);

-	delete [] upper_bound;
-	delete [] diag;
-	delete [] C_;
 	delete [] QD;
 	delete [] alpha;
 	delete [] y;
@@ -928,6 +914,174 @@
 }

 // A coordinate descent algorithm for
+// the dual of L2-regularized logistic regression problems
+//
+//  min_\alpha  0.5(\alpha^T Q \alpha) + \sum \alpha_i log (\alpha_i) + (upper_bound_i - alpha_i) log (upper_bound_i - alpha_i) ,
+//    s.t.      0 <= alpha_i <= upper_bound_i,
+//
+//  where Qij = yi yj xi^T xj and
+//  upper_bound_i = Cp if y_i = 1
+//  upper_bound_i = Cn if y_i = -1
+//
+// Given:
+// x, y, Cp, Cn
+// eps is the stopping tolerance
+//
+// solution will be put in w
+//
+// See Algorithm 5 of Yu et al., MLJ 2010
+
+#undef GETI
+#define GETI(i) (y[i]+1)
+// To support weights for instances, use GETI(i) (i)
+
+void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
+{
+	int l = prob->l;
+	int w_size = prob->n;
+	int i, s, iter = 0;
+	double *xTx = new double[l];
+	int max_iter = 1000;
+	int *index = new int[l];
+	double *alpha = new double[2*l]; // store alpha and C - alpha
+	schar *y = new schar[l];
+	int max_inner_iter = 100; // for inner Newton
+	double innereps = 1e-2;
+	double innereps_min = min(1e-8, eps);
+	double upper_bound[3] = {Cn, 0, Cp};
+
+	for(i=0; i<w_size; i++)
+		w[i] = 0;
+	for(i=0; i<l; i++)
+	{
+		if(prob->y[i] > 0)
+		{
+			y[i] = +1;
+		}
+		else
+		{
+			y[i] = -1;
+		}
+		alpha[2*i] = min(0.001*upper_bound[GETI(i)], 1e-8);
+		alpha[2*i+1] = upper_bound[GETI(i)] - alpha[2*i];
+
+		xTx[i] = 0;
+		feature_node *xi = prob->x[i];
+		while (xi->index != -1)
+		{
+			xTx[i] += (xi->value)*(xi->value);
+			w[xi->index-1] += y[i]*alpha[2*i]*xi->value;
+			xi++;
+		}
+		index[i] = i;
+	}
+
+	while (iter < max_iter)
+	{
+		for (i=0; i<l; i++)
+		{
+			int j = i+rand()%(l-i);
+			swap(index[i], index[j]);
+		}
+		int newton_iter = 0;
+		double Gmax = 0;
+		for (s=0; s<l; s++)
+		{
+			i = index[s];
+			schar yi = y[i];
+			double C = upper_bound[GETI(i)];
+			double ywTx = 0, xisq = xTx[i];
+			feature_node *xi = prob->x[i];
+			while (xi->index != -1)
+			{
+				ywTx += w[xi->index-1]*xi->value;
+				xi++;
+			}
+			ywTx *= y[i];
+			double a = xisq, b = ywTx;
+
+			// Decide to minimize g_1(z) or g_2(z)
+			int ind1 = 2*i, ind2 = 2*i+1, sign = 1;
+			if(0.5*a*(alpha[ind2]-alpha[ind1])+b < 0)
+			{
+				ind1 = 2*i+1;
+				ind2 = 2*i;
+				sign = -1;
+			}
+
+			//  g_t(z) = z*log(z) + (C-z)*log(C-z) + 0.5a(z-alpha_old)^2 + sign*b(z-alpha_old)
+			double alpha_old = alpha[ind1];
+			double z = alpha_old;
+			if(C - z < 0.5 * C)
+				z = 0.1*z;
+			double gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
+			Gmax = max(Gmax, fabs(gp));
+
+			// Newton method on the sub-problem
+			const double eta = 0.1; // xi in the paper
+			int inner_iter = 0;
+			while (inner_iter <= max_inner_iter)
+			{
+				if(fabs(gp) < innereps)
+					break;
+				double gpp = a + C/(C-z)/z;
+				double tmpz = z - gp/gpp;
+				if(tmpz <= 0)
+					z *= eta;
+				else // tmpz in (0, C)
+					z = tmpz;
+				gp = a*(z-alpha_old)+sign*b+log(z/(C-z));
+				newton_iter++;
+				inner_iter++;
+			}
+
+			if(inner_iter > 0) // update w
+			{
+				alpha[ind1] = z;
+				alpha[ind2] = C-z;
+				xi = prob->x[i];
+				while (xi->index != -1)
+				{
+					w[xi->index-1] += sign*(z-alpha_old)*yi*xi->value;
+					xi++;
+				}
+			}
+		}
+
+		iter++;
+		if(iter % 10 == 0)
+			info(".");
+
+		if(Gmax < eps)
+			break;
+
+		if(newton_iter <= l/10)
+			innereps = max(innereps_min, 0.1*innereps);
+
+	}
+
+	info("\noptimization finished, #iter = %d\n",iter);
+	if (iter >= max_iter)
+		info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");
+
+	// calculate objective value
+
+	double v = 0;
+	for(i=0; i<w_size; i++)
+		v += w[i] * w[i];
+	v *= 0.5;
+	for(i=0; i<l; i++)
+		v += alpha[2*i] * log(alpha[2*i]) + alpha[2*i+1] * log(alpha[2*i+1])
+			- upper_bound[GETI(i)] * log(upper_bound[GETI(i)]);
+	info("Objective value = %lf\n", v);
+
+	delete [] xTx;
+	delete [] alpha;
+	delete [] y;
+	delete [] index;
+}
+
+// A coordinate descent algorithm for
 // L1-regularized L2-loss support vector classification
 //
 //  min_w \sum |wj| + C \sum max(0, 1-yi w^T xi)^2,
@@ -937,9 +1091,11 @@
 // eps is the stopping tolerance
 //
 // solution will be put in w
+//
+// See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)

 #undef GETI
-#define GETI(i) (i)
+#define GETI(i) (y[i]+1)
 // To support weights for instances, use GETI(i) (i)

 static void solve_l1r_l2_svc(
@@ -956,8 +1112,8 @@
 	double sigma = 0.01;
 	double d, G_loss, G, H;
 	double Gmax_old = INF;
-	double Gmax_new;
-	double Gmax_init = 0.0;
+	double Gmax_new, Gnorm1_new;
+	double Gnorm1_init;
 	double d_old, d_diff;
 	double loss_old, loss_new;
 	double appxcond, cond;
@@ -968,21 +1124,15 @@
 	double *xj_sq = new double[w_size];
 	feature_node *x;

-	double *C = new double[l];
+	double C[3] = {Cn,0,Cp};

 	for(j=0; j<l; j++)
 	{
 		b[j] = 1;
 		if(prob_col->y[j] > 0)
-		{
 			y[j] = 1;
-			C[j] = prob_col->W[j] * Cp;
-		}
 		else
-		{
 			y[j] = -1;
-			C[j] = prob_col->W[j] * Cn;
-		}
 	}
 	for(j=0; j<w_size; j++)
 	{
@@ -1002,7 +1152,8 @@

 	while(iter < max_iter)
 	{
-		Gmax_new  = 0;
+		Gmax_new = 0;
+		Gnorm1_new = 0;

 		for(j=0; j<active_size; j++)
 		{
@@ -1058,6 +1209,7 @@
 				violation = fabs(Gn);

 			Gmax_new = max(Gmax_new, violation);
+			Gnorm1_new += violation;

 			// obtain Newton direction d
 			if(Gp <= H*w[j])
@@ -1156,12 +1308,12 @@
 		}

 		if(iter == 0)
-			Gmax_init = Gmax_new;
+			Gnorm1_init = Gnorm1_new;
 		iter++;
 		if(iter % 10 == 0)
 			info(".");

-		if(Gmax_new <= eps*Gmax_init)
+		if(Gnorm1_new <= eps*Gnorm1_init)
 		{
 			if(active_size == w_size)
 				break;
@@ -1206,7 +1358,6 @@
 	info("Objective value = %lf\n", v);
 	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

-	delete [] C;
 	delete [] index;
 	delete [] y;
 	delete [] b;
@@ -1223,6 +1374,8 @@
 // eps is the stopping tolerance
 //
 // solution will be put in w
+//
+// See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)

 #undef GETI
 #define GETI(i) (y[i]+1)
@@ -1234,106 +1387,93 @@
 {
 	int l = prob_col->l;
 	int w_size = prob_col->n;
-	int j, s, iter = 0;
+	int j, s, newton_iter=0, iter=0;
+	int max_newton_iter = 100;
 	int max_iter = 1000;
-	int active_size = w_size;
 	int max_num_linesearch = 20;
+	int active_size;
+	int QP_active_size;

-	double x_min = 0;
+	double nu = 1e-12;
+	double inner_eps = 1;
 	double sigma = 0.01;
-	double d, G, H;
+	double w_norm=0, w_norm_new;
+	double z, G, H;
+	double Gnorm1_init;
 	double Gmax_old = INF;
-	double Gmax_new;
-	double Gmax_init = 0.0;
-	double sum1, appxcond1;
-	double sum2, appxcond2;
-	double cond;
+	double Gmax_new, Gnorm1_new;
+	double QP_Gmax_old = INF;
+	double QP_Gmax_new, QP_Gnorm1_new;
+	double delta, negsum_xTd, cond;

 	int *index = new int[w_size];
 	schar *y = new schar[l];
+	double *Hdiag = new double[w_size];
+	double *Grad = new double[w_size];
+	double *wpd = new double[w_size];
+	double *xjneg_sum = new double[w_size];
+	double *xTd = new double[l];
 	double *exp_wTx = new double[l];
 	double *exp_wTx_new = new double[l];
-	double *xj_max = new double[w_size];
-	double *C_sum = new double[w_size];
-	double *xjneg_sum = new double[w_size];
-	double *xjpos_sum = new double[w_size];
+	double *tau = new double[l];
+	double *D = new double[l];
 	feature_node *x;

-	double *C = new double[l];
+	double C[3] = {Cn,0,Cp};

 	for(j=0; j<l; j++)
 	{
-		exp_wTx[j] = 1;
 		if(prob_col->y[j] > 0)
-		{
 			y[j] = 1;
-			C[j] = prob_col->W[j] * Cp;
-		}
 		else
-		{
 			y[j] = -1;
-			C[j] = prob_col->W[j] * Cn;
-		}
+
+		// assume initial w is 0
+		exp_wTx[j] = 1;
+		tau[j] = C[GETI(j)]*0.5;
+		D[j] = C[GETI(j)]*0.25;
 	}
 	for(j=0; j<w_size; j++)
 	{
 		w[j] = 0;
+		wpd[j] = w[j];
 		index[j] = j;
-		xj_max[j] = 0;
-		C_sum[j] = 0;
 		xjneg_sum[j] = 0;
-		xjpos_sum[j] = 0;
 		x = prob_col->x[j];
 		while(x->index != -1)
 		{
 			int ind = x->index-1;
-			double val = x->value;
-			x_min = min(x_min, val);
-			xj_max[j] = max(xj_max[j], val);
-			C_sum[j] += C[GETI(ind)];
 			if(y[ind] == -1)
-				xjneg_sum[j] += C[GETI(ind)]*val;
-			else
-				xjpos_sum[j] += C[GETI(ind)]*val;
+				xjneg_sum[j] += C[GETI(ind)]*x->value;
 			x++;
 		}
 	}

-	while(iter < max_iter)
+	while(newton_iter < max_newton_iter)
 	{
 		Gmax_new = 0;
-
-		for(j=0; j<active_size; j++)
-		{
-			int i = j+rand()%(active_size-j);
-			swap(index[i], index[j]);
-		}
+		Gnorm1_new = 0;
+		active_size = w_size;

 		for(s=0; s<active_size; s++)
 		{
 			j = index[s];
-			sum1 = 0;
-			sum2 = 0;
-			H = 0;
+			Hdiag[j] = nu;
+			Grad[j] = 0;

+			double tmp = 0;
 			x = prob_col->x[j];
 			while(x->index != -1)
 			{
 				int ind = x->index-1;
-				double exp_wTxind = exp_wTx[ind];
-				double tmp1 = x->value/(1+exp_wTxind);
-				double tmp2 = C[GETI(ind)]*tmp1;
-				double tmp3 = tmp2*exp_wTxind;
-				sum2 += tmp2;
-				sum1 += tmp3;
-				H += tmp1*tmp3;
+				Hdiag[j] += x->value*x->value*D[ind];
+				tmp += x->value*tau[ind];
 				x++;
 			}
-
-			G = -sum2 + xjneg_sum[j];
+			Grad[j] = -tmp + xjneg_sum[j];

-			double Gp = G+1;
-			double Gn = G-1;
+			double Gp = Grad[j]+1;
+			double Gn = Grad[j]-1;
 			double violation = 0;
 			if(w[j] == 0)
 			{
@@ -1341,6 +1481,7 @@
 					violation = -Gp;
 				else if(Gn > 0)
 					violation = Gn;
+				//outer-level shrinking
 				else if(Gp>Gmax_old/l && Gn<-Gmax_old/l)
 				{
 					active_size--;
@@ -1355,125 +1496,210 @@
 				violation = fabs(Gn);

 			Gmax_new = max(Gmax_new, violation);
+			Gnorm1_new += violation;
+		}

-			// obtain Newton direction d
-			if(Gp <= H*w[j])
-				d = -Gp/H;
-			else if(Gn >= H*w[j])
-				d = -Gn/H;
-			else
-				d = -w[j];
+		if(newton_iter == 0)
+			Gnorm1_init = Gnorm1_new;
+
+		if(Gnorm1_new <= eps*Gnorm1_init)
+			break;
+
+		iter = 0;
+		QP_Gmax_old = INF;
+		QP_active_size = active_size;

-			if(fabs(d) < 1.0e-12)
-				continue;
+		for(int i=0; i<l; i++)
+			xTd[i] = 0;

-			d = min(max(d,-10.0),10.0);
+		// optimize QP over wpd
+		while(iter < max_iter)
+		{
+			QP_Gmax_new = 0;
+			QP_Gnorm1_new = 0;
+
+			for(j=0; j<QP_active_size; j++)
+			{
+				int i = j+rand()%(QP_active_size-j);
+				swap(index[i], index[j]);
+			}

-			double delta = fabs(w[j]+d)-fabs(w[j]) + G*d;
-			int num_linesearch;
-			for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
+			for(s=0; s<QP_active_size; s++)
 			{
-				cond = fabs(w[j]+d)-fabs(w[j]) - sigma*delta;
+				j = index[s];
+				H = Hdiag[j];

-				if(x_min >= 0)
+				x = prob_col->x[j];
+				G = Grad[j] + (wpd[j]-w[j])*nu;
+				while(x->index != -1)
 				{
-					double tmp = exp(d*xj_max[j]);
-					appxcond1 = log(1+sum1*(tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond - d*xjpos_sum[j];
-					appxcond2 = log(1+sum2*(1/tmp-1)/xj_max[j]/C_sum[j])*C_sum[j] + cond + d*xjneg_sum[j];
-					if(min(appxcond1,appxcond2) <= 0)
+					int ind = x->index-1;
+					G += x->value*D[ind]*xTd[ind];
+					x++;
+				}
+
+				double Gp = G+1;
+				double Gn = G-1;
+				double violation = 0;
+				if(wpd[j] == 0)
+				{
+					if(Gp < 0)
+						violation = -Gp;
+					else if(Gn > 0)
+						violation = Gn;
+					//inner-level shrinking
+					else if(Gp>QP_Gmax_old/l && Gn<-QP_Gmax_old/l)
 					{
-						x = prob_col->x[j];
-						while(x->index != -1)
-						{
-							exp_wTx[x->index-1] *= exp(d*x->value);
-							x++;
-						}
-						break;
+						QP_active_size--;
+						swap(index[s], index[QP_active_size]);
+						s--;
+						continue;
 					}
 				}
+				else if(wpd[j] > 0)
+					violation = fabs(Gp);
+				else
+					violation = fabs(Gn);

-				cond += d*xjneg_sum[j];
+				QP_Gmax_new = max(QP_Gmax_new, violation);
+				QP_Gnorm1_new += violation;

-				int i = 0;
+				// obtain solution of one-variable problem
+				if(Gp <= H*wpd[j])
+					z = -Gp/H;
+				else if(Gn >= H*wpd[j])
+					z = -Gn/H;
+				else
+					z = -wpd[j];
+
+				if(fabs(z) < 1.0e-12)
+					continue;
+				z = min(max(z,-10.0),10.0);
+
+				wpd[j] += z;
+
 				x = prob_col->x[j];
 				while(x->index != -1)
 				{
 					int ind = x->index-1;
-					double exp_dx = exp(d*x->value);
-					exp_wTx_new[i] = exp_wTx[ind]*exp_dx;
-					cond += C[GETI(ind)]*log((1+exp_wTx_new[i])/(exp_dx+exp_wTx_new[i]));
-					x++; i++;
+					xTd[ind] += x->value*z;
+					x++;
 				}
+			}
+
+			iter++;

-				if(cond <= 0)
-				{
-					int i = 0;
-					x = prob_col->x[j];
-					while(x->index != -1)
-					{
-						int ind = x->index-1;
-						exp_wTx[ind] = exp_wTx_new[i];
-						x++; i++;
-					}
+			if(QP_Gnorm1_new <= inner_eps*Gnorm1_init)
+			{
+				//inner stopping
+				if(QP_active_size == active_size)
 					break;
-				}
+				//active set reactivation
 				else
 				{
-					d *= 0.5;
-					delta *= 0.5;
+					QP_active_size = active_size;
+					QP_Gmax_old = INF;
+					continue;
 				}
 			}

-			w[j] += d;
+			QP_Gmax_old = QP_Gmax_new;
+		}
+
+		if(iter >= max_iter)
+			info("WARNING: reaching max number of inner iterations\n");

-			// recompute exp_wTx[] if line search takes too many steps
-			if(num_linesearch >= max_num_linesearch)
+		delta = 0;
+		w_norm_new = 0;
+		for(j=0; j<w_size; j++)
+		{
+			delta += Grad[j]*(wpd[j]-w[j]);
+			if(wpd[j] != 0)
+				w_norm_new += fabs(wpd[j]);
+		}
+		delta += (w_norm_new-w_norm);
+
+		negsum_xTd = 0;
+		for(int i=0; i<l; i++)
+			if(y[i] == -1)
+				negsum_xTd += C[GETI(i)]*xTd[i];
+
+		int num_linesearch;
+		for(num_linesearch=0; num_linesearch < max_num_linesearch; num_linesearch++)
+		{
+			cond = w_norm_new - w_norm + negsum_xTd - sigma*delta;
+
+			for(int i=0; i<l; i++)
 			{
-				info("#");
+				double exp_xTd = exp(xTd[i]);
+				exp_wTx_new[i] = exp_wTx[i]*exp_xTd;
+				cond += C[GETI(i)]*log((1+exp_wTx_new[i])/(exp_xTd+exp_wTx_new[i]));
+			}
+
+			if(cond <= 0)
+			{
+				w_norm = w_norm_new;
+				for(j=0; j<w_size; j++)
+					w[j] = wpd[j];
 				for(int i=0; i<l; i++)
-					exp_wTx[i] = 0;
-
-				for(int i=0; i<w_size; i++)
 				{
-					if(w[i]==0) continue;
-					x = prob_col->x[i];
-					while(x->index != -1)
-					{
-						exp_wTx[x->index-1] += w[i]*x->value;
-						x++;
-					}
+					exp_wTx[i] = exp_wTx_new[i];
+					double tau_tmp = 1/(1+exp_wTx[i]);
+					tau[i] = C[GETI(i)]*tau_tmp;
+					D[i] = C[GETI(i)]*exp_wTx[i]*tau_tmp*tau_tmp;
 				}
-
+				break;
+			}
+			else
+			{
+				w_norm_new = 0;
+				for(j=0; j<w_size; j++)
+				{
+					wpd[j] = (w[j]+wpd[j])*0.5;
+					if(wpd[j] != 0)
+						w_norm_new += fabs(wpd[j]);
+				}
+				delta *= 0.5;
+				negsum_xTd *= 0.5;
 				for(int i=0; i<l; i++)
-					exp_wTx[i] = exp(exp_wTx[i]);
+					xTd[i] *= 0.5;
 			}
 		}

-		if(iter == 0)
-			Gmax_init = Gmax_new;
-		iter++;
-		if(iter % 10 == 0)
-			info(".");
-
-		if(Gmax_new <= eps*Gmax_init)
+		// Recompute some info due to too many line search steps
+		if(num_linesearch >= max_num_linesearch)
 		{
-			if(active_size == w_size)
-				break;
-			else
+			for(int i=0; i<l; i++)
+				exp_wTx[i] = 0;
+
+			for(int i=0; i<w_size; i++)
 			{
-				active_size = w_size;
-				info("*");
-				Gmax_old = INF;
-				continue;
+				if(w[i]==0) continue;
+				x = prob_col->x[i];
+				while(x->index != -1)
+				{
+					exp_wTx[x->index-1] += w[i]*x->value;
+					x++;
+				}
 			}
+
+			for(int i=0; i<l; i++)
+				exp_wTx[i] = exp(exp_wTx[i]);
 		}

+		if(iter == 1)
+			inner_eps *= 0.25;
+
+		newton_iter++;
 		Gmax_old = Gmax_new;
+
+		info("iter %3d  #CD cycles %d\n", newton_iter, iter);
 	}

-	info("\noptimization finished, #iter = %d\n", iter);
-	if(iter >= max_iter)
-		info("\nWARNING: reaching max number of iterations\n");
+	info("=========================\n");
+	info("optimization finished, #iter = %d\n", newton_iter);
+	if(newton_iter >= max_newton_iter)
+		info("WARNING: reaching max number of iterations\n");

 	// calculate objective value

@@ -1494,15 +1720,17 @@
 	info("Objective value = %lf\n", v);
 	info("#nonzeros/#features = %d/%d\n", nnz, w_size);

-	delete [] C;
 	delete [] index;
 	delete [] y;
+	delete [] Hdiag;
+	delete [] Grad;
+	delete [] wpd;
+	delete [] xjneg_sum;
+	delete [] xTd;
 	delete [] exp_wTx;
 	delete [] exp_wTx_new;
-	delete [] xj_max;
-	delete [] C_sum;
-	delete [] xjneg_sum;
-	delete [] xjpos_sum;
+	delete [] tau;
+	delete [] D;
 }

 // transpose matrix X from row format to column format
@@ -1518,13 +1746,9 @@
 	prob_col->n = n;
 	prob_col->y = new int[l];
 	prob_col->x = new feature_node*[n];
-	prob_col->W = new double[l];

 	for(i=0; i<l; i++)
-	{
 		prob_col->y[i] = prob->y[i];
-		prob_col->W[i] = prob->W[i];
-	}

 	for(i=0; i<n+1; i++)
 		col_ptr[i] = 0;
@@ -1669,7 +1893,6 @@
 			solve_l1r_l2_svc(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
 			delete [] prob_col.y;
 			delete [] prob_col.x;
-			delete [] prob_col.W;
 			delete [] x_space;
 			break;
 		}
@@ -1681,10 +1904,12 @@
 			solve_l1r_lr(&prob_col, w, eps*min(pos,neg)/prob->l, Cp, Cn);
 			delete [] prob_col.y;
 			delete [] prob_col.x;
-			delete [] prob_col.W;
 			delete [] x_space;
 			break;
 		}
+		case L2R_LR_DUAL:
+			solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
+			break;
 		default:
 			fprintf(stderr, "Error: unknown solver_type\n");
 			break;
@@ -1692,40 +1917,10 @@
 }

 //
-// Remove zero weighed data as libsvm and some liblinear solvers require C > 0.
-//
-static void remove_zero_weight(problem *newprob, const problem *prob)
-{
-	int i;
-	int l = 0;
-	for(i=0;i<prob->l;i++)
-		if(prob->W[i] > 0) l++;
-	*newprob = *prob;
-	newprob->l = l;
-	newprob->x = Malloc(feature_node*,l);
-	newprob->y = Malloc(int,l);
-	newprob->W = Malloc(double,l);
-
-	int j = 0;
-	for(i=0;i<prob->l;i++)
-		if(prob->W[i] > 0)
-		{
-			newprob->x[j] = prob->x[i];
-			newprob->y[j] = prob->y[i];
-			newprob->W[j] = prob->W[i];
-			j++;
-		}
-}
-
-//
 // Interface functions
 //
 model* train(const problem *prob, const parameter *param)
 {
-	problem newprob;
-	remove_zero_weight(&newprob, prob);
-	prob = &newprob;
-
 	int i,j;
 	int l = prob->l;
 	int n = prob->n;
@@ -1763,19 +1958,15 @@
 			if(param->weight_label[i] == label[j])
 				break;
 		if(j == nr_class)
-			fprintf(stderr,"warning: class label %d specified in weight is not found\n", param->weight_label[i]);
+			fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
 		else
 			weighted_C[j] *= param->weight[i];
 	}

 	// constructing the subproblem
 	feature_node **x = Malloc(feature_node *,l);
-	double *W = Malloc(double,l);
 	for(i=0;i<l;i++)
-	{
 		x[i] = prob->x[perm[i]];
-		W[i] = prob->W[perm[i]];
-	}

 	int k;
 	problem sub_prob;
@@ -1783,13 +1974,9 @@
 	sub_prob.n = n;
 	sub_prob.x = Malloc(feature_node *,sub_prob.l);
 	sub_prob.y = Malloc(int,sub_prob.l);
-	sub_prob.W = Malloc(double,sub_prob.l);

 	for(k=0; k<sub_prob.l; k++)
-	{
 		sub_prob.x[k] = x[k];
-		sub_prob.W[k] = W[k];
-	}

 	// multi-class svm by Crammer and Singer
 	if(param->solver_type == MCSVM_CS)
@@ -1844,33 +2031,157 @@
 	}

 	free(x);
-	free(W);
 	free(label);
 	free(start);
 	free(count);
 	free(perm);
 	free(sub_prob.x);
 	free(sub_prob.y);
-	free(sub_prob.W);
 	free(weighted_C);
-	free(newprob.x);
-	free(newprob.y);
-	free(newprob.W);
 	return model_;
 }

-void destroy_model(struct model *model_)
+void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target)
+{
+	int i;
+	int *fold_start = Malloc(int,nr_fold+1);
+	int l = prob->l;
+	int *perm = Malloc(int,l);
+
+	for(i=0;i<l;i++) perm[i]=i;
+	for(i=0;i<l;i++)
+	{
+		int j = i+rand()%(l-i);
+		swap(perm[i],perm[j]);
+	}
+	for(i=0;i<=nr_fold;i++)
+		fold_start[i]=i*l/nr_fold;
+
+	for(i=0;i<nr_fold;i++)
+	{
+		int begin = fold_start[i];
+		int end = fold_start[i+1];
+		int j,k;
+		struct problem subprob;
+
+		subprob.bias = prob->bias;
+		subprob.n = prob->n;
+		subprob.l = l-(end-begin);
+		subprob.x = Malloc(struct feature_node*,subprob.l);
+		subprob.y = Malloc(int,subprob.l);
+
+		k=0;
+		for(j=0;j<begin;j++)
+		{
+			subprob.x[k] = prob->x[perm[j]];
+			subprob.y[k] = prob->y[perm[j]];
+			++k;
+		}
+		for(j=end;j<l;j++)
+		{
+			subprob.x[k] = prob->x[perm[j]];
+			subprob.y[k] = prob->y[perm[j]];
+			++k;
+		}
+		struct model *submodel = train(&subprob,param);
+		for(j=begin;j<end;j++)
+			target[perm[j]] = predict(submodel,prob->x[perm[j]]);
+		free_and_destroy_model(&submodel);
+		free(subprob.x);
+		free(subprob.y);
+	}
+	free(fold_start);
+	free(perm);
+}
+
+int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
 {
-	if(model_->w != NULL)
-		free(model_->w);
-	if(model_->label != NULL)
-		free(model_->label);
-	free(model_);
+	int idx;
+	int n;
+	if(model_->bias>=0)
+		n=model_->nr_feature+1;
+	else
+		n=model_->nr_feature;
+	double *w=model_->w;
+	int nr_class=model_->nr_class;
+	int i;
+	int nr_w;
+	if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
+		nr_w = 1;
+	else
+		nr_w = nr_class;
+
+	const feature_node *lx=x;
+	for(i=0;i<nr_w;i++)
+		dec_values[i] = 0;
+	for(; (idx=lx->index)!=-1; lx++)
+	{
+		// the dimension of testing data may exceed that of training
+		if(idx<=n)
+			for(i=0;i<nr_w;i++)
+				dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
+	}
+
+	if(nr_class==2)
+		return (dec_values[0]>0)?model_->label[0]:model_->label[1];
+	else
+	{
+		int dec_max_idx = 0;
+		for(i=1;i<nr_class;i++)
+		{
+			if(dec_values[i] > dec_values[dec_max_idx])
+				dec_max_idx = i;
+		}
+		return model_->label[dec_max_idx];
+	}
+}
+
+int predict(const model *model_, const feature_node *x)
+{
+	double *dec_values = Malloc(double, model_->nr_class);
+	int label=predict_values(model_, x, dec_values);
+	free(dec_values);
+	return label;
+}
+
+int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
+{
+	if(check_probability_model(model_))
+	{
+		int i;
+		int nr_class=model_->nr_class;
+		int nr_w;
+		if(nr_class==2)
+			nr_w = 1;
+		else
+			nr_w = nr_class;
+
+		int label=predict_values(model_, x, prob_estimates);
+		for(i=0;i<nr_w;i++)
+			prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
+
+		if(nr_class==2) // for binary classification
+			prob_estimates[1]=1.-prob_estimates[0];
+		else
+		{
+			double sum=0;
+			for(i=0; i<nr_class; i++)
+				sum+=prob_estimates[i];
+
+			for(i=0; i<nr_class; i++)
+				prob_estimates[i]=prob_estimates[i]/sum;
+		}
+
+		return label;
+	}
+	else
+		return 0;
 }

 static const char *solver_type_table[]=
 {
-	"L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC","L2R_L1LOSS_SVC_DUAL","MCSVM_CS", "L1R_L2LOSS_SVC","L1R_LR", NULL
+	"L2R_LR", "L2R_L2LOSS_SVC_DUAL", "L2R_L2LOSS_SVC", "L2R_L1LOSS_SVC_DUAL", "MCSVM_CS",
+	"L1R_L2LOSS_SVC", "L1R_LR", "L2R_LR_DUAL", NULL
 };

 int save_model(const char *model_file_name, const struct model *model_)
@@ -2016,175 +2327,6 @@
 	return model_;
 }

-int predict_values(const struct model *model_, const struct feature_node *x, double *dec_values)
-{
-	int idx;
-	int n;
-	if(model_->bias>=0)
-		n=model_->nr_feature+1;
-	else
-		n=model_->nr_feature;
-	double *w=model_->w;
-	int nr_class=model_->nr_class;
-	int i;
-	int nr_w;
-	if(nr_class==2 && model_->param.solver_type != MCSVM_CS)
-		nr_w = 1;
-	else
-		nr_w = nr_class;
-
-	const feature_node *lx=x;
-	for(i=0;i<nr_w;i++)
-		dec_values[i] = 0;
-	for(; (idx=lx->index)!=-1; lx++)
-	{
-		// the dimension of testing data may exceed that of training
-		if(idx<=n)
-			for(i=0;i<nr_w;i++)
-				dec_values[i] += w[(idx-1)*nr_w+i]*lx->value;
-	}
-
-	if(nr_class==2)
-		return (dec_values[0]>0)?model_->label[0]:model_->label[1];
-	else
-	{
-		int dec_max_idx = 0;
-		for(i=1;i<nr_class;i++)
-		{
-			if(dec_values[i] > dec_values[dec_max_idx])
-				dec_max_idx = i;
-		}
-		return model_->label[dec_max_idx];
-	}
-}
-
-int predict(const model *model_, const feature_node *x)
-{
-	double *dec_values = Malloc(double, model_->nr_class);
-	int label=predict_values(model_, x, dec_values);
-	free(dec_values);
-	return label;
-}
-
-int predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates)
-{
-	if(model_->param.solver_type==L2R_LR)
-	{
-		int i;
-		int nr_class=model_->nr_class;
-		int nr_w;
-		if(nr_class==2)
-			nr_w = 1;
-		else
-			nr_w = nr_class;
-
-		int label=predict_values(model_, x, prob_estimates);
-		for(i=0;i<nr_w;i++)
-			prob_estimates[i]=1/(1+exp(-prob_estimates[i]));
-
-		if(nr_class==2) // for binary classification
-			prob_estimates[1]=1.-prob_estimates[0];
-		else
-		{
-			double sum=0;
-			for(i=0; i<nr_class; i++)
-				sum+=prob_estimates[i];
-
-			for(i=0; i<nr_class; i++)
-				prob_estimates[i]=prob_estimates[i]/sum;
-		}
-
-		return label;
-	}
-	else
-		return 0;
-}
-
-void destroy_param(parameter* param)
-{
-	if(param->weight_label != NULL)
-		free(param->weight_label);
-	if(param->weight != NULL)
-		free(param->weight);
-}
-
-const char *check_parameter(const parameter *param)
-{
-	if(param->eps <= 0)
-		return "eps <= 0";
-
-	if(param->C <= 0)
-		return "C <= 0";
-
-	if(param->solver_type != L2R_LR
-		&& param->solver_type != L2R_L2LOSS_SVC_DUAL
-		&& param->solver_type != L2R_L2LOSS_SVC
-		&& param->solver_type != L2R_L1LOSS_SVC_DUAL
-		&& param->solver_type != MCSVM_CS
-		&& param->solver_type != L1R_L2LOSS_SVC
-		&& param->solver_type != L1R_LR)
-		return "unknown solver type";
-
-	return NULL;
-}
-
-void cross_validation(const problem *prob, const parameter *param, int nr_fold, int *target)
-{
-	int i;
-	int *fold_start = Malloc(int,nr_fold+1);
-	int l = prob->l;
-	int *perm = Malloc(int,l);
-
-	for(i=0;i<l;i++) perm[i]=i;
-	for(i=0;i<l;i++)
-	{
-		int j = i+rand()%(l-i);
-		swap(perm[i],perm[j]);
-	}
-	for(i=0;i<=nr_fold;i++)
-		fold_start[i]=i*l/nr_fold;
-
-	for(i=0;i<nr_fold;i++)
-	{
-		int begin = fold_start[i];
-		int end = fold_start[i+1];
-		int j,k;
-		struct problem subprob;
-
-		subprob.bias = prob->bias;
-		subprob.n = prob->n;
-		subprob.l = l-(end-begin);
-		subprob.x = Malloc(struct feature_node*,subprob.l);
-		subprob.y = Malloc(int,subprob.l);
-		subprob.W = Malloc(double,subprob.l);
-
-		k=0;
-		for(j=0;j<begin;j++)
-		{
-			subprob.x[k] = prob->x[perm[j]];
-			subprob.y[k] = prob->y[perm[j]];
-			subprob.W[k] = prob->W[perm[j]];
-			++k;
-		}
-		for(j=end;j<l;j++)
-		{
-			subprob.x[k] = prob->x[perm[j]];
-			subprob.y[k] = prob->y[perm[j]];
-			subprob.W[k] = prob->W[perm[j]];
-			++k;
-		}
-		struct model *submodel = train(&subprob,param);
-		for(j=begin;j<end;j++)
-			target[perm[j]] = predict(submodel,prob->x[perm[j]]);
-		destroy_model(submodel);
-		free(subprob.x);
-		free(subprob.y);
-		free(subprob.W);
-	}
-	free(fold_start);
-	free(perm);
-}
-
 int get_nr_feature(const model *model_)
 {
 	return model_->nr_feature;
@@ -2202,3 +2344,65 @@
 			label[i] = model_->label[i];
 }

+void free_model_content(struct model *model_ptr)
+{
+	if(model_ptr->w != NULL)
+		free(model_ptr->w);
+	if(model_ptr->label != NULL)
+		free(model_ptr->label);
+}
+
+void free_and_destroy_model(struct model **model_ptr_ptr)
+{
+	struct model *model_ptr = *model_ptr_ptr;
+	if(model_ptr != NULL)
+	{
+		free_model_content(model_ptr);
+		free(model_ptr);
+	}
+}
+
+void destroy_param(parameter* param)
+{
+	if(param->weight_label != NULL)
+		free(param->weight_label);
+	if(param->weight != NULL)
+		free(param->weight);
+}
+
+const char *check_parameter(const problem *prob, const parameter *param)
+{
+	if(param->eps <= 0)
+		return "eps <= 0";
+
+	if(param->C <= 0)
+		return "C <= 0";
+
+	if(param->solver_type != L2R_LR
+		&& param->solver_type != L2R_L2LOSS_SVC_DUAL
+		&& param->solver_type != L2R_L2LOSS_SVC
+		&& param->solver_type != L2R_L1LOSS_SVC_DUAL
+		&& param->solver_type != MCSVM_CS
+		&& param->solver_type != L1R_L2LOSS_SVC
+		&& param->solver_type != L1R_LR
+		&& param->solver_type != L2R_LR_DUAL)
+		return "unknown solver type";
+
+	return NULL;
+}
+
+int check_probability_model(const struct model *model_)
+{
+	return (model_->param.solver_type==L2R_LR ||
+			model_->param.solver_type==L2R_LR_DUAL ||
+			model_->param.solver_type==L1R_LR);
+}
+
+void set_print_string_function(void (*print_func)(const char*))
+{
+	if (print_func == NULL)
+		liblinear_print_string = &print_string_stdout;
+	else
+		liblinear_print_string = print_func;
+}
+
--- a/extra/NaN/src/linear.h	Sun Apr 12 15:18:03 2015 +0000
+++ b/extra/NaN/src/linear.h	Sun Apr 12 17:52:15 2015 +0000
@@ -1,12 +1,11 @@
 /*

-$Id$
-Copyright (c) 2007-2009 The LIBLINEAR Project.
-Copyright (c) 2010 Alois Schloegl <alois.schloegl@gmail.com>
+Copyright (c) 2007-2011 The LIBLINEAR Project.
+Copyright (c) 2010,2015 Alois Schloegl <alois.schloegl@ist.ac.at>
 This function is part of the NaN-toolbox
 http://pub.ist.ac.at/~schloegl/matlab/NaN/

-This code was extracted from liblinear-1.51 in Jan 2010 and
+This code was extracted from liblinear-1.8 in Apr 2015 and
 modified for the use with Octave

 This program is free software; you can redistribute it and/or modify
@@ -43,10 +42,9 @@
 	int *y;
 	struct feature_node **x;
 	double bias;            /* < 0 if no bias term */
-	double *W;              /* instance weight */
 };

-enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR }; /* solver_type */
+enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL }; /* solver_type */

 struct parameter
 {
@@ -84,10 +82,13 @@
 int get_nr_class(const struct model *model_);
 void get_labels(const struct model *model_, int* label);

-void destroy_model(struct model *model_);
+void free_model_content(struct model *model_ptr);
+void free_and_destroy_model(struct model **model_ptr_ptr);
 void destroy_param(struct parameter *param);
-const char *check_parameter(const struct parameter *param);
-extern void (*liblinear_print_string) (const char *);
+
+const char *check_parameter(const struct problem *prob, const struct parameter *param);
+int check_probability_model(const struct model *model);
+void set_print_string_function(void (*print_func) (const char*));

 #ifdef __cplusplus
 }
--- a/extra/NaN/src/predict.c	Sun Apr 12 15:18:03 2015 +0000
+++ b/extra/NaN/src/predict.c	Sun Apr 12 17:52:15 2015 +0000
@@ -1,12 +1,11 @@
 /*

-$Id$
-Copyright (c) 2007-2009 The LIBLINEAR Project.
-Copyright (c) 2010 Alois Schloegl <alois.schloegl@gmail.com>
+Copyright (c) 2007-2011 The LIBLINEAR Project.
+Copyright (c) 2010,2015 Alois Schloegl <alois.schloegl@ist.ac.at>
 This function is part of the NaN-toolbox
 http://pub.ist.ac.at/~schloegl/matlab/NaN/

-This code was extracted from liblinear-1.51 in Jan 2010 and
+This code was extracted from liblinear-1.8 in Apr 2015 and
 modified for the use with Octave

 This program is free software; you can redistribute it and/or modify
@@ -25,304 +24,220 @@
 */

 #include <stdio.h>
+#include <ctype.h>
 #include <stdlib.h>
 #include <string.h>
+#include <errno.h>
 #include "linear.h"

-#include "mex.h"
-#include "linear_model_matlab.h"
-
-#ifdef tmwtypes_h
-  #if (MX_API_VER<=0x07020000)
-    typedef int mwSize;
-  #endif
-#endif
-
-#define CMD_LEN 2048
-
-#define Malloc(type,n) (type *)malloc((n)*sizeof(type))
-
-int col_format_flag;
-
-void read_sparse_instance(const mxArray *prhs, int index, struct feature_node *x, int feature_number, double bias)
-{
-	int i, j, low, high;
-	mwIndex *ir, *jc;
-	double *samples;
+struct feature_node *x;
+int max_nr_attr = 64;

-	ir = mxGetIr(prhs);
-	jc = mxGetJc(prhs);
-	samples = mxGetPr(prhs);
+struct model* model_;
+int flag_predict_probability=0;

-	// each column is one instance
-	j = 0;
-	low = (int) jc[index], high = (int) jc[index+1];
-	for(i=low; i<high && (int) (ir[i])<feature_number; i++)
-	{
-		x[j].index = (int) ir[i]+1;
-		x[j].value = samples[i];
-		j++;
-	}
-	if(bias>=0)
-	{
-		x[j].index = feature_number+1;
-		x[j].value = bias;
-		j++;
-	}
-	x[j].index = -1;
+void exit_input_error(int line_num)
+{
+	fprintf(stderr,"Wrong input format at line %d\n", line_num);
+	exit(1);
 }

-static void fake_answer(mxArray *plhs[])
+static char *line = NULL;
+static int max_line_len;
+
+static char* readline(FILE *input)
 {
-	plhs[0] = mxCreateDoubleMatrix(0, 0, mxREAL);
-	plhs[1] = mxCreateDoubleMatrix(0, 0, mxREAL);
-	plhs[2] = mxCreateDoubleMatrix(0, 0, mxREAL);
+	int len;
+
+	if(fgets(line,max_line_len,input) == NULL)
+		return NULL;
+
+	while(strrchr(line,'\n') == NULL)
+	{
+		max_line_len *= 2;
+		line = (char *) realloc(line,max_line_len);
+		len = (int) strlen(line);
+		if(fgets(line+len,max_line_len-len,input) == NULL)
+			break;
+	}
+	return line;
 }

-void do_predict(mxArray *plhs[], const mxArray *prhs[], struct model *model_, const int predict_probability_flag)
+void do_predict(FILE *input, FILE *output, struct model* model_)
 {
-	int label_vector_row_num, label_vector_col_num;
-	int feature_number, testing_instance_number;
-	int instance_index;
-	double *ptr_instance, *ptr_label, *ptr_predict_label;
-	double *ptr_prob_estimates, *ptr_dec_values, *ptr;
-	struct feature_node *x;
-	mxArray *pplhs[1]; // instance sparse matrix in row format
-
 	int correct = 0;
 	int total = 0;

 	int nr_class=get_nr_class(model_);
-	int nr_w;
 	double *prob_estimates=NULL;
+	int j, n;
+	int nr_feature=get_nr_feature(model_);
+	if(model_->bias>=0)
+		n=nr_feature+1;
+	else
+		n=nr_feature;

-	if(nr_class==2 && model_->param.solver_type!=MCSVM_CS)
-		nr_w=1;
-	else
-		nr_w=nr_class;
+	if(flag_predict_probability)
+	{
+		int *labels;

-	// prhs[1] = testing instance matrix
-	feature_number = get_nr_feature(model_);
-	testing_instance_number = (int) mxGetM(prhs[1]);
-	if(col_format_flag)
-	{
-		feature_number = (int) mxGetM(prhs[1]);
-		testing_instance_number = (int) mxGetN(prhs[1]);
+		if(!check_probability_model(model_))
+		{
+			fprintf(stderr, "probability output is only supported for logistic regression\n");
+			exit(1);
+		}
+
+		labels=(int *) malloc(nr_class*sizeof(int));
+		get_labels(model_,labels);
+		prob_estimates = (double *) malloc(nr_class*sizeof(double));
+		fprintf(output,"labels");
+		for(j=0;j<nr_class;j++)
+			fprintf(output," %d",labels[j]);
+		fprintf(output,"\n");
+		free(labels);
 	}

-	label_vector_row_num = (int) mxGetM(prhs[0]);
-	label_vector_col_num = (int) mxGetN(prhs[0]);
-
-	if(label_vector_row_num!=testing_instance_number)
-	{
-		mexPrintf("Length of label vector does not match # of instances.\n");
-		fake_answer(plhs);
-		return;
-	}
-	if(label_vector_col_num!=1)
+	max_line_len = 1024;
+	line = (char *)malloc(max_line_len*sizeof(char));
+	while(readline(input) != NULL)
 	{
-		mexPrintf("label (1st argument) should be a vector (# of column is 1).\n");
-		fake_answer(plhs);
-		return;
-	}
+		int i = 0;
+		int target_label, predict_label;
+		char *idx, *val, *label, *endptr;
+		int inst_max_index = 0; // strtol gives 0 if wrong format
+
+		label = strtok(line," \t\n");
+		if(label == NULL) // empty line
+			exit_input_error(total+1);
+
+		target_label = (int) strtol(label,&endptr,10);
+		if(endptr == label || *endptr != '\0')
+			exit_input_error(total+1);
+
+		while(1)
+		{
+			if(i>=max_nr_attr-2)	// need one more for index = -1
+			{
+				max_nr_attr *= 2;
+				x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node));
+			}
+
+			idx = strtok(NULL,":");
+			val = strtok(NULL," \t");

-	ptr_instance = mxGetPr(prhs[1]);
-	ptr_label    = mxGetPr(prhs[0]);
+			if(val == NULL)
+				break;
+			errno = 0;
+			x[i].index = (int) strtol(idx,&endptr,10);
+			if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index)
+				exit_input_error(total+1);
+			else
+				inst_max_index = x[i].index;
+
+			errno = 0;
+			x[i].value = strtod(val,&endptr);
+			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
+				exit_input_error(total+1);

-	// transpose instance matrix
-	if(mxIsSparse(prhs[1]))
-	{
-		if(col_format_flag)
+			// feature indices larger than those in training are not used
+			if(x[i].index <= nr_feature)
+				++i;
+		}
+
+		if(model_->bias>=0)
 		{
-			pplhs[0] = (mxArray *)prhs[1];
+			x[i].index = n;
+			x[i].value = model_->bias;
+			i++;
+		}
+		x[i].index = -1;
+
+		if(flag_predict_probability)
+		{
+			int j;
+			predict_label = predict_probability(model_,x,prob_estimates);
+			fprintf(output,"%d",predict_label);
+			for(j=0;j<model_->nr_class;j++)
+				fprintf(output," %g",prob_estimates[j]);
+			fprintf(output,"\n");
 		}
 		else
 		{
-			mxArray *pprhs[1];
-			pprhs[0] = mxDuplicateArray(prhs[1]);
-			if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose"))
-			{
-				mexPrintf("Error: cannot transpose testing instance matrix\n");
-				fake_answer(plhs);
-				return;
-			}
-		}
-	}
-	else
-		mexPrintf("Testing_instance_matrix must be sparse\n");
-
-
-	prob_estimates = Malloc(double, nr_class);
-
-	plhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL);
-	if(predict_probability_flag)
-		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL);
-	else
-		plhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_w, mxREAL);
-
-	ptr_predict_label = mxGetPr(plhs[0]);
-	ptr_prob_estimates = mxGetPr(plhs[2]);
-	ptr_dec_values = mxGetPr(plhs[2]);
-	x = Malloc(struct feature_node, feature_number+2);
-	for(instance_index=0;instance_index<testing_instance_number;instance_index++)
-	{
-		int i;
-		double target,v;
-
-		target = ptr_label[instance_index];
-
-		// prhs[1] and prhs[1]^T are sparse
-		read_sparse_instance(pplhs[0], instance_index, x, feature_number, model_->bias);
-
-		if(predict_probability_flag)
-		{
-			v = predict_probability(model_, x, prob_estimates);
-			ptr_predict_label[instance_index] = v;
-			for(i=0;i<nr_class;i++)
-				ptr_prob_estimates[instance_index + i * testing_instance_number] = prob_estimates[i];
-		}
-		else
-		{
-			double *dec_values = Malloc(double, nr_class);
-			v = predict(model_, x);
-			ptr_predict_label[instance_index] = v;
-
-			predict_values(model_, x, dec_values);
-			for(i=0;i<nr_w;i++)
-				ptr_dec_values[instance_index + i * testing_instance_number] = dec_values[i];
-			free(dec_values);
+			predict_label = predict(model_,x);
+			fprintf(output,"%d\n",predict_label);
 		}

-		if(v == target)
+		if(predict_label == target_label)
 			++correct;
 		++total;
 	}
-	mexPrintf("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total);
-
-	// return accuracy, mean squared error, squared correlation coefficient
-	plhs[1] = mxCreateDoubleMatrix(1, 1, mxREAL);
-	ptr = mxGetPr(plhs[1]);
-	ptr[0] = (double) correct/total*100;
-
-	free(x);
-	if(prob_estimates != NULL)
+	printf("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total);
+	if(flag_predict_probability)
 		free(prob_estimates);
 }

 void exit_with_help()
 {
-	mexPrintf(
-			"Usage: [predicted_label, accuracy, decision_values/prob_estimates] = predict(testing_label_vector, testing_instance_matrix, model, 'liblinear_options','col')\n"
-			"liblinear_options:\n"
-			"-b probability_estimates: whether to predict probability estimates, 0 or 1 (default 0)\n"
-			"col:\n"
-			"	if 'col' is setted testing_instance_matrix is parsed in column format, otherwise is in row format"
-			);
+	printf(
+	"Usage: predict [options] test_file model_file output_file\n"
+	"options:\n"
+	"-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0)\n"
+	);
+	exit(1);
 }

-void mexFunction( int nlhs, mxArray *plhs[],
-		int nrhs, const mxArray *prhs[] )
+int main(int argc, char **argv)
 {
-	int prob_estimate_flag = 0;
-	struct model *model_;
-	char cmd[CMD_LEN];
-	col_format_flag = 0;
+	FILE *input, *output;
+	int i;

-	if(nrhs > 5 || nrhs < 3)
+	// parse options
+	for(i=1;i<argc;i++)
 	{
-		exit_with_help();
-		fake_answer(plhs);
-		return;
-	}
-	if(nrhs == 5)
-	{
-		mxGetString(prhs[4], cmd, mxGetN(prhs[4])+1);
-		if(strcmp(cmd, "col") == 0)
-		{
-			col_format_flag = 1;
+		if(argv[i][0] != '-') break;
+		++i;
+		switch(argv[i-1][1])
+		{
+			case 'b':
+				flag_predict_probability = atoi(argv[i]);
+				break;
+
+			default:
+				fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
+				exit_with_help();
+				break;
 		}
 	}
+	if(i>=argc)
+		exit_with_help();

-	if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1])) {
-		mexPrintf("Error: label vector and instance matrix must be double\n");
-		fake_answer(plhs);
-		return;
+	input = fopen(argv[i],"r");
+	if(input == NULL)
+	{
+		fprintf(stderr,"can't open input file %s\n",argv[i]);
+		exit(1);
 	}

-	if(mxIsStruct(prhs[2]))
+	output = fopen(argv[i+2],"w");
+	if(output == NULL)
 	{
-		const char *error_msg;
-
-		// parse options
-		if(nrhs>=4)
-		{
-			int i, argc = 1;
-			char *argv[CMD_LEN/2];
-
-			// put options in argv[]
-			mxGetString(prhs[3], cmd,  mxGetN(prhs[3]) + 1);
-			if((argv[argc] = strtok(cmd, " ")) != NULL)
-				while((argv[++argc] = strtok(NULL, " ")) != NULL)
-					;
-
-			for(i=1;i<argc;i++)
-			{
-				if(argv[i][0] != '-') break;
-				if(++i>=argc)
-				{
-					exit_with_help();
-					fake_answer(plhs);
-					return;
-				}
-				switch(argv[i-1][1])
-				{
-					case 'b':
-						prob_estimate_flag = atoi(argv[i]);
-						break;
-					default:
-						mexPrintf("unknown option\n");
-						exit_with_help();
-						fake_answer(plhs);
-						return;
-				}
-			}
-		}
-
-		model_ = Malloc(struct model, 1);
-		error_msg = matlab_matrix_to_model(model_, prhs[2]);
-		if(error_msg)
-		{
-			mexPrintf("Error: can't read model: %s\n", error_msg);
-			destroy_model(model_);
-			fake_answer(plhs);
-			return;
-		}
-
-		if(prob_estimate_flag)
-		{
-			if(model_->param.solver_type!=L2R_LR)
-			{
-				mexPrintf("probability output is only supported for logistic regression\n");
-				prob_estimate_flag=0;
-			}
-		}
-
-		if(mxIsSparse(prhs[1]))
-			do_predict(plhs, prhs, model_, prob_estimate_flag);
-		else
-		{
-			mexPrintf("Testing_instance_matrix must be sparse\n");
-			fake_answer(plhs);
-		}
-
-		// destroy model_
-		destroy_model(model_);
-	}
-	else
-	{
-		mexPrintf("model file should be a struct array\n");
-		fake_answer(plhs);
+		fprintf(stderr,"can't open output file %s\n",argv[i+2]);
+		exit(1);
 	}

-	return;
+	if((model_=load_model(argv[i+1]))==0)
+	{
+		fprintf(stderr,"can't open model file %s\n",argv[i+1]);
+		exit(1);
+	}
+
+	x = (struct feature_node *) malloc(max_nr_attr*sizeof(struct feature_node));
+	do_predict(input, output, model_);
+	free_and_destroy_model(&model_);
+	free(line);
+	free(x);
+	fclose(input);
+	fclose(output);
+	return 0;
 }
+
--- a/extra/NaN/src/train.c	Sun Apr 12 15:18:03 2015 +0000
+++ b/extra/NaN/src/train.c	Sun Apr 12 17:52:15 2015 +0000
@@ -1,12 +1,11 @@
 /*

-$Id$
-Copyright (c) 2007-2009 The LIBLINEAR Project.
-Copyright (c) 2010 Alois Schloegl <alois.schloegl@gmail.com>
+Copyright (c) 2007-2011 The LIBLINEAR Project.
+Copyright (c) 2010,2015 Alois Schloegl <alois.schloegl@ist.ac.at>
 This function is part of the NaN-toolbox
 http://pub.ist.ac.at/~schloegl/matlab/NaN/

-This code was extracted from liblinear-1.51 in Jan 2010 and
+This code was extracted from liblinear-1.8 in Apr 2015 and
 modified for the use with Octave

 This program is free software; you can redistribute it and/or modify
@@ -29,93 +28,144 @@
 #include <stdlib.h>
 #include <string.h>
 #include <ctype.h>
+#include <errno.h>
 #include "linear.h"
-
-#include "mex.h"
-#include "linear_model_matlab.h"
-
-#ifdef tmwtypes_h
-  #if (MX_API_VER<=0x07020000)
-    typedef int mwSize;
-    typedef int mwIndex;
-  #endif
-#endif
-
-#define CMD_LEN 2048
 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
 #define INF HUGE_VAL

-void print_null(const char *s){}
-
-void (*liblinear_default_print_string) (const char *);
+void print_null(const char *s) {}

 void exit_with_help()
 {
-	mexPrintf(
-	"Usage: model = train(weight_vector, training_label_vector, training_instance_matrix, 'liblinear_options', 'col');\n"
-	"liblinear_options:\n"
+	printf(
+	"Usage: train [options] training_set_file [model_file]\n"
+	"options:\n"
 	"-s type : set type of solver (default 1)\n"
-	"	0 -- L2-regularized logistic regression\n"
+	"	0 -- L2-regularized logistic regression (primal)\n"
 	"	1 -- L2-regularized L2-loss support vector classification (dual)\n"
 	"	2 -- L2-regularized L2-loss support vector classification (primal)\n"
 	"	3 -- L2-regularized L1-loss support vector classification (dual)\n"
 	"	4 -- multi-class support vector classification by Crammer and Singer\n"
 	"	5 -- L1-regularized L2-loss support vector classification\n"
 	"	6 -- L1-regularized logistic regression\n"
+	"	7 -- L2-regularized logistic regression (dual)\n"
 	"-c cost : set the parameter C (default 1)\n"
 	"-e epsilon : set tolerance of termination criterion\n"
 	"	-s 0 and 2\n"
 	"		|f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n"
-	"		where f is the primal function, (default 0.01)\n"
-	"	-s 1, 3, and 4\n"
+	"		where f is the primal function and pos/neg are # of\n"
+	"		positive/negative data (default 0.01)\n"
+	"	-s 1, 3, 4 and 7\n"
 	"		Dual maximal violation <= eps; similar to libsvm (default 0.1)\n"
 	"	-s 5 and 6\n"
-	"		|f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf,\n"
+	"		|f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,\n"
 	"		where f is the primal function (default 0.01)\n"
 	"-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n"
 	"-wi weight: weights adjust the parameter C of different classes (see README for details)\n"
 	"-v n: n-fold cross validation mode\n"
 	"-q : quiet mode (no outputs)\n"
-	"col:\n"
-	"	if 'col' is setted, training_instance_matrix is parsed in column format, otherwise is in row format\n"
 	);
+	exit(1);
+}
+
+void exit_input_error(int line_num)
+{
+	fprintf(stderr,"Wrong input format at line %d\n", line_num);
+	exit(1);
 }

-// liblinear arguments
-struct parameter param;		// set by parse_command_line
-struct problem prob;		// set by read_problem
-struct model *model_;
+static char *line = NULL;
+static int max_line_len;
+
+static char* readline(FILE *input)
+{
+	int len;
+
+	if(fgets(line,max_line_len,input) == NULL)
+		return NULL;
+
+	while(strrchr(line,'\n') == NULL)
+	{
+		max_line_len *= 2;
+		line = (char *) realloc(line,max_line_len);
+		len = (int) strlen(line);
+		if(fgets(line+len,max_line_len-len,input) == NULL)
+			break;
+	}
+	return line;
+}
+
+void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name);
+void read_problem(const char *filename);
+void do_cross_validation();
+
 struct feature_node *x_space;
-int cross_validation_flag;
-int col_format_flag;
+struct parameter param;
+struct problem prob;
+struct model* model_;
+int flag_cross_validation;
 int nr_fold;
 double bias;

-double do_cross_validation()
+int main(int argc, char **argv)
+{
+	char input_file_name[1024];
+	char model_file_name[1024];
+	const char *error_msg;
+
+	parse_command_line(argc, argv, input_file_name, model_file_name);
+	read_problem(input_file_name);
+	error_msg = check_parameter(&prob,&param);
+
+	if(error_msg)
+	{
+		fprintf(stderr,"Error: %s\n",error_msg);
+		exit(1);
+	}
+
+	if(flag_cross_validation)
+	{
+		do_cross_validation();
+	}
+	else
+	{
+		model_=train(&prob, &param);
+		if(save_model(model_file_name, model_))
+		{
+			fprintf(stderr,"can't save model to file %s\n",model_file_name);
+			exit(1);
+		}
+		free_and_destroy_model(&model_);
+	}
+	destroy_param(&param);
+	free(prob.y);
+	free(prob.x);
+	free(x_space);
+	free(line);
+
+	return 0;
+}
+
+void do_cross_validation()
 {
 	int i;
 	int total_correct = 0;
-	int *target = Malloc(int,prob.l);
-	double retval = 0.0;
+	int *target = Malloc(int, prob.l);

 	cross_validation(&prob,&param,nr_fold,target);

 	for(i=0;i<prob.l;i++)
 		if(target[i] == prob.y[i])
 			++total_correct;
-	mexPrintf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);
-	retval = 100.0*total_correct/prob.l;
+	printf("Cross Validation Accuracy = %g%%\n",100.0*total_correct/prob.l);

 	free(target);
-	return retval;
 }

-// nrhs should be 4
-int parse_command_line(int nrhs, const mxArray *prhs[], char *model_file_name)
+void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name)
 {
-	int i, argc = 1;
-	char cmd[CMD_LEN];
-	char *argv[CMD_LEN/2];
+	int i;
+	void (*print_func)(const char*) = NULL;	// default printing to stdout

 	// default values
 	param.solver_type = L2R_L2LOSS_SVC_DUAL;
@@ -124,65 +174,33 @@
 	param.nr_weight = 0;
 	param.weight_label = NULL;
 	param.weight = NULL;
-	cross_validation_flag = 0;
-	col_format_flag = 0;
+	flag_cross_validation = 0;
 	bias = -1;

-	// train loaded only once under matlab
-	if(liblinear_default_print_string == NULL)
-		liblinear_default_print_string = liblinear_print_string;
-	else
-		liblinear_print_string = liblinear_default_print_string;
-
-	if(nrhs <= 2)
-		return 1;
-
-	if(nrhs == 5)
-	{
-		mxGetString(prhs[4], cmd, mxGetN(prhs[4])+1);
-		if(strcmp(cmd, "col") == 0)
-			col_format_flag = 1;
-	}
-
-	// put options in argv[]
-	if(nrhs > 3)
-	{
-		mxGetString(prhs[3], cmd,  mxGetN(prhs[3]) + 1);
-		if((argv[argc] = strtok(cmd, " ")) != NULL)
-			while((argv[++argc] = strtok(NULL, " ")) != NULL)
-				;
-	}
-
 	// parse options
 	for(i=1;i<argc;i++)
 	{
 		if(argv[i][0] != '-') break;
-		++i;
-		if(i>=argc && argv[i-1][1] != 'q') // since option -q has no parameter
-			return 1;
+		if(++i>=argc)
+			exit_with_help();
 		switch(argv[i-1][1])
 		{
 			case 's':
 				param.solver_type = atoi(argv[i]);
 				break;
+
 			case 'c':
 				param.C = atof(argv[i]);
 				break;
+
 			case 'e':
 				param.eps = atof(argv[i]);
 				break;
+
 			case 'B':
 				bias = atof(argv[i]);
 				break;
-			case 'v':
-				cross_validation_flag = 1;
-				nr_fold = atoi(argv[i]);
-				if(nr_fold < 2)
-				{
-					mexPrintf("n-fold cross validation: n must >= 2\n");
-					return 1;
-				}
-				break;
+
 			case 'w':
 				++param.nr_weight;
 				param.weight_label = (int *) realloc(param.weight_label,sizeof(int)*param.nr_weight);
@@ -190,212 +208,158 @@
 				param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]);
 				param.weight[param.nr_weight-1] = atof(argv[i]);
 				break;
+
+			case 'v':
+				flag_cross_validation = 1;
+				nr_fold = atoi(argv[i]);
+				if(nr_fold < 2)
+				{
+					fprintf(stderr,"n-fold cross validation: n must >= 2\n");
+					exit_with_help();
+				}
+				break;
+
 			case 'q':
-				liblinear_print_string = &print_null;
+				print_func = &print_null;
 				i--;
 				break;
+
 			default:
-				mexPrintf("unknown option\n");
-				return 1;
+				fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]);
+				exit_with_help();
+				break;
 		}
 	}

+	set_print_string_function(print_func);
+
+	// determine filenames
+	if(i>=argc)
+		exit_with_help();
+
+	strcpy(input_file_name, argv[i]);
+
+	if(i<argc-1)
+		strcpy(model_file_name,argv[i+1]);
+	else
+	{
+		char *p = strrchr(argv[i],'/');
+		if(p==NULL)
+			p = argv[i];
+		else
+			++p;
+		sprintf(model_file_name,"%s.model",p);
+	}
+
 	if(param.eps == INF)
 	{
 		if(param.solver_type == L2R_LR || param.solver_type == L2R_L2LOSS_SVC)
 			param.eps = 0.01;
-		else if(param.solver_type == L2R_L2LOSS_SVC_DUAL || param.solver_type == L2R_L1LOSS_SVC_DUAL || param.solver_type == MCSVM_CS)
+		else if(param.solver_type == L2R_L2LOSS_SVC_DUAL || param.solver_type == L2R_L1LOSS_SVC_DUAL || param.solver_type == MCSVM_CS || param.solver_type == L2R_LR_DUAL)
 			param.eps = 0.1;
 		else if(param.solver_type == L1R_L2LOSS_SVC || param.solver_type == L1R_LR)
 			param.eps = 0.01;
 	}
-	return 0;
-}
-
-static void fake_answer(mxArray *plhs[])
-{
-	plhs[0] = mxCreateDoubleMatrix(0, 0, mxREAL);
 }

-int read_problem_sparse(const mxArray *weight_vec, const mxArray *label_vec, const mxArray *instance_mat)
+// read in a problem (in libsvm format)
+void read_problem(const char *filename)
 {
-	int i, j, k, low, high;
-	mwIndex *ir, *jc;
-	int elements, max_index, num_samples, label_vector_row_num, weight_vector_row_num;
-	double *samples, *labels, *weights;
-	mxArray *instance_mat_col; // instance sparse matrix in column format
-
-	prob.x = NULL;
-	prob.y = NULL;
-	prob.W = NULL;
-	x_space = NULL;
+	int max_index, inst_max_index, i;
+	long int elements, j;
+	FILE *fp = fopen(filename,"r");
+	char *endptr;
+	char *idx, *val, *label;

-	if(col_format_flag)
-		instance_mat_col = (mxArray *)instance_mat;
-	else
+	if(fp == NULL)
 	{
-		// transpose instance matrix
-		mxArray *prhs[1], *plhs[1];
-		prhs[0] = mxDuplicateArray(instance_mat);
-		if(mexCallMATLAB(1, plhs, 1, prhs, "transpose"))
-		{
-			mexPrintf("Error: cannot transpose training instance matrix\n");
-			return -1;
-		}
-		instance_mat_col = plhs[0];
-		mxDestroyArray(prhs[0]);
+		fprintf(stderr,"can't open input file %s\n",filename);
+		exit(1);
 	}

-	// the number of instance
-	prob.l = (int) mxGetN(instance_mat_col);
-	weight_vector_row_num = (int) mxGetM(weight_vec);
-	label_vector_row_num = (int) mxGetM(label_vec);
+	prob.l = 0;
+	elements = 0;
+	max_line_len = 1024;
+	line = Malloc(char,max_line_len);
+	while(readline(fp)!=NULL)
+	{
+		char *p = strtok(line," \t"); // label

-	if(weight_vector_row_num == 0)
-		;//mexPrintf("Warning: treat each instance with weight 1.0\n");
-	else if(weight_vector_row_num!=prob.l)
-	{
-		mexPrintf("Length of weight vector does not match # of instances.\n");
-		return -1;
-	}
-	if(label_vector_row_num!=prob.l)
-	{
-		mexPrintf("Length of label vector does not match # of instances.\n");
-		return -1;
+		// features
+		while(1)
+		{
+			p = strtok(NULL," \t");
+			if(p == NULL || *p == '\n') // check '\n' as ' ' may be after the last feature
+				break;
+			elements++;
+		}
+		elements++; // for bias term
+		prob.l++;
 	}
-
-	// each column is one instance
-	weights = mxGetPr(weight_vec);
-	labels = mxGetPr(label_vec);
-	samples = mxGetPr(instance_mat_col);
-	ir = mxGetIr(instance_mat_col);
-	jc = mxGetJc(instance_mat_col);
-
-	num_samples = (int) mxGetNzmax(instance_mat_col);
-
-	elements = num_samples + prob.l*2;
-	max_index = (int) mxGetM(instance_mat_col);
-
-	prob.y = Malloc(int, prob.l);
-	prob.W = Malloc(double,prob.l);
-	prob.x = Malloc(struct feature_node*, prob.l);
-	x_space = Malloc(struct feature_node, elements);
+	rewind(fp);

 	prob.bias=bias;

-	j = 0;
+	prob.y = Malloc(int,prob.l);
+	prob.x = Malloc(struct feature_node *,prob.l);
+	x_space = Malloc(struct feature_node,elements+prob.l);
+
+	max_index = 0;
+	j=0;
 	for(i=0;i<prob.l;i++)
 	{
+		inst_max_index = 0; // strtol gives 0 if wrong format
+		readline(fp);
 		prob.x[i] = &x_space[j];
-		prob.y[i] = (int) labels[i];
-		prob.W[i] = 1;
-		if(weight_vector_row_num > 0)
-			prob.W[i] *= (double) weights[i];
-		low = (int) jc[i], high = (int) jc[i+1];
-		for(k=low;k<high;k++)
+		label = strtok(line," \t\n");
+		if(label == NULL) // empty line
+			exit_input_error(i+1);
+
+		prob.y[i] = (int) strtol(label,&endptr,10);
+		if(endptr == label || *endptr != '\0')
+			exit_input_error(i+1);
+
+		while(1)
 		{
-			x_space[j].index = (int) ir[k]+1;
-			x_space[j].value = samples[k];
-			j++;
-	 	}
-		if(prob.bias>=0)
-		{
-			x_space[j].index = max_index+1;
-			x_space[j].value = prob.bias;
-			j++;
+			idx = strtok(NULL,":");
+			val = strtok(NULL," \t");
+
+			if(val == NULL)
+				break;
+
+			errno = 0;
+			x_space[j].index = (int) strtol(idx,&endptr,10);
+			if(endptr == idx || errno != 0 || *endptr != '\0' || x_space[j].index <= inst_max_index)
+				exit_input_error(i+1);
+			else
+				inst_max_index = x_space[j].index;
+
+			errno = 0;
+			x_space[j].value = strtod(val,&endptr);
+			if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr)))
+				exit_input_error(i+1);
+
+			++j;
 		}
+
+		if(inst_max_index > max_index)
+			max_index = inst_max_index;
+
+		if(prob.bias >= 0)
+			x_space[j++].value = prob.bias;
+
 		x_space[j++].index = -1;
 	}

-	if(prob.bias>=0)
-		prob.n = max_index+1;
-	else
-		prob.n = max_index;
-
-	return 0;
-}
-
-// Interface function of matlab
-// now assume prhs[0]: label prhs[1]: features
-void mexFunction( int nlhs, mxArray *plhs[],
-		int nrhs, const mxArray *prhs[] )
-{
-	const char *error_msg;
-	// fix random seed to have same results for each run
-	// (for cross validation)
-	srand(1);
-
-	// Transform the input Matrix to libsvm format
-	if(nrhs > 2 && nrhs < 6)
+	if(prob.bias >= 0)
 	{
-		int err=0;
-
-		if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1]) || !mxIsDouble(prhs[2])) {
-			mexPrintf("Error: weight vector, label vector and instance matrix must be double\n");
-			fake_answer(plhs);
-			return;
-		}
-
-		if(parse_command_line(nrhs, prhs, NULL))
-		{
-			exit_with_help();
-			destroy_param(&param);
-			fake_answer(plhs);
-			return;
-		}
-
-		if(mxIsSparse(prhs[2]))
-			err = read_problem_sparse(prhs[0], prhs[1], prhs[2]);
-		else
-		{
-			mexPrintf("Training_instance_matrix must be sparse\n");
-			destroy_param(&param);
-			fake_answer(plhs);
-			return;
-		}
-
-		// train's original code
-		error_msg = check_parameter(&param);
-
-		if(err || error_msg)
-		{
-			if (error_msg != NULL)
-				mexPrintf("Error: %s\n", error_msg);
-			destroy_param(&param);
-			free(prob.y);
-			free(prob.x);
-			free(x_space);
-			fake_answer(plhs);
-			return;
-		}
-
-		if(cross_validation_flag)
-		{
-			double *ptr;
-			plhs[0] = mxCreateDoubleMatrix(1, 1, mxREAL);
-			ptr = mxGetPr(plhs[0]);
-			ptr[0] = do_cross_validation();
-		}
-		else
-		{
-			const char *error_msg;
-
-			model_ = train(&prob, &param);
-			error_msg = model_to_matlab_structure(plhs, model_);
-			if(error_msg)
-				mexPrintf("Error: can't convert libsvm model to matrix structure: %s\n", error_msg);
-			destroy_model(model_);
-		}
-		destroy_param(&param);
-		free(prob.y);
-		free(prob.x);
-		free(prob.W);
-		free(x_space);
+		prob.n=max_index+1;
+		for(i=1;i<prob.l;i++)
+			(prob.x[i]-2)->index = prob.n;
+		x_space[j-2].index = prob.n;
 	}
 	else
-	{
-		exit_with_help();
-		fake_answer(plhs);
-		return;
-	}
+		prob.n=max_index;
+
+	fclose(fp);
 }