changeset 2962:18fae735987a octave-forge

initial commit of functions for nonparametric smoothing. Includes kernel regression and density. Can make use of MPITB for parallel computing if it's available.
author mcreel
date Wed, 24 Jan 2007 10:55:54 +0000
parents e23b10b8855a
children 83fd03c8b547
files main/econometrics/inst/__kernel_epanechnikov.m main/econometrics/inst/__kernel_normal.m main/econometrics/inst/kernel_density.m main/econometrics/inst/kernel_density_cvscore.m main/econometrics/inst/kernel_density_nodes.m main/econometrics/inst/kernel_example.m main/econometrics/inst/kernel_optimal_bandwidth.m main/econometrics/inst/kernel_regression.m main/econometrics/inst/kernel_regression_cvscore.m main/econometrics/inst/kernel_regression_nodes.m
diffstat 10 files changed, 601 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/econometrics/inst/__kernel_epanechnikov.m	Wed Jan 24 10:55:54 2007 +0000
@@ -0,0 +1,36 @@
+# Copyright (C) 2006 Michael Creel <michael.creel@uab.es>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+# __kernel_epanechnikov: this function is for internal use by kernel_density
+# and kernel_regression
+#
+# multivariate spherical Epanechnikov kernel
+# input: PxK matrix - P data points, each of which is in R^K
+# output: Px1 vector, input matrix passed though the kernel
+# other multivariate kernel functions should follow this convention
+
+function z = __kernel_epanechnikov(z)
+
+	K = columns(z);
+
+	# Volume of d-dimensional unit sphere
+	c = pi ^ (K/2) / gamma(K/2 + 1);
+
+	# compute kernel
+	z  =  sumsq(z, 2);
+	z = ((1/2) / c * (K + 2) * (1 - z)) .* (z < 1);
+
+endfunction
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/econometrics/inst/__kernel_normal.m	Wed Jan 24 10:55:54 2007 +0000
@@ -0,0 +1,30 @@
+# Copyright (C) 2006 Michael Creel <michael.creel@uab.es>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+# __kernel_normal: this function is for internal use by kernel_density
+# and kernel_regression
+#
+# product normal kernel
+# input: PxK matrix - P data points, each of which is in R^K
+# output: Px1 vector, input matrix passed though the kernel
+# other multivariate kernel functions should follow this convention
+
+function z = __kernel_normal(z)
+
+	z = normal_pdf(z);
+	z = prod(z,2);
+
+endfunction
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/econometrics/inst/kernel_density.m	Wed Jan 24 10:55:54 2007 +0000
@@ -0,0 +1,116 @@
+# Copyright (C) 2006 Michael Creel <michael.creel@uab.es>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+# kernel_density: multivariate kernel density estimator
+#
+# usage:
+# 	dens = kernel_density(eval_points, data, bandwidth)
+#
+# inputs:
+#	eval_points: PxK matrix of points at which to calculate the density
+# 	data: NxK matrix of data points
+#	bandwidth: positive scalar, the smoothing parameter. The fit
+# 		is more smooth as the bandwidth increases.
+#	do_cv: bool (optional). default false. If true, calculate leave-1-out
+#		 density for cross validation
+#	nslaves: int (optional, default 0). Number of compute nodes for parallel evaluation
+#	debug: bool (optional, default false). show results on compute nodes if doing
+#		 parallel run
+#	bandwith_matrix (optional): nonsingular KxK matrix. Rotates data.
+# 		Default is Choleski decomposition of inverse of covariance,
+#		to approximate independence after the transformation, which
+#		makes a product kernel a reasonable choice.
+#	kernel (optional): string. Name of the kernel function. Default is radial
+#		symmetric Epanechnikov kernel.
+# outputs:
+#	dens: Px1 vector: the fitted density value at each of the P evaluation points.
+#
+# References:
+# Wand, M.P. and Jones, M.C. (1995), 'Kernel smoothing'.
+# http://www.xplore-stat.de/ebooks/scripts/spm/html/spmhtmlframe73.html
+
+function z = kernel_density(eval_points, data, bandwidth, do_cv, nslaves, debug, bandwith_matrix, kernel)
+
+	if nargin < 3; error("kernel_density: at least 3 arguments are required"); endif
+
+	# set defaults for optional args
+	# default ordinary density, not leave-1-out
+	if (nargin < 4)	do_cv = false; endif
+	# default serial
+	if (nargin < 5)	nslaves = 0; endif
+	# debug or not (default)
+	if (nargin < 6)	debug = false; endif;
+	# default bandwidth matrix (up to factor of proportionality)
+	if (nargin < 7) bandwidth_matrix = chol(cov(data)); endif # default bandwidth matrix
+	# default kernel
+	if (nargin < 8) kernel = "__kernel_epanechnikov"; endif 	# default kernel
+
+
+	nn = rows(eval_points);
+	n = rows(data);
+
+	# Inverse bandwidth matrix H_inv
+	H = bandwidth_matrix*bandwidth;
+	H_inv = inv(H);
+
+	# weight by inverse bandwidth matrix
+	eval_points = eval_points*H_inv;
+	data = data*H_inv;
+
+	# check if doing this parallel or serial
+	global PARALLEL NSLAVES NEWORLD NSLAVES TAG
+	PARALLEL = 0;
+
+	if nslaves > 0
+		PARALLEL = 1;
+		NSLAVES = nslaves;
+		LAM_Init(nslaves, debug);
+	endif
+
+	if !PARALLEL # ordinary serial version
+		points_per_node = nn; # do the all on this node
+		z = kernel_density_nodes(eval_points, data, do_cv, kernel, points_per_node, nslaves, debug);
+	else # parallel version
+		z = zeros(nn,1);
+		points_per_node = floor(nn/(NSLAVES + 1)); # number of obsns per slave
+		# The command that the slave nodes will execute
+		cmd=['z_on_node = kernel_density_nodes(eval_points, data, do_cv, kernel, points_per_node, nslaves, debug); ',...
+		'MPI_Send(z_on_node, 0, TAG, NEWORLD);'];
+
+		# send items to slaves
+
+		NumCmds_Send({"eval_points", "data", "do_cv", "kernel", "points_per_node", "nslaves", "debug","cmd"}, {eval_points, data, do_cv, kernel, points_per_node, nslaves, debug, cmd});
+
+		# evaluate last block on master while slaves are busy
+		z_on_node = kernel_density_nodes(eval_points, data, do_cv, kernel, points_per_node, nslaves, debug);
+		startblock = NSLAVES*points_per_node + 1;
+		endblock = nn;
+		z(startblock:endblock,:) = z(startblock:endblock,:) + z_on_node;
+
+		# collect slaves' results
+		z_on_node = zeros(points_per_node,1); # size may differ between master and compute nodes - reset here
+		for i = 1:NSLAVES
+			MPI_Recv(z_on_node,i,TAG,NEWORLD);
+			startblock = i*points_per_node - points_per_node + 1;
+			endblock = i*points_per_node;
+			z(startblock:endblock,:) = z(startblock:endblock,:) + z_on_node;
+		endfor
+
+		# clean up after parallel
+		LAM_Finalize;
+	endif
+	z = z*det(H_inv);
+endfunction
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/econometrics/inst/kernel_density_cvscore.m	Wed Jan 24 10:55:54 2007 +0000
@@ -0,0 +1,6 @@
+function cvscore = kernel_density_cvscore(bandwidth, data, depvar)
+		dens = kernel_density(data, data, exp(bandwidth), true);
+		dens = dens + eps; # some kernels can assign zero density
+		cvscore = -mean(log(dens));
+endfunction
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/econometrics/inst/kernel_density_nodes.m	Wed Jan 24 10:55:54 2007 +0000
@@ -0,0 +1,60 @@
+# Copyright (C) 2006  Michael Creel <michael.creel@uab.es>
+# under the terms of the GNU General Public License.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+# Kernel density, parallel version: do calculations on nodes
+
+function z = kernel_density_nodes(eval_points, data, do_cv, kernel, points_per_node, nslaves, debug)
+
+	if (nslaves > 0)
+		global NEWORLD
+		[info, myrank] = MPI_Comm_rank(NEWORLD);
+	else myrank = 0; # if not parallel then do all on master node
+	endif
+
+	if myrank == 0 # Do this if I'm master
+		startblock = nslaves*points_per_node + 1;
+		endblock = rows(eval_points);
+	else	# this is for the slaves
+		startblock = myrank*points_per_node - points_per_node + 1;
+		endblock = myrank*points_per_node;
+	endif
+
+	# the block of eval_points this node does
+	myeval = eval_points(startblock:endblock,:);
+	nn = rows(myeval);
+	n = rows(data);
+
+	# calculate distances
+	# note to self: loop is as fast as double kronecker with
+	# reshape. Also, simpler and safe in terms of memory.
+	z = zeros(nn,1);
+	for i = 1:nn
+		zz = data - kron(myeval(i,:), ones(n,1));
+		zz = feval(kernel, zz);
+		z(i) = sum(zz);
+		if (do_cv) z(i) = z(i) - zz(i); endif
+	endfor
+
+	if (do_cv) z = z / (n-1); else 	z = z/n; endif
+
+	if debug
+		printf("z on node %d: \n", myrank);
+		z'
+	endif
+endfunction
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/econometrics/inst/kernel_example.m	Wed Jan 24 10:55:54 2007 +0000
@@ -0,0 +1,144 @@
+# Copyright (C) 2007 Michael Creel <michael.creel@uab.es>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+# kernel_example: examples of how to use kernel density and regression functions
+#
+# usage: kernel_example;
+
+# sample size - should get better fit (on average) as this increases
+n = 500;
+
+# set this to greater than 0 to try parallel computations (requires MPITB)
+compute_nodes = 0;
+
+nodes = compute_nodes + 1; # count master node
+
+############################################################
+# kernel regression example
+# uniformly spaced data points on [0,2]
+x = 1:n;
+x = x';
+x = 2*x/n;
+# generate dependent variable
+trueline =  x + (x.^2)/2 - 3.1*(x.^3)/3 + 1.2*(x.^4)/4;
+sig = 0.5;
+y = trueline + sig*randn(n,1);
+# default bandwidth
+bandwidth = 0.45;
+# get optimal bandwidth (time consuming, uncomment if you want to try it)
+# bandwidth = kernel_optimal_bandwidth(x, y);
+# get the fit and do the plot
+tic;
+fit = kernel_regression(x, y, x, bandwidth, false, compute_nodes);
+t1 = toc;
+printf("\n");
+printf("########################################################################\n");
+printf("Kernel regression example\n");
+printf("time using %d data points and %d compute nodes: %f\n", n, nodes, t1);
+grid("on");
+title("Example 1: Kernel regression fit");
+plot(x, fit, x, trueline);
+
+############################################################
+# kernel density example: univariate - fit to Chi^2(3) data
+data = sumsq(randn(n,3),2);
+# evaluation point are on a grid for plotting
+stepsize = 0.2;
+grid_x = (-1:stepsize:11)';
+bandwidth = 0.55;
+# get optimal bandwidth (time consuming, uncomment if you want to try it)
+# bandwidth = kernel_optimal_bandwidth(data);
+# get the fitted density and do a plot
+tic;
+dens = kernel_density(grid_x, data, bandwidth, false, compute_nodes);
+t1 = toc;
+printf("\n");
+printf("########################################################################\n");
+printf("Univariate kernel density example\n");
+printf("time using %d data points and %d compute nodes: %f\n", n, nodes, t1);
+printf("A rough integration under the fitted univariate density is %f\n", sum(dens)*stepsize);
+figure();
+title("Example 2: Kernel density fit: Univariate Chi^2(3) data");
+xlabel("true density is Chi^2(3)");
+plot(grid_x, [dens chisquare_pdf(grid_x,3)]);
+
+############################################################
+# kernel density example: bivariate
+# X ~ N(0,1)
+# Y ~ Chi squared(3)
+# X, Y are dependent
+d = randn(n,3);
+data = [d(:,1) sumsq(d,2)];
+# evaluation points are on a grid for plotting
+stepsize = 0.2;
+a = (-5:stepsize:5)'; # for the N(0,1)
+b = (-1:stepsize:9)';  # for the Chi squared(3)
+gridsize = rows(a);
+[grid_x, grid_y] = meshgrid(a, b);
+eval_points = [vec(grid_x) vec(grid_y)];
+bandwidth = 0.85;
+# get optimal bandwidth (time consuming, uncomment if you want to try it)
+# bandwidth = kernel_optimal_bandwidth(data);
+# get the fitted density and do a plot
+tic;
+dens = kernel_density(eval_points, data, bandwidth, false, compute_nodes);
+t1 = toc;
+printf("\n");
+printf("########################################################################\n");
+printf("Multivatiate kernel density example\n");
+printf("time using %d data points and %d compute nodes: %f\n", n, nodes, t1);
+dens = reshape(dens, gridsize, gridsize);
+printf("A rough integration under the fitted bivariate density is %f\n", sum(sum(dens))*stepsize^2);
+figure();
+legend("off");
+title("Example 3: Kernel density fit: dependent bivatiate data");
+xlabel("true marginal density is N(0,1)");
+ylabel("true marginal density is Chi^2(3)");
+surf(grid_x, grid_y, dens);
+
+
+# more extensive test of parallel
+if compute_nodes > 0 # only try this if parallel is available
+	############################################################
+	# kernel density example: bivariate
+	# X ~ N(0,1)
+	# Y ~ Chi squared(3)
+	# X, Y are dependent
+	# evaluation points are on a grid for plotting
+	stepsize = 0.2;
+	a = (-5:stepsize:5)'; # for the N(0,1)
+	b = (-1:stepsize:9)';  # for the Chi squared(3)
+	gridsize = rows(a);
+	[grid_x, grid_y] = meshgrid(a, b);
+	eval_points = [vec(grid_x) vec(grid_y)];
+	bandwidth = 0.85;
+	ns = [1000; 2000; 4000; 8000];
+	printf("\n");
+	printf("########################################################################\n");
+	printf("multivariate kernel density example with several sample sizes serial/parallel timings\n");
+	for i = 1:rows(ns)
+		for compute_nodes = 0:1
+			nodes = compute_nodes + 1;
+			n = ns(i,:);
+			d = randn(n,3);
+			data = [d(:,1) sumsq(d,2)];
+			tic;
+			dens = kernel_density(eval_points, data, bandwidth, false, compute_nodes);
+			t1 = toc;
+			printf(" %d data points and %d compute nodes: %f\n", n, nodes, t1);
+		endfor
+	endfor
+endif
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/econometrics/inst/kernel_optimal_bandwidth.m	Wed Jan 24 10:55:54 2007 +0000
@@ -0,0 +1,27 @@
+function bandwidth = kernel_optimal_bandwidth(data, depvar)
+	# SA controls
+	ub = 1;
+	lb = -5;
+	nt = 1;
+	ns = 1;
+	rt = 0.05;
+	maxevals = 50;
+	neps = 5;
+	functol = 1e-2;
+	paramtol = 1e-3;
+	verbosity = 0;
+	minarg = 1;
+	sa_control = { lb, ub, nt, ns, rt, maxevals, neps, functol, paramtol, verbosity, 1};
+
+	# bfgs controls
+	bfgs_control = {100,0,0};
+
+	if (nargin == 1)
+		bandwidth = samin("kernel_density_cvscore", {0, data}, sa_control);
+		bandwidth = bfgsmin("kernel_density_cvscore", {bandwidth, data}, bfgs_control);
+	else
+		bandwidth = samin("kernel_regression_cvscore", {0, data, depvar}, sa_control);
+		bandwidth = bfgsmin("kernel_regression_cvscore", {bandwidth, data, depvar}, bfgs_control);
+	endif
+	bandwidth = exp(bandwidth);
+endfunction
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/econometrics/inst/kernel_regression.m	Wed Jan 24 10:55:54 2007 +0000
@@ -0,0 +1,114 @@
+# Copyright (C) 2006 Michael Creel <michael.creel@uab.es>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+# kernel_density: kernel regression estimator
+#
+# usage:
+# 	dens = kernel_regression(eval_points, depvar, condvars, bandwidth)
+#
+# inputs:
+#	eval_points: PxK matrix of points at which to calculate the density
+#	dev_var: Nx1 vector of observations of the dependent variable
+# 	condvars: NxK matrix. N observations on the K conditioning variables.
+#	bandwidth: positive scalar, the smoothing parameter. The fit
+# 		is more smooth as the bandwidth increases.
+#	do_cv: bool (optional). default false. If true, calculate leave-1-out
+#		density for cross validation
+#	nslaves: int (optional, default 0). Number of compute nodes for parallel evaluation
+#	debug: bool (optional, default false). show results on compute nodes if doing
+#		 parallel run
+#	bandwith_matrix (optional): nonsingular KxK matrix. Rotates data.
+# 		Default is Choleski decomposition of inverse of covariance,
+#		to approximate independence after the transformation, which
+#		makes a product kernel a reasonable choice.
+#	kernel (optional): string. Name of the kernel function. Default is radial
+#		symmetric Epanechnikov kernel.
+# outputs:
+#	dens: Px1 vector: the fitted density value at each of the P evaluation points.
+#
+
+function z = kernel_regression(eval_points, depvar, condvars, bandwidth, do_cv, nslaves, debug, bandwith_matrix, kernel)
+
+	if nargin < 4; error("kernel_regression: at least 4 arguments are required"); endif
+
+	# set defaults for optional args
+	# default ordinary density, not leave-1-out
+	if (nargin < 5)	do_cv = false; endif
+	# default serial
+	if (nargin < 6)	nslaves = 0; endif
+	# debug or not (default)
+	if (nargin < 7)	debug = false; endif;
+	# default bandwidth matrix (up to factor of proportionality)
+	if (nargin < 8) bandwidth_matrix = chol(cov(condvars)); endif # default bandwidth matrix
+	# default kernel
+	if (nargin < 9) kernel = "__kernel_epanechnikov"; endif 	# default kernel
+
+	nn = rows(eval_points);
+	n = rows(depvar);
+
+	# Inverse bandwidth matrix H_inv
+	H = bandwidth_matrix*bandwidth;
+	H_inv = inv(H);
+
+	# weight by inverse bandwidth matrix
+	eval_points = eval_points*H_inv;
+	condvars = condvars*H_inv;
+
+	data = [depvar condvars]; # put it all together for sending to nodes
+
+	# check if doing this parallel or serial
+	global PARALLEL NSLAVES NEWORLD NSLAVES TAG
+	PARALLEL = 0;
+
+	if nslaves > 0
+		PARALLEL = 1;
+		NSLAVES = nslaves;
+		LAM_Init(nslaves, debug);
+	endif
+
+	if !PARALLEL # ordinary serial version
+		points_per_node = nn; # do the all on this node
+		z = kernel_regression_nodes(eval_points, data, do_cv, kernel, points_per_node, nslaves, debug);
+	else # parallel version
+		z = zeros(nn,1);
+		points_per_node = floor(nn/(NSLAVES + 1)); # number of obsns per slave
+		# The command that the slave nodes will execute
+		cmd=['z_on_node = kernel_regression_nodes(eval_points, data, do_cv, kernel, points_per_node, nslaves, debug); ',...
+		'MPI_Send(z_on_node, 0, TAG, NEWORLD);'];
+
+		# send items to slaves
+
+		NumCmds_Send({"eval_points", "data", "do_cv", "kernel", "points_per_node", "nslaves", "debug","cmd"}, {eval_points, data, do_cv, kernel, points_per_node, nslaves, debug, cmd});
+
+		# evaluate last block on master while slaves are busy
+		z_on_node = kernel_regression_nodes(eval_points, data, do_cv, kernel, points_per_node, nslaves, debug);
+		startblock = NSLAVES*points_per_node + 1;
+		endblock = nn;
+		z(startblock:endblock,:) = z(startblock:endblock,:) + z_on_node;
+
+		# collect slaves' results
+		z_on_node = zeros(points_per_node,1); # size may differ between master and compute nodes - reset here
+		for i = 1:NSLAVES
+			MPI_Recv(z_on_node,i,TAG,NEWORLD);
+			startblock = i*points_per_node - points_per_node + 1;
+			endblock = i*points_per_node;
+			z(startblock:endblock,:) = z(startblock:endblock,:) + z_on_node;
+		endfor
+
+		# clean up after parallel
+		LAM_Finalize;
+	endif
+endfunction
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/econometrics/inst/kernel_regression_cvscore.m	Wed Jan 24 10:55:54 2007 +0000
@@ -0,0 +1,5 @@
+function cvscore = kernel_regression_cvscore(bandwidth, data, depvar)
+	fit = kernel_regression(data, depvar, data, exp(bandwidth), true);
+	cvscore = norm(depvar - fit);
+endfunction
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/main/econometrics/inst/kernel_regression_nodes.m	Wed Jan 24 10:55:54 2007 +0000
@@ -0,0 +1,63 @@
+# Copyright (C) 2006  Michael Creel <michael.creel@uab.es>
+# under the terms of the GNU General Public License.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+# Kernel regression, parallel version: do calculations on nodes
+
+function z = kernel_regression_nodes(eval_points, data, do_cv, kernel, points_per_node, nslaves, debug)
+
+	if (nslaves > 0)
+		global NEWORLD
+		[info, myrank] = MPI_Comm_rank(NEWORLD);
+	else myrank = 0; # if not parallel then do all on master node
+	endif
+
+	if myrank == 0 # Do this if I'm master
+		startblock = nslaves*points_per_node + 1;
+		endblock = rows(eval_points);
+	else	# this is for the slaves
+		startblock = myrank*points_per_node - points_per_node + 1;
+		endblock = myrank*points_per_node;
+	endif
+
+	# the block of eval_points this node does
+	myeval = eval_points(startblock:endblock,:);
+	nn = rows(myeval);
+	n = rows(data);
+
+	y = data(:,1);
+	data = data(:,2:columns(data));
+
+	# calculate distances
+	# note to self: loop is as fast as double kronecker with
+	# reshape. Also, simpler and safe in terms of memory.
+	z = zeros(nn,1);
+	for i = 1:nn
+		zz = data - kron(myeval(i,:), ones(n,1));
+		zz = feval(kernel, zz);
+		if (do_cv) zz(i,:) = 0; endif
+		temp = sum(zz);
+		if (temp > 0) zz = zz / temp; endif
+		z(i,:) = zz'*y;
+	endfor
+
+	if debug
+		printf("z on node %d: \n", myrank);
+		z'
+	endif
+endfunction
+
+