annotate libcruft/blas-xtra/sdot3.f @ 12312:b10ea6efdc58 release-3-4-x ss-3-3-91

version is now 3.3.91
author John W. Eaton <jwe@octave.org>
date Mon, 31 Jan 2011 08:36:58 -0500
parents fd0a3ac60b0e
children e81ddf9cacd5
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11523
fd0a3ac60b0e update copyright notices
John W. Eaton <jwe@octave.org>
parents: 9876
diff changeset
1 c Copyright (C) 2009-2011 VZLU Prague, a.s., Czech Republic
9874
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
2 c
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
3 c Author: Jaroslav Hajek <highegg@gmail.com>
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
4 c
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
5 c This file is part of Octave.
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
6 c
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
7 c Octave is free software; you can redistribute it and/or modify
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
8 c it under the terms of the GNU General Public License as published by
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
9 c the Free Software Foundation; either version 3 of the License, or
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
10 c (at your option) any later version.
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
11 c
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
12 c This program is distributed in the hope that it will be useful,
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
13 c but WITHOUT ANY WARRANTY; without even the implied warranty of
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
14 c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
15 c GNU General Public License for more details.
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
16 c
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
17 c You should have received a copy of the GNU General Public License
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
18 c along with this software; see the file COPYING. If not, see
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
19 c <http://www.gnu.org/licenses/>.
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
20 c
90bc0cc4518f implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff changeset
c sdot3: single-precision dot products taken along the middle
c dimension of two 3-d arrays, i.e. c = sum (a .* b, 2).
      subroutine sdot3(m,n,k,a,b,c)
c
c arguments:
c   m,n,k (in)  extents; a and b are dimensioned (m,k,n) and c is
c               dimensioned (m,n)
c   a,b   (in)  real input arrays of size (m,k,n)
c   c     (out) real output array of size (m,n); c(i,j) receives
c               the sum over l = 1..k of a(i,l,j)*b(i,l,j)
c
      integer m,n,k
      real a(m,k,n),b(m,k,n)
      real c(m,n)

c loop counters: irow runs over m, imid over k, ipage over n.
      integer irow,imid,ipage

c BLAS level-1 dot product; only the m = 1 branch calls it.
      real sdot
      external sdot

c an empty result array leaves nothing to compute.
      if (m .le. 0 .or. n .le. 0) return

      if (m .ne. 1) then
c general case.  speed is favoured here, as is usual for reduction
c functions, and the accuracy of xDOT is questionable anyway, so the
c sums are built with plain loops.  the innermost loop runs over the
c first (fastest varying) index, and each output column is cleared
c before the products for that page are added in order l = 1..k.
        do ipage = 1,n
          do irow = 1,m
            c(irow,ipage) = 0.0e0
          end do
          do imid = 1,k
            do irow = 1,m
              c(irow,ipage) = c(irow,ipage)
     +                      + a(irow,imid,ipage)*b(irow,imid,ipage)
            end do
          end do
        end do
      else
c m = 1 (the column-major case): the k terms of page ipage are
c passed to sdot starting at element (1,1,ipage) with increment 1.
        do ipage = 1,n
          c(1,ipage) = sdot(k,a(1,1,ipage),1,b(1,1,ipage),1)
        end do
      end if

      end subroutine