Mercurial > octave-nkf
annotate libcruft/blas-xtra/sdot3.f @ 12312:b10ea6efdc58 release-3-4-x ss-3-3-91
version is now 3.3.91
author | John W. Eaton <jwe@octave.org> |
---|---|
date | Mon, 31 Jan 2011 08:36:58 -0500 |
parents | fd0a3ac60b0e |
children | e81ddf9cacd5 |
rev | line source |
---|---|
11523 | 1 c Copyright (C) 2009-2011 VZLU Prague, a.s., Czech Republic |
9874
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
2 c |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
3 c Author: Jaroslav Hajek <highegg@gmail.com> |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
4 c |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
5 c This file is part of Octave. |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
6 c |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
7 c Octave is free software; you can redistribute it and/or modify |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
8 c it under the terms of the GNU General Public License as published by |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
9 c the Free Software Foundation; either version 3 of the License, or |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
10 c (at your option) any later version. |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
11 c |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
12 c This program is distributed in the hope that it will be useful, |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
13 c but WITHOUT ANY WARRANTY; without even the implied warranty of |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
14 c MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
15 c GNU General Public License for more details. |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
16 c |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
17 c You should have received a copy of the GNU General Public License |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
18 c along with this software; see the file COPYING. If not, see |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
19 c <http://www.gnu.org/licenses/>. |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
20 c |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
21 subroutine sdot3(m,n,k,a,b,c) |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
22 c purpose: a 3-dimensional dot product. |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
23 c c = sum (a .* b, 2), where a and b are 3d arrays. |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
24 c arguments: |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
25 c m,n,k (in) the dimensions of a and b |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
26 c a,b (in) real input arrays of size (m,k,n) |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
27 c c (out) real output array, size (m,n) |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
28 integer m,n,k,i,j,l |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
29 real a(m,k,n),b(m,k,n) |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
30 real c(m,n) |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
31 |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
32 real sdot |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
33 external sdot |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
34 |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
35 c quick return if possible. |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
36 if (m <= 0 .or. n <= 0) return |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
37 |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
38 if (m == 1) then |
9876
21d81d06b221
cache-aligned loop for rowwise dot
Jaroslav Hajek <highegg@gmail.com>
parents:
9874
diff
changeset
|
39 c the column-major case. |
9874
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
40 do j = 1,n |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
41 c(1,j) = sdot(k,a(1,1,j),1,b(1,1,j),1) |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
42 end do |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
43 else |
9876
21d81d06b221
cache-aligned loop for rowwise dot
Jaroslav Hajek <highegg@gmail.com>
parents:
9874
diff
changeset
|
44 c We prefer performance here, because that's what we generally |
21d81d06b221
cache-aligned loop for rowwise dot
Jaroslav Hajek <highegg@gmail.com>
parents:
9874
diff
changeset
|
45 c do by default in reduction functions. Besides, the accuracy |
21d81d06b221
cache-aligned loop for rowwise dot
Jaroslav Hajek <highegg@gmail.com>
parents:
9874
diff
changeset
|
46 c of xDOT is questionable. Hence, do a cache-aligned nested loop. |
9874
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
47 do j = 1,n |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
48 do i = 1,m |
9876
21d81d06b221
cache-aligned loop for rowwise dot
Jaroslav Hajek <highegg@gmail.com>
parents:
9874
diff
changeset
|
49 c(i,j) = 0d0 |
9874
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
50 end do |
9876
21d81d06b221
cache-aligned loop for rowwise dot
Jaroslav Hajek <highegg@gmail.com>
parents:
9874
diff
changeset
|
51 do l = 1,k |
9874
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
52 do i = 1,m |
9876
21d81d06b221
cache-aligned loop for rowwise dot
Jaroslav Hajek <highegg@gmail.com>
parents:
9874
diff
changeset
|
53 c(i,j) = c(i,j) + a(i,l,j)*b(i,l,j) |
9874
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
54 end do |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
55 end do |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
56 end do |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
57 end if |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
58 |
90bc0cc4518f
implement compiled dot and blkmm
Jaroslav Hajek <highegg@gmail.com>
parents:
diff
changeset
|
59 end subroutine |