comparison liboctave/Array-f.cc @ 8700:314be237cd5b

sorting optimizations
author Jaroslav Hajek <highegg@gmail.com>
date Mon, 09 Feb 2009 13:05:35 +0100
parents ea8e65ca234f
children 739141cde75a
comparison
equal deleted inserted replaced
8699:6e764b7317bd 8700:314be237cd5b
25 #include <config.h> 25 #include <config.h>
26 #endif 26 #endif
27 27
28 // Instantiate Arrays of float values. 28 // Instantiate Arrays of float values.
29 29
30 #include "lo-ieee.h"
30 #include "Array.h" 31 #include "Array.h"
31 #include "Array.cc" 32 #include "Array.cc"
32 #include "oct-sort.cc"
33 #include "oct-locbuf.h" 33 #include "oct-locbuf.h"
34 34
35 #if defined (HAVE_IEEE754_DATA_FORMAT) 35 #define INLINE_ASCENDING_SORT
36 #define INLINE_DESCENDING_SORT
37 #include "oct-sort.cc"
36 38
37 static inline uint32_t 39 template <>
38 FloatFlip (uint32_t f) 40 inline bool _sort_isnan (float x)
39 { 41 {
40 uint32_t mask 42 return lo_ieee_isnan (x);
41 = -static_cast<int32_t>(f >> 31) | 0x80000000UL;
42
43 return f ^ mask;
44 } 43 }
45 44
46 static inline uint32_t 45 INSTANTIATE_ARRAY_SORT (double);
47 IFloatFlip (uint32_t f)
48 {
49 uint32_t mask = ((f >> 31) - 1) | 0x80000000UL;
50
51 return f ^ mask;
52 }
53
54 template <>
55 bool
56 ascending_compare (float a, float b)
57 {
58 return (xisnan (b) || (a < b));
59 }
60
61 template <>
62 bool
63 ascending_compare (const float *a, const float *b)
64 {
65 return (xisnan (*b) || (*a < *b));
66 }
67
68 template <>
69 bool
70 descending_compare (float a, float b)
71 {
72 return (xisnan (a) || (a > b));
73 }
74
75 template <>
76 bool
77 descending_compare (const float *a, const float *b)
78 {
79 return (xisnan (*b) || (*a > *b));
80 }
81
82 INSTANTIATE_ARRAY_SORT (uint32_t);
83
84 template <>
85 Array<float>
86 Array<float>::sort (octave_idx_type dim, sortmode mode) const
87 {
88 Array<float> m (dims ());
89
90 dim_vector dv = m.dims ();
91
92 if (m.length () < 1)
93 return m;
94
95 octave_idx_type ns = dv(dim);
96 octave_idx_type iter = dv.numel () / ns;
97 octave_idx_type stride = 1;
98 for (int i = 0; i < dim; i++)
99 stride *= dv(i);
100
101 float *v = m.fortran_vec ();
102 const float *ov = data ();
103
104 uint32_t *p = reinterpret_cast<uint32_t *> (v);
105 const uint32_t *op = reinterpret_cast<const uint32_t *> (ov);
106
107 octave_sort<uint32_t> lsort;
108
109 if (mode == ASCENDING)
110 lsort.set_compare (ascending_compare);
111 else if (mode == DESCENDING)
112 lsort.set_compare (descending_compare);
113 else
114 abort ();
115
116 if (stride == 1)
117 {
118 for (octave_idx_type j = 0; j < iter; j++)
119 {
120 // Flip the data in the vector so that int compares on
121 // IEEE754 give the correct ordering.
122
123 for (octave_idx_type i = 0; i < ns; i++)
124 p[i] = FloatFlip (op[i]);
125
126 lsort.sort (p, ns);
127
128 // Flip the data out of the vector so that int compares
129 // on IEEE754 give the correct ordering.
130
131 for (octave_idx_type i = 0; i < ns; i++)
132 p[i] = IFloatFlip (p[i]);
133
134 // There are two representations of NaN. One will be
135 // sorted to the beginning of the vector and the other
136 // to the end. If it will be sorted incorrectly, fix
137 // things up.
138
139 if (lo_ieee_signbit (octave_Float_NaN))
140 {
141 if (mode == ASCENDING)
142 {
143 octave_idx_type i = 0;
144 float *vtmp = reinterpret_cast<float *> (p);
145 while (xisnan (vtmp[i++]) && i < ns)
146 /* do nothing */;
147 for (octave_idx_type l = 0; l < ns - i + 1; l++)
148 vtmp[l] = vtmp[l+i-1];
149 for (octave_idx_type l = ns - i + 1; l < ns; l++)
150 vtmp[l] = octave_Float_NaN;
151 }
152 else
153 {
154 octave_idx_type i = ns;
155 float *vtmp = reinterpret_cast<float *> (p);
156 while (xisnan (vtmp[--i]) && i > 0)
157 /* do nothing */;
158 for (octave_idx_type l = i; l >= 0; l--)
159 vtmp[l-i+ns-1] = vtmp[l];
160 for (octave_idx_type l = 0; l < ns - i - 1; l++)
161 vtmp[l] = octave_Float_NaN;
162 }
163 }
164
165 p += ns;
166 op += ns;
167 }
168 }
169 else
170 {
171 OCTAVE_LOCAL_BUFFER (uint32_t, vi, ns);
172
173 for (octave_idx_type j = 0; j < iter; j++)
174 {
175 octave_idx_type offset = j;
176 octave_idx_type offset2 = 0;
177 while (offset >= stride)
178 {
179 offset -= stride;
180 offset2++;
181 }
182 offset += offset2 * stride * ns;
183
184 // Flip the data in the vector so that int compares on
185 // IEEE754 give the correct ordering.
186
187 for (octave_idx_type i = 0; i < ns; i++)
188 vi[i] = FloatFlip (op[i*stride + offset]);
189
190 lsort.sort (vi, ns);
191
192 // Flip the data out of the vector so that int compares
193 // on IEEE754 give the correct ordering.
194
195 for (octave_idx_type i = 0; i < ns; i++)
196 p[i*stride + offset] = IFloatFlip (vi[i]);
197
198 // There are two representations of NaN. One will be
199 // sorted to the beginning of the vector and the other
200 // to the end. If it will be sorted to the beginning,
201 // fix things up.
202
203 if (lo_ieee_signbit (octave_Float_NaN))
204 {
205 if (mode == ASCENDING)
206 {
207 octave_idx_type i = 0;
208 while (xisnan (v[i++*stride + offset]) && i < ns)
209 /* do nothing */;
210 for (octave_idx_type l = 0; l < ns - i + 1; l++)
211 v[l*stride + offset] = v[(l+i-1)*stride + offset];
212 for (octave_idx_type l = ns - i + 1; l < ns; l++)
213 v[l*stride + offset] = octave_Float_NaN;
214 }
215 else
216 {
217 octave_idx_type i = ns;
218 while (xisnan (v[--i*stride + offset]) && i > 0)
219 /* do nothing */;
220 for (octave_idx_type l = i; l >= 0; l--)
221 v[(l-i+ns-1)*stride + offset] = v[l*stride + offset];
222 for (octave_idx_type l = 0; l < ns - i - 1; l++)
223 v[l*stride + offset] = octave_Float_NaN;
224 }
225 }
226 }
227 }
228
229 return m;
230 }
231
232 template <>
233 Array<float>
234 Array<float>::sort (Array<octave_idx_type> &sidx, octave_idx_type dim,
235 sortmode mode) const
236 {
237 Array<float> m (dims ());
238
239 dim_vector dv = m.dims ();
240
241 if (m.length () < 1)
242 {
243 sidx = Array<octave_idx_type> (dv);
244 return m;
245 }
246
247 octave_idx_type ns = dv(dim);
248 octave_idx_type iter = dv.numel () / ns;
249 octave_idx_type stride = 1;
250 for (int i = 0; i < dim; i++)
251 stride *= dv(i);
252
253 float *v = m.fortran_vec ();
254 const float *ov = data ();
255
256 uint32_t *p = reinterpret_cast<uint32_t *> (v);
257 const uint32_t *op = reinterpret_cast<const uint32_t *> (ov);
258
259 octave_sort<const uint32_t *> indexed_sort;
260
261 if (mode == ASCENDING)
262 indexed_sort.set_compare (ascending_compare);
263 else if (mode == DESCENDING)
264 indexed_sort.set_compare (descending_compare);
265 else
266 abort ();
267
268 OCTAVE_LOCAL_BUFFER (const uint32_t *, vi, ns);
269 OCTAVE_LOCAL_BUFFER (uint32_t, vix, ns);
270
271 sidx = Array<octave_idx_type> (dv);
272
273 for (octave_idx_type j = 0; j < iter; j++)
274 {
275 octave_idx_type offset = j;
276 octave_idx_type offset2 = 0;
277 while (offset >= stride)
278 {
279 offset -= stride;
280 offset2++;
281 }
282 offset += offset2 * stride * ns;
283
284 // Flip the data in the vector so that int compares on
285 // IEEE754 give the correct ordering.
286
287 for (octave_idx_type i = 0; i < ns; i++)
288 {
289 vix[i] = FloatFlip (op[i*stride + offset]);
290 vi[i] = vix + i;
291 }
292
293 indexed_sort.sort (vi, ns);
294
295 // Flip the data out of the vector so that int compares on
296 // IEEE754 give the correct ordering
297
298 for (octave_idx_type i = 0; i < ns; i++)
299 {
300 p[i*stride + offset] = IFloatFlip (*vi[i]);
301 sidx(i*stride + offset) = vi[i] - vix;
302 }
303
304 // There are two representations of NaN. One will be sorted
305 // to the beginning of the vector and the other to the end.
306 // If it will be sorted to the beginning, fix things up.
307
308 if (lo_ieee_signbit (octave_Float_NaN))
309 {
310 if (mode == ASCENDING)
311 {
312 octave_idx_type i = 0;
313 while (xisnan (v[i++*stride+offset]) && i < ns)
314 /* do nothing */;
315 OCTAVE_LOCAL_BUFFER (float, itmp, i - 1);
316 for (octave_idx_type l = 0; l < i -1; l++)
317 itmp[l] = sidx(l*stride + offset);
318 for (octave_idx_type l = 0; l < ns - i + 1; l++)
319 {
320 v[l*stride + offset] = v[(l+i-1)*stride + offset];
321 sidx(l*stride + offset) = sidx((l+i-1)*stride + offset);
322 }
323 for (octave_idx_type k = 0, l = ns - i + 1; l < ns; l++, k++)
324 {
325 v[l*stride + offset] = octave_Float_NaN;
326 sidx(l*stride + offset) =
327 static_cast<octave_idx_type>(itmp[k]);
328 }
329 }
330 else
331 {
332 octave_idx_type i = ns;
333 while (xisnan (v[--i*stride+offset]) && i > 0)
334 /* do nothing */;
335 OCTAVE_LOCAL_BUFFER (float, itmp, ns - i - 1);
336 for (octave_idx_type l = 0; l < ns - i -1; l++)
337 itmp[l] = sidx((l+i+1)*stride + offset);
338 for (octave_idx_type l = i; l >= 0; l--)
339 {
340 v[(l-i+ns-1)*stride + offset] = v[l*stride + offset];
341 sidx((l-i+ns-1)*stride + offset) = sidx(l*stride + offset);
342 }
343 for (octave_idx_type k = 0, l = 0; l < ns - i - 1; l++, k++)
344 {
345 v[l*stride + offset] = octave_Float_NaN;
346 sidx(l*stride + offset) =
347 static_cast<octave_idx_type>(itmp[k]);
348 }
349 }
350 }
351 }
352
353 return m;
354 }
355
356 #else
357
358 template <>
359 bool
360 ascending_compare (float a, float b)
361 {
362 return (xisnan (b) || (a < b));
363 }
364
365 template <>
366 bool
367 ascending_compare (const float *a,
368 const float *b)
369 {
370 return (xisnan (*b) || (*a < *b));
371 }
372
373 template <>
374 bool
375 descending_compare (float a, float b)
376 {
377 return (xisnan (a) || (a > b));
378 }
379
380 template <>
381 bool
382 descending_compare (const float *a,
383 const float *b)
384 {
385 return (xisnan (*b) || (*a > *b));
386 }
387
388 INSTANTIATE_ARRAY_SORT (float);
389
390 #endif
391 46
392 INSTANTIATE_ARRAY_AND_ASSIGN (float, OCTAVE_API); 47 INSTANTIATE_ARRAY_AND_ASSIGN (float, OCTAVE_API);
393 48
394 INSTANTIATE_ARRAY_ASSIGN (float, int, OCTAVE_API) 49 INSTANTIATE_ARRAY_ASSIGN (float, int, OCTAVE_API)
395 INSTANTIATE_ARRAY_ASSIGN (float, short, OCTAVE_API) 50 INSTANTIATE_ARRAY_ASSIGN (float, short, OCTAVE_API)