Mercurial > octave
comparison liboctave/Array-f.cc @ 8700:314be237cd5b
sorting optimizations
author | Jaroslav Hajek <highegg@gmail.com> |
---|---|
date | Mon, 09 Feb 2009 13:05:35 +0100 |
parents | ea8e65ca234f |
children | 739141cde75a |
comparison
equal
deleted
inserted
replaced
8699:6e764b7317bd | 8700:314be237cd5b |
---|---|
25 #include <config.h> | 25 #include <config.h> |
26 #endif | 26 #endif |
27 | 27 |
28 // Instantiate Arrays of float values. | 28 // Instantiate Arrays of float values. |
29 | 29 |
30 #include "lo-ieee.h" | |
30 #include "Array.h" | 31 #include "Array.h" |
31 #include "Array.cc" | 32 #include "Array.cc" |
32 #include "oct-sort.cc" | |
33 #include "oct-locbuf.h" | 33 #include "oct-locbuf.h" |
34 | 34 |
35 #if defined (HAVE_IEEE754_DATA_FORMAT) | 35 #define INLINE_ASCENDING_SORT |
36 #define INLINE_DESCENDING_SORT | |
37 #include "oct-sort.cc" | |
36 | 38 |
37 static inline uint32_t | 39 template <> |
38 FloatFlip (uint32_t f) | 40 inline bool _sort_isnan (float x) |
39 { | 41 { |
40 uint32_t mask | 42 return lo_ieee_isnan (x); |
41 = -static_cast<int32_t>(f >> 31) | 0x80000000UL; | |
42 | |
43 return f ^ mask; | |
44 } | 43 } |
45 | 44 |
46 static inline uint32_t | 45 INSTANTIATE_ARRAY_SORT (double); |
47 IFloatFlip (uint32_t f) | |
48 { | |
49 uint32_t mask = ((f >> 31) - 1) | 0x80000000UL; | |
50 | |
51 return f ^ mask; | |
52 } | |
53 | |
54 template <> | |
55 bool | |
56 ascending_compare (float a, float b) | |
57 { | |
58 return (xisnan (b) || (a < b)); | |
59 } | |
60 | |
61 template <> | |
62 bool | |
63 ascending_compare (const float *a, const float *b) | |
64 { | |
65 return (xisnan (*b) || (*a < *b)); | |
66 } | |
67 | |
68 template <> | |
69 bool | |
70 descending_compare (float a, float b) | |
71 { | |
72 return (xisnan (a) || (a > b)); | |
73 } | |
74 | |
75 template <> | |
76 bool | |
77 descending_compare (const float *a, const float *b) | |
78 { | |
79 return (xisnan (*b) || (*a > *b)); | |
80 } | |
81 | |
82 INSTANTIATE_ARRAY_SORT (uint32_t); | |
83 | |
84 template <> | |
85 Array<float> | |
86 Array<float>::sort (octave_idx_type dim, sortmode mode) const | |
87 { | |
88 Array<float> m (dims ()); | |
89 | |
90 dim_vector dv = m.dims (); | |
91 | |
92 if (m.length () < 1) | |
93 return m; | |
94 | |
95 octave_idx_type ns = dv(dim); | |
96 octave_idx_type iter = dv.numel () / ns; | |
97 octave_idx_type stride = 1; | |
98 for (int i = 0; i < dim; i++) | |
99 stride *= dv(i); | |
100 | |
101 float *v = m.fortran_vec (); | |
102 const float *ov = data (); | |
103 | |
104 uint32_t *p = reinterpret_cast<uint32_t *> (v); | |
105 const uint32_t *op = reinterpret_cast<const uint32_t *> (ov); | |
106 | |
107 octave_sort<uint32_t> lsort; | |
108 | |
109 if (mode == ASCENDING) | |
110 lsort.set_compare (ascending_compare); | |
111 else if (mode == DESCENDING) | |
112 lsort.set_compare (descending_compare); | |
113 else | |
114 abort (); | |
115 | |
116 if (stride == 1) | |
117 { | |
118 for (octave_idx_type j = 0; j < iter; j++) | |
119 { | |
120 // Flip the data in the vector so that int compares on | |
121 // IEEE754 give the correct ordering. | |
122 | |
123 for (octave_idx_type i = 0; i < ns; i++) | |
124 p[i] = FloatFlip (op[i]); | |
125 | |
126 lsort.sort (p, ns); | |
127 | |
128 // Flip the data out of the vector so that int compares | |
129 // on IEEE754 give the correct ordering. | |
130 | |
131 for (octave_idx_type i = 0; i < ns; i++) | |
132 p[i] = IFloatFlip (p[i]); | |
133 | |
134 // There are two representations of NaN. One will be | |
135 // sorted to the beginning of the vector and the other | |
136 // to the end. If it will be sorted incorrectly, fix | |
137 // things up. | |
138 | |
139 if (lo_ieee_signbit (octave_Float_NaN)) | |
140 { | |
141 if (mode == ASCENDING) | |
142 { | |
143 octave_idx_type i = 0; | |
144 float *vtmp = reinterpret_cast<float *> (p); | |
145 while (xisnan (vtmp[i++]) && i < ns) | |
146 /* do nothing */; | |
147 for (octave_idx_type l = 0; l < ns - i + 1; l++) | |
148 vtmp[l] = vtmp[l+i-1]; | |
149 for (octave_idx_type l = ns - i + 1; l < ns; l++) | |
150 vtmp[l] = octave_Float_NaN; | |
151 } | |
152 else | |
153 { | |
154 octave_idx_type i = ns; | |
155 float *vtmp = reinterpret_cast<float *> (p); | |
156 while (xisnan (vtmp[--i]) && i > 0) | |
157 /* do nothing */; | |
158 for (octave_idx_type l = i; l >= 0; l--) | |
159 vtmp[l-i+ns-1] = vtmp[l]; | |
160 for (octave_idx_type l = 0; l < ns - i - 1; l++) | |
161 vtmp[l] = octave_Float_NaN; | |
162 } | |
163 } | |
164 | |
165 p += ns; | |
166 op += ns; | |
167 } | |
168 } | |
169 else | |
170 { | |
171 OCTAVE_LOCAL_BUFFER (uint32_t, vi, ns); | |
172 | |
173 for (octave_idx_type j = 0; j < iter; j++) | |
174 { | |
175 octave_idx_type offset = j; | |
176 octave_idx_type offset2 = 0; | |
177 while (offset >= stride) | |
178 { | |
179 offset -= stride; | |
180 offset2++; | |
181 } | |
182 offset += offset2 * stride * ns; | |
183 | |
184 // Flip the data in the vector so that int compares on | |
185 // IEEE754 give the correct ordering. | |
186 | |
187 for (octave_idx_type i = 0; i < ns; i++) | |
188 vi[i] = FloatFlip (op[i*stride + offset]); | |
189 | |
190 lsort.sort (vi, ns); | |
191 | |
192 // Flip the data out of the vector so that int compares | |
193 // on IEEE754 give the correct ordering. | |
194 | |
195 for (octave_idx_type i = 0; i < ns; i++) | |
196 p[i*stride + offset] = IFloatFlip (vi[i]); | |
197 | |
198 // There are two representations of NaN. One will be | |
199 // sorted to the beginning of the vector and the other | |
200 // to the end. If it will be sorted to the beginning, | |
201 // fix things up. | |
202 | |
203 if (lo_ieee_signbit (octave_Float_NaN)) | |
204 { | |
205 if (mode == ASCENDING) | |
206 { | |
207 octave_idx_type i = 0; | |
208 while (xisnan (v[i++*stride + offset]) && i < ns) | |
209 /* do nothing */; | |
210 for (octave_idx_type l = 0; l < ns - i + 1; l++) | |
211 v[l*stride + offset] = v[(l+i-1)*stride + offset]; | |
212 for (octave_idx_type l = ns - i + 1; l < ns; l++) | |
213 v[l*stride + offset] = octave_Float_NaN; | |
214 } | |
215 else | |
216 { | |
217 octave_idx_type i = ns; | |
218 while (xisnan (v[--i*stride + offset]) && i > 0) | |
219 /* do nothing */; | |
220 for (octave_idx_type l = i; l >= 0; l--) | |
221 v[(l-i+ns-1)*stride + offset] = v[l*stride + offset]; | |
222 for (octave_idx_type l = 0; l < ns - i - 1; l++) | |
223 v[l*stride + offset] = octave_Float_NaN; | |
224 } | |
225 } | |
226 } | |
227 } | |
228 | |
229 return m; | |
230 } | |
231 | |
232 template <> | |
233 Array<float> | |
234 Array<float>::sort (Array<octave_idx_type> &sidx, octave_idx_type dim, | |
235 sortmode mode) const | |
236 { | |
237 Array<float> m (dims ()); | |
238 | |
239 dim_vector dv = m.dims (); | |
240 | |
241 if (m.length () < 1) | |
242 { | |
243 sidx = Array<octave_idx_type> (dv); | |
244 return m; | |
245 } | |
246 | |
247 octave_idx_type ns = dv(dim); | |
248 octave_idx_type iter = dv.numel () / ns; | |
249 octave_idx_type stride = 1; | |
250 for (int i = 0; i < dim; i++) | |
251 stride *= dv(i); | |
252 | |
253 float *v = m.fortran_vec (); | |
254 const float *ov = data (); | |
255 | |
256 uint32_t *p = reinterpret_cast<uint32_t *> (v); | |
257 const uint32_t *op = reinterpret_cast<const uint32_t *> (ov); | |
258 | |
259 octave_sort<const uint32_t *> indexed_sort; | |
260 | |
261 if (mode == ASCENDING) | |
262 indexed_sort.set_compare (ascending_compare); | |
263 else if (mode == DESCENDING) | |
264 indexed_sort.set_compare (descending_compare); | |
265 else | |
266 abort (); | |
267 | |
268 OCTAVE_LOCAL_BUFFER (const uint32_t *, vi, ns); | |
269 OCTAVE_LOCAL_BUFFER (uint32_t, vix, ns); | |
270 | |
271 sidx = Array<octave_idx_type> (dv); | |
272 | |
273 for (octave_idx_type j = 0; j < iter; j++) | |
274 { | |
275 octave_idx_type offset = j; | |
276 octave_idx_type offset2 = 0; | |
277 while (offset >= stride) | |
278 { | |
279 offset -= stride; | |
280 offset2++; | |
281 } | |
282 offset += offset2 * stride * ns; | |
283 | |
284 // Flip the data in the vector so that int compares on | |
285 // IEEE754 give the correct ordering. | |
286 | |
287 for (octave_idx_type i = 0; i < ns; i++) | |
288 { | |
289 vix[i] = FloatFlip (op[i*stride + offset]); | |
290 vi[i] = vix + i; | |
291 } | |
292 | |
293 indexed_sort.sort (vi, ns); | |
294 | |
295 // Flip the data out of the vector so that int compares on | |
296 // IEEE754 give the correct ordering | |
297 | |
298 for (octave_idx_type i = 0; i < ns; i++) | |
299 { | |
300 p[i*stride + offset] = IFloatFlip (*vi[i]); | |
301 sidx(i*stride + offset) = vi[i] - vix; | |
302 } | |
303 | |
304 // There are two representations of NaN. One will be sorted | |
305 // to the beginning of the vector and the other to the end. | |
306 // If it will be sorted to the beginning, fix things up. | |
307 | |
308 if (lo_ieee_signbit (octave_Float_NaN)) | |
309 { | |
310 if (mode == ASCENDING) | |
311 { | |
312 octave_idx_type i = 0; | |
313 while (xisnan (v[i++*stride+offset]) && i < ns) | |
314 /* do nothing */; | |
315 OCTAVE_LOCAL_BUFFER (float, itmp, i - 1); | |
316 for (octave_idx_type l = 0; l < i -1; l++) | |
317 itmp[l] = sidx(l*stride + offset); | |
318 for (octave_idx_type l = 0; l < ns - i + 1; l++) | |
319 { | |
320 v[l*stride + offset] = v[(l+i-1)*stride + offset]; | |
321 sidx(l*stride + offset) = sidx((l+i-1)*stride + offset); | |
322 } | |
323 for (octave_idx_type k = 0, l = ns - i + 1; l < ns; l++, k++) | |
324 { | |
325 v[l*stride + offset] = octave_Float_NaN; | |
326 sidx(l*stride + offset) = | |
327 static_cast<octave_idx_type>(itmp[k]); | |
328 } | |
329 } | |
330 else | |
331 { | |
332 octave_idx_type i = ns; | |
333 while (xisnan (v[--i*stride+offset]) && i > 0) | |
334 /* do nothing */; | |
335 OCTAVE_LOCAL_BUFFER (float, itmp, ns - i - 1); | |
336 for (octave_idx_type l = 0; l < ns - i -1; l++) | |
337 itmp[l] = sidx((l+i+1)*stride + offset); | |
338 for (octave_idx_type l = i; l >= 0; l--) | |
339 { | |
340 v[(l-i+ns-1)*stride + offset] = v[l*stride + offset]; | |
341 sidx((l-i+ns-1)*stride + offset) = sidx(l*stride + offset); | |
342 } | |
343 for (octave_idx_type k = 0, l = 0; l < ns - i - 1; l++, k++) | |
344 { | |
345 v[l*stride + offset] = octave_Float_NaN; | |
346 sidx(l*stride + offset) = | |
347 static_cast<octave_idx_type>(itmp[k]); | |
348 } | |
349 } | |
350 } | |
351 } | |
352 | |
353 return m; | |
354 } | |
355 | |
356 #else | |
357 | |
358 template <> | |
359 bool | |
360 ascending_compare (float a, float b) | |
361 { | |
362 return (xisnan (b) || (a < b)); | |
363 } | |
364 | |
365 template <> | |
366 bool | |
367 ascending_compare (const float *a, | |
368 const float *b) | |
369 { | |
370 return (xisnan (*b) || (*a < *b)); | |
371 } | |
372 | |
373 template <> | |
374 bool | |
375 descending_compare (float a, float b) | |
376 { | |
377 return (xisnan (a) || (a > b)); | |
378 } | |
379 | |
380 template <> | |
381 bool | |
382 descending_compare (const float *a, | |
383 const float *b) | |
384 { | |
385 return (xisnan (*b) || (*a > *b)); | |
386 } | |
387 | |
388 INSTANTIATE_ARRAY_SORT (float); | |
389 | |
390 #endif | |
391 | 46 |
392 INSTANTIATE_ARRAY_AND_ASSIGN (float, OCTAVE_API); | 47 INSTANTIATE_ARRAY_AND_ASSIGN (float, OCTAVE_API); |
393 | 48 |
394 INSTANTIATE_ARRAY_ASSIGN (float, int, OCTAVE_API) | 49 INSTANTIATE_ARRAY_ASSIGN (float, int, OCTAVE_API) |
395 INSTANTIATE_ARRAY_ASSIGN (float, short, OCTAVE_API) | 50 INSTANTIATE_ARRAY_ASSIGN (float, short, OCTAVE_API) |