comparison libinterp/corefcn/oct-stream.cc @ 17453:669ad11f282d

improve efficiency of fread

* oct-stream.cc, oct-stream.h (octave_stream::finalize_read, convert_and_copy): New functions.
(TABLE_ELT): New macro.
(FILL_TABLE_ROW, conv_fptr): Update to new conversion function.
(do_read): Delete function.
(octave_stream::read): Read data in bigger chunks. Convert format once all data has been read.
author John W. Eaton <jwe@octave.org>
date Thu, 19 Sep 2013 11:20:28 -0400
parents 6690dba6078a
children cc13924a4266
comparison
equal deleted inserted replaced
17452:68f9b3bf0840 17453:669ad11f282d
2956 { 2956 {
2957 if (stream_ok ()) 2957 if (stream_ok ())
2958 rep->close (); 2958 rep->close ();
2959 } 2959 }
2960 2960
// convert_and_copy (inserted side of the comparison; the left half of the
// first line below is the template header of the deleted do_read).
//
// Convert the raw buffers in INPUT_BUF_LIST, each viewed as an array of
// SRC_T, into one NR-by-NC array of type DST_T and return it as an
// octave_value.
//
// At most INPUT_BUF_ELTS elements are taken from each buffer and at most
// ELTS_READ elements in total.  Elements of the result from index ELTS_READ
// up to NR * NC are set to zero.
//
// Per element: if SWAP is true the bytes are swapped in place; otherwise, if
// DO_FLOAT_FMT_CONV is true, do_float_format_conversion is applied with
// FROM_FLT_FMT as the source format.  If DO_NA_CONV is true, values for which
// __lo_ieee_is_old_NA is true are replaced by __lo_ieee_replace_old_NA.
//
// Ownership: every buffer in the list is deleted here and the list is
// cleared, so the caller must not use or free the buffers afterwards.
//
// NOTE(review): nothing here checks that ELTS_READ <= NR * NC; the writes to
// conv_data[j] assume it -- confirm that every caller guarantees this.
2961 template <class RET_T, class READ_T> 2961 template <class SRC_T, class DST_T>
2962 static octave_value
2963 convert_and_copy (std::list<void *>& input_buf_list,
2964 octave_idx_type input_buf_elts,
2965 octave_idx_type elts_read,
2966 octave_idx_type nr, octave_idx_type nc, bool swap,
2967 bool do_float_fmt_conv, bool do_NA_conv,
2968 oct_mach_info::float_format from_flt_fmt)
2969 {
2970 typedef typename DST_T::element_type dst_elt_type;
2971
2972 DST_T conv (dim_vector (nr, nc));
2973
2974 dst_elt_type *conv_data = conv.fortran_vec ();
2975
// J indexes the destination across all buffers; I (below) restarts at zero
// for each buffer.
2976 octave_idx_type j = 0;
2977
2978 for (std::list<void *>::const_iterator it = input_buf_list.begin ();
2979 it != input_buf_list.end (); it++)
2980 {
// The list holds untyped pointers; view this buffer as SRC_T elements.
2981 SRC_T *data = static_cast<SRC_T *> (*it);
2982
2983 for (octave_idx_type i = 0; i < input_buf_elts && j < elts_read; i++, j++)
2984 {
// Byte swapping takes precedence: the float format conversion only runs
// when SWAP is false.
// NOTE(review): float_format is used as a type in the parameter list above,
// so oct_mach_info::float_format () is a value-initialized float_format
// rather than a function call -- confirm that is the intended destination
// format.
2985 if (swap)
2986 swap_bytes<sizeof (SRC_T)> (&data[i]);
2987 else if (do_float_fmt_conv)
2988 do_float_format_conversion (&data[i], sizeof (SRC_T),
2989 1, from_flt_fmt,
2990 oct_mach_info::float_format ());
2991
2992 dst_elt_type tmp (data[i]);
2993
2994 if (do_NA_conv && __lo_ieee_is_old_NA (tmp))
2995 tmp = __lo_ieee_replace_old_NA (tmp);
2996
2997 conv_data[j] = tmp;
2998 }
2999
// NOTE(review): octave_stream::read in this listing allocates these buffers
// with "new char [input_buf_size]", but they are released here through a
// SRC_T pointer -- confirm that this mismatch is acceptable.
3000 delete [] data;
3001 }
3002
// The buffers were deleted above; drop the now-dangling pointers.
3003 input_buf_list.clear ();
3004
// Zero-fill the part of the NR-by-NC result that was not read.
3005 for (octave_idx_type i = elts_read; i < nr * nc; i++)
3006 conv_data[i] = dst_elt_type (0);
3007
3008 return conv;
3009 }
3010
// conv_fptr: pointer to a convert_and_copy instantiation.  The parameter
// list mirrors convert_and_copy, and conv_fptr_table in
// octave_stream::finalize_read is an array of this type.
3011 typedef octave_value (*conv_fptr)
3012 (std::list<void *>& input_buf_list, octave_idx_type input_buf_elts,
3013 octave_idx_type elts_read, octave_idx_type nr, octave_idx_type nc,
3014 bool swap, bool do_float_fmt_conv, bool do_NA_conv,
3015 oct_mach_info::float_format from_flt_fmt);
3016
// TABLE_ELT (T, U, V, W): store convert_and_copy<V, W> in
// conv_fptr_table[oct_data_conv::T][oct_data_conv::U].  V is the source
// element type and W the destination array type.  A variable named
// conv_fptr_table must be in scope where the macro is expanded.
3017 #define TABLE_ELT(T, U, V, W) \
3018 conv_fptr_table[oct_data_conv::T][oct_data_conv::U] = convert_and_copy<V, W>
3019
// FILL_TABLE_ROW (T, V): fill row T of the table with the converters from
// source element type V to each of the 14 output types.  The dt_char,
// dt_schar and dt_uchar columns all produce a charNDArray.  Any earlier
// definition of the macro is discarded first.
3020 #undef FILL_TABLE_ROW
3021 #define FILL_TABLE_ROW(T, V) \
3022 TABLE_ELT (T, dt_int8, V, int8NDArray); \
3023 TABLE_ELT (T, dt_uint8, V, uint8NDArray); \
3024 TABLE_ELT (T, dt_int16, V, int16NDArray); \
3025 TABLE_ELT (T, dt_uint16, V, uint16NDArray); \
3026 TABLE_ELT (T, dt_int32, V, int32NDArray); \
3027 TABLE_ELT (T, dt_uint32, V, uint32NDArray); \
3028 TABLE_ELT (T, dt_int64, V, int64NDArray); \
3029 TABLE_ELT (T, dt_uint64, V, uint64NDArray); \
3030 TABLE_ELT (T, dt_single, V, FloatNDArray); \
3031 TABLE_ELT (T, dt_double, V, NDArray); \
3032 TABLE_ELT (T, dt_char, V, charNDArray); \
3033 TABLE_ELT (T, dt_schar, V, charNDArray); \
3034 TABLE_ELT (T, dt_uchar, V, charNDArray); \
3035 TABLE_ELT (T, dt_logical, V, boolNDArray);
3036
// Layout of this stretch of the comparison: a line with two numbers carries
// the old text (deleted do_read) followed by the new text
// (octave_stream::finalize_read); a line with one number belongs to one side
// only.
//
// octave_stream::finalize_read (new, right-hand side):
//   Convert the buffered raw data in INPUT_BUF_LIST to an array of the
//   requested OUTPUT_TYPE.  On the first call a static table of converters
//   is zeroed and then filled with FILL_TABLE_ROW.  FFMT is replaced by
//   float_format () when it is flt_fmt_unknown (presumably the stream's own
//   format -- confirm).  Byte swapping is requested when the host word order
//   and FFMT disagree (little-endian IEEE data on a big-endian host, or
//   big-endian IEEE data otherwise).  Float format conversion is requested
//   only for dt_single/dt_double input whose FFMT differs from
//   float_format ().  Old-NA replacement is requested only for dt_double
//   output.  For the 14 known output types the selected converter is called
//   and its result returned; for any other OUTPUT_TYPE the result is set to
//   false and current_liboctave_error_handler is called with
//   "read: invalid type specification".
//
// do_read (old, left-hand side; its template header is paired with the
// template header of convert_and_copy earlier in the listing):
//   Read from the stream one element per is.read call, doubling the result
//   array whenever it is full, seeking forward by SKIP after BLOCK_SIZE
//   elements, and resizing the array to the final dimensions at the end.
2962 octave_value 3037 octave_value
2963 do_read (octave_stream& strm, octave_idx_type nr, octave_idx_type nc, octave_idx_type block_size, 3038 octave_stream::finalize_read (std::list<void *>& input_buf_list,
2964 octave_idx_type skip, bool do_float_fmt_conv, bool do_NA_conv, 3039 octave_idx_type input_buf_elts,
2965 oct_mach_info::float_format from_flt_fmt, octave_idx_type& count) 3040 octave_idx_type elts_read,
3041 octave_idx_type nr, octave_idx_type nc,
3042 oct_data_conv::data_type input_type,
3043 oct_data_conv::data_type output_type,
3044 oct_mach_info::float_format ffmt)
2966 { 3045 {
2967 octave_value retval; 3046 octave_value retval;
2968 3047
2969 RET_T nda; 3048 static bool initialized = false;
2970 3049
// NOTE(review): the right-hand comment on the next line says "return types x
// read types", but the new table is filled and indexed as
// [input type][output type] (see TABLE_ELT and the lookup below); the wording
// looks carried over from the old read_fptr_table -- confirm and update.
2971 count = 0; 3050 // Table function pointers for return types x read types.
2972 3051
2973 typedef typename RET_T::element_type ELMT; 3052 static conv_fptr conv_fptr_table[oct_data_conv::dt_unknown][14];
2974 ELMT elt_zero = ELMT (); 3053
2975 3054 if (! initialized)
2976 ELMT *dat = 0; 3055 {
2977 3056 for (int i = 0; i < oct_data_conv::dt_unknown; i++)
2978 octave_idx_type max_size = 0; 3057 for (int j = 0; j < 14; j++)
2979 3058 conv_fptr_table[i][j] = 0;
2980 octave_idx_type final_nr = 0; 3059
2981 octave_idx_type final_nc = 1; 3060 FILL_TABLE_ROW (dt_int8, int8_t);
2982 3061 FILL_TABLE_ROW (dt_uint8, uint8_t);
2983 if (nr > 0) 3062 FILL_TABLE_ROW (dt_int16, int16_t);
2984 { 3063 FILL_TABLE_ROW (dt_uint16, uint16_t);
2985 if (nc > 0) 3064 FILL_TABLE_ROW (dt_int32, int32_t);
2986 { 3065 FILL_TABLE_ROW (dt_uint32, uint32_t);
2987 nda.resize (dim_vector (nr, nc), elt_zero); 3066 FILL_TABLE_ROW (dt_int64, int64_t);
2988 dat = nda.fortran_vec (); 3067 FILL_TABLE_ROW (dt_uint64, uint64_t);
2989 max_size = nr * nc; 3068 FILL_TABLE_ROW (dt_single, float);
2990 } 3069 FILL_TABLE_ROW (dt_double, double);
2991 else 3070 FILL_TABLE_ROW (dt_char, char);
2992 { 3071 FILL_TABLE_ROW (dt_schar, signed char);
2993 nda.resize (dim_vector (nr, 32), elt_zero); 3072 FILL_TABLE_ROW (dt_uchar, unsigned char);
2994 dat = nda.fortran_vec (); 3073 FILL_TABLE_ROW (dt_logical, bool);
2995 max_size = nr * 32; 3074
2996 } 3075 initialized = true;
2997 } 3076 }
3077
3078 bool swap = false;
3079
3080 if (ffmt == oct_mach_info::flt_fmt_unknown)
3081 ffmt = float_format ();
3082
3083 if (oct_mach_info::words_big_endian ())
3084 swap = (ffmt == oct_mach_info::flt_fmt_ieee_little_endian);
2998 else 3085 else
2999 { 3086 swap = (ffmt == oct_mach_info::flt_fmt_ieee_big_endian);
3000 nda.resize (dim_vector (32, 1), elt_zero); 3087
3001 dat = nda.fortran_vec (); 3088 bool do_float_fmt_conv = ((input_type == oct_data_conv::dt_double
3002 max_size = 32; 3089 || input_type == oct_data_conv::dt_single)
3003 } 3090 && ffmt != float_format ());
3004 3091
3005 bool swap = false; 3092 bool do_NA_conv = (output_type == oct_data_conv::dt_double);
3006 3093
3007 if (oct_mach_info::words_big_endian ()) 3094 switch (output_type)
3008 swap = (from_flt_fmt == oct_mach_info::flt_fmt_ieee_little_endian); 3095 {
3009 else 3096 case oct_data_conv::dt_int8:
3010 swap = (from_flt_fmt == oct_mach_info::flt_fmt_ieee_big_endian); 3097 case oct_data_conv::dt_uint8:
3011 3098 case oct_data_conv::dt_int16:
3012 union 3099 case oct_data_conv::dt_uint16:
3013 { 3100 case oct_data_conv::dt_int32:
3014 char buf[sizeof (typename strip_template_param<octave_int, READ_T>::type)]; 3101 case oct_data_conv::dt_uint32:
3015 typename strip_template_param<octave_int, READ_T>::type val; 3102 case oct_data_conv::dt_int64:
3016 } u; 3103 case oct_data_conv::dt_uint64:
3017 3104 case oct_data_conv::dt_single:
3018 std::istream *isp = strm.input_stream (); 3105 case oct_data_conv::dt_double:
3019 3106 case oct_data_conv::dt_char:
3020 if (isp) 3107 case oct_data_conv::dt_schar:
3021 { 3108 case oct_data_conv::dt_uchar:
3022 std::istream& is = *isp; 3109 case oct_data_conv::dt_logical:
3023 3110 {
// NOTE(review): on the new side the switch only validates output_type;
// input_type is used as a row index without a range check and fptr is
// called without a null check, whereas the old octave_stream::read tested
// "if (fcn)" before calling -- confirm that input_type is always one of the
// 14 filled rows.
3024 octave_idx_type elts_read = 0; 3111 conv_fptr fptr = conv_fptr_table[input_type][output_type];
3025 3112
3026 for (;;) 3113 retval = fptr (input_buf_list, input_buf_elts, elts_read,
3027 { 3114 nr, nc, swap, do_float_fmt_conv, do_NA_conv, ffmt);
3028 // FIXME -- maybe there should be a special case for 3115 }
3029 // skip == 0. 3116 break;
3030 3117
3031 if (is) 3118 default:
3032 { 3119 retval = false;
3033 if (nr > 0 && nc > 0 && count == max_size) 3120 (*current_liboctave_error_handler)
3034 { 3121 ("read: invalid type specification");
3035 final_nr = nr; 3122 break;
3036 final_nc = nc; 3123 }
3037 3124
3038 break; 3125
3039 } 3126 return retval;
3040 3127 }
3041 is.read (u.buf, sizeof (typename strip_template_param<octave_int, READ_T>::type));
3042
3043 // We only swap bytes for integer types. For float
3044 // types, the format conversion will also handle byte
3045 // swapping.
3046
3047 if (swap)
3048 swap_bytes<sizeof (typename strip_template_param<octave_int, READ_T>::type)> (u.buf);
3049 else if (do_float_fmt_conv)
3050 do_float_format_conversion
3051 (u.buf,
3052 sizeof (typename strip_template_param<octave_int, READ_T>::type),
3053 1, from_flt_fmt, oct_mach_info::float_format ());
3054
3055 typename RET_T::element_type tmp
3056 = static_cast <typename RET_T::element_type> (u.val);
3057
3058 if (is)
3059 {
3060 if (count == max_size)
3061 {
3062 max_size *= 2;
3063
3064 if (nr > 0)
3065 nda.resize (dim_vector (nr, max_size / nr),
3066 elt_zero);
3067 else
3068 nda.resize (dim_vector (max_size, 1), elt_zero);
3069
3070 dat = nda.fortran_vec ();
3071 }
3072
3073 if (do_NA_conv && __lo_ieee_is_old_NA (tmp))
3074 tmp = __lo_ieee_replace_old_NA (tmp);
3075
3076 dat[count++] = tmp;
3077
3078 elts_read++;
3079 }
3080
3081 int seek_status = 0;
3082
3083 if (skip != 0 && elts_read == block_size)
3084 {
3085 seek_status = strm.seek (skip, SEEK_CUR);
3086 elts_read = 0;
3087 }
3088
3089 if (is.eof () || seek_status < 0)
3090 {
3091 if (nr > 0)
3092 {
3093 if (count > nr)
3094 {
3095 final_nr = nr;
3096 final_nc = (count - 1) / nr + 1;
3097 }
3098 else
3099 {
3100 final_nr = count;
3101 final_nc = 1;
3102 }
3103 }
3104 else
3105 {
3106 final_nr = count;
3107 final_nc = 1;
3108 }
3109
3110 break;
3111 }
3112 }
3113 else if (is.eof ())
3114 break;
3115 }
3116 }
3117
3118 nda.resize (dim_vector (final_nr, final_nc), elt_zero);
3119
3120 retval = nda;
3121
3122 return retval;
3123 }
3124
// read_fptr (deleted side only): pointer to a do_read instantiation.  The
// old octave_stream::read keeps a table of these (read_fptr_table) and calls
// the selected entry with the stream, nr, nc, block_size, skip, the two
// conversion flags, the float format and the character count.
3125 typedef octave_value (*read_fptr) (octave_stream&, octave_idx_type,
3126 octave_idx_type, octave_idx_type,
3127 octave_idx_type, bool, bool,
3128 oct_mach_info::float_format ffmt,
3129 octave_idx_type&);
3130
// FILL_TABLE_ROW (R, VAL_T) (deleted side only): fill row R of
// read_fptr_table with do_read<VAL_T, READ_T> for each of the 14 input
// types, where VAL_T is the result array type.  Integer input types use the
// octave_intN wrappers as READ_T, and dt_logical input is read as
// unsigned char.  A variable named read_fptr_table must be in scope where
// the macro is expanded.
3131 #define FILL_TABLE_ROW(R, VAL_T) \
3132 read_fptr_table[R][oct_data_conv::dt_int8] = do_read<VAL_T, octave_int8>; \
3133 read_fptr_table[R][oct_data_conv::dt_uint8] = do_read<VAL_T, octave_uint8>; \
3134 read_fptr_table[R][oct_data_conv::dt_int16] = do_read<VAL_T, octave_int16>; \
3135 read_fptr_table[R][oct_data_conv::dt_uint16] = do_read<VAL_T, octave_uint16>; \
3136 read_fptr_table[R][oct_data_conv::dt_int32] = do_read<VAL_T, octave_int32>; \
3137 read_fptr_table[R][oct_data_conv::dt_uint32] = do_read<VAL_T, octave_uint32>; \
3138 read_fptr_table[R][oct_data_conv::dt_int64] = do_read<VAL_T, octave_int64>; \
3139 read_fptr_table[R][oct_data_conv::dt_uint64] = do_read<VAL_T, octave_uint64>; \
3140 read_fptr_table[R][oct_data_conv::dt_single] = do_read<VAL_T, float>; \
3141 read_fptr_table[R][oct_data_conv::dt_double] = do_read<VAL_T, double>; \
3142 read_fptr_table[R][oct_data_conv::dt_char] = do_read<VAL_T, char>; \
3143 read_fptr_table[R][oct_data_conv::dt_schar] = do_read<VAL_T, signed char>; \
3144 read_fptr_table[R][oct_data_conv::dt_uchar] = do_read<VAL_T, unsigned char>; \
3145 read_fptr_table[R][oct_data_conv::dt_logical] = do_read<VAL_T, unsigned char>
3146 3128
3147 octave_value 3129 octave_value
3148 octave_stream::read (const Array<double>& size, octave_idx_type block_size, 3130 octave_stream::read (const Array<double>& size, octave_idx_type block_size,
3149 oct_data_conv::data_type input_type, 3131 oct_data_conv::data_type input_type,
3150 oct_data_conv::data_type output_type, 3132 oct_data_conv::data_type output_type,
3151 octave_idx_type skip, oct_mach_info::float_format ffmt, 3133 octave_idx_type skip, oct_mach_info::float_format ffmt,
3152 octave_idx_type& char_count) 3134 octave_idx_type& char_count)
3153 { 3135 {
3154 static bool initialized = false;
3155
3156 // Table function pointers for return types x read types.
3157
3158 static read_fptr read_fptr_table[oct_data_conv::dt_unknown][14];
3159
3160 if (! initialized)
3161 {
3162 for (int i = 0; i < oct_data_conv::dt_unknown; i++)
3163 for (int j = 0; j < 14; j++)
3164 read_fptr_table[i][j] = 0;
3165
3166 FILL_TABLE_ROW (oct_data_conv::dt_int8, int8NDArray);
3167 FILL_TABLE_ROW (oct_data_conv::dt_uint8, uint8NDArray);
3168 FILL_TABLE_ROW (oct_data_conv::dt_int16, int16NDArray);
3169 FILL_TABLE_ROW (oct_data_conv::dt_uint16, uint16NDArray);
3170 FILL_TABLE_ROW (oct_data_conv::dt_int32, int32NDArray);
3171 FILL_TABLE_ROW (oct_data_conv::dt_uint32, uint32NDArray);
3172 FILL_TABLE_ROW (oct_data_conv::dt_int64, int64NDArray);
3173 FILL_TABLE_ROW (oct_data_conv::dt_uint64, uint64NDArray);
3174 FILL_TABLE_ROW (oct_data_conv::dt_single, FloatNDArray);
3175 FILL_TABLE_ROW (oct_data_conv::dt_double, NDArray);
3176 FILL_TABLE_ROW (oct_data_conv::dt_char, charNDArray);
3177 FILL_TABLE_ROW (oct_data_conv::dt_schar, charNDArray);
3178 FILL_TABLE_ROW (oct_data_conv::dt_uchar, charNDArray);
3179 FILL_TABLE_ROW (oct_data_conv::dt_logical, boolNDArray);
3180
3181 initialized = true;
3182 }
3183
3184 octave_value retval; 3136 octave_value retval;
3137
3138 octave_idx_type nr = -1;
3139 octave_idx_type nc = -1;
3140
3141 bool one_elt_size_spec = false;
3185 3142
3186 if (stream_ok ()) 3143 if (stream_ok ())
3187 { 3144 {
3188 // FIXME -- we may eventually want to make this extensible. 3145 // FIXME -- we may eventually want to make this extensible.
3189 3146
3191 // numbering stays consistent with the order of the elements in the 3148 // numbering stays consistent with the order of the elements in the
3192 // data_type enum in the oct_data_conv class. 3149 // data_type enum in the oct_data_conv class.
3193 3150
3194 char_count = 0; 3151 char_count = 0;
3195 3152
3196 octave_idx_type nr = -1; 3153 get_size (size, nr, nc, one_elt_size_spec, "fread");
3197 octave_idx_type nc = -1;
3198
3199 bool ignore;
3200
3201 get_size (size, nr, nc, ignore, "fread");
3202 3154
3203 if (! error_state) 3155 if (! error_state)
3204 { 3156 {
3205 if (nr == 0 || nc == 0) 3157
3206 retval = Matrix (nr, nc); 3158 octave_idx_type elts_to_read = std::numeric_limits<octave_idx_type>::max ();
3159
3160 if (one_elt_size_spec)
3161 {
3162 // If NR == 0, Matlab returns [](0x0).
3163
3164 // If NR > 0, the result will be a column vector with the given
3165 // number of rows.
3166
3167 // If NR < 0, then we have Inf and the result will be a column
3168 // vector but we have to wait to see how big NR will be.
3169
3170 if (nr == 0)
3171 nr = nc = 0;
3172 else
3173 nc = 1;
3174 }
3207 else 3175 else
3208 { 3176 {
3209 if (ffmt == oct_mach_info::flt_fmt_unknown) 3177 // Matlab returns [] even if there are two elements in the size
3210 ffmt = float_format (); 3178 // specification and one is nonzero.
3211 3179
3212 read_fptr fcn = read_fptr_table[output_type][input_type]; 3180 // If NC < 0 we have [NR, Inf] and we'll wait to decide how big NC
3213 3181 // should be.
3214 bool do_float_fmt_conv = ((input_type == oct_data_conv::dt_double 3182
3215 || input_type == oct_data_conv::dt_single) 3183 if (nr == 0 || nc == 0)
3216 && ffmt != float_format ()); 3184 nr = nc = 0;
3217 3185 }
3218 bool do_NA_conv = (output_type == oct_data_conv::dt_double); 3186
3219 3187 // FIXME -- ensure that this does not overflow.
3220 if (fcn) 3188
3189 elts_to_read = nr * nc;
3190
3191 bool read_to_eof = elts_to_read < 0;
3192
3193 octave_idx_type input_buf_elts = -1;
3194
3195 if (skip == 0)
3196 {
3197 if (read_to_eof)
3198 input_buf_elts = 1024 * 1024;
3199 else
3200 input_buf_elts = elts_to_read;
3201 }
3202 else
3203 input_buf_elts = block_size;
3204
3205 octave_idx_type input_elt_size = oct_data_conv::data_type_size (input_type);
3206
3207 octave_idx_type input_buf_size = input_buf_elts * input_elt_size;
3208
3209 assert (input_buf_size >= 0);
3210
3211 // Must also work and return correct type object for 0 elements to read.
3212
3213 std::istream *isp = input_stream ();
3214
3215 if (isp)
3216 {
3217 std::istream& is = *isp;
3218
3219 std::list <void *> input_buf_list;
3220
3221 octave_idx_type elts_read = 0;
3222
3223 while (is && ! is.eof () && (read_to_eof || elts_read < elts_to_read))
3221 { 3224 {
3222 retval = (*fcn) (*this, nr, nc, block_size, skip, 3225 char *input_buf = new char [input_buf_size];
3223 do_float_fmt_conv, do_NA_conv, 3226
3224 ffmt, char_count); 3227 is.read (input_buf, input_buf_size);
3225 3228
3226 // FIXME -- kluge! 3229 size_t count = is.gcount ();
3227 3230
3228 if (! error_state 3231 char_count += count;
3229 && (output_type == oct_data_conv::dt_char 3232
3230 || output_type == oct_data_conv::dt_schar 3233 elts_read += count / input_elt_size;
3231 || output_type == oct_data_conv::dt_uchar)) 3234
3232 retval = retval.char_matrix_value (); 3235 input_buf_list.push_back (input_buf);
3236
3237 if (is && skip != 0 && elts_read == block_size)
3238 {
3239 int seek_status = seek (skip, SEEK_CUR);
3240
3241 if (seek_status < 0)
3242 break;
3243 }
3233 } 3244 }
3234 else 3245
3235 error ("fread: unable to read and convert requested types"); 3246 if (read_to_eof)
3247 {
3248 if (nc < 0)
3249 nc = elts_read / nr + 1;
3250 else
3251 nr = elts_read;
3252 }
3253
3254 retval = finalize_read (input_buf_list, input_buf_elts, elts_read,
3255 nr, nc, input_type, output_type, ffmt);
3236 } 3256 }
3257 else
3258 error ("fread: invalid input stream");
3237 } 3259 }
3238 else 3260 else
3239 invalid_operation ("fread", "reading"); 3261 invalid_operation ("fread", "reading");
3240 } 3262 }
3241 3263