view scripts/strings/unicode2native.m @ 30875:5d3faba0342e

doc: Ensure documentation lists output argument when it exists for all m-files. For new users of Octave it is best to show explicit calling forms in the documentation and to show a return argument when it exists. * bp-table.cc, shift.m, accumarray.m, accumdim.m, bincoeff.m, bitcmp.m, bitget.m, bitset.m, blkdiag.m, celldisp.m, cplxpair.m, dblquad.m, flip.m, fliplr.m, flipud.m, idivide.m, int2str.m, interpft.m, logspace.m, num2str.m, polyarea.m, postpad.m, prepad.m, randi.m, repmat.m, rng.m, rot90.m, rotdim.m, structfun.m, triplequad.m, uibuttongroup.m, uicontrol.m, uipanel.m, uipushtool.m, uitoggletool.m, uitoolbar.m, waitforbuttonpress.m, help.m, __additional_help_message__.m, hsv.m, im2double.m, im2frame.m, javachk.m, usejava.m, argnames.m, char.m, formula.m, inline.m, __vectorize__.m, findstr.m, flipdim.m, strmatch.m, vectorize.m, commutation_matrix.m, cond.m, cross.m, duplication_matrix.m, expm.m, orth.m, rank.m, rref.m, trace.m, vech.m, cast.m, compare_versions.m, delete.m, dir.m, fileattrib.m, grabcode.m, gunzip.m, inputname.m, license.m, list_primes.m, ls.m, mexext.m, movefile.m, namelengthmax.m, nargoutchk.m, nthargout.m, substruct.m, swapbytes.m, ver.m, verLessThan.m, what.m, fminunc.m, fsolve.m, fzero.m, optimget.m, __fdjac__.m, matlabroot.m, savepath.m, campos.m, camroll.m, camtarget.m, camup.m, camva.m, camzoom.m, clabel.m, diffuse.m, legend.m, orient.m, rticks.m, specular.m, thetaticks.m, xlim.m, xtickangle.m, xticklabels.m, xticks.m, ylim.m, ytickangle.m, yticklabels.m, yticks.m, zlim.m, ztickangle.m, zticklabels.m, zticks.m, ellipsoid.m, isocolors.m, isonormals.m, stairs.m, surfnorm.m, __actual_axis_position__.m, __pltopt__.m, close.m, graphics_toolkit.m, pan.m, print.m, printd.m, __ghostscript__.m, __gnuplot_print__.m, __opengl_print__.m, rotate3d.m, subplot.m, zoom.m, compan.m, conv.m, poly.m, polyaffine.m, polyder.m, polyint.m, polyout.m, polyreduce.m, polyvalm.m, roots.m, prefdir.m, prefsfile.m, profexplore.m, profexport.m, profshow.m, powerset.m, unique.m, arch_rnd.m, arma_rnd.m, autoreg_matrix.m, bartlett.m, blackman.m, detrend.m, durbinlevinson.m, fftconv.m, fftfilt.m, fftshift.m, fractdiff.m, hamming.m, hanning.m, hurst.m, ifftshift.m, rectangle_lw.m, rectangle_sw.m, triangle_lw.m, sinc.m, sinetone.m, sinewave.m, spectral_adf.m, spectral_xdf.m, spencer.m, ilu.m, __sprand__.m, sprand.m, sprandn.m, sprandsym.m, treelayout.m, beta.m, betainc.m, betaincinv.m, betaln.m, cosint.m, expint.m, factorial.m, gammainc.m, gammaincinv.m, lcm.m, nthroot.m, perms.m, reallog.m, realpow.m, realsqrt.m, sinint.m, hadamard.m, hankel.m, hilb.m, invhilb.m, magic.m, pascal.m, rosser.m, toeplitz.m, vander.m, wilkinson.m, center.m, corr.m, cov.m, discrete_cdf.m, discrete_inv.m, discrete_pdf.m, discrete_rnd.m, empirical_cdf.m, empirical_inv.m, empirical_pdf.m, empirical_rnd.m, kendall.m, kurtosis.m, mad.m, mean.m, meansq.m, median.m, mode.m, moment.m, range.m, ranks.m, run_count.m, skewness.m, spearman.m, statistics.m, std.m, base2dec.m, bin2dec.m, blanks.m, cstrcat.m, deblank.m, dec2base.m, dec2bin.m, dec2hex.m, hex2dec.m, index.m, regexptranslate.m, rindex.m, strcat.m, strjust.m, strtrim.m, strtrunc.m, substr.m, untabify.m, __have_feature__.m, __prog_output_assert__.m, __run_test_suite__.m, example.m, fail.m, asctime.m, calendar.m, ctime.m, date.m, etime.m: Add return arguments to @deftypefn macros where they were missing. Rename variables in functions (particularly generic "retval") to match documentation. Rename some return variables for (hopefully) better clarity (e.g., 'ax' to 'hax' to indicate it is a graphics handle to an axes object).
author Rik <rik@octave.org>
date Wed, 30 Mar 2022 20:40:27 -0700
parents 796f54d4ddbf
children 597f3ee61a48
line wrap: on
line source

########################################################################
##
## Copyright (C) 2016-2022 The Octave Project Developers
##
## See the file COPYRIGHT.md in the top-level directory of this
## distribution or <https://octave.org/copyright/>.
##
## This file is part of Octave.
##
## Octave is free software: you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <https://www.gnu.org/licenses/>.
##
########################################################################

## -*- texinfo -*-
## @deftypefn  {} {@var{native_bytes} =} unicode2native (@var{utf8_str}, @var{codepage})
## @deftypefnx {} {@var{native_bytes} =} unicode2native (@var{utf8_str})
## Convert UTF-8 string @var{utf8_str} to byte stream using @var{codepage}.
##
## The character vector @var{utf8_str} is converted to a byte stream
## @var{native_bytes} using the code page given by @var{codepage}.  The
## string @var{codepage} must be an identifier of a valid code page.
## Examples for valid code pages are @qcode{"ISO-8859-1"},
## @qcode{"Shift-JIS"}, or @qcode{"UTF-16"}.  For a list of supported code
## pages, see @url{https://www.gnu.org/software/libiconv}.  If @var{codepage}
## is omitted or empty, the system default codepage is used.
##
## If any of the characters cannot be mapped into the codepage @var{codepage},
## they are replaced with the appropriate substitution sequence for that
## codepage.
##
## @seealso{native2unicode}
## @end deftypefn

function native_bytes = unicode2native (utf8_str, codepage = "")

  if (nargin < 1)
    print_usage ();
  endif

  ## For Matlab compatibility, return empty output for empty input.
  if (isempty (utf8_str))
    native_bytes = uint8 ([]);
    return;
  endif

  if (! ischar (utf8_str) || ! isvector (utf8_str))
    error ("unicode2native: UTF8_STR must be a character vector");
  endif

  if (! (ischar (codepage) && isrow (codepage)))
    error ("unicode2native: CODEPAGE must be a string");
  endif

  native_bytes = __unicode2native__ (utf8_str, codepage);

  if (iscolumn (utf8_str))
    native_bytes = native_bytes.';
  endif

endfunction


%!testif HAVE_ICONV
%! assert (unicode2native ("ЄЅІЇЈЉЊ", "ISO-8859-5"), uint8 (164:170));
%!testif HAVE_ICONV
%! assert (unicode2native (["ЄЅІ" "\0" "ЇЈЉЊ"], "ISO-8859-5"),
%!         uint8 ([164:166 0 167:170]));
%!assert <*60480> (unicode2native (''), uint8 ([]))

%!error <Invalid call> unicode2native ()
%!error <called with too many inputs> unicode2native ('a', 'ISO-8859-1', 'test')
%!error <UTF8_STR must be a character vector> unicode2native (['ab'; 'cd'])
%!error <UTF8_STR must be a character vector> unicode2native ({1 2 3 4})
%!error <CODEPAGE must be a string> unicode2native ('ЄЅІЇЈЉЊ', 123)
%!error <CODEPAGE must be a string> unicode2native ('ЄЅІЇЈЉЊ', ['ISO-8859-1']')
%!testif HAVE_ICONV
%! fail ("unicode2native ('a', 'foo')",
%!       "converting from UTF-8 to codepage 'foo'");