view scripts/strings/unicode2native.m @ 23221:debe0c7dcefc

maint: Update copyright dates.
author John W. Eaton <jwe@octave.org>
date Wed, 22 Feb 2017 13:00:04 -0500
parents c6ca5fe1505c
children c9852320f004
line wrap: on
line source

## Copyright (C) 2016-2017 Markus Mützel
##
## This file is part of Octave.
##
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn  {} {@var{native_bytes} =} unicode2native (@var{utf8_str}, @var{codepage})
## @deftypefnx {} {@var{native_bytes} =} unicode2native (@var{utf8_str})
## Convert UTF-8 string @var{utf8_str} to byte stream using @var{codepage}.
##
## The character vector @var{utf8_str} is converted to a byte stream
## @var{native_bytes} using the codepage given by @var{codepage}.
## The string @var{codepage} must be an identifier of a valid codepage.
## Examples of valid codepages are @qcode{"ISO 8859-1"}, @qcode{"Latin-1"},
## or @qcode{"Shift-JIS"}.
## If @var{codepage} is omitted or empty, the system default codepage is used.
##
## If @var{utf8_str} is an empty string, an empty @code{uint8} array is
## returned and no conversion is performed.
##
## If any of the characters cannot be mapped into the codepage @var{codepage},
## they are replaced with the appropriate substitution sequence for that
## codepage.
##
## @seealso{native2unicode}
## @end deftypefn

function native_bytes = unicode2native (utf8_str, codepage = "")

  if (nargin < 1 || nargin > 2)
    print_usage ();
  endif

  ## An empty string (e.g., the 0x0 char array "") is not a "vector" according
  ## to isvector, but it is still a valid character string.  Only reject
  ## non-char input and non-empty char arrays that are not vectors.
  if (! ischar (utf8_str) || (! isvector (utf8_str) && ! isempty (utf8_str)))
    error ("unicode2native: UTF8_STR must be a character vector");
  endif

  if (! ischar (codepage))
    error ("unicode2native: CODEPAGE must be a string");
  endif

  ## Nothing to convert: return an empty byte array without calling the
  ## conversion routine.
  if (isempty (utf8_str))
    native_bytes = uint8 ([]);
    return;
  endif

  native_bytes = __unicode2native__ (utf8_str, codepage);

  ## Transpose the result for column-vector input (presumably the builtin
  ## returns a row vector, so this restores the orientation of the input).
  ## Use the plain transpose operator; no conjugation is intended.
  if (iscolumn (utf8_str))
    native_bytes = native_bytes.';
  endif

endfunction

## Conversion tests: Cyrillic characters are expected to map to the byte
## values 164:170 in ISO 8859-5.  The second test embeds a NUL character (0)
## in the input and expects a 0 byte at the same position in the output.
%!assert (unicode2native ("ЄЅІЇЈЉЊ", "ISO 8859-5"), uint8 (164:170));
%!assert (unicode2native (["ЄЅІ" 0 "ЇЈЉЊ"], "ISO 8859-5"), uint8 ([164:166 0 167:170]));

## Test input validation: char matrix and non-char UTF8_STR, non-string
## CODEPAGE, unknown codepage name, and wrong number of arguments.
%!error <UTF8_STR must be a character vector> unicode2native (['ab'; 'cd'])
%!error <UTF8_STR must be a character vector> unicode2native ({1 2 3 4})
%!error <CODEPAGE must be a string> unicode2native ('ЄЅІЇЈЉЊ', 123)
%!error <converting from UTF-8 to codepage 'foo'> unicode2native ('a', 'foo')
%!error <Invalid call> unicode2native ()
%!error <Invalid call> unicode2native ('a', 'Latin-1', 'test')