reference/intl/uconverter/transcode.xml
1976eae0d815797af97a1e16c5cd90ffc2868395
...
...
@@ -1,27 +1,23 @@
1
1
<?xml version="1.0" encoding="utf-8"?>
2
2
<!-- $Revision$ -->
3
-

4
3
<refentry xml:id="uconverter.transcode" xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink">
5
4
<refnamediv>
6
5
<refname>UConverter::transcode</refname>
7
-
<refpurpose>Convert string from one charset to another</refpurpose>
6
+
<refpurpose>Convert a string from one character encoding to another</refpurpose>
8
7
</refnamediv>
9
8

10
9
<refsect1 role="description">
11
10
&reftitle.description;
12
-
<methodsynopsis>
13
-
<modifier>public</modifier> <modifier>static</modifier> <type>string</type><methodname>UConverter::transcode</methodname>
11
+
<methodsynopsis role="UConverter">
12
+
<modifier>public</modifier> <modifier>static</modifier> <type class="union"><type>string</type><type>false</type></type><methodname>UConverter::transcode</methodname>
14
13
<methodparam><type>string</type><parameter>str</parameter></methodparam>
15
14
<methodparam><type>string</type><parameter>toEncoding</parameter></methodparam>
16
15
<methodparam><type>string</type><parameter>fromEncoding</parameter></methodparam>
17
-
<methodparam choice="opt"><type>array</type><parameter>options</parameter></methodparam>
16
+
<methodparam choice="opt"><type class="union"><type>array</type><type>null</type></type><parameter>options</parameter><initializer>&null;</initializer></methodparam>
18
17
</methodsynopsis>
19
18
<para>
20
-

19
+
Converts <parameter>str</parameter> from <parameter>fromEncoding</parameter> to <parameter>toEncoding</parameter>.
21
20
</para>
22
-

23
-
&warn.undocumented.func;
24
-

25
21
</refsect1>
26
22

27
23
<refsect1 role="parameters">
...
...
@@ -31,7 +27,7 @@
31
27
<term><parameter>str</parameter></term>
32
28
<listitem>
33
29
<para>
34
-
30
+
The &string; to be converted.
35
31
</para>
36
32
</listitem>
37
33
</varlistentry>
...
...
@@ -39,7 +35,7 @@
39
35
<term><parameter>toEncoding</parameter></term>
40
36
<listitem>
41
37
<para>
42
-
38
+
The desired encoding of the result.
43
39
</para>
44
40
</listitem>
45
41
</varlistentry>
...
...
@@ -47,7 +43,7 @@
47
43
<term><parameter>fromEncoding</parameter></term>
48
44
<listitem>
49
45
<para>
50
-
46
+
The current encoding used to interpret <parameter>str</parameter>.
51
47
</para>
52
48
</listitem>
53
49
</varlistentry>
...
...
@@ -55,7 +51,15 @@
55
51
<term><parameter>options</parameter></term>
56
52
<listitem>
57
53
<para>
58
-
54
+
An optional &array;, which may contain the following keys:
55
+
<simplelist>
56
+
<member>
57
+
<literal>'to_subst'</literal> - the substitution character to use
58
+
in place of any character of <parameter>str</parameter> which cannot
59
+
be encoded in <parameter>toEncoding</parameter>. If specified, it must
60
+
represent a single character in the target encoding.
61
+
</member>
62
+
</simplelist>
59
63
</para>
60
64
</listitem>
61
65
</varlistentry>
...
...
@@ -65,13 +69,112 @@
65
69
<refsect1 role="returnvalues">
66
70
&reftitle.returnvalues;
67
71
<para>
68
-
72
+
Returns the converted string&return.falseforfailure;.
69
73
</para>
70
74
</refsect1>
71
75

76
+
<refsect1 role="examples">
77
+
&reftitle.examples;
78
+
<example>
79
+
<title>Converting from UTF-8 to UTF-16 and back</title>
80
+
<programlisting role="php">
81
+
<![CDATA[
82
+
<?php
83
+
$utf8_string = "\x5A\x6F\xC3\xAB"; // 'Zoë' in UTF-8
84
+
$utf16_string = UConverter::transcode($utf8_string, 'UTF-16BE', 'UTF-8');
85
+
echo bin2hex($utf16_string), "\n";
72
86

73
-
</refentry>
87
+
$new_utf8_string = UConverter::transcode($utf16_string, 'UTF-8', 'UTF-16BE');
88
+
echo bin2hex($new_utf8_string), "\n";
89
+
?>
90
+
]]>
91
+
</programlisting>
92
+
&example.outputs;
93
+
<screen>
94
+
<![CDATA[
95
+
005a006f00eb
96
+
5a6fc3ab
97
+
]]>
98
+
</screen>
99
+
</example>
100
+
<example>
101
+
<title>Invalid characters in input</title>
102
+
<para>
103
+
If the input string contains a sequence of bytes which is not valid in
104
+
the encoding specified by <parameter>fromEncoding</parameter>, it is
105
+
replaced by Unicode code point U+FFFD (Replacement Character) before
106
+
converting to <parameter>toEncoding</parameter>.
107
+
</para>
108
+
<programlisting role="php">
109
+
<![CDATA[
110
+
<?php
111
+
$invalid_utf8_string = "\xC3"; // incomplete multi-byte UTF-8 sequence
112
+
$utf16_string = UConverter::transcode($invalid_utf8_string, 'UTF-16BE', 'UTF-8');
113
+
echo bin2hex($utf16_string), "\n";
114
+
?>
115
+
]]>
116
+
</programlisting>
117
+
&example.outputs;
118
+
<screen>
119
+
<![CDATA[
120
+
fffd
121
+
]]>
122
+
</screen>
123
+
</example>
124
+
<example>
125
+
<title>Characters which cannot be encoded</title>
126
+
<para>
127
+
If the input string contains characters which cannot be represented
128
+
in <parameter>toEncoding</parameter>, each is replaced with a substitution
129
+
character. The default character to use depends on the encoding, and
130
+
can be controlled using the <literal>'to_subst'</literal> option.
131
+
</para>
132
+
<programlisting role="php">
133
+
<![CDATA[
134
+
<?php
135
+
$utf8_string = "\xE2\x82\xAC"; // € (Euro Sign) does not exist in ISO 8859-1
74
136

137
+
// Default replacement in ISO 8859-1 is "\x1A" (Substitute)
138
+
$iso8859_1_string = UConverter::transcode($utf8_string, 'ISO-8859-1', 'UTF-8');
139
+
echo bin2hex($iso8859_1_string), "\n";
140
+

141
+
// Specify a replacement of '?' ("\x3F") instead
142
+
$iso8859_1_string = UConverter::transcode(
143
+
$utf8_string, 'ISO-8859-1', 'UTF-8', ['to_subst' => '?']
144
+
);
145
+
echo bin2hex($iso8859_1_string), "\n";
146
+

147
+
// Since ISO 8859-1 cannot map U+FFFD, invalid input is also replaced by to_subst
148
+
$invalid_utf8_string = "\xC3"; // incomplete multi-byte UTF-8 sequence
149
+
$iso8859_1_string = UConverter::transcode(
150
+
$invalid_utf8_string, 'ISO-8859-1', 'UTF-8', ['to_subst' => '?']
151
+
);
152
+
echo bin2hex($iso8859_1_string), "\n";
153
+
?>
154
+
]]>
155
+
</programlisting>
156
+
&example.outputs;
157
+
<screen>
158
+
<![CDATA[
159
+
1a
160
+
3f
161
+
3f
162
+
]]>
163
+
</screen>
164
+
</example>
165
+
</refsect1>
166
+

167
+
<refsect1 role="seealso">
168
+
&reftitle.seealso;
169
+
<para>
170
+
<simplelist>
171
+
<member><function>mb_convert_encoding</function></member>
172
+
<member><function>iconv</function></member>
173
+
</simplelist>
174
+
</para>
175
+
</refsect1>
176
+

177
+
</refentry>
75
178
<!-- Keep this comment at the end of the file
76
179
Local variables:
77
180
mode: sgml
78
181