reference/intl/uconverter/transcode.xml
1976eae0d815797af97a1e16c5cd90ffc2868395
...
...
@@ -3,12 +3,12 @@
3
3
<refentry xml:id="uconverter.transcode" xmlns="http://docbook.org/ns/docbook" xmlns:xlink="http://www.w3.org/1999/xlink">
4
4
<refnamediv>
5
5
<refname>UConverter::transcode</refname>
6
-
<refpurpose>Convert string from one charset to another</refpurpose>
6
+
<refpurpose>Convert a string from one character encoding to another</refpurpose>
7
7
</refnamediv>
8
8

9
9
<refsect1 role="description">
10
10
&reftitle.description;
11
-
<methodsynopsis>
11
+
<methodsynopsis role="UConverter">
12
12
<modifier>public</modifier> <modifier>static</modifier> <type class="union"><type>string</type><type>false</type></type><methodname>UConverter::transcode</methodname>
13
13
<methodparam><type>string</type><parameter>str</parameter></methodparam>
14
14
<methodparam><type>string</type><parameter>toEncoding</parameter></methodparam>
...
...
@@ -16,11 +16,8 @@
16
16
<methodparam choice="opt"><type class="union"><type>array</type><type>null</type></type><parameter>options</parameter><initializer>&null;</initializer></methodparam>
17
17
</methodsynopsis>
18
18
<para>
19
-

19
+
Converts <parameter>str</parameter> from <parameter>fromEncoding</parameter> to <parameter>toEncoding</parameter>.
20
20
</para>
21
-

22
-
&warn.undocumented.func;
23
-

24
21
</refsect1>
25
22

26
23
<refsect1 role="parameters">
...
...
@@ -30,7 +27,7 @@
30
27
<term><parameter>str</parameter></term>
31
28
<listitem>
32
29
<para>
33
-
30
+
The &string; to be converted.
34
31
</para>
35
32
</listitem>
36
33
</varlistentry>
...
...
@@ -38,7 +35,7 @@
38
35
<term><parameter>toEncoding</parameter></term>
39
36
<listitem>
40
37
<para>
41
-
38
+
The desired encoding of the result.
42
39
</para>
43
40
</listitem>
44
41
</varlistentry>
...
...
@@ -46,7 +43,7 @@
46
43
<term><parameter>fromEncoding</parameter></term>
47
44
<listitem>
48
45
<para>
49
-
46
+
The current encoding used to interpret <parameter>str</parameter>.
50
47
</para>
51
48
</listitem>
52
49
</varlistentry>
...
...
@@ -54,7 +51,15 @@
54
51
<term><parameter>options</parameter></term>
55
52
<listitem>
56
53
<para>
57
-
54
+
An optional &array;, which may contain the following keys:
55
+
<simplelist>
56
+
<member>
57
+
<literal>'to_subst'</literal> - the substitution character to use
58
+
in place of any character of <parameter>str</parameter> which cannot
59
+
be encoded in <parameter>toEncoding</parameter>. If specified, it must
60
+
represent a single character in the target encoding.
61
+
</member>
62
+
</simplelist>
58
63
</para>
59
64
</listitem>
60
65
</varlistentry>
...
...
@@ -64,10 +69,110 @@
64
69
<refsect1 role="returnvalues">
65
70
&reftitle.returnvalues;
66
71
<para>
67
-
72
+
Returns the converted string&return.falseforfailure;.
68
73
</para>
69
74
</refsect1>
70
75

76
+
<refsect1 role="examples">
77
+
&reftitle.examples;
78
+
<example>
79
+
<title>Converting from UTF-8 to UTF-16 and back</title>
80
+
<programlisting role="php">
81
+
<![CDATA[
82
+
<?php
83
+
$utf8_string = "\x5A\x6F\xC3\xAB"; // 'Zoë' in UTF-8
84
+
$utf16_string = UConverter::transcode($utf8_string, 'UTF-16BE', 'UTF-8');
85
+
echo bin2hex($utf16_string), "\n";
86
+

87
+
$new_utf8_string = UConverter::transcode($utf16_string, 'UTF-8', 'UTF-16BE');
88
+
echo bin2hex($new_utf8_string), "\n";
89
+
?>
90
+
]]>
91
+
</programlisting>
92
+
&example.outputs;
93
+
<screen>
94
+
<![CDATA[
95
+
005a006f00eb
96
+
5a6fc3ab
97
+
]]>
98
+
</screen>
99
+
</example>
100
+
<example>
101
+
<title>Invalid characters in input</title>
102
+
<para>
103
+
If the input string contains byte sequences which are not valid in
104
+
the encoding specified by <parameter>fromEncoding</parameter>, they are
105
+
replaced by Unicode code point U+FFFD (Replacement Character) before
106
+
converting to <parameter>toEncoding</parameter>.
107
+
</para>
108
+
<programlisting role="php">
109
+
<![CDATA[
110
+
<?php
111
+
$invalid_utf8_string = "\xC3"; // incomplete multi-byte UTF-8 sequence
112
+
$utf16_string = UConverter::transcode($invalid_utf8_string, 'UTF-16BE', 'UTF-8');
113
+
echo bin2hex($utf16_string), "\n";
114
+
?>
115
+
]]>
116
+
</programlisting>
117
+
&example.outputs;
118
+
<screen>
119
+
<![CDATA[
120
+
fffd
121
+
]]>
122
+
</screen>
123
+
</example>
124
+
<example>
125
+
<title>Characters which cannot be encoded</title>
126
+
<para>
127
+
If the input string contains characters which cannot be represented
128
+
in <parameter>toEncoding</parameter>, each one is replaced with a single substitution
129
+
character. The default character to use depends on the encoding, and
130
+
can be controlled using the <literal>'to_subst'</literal> option.
131
+
</para>
132
+
<programlisting role="php">
133
+
<![CDATA[
134
+
<?php
135
+
$utf8_string = "\xE2\x82\xAC"; // € (Euro Sign) does not exist in ISO 8859-1
136
+

137
+
// Default replacement in ISO 8859-1 is "\x1A" (Substitute)
138
+
$iso8859_1_string = UConverter::transcode($utf8_string, 'ISO-8859-1', 'UTF-8');
139
+
echo bin2hex($iso8859_1_string), "\n";
140
+

141
+
// Specify a replacement of '?' ("\x3F") instead
142
+
$iso8859_1_string = UConverter::transcode(
143
+
$utf8_string, 'ISO-8859-1', 'UTF-8', ['to_subst' => '?']
144
+
);
145
+
echo bin2hex($iso8859_1_string), "\n";
146
+

147
+
// Since ISO 8859-1 cannot map U+FFFD, invalid input is also replaced by to_subst
148
+
$invalid_utf8_string = "\xC3"; // incomplete multi-byte UTF-8 sequence
149
+
$iso8859_1_string = UConverter::transcode(
150
+
$invalid_utf8_string, 'ISO-8859-1', 'UTF-8', ['to_subst' => '?']
151
+
);
152
+
echo bin2hex($iso8859_1_string), "\n";
153
+
?>
154
+
]]>
155
+
</programlisting>
156
+
&example.outputs;
157
+
<screen>
158
+
<![CDATA[
159
+
1a
160
+
3f
161
+
3f
162
+
]]>
163
+
</screen>
164
+
</example>
165
+
</refsect1>
166
+

167
+
<refsect1 role="seealso">
168
+
&reftitle.seealso;
169
+
<para>
170
+
<simplelist>
171
+
<member><function>mb_convert_encoding</function></member>
172
+
<member><function>iconv</function></member>
173
+
</simplelist>
174
+
</para>
175
+
</refsect1>
71
176

72
177
</refentry>
73
178
<!-- Keep this comment at the end of the file
74
179