net/mlx4: Fix firmware command timeout during interrupt test
[cascardo/linux.git] / fs / udf / unicode.c
1 /*
2  * unicode.c
3  *
4  * PURPOSE
5  *      Routines for converting between UTF-8 and OSTA Compressed Unicode.
6  *      Also handles filename mangling
7  *
8  * DESCRIPTION
9  *      OSTA Compressed Unicode is explained in the OSTA UDF specification.
10  *              http://www.osta.org/
 *      UTF-8 is explained in IETF RFC 3629.
 *              https://www.rfc-editor.org/rfc/rfc3629.txt
13  *
14  * COPYRIGHT
15  *      This file is distributed under the terms of the GNU General Public
16  *      License (GPL). Copies of the GPL can be obtained from:
17  *              ftp://prep.ai.mit.edu/pub/gnu/GPL
18  *      Each contributing author retains all rights to their own work.
19  */
20
21 #include "udfdecl.h"
22
23 #include <linux/kernel.h>
24 #include <linux/string.h>       /* for memset */
25 #include <linux/nls.h>
26 #include <linux/crc-itu-t.h>
27 #include <linux/slab.h>
28
29 #include "udf_sb.h"
30
/*
 * Encode one Unicode character as UTF-8.
 *
 * @uni:      character to encode
 * @out:      destination buffer
 * @boundlen: number of bytes available in @out
 *
 * Emits a 1-, 2- or 3-byte sequence depending on the value of @uni.
 * Returns the number of bytes written, or -ENAMETOOLONG when @out cannot
 * hold the whole sequence (including when @boundlen is not positive).
 */
static int udf_uni2char_utf8(wchar_t uni,
                             unsigned char *out,
                             int boundlen)
{
        int need;

        if (boundlen <= 0)
                return -ENAMETOOLONG;

        /* Work out how many bytes the encoded form occupies */
        if (uni < 0x80)
                need = 1;
        else if (uni < 0x800)
                need = 2;
        else
                need = 3;

        if (boundlen < need)
                return -ENAMETOOLONG;

        switch (need) {
        case 1:
                out[0] = (unsigned char)uni;
                break;
        case 2:
                out[0] = (unsigned char)(0xc0 | (uni >> 6));
                out[1] = (unsigned char)(0x80 | (uni & 0x3f));
                break;
        default:
                out[0] = (unsigned char)(0xe0 | (uni >> 12));
                out[1] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
                out[2] = (unsigned char)(0x80 | (uni & 0x3f));
                break;
        }
        return need;
}
56
/*
 * Decode one UTF-8 sequence into a Unicode character.
 *
 * @in:       input bytes
 * @boundlen: number of bytes available in @in
 * @uni:      where the decoded character is stored
 *
 * Returns the number of input bytes consumed on success. When @boundlen
 * is not positive nothing is read, 0 is returned and *@uni is left
 * untouched.
 *
 * On malformed input *@uni is set to '?' and -EINVAL is returned. Input
 * is treated as malformed when
 *   - the lead byte is not a valid UTF-8 lead byte,
 *   - a continuation byte does not have the form 10xxxxxx,
 *   - the sequence is cut short by @boundlen, or
 *   - the decoded value is above 0xffff: the CS0 encoder in this file
 *     stores at most 16 bits per character, so larger values would
 *     otherwise be silently truncated.
 */
static int udf_char2uni_utf8(const unsigned char *in,
                             int boundlen,
                             wchar_t *uni)
{
        unsigned int utf_char;
        unsigned char c;
        int utf_cnt, u_len;

        utf_char = 0;
        utf_cnt = 0;    /* continuation bytes still expected; -1 on error */
        for (u_len = 0; u_len < boundlen;) {
                c = in[u_len++];

                /* Complete a multi-byte UTF-8 character */
                if (utf_cnt) {
                        /* Every continuation byte must be 10xxxxxx */
                        if ((c & 0xc0) != 0x80) {
                                utf_cnt = -1;
                                break;
                        }
                        utf_char = (utf_char << 6) | (c & 0x3f);
                        if (--utf_cnt)
                                continue;
                        /* Only 16-bit characters can be represented */
                        if (utf_char > 0xffff) {
                                utf_cnt = -1;
                                break;
                        }
                } else {
                        /* Check for a multi-byte UTF-8 character */
                        if (c & 0x80) {
                                /* Start a multi-byte UTF-8 character */
                                if ((c & 0xe0) == 0xc0) {
                                        utf_char = c & 0x1f;
                                        utf_cnt = 1;
                                } else if ((c & 0xf0) == 0xe0) {
                                        utf_char = c & 0x0f;
                                        utf_cnt = 2;
                                } else if ((c & 0xf8) == 0xf0) {
                                        utf_char = c & 0x07;
                                        utf_cnt = 3;
                                } else if ((c & 0xfc) == 0xf8) {
                                        utf_char = c & 0x03;
                                        utf_cnt = 4;
                                } else if ((c & 0xfe) == 0xfc) {
                                        utf_char = c & 0x01;
                                        utf_cnt = 5;
                                } else {
                                        /* Stray continuation or 0xfe/0xff */
                                        utf_cnt = -1;
                                        break;
                                }
                                continue;
                        } else {
                                /* Single byte UTF-8 character (most common) */
                                utf_char = c;
                        }
                }
                *uni = utf_char;
                break;
        }
        /* Non-zero here means either an error or a truncated sequence */
        if (utf_cnt) {
                *uni = '?';
                return -EINVAL;
        }
        return u_len;
}
113
/* Constants used when mangling on-disk names into valid file names */
enum {
        /* Replaces a run of characters that are illegal in a file name */
        ILLEGAL_CHAR_MARK = '_',
        /* Separates the file name from its extension */
        EXT_MARK = '.',
        /* First character of the generated CRC suffix */
        CRC_MARK = '#',
        /* Maximum number of input characters examined for an extension */
        EXT_SIZE = 5,
        /* Number of chars we need to store generated CRC to make filename unique */
        CRC_LEN = 5
};
120
121 static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len,
122                               int *str_o_idx,
123                               const uint8_t *str_i, int str_i_max_len,
124                               int *str_i_idx,
125                               int u_ch, int *needsCRC,
126                               int (*conv_f)(wchar_t, unsigned char *, int),
127                               int translate)
128 {
129         uint32_t c;
130         int illChar = 0;
131         int len, gotch = 0;
132
133         for (; (!gotch) && (*str_i_idx < str_i_max_len); *str_i_idx += u_ch) {
134                 if (*str_o_idx >= str_o_max_len) {
135                         *needsCRC = 1;
136                         return gotch;
137                 }
138
139                 /* Expand OSTA compressed Unicode to Unicode */
140                 c = str_i[*str_i_idx];
141                 if (u_ch > 1)
142                         c = (c << 8) | str_i[*str_i_idx + 1];
143
144                 if (translate && (c == '/' || c == 0))
145                         illChar = 1;
146                 else if (illChar)
147                         break;
148                 else
149                         gotch = 1;
150         }
151         if (illChar) {
152                 *needsCRC = 1;
153                 c = ILLEGAL_CHAR_MARK;
154                 gotch = 1;
155         }
156         if (gotch) {
157                 len = conv_f(c, &str_o[*str_o_idx], str_o_max_len - *str_o_idx);
158                 /* Valid character? */
159                 if (len >= 0)
160                         *str_o_idx += len;
161                 else if (len == -ENAMETOOLONG) {
162                         *needsCRC = 1;
163                         gotch = 0;
164                 } else {
165                         str_o[(*str_o_idx)++] = '?';
166                         *needsCRC = 1;
167                 }
168         }
169         return gotch;
170 }
171
172 static int udf_name_from_CS0(uint8_t *str_o, int str_max_len,
173                              const uint8_t *ocu, int ocu_len,
174                              int (*conv_f)(wchar_t, unsigned char *, int),
175                              int translate)
176 {
177         uint32_t c;
178         uint8_t cmp_id;
179         int idx, len;
180         int u_ch;
181         int needsCRC = 0;
182         int ext_i_len, ext_max_len;
183         int str_o_len = 0;      /* Length of resulting output */
184         int ext_o_len = 0;      /* Extension output length */
185         int ext_crc_len = 0;    /* Extension output length if used with CRC */
186         int i_ext = -1;         /* Extension position in input buffer */
187         int o_crc = 0;          /* Rightmost possible output pos for CRC+ext */
188         unsigned short valueCRC;
189         uint8_t ext[EXT_SIZE * NLS_MAX_CHARSET_SIZE + 1];
190         uint8_t crc[CRC_LEN];
191
192         if (str_max_len <= 0)
193                 return 0;
194
195         if (ocu_len == 0) {
196                 memset(str_o, 0, str_max_len);
197                 return 0;
198         }
199
200         cmp_id = ocu[0];
201         if (cmp_id != 8 && cmp_id != 16) {
202                 memset(str_o, 0, str_max_len);
203                 pr_err("unknown compression code (%d)\n", cmp_id);
204                 return -EINVAL;
205         }
206         u_ch = cmp_id >> 3;
207
208         ocu++;
209         ocu_len--;
210
211         if (ocu_len % u_ch) {
212                 pr_err("incorrect filename length (%d)\n", ocu_len + 1);
213                 return -EINVAL;
214         }
215
216         if (translate) {
217                 /* Look for extension */
218                 for (idx = ocu_len - u_ch, ext_i_len = 0;
219                      (idx >= 0) && (ext_i_len < EXT_SIZE);
220                      idx -= u_ch, ext_i_len++) {
221                         c = ocu[idx];
222                         if (u_ch > 1)
223                                 c = (c << 8) | ocu[idx + 1];
224
225                         if (c == EXT_MARK) {
226                                 if (ext_i_len)
227                                         i_ext = idx;
228                                 break;
229                         }
230                 }
231                 if (i_ext >= 0) {
232                         /* Convert extension */
233                         ext_max_len = min_t(int, sizeof(ext), str_max_len);
234                         ext[ext_o_len++] = EXT_MARK;
235                         idx = i_ext + u_ch;
236                         while (udf_name_conv_char(ext, ext_max_len, &ext_o_len,
237                                                   ocu, ocu_len, &idx,
238                                                   u_ch, &needsCRC,
239                                                   conv_f, translate)) {
240                                 if ((ext_o_len + CRC_LEN) < str_max_len)
241                                         ext_crc_len = ext_o_len;
242                         }
243                 }
244         }
245
246         idx = 0;
247         while (1) {
248                 if (translate && (idx == i_ext)) {
249                         if (str_o_len > (str_max_len - ext_o_len))
250                                 needsCRC = 1;
251                         break;
252                 }
253
254                 if (!udf_name_conv_char(str_o, str_max_len, &str_o_len,
255                                         ocu, ocu_len, &idx,
256                                         u_ch, &needsCRC, conv_f, translate))
257                         break;
258
259                 if (translate &&
260                     (str_o_len <= (str_max_len - ext_o_len - CRC_LEN)))
261                         o_crc = str_o_len;
262         }
263
264         if (translate) {
265                 if (str_o_len <= 2 && str_o[0] == '.' &&
266                     (str_o_len == 1 || str_o[1] == '.'))
267                         needsCRC = 1;
268                 if (needsCRC) {
269                         str_o_len = o_crc;
270                         valueCRC = crc_itu_t(0, ocu, ocu_len);
271                         crc[0] = CRC_MARK;
272                         crc[1] = hex_asc_upper_hi(valueCRC >> 8);
273                         crc[2] = hex_asc_upper_lo(valueCRC >> 8);
274                         crc[3] = hex_asc_upper_hi(valueCRC);
275                         crc[4] = hex_asc_upper_lo(valueCRC);
276                         len = min_t(int, CRC_LEN, str_max_len - str_o_len);
277                         memcpy(&str_o[str_o_len], crc, len);
278                         str_o_len += len;
279                         ext_o_len = ext_crc_len;
280                 }
281                 if (ext_o_len > 0) {
282                         memcpy(&str_o[str_o_len], ext, ext_o_len);
283                         str_o_len += ext_o_len;
284                 }
285         }
286
287         return str_o_len;
288 }
289
/*
 * Convert a name in the local charset to OSTA CS0 (compressed Unicode).
 *
 * @ocu:         output buffer; zero-filled before conversion
 * @ocu_max_len: size of @ocu in bytes
 * @str_i:       input name
 * @str_len:     length of @str_i in bytes
 * @conv_f:      decodes one character of @str_i, returning the number of
 *               input bytes used or a negative value for invalid input
 *
 * The name is first encoded with 8 bits per character (compression ID 8).
 * As soon as a character above 0xff is met, encoding is restarted from the
 * beginning with 16 bits per character (compression ID 16). Invalid input
 * characters are stored as '?'.
 *
 * Returns the number of bytes used in @ocu including the compression ID,
 * or 0 if @ocu_max_len is not positive or the name does not fit.
 */
static int udf_name_to_CS0(uint8_t *ocu, int ocu_max_len,
                           const uint8_t *str_i, int str_len,
                           int (*conv_f)(const unsigned char *, int, wchar_t *))
{
        unsigned int limit = 0xff;      /* largest value encodable so far */
        int char_width = 1;             /* output bytes per character */
        wchar_t uni_char;
        int pos, consumed, out_len;
        int need_wide;

        if (ocu_max_len <= 0)
                return 0;

        memset(ocu, 0, ocu_max_len);
        ocu[0] = 8;

        for (;;) {
                need_wide = 0;
                out_len = 1;
                pos = 0;
                while (pos < str_len) {
                        /* Name didn't fit? */
                        if (out_len + char_width > ocu_max_len)
                                return 0;

                        consumed = conv_f(str_i + pos, str_len - pos,
                                          &uni_char);
                        if (consumed == 0) {
                                pos++;
                                continue;
                        }
                        /* Invalid character, deal with it */
                        if (consumed < 0) {
                                consumed = 1;
                                uni_char = '?';
                        }

                        if (uni_char > limit) {
                                need_wide = 1;
                                break;
                        }

                        if (char_width == 2)
                                ocu[out_len++] = (uint8_t)(uni_char >> 8);
                        ocu[out_len++] = (uint8_t)(uni_char & 0xff);
                        pos += consumed;
                }
                if (!need_wide)
                        return out_len;

                /* Start over using 16-bit characters */
                limit = 0xffff;
                ocu[0] = 0x10;
                char_width = 2;
        }
}
337
338 int udf_dstrCS0toUTF8(uint8_t *utf_o, int o_len,
339                       const uint8_t *ocu_i, int i_len)
340 {
341         int s_len = 0;
342
343         if (i_len > 0) {
344                 s_len = ocu_i[i_len - 1];
345                 if (s_len >= i_len) {
346                         pr_err("incorrect dstring lengths (%d/%d)\n",
347                                s_len, i_len);
348                         return -EINVAL;
349                 }
350         }
351
352         return udf_name_from_CS0(utf_o, o_len, ocu_i, s_len,
353                                  udf_uni2char_utf8, 0);
354 }
355
356 int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen,
357                      uint8_t *dname, int dlen)
358 {
359         int (*conv_f)(wchar_t, unsigned char *, int);
360         int ret;
361
362         if (!slen)
363                 return -EIO;
364
365         if (dlen <= 0)
366                 return 0;
367
368         if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
369                 conv_f = udf_uni2char_utf8;
370         } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
371                 conv_f = UDF_SB(sb)->s_nls_map->uni2char;
372         } else
373                 BUG();
374
375         ret = udf_name_from_CS0(dname, dlen, sname, slen, conv_f, 1);
376         /* Zero length filename isn't valid... */
377         if (ret == 0)
378                 ret = -EINVAL;
379         return ret;
380 }
381
382 int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen,
383                      uint8_t *dname, int dlen)
384 {
385         int (*conv_f)(const unsigned char *, int, wchar_t *);
386
387         if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
388                 conv_f = udf_char2uni_utf8;
389         } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
390                 conv_f = UDF_SB(sb)->s_nls_map->char2uni;
391         } else
392                 BUG();
393
394         return udf_name_to_CS0(dname, dlen, sname, slen, conv_f);
395 }
396