mimehead.c (6323B)
1 /* $Id$ */ 2 /* 3 * MIME header support by Akinori ITO 4 */ 5 6 #include <sys/types.h> 7 #include "fm.h" 8 #include "myctype.h" 9 #include "Str.h" 10 11 #define MIME_ENCODED_LINE_LIMIT 80 12 #define MIME_ENCODED_WORD_LENGTH_OFFSET 18 13 #define MIME_ENCODED_WORD_LENGTH_ESTIMATION(x) \ 14 (((x)+2)*4/3+MIME_ENCODED_WORD_LENGTH_OFFSET) 15 #define MIME_DECODED_WORD_LENGTH_ESTIMATION(x) \ 16 (((x)-MIME_ENCODED_WORD_LENGTH_OFFSET)/4*3) 17 #define J_CHARSET "ISO-2022-JP" 18 19 #define BAD_BASE64 255 20 21 static 22 unsigned char 23 c2e(char x) 24 { 25 if ('A' <= x && x <= 'Z') 26 return (x) - 'A'; 27 if ('a' <= x && x <= 'z') 28 return (x) - 'a' + 26; 29 if ('0' <= x && x <= '9') 30 return (x) - '0' + 52; 31 if (x == '+') 32 return 62; 33 if (x == '/') 34 return 63; 35 return BAD_BASE64; 36 } 37 38 static 39 int 40 ha2d(char x, char y) 41 { 42 int r = 0; 43 44 if ('0' <= x && x <= '9') 45 r = x - '0'; 46 else if ('A' <= x && x <= 'F') 47 r = x - 'A' + 10; 48 else if ('a' <= x && x <= 'f') 49 r = x - 'a' + 10; 50 51 r <<= 4; 52 53 if ('0' <= y && y <= '9') 54 r += y - '0'; 55 else if ('A' <= y && y <= 'F') 56 r += y - 'A' + 10; 57 else if ('a' <= y && y <= 'f') 58 r += y - 'a' + 10; 59 60 return r; 61 62 } 63 64 Str 65 decodeB(char **ww) 66 { 67 unsigned char c[4]; 68 char *wp = *ww; 69 char d[3]; 70 int i, n_pad; 71 Str ap = Strnew_size(strlen(wp)); 72 73 n_pad = 0; 74 while (1) { 75 for (i = 0; i < 4; i++) { 76 c[i] = *(wp++); 77 if (*wp == '\0' || *wp == '?') { 78 i++; 79 for (; i < 4; i++) { 80 c[i] = '='; 81 } 82 break; 83 } 84 } 85 if (c[3] == '=') { 86 n_pad++; 87 c[3] = 'A'; 88 if (c[2] == '=') { 89 n_pad++; 90 c[2] = 'A'; 91 } 92 } 93 for (i = 0; i < 4; i++) { 94 c[i] = c2e(c[i]); 95 if (c[i] == BAD_BASE64) { 96 *ww = wp; 97 return ap; 98 } 99 } 100 d[0] = ((c[0] << 2) | (c[1] >> 4)); 101 d[1] = ((c[1] << 4) | (c[2] >> 2)); 102 d[2] = ((c[2] << 6) | c[3]); 103 for (i = 0; i < 3 - n_pad; i++) { 104 Strcat_char(ap, d[i]); 105 } 106 if (n_pad || *wp == '\0' || *wp == '?') 107 break; 108 } 109 *ww = wp; 110 return ap; 111 } 112 113 Str 114 decodeU(char **ww) 115 { 116 unsigned char c1, c2; 117 char *w = *ww; 118 int n, i; 119 Str a; 120 121 if (*w <= 0x20 || *w >= 0x60) 122 return Strnew_size(0); 123 n = *w - 0x20; 124 a = Strnew_size(n); 125 for (w++, i = 2; *w != '\0' && n; n--) { 126 c1 = (w[0] - 0x20) % 0x40; 127 c2 = (w[1] - 0x20) % 0x40; 128 Strcat_char(a, (c1 << i) | (c2 >> (6 - i))); 129 if (i == 6) { 130 w += 2; 131 i = 2; 132 } 133 else { 134 w++; 135 i += 2; 136 } 137 } 138 return a; 139 } 140 141 /* RFC2047 (4.2. The "Q" encoding) */ 142 Str 143 decodeQ(char **ww) 144 { 145 char *w = *ww; 146 Str a = Strnew_size(strlen(w)); 147 148 for (; *w != '\0' && *w != '?'; w++) { 149 if (*w == '=') { 150 w++; 151 Strcat_char(a, ha2d(*w, *(w + 1))); 152 w++; 153 } 154 else if (*w == '_') { 155 Strcat_char(a, ' '); 156 } 157 else 158 Strcat_char(a, *w); 159 } 160 *ww = w; 161 return a; 162 } 163 164 /* RFC2045 (6.7. Quoted-Printable Content-Transfer-Encoding) */ 165 Str 166 decodeQP(char **ww) 167 { 168 char *w = *ww; 169 Str a = Strnew_size(strlen(w)); 170 171 for (; *w != '\0'; w++) { 172 if (*w == '=') { 173 w++; 174 if (*w == '\n' || *w == '\r' || *w == ' ' || *w == '\t') { 175 while (*w != '\n' && *w != '\0') 176 w++; 177 if (*w == '\0') 178 break; 179 } 180 else { 181 if (*w == '\0' || *(w + 1) == '\0') 182 break; 183 Strcat_char(a, ha2d(*w, *(w + 1))); 184 w++; 185 } 186 } 187 else 188 Strcat_char(a, *w); 189 } 190 *ww = w; 191 return a; 192 } 193 194 #ifdef USE_M17N 195 Str 196 decodeWord(char **ow, wc_ces * charset) 197 #else 198 Str 199 decodeWord0(char **ow) 200 #endif 201 { 202 #ifdef USE_M17N 203 wc_ces c; 204 #endif 205 char *p, *w = *ow; 206 char method; 207 Str a = Strnew(); 208 Str tmp = Strnew(); 209 210 if (*w != '=' || *(w + 1) != '?') 211 goto convert_fail; 212 w += 2; 213 for (; *w != '?'; w++) { 214 if (*w == '\0') 215 goto convert_fail; 216 Strcat_char(tmp, *w); 217 } 218 #ifdef USE_M17N 219 c = wc_guess_charset(tmp->ptr, 0); 220 if (!c) 221 goto convert_fail; 222 #else 223 if (strcasecmp(tmp->ptr, "ISO-8859-1") != 0 && strcasecmp(tmp->ptr, "US_ASCII") != 0) 224 /* NOT ISO-8859-1 encoding ... don't convert */ 225 goto convert_fail; 226 #endif 227 w++; 228 method = *(w++); 229 if (*w != '?') 230 goto convert_fail; 231 w++; 232 p = w; 233 switch (TOUPPER(method)) { 234 case 'B': 235 a = decodeB(&w); 236 break; 237 case 'Q': 238 a = decodeQ(&w); 239 break; 240 default: 241 goto convert_fail; 242 } 243 if (p == w) 244 goto convert_fail; 245 if (*w == '?') { 246 w++; 247 if (*w == '=') 248 w++; 249 } 250 *ow = w; 251 #ifdef USE_M17N 252 *charset = c; 253 #endif 254 return a; 255 256 convert_fail: 257 return Strnew(); 258 } 259 260 /* 261 * convert MIME encoded string to the original one 262 */ 263 #ifdef USE_M17N 264 Str 265 decodeMIME(Str orgstr, wc_ces * charset) 266 #else 267 Str 268 decodeMIME0(Str orgstr) 269 #endif 270 { 271 char *org = orgstr->ptr, *endp = org + orgstr->length; 272 char *org0, *p; 273 Str cnv = NULL; 274 275 #ifdef USE_M17N 276 *charset = 0; 277 #endif 278 while (org < endp) { 279 if (*org == '=' && *(org + 1) == '?') { 280 if (cnv == NULL) { 281 cnv = Strnew_size(orgstr->length); 282 Strcat_charp_n(cnv, orgstr->ptr, org - orgstr->ptr); 283 } 284 nextEncodeWord: 285 p = org; 286 Strcat(cnv, decodeWord(&org, charset)); 287 if (org == p) { /* Convert failure */ 288 Strcat_charp(cnv, org); 289 return cnv; 290 } 291 org0 = org; 292 SPCRLoop: 293 switch (*org0) { 294 case ' ': 295 case '\t': 296 case '\n': 297 case '\r': 298 org0++; 299 goto SPCRLoop; 300 case '=': 301 if (org0[1] == '?') { 302 org = org0; 303 goto nextEncodeWord; 304 } 305 default: 306 break; 307 } 308 } 309 else { 310 if (cnv != NULL) 311 Strcat_char(cnv, *org); 312 org++; 313 } 314 } 315 if (cnv == NULL) 316 return orgstr; 317 return cnv; 318 } 319 320 /* encoding */ 321 322 static char Base64Table[] = 323 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="; 324 325 Str 326 encodeB(char *a) 327 { 328 unsigned char d[3]; 329 unsigned char c1, c2, c3, c4; 330 int i, n_pad; 331 Str w = Strnew(); 332 333 while (1) { 334 if (*a == '\0') 335 break; 336 n_pad = 0; 337 d[1] = d[2] = 0; 338 for (i = 0; i < 3; i++) { 339 d[i] = a[i]; 340 if (a[i] == '\0') { 341 n_pad = 3 - i; 342 break; 343 } 344 } 345 c1 = d[0] >> 2; 346 c2 = (((d[0] << 4) | (d[1] >> 4)) & 0x3f); 347 if (n_pad == 2) { 348 c3 = c4 = 64; 349 } 350 else if (n_pad == 1) { 351 c3 = ((d[1] << 2) & 0x3f); 352 c4 = 64; 353 } 354 else { 355 c3 = (((d[1] << 2) | (d[2] >> 6)) & 0x3f); 356 c4 = (d[2] & 0x3f); 357 } 358 Strcat_char(w, Base64Table[c1]); 359 Strcat_char(w, Base64Table[c2]); 360 Strcat_char(w, Base64Table[c3]); 361 Strcat_char(w, Base64Table[c4]); 362 if (n_pad) 363 break; 364 a += 3; 365 } 366 return w; 367 }