url.c (53708B)
1 /* $Id$ */ 2 #include "fm.h" 3 #ifndef __MINGW32_VERSION 4 #include <sys/types.h> 5 #include <sys/socket.h> 6 #include <netinet/in.h> 7 #include <arpa/inet.h> 8 #include <netdb.h> 9 #else 10 #include <winsock.h> 11 #endif /* __MINGW32_VERSION */ 12 13 #include <signal.h> 14 #include <setjmp.h> 15 #include <errno.h> 16 17 #include <sys/stat.h> 18 #ifdef __EMX__ 19 #include <io.h> /* ?? */ 20 #endif /* __EMX__ */ 21 22 #include "html.h" 23 #include "Str.h" 24 #include "myctype.h" 25 #include "regex.h" 26 27 #ifdef USE_SSL 28 #ifndef SSLEAY_VERSION_NUMBER 29 #include <openssl/crypto.h> /* SSLEAY_VERSION_NUMBER may be here */ 30 #endif 31 #include <openssl/err.h> 32 #endif 33 34 #ifdef __WATT32__ 35 #define write(a,b,c) write_s(a,b,c) 36 #endif /* __WATT32__ */ 37 38 #ifdef __MINGW32_VERSION 39 #define write(a,b,c) send(a,b,c, 0) 40 #define close(fd) closesocket(fd) 41 #endif 42 43 #ifdef INET6 44 /* see rc.c, "dns_order" and dnsorders[] */ 45 int ai_family_order_table[7][3] = { 46 {PF_UNSPEC, PF_UNSPEC, PF_UNSPEC}, /* 0:unspec */ 47 {PF_INET, PF_INET6, PF_UNSPEC}, /* 1:inet inet6 */ 48 {PF_INET6, PF_INET, PF_UNSPEC}, /* 2:inet6 inet */ 49 {PF_UNSPEC, PF_UNSPEC, PF_UNSPEC}, /* 3: --- */ 50 {PF_INET, PF_UNSPEC, PF_UNSPEC}, /* 4:inet */ 51 {PF_UNSPEC, PF_UNSPEC, PF_UNSPEC}, /* 5: --- */ 52 {PF_INET6, PF_UNSPEC, PF_UNSPEC}, /* 6:inet6 */ 53 }; 54 #endif /* INET6 */ 55 56 static JMP_BUF AbortLoading; 57 58 /* XXX: note html.h SCM_ */ 59 static int 60 DefaultPort[] = { 61 80, /* http */ 62 70, /* gopher */ 63 21, /* ftp */ 64 21, /* ftpdir */ 65 0, /* local - not defined */ 66 0, /* local-CGI - not defined? */ 67 0, /* exec - not defined? */ 68 119, /* nntp */ 69 119, /* nntp group */ 70 119, /* news */ 71 119, /* news group */ 72 0, /* data - not defined */ 73 0, /* mailto - not defined */ 74 #ifdef USE_SSL 75 443, /* https */ 76 #endif /* USE_SSL */ 77 }; 78 79 struct cmdtable schemetable[] = { 80 {"http", SCM_HTTP}, 81 {"gopher", SCM_GOPHER}, 82 {"ftp", SCM_FTP}, 83 {"local", SCM_LOCAL}, 84 {"file", SCM_LOCAL}, 85 /* {"exec", SCM_EXEC}, */ 86 {"nntp", SCM_NNTP}, 87 /* {"nntp", SCM_NNTP_GROUP}, */ 88 {"news", SCM_NEWS}, 89 /* {"news", SCM_NEWS_GROUP}, */ 90 {"data", SCM_DATA}, 91 #ifndef USE_W3MMAILER 92 {"mailto", SCM_MAILTO}, 93 #endif 94 #ifdef USE_SSL 95 {"https", SCM_HTTPS}, 96 #endif /* USE_SSL */ 97 {NULL, SCM_UNKNOWN}, 98 }; 99 100 static struct table2 DefaultGuess[] = { 101 {"html", "text/html"}, 102 {"htm", "text/html"}, 103 {"shtml", "text/html"}, 104 {"xhtml", "application/xhtml+xml"}, 105 {"gif", "image/gif"}, 106 {"jpeg", "image/jpeg"}, 107 {"jpg", "image/jpeg"}, 108 {"png", "image/png"}, 109 {"xbm", "image/xbm"}, 110 {"au", "audio/basic"}, 111 {"gz", "application/x-gzip"}, 112 {"Z", "application/x-compress"}, 113 {"bz2", "application/x-bzip"}, 114 {"tar", "application/x-tar"}, 115 {"zip", "application/x-zip"}, 116 {"lha", "application/x-lha"}, 117 {"lzh", "application/x-lha"}, 118 {"ps", "application/postscript"}, 119 {"pdf", "application/pdf"}, 120 {NULL, NULL} 121 }; 122 123 static void add_index_file(ParsedURL *pu, URLFile *uf); 124 125 /* #define HTTP_DEFAULT_FILE "/index.html" */ 126 127 #ifndef HTTP_DEFAULT_FILE 128 #define HTTP_DEFAULT_FILE "/" 129 #endif /* not HTTP_DEFAULT_FILE */ 130 131 #ifdef SOCK_DEBUG 132 #include <stdarg.h> 133 134 static void 135 sock_log(char *message, ...) 136 { 137 FILE *f = fopen("zzzsocklog", "a"); 138 va_list va; 139 140 if (f == NULL) 141 return; 142 va_start(va, message); 143 vfprintf(f, message, va); 144 fclose(f); 145 } 146 147 #endif 148 149 static TextList *mimetypes_list; 150 static struct table2 **UserMimeTypes; 151 152 static struct table2 * 153 loadMimeTypes(char *filename) 154 { 155 FILE *f; 156 char *d, *type; 157 int i, n; 158 Str tmp; 159 struct table2 *mtypes; 160 161 f = fopen(expandPath(filename), "r"); 162 if (f == NULL) 163 return NULL; 164 n = 0; 165 while (tmp = Strfgets(f), tmp->length > 0) { 166 d = tmp->ptr; 167 if (d[0] != '#') { 168 d = strtok(d, " \t\n\r"); 169 if (d != NULL) { 170 d = strtok(NULL, " \t\n\r"); 171 for (i = 0; d != NULL; i++) 172 d = strtok(NULL, " \t\n\r"); 173 n += i; 174 } 175 } 176 } 177 fseek(f, 0, 0); 178 mtypes = New_N(struct table2, n + 1); 179 i = 0; 180 while (tmp = Strfgets(f), tmp->length > 0) { 181 d = tmp->ptr; 182 if (d[0] == '#') 183 continue; 184 type = strtok(d, " \t\n\r"); 185 if (type == NULL) 186 continue; 187 while (1) { 188 d = strtok(NULL, " \t\n\r"); 189 if (d == NULL) 190 break; 191 mtypes[i].item1 = Strnew_charp(d)->ptr; 192 mtypes[i].item2 = Strnew_charp(type)->ptr; 193 i++; 194 } 195 } 196 mtypes[i].item1 = NULL; 197 mtypes[i].item2 = NULL; 198 fclose(f); 199 return mtypes; 200 } 201 202 void 203 initMimeTypes() 204 { 205 int i; 206 TextListItem *tl; 207 208 if (non_null(mimetypes_files)) 209 mimetypes_list = make_domain_list(mimetypes_files); 210 else 211 mimetypes_list = NULL; 212 if (mimetypes_list == NULL) 213 return; 214 UserMimeTypes = New_N(struct table2 *, mimetypes_list->nitem); 215 for (i = 0, tl = mimetypes_list->first; tl; i++, tl = tl->next) 216 UserMimeTypes[i] = loadMimeTypes(tl->ptr); 217 } 218 219 static char * 220 DefaultFile(int scheme) 221 { 222 switch (scheme) { 223 case SCM_HTTP: 224 #ifdef USE_SSL 225 case SCM_HTTPS: 226 #endif /* USE_SSL */ 227 return allocStr(HTTP_DEFAULT_FILE, -1); 228 #ifdef USE_GOPHER 229 case SCM_GOPHER: 230 return allocStr("1", -1); 231 #endif /* USE_GOPHER */ 232 case SCM_LOCAL: 233 case SCM_LOCAL_CGI: 234 case SCM_FTP: 235 case SCM_FTPDIR: 236 return allocStr("/", -1); 237 } 238 return NULL; 239 } 240 241 static MySignalHandler 242 KeyAbort(SIGNAL_ARG) 243 { 244 LONGJMP(AbortLoading, 1); 245 SIGNAL_RETURN; 246 } 247 248 #ifdef USE_SSL 249 SSL_CTX *ssl_ctx = NULL; 250 251 void 252 free_ssl_ctx() 253 { 254 if (ssl_ctx != NULL) 255 SSL_CTX_free(ssl_ctx); 256 ssl_ctx = NULL; 257 ssl_accept_this_site(NULL); 258 } 259 260 #if SSLEAY_VERSION_NUMBER >= 0x00905100 261 #include <openssl/rand.h> 262 static void 263 init_PRNG() 264 { 265 char buffer[256]; 266 const char *file; 267 long l; 268 if (RAND_status()) 269 return; 270 if ((file = RAND_file_name(buffer, sizeof(buffer)))) { 271 #ifdef USE_EGD 272 if (RAND_egd(file) > 0) 273 return; 274 #endif 275 RAND_load_file(file, -1); 276 } 277 if (RAND_status()) 278 goto seeded; 279 srand48((long)time(NULL)); 280 while (!RAND_status()) { 281 l = lrand48(); 282 RAND_seed((unsigned char *)&l, sizeof(long)); 283 } 284 seeded: 285 if (file) 286 RAND_write_file(file); 287 } 288 #endif /* SSLEAY_VERSION_NUMBER >= 0x00905100 */ 289 290 static SSL * 291 openSSLHandle(int sock, char *hostname, char **p_cert) 292 { 293 SSL *handle = NULL; 294 static char *old_ssl_forbid_method = NULL; 295 #ifdef USE_SSL_VERIFY 296 static int old_ssl_verify_server = -1; 297 #endif 298 299 if (old_ssl_forbid_method != ssl_forbid_method 300 && (!old_ssl_forbid_method || !ssl_forbid_method || 301 strcmp(old_ssl_forbid_method, ssl_forbid_method))) { 302 old_ssl_forbid_method = ssl_forbid_method; 303 #ifdef USE_SSL_VERIFY 304 ssl_path_modified = 1; 305 #else 306 free_ssl_ctx(); 307 #endif 308 } 309 #ifdef USE_SSL_VERIFY 310 if (old_ssl_verify_server != ssl_verify_server) { 311 old_ssl_verify_server = ssl_verify_server; 312 ssl_path_modified = 1; 313 } 314 if (ssl_path_modified) { 315 free_ssl_ctx(); 316 ssl_path_modified = 0; 317 } 318 #endif /* defined(USE_SSL_VERIFY) */ 319 if (ssl_ctx == NULL) { 320 int option; 321 #if SSLEAY_VERSION_NUMBER < 0x0800 322 ssl_ctx = SSL_CTX_new(); 323 X509_set_default_verify_paths(ssl_ctx->cert); 324 #else /* SSLEAY_VERSION_NUMBER >= 0x0800 */ 325 SSLeay_add_ssl_algorithms(); 326 SSL_load_error_strings(); 327 if (!(ssl_ctx = SSL_CTX_new(SSLv23_client_method()))) 328 goto eend; 329 option = SSL_OP_ALL; 330 if (ssl_forbid_method) { 331 if (strchr(ssl_forbid_method, '2')) 332 option |= SSL_OP_NO_SSLv2; 333 if (strchr(ssl_forbid_method, '3')) 334 option |= SSL_OP_NO_SSLv3; 335 if (strchr(ssl_forbid_method, 't')) 336 option |= SSL_OP_NO_TLSv1; 337 if (strchr(ssl_forbid_method, 'T')) 338 option |= SSL_OP_NO_TLSv1; 339 } 340 SSL_CTX_set_options(ssl_ctx, option); 341 #ifdef USE_SSL_VERIFY 342 /* derived from openssl-0.9.5/apps/s_{client,cb}.c */ 343 #if 1 /* use SSL_get_verify_result() to verify cert */ 344 SSL_CTX_set_verify(ssl_ctx, SSL_VERIFY_NONE, NULL); 345 #else 346 SSL_CTX_set_verify(ssl_ctx, 347 ssl_verify_server ? SSL_VERIFY_PEER : 348 SSL_VERIFY_NONE, NULL); 349 #endif 350 if (ssl_cert_file != NULL && *ssl_cert_file != '\0') { 351 int ng = 1; 352 if (SSL_CTX_use_certificate_file 353 (ssl_ctx, ssl_cert_file, SSL_FILETYPE_PEM) > 0) { 354 char *key_file = (ssl_key_file == NULL 355 || *ssl_key_file == 356 '\0') ? ssl_cert_file : ssl_key_file; 357 if (SSL_CTX_use_PrivateKey_file 358 (ssl_ctx, key_file, SSL_FILETYPE_PEM) > 0) 359 if (SSL_CTX_check_private_key(ssl_ctx)) 360 ng = 0; 361 } 362 if (ng) { 363 free_ssl_ctx(); 364 goto eend; 365 } 366 } 367 if ((!ssl_ca_file && !ssl_ca_path) 368 || SSL_CTX_load_verify_locations(ssl_ctx, ssl_ca_file, ssl_ca_path)) 369 #endif /* defined(USE_SSL_VERIFY) */ 370 SSL_CTX_set_default_verify_paths(ssl_ctx); 371 #endif /* SSLEAY_VERSION_NUMBER >= 0x0800 */ 372 } 373 handle = SSL_new(ssl_ctx); 374 SSL_set_fd(handle, sock); 375 #if SSLEAY_VERSION_NUMBER >= 0x00905100 376 init_PRNG(); 377 #endif /* SSLEAY_VERSION_NUMBER >= 0x00905100 */ 378 #if (SSLEAY_VERSION_NUMBER >= 0x00908070) && !defined(OPENSSL_NO_TLSEXT) 379 SSL_set_tlsext_host_name(handle,hostname); 380 #endif /* (SSLEAY_VERSION_NUMBER >= 0x00908070) && !defined(OPENSSL_NO_TLSEXT) */ 381 if (SSL_connect(handle) > 0) { 382 Str serv_cert = ssl_get_certificate(handle, hostname); 383 if (serv_cert) { 384 *p_cert = serv_cert->ptr; 385 return handle; 386 } 387 close(sock); 388 SSL_free(handle); 389 return NULL; 390 } 391 eend: 392 close(sock); 393 if (handle) 394 SSL_free(handle); 395 /* FIXME: gettextize? */ 396 disp_err_message(Sprintf 397 ("SSL error: %s", 398 ERR_error_string(ERR_get_error(), NULL))->ptr, FALSE); 399 return NULL; 400 } 401 402 static void 403 SSL_write_from_file(SSL * ssl, char *file) 404 { 405 FILE *fd; 406 int c; 407 char buf[1]; 408 fd = fopen(file, "r"); 409 if (fd != NULL) { 410 while ((c = fgetc(fd)) != EOF) { 411 buf[0] = c; 412 SSL_write(ssl, buf, 1); 413 } 414 fclose(fd); 415 } 416 } 417 418 #endif /* USE_SSL */ 419 420 static void 421 write_from_file(int sock, char *file) 422 { 423 FILE *fd; 424 int c; 425 char buf[1]; 426 fd = fopen(file, "r"); 427 if (fd != NULL) { 428 while ((c = fgetc(fd)) != EOF) { 429 buf[0] = c; 430 write(sock, buf, 1); 431 } 432 fclose(fd); 433 } 434 } 435 436 ParsedURL * 437 baseURL(Buffer *buf) 438 { 439 if (buf->bufferprop & BP_NO_URL) { 440 /* no URL is defined for the buffer */ 441 return NULL; 442 } 443 if (buf->baseURL != NULL) { 444 /* <BASE> tag is defined in the document */ 445 return buf->baseURL; 446 } 447 else 448 return &buf->currentURL; 449 } 450 451 int 452 openSocket(char *const hostname, 453 char *remoteport_name, unsigned short remoteport_num) 454 { 455 volatile int sock = -1; 456 #ifdef INET6 457 int *af; 458 struct addrinfo hints, *res0, *res; 459 int error; 460 char *hname; 461 #else /* not INET6 */ 462 struct sockaddr_in hostaddr; 463 struct hostent *entry; 464 struct protoent *proto; 465 unsigned short s_port; 466 int a1, a2, a3, a4; 467 unsigned long adr; 468 #endif /* not INET6 */ 469 MySignalHandler(*volatile prevtrap) (SIGNAL_ARG) = NULL; 470 471 if (fmInitialized) { 472 /* FIXME: gettextize? */ 473 message(Sprintf("Opening socket...")->ptr, 0, 0); 474 refresh(); 475 } 476 if (SETJMP(AbortLoading) != 0) { 477 #ifdef SOCK_DEBUG 478 sock_log("openSocket() failed. reason: user abort\n"); 479 #endif 480 if (sock >= 0) 481 close(sock); 482 goto error; 483 } 484 TRAP_ON; 485 if (hostname == NULL) { 486 #ifdef SOCK_DEBUG 487 sock_log("openSocket() failed. reason: Bad hostname \"%s\"\n", 488 hostname); 489 #endif 490 goto error; 491 } 492 493 #ifdef INET6 494 /* rfc2732 compliance */ 495 hname = hostname; 496 if (hname != NULL && hname[0] == '[' && hname[strlen(hname) - 1] == ']') { 497 hname = allocStr(hostname + 1, -1); 498 hname[strlen(hname) - 1] = '\0'; 499 if (strspn(hname, "0123456789abcdefABCDEF:.") != strlen(hname)) 500 goto error; 501 } 502 for (af = ai_family_order_table[DNS_order];; af++) { 503 memset(&hints, 0, sizeof(hints)); 504 hints.ai_family = *af; 505 hints.ai_socktype = SOCK_STREAM; 506 if (remoteport_num != 0) { 507 Str portbuf = Sprintf("%d", remoteport_num); 508 error = getaddrinfo(hname, portbuf->ptr, &hints, &res0); 509 } 510 else { 511 error = -1; 512 } 513 if (error && remoteport_name && remoteport_name[0] != '\0') { 514 /* try default port */ 515 error = getaddrinfo(hname, remoteport_name, &hints, &res0); 516 } 517 if (error) { 518 if (*af == PF_UNSPEC) { 519 goto error; 520 } 521 /* try next ai family */ 522 continue; 523 } 524 sock = -1; 525 for (res = res0; res; res = res->ai_next) { 526 sock = socket(res->ai_family, res->ai_socktype, res->ai_protocol); 527 if (sock < 0) { 528 continue; 529 } 530 if (connect(sock, res->ai_addr, res->ai_addrlen) < 0) { 531 close(sock); 532 sock = -1; 533 continue; 534 } 535 break; 536 } 537 if (sock < 0) { 538 freeaddrinfo(res0); 539 if (*af == PF_UNSPEC) { 540 goto error; 541 } 542 /* try next ai family */ 543 continue; 544 } 545 freeaddrinfo(res0); 546 break; 547 } 548 #else /* not INET6 */ 549 s_port = htons(remoteport_num); 550 bzero((char *)&hostaddr, sizeof(struct sockaddr_in)); 551 if ((proto = getprotobyname("tcp")) == NULL) { 552 /* protocol number of TCP is 6 */ 553 proto = New(struct protoent); 554 proto->p_proto = 6; 555 } 556 if ((sock = socket(AF_INET, SOCK_STREAM, proto->p_proto)) < 0) { 557 #ifdef SOCK_DEBUG 558 sock_log("openSocket: socket() failed. reason: %s\n", strerror(errno)); 559 #endif 560 goto error; 561 } 562 regexCompile("^[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+$", 0); 563 if (regexMatch(hostname, -1, 1)) { 564 sscanf(hostname, "%d.%d.%d.%d", &a1, &a2, &a3, &a4); 565 adr = htonl((a1 << 24) | (a2 << 16) | (a3 << 8) | a4); 566 bcopy((void *)&adr, (void *)&hostaddr.sin_addr, sizeof(long)); 567 hostaddr.sin_family = AF_INET; 568 hostaddr.sin_port = s_port; 569 if (fmInitialized) { 570 message(Sprintf("Connecting to %s", hostname)->ptr, 0, 0); 571 refresh(); 572 } 573 if (connect(sock, (struct sockaddr *)&hostaddr, 574 sizeof(struct sockaddr_in)) < 0) { 575 #ifdef SOCK_DEBUG 576 sock_log("openSocket: connect() failed. reason: %s\n", 577 strerror(errno)); 578 #endif 579 goto error; 580 } 581 } 582 else { 583 char **h_addr_list; 584 int result = -1; 585 if (fmInitialized) { 586 message(Sprintf("Performing hostname lookup on %s", hostname)->ptr, 587 0, 0); 588 refresh(); 589 } 590 if ((entry = gethostbyname(hostname)) == NULL) { 591 #ifdef SOCK_DEBUG 592 sock_log("openSocket: gethostbyname() failed. reason: %s\n", 593 strerror(errno)); 594 #endif 595 goto error; 596 } 597 hostaddr.sin_family = AF_INET; 598 hostaddr.sin_port = s_port; 599 for (h_addr_list = entry->h_addr_list; *h_addr_list; h_addr_list++) { 600 bcopy((void *)h_addr_list[0], (void *)&hostaddr.sin_addr, 601 entry->h_length); 602 #ifdef SOCK_DEBUG 603 adr = ntohl(*(long *)&hostaddr.sin_addr); 604 sock_log("openSocket: connecting %d.%d.%d.%d\n", 605 (adr >> 24) & 0xff, 606 (adr >> 16) & 0xff, (adr >> 8) & 0xff, adr & 0xff); 607 #endif 608 if (fmInitialized) { 609 message(Sprintf("Connecting to %s", hostname)->ptr, 0, 0); 610 refresh(); 611 } 612 if ((result = connect(sock, (struct sockaddr *)&hostaddr, 613 sizeof(struct sockaddr_in))) == 0) { 614 break; 615 } 616 #ifdef SOCK_DEBUG 617 else { 618 sock_log("openSocket: connect() failed. reason: %s\n", 619 strerror(errno)); 620 } 621 #endif 622 } 623 if (result < 0) { 624 goto error; 625 } 626 } 627 #endif /* not INET6 */ 628 629 TRAP_OFF; 630 return sock; 631 error: 632 TRAP_OFF; 633 return -1; 634 635 } 636 637 638 #define COPYPATH_SPC_ALLOW 0 639 #define COPYPATH_SPC_IGNORE 1 640 #define COPYPATH_SPC_REPLACE 2 641 642 static char * 643 copyPath(char *orgpath, int length, int option) 644 { 645 Str tmp = Strnew(); 646 while (*orgpath && length != 0) { 647 if (IS_SPACE(*orgpath)) { 648 switch (option) { 649 case COPYPATH_SPC_ALLOW: 650 Strcat_char(tmp, *orgpath); 651 break; 652 case COPYPATH_SPC_IGNORE: 653 /* do nothing */ 654 break; 655 case COPYPATH_SPC_REPLACE: 656 Strcat_charp(tmp, "%20"); 657 break; 658 } 659 } 660 else 661 Strcat_char(tmp, *orgpath); 662 orgpath++; 663 length--; 664 } 665 return tmp->ptr; 666 } 667 668 void 669 parseURL(char *url, ParsedURL *p_url, ParsedURL *current) 670 { 671 char *p, *q; 672 Str tmp; 673 674 url = url_quote(url); /* quote 0x01-0x20, 0x7F-0xFF */ 675 676 p = url; 677 p_url->scheme = SCM_MISSING; 678 p_url->port = 0; 679 p_url->user = NULL; 680 p_url->pass = NULL; 681 p_url->host = NULL; 682 p_url->is_nocache = 0; 683 p_url->file = NULL; 684 p_url->real_file = NULL; 685 p_url->query = NULL; 686 p_url->label = NULL; 687 688 /* RFC1808: Relative Uniform Resource Locators 689 * 4. Resolving Relative URLs 690 */ 691 if (*url == '\0' || *url == '#') { 692 if (current) 693 copyParsedURL(p_url, current); 694 goto do_label; 695 } 696 #if defined( __EMX__ ) || defined( __CYGWIN__ ) 697 if (!strncmp(url, "file://localhost/", 17)) { 698 p_url->scheme = SCM_LOCAL; 699 p += 17 - 1; 700 url += 17 - 1; 701 } 702 #endif 703 #ifdef SUPPORT_DOS_DRIVE_PREFIX 704 if (IS_ALPHA(*p) && (p[1] == ':' || p[1] == '|')) { 705 p_url->scheme = SCM_LOCAL; 706 goto analyze_file; 707 } 708 #endif /* SUPPORT_DOS_DRIVE_PREFIX */ 709 /* search for scheme */ 710 p_url->scheme = getURLScheme(&p); 711 if (p_url->scheme == SCM_MISSING) { 712 /* scheme part is not found in the url. This means either 713 * (a) the url is relative to the current or (b) the url 714 * denotes a filename (therefore the scheme is SCM_LOCAL). 715 */ 716 if (current) { 717 switch (current->scheme) { 718 case SCM_LOCAL: 719 case SCM_LOCAL_CGI: 720 p_url->scheme = SCM_LOCAL; 721 break; 722 case SCM_FTP: 723 case SCM_FTPDIR: 724 p_url->scheme = SCM_FTP; 725 break; 726 #ifdef USE_NNTP 727 case SCM_NNTP: 728 case SCM_NNTP_GROUP: 729 p_url->scheme = SCM_NNTP; 730 break; 731 case SCM_NEWS: 732 case SCM_NEWS_GROUP: 733 p_url->scheme = SCM_NEWS; 734 break; 735 #endif 736 default: 737 p_url->scheme = current->scheme; 738 break; 739 } 740 } 741 else 742 p_url->scheme = SCM_LOCAL; 743 p = url; 744 if (!strncmp(p, "//", 2)) { 745 /* URL begins with // */ 746 /* it means that 'scheme:' is abbreviated */ 747 p += 2; 748 goto analyze_url; 749 } 750 /* the url doesn't begin with '//' */ 751 goto analyze_file; 752 } 753 /* scheme part has been found */ 754 if (p_url->scheme == SCM_UNKNOWN) { 755 p_url->file = allocStr(url, -1); 756 return; 757 } 758 /* get host and port */ 759 if (p[0] != '/' || p[1] != '/') { /* scheme:foo or scheme:/foo */ 760 p_url->host = NULL; 761 if (p_url->scheme != SCM_UNKNOWN) 762 p_url->port = DefaultPort[p_url->scheme]; 763 else 764 p_url->port = 0; 765 goto analyze_file; 766 } 767 /* after here, p begins with // */ 768 if (p_url->scheme == SCM_LOCAL) { /* file://foo */ 769 #ifdef __EMX__ 770 p += 2; 771 goto analyze_file; 772 #else 773 if (p[2] == '/' || p[2] == '~' 774 /* <A HREF="file:///foo">file:///foo</A> or <A HREF="file://~user">file://~user</A> */ 775 #ifdef SUPPORT_DOS_DRIVE_PREFIX 776 || (IS_ALPHA(p[2]) && (p[3] == ':' || p[3] == '|')) 777 /* <A HREF="file://DRIVE/foo">file://DRIVE/foo</A> */ 778 #endif /* SUPPORT_DOS_DRIVE_PREFIX */ 779 ) { 780 p += 2; 781 goto analyze_file; 782 } 783 #endif /* __EMX__ */ 784 } 785 p += 2; /* scheme://foo */ 786 /* ^p is here */ 787 analyze_url: 788 q = p; 789 #ifdef INET6 790 if (*q == '[') { /* rfc2732,rfc2373 compliance */ 791 p++; 792 while (IS_XDIGIT(*p) || *p == ':' || *p == '.') 793 p++; 794 if (*p != ']' || (*(p + 1) && strchr(":/?#", *(p + 1)) == NULL)) 795 p = q; 796 } 797 #endif 798 while (*p && strchr(":/@?#", *p) == NULL) 799 p++; 800 switch (*p) { 801 case ':': 802 /* scheme://user:pass@host or 803 * scheme://host:port 804 */ 805 p_url->host = copyPath(q, p - q, COPYPATH_SPC_IGNORE); 806 q = ++p; 807 while (*p && strchr("@/?#", *p) == NULL) 808 p++; 809 if (*p == '@') { 810 /* scheme://user:pass@... */ 811 p_url->pass = copyPath(q, p - q, COPYPATH_SPC_ALLOW); 812 q = ++p; 813 p_url->user = p_url->host; 814 p_url->host = NULL; 815 goto analyze_url; 816 } 817 /* scheme://host:port/ */ 818 tmp = Strnew_charp_n(q, p - q); 819 p_url->port = atoi(tmp->ptr); 820 /* *p is one of ['\0', '/', '?', '#'] */ 821 break; 822 case '@': 823 /* scheme://user@... */ 824 p_url->user = copyPath(q, p - q, COPYPATH_SPC_IGNORE); 825 q = ++p; 826 goto analyze_url; 827 case '\0': 828 /* scheme://host */ 829 case '/': 830 case '?': 831 case '#': 832 p_url->host = copyPath(q, p - q, COPYPATH_SPC_IGNORE); 833 p_url->port = DefaultPort[p_url->scheme]; 834 break; 835 } 836 analyze_file: 837 #ifndef SUPPORT_NETBIOS_SHARE 838 if (p_url->scheme == SCM_LOCAL && p_url->user == NULL && 839 p_url->host != NULL && *p_url->host != '\0' && 840 strcmp(p_url->host, "localhost")) { 841 /* 842 * In the environments other than CYGWIN, a URL like 843 * file://host/file is regarded as ftp://host/file. 844 * On the other hand, file://host/file on CYGWIN is 845 * regarded as local access to the file //host/file. 846 * `host' is a netbios-hostname, drive, or any other 847 * name; It is CYGWIN system call who interprets that. 848 */ 849 850 p_url->scheme = SCM_FTP; /* ftp://host/... */ 851 if (p_url->port == 0) 852 p_url->port = DefaultPort[SCM_FTP]; 853 } 854 #endif 855 if ((*p == '\0' || *p == '#' || *p == '?') && p_url->host == NULL) { 856 p_url->file = ""; 857 goto do_query; 858 } 859 #ifdef SUPPORT_DOS_DRIVE_PREFIX 860 if (p_url->scheme == SCM_LOCAL) { 861 q = p; 862 if (*q == '/') 863 q++; 864 if (IS_ALPHA(q[0]) && (q[1] == ':' || q[1] == '|')) { 865 if (q[1] == '|') { 866 p = allocStr(q, -1); 867 p[1] = ':'; 868 } 869 else 870 p = q; 871 } 872 } 873 #endif 874 875 q = p; 876 #ifdef USE_GOPHER 877 if (p_url->scheme == SCM_GOPHER) { 878 if (*q == '/') 879 q++; 880 if (*q && q[0] != '/' && q[1] != '/' && q[2] == '/') 881 q++; 882 } 883 #endif /* USE_GOPHER */ 884 if (*p == '/') 885 p++; 886 if (*p == '\0' || *p == '#' || *p == '?') { /* scheme://host[:port]/ */ 887 p_url->file = DefaultFile(p_url->scheme); 888 goto do_query; 889 } 890 #ifdef USE_GOPHER 891 if (p_url->scheme == SCM_GOPHER && *p == 'R') { 892 p++; 893 tmp = Strnew(); 894 Strcat_char(tmp, *(p++)); 895 while (*p && *p != '/') 896 p++; 897 Strcat_charp(tmp, p); 898 while (*p) 899 p++; 900 p_url->file = copyPath(tmp->ptr, -1, COPYPATH_SPC_IGNORE); 901 } 902 else 903 #endif /* USE_GOPHER */ 904 { 905 char *cgi = strchr(p, '?'); 906 again: 907 while (*p && *p != '#' && p != cgi) 908 p++; 909 if (*p == '#' && p_url->scheme == SCM_LOCAL) { 910 /* 911 * According to RFC2396, # means the beginning of 912 * URI-reference, and # should be escaped. But, 913 * if the scheme is SCM_LOCAL, the special 914 * treatment will apply to # for convinience. 915 */ 916 if (p > q && *(p - 1) == '/' && (cgi == NULL || p < cgi)) { 917 /* 918 * # comes as the first character of the file name 919 * that means, # is not a label but a part of the file 920 * name. 921 */ 922 p++; 923 goto again; 924 } 925 else if (*(p + 1) == '\0') { 926 /* 927 * # comes as the last character of the file name that 928 * means, # is not a label but a part of the file 929 * name. 930 */ 931 p++; 932 } 933 } 934 if (p_url->scheme == SCM_LOCAL || p_url->scheme == SCM_MISSING) 935 p_url->file = copyPath(q, p - q, COPYPATH_SPC_ALLOW); 936 else 937 p_url->file = copyPath(q, p - q, COPYPATH_SPC_IGNORE); 938 } 939 940 do_query: 941 if (*p == '?') { 942 q = ++p; 943 while (*p && *p != '#') 944 p++; 945 p_url->query = copyPath(q, p - q, COPYPATH_SPC_ALLOW); 946 } 947 do_label: 948 if (p_url->scheme == SCM_MISSING) { 949 p_url->scheme = SCM_LOCAL; 950 p_url->file = allocStr(p, -1); 951 p_url->label = NULL; 952 } 953 else if (*p == '#') 954 p_url->label = allocStr(p + 1, -1); 955 else 956 p_url->label = NULL; 957 } 958 959 #define initParsedURL(p) bzero(p,sizeof(ParsedURL)) 960 #define ALLOC_STR(s) ((s)==NULL?NULL:allocStr(s,-1)) 961 962 void 963 copyParsedURL(ParsedURL *p, ParsedURL *q) 964 { 965 p->scheme = q->scheme; 966 p->port = q->port; 967 p->is_nocache = q->is_nocache; 968 p->user = ALLOC_STR(q->user); 969 p->pass = ALLOC_STR(q->pass); 970 p->host = ALLOC_STR(q->host); 971 p->file = ALLOC_STR(q->file); 972 p->real_file = ALLOC_STR(q->real_file); 973 p->label = ALLOC_STR(q->label); 974 p->query = ALLOC_STR(q->query); 975 } 976 977 void 978 parseURL2(char *url, ParsedURL *pu, ParsedURL *current) 979 { 980 char *p; 981 Str tmp; 982 int relative_uri = FALSE; 983 984 parseURL(url, pu, current); 985 #ifndef USE_W3MMAILER 986 if (pu->scheme == SCM_MAILTO) 987 return; 988 #endif 989 if (pu->scheme == SCM_DATA) 990 return; 991 if (pu->scheme == SCM_NEWS || pu->scheme == SCM_NEWS_GROUP) { 992 if (pu->file && !strchr(pu->file, '@') && 993 (!(p = strchr(pu->file, '/')) || strchr(p + 1, '-') || 994 *(p + 1) == '\0')) 995 pu->scheme = SCM_NEWS_GROUP; 996 else 997 pu->scheme = SCM_NEWS; 998 return; 999 } 1000 if (pu->scheme == SCM_NNTP || pu->scheme == SCM_NNTP_GROUP) { 1001 if (pu->file && *pu->file == '/') 1002 pu->file = allocStr(pu->file + 1, -1); 1003 if (pu->file && !strchr(pu->file, '@') && 1004 (!(p = strchr(pu->file, '/')) || strchr(p + 1, '-') || 1005 *(p + 1) == '\0')) 1006 pu->scheme = SCM_NNTP_GROUP; 1007 else 1008 pu->scheme = SCM_NNTP; 1009 if (current && (current->scheme == SCM_NNTP || 1010 current->scheme == SCM_NNTP_GROUP)) { 1011 if (pu->host == NULL) { 1012 pu->host = current->host; 1013 pu->port = current->port; 1014 } 1015 } 1016 return; 1017 } 1018 if (pu->scheme == SCM_LOCAL) { 1019 char *q = expandName(file_unquote(pu->file)); 1020 #ifdef SUPPORT_DOS_DRIVE_PREFIX 1021 Str drive; 1022 if (IS_ALPHA(q[0]) && q[1] == ':') { 1023 drive = Strnew_charp_n(q, 2); 1024 Strcat_charp(drive, file_quote(q+2)); 1025 pu->file = drive->ptr; 1026 } 1027 else 1028 #endif 1029 pu->file = file_quote(q); 1030 } 1031 1032 if (current && (pu->scheme == current->scheme || 1033 (pu->scheme == SCM_FTP && current->scheme == SCM_FTPDIR) || 1034 (pu->scheme == SCM_LOCAL && 1035 current->scheme == SCM_LOCAL_CGI)) 1036 && pu->host == NULL) { 1037 /* Copy omitted element from the current URL */ 1038 pu->user = current->user; 1039 pu->pass = current->pass; 1040 pu->host = current->host; 1041 pu->port = current->port; 1042 if (pu->file && *pu->file) { 1043 #ifdef USE_EXTERNAL_URI_LOADER 1044 if (pu->scheme == SCM_UNKNOWN 1045 && strchr(pu->file, ':') == NULL 1046 && current && (p = strchr(current->file, ':')) != NULL) { 1047 pu->file = Sprintf("%s:%s", 1048 allocStr(current->file, 1049 p - current->file), pu->file)->ptr; 1050 } 1051 else 1052 #endif 1053 if ( 1054 #ifdef USE_GOPHER 1055 pu->scheme != SCM_GOPHER && 1056 #endif /* USE_GOPHER */ 1057 pu->file[0] != '/' 1058 #ifdef SUPPORT_DOS_DRIVE_PREFIX 1059 && !(pu->scheme == SCM_LOCAL && IS_ALPHA(pu->file[0]) 1060 && pu->file[1] == ':') 1061 #endif 1062 ) { 1063 /* file is relative [process 1] */ 1064 p = pu->file; 1065 if (current->file) { 1066 tmp = Strnew_charp(current->file); 1067 while (tmp->length > 0) { 1068 if (Strlastchar(tmp) == '/') 1069 break; 1070 Strshrink(tmp, 1); 1071 } 1072 Strcat_charp(tmp, p); 1073 pu->file = tmp->ptr; 1074 relative_uri = TRUE; 1075 } 1076 } 1077 #ifdef USE_GOPHER 1078 else if (pu->scheme == SCM_GOPHER && pu->file[0] == '/') { 1079 p = pu->file; 1080 pu->file = allocStr(p + 1, -1); 1081 } 1082 #endif /* USE_GOPHER */ 1083 } 1084 else { /* scheme:[?query][#label] */ 1085 pu->file = current->file; 1086 if (!pu->query) 1087 pu->query = current->query; 1088 } 1089 /* comment: query part need not to be completed 1090 * from the current URL. */ 1091 } 1092 if (pu->file) { 1093 #ifdef __EMX__ 1094 if (pu->scheme == SCM_LOCAL) { 1095 if (strncmp(pu->file, "/$LIB/", 6)) { 1096 char abs[_MAX_PATH]; 1097 1098 _abspath(abs, file_unquote(pu->file), _MAX_PATH); 1099 pu->file = file_quote(cleanupName(abs)); 1100 } 1101 } 1102 #else 1103 if (pu->scheme == SCM_LOCAL && pu->file[0] != '/' && 1104 #ifdef SUPPORT_DOS_DRIVE_PREFIX /* for 'drive:' */ 1105 !(IS_ALPHA(pu->file[0]) && pu->file[1] == ':') && 1106 #endif 1107 strcmp(pu->file, "-")) { 1108 /* local file, relative path */ 1109 tmp = Strnew_charp(CurrentDir); 1110 if (Strlastchar(tmp) != '/') 1111 Strcat_char(tmp, '/'); 1112 Strcat_charp(tmp, file_unquote(pu->file)); 1113 pu->file = file_quote(cleanupName(tmp->ptr)); 1114 } 1115 #endif 1116 else if (pu->scheme == SCM_HTTP 1117 #ifdef USE_SSL 1118 || pu->scheme == SCM_HTTPS 1119 #endif 1120 ) { 1121 if (relative_uri) { 1122 /* In this case, pu->file is created by [process 1] above. 1123 * pu->file may contain relative path (for example, 1124 * "/foo/../bar/./baz.html"), cleanupName() must be applied. 1125 * When the entire abs_path is given, it still may contain 1126 * elements like `//', `..' or `.' in the pu->file. It is 1127 * server's responsibility to canonicalize such path. 1128 */ 1129 pu->file = cleanupName(pu->file); 1130 } 1131 } 1132 else if ( 1133 #ifdef USE_GOPHER 1134 pu->scheme != SCM_GOPHER && 1135 #endif /* USE_GOPHER */ 1136 pu->file[0] == '/') { 1137 /* 1138 * this happens on the following conditions: 1139 * (1) ftp scheme (2) local, looks like absolute path. 1140 * In both case, there must be no side effect with 1141 * cleanupName(). (I hope so...) 1142 */ 1143 pu->file = cleanupName(pu->file); 1144 } 1145 if (pu->scheme == SCM_LOCAL) { 1146 #ifdef SUPPORT_NETBIOS_SHARE 1147 if (pu->host && strcmp(pu->host, "localhost") != 0) { 1148 Str tmp = Strnew_charp("//"); 1149 Strcat_m_charp(tmp, pu->host, 1150 cleanupName(file_unquote(pu->file)), NULL); 1151 pu->real_file = tmp->ptr; 1152 } 1153 else 1154 #endif 1155 pu->real_file = cleanupName(file_unquote(pu->file)); 1156 } 1157 } 1158 } 1159 1160 static Str 1161 _parsedURL2Str(ParsedURL *pu, int pass) 1162 { 1163 Str tmp; 1164 static char *scheme_str[] = { 1165 "http", "gopher", "ftp", "ftp", "file", "file", "exec", "nntp", "nntp", 1166 "news", "news", "data", "mailto", 1167 #ifdef USE_SSL 1168 "https", 1169 #endif /* USE_SSL */ 1170 }; 1171 1172 if (pu->scheme == SCM_MISSING) { 1173 return Strnew_charp("???"); 1174 } 1175 else if (pu->scheme == SCM_UNKNOWN) { 1176 return Strnew_charp(pu->file); 1177 } 1178 if (pu->host == NULL && pu->file == NULL && pu->label != NULL) { 1179 /* local label */ 1180 return Sprintf("#%s", pu->label); 1181 } 1182 if (pu->scheme == SCM_LOCAL && !strcmp(pu->file, "-")) { 1183 tmp = Strnew_charp("-"); 1184 if (pu->label) { 1185 Strcat_char(tmp, '#'); 1186 Strcat_charp(tmp, pu->label); 1187 } 1188 return tmp; 1189 } 1190 tmp = Strnew_charp(scheme_str[pu->scheme]); 1191 Strcat_char(tmp, ':'); 1192 #ifndef USE_W3MMAILER 1193 if (pu->scheme == SCM_MAILTO) { 1194 Strcat_charp(tmp, pu->file); 1195 if (pu->query) { 1196 Strcat_char(tmp, '?'); 1197 Strcat_charp(tmp, pu->query); 1198 } 1199 return tmp; 1200 } 1201 #endif 1202 if (pu->scheme == SCM_DATA) { 1203 Strcat_charp(tmp, pu->file); 1204 return tmp; 1205 } 1206 #ifdef USE_NNTP 1207 if (pu->scheme != SCM_NEWS && pu->scheme != SCM_NEWS_GROUP) 1208 #endif /* USE_NNTP */ 1209 { 1210 Strcat_charp(tmp, "//"); 1211 } 1212 if (pu->user) { 1213 Strcat_charp(tmp, pu->user); 1214 if (pass && pu->pass) { 1215 Strcat_char(tmp, ':'); 1216 Strcat_charp(tmp, pu->pass); 1217 } 1218 Strcat_char(tmp, '@'); 1219 } 1220 if (pu->host) { 1221 Strcat_charp(tmp, pu->host); 1222 if (pu->port != DefaultPort[pu->scheme]) { 1223 Strcat_char(tmp, ':'); 1224 Strcat(tmp, Sprintf("%d", pu->port)); 1225 } 1226 } 1227 if ( 1228 #ifdef USE_NNTP 1229 pu->scheme != SCM_NEWS && pu->scheme != SCM_NEWS_GROUP && 1230 #endif /* USE_NNTP */ 1231 (pu->file == NULL || (pu->file[0] != '/' 1232 #ifdef SUPPORT_DOS_DRIVE_PREFIX 1233 && !(IS_ALPHA(pu->file[0]) 1234 && pu->file[1] == ':' 1235 && pu->host == NULL) 1236 #endif 1237 ))) 1238 Strcat_char(tmp, '/'); 1239 Strcat_charp(tmp, pu->file); 1240 if (pu->scheme == SCM_FTPDIR && Strlastchar(tmp) != '/') 1241 Strcat_char(tmp, '/'); 1242 if (pu->query) { 1243 Strcat_char(tmp, '?'); 1244 Strcat_charp(tmp, pu->query); 1245 } 1246 if (pu->label) { 1247 Strcat_char(tmp, '#'); 1248 Strcat_charp(tmp, pu->label); 1249 } 1250 return tmp; 1251 } 1252 1253 Str 1254 parsedURL2Str(ParsedURL *pu) 1255 { 1256 return _parsedURL2Str(pu, FALSE); 1257 } 1258 1259 int 1260 getURLScheme(char **url) 1261 { 1262 char *p = *url, *q; 1263 int i; 1264 int scheme = SCM_MISSING; 1265 1266 while (*p && (IS_ALNUM(*p) || *p == '.' || *p == '+' || *p == '-')) 1267 p++; 1268 if (*p == ':') { /* scheme found */ 1269 scheme = SCM_UNKNOWN; 1270 for (i = 0; (q = schemetable[i].cmdname) != NULL; i++) { 1271 int len = strlen(q); 1272 if (!strncasecmp(q, *url, len) && (*url)[len] == ':') { 1273 scheme = schemetable[i].cmd; 1274 *url = p + 1; 1275 break; 1276 } 1277 } 1278 } 1279 return scheme; 1280 } 1281 1282 static char * 1283 otherinfo(ParsedURL *target, ParsedURL *current, char *referer) 1284 { 1285 Str s = Strnew(); 1286 1287 Strcat_charp(s, "User-Agent: "); 1288 if (UserAgent == NULL || *UserAgent == '\0') 1289 Strcat_charp(s, w3m_version); 1290 else 1291 Strcat_charp(s, UserAgent); 1292 Strcat_charp(s, "\r\n"); 1293 1294 Strcat_m_charp(s, "Accept: ", AcceptMedia, "\r\n", NULL); 1295 Strcat_m_charp(s, "Accept-Encoding: ", AcceptEncoding, "\r\n", NULL); 1296 Strcat_m_charp(s, "Accept-Language: ", AcceptLang, "\r\n", NULL); 1297 1298 if (target->host) { 1299 Strcat_charp(s, "Host: "); 1300 Strcat_charp(s, target->host); 1301 if (target->port != DefaultPort[target->scheme]) 1302 Strcat(s, Sprintf(":%d", target->port)); 1303 Strcat_charp(s, "\r\n"); 1304 } 1305 if (target->is_nocache || NoCache) { 1306 Strcat_charp(s, "Pragma: no-cache\r\n"); 1307 Strcat_charp(s, "Cache-control: no-cache\r\n"); 1308 } 1309 if (!NoSendReferer) { 1310 #ifdef USE_SSL 1311 if (current && current->scheme == SCM_HTTPS && target->scheme != SCM_HTTPS) { 1312 /* Don't send Referer: if https:// -> http:// */ 1313 } 1314 else 1315 #endif 1316 if (referer == NULL && current && current->scheme != SCM_LOCAL && 1317 (current->scheme != SCM_FTP || 1318 (current->user == NULL && current->pass == NULL))) { 1319 char *p = current->label; 1320 Strcat_charp(s, "Referer: "); 1321 current->label = NULL; 1322 Strcat(s, parsedURL2Str(current)); 1323 current->label = p; 1324 Strcat_charp(s, "\r\n"); 1325 } 1326 else if (referer != NULL && referer != NO_REFERER) { 1327 char *p = strchr(referer, '#'); 1328 Strcat_charp(s, "Referer: "); 1329 if (p) 1330 Strcat_charp_n(s, referer, p - referer); 1331 else 1332 Strcat_charp(s, referer); 1333 Strcat_charp(s, "\r\n"); 1334 } 1335 } 1336 return s->ptr; 1337 } 1338 1339 Str 1340 HTTPrequestMethod(HRequest *hr) 1341 { 1342 switch (hr->command) { 1343 case HR_COMMAND_CONNECT: 1344 return Strnew_charp("CONNECT"); 1345 case HR_COMMAND_POST: 1346 return Strnew_charp("POST"); 1347 break; 1348 case HR_COMMAND_HEAD: 1349 return Strnew_charp("HEAD"); 1350 break; 1351 case HR_COMMAND_GET: 1352 default: 1353 return Strnew_charp("GET"); 1354 } 1355 return NULL; 1356 } 1357 1358 Str 1359 HTTPrequestURI(ParsedURL *pu, HRequest *hr) 1360 { 1361 Str tmp = Strnew(); 1362 if (hr->command == HR_COMMAND_CONNECT) { 1363 Strcat_charp(tmp, pu->host); 1364 Strcat(tmp, Sprintf(":%d", pu->port)); 1365 } 1366 else if (hr->flag & HR_FLAG_LOCAL) { 1367 Strcat_charp(tmp, pu->file); 1368 if (pu->query) { 1369 Strcat_char(tmp, '?'); 1370 Strcat_charp(tmp, pu->query); 1371 } 1372 } 1373 else { 1374 char *save_label = pu->label; 1375 pu->label = NULL; 1376 Strcat(tmp, _parsedURL2Str(pu, TRUE)); 1377 pu->label = save_label; 1378 } 1379 return tmp; 1380 } 1381 1382 static Str 1383 HTTPrequest(ParsedURL *pu, ParsedURL *current, HRequest *hr, TextList *extra) 1384 { 1385 Str tmp; 1386 TextListItem *i; 1387 int seen_www_auth = 0; 1388 #ifdef USE_COOKIE 1389 Str cookie; 1390 #endif /* USE_COOKIE */ 1391 tmp = HTTPrequestMethod(hr); 1392 Strcat_charp(tmp, " "); 1393 Strcat_charp(tmp, HTTPrequestURI(pu, hr)->ptr); 1394 Strcat_charp(tmp, " HTTP/1.0\r\n"); 1395 if (hr->referer == NO_REFERER) 1396 Strcat_charp(tmp, otherinfo(pu, NULL, NULL)); 1397 else 1398 Strcat_charp(tmp, otherinfo(pu, current, hr->referer)); 1399 if (extra != NULL) 1400 for (i = extra->first; i != NULL; i = i->next) { 1401 if (strncasecmp(i->ptr, "Authorization:", 1402 sizeof("Authorization:") - 1) == 0) { 1403 seen_www_auth = 1; 1404 #ifdef USE_SSL 1405 if (hr->command == HR_COMMAND_CONNECT) 1406 continue; 1407 #endif 1408 } 1409 if (strncasecmp(i->ptr, "Proxy-Authorization:", 1410 sizeof("Proxy-Authorization:") - 1) == 0) { 1411 #ifdef USE_SSL 1412 if (pu->scheme == SCM_HTTPS 1413 && hr->command != HR_COMMAND_CONNECT) 1414 continue; 1415 #endif 1416 } 1417 Strcat_charp(tmp, i->ptr); 1418 } 1419 1420 #ifdef USE_COOKIE 1421 if (hr->command != HR_COMMAND_CONNECT && 1422 use_cookie && (cookie = find_cookie(pu))) { 1423 Strcat_charp(tmp, "Cookie: "); 1424 Strcat(tmp, cookie); 1425 Strcat_charp(tmp, "\r\n"); 1426 /* [DRAFT 12] s. 10.1 */ 1427 if (cookie->ptr[0] != '$') 1428 Strcat_charp(tmp, "Cookie2: $Version=\"1\"\r\n"); 1429 } 1430 #endif /* USE_COOKIE */ 1431 if (hr->command == HR_COMMAND_POST) { 1432 if (hr->request->enctype == FORM_ENCTYPE_MULTIPART) { 1433 Strcat_charp(tmp, "Content-type: multipart/form-data; boundary="); 1434 Strcat_charp(tmp, hr->request->boundary); 1435 Strcat_charp(tmp, "\r\n"); 1436 Strcat(tmp, 1437 Sprintf("Content-length: %ld\r\n", hr->request->length)); 1438 Strcat_charp(tmp, "\r\n"); 1439 } 1440 else { 1441 if (!override_content_type) { 1442 Strcat_charp(tmp, 1443 "Content-type: application/x-www-form-urlencoded\r\n"); 1444 } 1445 Strcat(tmp, 1446 Sprintf("Content-length: %ld\r\n", hr->request->length)); 1447 if (header_string) 1448 Strcat(tmp, header_string); 1449 Strcat_charp(tmp, "\r\n"); 1450 Strcat_charp_n(tmp, hr->request->body, hr->request->length); 1451 Strcat_charp(tmp, "\r\n"); 1452 } 1453 } 1454 else { 1455 if (header_string) 1456 Strcat(tmp, header_string); 1457 Strcat_charp(tmp, "\r\n"); 1458 } 1459 #ifdef DEBUG 1460 fprintf(stderr, "HTTPrequest: [ %s ]\n\n", tmp->ptr); 1461 #endif /* DEBUG */ 1462 return tmp; 1463 } 1464 1465 void 1466 init_stream(URLFile *uf, int scheme, InputStream stream) 1467 { 1468 memset(uf, 0, sizeof(URLFile)); 1469 uf->stream = stream; 1470 uf->scheme = scheme; 1471 uf->encoding = ENC_7BIT; 1472 uf->is_cgi = FALSE; 1473 uf->compression = CMP_NOCOMPRESS; 1474 uf->content_encoding = CMP_NOCOMPRESS; 1475 uf->guess_type = NULL; 1476 uf->ext = NULL; 1477 uf->modtime = -1; 1478 } 1479 1480 URLFile 1481 openURL(char *url, ParsedURL *pu, ParsedURL *current, 1482 URLOption *option, FormList *request, TextList *extra_header, 1483 URLFile *ouf, HRequest *hr, unsigned char *status) 1484 { 1485 Str tmp; 1486 int sock, scheme; 1487 char *p, *q, *u; 1488 URLFile uf; 1489 HRequest hr0; 1490 #ifdef USE_SSL 1491 SSL *sslh = NULL; 1492 #endif /* USE_SSL */ 1493 1494 if (hr == NULL) 1495 hr = &hr0; 1496 1497 if (ouf) { 1498 uf = *ouf; 1499 } 1500 else { 1501 init_stream(&uf, SCM_MISSING, NULL); 1502 } 1503 1504 u = url; 1505 scheme = getURLScheme(&u); 1506 if (current == NULL && scheme == SCM_MISSING && !ArgvIsURL) 1507 u = file_to_url(url); /* force to local file */ 1508 else 1509 u = url; 1510 retry: 1511 parseURL2(u, pu, current); 1512 if (pu->scheme == SCM_LOCAL && pu->file == NULL) { 1513 if (pu->label != NULL) { 1514 /* #hogege is not a label but a filename */ 1515 Str tmp2 = Strnew_charp("#"); 1516 Strcat_charp(tmp2, pu->label); 1517 pu->file = tmp2->ptr; 1518 pu->real_file = cleanupName(file_unquote(pu->file)); 1519 pu->label = NULL; 1520 } 1521 else { 1522 /* given URL must be null string */ 1523 #ifdef SOCK_DEBUG 1524 sock_log("given URL must be null string\n"); 1525 #endif 1526 return uf; 1527 } 1528 } 1529 1530 uf.scheme = pu->scheme; 1531 uf.url = parsedURL2Str(pu)->ptr; 1532 pu->is_nocache = (option->flag & RG_NOCACHE); 1533 uf.ext = filename_extension(pu->file, 1); 1534 1535 hr->command = HR_COMMAND_GET; 1536 hr->flag = 0; 1537 hr->referer = option->referer; 1538 hr->request = request; 1539 1540 switch (pu->scheme) { 1541 case SCM_LOCAL: 1542 case SCM_LOCAL_CGI: 1543 if (request && request->body) 1544 /* local CGI: POST */ 1545 uf.stream = newFileStream(localcgi_post(pu->real_file, pu->query, 1546 request, option->referer), 1547 (void (*)())fclose); 1548 else 1549 /* lodal CGI: GET */ 1550 uf.stream = newFileStream(localcgi_get(pu->real_file, pu->query, 1551 option->referer), 1552 (void (*)())fclose); 1553 if (uf.stream) { 1554 uf.is_cgi = TRUE; 1555 uf.scheme = pu->scheme = SCM_LOCAL_CGI; 1556 return uf; 1557 } 1558 examineFile(pu->real_file, &uf); 1559 if (uf.stream == NULL) { 1560 if (dir_exist(pu->real_file)) { 1561 add_index_file(pu, &uf); 1562 if (uf.stream == NULL) 1563 return uf; 1564 } 1565 else if (document_root != NULL) { 1566 tmp = Strnew_charp(document_root); 1567 if (Strlastchar(tmp) != '/' && pu->file[0] != '/') 1568 Strcat_char(tmp, '/'); 1569 Strcat_charp(tmp, pu->file); 1570 p = cleanupName(tmp->ptr); 1571 q = cleanupName(file_unquote(p)); 1572 if (dir_exist(q)) { 1573 pu->file = p; 1574 pu->real_file = q; 1575 add_index_file(pu, &uf); 1576 if (uf.stream == NULL) { 1577 return uf; 1578 } 1579 } 1580 else { 1581 examineFile(q, &uf); 1582 if (uf.stream) { 1583 pu->file = p; 1584 pu->real_file = q; 1585 } 1586 } 1587 } 1588 } 1589 if (uf.stream == NULL && retryAsHttp && url[0] != '/') { 1590 if (scheme == SCM_MISSING || scheme == SCM_UNKNOWN) { 1591 /* retry it as "http://" */ 1592 u = Strnew_m_charp("http://", url, NULL)->ptr; 1593 goto retry; 1594 } 1595 } 1596 return uf; 1597 case SCM_FTP: 1598 case SCM_FTPDIR: 1599 if (pu->file == NULL) 1600 pu->file = allocStr("/", -1); 1601 if (non_null(FTP_proxy) && 1602 !Do_not_use_proxy && 1603 pu->host != NULL && !check_no_proxy(pu->host)) { 1604 hr->flag |= HR_FLAG_PROXY; 1605 sock = openSocket(FTP_proxy_parsed.host, 1606 schemetable[FTP_proxy_parsed.scheme].cmdname, 1607 FTP_proxy_parsed.port); 1608 if (sock < 0) 1609 return uf; 1610 uf.scheme = SCM_HTTP; 1611 tmp = HTTPrequest(pu, current, hr, extra_header); 1612 write(sock, tmp->ptr, tmp->length); 1613 } 1614 else { 1615 uf.stream = openFTPStream(pu, &uf); 1616 uf.scheme = pu->scheme; 1617 return uf; 1618 } 1619 break; 1620 case SCM_HTTP: 1621 #ifdef USE_SSL 1622 case SCM_HTTPS: 1623 #endif /* USE_SSL */ 1624 if (pu->file == NULL) 1625 pu->file = allocStr("/", -1); 1626 if (request && request->method == FORM_METHOD_POST && request->body) 1627 hr->command = HR_COMMAND_POST; 1628 if (request && request->method == FORM_METHOD_HEAD) 1629 hr->command = HR_COMMAND_HEAD; 1630 if (( 1631 #ifdef USE_SSL 1632 (pu->scheme == SCM_HTTPS) ? non_null(HTTPS_proxy) : 1633 #endif /* USE_SSL */ 1634 non_null(HTTP_proxy)) && !Do_not_use_proxy && 1635 pu->host != NULL && !check_no_proxy(pu->host)) { 1636 hr->flag |= HR_FLAG_PROXY; 1637 #ifdef USE_SSL 1638 if (pu->scheme == SCM_HTTPS && *status == HTST_CONNECT) { 1639 sock = ssl_socket_of(ouf->stream); 1640 if (!(sslh = openSSLHandle(sock, pu->host, 1641 &uf.ssl_certificate))) { 1642 *status = HTST_MISSING; 1643 return uf; 1644 } 1645 } 1646 else if (pu->scheme == SCM_HTTPS) { 1647 sock = openSocket(HTTPS_proxy_parsed.host, 1648 schemetable[HTTPS_proxy_parsed.scheme]. 1649 cmdname, HTTPS_proxy_parsed.port); 1650 sslh = NULL; 1651 } 1652 else { 1653 #endif /* USE_SSL */ 1654 sock = openSocket(HTTP_proxy_parsed.host, 1655 schemetable[HTTP_proxy_parsed.scheme]. 1656 cmdname, HTTP_proxy_parsed.port); 1657 #ifdef USE_SSL 1658 sslh = NULL; 1659 } 1660 #endif /* USE_SSL */ 1661 if (sock < 0) { 1662 #ifdef SOCK_DEBUG 1663 sock_log("Can't open socket\n"); 1664 #endif 1665 return uf; 1666 } 1667 #ifdef USE_SSL 1668 if (pu->scheme == SCM_HTTPS) { 1669 if (*status == HTST_NORMAL) { 1670 hr->command = HR_COMMAND_CONNECT; 1671 tmp = HTTPrequest(pu, current, hr, extra_header); 1672 *status = HTST_CONNECT; 1673 } 1674 else { 1675 hr->flag |= HR_FLAG_LOCAL; 1676 tmp = HTTPrequest(pu, current, hr, extra_header); 1677 *status = HTST_NORMAL; 1678 } 1679 } 1680 else 1681 #endif /* USE_SSL */ 1682 { 1683 tmp = HTTPrequest(pu, current, hr, extra_header); 1684 *status = HTST_NORMAL; 1685 } 1686 } 1687 else { 1688 sock = openSocket(pu->host, 1689 schemetable[pu->scheme].cmdname, pu->port); 1690 if (sock < 0) { 1691 *status = HTST_MISSING; 1692 return uf; 1693 } 1694 #ifdef USE_SSL 1695 if (pu->scheme == SCM_HTTPS) { 1696 if (!(sslh = openSSLHandle(sock, pu->host, 1697 &uf.ssl_certificate))) { 1698 *status = HTST_MISSING; 1699 return uf; 1700 } 1701 } 1702 #endif /* USE_SSL */ 1703 hr->flag |= HR_FLAG_LOCAL; 1704 tmp = HTTPrequest(pu, current, hr, extra_header); 1705 *status = HTST_NORMAL; 1706 } 1707 #ifdef USE_SSL 1708 if (pu->scheme == SCM_HTTPS) { 1709 uf.stream = newSSLStream(sslh, sock); 1710 if (sslh) 1711 SSL_write(sslh, tmp->ptr, tmp->length); 1712 else 1713 write(sock, tmp->ptr, tmp->length); 1714 if(w3m_reqlog){ 1715 FILE *ff = fopen(w3m_reqlog, "a"); 1716 if (sslh) 1717 fputs("HTTPS: request via SSL\n", ff); 1718 else 1719 fputs("HTTPS: request without SSL\n", ff); 1720 fwrite(tmp->ptr, sizeof(char), tmp->length, ff); 1721 fclose(ff); 1722 } 1723 if (hr->command == HR_COMMAND_POST && 1724 request->enctype == FORM_ENCTYPE_MULTIPART) { 1725 if (sslh) 1726 SSL_write_from_file(sslh, request->body); 1727 else 1728 write_from_file(sock, request->body); 1729 } 1730 return uf; 1731 } 1732 else 1733 #endif /* USE_SSL */ 1734 { 1735 write(sock, tmp->ptr, tmp->length); 1736 if(w3m_reqlog){ 1737 FILE *ff = fopen(w3m_reqlog, "a"); 1738 fwrite(tmp->ptr, sizeof(char), tmp->length, ff); 1739 fclose(ff); 1740 } 1741 if (hr->command == HR_COMMAND_POST && 1742 request->enctype == FORM_ENCTYPE_MULTIPART) 1743 write_from_file(sock, request->body); 1744 } 1745 break; 1746 #ifdef USE_GOPHER 1747 case SCM_GOPHER: 1748 if (non_null(GOPHER_proxy) && 1749 !Do_not_use_proxy && 1750 pu->host != NULL && !check_no_proxy(pu->host)) { 1751 hr->flag |= HR_FLAG_PROXY; 1752 sock = openSocket(GOPHER_proxy_parsed.host, 1753 schemetable[GOPHER_proxy_parsed.scheme].cmdname, 1754 GOPHER_proxy_parsed.port); 1755 if (sock < 0) 1756 return uf; 1757 uf.scheme = SCM_HTTP; 1758 tmp = HTTPrequest(pu, current, hr, extra_header); 1759 } 1760 else { 1761 sock = openSocket(pu->host, 1762 schemetable[pu->scheme].cmdname, pu->port); 1763 if (sock < 0) 1764 return uf; 1765 if (pu->file == NULL) 1766 pu->file = "1"; 1767 tmp = Strnew_charp(file_unquote(pu->file)); 1768 Strcat_char(tmp, '\n'); 1769 } 1770 write(sock, tmp->ptr, tmp->length); 1771 break; 1772 #endif /* USE_GOPHER */ 1773 #ifdef USE_NNTP 1774 case SCM_NNTP: 1775 case SCM_NNTP_GROUP: 1776 case SCM_NEWS: 1777 case SCM_NEWS_GROUP: 1778 if (pu->scheme == SCM_NNTP || pu->scheme == SCM_NEWS) 1779 uf.scheme = SCM_NEWS; 1780 else 1781 uf.scheme = SCM_NEWS_GROUP; 1782 uf.stream = openNewsStream(pu); 1783 return uf; 1784 #endif /* USE_NNTP */ 1785 case SCM_DATA: 1786 if (pu->file == NULL) 1787 return uf; 1788 p = Strnew_charp(pu->file)->ptr; 1789 q = strchr(p, ','); 1790 if (q == NULL) 1791 return uf; 1792 *q++ = '\0'; 1793 tmp = Strnew_charp(q); 1794 q = strrchr(p, ';'); 1795 if (q != NULL && !strcmp(q, ";base64")) { 1796 *q = '\0'; 1797 uf.encoding = ENC_BASE64; 1798 } 1799 else 1800 tmp = Str_url_unquote(tmp, FALSE, FALSE); 1801 uf.stream = newStrStream(tmp); 1802 uf.guess_type = (*p != '\0') ? p : "text/plain"; 1803 return uf; 1804 case SCM_UNKNOWN: 1805 default: 1806 return uf; 1807 } 1808 uf.stream = newInputStream(sock); 1809 return uf; 1810 } 1811 1812 /* add index_file if exists */ 1813 static void 1814 add_index_file(ParsedURL *pu, URLFile *uf) 1815 { 1816 char *p, *q; 1817 TextList *index_file_list = NULL; 1818 TextListItem *ti; 1819 1820 if (non_null(index_file)) 1821 index_file_list = make_domain_list(index_file); 1822 if (index_file_list == NULL) { 1823 uf->stream = NULL; 1824 return; 1825 } 1826 for (ti = index_file_list->first; ti; ti = ti->next) { 1827 p = Strnew_m_charp(pu->file, "/", file_quote(ti->ptr), NULL)->ptr; 1828 p = cleanupName(p); 1829 q = cleanupName(file_unquote(p)); 1830 examineFile(q, uf); 1831 if (uf->stream != NULL) { 1832 pu->file = p; 1833 pu->real_file = q; 1834 return; 1835 } 1836 } 1837 } 1838 1839 static char * 1840 guessContentTypeFromTable(struct table2 *table, char *filename) 1841 { 1842 struct table2 *t; 1843 char *p; 1844 if (table == NULL) 1845 return NULL; 1846 p = &filename[strlen(filename) - 1]; 1847 while (filename < p && *p != '.') 1848 p--; 1849 if (p == filename) 1850 return NULL; 1851 p++; 1852 for (t = table; t->item1; t++) { 1853 if (!strcmp(p, t->item1)) 1854 return t->item2; 1855 } 1856 for (t = table; t->item1; t++) { 1857 if (!strcasecmp(p, t->item1)) 1858 return t->item2; 1859 } 1860 return NULL; 1861 } 1862 1863 char * 1864 guessContentType(char *filename) 1865 { 1866 char *ret; 1867 int i; 1868 1869 if (filename == NULL) 1870 return NULL; 1871 if (mimetypes_list == NULL) 1872 goto no_user_mimetypes; 1873 1874 for (i = 0; i < mimetypes_list->nitem; i++) { 1875 if ((ret = 1876 guessContentTypeFromTable(UserMimeTypes[i], filename)) != NULL) 1877 return ret; 1878 } 1879 1880 no_user_mimetypes: 1881 return guessContentTypeFromTable(DefaultGuess, filename); 1882 } 1883 1884 TextList * 1885 make_domain_list(char *domain_list) 1886 { 1887 char *p; 1888 Str tmp; 1889 TextList *domains = NULL; 1890 1891 p = domain_list; 1892 tmp = Strnew_size(64); 1893 while (*p) { 1894 while (*p && IS_SPACE(*p)) 1895 p++; 1896 Strclear(tmp); 1897 while (*p && !IS_SPACE(*p) && *p != ',') 1898 Strcat_char(tmp, *p++); 1899 if (tmp->length > 0) { 1900 if (domains == NULL) 1901 domains = newTextList(); 1902 pushText(domains, tmp->ptr); 1903 } 1904 while (*p && IS_SPACE(*p)) 1905 p++; 1906 if (*p == ',') 1907 p++; 1908 } 1909 return domains; 1910 } 1911 1912 static int 1913 domain_match(char *pat, char *domain) 1914 { 1915 if (domain == NULL) 1916 return 0; 1917 if (*pat == '.') 1918 pat++; 1919 for (;;) { 1920 if (!strcasecmp(pat, domain)) 1921 return 1; 1922 domain = strchr(domain, '.'); 1923 if (domain == NULL) 1924 return 0; 1925 domain++; 1926 } 1927 } 1928 1929 int 1930 check_no_proxy(char *domain) 1931 { 1932 TextListItem *tl; 1933 volatile int ret = 0; 1934 MySignalHandler(*volatile prevtrap) (SIGNAL_ARG) = NULL; 1935 1936 if (NO_proxy_domains == NULL || NO_proxy_domains->nitem == 0 || 1937 domain == NULL) 1938 return 0; 1939 for (tl = NO_proxy_domains->first; tl != NULL; tl = tl->next) { 1940 if (domain_match(tl->ptr, domain)) 1941 return 1; 1942 } 1943 if (!NOproxy_netaddr) { 1944 return 0; 1945 } 1946 /* 1947 * to check noproxy by network addr 1948 */ 1949 if (SETJMP(AbortLoading) != 0) { 1950 ret = 0; 1951 goto end; 1952 } 1953 TRAP_ON; 1954 { 1955 #ifndef INET6 1956 struct hostent *he; 1957 int n; 1958 unsigned char **h_addr_list; 1959 char addr[4 * 16], buf[5]; 1960 1961 he = gethostbyname(domain); 1962 if (!he) { 1963 ret = 0; 1964 goto end; 1965 } 1966 for (h_addr_list = (unsigned char **)he->h_addr_list; *h_addr_list; 1967 h_addr_list++) { 1968 sprintf(addr, "%d", h_addr_list[0][0]); 1969 for (n = 1; n < he->h_length; n++) { 1970 sprintf(buf, ".%d", h_addr_list[0][n]); 1971 strcat(addr, buf); 1972 } 1973 for (tl = NO_proxy_domains->first; tl != NULL; tl = tl->next) { 1974 if (strncmp(tl->ptr, addr, strlen(tl->ptr)) == 0) { 1975 ret = 1; 1976 goto end; 1977 } 1978 } 1979 } 1980 #else /* INET6 */ 1981 int error; 1982 struct addrinfo hints; 1983 struct addrinfo *res, *res0; 1984 char addr[4 * 16]; 1985 int *af; 1986 1987 for (af = ai_family_order_table[DNS_order];; af++) { 1988 memset(&hints, 0, sizeof(hints)); 1989 hints.ai_family = *af; 1990 error = getaddrinfo(domain, NULL, &hints, &res0); 1991 if (error) { 1992 if (*af == PF_UNSPEC) { 1993 break; 1994 } 1995 /* try next */ 1996 continue; 1997 } 1998 for (res = res0; res != NULL; res = res->ai_next) { 1999 switch (res->ai_family) { 2000 case AF_INET: 2001 inet_ntop(AF_INET, 2002 &((struct sockaddr_in *)res->ai_addr)->sin_addr, 2003 addr, sizeof(addr)); 2004 break; 2005 case AF_INET6: 2006 inet_ntop(AF_INET6, 2007 &((struct sockaddr_in6 *)res->ai_addr)-> 2008 sin6_addr, addr, sizeof(addr)); 2009 break; 2010 default: 2011 /* unknown */ 2012 continue; 2013 } 2014 for (tl = NO_proxy_domains->first; tl != NULL; tl = tl->next) { 2015 if (strncmp(tl->ptr, addr, strlen(tl->ptr)) == 0) { 2016 freeaddrinfo(res0); 2017 ret = 1; 2018 goto end; 2019 } 2020 } 2021 } 2022 freeaddrinfo(res0); 2023 if (*af == PF_UNSPEC) { 2024 break; 2025 } 2026 } 2027 #endif /* INET6 */ 2028 } 2029 end: 2030 TRAP_OFF; 2031 return ret; 2032 } 2033 2034 char * 2035 filename_extension(char *path, int is_url) 2036 { 2037 char *last_dot = "", *p = path; 2038 int i; 2039 2040 if (path == NULL) 2041 return last_dot; 2042 if (*p == '.') 2043 p++; 2044 for (; *p; p++) { 2045 if (*p == '.') { 2046 last_dot = p; 2047 } 2048 else if (is_url && *p == '?') 2049 break; 2050 } 2051 if (*last_dot == '.') { 2052 for (i = 1; last_dot[i] && i < 8; i++) { 2053 if (is_url && !IS_ALNUM(last_dot[i])) 2054 break; 2055 } 2056 return allocStr(last_dot, i); 2057 } 2058 else 2059 return last_dot; 2060 } 2061 2062 #ifdef USE_EXTERNAL_URI_LOADER 2063 static struct table2 **urimethods; 2064 static struct table2 default_urimethods[] = { 2065 {"mailto", "file:///$LIB/w3mmail.cgi?%s"}, 2066 {NULL, NULL} 2067 }; 2068 2069 static struct table2 * 2070 loadURIMethods(char *filename) 2071 { 2072 FILE *f; 2073 int i, n; 2074 Str tmp; 2075 struct table2 *um; 2076 char *up, *p; 2077 2078 f = fopen(expandPath(filename), "r"); 2079 if (f == NULL) 2080 return NULL; 2081 i = 0; 2082 while (tmp = Strfgets(f), tmp->length > 0) { 2083 if (tmp->ptr[0] != '#') 2084 i++; 2085 } 2086 fseek(f, 0, 0); 2087 n = i; 2088 um = New_N(struct table2, n + 1); 2089 i = 0; 2090 while (tmp = Strfgets(f), tmp->length > 0) { 2091 if (tmp->ptr[0] == '#') 2092 continue; 2093 while (IS_SPACE(Strlastchar(tmp))) 2094 Strshrink(tmp, 1); 2095 for (up = p = tmp->ptr; *p != '\0'; p++) { 2096 if (*p == ':') { 2097 um[i].item1 = Strnew_charp_n(up, p - up)->ptr; 2098 p++; 2099 break; 2100 } 2101 } 2102 if (*p == '\0') 2103 continue; 2104 while (*p != '\0' && IS_SPACE(*p)) 2105 p++; 2106 um[i].item2 = Strnew_charp(p)->ptr; 2107 i++; 2108 } 2109 um[i].item1 = NULL; 2110 um[i].item2 = NULL; 2111 fclose(f); 2112 return um; 2113 } 2114 2115 void 2116 initURIMethods() 2117 { 2118 TextList *methodmap_list = NULL; 2119 TextListItem *tl; 2120 int i; 2121 2122 if (non_null(urimethodmap_files)) 2123 methodmap_list = make_domain_list(urimethodmap_files); 2124 if (methodmap_list == NULL) 2125 return; 2126 urimethods = New_N(struct table2 *, (methodmap_list->nitem + 1)); 2127 for (i = 0, tl = methodmap_list->first; tl; tl = tl->next) { 2128 urimethods[i] = loadURIMethods(tl->ptr); 2129 if (urimethods[i]) 2130 i++; 2131 } 2132 urimethods[i] = NULL; 2133 } 2134 2135 Str 2136 searchURIMethods(ParsedURL *pu) 2137 { 2138 struct table2 *ump; 2139 int i; 2140 Str scheme = NULL; 2141 Str url; 2142 char *p; 2143 2144 if (pu->scheme != SCM_UNKNOWN) 2145 return NULL; /* use internal */ 2146 if (urimethods == NULL) 2147 return NULL; 2148 url = parsedURL2Str(pu); 2149 for (p = url->ptr; *p != '\0'; p++) { 2150 if (*p == ':') { 2151 scheme = Strnew_charp_n(url->ptr, p - url->ptr); 2152 break; 2153 } 2154 } 2155 if (scheme == NULL) 2156 return NULL; 2157 2158 /* 2159 * RFC2396 3.1. Scheme Component 2160 * For resiliency, programs interpreting URI should treat upper case 2161 * letters as equivalent to lower case in scheme names (e.g., allow 2162 * "HTTP" as well as "http"). 2163 */ 2164 for (i = 0; (ump = urimethods[i]) != NULL; i++) { 2165 for (; ump->item1 != NULL; ump++) { 2166 if (strcasecmp(ump->item1, scheme->ptr) == 0) { 2167 return Sprintf(ump->item2, url_quote(url->ptr)); 2168 } 2169 } 2170 } 2171 for (ump = default_urimethods; ump->item1 != NULL; ump++) { 2172 if (strcasecmp(ump->item1, scheme->ptr) == 0) { 2173 return Sprintf(ump->item2, url_quote(url->ptr)); 2174 } 2175 } 2176 return NULL; 2177 } 2178 2179 /* 2180 * RFC2396: Uniform Resource Identifiers (URI): Generic Syntax 2181 * Appendix A. Collected BNF for URI 2182 * uric = reserved | unreserved | escaped 2183 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | 2184 * "$" | "," 2185 * unreserved = alphanum | mark 2186 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | 2187 * "(" | ")" 2188 * escaped = "%" hex hex 2189 */ 2190 2191 #define URI_PATTERN "([-;/?:@&=+$,a-zA-Z0-9_.!~*'()]|%[0-9A-Fa-f][0-9A-Fa-f])*" 2192 void 2193 chkExternalURIBuffer(Buffer *buf) 2194 { 2195 int i; 2196 struct table2 *ump; 2197 2198 for (i = 0; (ump = urimethods[i]) != NULL; i++) { 2199 for (; ump->item1 != NULL; ump++) { 2200 reAnchor(buf, Sprintf("%s:%s", ump->item1, URI_PATTERN)->ptr); 2201 } 2202 } 2203 for (ump = default_urimethods; ump->item1 != NULL; ump++) { 2204 reAnchor(buf, Sprintf("%s:%s", ump->item1, URI_PATTERN)->ptr); 2205 } 2206 } 2207 #endif 2208 2209 ParsedURL * 2210 schemeToProxy(int scheme) 2211 { 2212 ParsedURL *pu = NULL; /* for gcc */ 2213 switch (scheme) { 2214 case SCM_HTTP: 2215 pu = &HTTP_proxy_parsed; 2216 break; 2217 #ifdef USE_SSL 2218 case SCM_HTTPS: 2219 pu = &HTTPS_proxy_parsed; 2220 break; 2221 #endif 2222 case SCM_FTP: 2223 pu = &FTP_proxy_parsed; 2224 break; 2225 #ifdef USE_GOPHER 2226 case SCM_GOPHER: 2227 pu = &GOPHER_proxy_parsed; 2228 break; 2229 #endif 2230 #ifdef DEBUG 2231 default: 2232 abort(); 2233 #endif 2234 } 2235 return pu; 2236 }