[w3m-dev 02503] From: aito@fw.ipsj.or.jp closes: Debian Bug#120540 - w3m - Unnamed repository; edit this file to name it for gitweb.

commit ea6b1bf2d9dde070915090a352f07f2041ac9478
parent ec4d1fa0668d0cdd65c95ec5b1d1f4edfda9b413
Author: ukai <ukai>
Date:   Thu, 22 Nov 2001 14:15:19 +0000

[w3m-dev 02503]
From: aito@fw.ipsj.or.jp
closes: Debian Bug#120540

Diffstat:
M ChangeLog  | 6 ++++++
M indep.c  | 17 +++++++++++++++++

2 files changed, 23 insertions(+), 0 deletions(-)
diff --git a/ChangeLog b/ChangeLog
@@ -1,3 +1,9 @@
+2001-11-22  aito@fw.ipsj.or.jp
+
+	* [w3m-dev 02503]
+	* indep.c (getescapechar): allow incomplete entity references in URL
+	  closes: Debian Bug#120540
+
 2001-11-22  Fumitoshi UKAI  <ukai@debian.or.jp>
 
 	* [w3m-dev 02506]
diff --git a/indep.c b/indep.c
@@ -275,6 +275,7 @@ getescapechar(char **str)
 {
     int dummy = -1;
     char *p = *str, *q;
+    int strict_entity = TRUE;
 
     if (*p == '&')
 	p++;
@@ -319,8 +320,24 @@ getescapechar(char **str)
     for (p++; IS_ALNUM(*p); p++)
 	;
     q = allocStr(q, p - q);
+    if (strcasestr("lt gt amp quot nbsp",q) &&
+	*p != '=') {
+	/* A character entity MUST be terminated with ";". However,
+	   there are MANY web pages which use &lt , &gt or something
+	   like them in place of &lt;, &gt;, etc. Therefore, we treat the
+	   most popular character entities (including &#xxxx;) without
+	   the last ";" as character entities. If the trailing character
+	   is "=", it must be part of a query in a URL. So &lt=, &gt=, etc.
+	   are not regarded as character entities.
+	*/
+	strict_entity = FALSE;
+    }
     if (*p == ';')
 	p++;
+    else if (strict_entity) {
+	*str = p;
+	return -1;
+    }
     *str = p;
     return getHash_si(&entity, q, -1);
 }

	w3m Unnamed repository; edit this file to name it for gitweb.
	git clone https://logand.com/git/w3m.git/
	Log \| Files \| Refs \| README