w3m

Unnamed repository; edit this file to name it for gitweb.
git clone https://logand.com/git/w3m.git/
Log | Files | Refs | README

commit ea6b1bf2d9dde070915090a352f07f2041ac9478
parent ec4d1fa0668d0cdd65c95ec5b1d1f4edfda9b413
Author: ukai <ukai>
Date:   Thu, 22 Nov 2001 14:15:19 +0000

[w3m-dev 02503]
From: aito@fw.ipsj.or.jp
closes: Debian Bug#120540

Diffstat:
M ChangeLog | 6 ++++++
M indep.c | 17 +++++++++++++++++
2 files changed, 23 insertions(+), 0 deletions(-)

diff --git a/ChangeLog b/ChangeLog
@@ -1,3 +1,9 @@
+2001-11-22 aito@fw.ipsj.or.jp
+
+	* [w3m-dev 02503]
+	* indep.c (getescapechar): allow incomplete entity references in URL
+	closes: Debian Bug#120540
+
 2001-11-22 Fumitoshi UKAI <ukai@debian.or.jp>
 
 	* [w3m-dev 02506]
diff --git a/indep.c b/indep.c
@@ -275,6 +275,7 @@ getescapechar(char **str)
 {
     int dummy = -1;
     char *p = *str, *q;
+    int strict_entity = TRUE;
 
     if (*p == '&')
         p++;
@@ -319,8 +320,24 @@ getescapechar(char **str)
     for (p++; IS_ALNUM(*p); p++)
         ;
     q = allocStr(q, p - q);
+    if (strcasestr("lt gt amp quot nbsp",q) &&
+        *p != '=') {
+        /* a character entity MUST be terminated with ";". However,
+           there's MANY web pages which uses &lt , &gt or something
+           like them as &lt;, &gt;, etc. Therefore, we treat the most
+           popular character entities (including &#xxxx;) without
+           the last ";" as character entities. If the trailing character
+           is "=", it must be a part of query in an URL. So &lt=, &gt=, etc.
+           are not regarded as character entities.
+        */
+        strict_entity = FALSE;
+    }
     if (*p == ';')
         p++;
+    else if (strict_entity) {
+        *str = p;
+        return -1;
+    }
     *str = p;
     return getHash_si(&entity, q, -1);
 }