commit 2d86c8596ca1e22ab98c584d378832852978be1b
parent af2e1adbb7a2ac1882762e4c2518fddbc0763670
Author: ukai <ukai>
Date: Tue, 3 Dec 2002 15:35:09 +0000
[w3m-dev 03509] HTML parser
* file.c (close_textarea): delete
(HTMLtagproc1): rewrite
delete HTML_EOL
move HTML_LISTING, HTML_N_LISTING
add HTML_PRE_PLAIN, HTML_N_PRE_PLAIN
add HTML_PLAINTEXT
end_tag
(HTMLlineproc0): s/str/line/
rewrite
(completeHTMLstream): </textarea> if necessary
* fm.h (struct readbuffer): delete ignore_tag
add end_tag
(RB_XMPMODE): deleted
(RB_LSTMODE): deleted
(RB_SCRIPT): added
(RB_STYLE): added
(RB_*): renumber
(R_ST_EOL): added
(R_ST_*): renumber
(ST_IS_TAG): check R_ST_EOL
* form.c (form_fputs_decode): remove <eol> handling
* frame.c (newFrame): remove_space()
(CASE_TABLE_TAG): added
(createFrameFile): rewrite
* html.c (TagMAP): delete eol
add pre_plain, /pre_plain
* html.h (HTML_EOL): deleted
(HTML_PRE_PLAIN): added
(HTML_N_PRE_PLAIN): added
* table.c (visible_length): rewrite
(visible_length_plain): added
(maximum_visible_length_plain): added
(do_refill): R_ST_EOL
(table_close_select): end_tag
(table_close_textarea): end_tag
(TAG_ACTION_PLAIN): added
(feed_table_tag): rewrite
(feed_table): rewrite
* table.h (TBLM_*) reassign
(struct table_mode): delete ignore_tag
add end_tag
* tagtable.tab (eol): deleted
(pre_plain): added
(/pre_plain): added
From: Hironori SAKAMOTO <hsaka@mth.biglobe.ne.jp>
Diffstat:
M | ChangeLog | | | 48 | ++++++++++++++++++++++++++++++++++++++++++++++++ |
M | file.c | | | 325 | +++++++++++++++++++++++++++++++++++-------------------------------------------- |
M | fm.h | | | 64 | +++++++++++++++++++++++++++++++--------------------------------- |
M | form.c | | | 10 | ---------- |
M | frame.c | | | 158 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-------------- |
M | html.c | | | 5 | +++-- |
M | html.h | | | 7 | ++++--- |
M | table.c | | | 254 | +++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------- |
M | table.h | | | 28 | ++++++++++++---------------- |
M | tagtable.tab | | | 3 | ++- |
10 files changed, 544 insertions(+), 358 deletions(-)
diff --git a/ChangeLog b/ChangeLog
@@ -1,3 +1,51 @@
+2002-12-04 Hironori SAKAMOTO <hsaka@mth.biglobe.ne.jp>
+
+ * [w3m-dev 03509] HTML parser
+ * file.c (close_textarea): delete
+ (HTMLtagproc1): rewrite
+ delete HTML_EOL
+ move HTML_LISTING, HTML_N_LISTING
+ add HTML_PRE_PLAIN, HTML_N_PRE_PLAIN
+ add HTML_PLAINTEXT
+ end_tag
+ (HTMLlineproc0): s/str/line/
+ rewrite
+ (completeHTMLstream): </textarea> if necessary
+ * fm.h (struct readbuffer): delete ignore_tag
+ add end_tag
+ (RB_XMPMODE): deleted
+ (RB_LSTMODE): deleted
+ (RB_SCRIPT): added
+ (RB_STYLE): added
+ (RB_*): renumber
+ (R_ST_EOL): added
+ (R_ST_*): renumber
+ (ST_IS_TAG): check R_ST_EOL
+ * form.c (form_fputs_decode): remove <eol> handling
+ * frame.c (newFrame): remove_space()
+ (CASE_TABLE_TAG): added
+ (createFrameFile): rewrite
+ * html.c (TagMAP): delete eol
+ add pre_plain, /pre_plain
+ * html.h (HTML_EOL): deleted
+ (HTML_PRE_PLAIN): added
+ (HTML_N_PRE_PLAIN): added
+ * table.c (visible_length): rewrite
+ (visible_length_plain): added
+ (maximum_visible_length_plain): added
+ (do_refill): R_ST_EOL
+ (table_close_select): end_tag
+ (table_close_textarea): end_tag
+ (TAG_ACTION_PLAIN): added
+ (feed_table_tag): rewrite
+ (feed_table): rewrite
+ * table.h (TBLM_*) reassign
+ (struct table_mode): delete ignore_tag
+ add end_tag
+ * tagtable.tab (eol): deleted
+ (pre_plain): added
+ (/pre_plain): added
+
2002-12-03 Hironori SAKAMOTO <hsaka@mth.biglobe.ne.jp>
* [w3m-dev 03505] Re: coredump when ssl error
diff --git a/file.c b/file.c
@@ -34,7 +34,6 @@ static FILE *lessopen_stream(char *path);
static Buffer *loadcmdout(char *cmd,
Buffer *(*loadproc) (URLFile *, Buffer *),
Buffer *defaultbuf);
-static void close_textarea(struct html_feed_environ *h_env);
static void addnewline(Buffer *buf, char *line, Lineprop *prop,
#ifdef USE_ANSI_COLOR
Linecolor *color,
@@ -4064,10 +4063,6 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)
flushline(h_env, obuf, envs[h_env->envc].indent, 1, h_env->limit);
h_env->blank_lines = 0;
return 1;
- case HTML_EOL:
- if ((obuf->flag & RB_PREMODE) && obuf->pos > envs[h_env->envc].indent)
- flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit);
- return 1;
case HTML_H:
if (!(obuf->flag & (RB_PREMODE | RB_IGNORE_P))) {
flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit);
@@ -4366,46 +4361,74 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)
if (obuf->nobr_level == 0)
obuf->flag &= ~RB_NOBR;
return 0;
- case HTML_LISTING:
+ case HTML_PRE_PLAIN:
CLOSE_P;
- flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit);
- obuf->flag |= (RB_LSTMODE | RB_IGNORE_P);
- /* istr = str; */
+ if (!(obuf->flag & RB_IGNORE_P)) {
+ flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit);
+ do_blankline(h_env, obuf, envs[h_env->envc].indent, 0,
+ h_env->limit);
+ }
+ obuf->flag |= (RB_PRE | RB_IGNORE_P);
return 1;
- case HTML_N_LISTING:
+ case HTML_N_PRE_PLAIN:
CLOSE_P;
- flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit);
- obuf->flag &= ~RB_LSTMODE;
+ if (!(obuf->flag & RB_IGNORE_P)) {
+ flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit);
+ do_blankline(h_env, obuf, envs[h_env->envc].indent, 0,
+ h_env->limit);
+ obuf->flag |= RB_IGNORE_P;
+ }
+ obuf->flag &= ~RB_PRE;
return 1;
+ case HTML_LISTING:
case HTML_XMP:
+ case HTML_PLAINTEXT:
CLOSE_P;
- flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit);
- obuf->flag |= (RB_XMPMODE | RB_IGNORE_P);
- /* istr = str; */
+ if (!(obuf->flag & RB_IGNORE_P)) {
+ flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit);
+ do_blankline(h_env, obuf, envs[h_env->envc].indent, 0,
+ h_env->limit);
+ }
+ obuf->flag |= (RB_PLAIN | RB_IGNORE_P);
+ switch (cmd) {
+ case HTML_LISTING:
+ obuf->end_tag = HTML_N_LISTING;
+ break;
+ case HTML_XMP:
+ obuf->end_tag = HTML_N_XMP;
+ break;
+ case HTML_PLAINTEXT:
+ obuf->end_tag = MAX_HTMLTAG;
+ break;
+ }
return 1;
+ case HTML_N_LISTING:
case HTML_N_XMP:
CLOSE_P;
- flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit);
- obuf->flag &= ~RB_XMPMODE;
+ if (!(obuf->flag & RB_IGNORE_P)) {
+ flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit);
+ do_blankline(h_env, obuf, envs[h_env->envc].indent, 0,
+ h_env->limit);
+ obuf->flag |= RB_IGNORE_P;
+ }
+ obuf->flag &= ~RB_PLAIN;
+ obuf->end_tag = 0;
return 1;
case HTML_SCRIPT:
- obuf->flag |= RB_IGNORE;
- obuf->ignore_tag = Strnew_charp("</script>");
- return 1;
- case HTML_N_SCRIPT:
- /* should not be reached */
+ obuf->flag |= RB_SCRIPT;
+ obuf->end_tag = HTML_N_SCRIPT;
return 1;
case HTML_STYLE:
- obuf->flag |= RB_IGNORE;
- obuf->ignore_tag = Strnew_charp("</style>");
+ obuf->flag |= RB_STYLE;
+ obuf->end_tag = HTML_N_STYLE;
return 1;
- case HTML_N_STYLE:
- /* should not be reached */
+ case HTML_N_SCRIPT:
+ obuf->flag &= ~RB_SCRIPT;
+ obuf->end_tag = 0;
return 1;
- case HTML_PLAINTEXT:
- flushline(h_env, obuf, envs[h_env->envc].indent, 0, h_env->limit);
- obuf->flag |= RB_PLAIN;
- /* istr = str; */
+ case HTML_N_STYLE:
+ obuf->flag &= ~RB_STYLE;
+ obuf->end_tag = 0;
return 1;
case HTML_A:
if (obuf->anchor)
@@ -4513,7 +4536,7 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)
table_mode[obuf->table_level].indent_level = 0;
table_mode[obuf->table_level].nobr_level = 0;
table_mode[obuf->table_level].caption = 0;
- table_mode[obuf->table_level].ignore_tag = NULL;
+ table_mode[obuf->table_level].end_tag = 0; /* HTML_UNKNOWN */
#ifndef TABLE_EXPAND
tables[obuf->table_level]->total_width = width;
#else
@@ -4572,9 +4595,11 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)
if (tmp)
HTMLlineproc1(tmp->ptr, h_env);
obuf->flag |= RB_INSELECT;
+ obuf->end_tag = HTML_N_SELECT;
return 1;
case HTML_N_SELECT:
obuf->flag &= ~RB_INSELECT;
+ obuf->end_tag = 0;
tmp = process_n_select();
if (tmp)
HTMLlineproc1(tmp->ptr, h_env);
@@ -4587,9 +4612,14 @@ HTMLtagproc1(struct parsed_tag *tag, struct html_feed_environ *h_env)
if (tmp)
HTMLlineproc1(tmp->ptr, h_env);
obuf->flag |= RB_INTXTA;
+ obuf->end_tag = HTML_N_TEXTAREA;
return 1;
case HTML_N_TEXTAREA:
- close_textarea(h_env);
+ obuf->flag &= ~RB_INTXTA;
+ obuf->end_tag = 0;
+ tmp = process_n_textarea();
+ if (tmp)
+ HTMLlineproc1(tmp->ptr, h_env);
return 1;
case HTML_ISINDEX:
p = "";
@@ -5448,10 +5478,9 @@ table_width(struct html_feed_environ *h_env, int table_level)
/* HTML processing first pass */
void
-HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal)
+HTMLlineproc0(char *line, struct html_feed_environ *h_env, int internal)
{
Lineprop mode;
- char *q;
int cmd;
struct readbuffer *obuf = h_env->obuf;
int indent, delta;
@@ -5467,25 +5496,12 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal)
(obuf->flag & RB_PREMODE) ? 'P' : ' ',
(obuf->table_level >= 0) ? 'T' : ' ',
(obuf->flag & RB_INTXTA) ? 'X' : ' ',
- (obuf->flag & RB_IGNORE) ? 'I' : ' ');
- fprintf(f, "HTMLlineproc1(\"%s\",%d,%lx)\n", str, h_env->limit,
+ (obuf->flag & (RB_SCRIPT | RB_STYLE)) ? 'S' : ' ');
+ fprintf(f, "HTMLlineproc1(\"%s\",%d,%lx)\n", line, h_env->limit,
(unsigned long)h_env);
fclose(f);
}
-#if 0
- /* comment processing */
- if (obuf->status == R_ST_CMNT || obuf->status == R_ST_NCMNT3 ||
- obuf->status == R_ST_IRRTAG) {
- while (*str != '\0' && obuf->status != R_ST_NORMAL) {
- next_status(*str, &obuf->status);
- str++;
- }
- if (obuf->status != R_ST_NORMAL)
- return;
- }
-#endif
-
tokbuf = Strnew();
table_start:
@@ -5496,132 +5512,93 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal)
tbl_width = table_width(h_env, level);
}
- while (*str != '\0') {
+ while (*line != '\0') {
+ char *str, *p;
int is_tag = FALSE;
- int pre_mode = (obuf->table_level >= 0) ?
- tbl_mode->pre_mode & TBLM_PLAIN : obuf->flag & RB_PLAINMODE;
-
- if (obuf->flag & RB_PLAIN)
- goto read_as_plain; /* don't process tag */
+ int pre_mode = (obuf->table_level >= 0) ? tbl_mode->pre_mode :
+ obuf->flag;
+ int end_tag = (obuf->table_level >= 0) ? tbl_mode->end_tag :
+ obuf->end_tag;
- if (ST_IS_COMMENT(obuf->status)) {
- read_token(h_env->tagbuf, &str, &obuf->status, pre_mode, 1);
- if (obuf->status != R_ST_NORMAL)
- return;
- if (pre_mode) {
- is_tag = TRUE;
- q = h_env->tagbuf->ptr;
- goto read_as_pre_mode;
- }
- continue;
- }
- if (*str == '<' || ST_IS_TAG(obuf->status)) {
+ if (*line == '<' || obuf->status != R_ST_NORMAL) {
/*
* Tag processing
*/
- if (ST_IS_TAG(obuf->status)) {
-/*** continuation of a tag ***/
- read_token(h_env->tagbuf, &str, &obuf->status, pre_mode, 1);
- }
+ if (obuf->status == R_ST_EOL)
+ obuf->status = R_ST_NORMAL;
else {
- if (!REALLY_THE_BEGINNING_OF_A_TAG(str)) {
- /* this is NOT a beginning of a tag */
- obuf->status = R_ST_NORMAL;
- if (pre_mode)
- goto read_as_pre_mode;
- HTMLlineproc1("<", h_env);
- str++;
- continue;
- }
- read_token(h_env->tagbuf, &str, &obuf->status, pre_mode, 0);
- }
-#if 0
- if (ST_IS_COMMENT(obuf->status)) {
- if ((obuf->table_level >= 0) ? tbl_mode->pre_mode & TBLM_IGNORE
- : obuf->flag & RB_IGNORE)
- /* within ignored tag, such as *
- * <script>..</script>, don't process comment. */
- obuf->status = R_ST_NORMAL;
- return;
+ read_token(h_env->tagbuf, &line, &obuf->status,
+ pre_mode & RB_PREMODE, obuf->status != R_ST_NORMAL);
+ if (obuf->status != R_ST_NORMAL)
+ return;
}
-#endif
if (h_env->tagbuf->length == 0)
continue;
- if (obuf->status != R_ST_NORMAL) {
- if (!pre_mode) {
- if (Strlastchar(h_env->tagbuf) == '\n')
- Strchop(h_env->tagbuf);
- if (ST_IS_REAL_TAG(obuf->status))
- Strcat_char(h_env->tagbuf, ' ');
+ str = h_env->tagbuf->ptr;
+ if (*str == '<') {
+ if (str[1] && REALLY_THE_BEGINNING_OF_A_TAG(str))
+ is_tag = TRUE;
+ else if (!(pre_mode & (RB_PLAIN | RB_INTXTA | RB_INSELECT |
+ RB_SCRIPT | RB_STYLE))) {
+ line = Strnew_m_charp(str + 1, line, NULL)->ptr;
+ str = "<";
}
- if ((obuf->table_level >= 0)
- ? ((tbl_mode->pre_mode & TBLM_IGNORE) &&
- !TAG_IS(h_env->tagbuf->ptr, tbl_mode->ignore_tag->ptr,
- tbl_mode->ignore_tag->length - 1))
- : ((obuf->flag & RB_IGNORE) &&
- !TAG_IS(h_env->tagbuf->ptr, obuf->ignore_tag->ptr,
- obuf->ignore_tag->length - 1)))
- /* within ignored tag, such as *
- * <script>..</script>, don't process tag. */
- obuf->status = R_ST_NORMAL;
- continue;
}
- is_tag = TRUE;
- q = h_env->tagbuf->ptr;
+ }
+ else {
+ read_token(tokbuf, &line, &obuf->status, pre_mode & RB_PREMODE, 0);
+ if (obuf->status != R_ST_NORMAL) /* R_ST_AMP ? */
+ continue;
+ str = tokbuf->ptr;
}
- read_as_pre_mode:
- if (obuf->flag & (RB_INTXTA | RB_INSELECT | RB_IGNORE)) {
- cmd = HTML_UNKNOWN;
- if (!is_tag) {
- read_token(tokbuf, &str, &obuf->status,
- (obuf->flag & RB_INTXTA) ? 1 : 0, 0);
- if (obuf->status != R_ST_NORMAL)
- continue;
- q = tokbuf->ptr;
- }
- else {
- char *p = q;
- cmd = gethtmlcmd(&p);
- }
-
- /* textarea */
- if (obuf->flag & RB_INTXTA) {
- if (cmd == HTML_N_TEXTAREA)
- goto proc_normal;
- feed_textarea(q);
+ if (pre_mode & (RB_PLAIN | RB_INTXTA | RB_INSELECT | RB_SCRIPT |
+ RB_STYLE)) {
+ if (is_tag) {
+ p = str;
+ if ((tag = parse_tag(&p, internal))) {
+ if (tag->tagid == end_tag ||
+ (pre_mode & RB_INSELECT && tag->tagid == HTML_N_FORM))
+ goto proc_normal;
+ }
}
- else if (obuf->flag & RB_INSELECT) {
- if (cmd == HTML_N_SELECT || cmd == HTML_N_FORM)
+ /* select */
+ if (pre_mode & RB_INSELECT) {
+ if (obuf->table_level >= 0)
goto proc_normal;
- feed_select(q);
+ feed_select(str);
+ continue;
}
- /* script */
- else if (obuf->flag & RB_IGNORE) {
- if (TAG_IS(q, obuf->ignore_tag->ptr,
- obuf->ignore_tag->length - 1)) {
- obuf->flag &= ~RB_IGNORE;
+ if (is_tag) {
+ if (strncmp(str, "<!--", 4) && (p = strchr(str + 1, '<'))) {
+ str = Strnew_charp_n(str, p - str)->ptr;
+ line = Strnew_m_charp(p, line, NULL)->ptr;
}
+ is_tag = FALSE;
}
- continue;
+ if (obuf->table_level >= 0)
+ goto proc_normal;
+ /* textarea */
+ if (pre_mode & RB_INTXTA) {
+ feed_textarea(str);
+ continue;
+ }
+ /* script */
+ if (pre_mode & RB_SCRIPT)
+ continue;
+ /* style */
+ if (pre_mode & RB_STYLE)
+ continue;
}
+ proc_normal:
if (obuf->table_level >= 0) {
/*
* within table: in <table>..</table>, all input tokens
* are fed to the table renderer, and then the renderer
* makes HTML output.
*/
-
- if (!is_tag) {
- read_token(tokbuf, &str, &obuf->status,
- tbl_mode->pre_mode & TBLM_PREMODE, 0);
- if (obuf->status != R_ST_NORMAL)
- continue;
- q = tokbuf->ptr;
- }
-
- switch (feed_table(tbl, q, tbl_mode, tbl_width, internal)) {
+ switch (feed_table(tbl, str, tbl_mode, tbl_width, internal)) {
case 0:
/* </table> tag */
obuf->table_level--;
@@ -5629,14 +5606,13 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal)
continue;
end_table(tbl);
if (obuf->table_level >= 0) {
- Str tmp;
struct table *tbl0 = tables[obuf->table_level];
- tmp = Sprintf("<table_alt tid=%d>", tbl0->ntable);
+ str = Sprintf("<table_alt tid=%d>", tbl0->ntable)->ptr;
pushTable(tbl0, tbl);
tbl = tbl0;
tbl_mode = &table_mode[obuf->table_level];
tbl_width = table_width(h_env, obuf->table_level);
- feed_table(tbl, tmp->ptr, tbl_mode, tbl_width, TRUE);
+ feed_table(tbl, str, tbl_mode, tbl_width, TRUE);
continue;
/* continue to the next */
}
@@ -5659,27 +5635,17 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal)
continue;
case 1:
/* <table> tag */
- goto proc_normal;
+ break;
default:
continue;
}
}
- proc_normal:
if (is_tag) {
/*** Beginning of a new tag ***/
- if ((tag = parse_tag(&q, internal)))
+ if ((tag = parse_tag(&str, internal)))
cmd = tag->tagid;
else
- cmd = HTML_UNKNOWN;
- if (((obuf->flag & RB_XMPMODE) && cmd != HTML_N_XMP) ||
- ((obuf->flag & RB_LSTMODE) && cmd != HTML_N_LISTING)) {
- Str tmp = Strdup(h_env->tagbuf);
- Strcat_charp(tmp, str);
- str = tmp->ptr;
- goto read_as_plain;
- }
- if (cmd == HTML_UNKNOWN)
continue;
/* process tags */
if (HTMLtagproc1(tag, h_env) == 0) {
@@ -5701,12 +5667,12 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal)
continue;
}
- read_as_plain:
+ while (*str) {
mode = get_mctype(str);
delta = get_mclen(mode);
if (obuf->flag & (RB_SPECIAL & ~RB_NOBR)) {
char ch = *str;
- if (!(obuf->flag & RB_PLAINMODE) && (*str == '&')) {
+ if (!(obuf->flag & RB_PLAIN) && (*str == '&')) {
char *p = str;
int ech = getescapechar(&p);
if (ech == '\n' || ech == '\r') {
@@ -5739,7 +5705,7 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal)
% Tabstop != 0);
str++;
}
- else if (obuf->flag & RB_PLAINMODE) {
+ else if (obuf->flag & RB_PLAIN) {
char *p = html_quote_char(*str);
if (p) {
push_charp(obuf, 1, p, PC_ASCII);
@@ -5820,10 +5786,10 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal)
#endif /* FORMAT_NICE */
HTMLlineproc1(line->ptr, h_env);
}
+ }
}
}
- if (!(obuf->flag & (RB_PREMODE | RB_NOBR | RB_INTXTA | RB_INSELECT
- | RB_PLAINMODE | RB_IGNORE))) {
+ if (!(obuf->flag & (RB_SPECIAL | RB_INTXTA | RB_INSELECT))) {
char *tp;
int i = 0;
@@ -5849,17 +5815,6 @@ HTMLlineproc0(char *str, struct html_feed_environ *h_env, int internal)
}
}
-static void
-close_textarea(struct html_feed_environ *h_env)
-{
- Str tmp;
-
- h_env->obuf->flag &= ~RB_INTXTA;
- tmp = process_n_textarea();
- if (tmp != NULL)
- HTMLlineproc1(tmp->ptr, h_env);
-}
-
extern char *NullLine;
extern Lineprop NullProp[];
@@ -6135,6 +6090,8 @@ completeHTMLstream(struct html_feed_environ *h_env, struct readbuffer *obuf)
push_tag(obuf, "</u>", HTML_N_U);
obuf->in_under = 0;
}
+ if (obuf->flag & RB_INTXTA)
+ HTMLlineproc1("</textarea>", h_env);
/* for unbalanced select tag */
if (obuf->flag & RB_INSELECT)
HTMLlineproc1("</select>", h_env);
@@ -6142,7 +6099,7 @@ completeHTMLstream(struct html_feed_environ *h_env, struct readbuffer *obuf)
/* for unbalanced table tag */
while (obuf->table_level >= 0) {
table_mode[obuf->table_level].pre_mode
- &= ~(TBLM_IGNORE | TBLM_XMP | TBLM_LST);
+ &= ~(TBLM_SCRIPT | TBLM_STYLE | TBLM_PLAIN);
HTMLlineproc1("</table>", h_env);
}
}
@@ -6351,8 +6308,10 @@ loadHTMLstream(URLFile *f, Buffer *newBuf, FILE * src, int internal)
#endif /* USE_NNTP */
HTMLlineproc0(lineBuf2->ptr, &htmlenv1, internal);
}
- if (obuf.status != R_ST_NORMAL)
- HTMLlineproc0(correct_irrtag(obuf.status)->ptr, &htmlenv1, internal);
+ if (obuf.status != R_ST_NORMAL) {
+ obuf.status = R_ST_EOL;
+ HTMLlineproc0("\n", &htmlenv1, internal);
+ }
obuf.status = R_ST_NORMAL;
completeHTMLstream(&htmlenv1, &obuf);
flushline(&htmlenv1, &obuf, 0, 2, htmlenv1.limit);
diff --git a/fm.h b/fm.h
@@ -534,7 +534,7 @@ struct readbuffer {
long flag_stack[RB_STACK_SIZE];
int flag_sp;
int status;
- Str ignore_tag;
+ unsigned char end_tag;
short table_level;
short nobr_level;
Str anchor;
@@ -557,33 +557,30 @@ struct readbuffer {
#define in_stand fontstat[2]
#define RB_PRE 0x01
-#define RB_XMPMODE 0x02
-#define RB_LSTMODE 0x04
+#define RB_SCRIPT 0x02
+#define RB_STYLE 0x04
#define RB_PLAIN 0x08
-#define RB_LEFT 0x80000
-#define RB_CENTER 0x10
-#define RB_RIGHT 0x20
-#define RB_ALIGN (RB_LEFT| RB_CENTER | RB_RIGHT)
-#define RB_NOBR 0x40
-#define RB_P 0x80
-#define RB_PRE_INT 0x100
-#define RB_PREMODE (RB_PRE | RB_PRE_INT)
-#define RB_SPECIAL (RB_PRE|RB_XMPMODE|RB_LSTMODE|RB_PLAIN|RB_NOBR|RB_PRE_INT)
-#define RB_PLAINMODE (RB_XMPMODE|RB_LSTMODE|RB_PLAIN)
-
-#define RB_IN_DT 0x200
-#define RB_INTXTA 0x400
-#define RB_INSELECT 0x800
-#define RB_IGNORE 0x1000
-#define RB_INSEL 0x2000
-#define RB_IGNORE_P 0x4000
-#define RB_TITLE 0x8000
-#define RB_NFLUSHED 0x10000
-#define RB_NOFRAMES 0x20000
-#define RB_INTABLE 0x40000
+#define RB_LEFT 0x10
+#define RB_CENTER 0x20
+#define RB_RIGHT 0x40
+#define RB_ALIGN (RB_LEFT | RB_CENTER | RB_RIGHT)
+#define RB_NOBR 0x80
+#define RB_P 0x100
+#define RB_PRE_INT 0x200
+#define RB_IN_DT 0x400
+#define RB_INTXTA 0x800
+#define RB_INSELECT 0x1000
+#define RB_IGNORE_P 0x2000
+#define RB_TITLE 0x4000
+#define RB_NFLUSHED 0x8000
+#define RB_NOFRAMES 0x10000
+#define RB_INTABLE 0x20000
+#define RB_PREMODE (RB_PRE | RB_PRE_INT | RB_SCRIPT | RB_STYLE | RB_PLAIN | RB_INTXTA)
+#define RB_SPECIAL (RB_PRE | RB_PRE_INT | RB_SCRIPT | RB_STYLE | RB_PLAIN | RB_NOBR)
+#define RB_PLAIN_PRE 0x40000
#ifdef FORMAT_NICE
-#define RB_FILL 0x200000
+#define RB_FILL 0x80000
#endif /* FORMAT_NICE */
#define RB_GET_ALIGN(obuf) ((obuf)->flag&RB_ALIGN)
@@ -605,17 +602,18 @@ struct readbuffer {
#define R_ST_DQUOTE 4 /* within double quote */
#define R_ST_EQL 5 /* = */
#define R_ST_AMP 6 /* within ampersand quote */
-#define R_ST_CMNT1 7 /* <! */
-#define R_ST_CMNT2 8 /* <!- */
-#define R_ST_CMNT 9 /* within comment */
-#define R_ST_NCMNT1 10 /* comment - */
-#define R_ST_NCMNT2 11 /* comment -- */
-#define R_ST_NCMNT3 12 /* comment -- space */
-#define R_ST_IRRTAG 13 /* within irregular tag */
+#define R_ST_EOL 7 /* end of file */
+#define R_ST_CMNT1 8 /* <! */
+#define R_ST_CMNT2 9 /* <!- */
+#define R_ST_CMNT 10 /* within comment */
+#define R_ST_NCMNT1 11 /* comment - */
+#define R_ST_NCMNT2 12 /* comment -- */
+#define R_ST_NCMNT3 13 /* comment -- space */
+#define R_ST_IRRTAG 14 /* within irregular tag */
#define ST_IS_REAL_TAG(s) ((s)==R_ST_TAG||(s)==R_ST_TAG0||(s)==R_ST_EQL)
#define ST_IS_COMMENT(s) ((s)>=R_ST_CMNT1)
-#define ST_IS_TAG(s) ((s)!=R_ST_NORMAL&&(s)!=R_ST_AMP&&!ST_IS_COMMENT(s))
+#define ST_IS_TAG(s) ((s)!=R_ST_NORMAL&&(s)!=R_ST_AMP&&!ST_IS_COMMENT(s)&&(s)!=R_ST_EOL)
/* does this '<' really mean the beginning of a tag? */
#define REALLY_THE_BEGINNING_OF_A_TAG(p) \
diff --git a/form.c b/form.c
@@ -430,16 +430,6 @@ form_fputs_decode(Str s, FILE * f)
for (p = s->ptr; *p;) {
switch (*p) {
- case '<':
- if (!strncasecmp(p, "<eol>", 5)) {
- Strcat_char(z, '\n');
- p += 5;
- }
- else {
- Strcat_char(z, *p);
- p++;
- }
- break;
#if !defined( __CYGWIN__ ) && !defined( __EMX__ )
case '\r':
if (*(p + 1) == '\n')
diff --git a/frame.c b/frame.c
@@ -97,7 +97,7 @@ newFrame(struct parsed_tag *tag, Buffer *buf)
body->baseURL = baseURL(buf);
if (tag) {
if (parsedtag_get_value(tag, ATTR_SRC, &p))
- body->url = url_quote_conv(p, buf->document_code);
+ body->url = url_quote_conv(remove_space(p), buf->document_code);
if (parsedtag_get_value(tag, ATTR_NAME, &p) && *p != '_')
body->name = url_quote_conv(p, buf->document_code);
}
@@ -412,6 +412,23 @@ frame_download_source(struct frame_body *b, ParsedURL *currentURL,
return ret_frameset;
}
+#define CASE_TABLE_TAG \
+ case HTML_TR:\
+ case HTML_N_TR:\
+ case HTML_TD:\
+ case HTML_N_TD:\
+ case HTML_TH:\
+ case HTML_N_TH:\
+ case HTML_THEAD:\
+ case HTML_N_THEAD:\
+ case HTML_TBODY:\
+ case HTML_N_TBODY:\
+ case HTML_TFOOT:\
+ case HTML_N_TFOOT:\
+ case HTML_COLGROUP:\
+ case HTML_N_COLGROUP:\
+ case HTML_COL
+
static int
createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,
int force_reload)
@@ -467,8 +484,10 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,
struct frameset *f_frameset;
int i = c + r * f->col;
char *p = "";
+ int status = R_ST_NORMAL;
Str tok = Strnew();
- int status;
+ int pre_mode = 0;
+ int end_tag = 0;
frame = f->frame[i];
@@ -557,12 +576,13 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,
break;
}
do {
- status = R_ST_NORMAL;
+ int is_tag = FALSE;
+ char *q;
+ struct parsed_tag *tag;
+
do {
if (*p == '\0') {
Str tmp = StrmyUFgets(&f2);
- if (tmp->length == 0 && status != R_ST_NORMAL)
- tmp = correct_irrtag(status);
if (tmp->length == 0)
break;
#ifdef JP_CHARSET
@@ -573,21 +593,67 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,
cleanup_line(tmp, HTML_MODE);
p = tmp->ptr;
}
- if (status == R_ST_NORMAL)
- read_token(tok, &p, &status, 1, 0);
- else if (ST_IS_COMMENT(status))
- read_token(tok, &p, &status, 0, 0);
- else
- read_token(tok, &p, &status, 1, 1);
+ read_token(tok, &p, &status, 1, status != R_ST_NORMAL);
} while (status != R_ST_NORMAL);
if (tok->length == 0)
continue;
if (tok->ptr[0] == '<') {
+ is_tag = TRUE;
+ if (pre_mode & (RB_PLAIN | RB_INTXTA | RB_SCRIPT |
+ RB_STYLE)) {
+ q = tok->ptr;
+ if ((tag = parse_tag(&q, FALSE)) &&
+ tag->tagid == end_tag) {
+ if (pre_mode & RB_PLAIN) {
+ fputs("</PRE_PLAIN>", f1);
+ pre_mode = 0;
+ end_tag = 0;
+ goto token_end;
+ }
+ pre_mode = 0;
+ end_tag = 0;
+ goto proc_normal;
+ }
+ if (strncmp(tok->ptr, "<!--", 4) &&
+ (q = strchr(tok->ptr + 1, '<'))) {
+ tok = Strnew_charp_n(tok->ptr, q - tok->ptr);
+ p = Strnew_m_charp(q, p, NULL)->ptr;
+ status = R_ST_NORMAL;
+ }
+ is_tag = FALSE;
+ }
+ else if (pre_mode & RB_INSELECT) {
+ q = tok->ptr;
+ if ((tag = parse_tag(&q, FALSE))) {
+ if ((tag->tagid == end_tag) ||
+ (tag->tagid == HTML_N_FORM)) {
+ if (tag->tagid == HTML_N_FORM)
+ fputs("</SELECT>", f1);
+ pre_mode = 0;
+ end_tag = 0;
+ goto proc_normal;
+ }
+ if (t_stack) {
+ switch (tag->tagid) {
+ case HTML_TABLE:
+ case HTML_N_TABLE:
+ CASE_TABLE_TAG:
+ fputs("</SELECT>", f1);
+ pre_mode = 0;
+ end_tag = 0;
+ goto proc_normal;
+ }
+ }
+ }
+ }
+ }
+
+ proc_normal:
+ if (is_tag) {
char *q = tok->ptr;
int j, a_target = 0;
- struct parsed_tag *tag;
ParsedURL url;
if (!(tag = parse_tag(&q, FALSE)))
@@ -603,7 +669,7 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,
case HTML_BASE:
/* "BASE" is a prohibited tag */
if (parsedtag_get_value(tag, ATTR_HREF, &q)) {
- q = url_quote_conv(q, code);
+ q = url_quote_conv(remove_space(q), code);
parseURL(q, &base, NULL);
}
if (parsedtag_get_value(tag, ATTR_TARGET, &q)) {
@@ -660,18 +726,7 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,
goto token_end;
}
break;
- case HTML_THEAD:
- case HTML_N_THEAD:
- case HTML_TBODY:
- case HTML_N_TBODY:
- case HTML_TFOOT:
- case HTML_N_TFOOT:
- case HTML_TD:
- case HTML_N_TD:
- case HTML_TR:
- case HTML_N_TR:
- case HTML_TH:
- case HTML_N_TH:
+ CASE_TABLE_TAG:
/* table_tags MUST be in table stack */
if (!t_stack) {
Strshrinkfirst(tok, 1);
@@ -682,6 +737,37 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,
}
break;
+ case HTML_SELECT:
+ pre_mode = RB_INSELECT;
+ end_tag = HTML_N_SELECT;
+ break;
+ case HTML_TEXTAREA:
+ pre_mode = RB_INTXTA;
+ end_tag = HTML_N_TEXTAREA;
+ break;
+ case HTML_SCRIPT:
+ pre_mode = RB_SCRIPT;
+ end_tag = HTML_N_SCRIPT;
+ break;
+ case HTML_STYLE:
+ pre_mode = RB_STYLE;
+ end_tag = HTML_N_STYLE;
+ break;
+ case HTML_LISTING:
+ pre_mode = RB_PLAIN;
+ end_tag = HTML_N_LISTING;
+ fputs("<PRE_PLAIN>", f1);
+ goto token_end;
+ case HTML_XMP:
+ pre_mode = RB_PLAIN;
+ end_tag = HTML_N_XMP;
+ fputs("<PRE_PLAIN>", f1);
+ goto token_end;
+ case HTML_PLAINTEXT:
+ pre_mode = RB_PLAIN;
+ end_tag = MAX_HTMLTAG;
+ fputs("<PRE_PLAIN>", f1);
+ goto token_end;
default:
break;
}
@@ -693,7 +779,8 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,
if (!tag->value[j])
break;
tag->value[j] =
- url_quote_conv(tag->value[j], code);
+ url_quote_conv(remove_space(tag->value[j]),
+ code);
parseURL2(tag->value[j], &url, &base);
if (url.scheme == SCM_UNKNOWN ||
#ifndef USE_W3MMAILER
@@ -748,11 +835,28 @@ createFrameFile(struct frameset *f, FILE * f1, Buffer *current, int level,
Strfputs(tok, f1);
}
else {
- Strfputs(tok, f1);
+ if (pre_mode & (RB_PLAIN | RB_INTXTA))
+ fprintf(f1, "%s", html_quote(tok->ptr));
+ else
+ Strfputs(tok, f1);
}
token_end:
Strclear(tok);
} while (*p != '\0' || !iseos(f2.stream));
+ if (pre_mode & RB_PLAIN)
+ fputs("</PRE_PLAIN>\n", f1);
+ else if (pre_mode & RB_INTXTA)
+ fputs("</TEXTAREA></FORM>\n", f1);
+ else if (pre_mode & RB_INSELECT)
+ fputs("</SELECT></FORM>\n", f1);
+ else if (pre_mode & (RB_SCRIPT | RB_STYLE)) {
+ if (status != R_ST_NORMAL)
+ fputs(correct_irrtag(status)->ptr, f1);
+ if (pre_mode & RB_SCRIPT)
+ fputs("</SCRIPT>\n", f1);
+ else if (pre_mode & RB_STYLE)
+ fputs("</STYLE>\n", f1);
+ }
while (t_stack--)
fputs("</TABLE>\n", f1);
UFclose(&f2);
diff --git a/html.c b/html.c
@@ -248,8 +248,9 @@ TagInfo TagMAP[MAX_HTMLTAG] = {
{"/input_alt", NULL, 0, TFLG_INT | TFLG_END}, /* 123 HTML_N_INPUT_ALT */
{"img_alt", ALST_IMG_ALT, MAXA_IMG_ALT, TFLG_INT}, /* 124 HTML_IMG_ALT */
{"/img_alt", NULL, 0, TFLG_INT | TFLG_END}, /* 125 HTML_N_IMG_ALT */
- {"eol", NULL, 0, TFLG_INT}, /* 126 HTML_EOL */
- {" ", ALST_NOP, MAXA_NOP, TFLG_INT}, /* 127 HTML_NOP */
+ {" ", ALST_NOP, MAXA_NOP, TFLG_INT}, /* 126 HTML_NOP */
+ {"pre_plain", NULL, 0, TFLG_INT}, /* 127 HTML_PRE_PLAIN */
+ {"/pre_plain", NULL, 0, TFLG_INT | TFLG_END}, /* 128 HTML_N_PRE_PLAIN */
};
TagAttrInfo AttrMAP[MAX_TAGATTR] = {
diff --git a/html.h b/html.h
@@ -213,10 +213,11 @@ typedef struct {
#define HTML_N_INPUT_ALT 123
#define HTML_IMG_ALT 124
#define HTML_N_IMG_ALT 125
-#define HTML_EOL 126
-#define HTML_NOP 127
+#define HTML_NOP 126
+#define HTML_PRE_PLAIN 127
+#define HTML_N_PRE_PLAIN 128
-#define MAX_HTMLTAG 128
+#define MAX_HTMLTAG 129
/* Tag attribute */
diff --git a/table.c b/table.c
@@ -490,11 +490,11 @@ visible_length(char *str)
else if (status == R_ST_AMP) {
if (prev_status == R_ST_NORMAL) {
Strclear(tagbuf);
+ len--;
amp_len = 0;
}
else {
Strcat_char(tagbuf, *str);
- len++;
amp_len++;
}
}
@@ -502,10 +502,13 @@ visible_length(char *str)
Strcat_char(tagbuf, *str);
r2 = tagbuf->ptr;
t = getescapecmd(&r2);
- len += strlen(t) - 1 - amp_len;
- if (*r2 != '\0') {
- str -= strlen(r2);
+ if (!*r2 && (*t == '\r' || *t == '\n')) {
+ if (len > max_len)
+ max_len = len;
+ len = 0;
}
+ else
+ len += strlen(t) + strlen(r2);
}
else if (status == R_ST_NORMAL && ST_IS_REAL_TAG(prev_status)) {
;
@@ -516,22 +519,42 @@ visible_length(char *str)
len++;
} while ((visible_length_offset + len) % Tabstop != 0);
}
- else if (*str == '\n' || *str == '\r') {
+ else if (*str == '\r' || *str == '\n') {
+ len--;
if (len > max_len)
max_len = len;
len = 0;
}
- else if (*str == '\n' || *str == '\r')
- len = 0;
str++;
}
if (status == R_ST_AMP) {
r2 = tagbuf->ptr;
t = getescapecmd(&r2);
- len += strlen(t) - 1 - amp_len;
- if (*r2 != '\0') {
- len += strlen(r2);
+ if (*t != '\r' && *t != '\n')
+ len += strlen(t) + strlen(r2);
+ }
+ return len > max_len ? len : max_len;
+}
+
+int
+visible_length_plain(char *str)
+{
+ int len = 0, max_len = 0;
+
+ while (*str) {
+ if (*str == '\t') {
+ do {
+ len++;
+ } while ((visible_length_offset + len) % Tabstop != 0);
+ }
+ else if (*str == '\r' || *str == '\n') {
+ if (len > max_len)
+ max_len = len;
+ len = 0;
}
+ else
+ len++;
+ str++;
}
return len > max_len ? len : max_len;
}
@@ -558,6 +581,28 @@ maximum_visible_length(char *str)
return maxlen;
}
+int
+maximum_visible_length_plain(char *str)
+{
+ int maxlen, len;
+
+ visible_length_offset = 0;
+ maxlen = visible_length_plain(str);
+
+ if (!strchr(str, '\t'))
+ return maxlen;
+
+ for (visible_length_offset = 1; visible_length_offset < Tabstop;
+ visible_length_offset++) {
+ len = visible_length_plain(str);
+ if (maxlen < len) {
+ maxlen = len;
+ break;
+ }
+ }
+ return maxlen;
+}
+
void
align(TextLine *lbuf, int width, int mode)
{
@@ -810,6 +855,10 @@ do_refill(struct table *tbl, int row, int col, int maxlimit)
else
HTMLlineproc1(l->ptr, &h_env);
}
+ if (obuf.status != R_ST_NORMAL) {
+ obuf.status = R_ST_EOL;
+ HTMLlineproc1("\n", &h_env);
+ }
completeHTMLstream(&h_env, &obuf);
flushline(&h_env, &obuf, 0, 2, h_env.limit);
if (tbl->border_mode == BORDER_NONE) {
@@ -2361,6 +2410,7 @@ table_close_select(struct table *tbl, struct table_mode *mode, int width)
{
Str tmp = process_n_select();
mode->pre_mode &= ~TBLM_INSELECT;
+ mode->end_tag = 0;
feed_table1(tbl, tmp, mode, width);
}
@@ -2369,6 +2419,7 @@ table_close_textarea(struct table *tbl, struct table_mode *mode, int width)
{
Str tmp = process_n_textarea();
mode->pre_mode &= ~TBLM_INTXTA;
+ mode->end_tag = 0;
feed_table1(tbl, tmp, mode, width);
}
@@ -2394,6 +2445,7 @@ table_close_anchor0(struct table *tbl, struct table_mode *mode)
#define TAG_ACTION_FEED 1
#define TAG_ACTION_TABLE 2
#define TAG_ACTION_N_TABLE 3
+#define TAG_ACTION_PLAIN 4
#define CASE_TABLE_TAG \
case HTML_TABLE:\
@@ -2429,53 +2481,62 @@ feed_table_tag(struct table *tbl, char *line, struct table_mode *mode,
cmd = tag->tagid;
- if (mode->pre_mode & TBLM_IGNORE) {
- switch (cmd) {
- case HTML_N_STYLE:
- mode->pre_mode &= ~TBLM_STYLE;
+ if (mode->pre_mode & TBLM_PLAIN) {
+ if (mode->end_tag == cmd) {
+ mode->pre_mode &= ~TBLM_PLAIN;
+ mode->end_tag = 0;
+ feed_table_block_tag(tbl, line, mode, 0, cmd);
return TAG_ACTION_NONE;
- case HTML_N_SCRIPT:
+ }
+ return TAG_ACTION_PLAIN;
+ }
+ if (mode->pre_mode & TBLM_INTXTA) {
+ if (mode->end_tag == cmd) {
+ table_close_textarea(tbl, mode, width);
+ return TAG_ACTION_NONE;
+ }
+ return TAG_ACTION_FEED;
+ }
+ if (mode->pre_mode & TBLM_SCRIPT) {
+ if (mode->end_tag == cmd) {
mode->pre_mode &= ~TBLM_SCRIPT;
+ mode->end_tag = 0;
return TAG_ACTION_NONE;
- default:
+ }
+ return TAG_ACTION_PLAIN;
+ }
+ if (mode->pre_mode & TBLM_STYLE) {
+ if (mode->end_tag == cmd) {
+ mode->pre_mode &= ~TBLM_STYLE;
+ mode->end_tag = 0;
return TAG_ACTION_NONE;
}
+ return TAG_ACTION_PLAIN;
}
-
- switch (cmd) {
- CASE_TABLE_TAG:
- if (mode->caption)
- mode->caption = 0;
- if (mode->pre_mode & (TBLM_IGNORE | TBLM_XMP | TBLM_LST))
- mode->pre_mode &= ~(TBLM_IGNORE | TBLM_XMP | TBLM_LST);
- if (mode->pre_mode & TBLM_INTXTA)
- table_close_textarea(tbl, mode, width);
- if (mode->pre_mode & TBLM_INSELECT)
+ /* failsafe: a tag other than <option></option>and </select> in *
+ * <select> environment is regarded as the end of <select>. */
+ if (mode->pre_mode & TBLM_INSELECT) {
+ switch (cmd) {
+ CASE_TABLE_TAG:
+ case HTML_N_FORM:
+ case HTML_N_SELECT: /* mode->end_tag */
table_close_select(tbl, mode, width);
+ break;
+ default:
+ return TAG_ACTION_FEED;
+ }
}
-
if (mode->caption) {
switch (cmd) {
+ CASE_TABLE_TAG:
case HTML_N_CAPTION:
mode->caption = 0;
- return TAG_ACTION_NONE;
+ break;
default:
return TAG_ACTION_FEED;
}
}
- /* failsafe: a tag other than <option></option>and </select> in *
- * <select> environment is regarded as the end of <select>. */
- if (mode->pre_mode & TBLM_INSELECT && cmd == HTML_N_FORM) {
- table_close_select(tbl, mode, width);
- }
-
- if ((mode->pre_mode & TBLM_INSELECT && cmd != HTML_N_SELECT) ||
- (mode->pre_mode & TBLM_INTXTA && cmd != HTML_N_TEXTAREA) ||
- (mode->pre_mode & TBLM_XMP && cmd != HTML_N_XMP) ||
- (mode->pre_mode & TBLM_LST && cmd != HTML_N_LISTING))
- return TAG_ACTION_FEED;
-
if (mode->pre_mode & TBLM_PRE) {
switch (cmd) {
case HTML_NOBR:
@@ -2742,33 +2803,33 @@ feed_table_tag(struct table *tbl, char *line, struct table_mode *mode,
case HTML_LI:
case HTML_PRE:
case HTML_N_PRE:
+ case HTML_HR:
case HTML_LISTING:
- case HTML_N_LISTING:
case HTML_XMP:
- case HTML_N_XMP:
case HTML_PLAINTEXT:
+ case HTML_PRE_PLAIN:
+ case HTML_N_PRE_PLAIN:
feed_table_block_tag(tbl, line, mode, 0, cmd);
switch (cmd) {
case HTML_PRE:
+ case HTML_PRE_PLAIN:
mode->pre_mode |= TBLM_PRE;
break;
case HTML_N_PRE:
+ case HTML_N_PRE_PLAIN:
mode->pre_mode &= ~TBLM_PRE;
break;
case HTML_LISTING:
- mode->pre_mode |= TBLM_LST;
- break;
- case HTML_N_LISTING:
- mode->pre_mode &= ~TBLM_LST;
+ mode->pre_mode |= TBLM_PLAIN;
+ mode->end_tag = HTML_N_LISTING;
break;
case HTML_XMP:
- mode->pre_mode |= TBLM_XMP;
- break;
- case HTML_N_XMP:
- mode->pre_mode &= ~TBLM_XMP;
+ mode->pre_mode |= TBLM_PLAIN;
+ mode->end_tag = HTML_N_XMP;
break;
case HTML_PLAINTEXT:
- mode->pre_mode |= TBLM_PLAINTEXT;
+ mode->pre_mode |= TBLM_PLAIN;
+ mode->end_tag = MAX_HTMLTAG;
break;
}
break;
@@ -2857,9 +2918,7 @@ feed_table_tag(struct table *tbl, char *line, struct table_mode *mode,
if (tmp)
feed_table1(tbl, tmp, mode, width);
mode->pre_mode |= TBLM_INSELECT;
- break;
- case HTML_N_SELECT:
- table_close_select(tbl, mode, width);
+ mode->end_tag = HTML_N_SELECT;
break;
case HTML_OPTION:
/* nothing */
@@ -2880,9 +2939,7 @@ feed_table_tag(struct table *tbl, char *line, struct table_mode *mode,
if (tmp)
feed_table1(tbl, tmp, mode, width);
mode->pre_mode |= TBLM_INTXTA;
- break;
- case HTML_N_TEXTAREA:
- table_close_textarea(tbl, mode, width);
+ mode->end_tag = HTML_N_TEXTAREA;
break;
case HTML_A:
table_close_anchor0(tbl, mode);
@@ -2969,11 +3026,11 @@ feed_table_tag(struct table *tbl, char *line, struct table_mode *mode,
break;
case HTML_SCRIPT:
mode->pre_mode |= TBLM_SCRIPT;
- mode->ignore_tag = Strnew_charp("</script>");
+ mode->end_tag = HTML_N_SCRIPT;
break;
case HTML_STYLE:
mode->pre_mode |= TBLM_STYLE;
- mode->ignore_tag = Strnew_charp("</style>");
+ mode->end_tag = HTML_N_STYLE;
break;
case HTML_N_A:
table_close_anchor0(tbl, mode);
@@ -2994,7 +3051,6 @@ feed_table_tag(struct table *tbl, char *line, struct table_mode *mode,
case HTML_TEXTAREA_INT:
case HTML_N_TEXTAREA_INT:
case HTML_IMG_ALT:
- case HTML_EOL:
case HTML_RULE:
case HTML_N_RULE:
default:
@@ -3014,25 +3070,29 @@ feed_table(struct table *tbl, char *line, struct table_mode *mode,
Str tmp;
struct table_linfo *linfo = &tbl->linfo;
- if (*line == '<') {
- int action;
+ if (*line == '<' && line[1] && REALLY_THE_BEGINNING_OF_A_TAG(line)) {
struct parsed_tag *tag;
p = line;
tag = parse_tag(&p, internal);
if (tag) {
- action = feed_table_tag(tbl, line, mode, width, tag);
- if (action == TAG_ACTION_NONE)
+ switch (feed_table_tag(tbl, line, mode, width, tag)) {
+ case TAG_ACTION_NONE:
return -1;
- else if (action == TAG_ACTION_N_TABLE)
+ case TAG_ACTION_N_TABLE:
return 0;
- else if (action == TAG_ACTION_TABLE) {
+ case TAG_ACTION_TABLE:
return 1;
+ case TAG_ACTION_PLAIN:
+ break;
+ case TAG_ACTION_FEED:
+ default:
+ if (parsedtag_need_reconstruct(tag))
+ line = parsedtag2str(tag)->ptr;
}
- else if (parsedtag_need_reconstruct(tag))
- line = parsedtag2str(tag)->ptr;
}
else {
- if (!(mode->pre_mode & TBLM_PLAIN))
+ if (!(mode->pre_mode & (TBLM_PLAIN | TBLM_INTXTA | TBLM_INSELECT |
+ TBLM_SCRIPT | TBLM_STYLE)))
return -1;
}
}
@@ -3040,7 +3100,9 @@ feed_table(struct table *tbl, char *line, struct table_mode *mode,
Strcat_charp(tbl->caption, line);
return -1;
}
- if (mode->pre_mode & TBLM_IGNORE)
+ if (mode->pre_mode & TBLM_SCRIPT)
+ return -1;
+ if (mode->pre_mode & TBLM_STYLE)
return -1;
if (mode->pre_mode & TBLM_INTXTA) {
feed_textarea(line);
@@ -3100,7 +3162,7 @@ feed_table(struct table *tbl, char *line, struct table_mode *mode,
}
line = tmp->ptr;
}
- if (!(mode->pre_mode & TBLM_SPECIAL)) {
+ if (!(mode->pre_mode & (TBLM_SPECIAL & ~TBLM_NOBR))) {
if (!(tbl->flag & TBL_IN_COL) || linfo->prev_spaces != 0)
while (IS_SPACE(*line))
line++;
@@ -3114,25 +3176,51 @@ feed_table(struct table *tbl, char *line, struct table_mode *mode,
i = skip_space(tbl, line, linfo, !(mode->pre_mode & TBLM_NOBR));
addcontentssize(tbl, visible_length(line) - i);
setwidth(tbl, mode);
+ pushdata(tbl, tbl->row, tbl->col, line);
}
- else {
- /* <pre> mode or something like it */
+ else if (mode->pre_mode & TBLM_PRE_INT) {
check_rowcol(tbl, mode);
- if (mode->pre_mode & TBLM_PRE_INT && mode->nobr_offset < 0)
+ if (mode->nobr_offset < 0)
mode->nobr_offset = tbl->tabcontentssize;
- if (mode->pre_mode & TBLM_PLAIN)
- i = strlen(line);
- else
- i = maximum_visible_length(line);
- addcontentssize(tbl, i);
+ addcontentssize(tbl, maximum_visible_length(line));
setwidth(tbl, mode);
- if (!(mode->pre_mode & TBLM_PRE_INT)) {
- p = line + strlen(line) - 1;
- if (*p == '\r' || *p == '\n')
+ pushdata(tbl, tbl->row, tbl->col, line);
+ }
+ else {
+ /* <pre> mode or something like it */
+ check_rowcol(tbl, mode);
+ while (*line) {
+ int nl = FALSE;
+ if ((p = strchr(line, '\r')) || (p = strchr(line, '\n'))) {
+ if (*p == '\r' && p[1] == '\n')
+ p++;
+ if (p[1]) {
+ p++;
+ tmp = Strnew_charp_n(line, p - line);
+ line = p;
+ p = tmp->ptr;
+ }
+ else {
+ p = line;
+ line = "";
+ }
+ nl = TRUE;
+ }
+ else {
+ p = line;
+ line = "";
+ }
+ if (mode->pre_mode & TBLM_PLAIN)
+ i = maximum_visible_length_plain(p);
+ else
+ i = maximum_visible_length(p);
+ addcontentssize(tbl, i);
+ setwidth(tbl, mode);
+ if (nl)
clearcontentssize(tbl, mode);
+ pushdata(tbl, tbl->row, tbl->col, p);
}
}
- pushdata(tbl, tbl->row, tbl->col, line);
return -1;
}
diff --git a/table.h b/table.h
@@ -114,21 +114,17 @@ struct table {
int sloppy_width;
};
-#define TBLM_PRE 1
-#define TBLM_NOBR 2
-#define TBLM_XMP 4
-#define TBLM_LST 8
-#define TBLM_PLAINTEXT 16
-#define TBLM_PRE_INT 32
-#define TBLM_INTXTA 64
-#define TBLM_INSELECT 128
-#define TBLM_PREMODE (TBLM_PRE|TBLM_INTXTA|TBLM_INSELECT|TBLM_PLAIN)
-#define TBLM_SPECIAL (TBLM_PRE|TBLM_PRE_INT|TBLM_PLAIN)
-#define TBLM_PLAIN (TBLM_PLAINTEXT|TBLM_XMP|TBLM_LST)
-#define TBLM_SCRIPT 256
-#define TBLM_STYLE 512
-#define TBLM_IGNORE (TBLM_SCRIPT|TBLM_STYLE)
-#define TBLM_ANCHOR 1024
+#define TBLM_PRE RB_PRE
+#define TBLM_SCRIPT RB_SCRIPT
+#define TBLM_STYLE RB_STYLE
+#define TBLM_PLAIN RB_PLAIN
+#define TBLM_NOBR RB_NOBR
+#define TBLM_PRE_INT RB_PRE_INT
+#define TBLM_INTXTA RB_INTXTA
+#define TBLM_INSELECT RB_INSELECT
+#define TBLM_PREMODE (TBLM_PRE | TBLM_PRE_INT | TBLM_SCRIPT | TBLM_STYLE | TBLM_PLAIN | TBLM_INTXTA)
+#define TBLM_SPECIAL (TBLM_PRE | TBLM_PRE_INT | TBLM_SCRIPT | TBLM_STYLE | TBLM_PLAIN | TBLM_NOBR)
+#define TBLM_ANCHOR 0x100000
#define uchar unsigned char
#define ushort unsigned short
@@ -139,7 +135,7 @@ struct table_mode {
short nobr_offset;
char nobr_level;
short anchor_offset;
- Str ignore_tag;
+ unsigned char end_tag;
};
/* Local Variables: */
diff --git a/tagtable.tab b/tagtable.tab
@@ -154,7 +154,6 @@ input_alt HTML_INPUT_ALT
/input_alt HTML_N_INPUT_ALT
img_alt HTML_IMG_ALT
/img_alt HTML_N_IMG_ALT
-eol HTML_EOL
pre_int HTML_PRE_INT
/pre_int HTML_N_PRE_INT
bgsound HTML_BGSOUND
@@ -167,3 +166,5 @@ select_int HTML_SELECT_INT
option_int HTML_OPTION_INT
textarea_int HTML_TEXTAREA_INT
/textarea_int HTML_N_TEXTAREA_INT
+pre_plain HTML_PRE_PLAIN
+/pre_plain HTML_N_PRE_PLAIN