emacs-unoffice

Emacs library to reclaim text from office documents (abw, odt, docx).
Log | Files | Refs

commit 3dd05a7375c00219cf6d8cdd3f27f0c778cd6a9a
parent 51f4ebb86c7a7da915312b1ab633c4e04cea5c7a
Author: Tomas Hlavaty <tom@logand.com>
Date:   Sun, 31 Jan 2021 23:18:22 +0100

new abiword contains text also in p elements

not only in c elements

Diffstat:
M emacs-unoffice.el | 30 +++++++++++++++++-------------
1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/emacs-unoffice.el b/emacs-unoffice.el
@@ -185,7 +185,7 @@
       (rec x))))
 
 (defun unoffice--from-abw ()
-  (let (z align p)
+  (let (z align pp p)
     (cl-labels ((alignment (x)
                   (let ((props (xml-get-attribute x 'props)))
                     (when props
@@ -197,20 +197,24 @@
                             ((cl-search "text-align:right" props) '(right))))))
                 (ins (x)
-                  (when (and x (not (equal "" x)))
+                  (when (and x pp (not (equal "" x)))
                     (push x p)))
                 (rec (x)
-                  (when (consp x)
-                    (case (car x)
-                      (p
-                       (setq p nil)
-                       (let ((a (alignment x)))
-                         (unless (eq align a)
-                           (push (setq align a) z)))
-                       (mapc #'rec (cddr x))
-                       (push (cons 'p (nreverse p)) z))
-                      (c (mapc #'ins (cddr x)))
-                      (t (mapc #'rec (cddr x)))))))
+                  (typecase x
+                    (string (ins x))
+                    (cons
+                     (case (car x)
+                       (p
+                        (setq pp t
+                              p nil)
+                        (let ((a (alignment x)))
+                          (unless (eq align a)
+                            (push (setq align a) z)))
+                        (mapc #'rec (cddr x))
+                        (push (cons 'p (nreverse p)) z)
+                        (setq pp nil))
+                       (c (mapc #'rec (cddr x)))
+                       (t (mapc #'rec (cddr x))))))))
       (rec (car (xml-parse-file buffer-file-name))))
     (cons 'unoffice (nreverse z))))