emacs-unoffice

Emacs library to reclaim text from office documents (abw, odt, docx).
Log | Files | Refs

commit b725f476bafb33f8d3ce31d384899637db8893b3
parent e8f0159f965233b8e2756d9747051e68fb912b45
Author: Tomas Hlavaty <tom@logand.com>
Date:   Wed, 27 Jan 2021 03:46:23 +0100

use org-mode for undocx

handle bold, italic, underline text; tabs; bookmarks and page breaks

Diffstat:
M emacs-unoffice.el | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
1 file changed, 68 insertions(+), 11 deletions(-)

diff --git a/emacs-unoffice.el b/emacs-unoffice.el @@ -4,12 +4,14 @@ ;;; ;;; Reclaim text from office documents (abw, odt, docx). ;;; -;;; Copyright (C) 2020 Tomas Hlavaty <tom at logand dot com> +;;; Copyright (C) 2020--2021 Tomas Hlavaty <tom at logand dot com> ;;; ;;; License: GPLv3 or later ;;; ;;; Download: git clone https://logand.com/git/emacs-unoffice.git ;;; +;;; News: https://logand.com/sw/emacs-unoffice/atom.xml +;;; ;;; Example configuration: ;;; ;;; (require 'unoffice) @@ -19,6 +21,7 @@ (require 'arc-mode) (require 'cl) +(require 'org) (require 'org-table) (require 'view) (require 'xml) @@ -63,15 +66,43 @@ (interactive) (with-silent-modifications (erase-buffer) - (let (wrote tablep) + (insert "# -*- org -*-\n") + (insert "#+STARTUP: showeverything\n") + (insert "\n") + (let (wrote tablep pb rb pi ri pu ru tabs) (cl-labels ((ins (x) (when (and x (not (equal "" x))) - (insert x) - (unless tablep - (setq wrote t)))) + (setq wrote t) + (insert x))) + (start-biu () + (unless (eq ru pu) + (setq pu (not pu)) + (ins "_")) + (unless (eq ri pi) + (setq pi (not pi)) + (ins "/")) + (unless (eq rb pb) + (setq pb (not pb)) + (when (bolp) ;; bold vs headings + (ins " ")) + (ins "*"))) + (end-biu () + (when pb + (ins "*")) + (when pi + (ins "/")) + (when pu + (ins "_"))) (rec (x) (when (consp x) (case (car x) + (w:bookmarkStart + (ins "<<") + (ins (xml-get-attribute x 'w:name)) + (ins ">>")) + (w:lastRenderedPageBreak + (unless tablep + (insert " \n"))) (w:tc (insert "|") (mapc #'rec (cddr x))) @@ -82,12 +113,37 @@ (setq tablep t) (mapc #'rec (cddr x)) (setq tablep nil) - (org-table-align)) - (w:p (mapc #'rec (cddr x)) - (when wrote - (ins "\n\n") - (setq wrote nil))) - (w:t (mapc #'ins (cddr x))) + (org-table-align) + (insert "\n")) + (w:p + (setq wrote nil + pb nil + pi nil + pu nil) + (mapc #'rec (cddr x)) + (end-biu) + (unless tablep + (when wrote + (insert "\n\n")))) + (w:r + (setq rb nil + ri nil + ru nil) + (mapc #'rec (cddr x))) + (w:b (setq rb t)) + (w:i 
(setq ri t)) + (w:u (setq ru t)) + (w:t + (start-biu) + (mapc #'ins (cddr x))) + (w:tabs + (setq tabs t) + (mapc #'rec (cddr x)) + (setq tabs nil)) + (w:tab + (unless tabs + (start-biu) + (ins "\t"))) (t (mapc #'rec (cddr x))))))) (rec (let ((f buffer-file-name)) @@ -97,6 +153,7 @@ (car (xml-parse-region))))))))) (setq buffer-read-only t) (goto-char (point-min)) + (org-mode) (view-mode)) (provide 'unoffice)