cl-olefs

OLE File System tools for Common Lisp
git clone https://logand.com/git/cl-olefs.git/
Log | Files | Refs

commit 782176aad12ffaad13f2c26e0caea4d8c6868637
parent 2e0e679a4e854f3d09b03e0ccf3dd2b14e6322bd
Author: Tomas Hlavaty <tom@logand.com>
Date:   Sun,  6 Jan 2013 20:47:47 +0100

traverse-directories removed

Diffstat:
Molefs.lisp | 247+++++++++++++++++++++++++++++++++----------------------------------------------
1 file changed, 104 insertions(+), 143 deletions(-)

diff --git a/olefs.lisp b/olefs.lisp @@ -357,11 +357,24 @@ (defun ole-directory-stream (ole-file) (funcall ole-file 'ole-directory-stream)) -(defun traverse-directories (ole-file fn) ;; TODO better (streaming) interface - (let (e (s (ole-directory-stream ole-file))) - (loop - while (setq e (funcall s)) - do (funcall fn e)))) +(defun find-ole-entry (ole-file &key name type) + (loop + with s = (ole-directory-stream ole-file) + with e = nil + while (setq e (funcall s)) + do (when (and (or (not type) + (let ((x (ole-entry.object-type e))) + (if (atom type) + (eql x type) + (member x type)))) + (or (not name) + (let ((x (ole-entry-name-to-string + (ole-entry.name e) + (ole-entry.name-length e)))) + (if (atom name) + (equal x name) + (member x name :test #'equal))))) + (return-from find-ole-entry e)))) (defun %ole-directory-stream (directories) (let ((pending (list (cons 0 0)))) @@ -412,45 +425,28 @@ (ole-directory-stream (%ole-directory-stream directories)))))) (defun extract-pictures (ole-file dir html) - (traverse-directories + (walk-RecordHeader-tree ole-file - (lambda (entry) - (case (ole-entry.object-type entry) - (2 ;; stream - (let ((entry-name (ole-entry-name-to-string - (ole-entry.name entry) - (ole-entry.name-length entry)))) - #+nil - (with-stream (in (ole-entry-stream ole-file entry)) - (with-open-file (out (format nil "~a/~a" dir entry-name) + (find-ole-entry ole-file :name "Pictures" :type 2) + (lambda (in level i h start end parents) + (declare (ignore level start end parents)) + (multiple-value-bind (blip kind) + (read-record-body + in + h + (lambda (blip in) + (with-open-file (out (format nil "~a/~d.~a" + dir + i + (blip-ext blip)) :direction :output :if-does-not-exist :create :if-exists :supersede :element-type '(unsigned-byte 8)) - (copy-stream in out))) - (when (equal "Pictures" entry-name) - (walk-RecordHeader-tree - ole-file - entry - (lambda (in level i h start end parents) - (declare (ignore level start end parents)) - (multiple-value-bind (blip kind) - (read-record-body - in - h - (lambda (blip in) - (with-open-file (out (format nil "~a/~d.~a" - dir - i - (blip-ext blip)) - :direction :output - :if-does-not-exist :create - :if-exists :supersede - :element-type '(unsigned-byte 8)) - (copy-stream in out)))) - (declare (ignore blip)) - (when html - (format html "<p><img src=\"~d.~(~a~)\">~%" i kind)))))))))))) + (copy-stream in out)))) + (declare (ignore blip)) + (when html + (format html "<p><img src=\"~d.~(~a~)\">~%" i kind)))))) (defun extract-ole-file (filename &optional (dir "/tmp")) ;; TODO extract audio files (with-stream (ole-file (ole-file-stream filename)) @@ -646,16 +642,9 @@ (defun print-RecordHeader-tree-from-ppt-file (filename) (with-stream (ole-file (ole-file-stream filename)) - (traverse-directories + (print-RecordHeader-tree ole-file - (lambda (entry) - (case (ole-entry.object-type entry) - (2 ;; stream - (let ((entry-name (ole-entry-name-to-string - (ole-entry.name entry) - (ole-entry.name-length entry)))) - (when (equal "PowerPoint Document" entry-name) - (print-RecordHeader-tree ole-file entry))))))))) + (find-ole-entry ole-file :name "PowerPoint Document" :type 2)))) (defun utf-char (n) ;; TODO utf properly (if (member n '(#x0a #x0b #x0d)) ;; #x0b = vertical tab @@ -834,22 +823,12 @@ ;;(out "~s~%" texts) (out "</body>~%</html>~%")))) -(defun find-ole-entry (ole-file name) - (traverse-directories - ole-file - (lambda (entry) - (let ((entry-name (ole-entry-name-to-string - (ole-entry.name entry) - (ole-entry.name-length entry)))) - (when (equal name entry-name) - (return-from find-ole-entry entry)))))) - (defun ppt-file-to-html-naive (filename &optional (stream *standard-output*)) (with-stream (ole-file (ole-file-stream filename)) (let ((pictures nil)) ;;(extract-pictures ole-file dir html) ;; TODO mount olefs and traverse Pictures only once (walk-RecordHeader-tree ole-file - (find-ole-entry ole-file "Pictures") + (find-ole-entry ole-file :name "Pictures" :type 2) (lambda (in level i h start end parents) (declare (ignore level end parents)) (multiple-value-bind (blip kind) @@ -857,7 +836,9 @@ (declare (ignore blip)) (push (list i (- start 8) kind) pictures)))) (ppt-entry-to-html-naive ole-file - (find-ole-entry ole-file "PowerPoint Document") + (find-ole-entry ole-file + :name "PowerPoint Document" + :type 2) stream filename pictures @@ -924,27 +905,20 @@ (defun ppt-file-to-html (filename &optional (stream *standard-output*)) (with-stream (ole-file (ole-file-stream filename)) (let ((u (block CurrentUser - (traverse-directories + (walk-RecordHeader-tree ole-file - (lambda (entry) - (case (ole-entry.object-type entry) - (2 ;; stream - (let ((entry-name (ole-entry-name-to-string - (ole-entry.name entry) - (ole-entry.name-length entry)))) - (when (equal "Current User" entry-name) - (walk-RecordHeader-tree - ole-file - entry - (lambda (in level i h start end parents) - (declare (ignore level i start end parents)) - (return-from CurrentUser - (cadr (read-record-body in h)))))))))))))) + (find-ole-entry ole-file :name "Current User" :type 2) + (lambda (in level i h start end parents) + (declare (ignore level i start end parents)) + (return-from CurrentUser + (cadr (read-record-body in h)))))))) ;;(describe u) (let ((pictures nil)) ;;(extract-pictures ole-file dir html) ;; TODO mount olefs and traverse Pictures only once (walk-RecordHeader-tree ole-file - (find-ole-entry ole-file "Pictures") + (find-ole-entry ole-file + :name "Pictures" + :type 2) (lambda (in level i h start end parents) (declare (ignore level end parents)) (multiple-value-bind (blip kind) @@ -954,7 +928,9 @@ (print (list :pictures pictures)) (with-stream (in (ole-entry-stream ole-file - (find-ole-entry ole-file "PowerPoint Document"))) + (find-ole-entry ole-file + :name "PowerPoint Document" + :type 2))) (let ((htab (make-hash-table)) ;; persist oid -> fpos (first-UserEditAtom nil)) (stream-position in (CurrentUserAtom.offsetToCurrentEdit u)) @@ -1388,72 +1364,57 @@ (with-stream (ole-file (ole-file-stream filename)) #+nil(break "~s" ole-file) (let (fcPlfLst lcbPlfLst) - (block found1 - (traverse-directories - ole-file - (lambda (entry) - (case (ole-entry.object-type entry) - (2 ;; stream - (let ((entry-name (ole-entry-name-to-string - (ole-entry.name entry) - (ole-entry.name-length entry)))) - (when (equal "WordDocument" entry-name) - (with-stream (in (ole-entry-stream ole-file entry)) - (let ((fib (read-fib in))) - ;;(describe fib) - (let ((x (fib-fibRgFcLcb fib))) - (setq fcPlfLst (FibRgFcLcb97.fcPlfLst x) - lcbPlfLst (FibRgFcLcb97.lcbPlfLst x))) - (return-from found1) - #+nil - (multiple-value-bind (fcPlfLst lcbPlfLst) - (with-stream (s (vector-stream (subseq (fib-fibRgFcLcbBlob fib) #.(* 4 146)))) - (values (read-dword s) (read-dword s))) - (print (list :@@@ fcPlfLst lcbPlfLst)) - )))))))))) - (block found2 - (traverse-directories - ole-file - (lambda (entry) - (case (ole-entry.object-type entry) - (2 ;; stream - (let ((entry-name (ole-entry-name-to-string - (ole-entry.name entry) - (ole-entry.name-length entry)))) - (when (or (equal "0Table" entry-name) ;; TODO be sure which one? - (equal "1Table" entry-name)) - (with-stream (in (ole-entry-stream ole-file entry)) - (stream-position in fcPlfLst) - (let ((PlfLst (read-PlfLst in))) - (let ((n 0)) - (dotimes (i (length PlfLst)) - (incf n (if (LSTF.fSimpleList (aref PlfLst i)) 1 9))) - (let ((lvls (make-array n))) - (dotimes (i n) - (setf (aref lvls i) (read-lvl in))) - ;; now I have lstf[] and lvl[] - (let (anums ;; roughly like w:abstractNum - (j 0)) - (dotimes (i (length PlfLst)) - (let ((lstf (aref PlfLst i))) - (unless (LSTF.fSimpleList lstf) - (push (list i #+nil lstf j) anums)) - (incf j (if (LSTF.fSimpleList lstf) 1 9)))) - (setq anums (nreverse anums)) - ;;(print anums) - (dolist (a anums) - (destructuring-bind (i j) a ;; i_lstf j_lvl0 - (declare (ignore i)) - (let* ((lvl (aref lvls (1+ j))) ;; hardcode second level - (lvlf (LVL-lvlf lvl))) - ;;(print (list :@@@ j (LVLF.fNoRestart lvlf) (LVLF.ilvlRestartLim lvlf))) - (push (LVLF.%physical-stream-position lvlf) offsets))))) - #+nil - (dotimes (i n) - (let* ((lvl (aref lvls i)) - (lvlf (LVL-lvlf lvl))) - (print (list :@@@ i (LVLF.fNoRestart lvlf) (LVLF.ilvlRestartLim lvlf))))))))) - (return-from found2)))))))) + (with-stream (in (ole-entry-stream + ole-file + (find-ole-entry ole-file + :name "WordDocument" + :type 2))) + (let ((fib (read-fib in))) + ;;(describe fib) + (let ((x (fib-fibRgFcLcb fib))) + (setq fcPlfLst (FibRgFcLcb97.fcPlfLst x) + lcbPlfLst (FibRgFcLcb97.lcbPlfLst x))) + #+nil + (multiple-value-bind (fcPlfLst lcbPlfLst) + (with-stream (s (vector-stream (subseq (fib-fibRgFcLcbBlob fib) #.(* 4 146)))) + (values (read-dword s) (read-dword s))) + (print (list :@@@ fcPlfLst lcbPlfLst)) + ))) + (with-stream (in (ole-entry-stream + ole-file + (find-ole-entry ole-file + :name '("0Table" "1Table") ;; TODO be sure which one? + :type 2))) + (stream-position in fcPlfLst) + (let ((PlfLst (read-PlfLst in))) + (let ((n 0)) + (dotimes (i (length PlfLst)) + (incf n (if (LSTF.fSimpleList (aref PlfLst i)) 1 9))) + (let ((lvls (make-array n))) + (dotimes (i n) + (setf (aref lvls i) (read-lvl in))) + ;; now I have lstf[] and lvl[] + (let (anums ;; roughly like w:abstractNum + (j 0)) + (dotimes (i (length PlfLst)) + (let ((lstf (aref PlfLst i))) + (unless (LSTF.fSimpleList lstf) + (push (list i #+nil lstf j) anums)) + (incf j (if (LSTF.fSimpleList lstf) 1 9)))) + (setq anums (nreverse anums)) + ;;(print anums) + (dolist (a anums) + (destructuring-bind (i j) a ;; i_lstf j_lvl0 + (declare (ignore i)) + (let* ((lvl (aref lvls (1+ j))) ;; hardcode second level + (lvlf (LVL-lvlf lvl))) + ;;(print (list :@@@ j (LVLF.fNoRestart lvlf) (LVLF.ilvlRestartLim lvlf))) + (push (LVLF.%physical-stream-position lvlf) offsets))))) + #+nil + (dotimes (i n) + (let* ((lvl (aref lvls i)) + (lvlf (LVL-lvlf lvl))) + (print (list :@@@ i (LVLF.fNoRestart lvlf) (LVLF.ilvlRestartLim lvlf))))))))) #+nil(values fcPlfLst lcbPlfLst))) (let ((fixed (format nil "~a.fixed.doc" filename))) (copy-file filename fixed)