cl-olefs

OLE File System tools for Common Lisp
git clone https://logand.com/git/cl-olefs.git/
Log | Files | Refs

commit 2e148acc3a79f4881a3bc098189e8432d542cd9a
parent 782176aad12ffaad13f2c26e0caea4d8c6868637
Author: Tomas Hlavaty <tom@logand.com>
Date:   Sun, 24 Mar 2013 03:22:29 +0100

parse-xls-file added

Diffstat:
Mcdef.lisp | 3++-
Molefs.lisp | 286+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 284 insertions(+), 5 deletions(-)

diff --git a/cdef.lisp b/cdef.lisp @@ -51,12 +51,13 @@ ;;(slot-type-read #+nil 'dword #+nil '(byte 6) '(wchar 6)) -(defun slot-reader-let-definition (name type &key compute always member) +(defun slot-reader-let-definition (name type &key compute always member when default) (list name (flet ((value () (cond (compute compute) + (when `(if ,when ,(slot-type-read type) ,default)) (t (slot-type-read type))))) (cond (always `(let ((x ,(value))) (assert (equal x ,always)) x)) diff --git a/olefs.lisp b/olefs.lisp @@ -68,6 +68,7 @@ (ecase msg (close (setq stream nil)) (stream-position offset) + (physical-stream-position (physical-stream-position stream)) (read-octet (unless (< offset size) (error 'end-of-file)) @@ -189,9 +190,33 @@ (starting-sector-location dword) (stream-size ulonglong)) -(defun ole-entry-name-to-string (name length) - (coerce (mapcar #'code-char (coerce (subseq name 0 (1- (/ length 2))) 'list)) - 'string)) +(defun string-from-achars (achars &optional length) ;; TODO encoding? + (let* ((n (or length (length achars))) + (s (make-string n))) + (dotimes (i n s) + (setf (aref s i) (code-char (aref achars i)))))) + +(defun string-from-wchars (wchars &optional length) ;; TODO encoding? + (let* ((n (or length (length wchars))) + (s (make-string n))) + (dotimes (i n s) + (setf (aref s i) (code-char (aref wchars i)))))) + +(defun string-from-octets (octets fHighByte &optional nbytes) ;; TODO encoding? + (if fHighByte + (multiple-value-bind (n m) (floor (or nbytes (length octets)) 2) + (assert (zerop m)) + (let ((s (make-string n))) + (dotimes (i n s) + (setf (aref s i) (code-char (let ((2*i (ash i 1))) + (+ (aref octets 2*i) + (ash (aref octets (1+ 2*i)) 8)))))))) + (string-from-achars octets nbytes))) + +(defun ole-entry-name-to-string (octets n) + (multiple-value-bind (n m) (floor n 2) + (assert (zerop m)) + (string-from-achars octets (1- n)))) ;; minus #\null (defun print-ole-entry (ole-entry stream) (print-unreadable-object (ole-entry stream :type t :identity t) @@ -510,7 +535,7 @@ (defstruct PersistDirectoryAtom header entries) -(defun read-record-body (stream RecordHeader &optional fn) +(defun read-record-body (stream RecordHeader &optional fn) ;; TODO move up?! (let ((x RecordHeader #+nil(read-RecordHeader stream))) (with-slots (recVer recInstance recType recLen) x (flet ((blip (ext guid2 &optional metafileHeader) @@ -1432,3 +1457,256 @@ #+nil(write-byte (logand #x07 flags) s)) (stream-position s (+ 26 o)) (write-byte 0 s)))))) + +(defun extract-files (filename &optional (dir "/tmp")) + (with-stream (ole-file (ole-file-stream filename)) + (do ((s (ole-directory-stream ole-file)) + e + (i 0)) + ((not (setq e (funcall s)))) + (print-ole-entry e *standard-output*) + (terpri) + (ecase (ole-entry.object-type e) + ((0 1 5)) + (2 (with-stream (in (ole-entry-stream ole-file e)) + (with-open-file (out (format nil "~a/XX-~d" dir (incf i)) + :direction :output + :if-does-not-exist :create + :if-exists :supersede + :element-type '(unsigned-byte 8)) + (copy-stream in out)))))))) + +;;; MS-XLS Excel binary file + +(define-structure BIFFRecordHeader () + (tag ushort) + (length ushort)) + +(define-structure BIFF-ShortXLUnicodeString () + (cch ubyte) + (%dummy ubyte :member '(0 1)) + (fHighByte t :compute (not (zerop (logand #x80 %dummy)))) + (reserved1 t :compute (assert (zerop (logand #x7f %dummy)))) + (rgb (ubyte (if fHighByte (* 2 cch) cch))) + (decoded t :compute (string-from-octets rgb fHighByte))) + +(define-structure BIFF-BoundSheet8 () + (lbPlyPos dword) + (hsState ubyte :member '(0 1 2)) + (dt ubyte :member '(0 1 2 6)) + (stName BIFF-ShortXLUnicodeString)) + +(define-structure BIFF-Cell () + (rw ushort) + (col ushort) + (ixfe ushort)) + +(define-structure BIFF-Blank () + (cell DIFF-Cell)) + +(define-structure BIFF-RkNumber () + (%dummy dword) + (percent t :compute (not (zerop (logand #x80000000 %dummy)))) + (signed t :compute (not (zerop (logand #x40000000 %dummy)))) + (value t :compute (if signed + (let* ((x (logand #x1fffffff)) + (y (if (zerop (logand #x20000000)) + x + x #+nil (- (1+ (lognot x)))))) ;; TODO!!! + (if percent (/ y 100) y)) + %dummy #+nil(error "TODO partial double")))) ;; TODO!!! + +(define-structure BIFF-RkRec () + (ixfe ushort) + (rk BIFF-RkNumber)) + +(define-structure BIFF-RK () + (rw ushort) + (col ushort) + (rkRec BIFF-RkRec)) + +(define-structure BIFF-Bes () + (bBoolErr ubyte) + (fError ubyte :member '(0 1)) + (decoded t :compute (if (zerop fError) + (ecase bBoolErr + (0 nil) + (1 t)) + (ecase fError + (0 :#NULL!) + (7 :#DIV/0!) + (#xf :#VALUE!) + (#x17 :#REF!) + (#x1d :#NAME!) + (#x24 :#NUM!) + (#x2a :#N/A) + (#x2b :#GETTING_DATA))))) + +(define-structure BIFF-BoolErr () + (cell BIFF-Cell) + (bes BIFF-Bes)) + +(define-structure BIFF-Number () + (cell BIFF-Cell) + (num ulonglong)) ;; TODO double + +(define-structure BIFF-LabelSst () + (cell BIFF-Cell) + (isst dword)) + +(define-structure BIFF-FormulaValue () ;; TODO + (byte1 ubyte) + (byte2 ubyte) + (byte3 ubyte) + (byte4 ubyte) + (byte5 ubyte) + (byte6 ubyte) + (fExprO ushort)) + +#+nil +(define-structure BIFF-CellParsedFormula () ;; TODO + (cce ushort) + (rgce (ubyte cce)) + (rgcb BIFF-RgbExtra)) + +#+nil +(define-structure BIFF-Formula () ;; TODO + (cell BIFF-Cell) + (val BIFF-FormulaValue) + (%dummy ushort) + (fAlwaysCalc t :compute (not (zerop (logand #x8000)))) + (reserved1 t :compute (assert (zerop (logand #x4000)))) + (fFill t :compute (not (zerop (logand #x2000)))) + (fShrFmla t :compute (not (zerop (logand #x1000)))) + (reserved2 t :compute (assert (zerop (logand #x800)))) + (fClearErrors t :compute (not (zerop (logand #x400)))) + (reserved3 t :compute (assert (zerop (logand #x3ff)))) + (chn dword) + (formula BIFF-CellParsedFormula)) + +#+nil +(define-structure BIFF-MulBlank () ;; TODO + (rw ushort) + (colFirst ushort)) + +(define-structure BIFF-FormatRun () + (ich ushort) + (ifnt ushort)) + +(define-structure BIFF-LPWideString () + (cchCharacters ushort) + (rgchData (wchar cchCharacters)) + (decoded t :compute (string-from-wchars rgchData))) + +(define-structure BIFF-RPHSSub () + (crun ushort) + (cch ushort) + (st BIFF-LPWideString)) + +(define-structure BIFF-ExtRst () + (reserved ushort) + (cb ushort) + (phs dword) + (rphssub BIFF-RPHSSub) + (rgphruns (PhRuns (BIFF-RPHSSub.crun rphssub)))) + +(define-structure BIFF-XLUnicodeRichExtendedString () + (cch ushort) + (%dummy ubyte) + (fHighByte t :compute (not (zerop (logand #x80 %dummy)))) + (reserved1 t :compute (assert (zerop (logand #x40 %dummy)))) + (fExtSt t :compute (not (zerop (logand #x20 %dummy)))) + (fRichSt t :compute (not (zerop (logand #x10 %dummy)))) + (reserved2 t :compute (assert (zerop (logand #xf %dummy)))) + (cRun ushort :when fRichSt :default 0) + (cbExtRst dword :when fExtSt :default 0) + (rgb (ubyte (if fHighByte (* 2 cch) cch))) + (rgRun (BIFF-FormatRun cRun) :when fRichSt :default #()) + (ExtRst (BIFF-ExtRst cbExtRst) :when fExtSt :default #()) + (decoded t :compute (string-from-octets rgb fHighByte))) + +(define-structure BIFF-SST () + (cstTotal dword) + (cstUnique dword) + (rgb (BIFF-XLUnicodeRichExtendedString cstUnique))) + +(define-structure BIFF-DefColWidth () + (cchdefColWidth ushort)) + +(define-structure BIFF-Index () + (reserved dword :always 0) + (rwMic dword) + (rwMac dword) + (ibXF dword) + (rgibRw (dword 1))) + +(defun biff-substream (ole-entry-stream) + (let ((in ole-entry-stream) + end + eof) + (flet ((header () + (let* ((h (read-BIFFRecordHeader in)) + (nbytes (BIFFRecordHeader.length h))) + (setq end (+ (stream-position in) nbytes)) + (values (BIFFRecordHeader.tag h) + (shorter-stream in nbytes))))) + (assert (member (header) '(#x0009 #x0209 #x0409 #x0809))) ;; bof + (lambda () + (assert (not eof)) + (stream-position in end) + (multiple-value-bind (tag s) (header) + (case tag ;; TODO more cell types + (#x000a (not (setq eof t))) + ;;(#x000b :index1) + (#x0085 (read-BIFF-BoundSheet8 s)) + (#x00fc (read-BIFF-SST s)) + (#x00fd (read-BIFF-LabelSst s)) + ;;(#x020b (read-BIFF-Index s)) + (#x027e (read-BIFF-Rk s)) + (t tag))))))) + +(defun princ-cell-value (x sst) + (typecase x + (BIFF-LabelSst + (let ((c (BIFF-LabelSst.cell x))) + `(:label ,(BIFF-Cell.rw c) + ,(BIFF-Cell.col c) + ,(BIFF-XLUnicodeRichExtendedString.decoded + (aref (BIFF-SST.rgb sst) (BIFF-LabelSst.isst x)))))) + (BIFF-RK + `(:number ,(BIFF-RK.rw x) + ,(BIFF-RK.col x) + ,(BIFF-RkNumber.value (BIFF-RkRec.rk (BIFF-RK.RkRec x))))))) + +(defun parse-sheet (BIFF-BoundSheet8 stream sst) + (stream-position stream (BIFF-BoundSheet8.lbPlyPos BIFF-BoundSheet8)) + (do (z x (s (biff-substream stream))) + ((not (setq x (funcall s))) + (nreverse z)) + (let ((v (princ-cell-value x sst))) + (when v + (push v z)))) + #+nil + (let ((index (funcall (biff-substream stream)))) + (etypecase index + (BIFF-Index index #+nil(BIFF-Index.rgibRw ))))) + +(defun parse-xls-file (filename) + (with-stream (f (ole-file-stream filename)) + (let ((e (find-ole-entry f :name "Workbook" :type 2))) + (when e + (with-stream (in (ole-entry-stream f e)) + (let (sheets sst) + (do (x (globals (biff-substream in))) + ((not (setq x (funcall globals))) + (setq sheets (nreverse sheets))) + (typecase x + (BIFF-BoundSheet8 (push x sheets)) + (BIFF-SST (setq sst x)))) + `(:workbook + ,@(loop + for x in sheets + collect `(:sheet + ,(BIFF-ShortXLUnicodeString.decoded + (BIFF-BoundSheet8.stName x)) + ,@(parse-sheet x in sst))))))))))