commit 013966898f742dc7907f6270804e1b7dbbf1e2db
parent 36589e26dc00c8e2b0dbf7a1f3648cdccfb52bc3
Author: Tomas Hlavaty <tom@logand.com>
Date: Wed, 15 Jun 2011 00:34:39 +0200
handle text and images in html output
Diffstat:
M | enums.lisp | | | 24 | +++++++++++++++++++----- |
M | olefs.lisp | | | 104 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------- |
2 files changed, 107 insertions(+), 21 deletions(-)
diff --git a/enums.lisp b/enums.lisp
@@ -234,18 +234,32 @@
(RT_OfficeArtDggContainer #xF000)
(RT_OfficeArtBStoreContainer #xF001)
(RT_OfficeArtDgContainer #xF002)
+ (RT_OfficeArtSpgrContainer #xF003)
+ (RT_OfficeArtSpContainer #xF004)
(RT_OfficeArtFDGGBlock #xF006)
(RT_OfficeArtFBSE #xF007)
(RT_OfficeArtFDG #xF008)
+ (RT_OfficeArtFSPGR #xF009)
+ (RT_OfficeArtFSP #xF00A)
+ (RT_OfficeArtFOPT #xF00B)
(RT_OfficeArtClientTextbox #xF00D)
(RT_OfficeArtClientAnchor #xF010)
(RT_OfficeArtClientData #xF011)
(RT_OfficeArtFRITContainer #xF118)
- (RT_OfficeArtSpgrContainer #xF003)
- (RT_OfficeArtSpContainer #xF004)
- (RT_OfficeArtFSPGR #xF009)
- (RT_OfficeArtFSP #xF00A)
- (RT_OfficeArtFOPT #xF00B)
(RT_OfficeArtColorMRUContainer #xF11A)
(RT_OfficeArtSplitMenuColorContainer #xF11E)
(RT_OfficeArtTertiaryFOPT #xF122))
+
+(defenum OfficeArtFOPTEOPID ()
+ (pib #x0104)
+ (fillColor #x0181)
+ (fillOpacity #x0182)
+ (fillBackColor #x0183)
+ (fillBackOpacity #x0184)
+ (fillCrMod #x0185)
+ (fillBlip #x0186)
+ (fillBlipName #x0187)
+ (fillBlipFlags #x0188)
+ (fillWidth #x0189)
+
+ (fillAngle #x018B))
diff --git a/olefs.lisp b/olefs.lisp
@@ -601,10 +601,33 @@
"<br/>"
(code-char n)))
-(defun ppt-entry-to-html-naive (ole-file entry stream title)
+(define-structure OfficeArtFOPTEOPID ()
+ (%dummy ushort)
+ (opid t :compute (logand #x3fff %dummy))
+ (fBid t :compute (if (zerop (logand #x4000 %dummy)) nil t))
+ (fComplex t :compute (if (zerop (logand #x8000 %dummy)) nil t)))
+
+(define-structure OfficeArtFBSE ()
+ (btWin32 ubyte)
+ (btMacOS ubyte)
+ (rgbUid GUID)
+ (tag ushort)
+ (size dword)
+ (cRef dword)
+ (foDelay dword)
+ (unused1 ubyte)
+ (cbName ubyte)
+ (unused2 ubyte)
+ (unused3 ubyte)
+ #+nil(nameData (ubyte cbName))
+ #+nil(embeddedBlip (ubyte size)))
+
+(defun ppt-entry-to-html-naive (ole-file entry stream title pictures)
(macrolet ((out (&rest args)
`(format stream ,@args)))
- (let ((slide-no 0))
+ (let ((slide-no 0)
+ (blip-no 0)
+ (blips nil))
(out "<html>~%<head>~%")
(when title
(out "<title>~a</title>~%" title))
@@ -617,10 +640,23 @@
(declare (ignore i level start end))
(case (RecordHeader.recType h)
(#.RT_Document
+ #+nil
(out "<div><p>@@@ ~a #x~x ~a</p>~%"
(RecordHeader.recType h)
(RecordHeader.recType h)
(enum-by-value 'RecordType (RecordHeader.recType h))))
+ (#.RT_OfficeArtFBSE
+ (let* ((x (read-OfficeArtFBSE in))
+ (y (find (OfficeArtFBSE.foDelay x) pictures :key #'cadr)))
+ (assert y)
+ (push (list (incf blip-no) (car y) (caddr y)) blips)
+ #+nil
+ (out "<div><p>@@@ ~a #x~x ~a === img ~s ~s</p>~%"
+ (RecordHeader.recType h)
+ (RecordHeader.recType h)
+ (enum-by-value 'RecordType (RecordHeader.recType h))
+ blip-no
+ (OfficeArtFBSE.foDelay x))))
(#.RT_Slide
(out "<hr/>~%</div>~%")
(incf slide-no)
@@ -640,6 +676,28 @@
for j from 0 below (RecordHeader.recLen h)
do (out "~a" (ascii-char (read-byte in))))
(out "</p>~%"))
+ (#.RT_OfficeArtFOPT
+ #+nil
+ (out "<p>!!! ~a #x~x ~a</p>~%"
+ (RecordHeader.recType h)
+ (RecordHeader.recType h)
+ (enum-by-value 'RecordType (RecordHeader.recType h)))
+ (with-shorter-stream (s in (RecordHeader.recLen h))
+ (let ((len (RecordHeader.recLen h)))
+ (loop
+ while (< (file-position s) len)
+ do (let ((opid (read-OfficeArtFOPTEOPID s))
+ (value (read-value 'dword s)))
+ ;;(out "<p>...... ~s ~s</p>~%" opid value)
+ (when (OfficeArtFOPTEOPID.fComplex opid)
+ (decf len value))
+ (case (OfficeArtFOPTEOPID.opid opid)
+ (#.pib
+ (assert (OfficeArtFOPTEOPID.fBid opid))
+ (destructuring-bind (j n ext) (assoc value blips)
+ (assert (and j n ext))
+ (out "<img src=\"~a.~(~a~)\"/>~%" n ext)))))))))
+ #+nil
(t (out "<p>@@@ ~a #x~x ~a</p>~%"
(RecordHeader.recType h)
(RecordHeader.recType h)
@@ -648,19 +706,33 @@
(out "</div>~%"))
(out "</body>~%</html>~%"))))
+(defun find-ole-entry (ole-file name)
+ (traverse-directories
+ ole-file
+ (lambda (entry id level)
+ (declare (ignore id level))
+ (let ((entry-name (ole-entry-name-to-string
+ (ole-entry.name entry)
+ (ole-entry.name-length entry))))
+ (when (equal name entry-name)
+ (return-from find-ole-entry entry))))))
+
(defun ppt-file-to-html-naive (filename &optional (stream *standard-output*))
(with-ole-file (ole-file filename)
- (traverse-directories
- ole-file
- (lambda (entry id level)
- (declare (ignore id level))
- (case (ole-entry.object-type entry)
- (2 ;; stream
- (let ((entry-name (ole-entry-name-to-string
- (ole-entry.name entry)
- (ole-entry.name-length entry))))
- (when (equal "PowerPoint Document" entry-name)
- (ppt-entry-to-html-naive ole-file entry stream filename)))))))))
+ (let ((pictures nil))
+ (walk-RecordHeader-tree ole-file
+ (find-ole-entry ole-file "Pictures")
+ (lambda (in level i h start end)
+ (declare (ignore level end))
+ (multiple-value-bind (blip kind)
+ (read-record-body in h)
+ (declare (ignore blip))
+ (push (list i (- start 8) kind) pictures))))
+ (ppt-entry-to-html-naive ole-file
+ (find-ole-entry ole-file "PowerPoint Document")
+ stream
+ filename
+ pictures))))
(define-structure UserEditAtom ()
(lastSlideIdRef dword)
@@ -692,6 +764,8 @@
(case (RecordHeader.recType h)
(#.RT_Document
(out "<div>~%"))
+ (#.RT_Slide
+ (out "<hr/>~%</div>~%<div class=\"slide\">~%<h1>Slide ~d</h1>~%" (incf slide-no)))
((#.RT_TextCharsAtom #.RT_CString) ;; utf16le
(out "<p>")
(loop
@@ -703,9 +777,7 @@
(loop
for j from 0 below (RecordHeader.recLen h)
do (out "~a" (ascii-char (read-byte in))))
- (out "</p>~%"))
- (#.RT_Slide
- (out "<hr/>~%</div>~%<div class=\"slide\">~%<h1>Slide ~d</h1>~%" (incf slide-no))))))
+ (out "</p>~%")))))
(out "</div>~%</body>~%</html>~%"))))
(defun process-PersistDirectoryAtom (htab in)