olefs.lisp (64946B)
1 ;;; Copyright (C) 2011, 2012, 2013, 2014 Tomas Hlavaty <tom@logand.com> 2 ;;; 3 ;;; Permission is hereby granted, free of charge, to any person 4 ;;; obtaining a copy of this software and associated documentation 5 ;;; files (the "Software"), to deal in the Software without 6 ;;; restriction, including without limitation the rights to use, copy, 7 ;;; modify, merge, publish, distribute, sublicense, and/or sell copies 8 ;;; of the Software, and to permit persons to whom the Software is 9 ;;; furnished to do so, subject to the following conditions: 10 ;;; 11 ;;; The above copyright notice and this permission notice shall be 12 ;;; included in all copies or substantial portions of the Software. 13 ;;; 14 ;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 ;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 ;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 ;;; NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 18 ;;; HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 19 ;;; WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 ;;; OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 ;;; DEALINGS IN THE SOFTWARE. 22 23 (in-package :olefs) 24 25 (defun double-float-from-bits (high low) 26 (declare (optimize (speed 3) (debug 0)) 27 (type (unsigned-byte 32) high low)) 28 #+ccl 29 (ccl::double-float-from-bits high low) 30 #+sbcl 31 (sb-kernel:make-double-float (sb-c::mask-signed-field 32 high) low) 32 #-(or ccl sbcl) 33 (let ((bignum 0)) 34 (declare (type (unsigned-byte 64) bignum)) 35 (setf (ldb (byte 32 0) bignum) low 36 (ldb (byte 32 32) bignum) high) 37 (ieee-floats:decode-float64 bignum))) 38 39 (defmacro with-stream ((var stream) &body body) 40 `(let ((,var ,stream)) 41 (unwind-protect (progn ,@body) 42 (funcall ,var 'close)))) 43 44 (defun stream-position (stream &optional newpos) 45 (if (functionp stream) 46 (funcall stream 'stream-position newpos) 47 (if newpos 48 (file-position stream newpos) 49 (file-position stream)))) 50 51 (defun physical-stream-position (stream) 52 (if (functionp stream) 53 (funcall stream 'physical-stream-position) 54 (file-position stream))) 55 56 (defun read-octet (stream) 57 (if (functionp stream) 58 (funcall stream 'read-octet) 59 (read-byte stream))) 60 61 (defun copy-stream (in out) 62 (handler-case (loop (write-byte (read-octet in) out)) 63 (end-of-file ()))) 64 65 (defun copy-file (in out) 66 (with-open-file (i in :element-type '(unsigned-byte 8)) 67 (with-open-file (o out 68 :element-type '(unsigned-byte 8) 69 :direction :output 70 :if-exists :error 71 :if-does-not-exist :create) 72 (loop 73 with buf = (make-array 4096 :element-type '(unsigned-byte 8)) 74 with n = nil 75 while (plusp (setq n (read-sequence buf i))) 76 do (write-sequence buf o :end n))))) 77 78 (defun shorter-stream (stream size) 79 (let ((offset 0) 80 self) 81 (setq self 82 (lambda (msg) 83 (assert stream) 84 (ecase msg 85 (close (setq stream nil)) 86 (stream-position offset) 87 (physical-stream-position (physical-stream-position stream)) 88 (read-octet 89 (unless (< offset size) 90 (error 'end-of-file :stream self)) 91 (incf offset) 92 (read-octet stream))))))) 93 94 (defun vector-stream (vector physical-stream-position) 95 (let ((offset 0) 96 (size (length vector)) 97 self) 98 (setq self 99 (lambda (msg) 100 (assert vector) 101 (ecase msg 102 (close (setq vector nil)) 103 (stream-position offset) 104 (physical-stream-position (+ offset physical-stream-position)) 105 (read-octet 106 (unless (< offset size) 107 (error 'end-of-file :stream self)) 108 (prog1 (aref vector offset) 109 (incf offset)))))))) 110 111 ;;; MS-CFB Compound File Binary File Format 112 113 (defconstant +unused-sector+ 0) 114 (defconstant +maxregsect+ #xfffffffa) 115 (defconstant +difsect+ #xfffffffc) 116 (defconstant +fatsect+ #xfffffffd) 117 (defconstant +endofchain+ #xfffffffe) 118 (defconstant +freesect+ #xffffffff) 119 120 (defconstant +maxregsig+ #xfffffffa) 121 (defconstant +nostream+ #xffffffff) 122 123 #+nil 124 (defconstant clsid-null (make-array 16 125 :element-type '(unsigned-byte 8) 126 :initial-element 0)) 127 128 (defun read-ushort (stream) 129 (logior (read-octet stream) 130 (ash (read-octet stream) 8))) 131 132 (defun read-dword (stream) 133 (logior (read-octet stream) 134 (ash (read-octet stream) 8) 135 (ash (read-octet stream) 16) 136 (ash (read-octet stream) 24))) 137 138 (defun read-ulonglong (stream) 139 (logior (read-octet stream) 140 (ash (read-octet stream) 8) 141 (ash (read-octet stream) 16) 142 (ash (read-octet stream) 24) 143 (ash (read-octet stream) 32) 144 (ash (read-octet stream) 40) 145 (ash (read-octet stream) 48) 146 (ash (read-octet stream) 56))) 147 148 (defun read-achar (stream) 149 (read-octet stream)) 150 151 (defun read-wchar (stream) 152 (read-ushort stream)) 153 154 (defun read-filetime (stream) 155 (read-ulonglong stream)) 156 157 (defun read-octets (stream n) 158 (let ((x (make-array n :element-type '(unsigned-byte 8) :initial-element 0))) 159 (if (functionp stream) 160 (let ((i 0)) 161 (handler-case (do () 162 ((<= n i)) 163 (setf (aref x i) (read-octet stream)) 164 (incf i)) 165 (end-of-file () i))) 166 (read-sequence x stream)) 167 x)) 168 169 (defun read-guid (stream) 170 (read-octets stream 16)) 171 172 (defun read-vector (stream n element-type reader) 173 (let ((x (make-array n :element-type element-type :initial-element 0))) 174 (dotimes (i n x) 175 (setf (aref x i) (funcall reader stream))))) 176 177 (define-structure ole-header () 178 (signature (ubyte 8)) 179 (clsid guid) 180 (minor-version ushort) 181 (major-version ushort) 182 (byte-order ushort) 183 (sector-shift ushort) 184 (mini-sector-shift ushort) 185 (reserved (ubyte 6)) 186 (number-of-directory-sectors dword) 187 (number-of-fat-sectors dword) 188 (first-directory-sector-location dword) 189 (transaction-signature-number dword) 190 (mini-stream-cutoff-size dword) 191 (first-mini-fat-sector-location dword) 192 (number-of-mini-fat-sectors dword) 193 (first-difat-sector-location dword) 194 (number-of-difat-sectors dword)) 195 196 (define-structure ole-entry () 197 (name (wchar 32)) 198 (name-length ushort) 199 (object-type ubyte) 200 (color-flag ubyte) 201 (left-sibling-id dword) 202 (right-sibling-id dword) 203 (child-id dword) 204 (clsid guid) 205 (state-bits dword) 206 (creation-time filetime) 207 (modified-time filetime) 208 (starting-sector-location dword) 209 (stream-size ulonglong)) 210 211 (defun string-from-achars (achars &optional length) ;; TODO encoding? 212 (let* ((n (or length (length achars))) 213 (s (make-string n))) 214 (dotimes (i n s) 215 (let ((c (aref achars i))) 216 (assert (plusp c)) 217 (setf (aref s i) (code-char c)))))) 218 219 (defun string-from-wchars (wchars &optional length) ;; TODO encoding? 220 (let* ((n (or length (length wchars))) 221 (s (make-string n))) 222 (dotimes (i n s) 223 (let ((c (aref wchars i))) 224 (assert (plusp c)) 225 (setf (aref s i) (code-char c)))))) 226 227 (defun string-from-octets (octets fHighByte &optional nbytes) ;; TODO encoding? 228 (if fHighByte 229 (multiple-value-bind (n m) (floor (or nbytes (length octets)) 2) 230 (assert (zerop m)) 231 (let ((s (make-string n))) 232 (dotimes (i n s) 233 (let ((c (let ((2*i (ash i 1))) 234 (+ (aref octets 2*i) 235 (ash (aref octets (1+ 2*i)) 8))))) 236 (assert (plusp c)) 237 (setf (aref s i) (code-char c)))))) 238 (string-from-achars octets nbytes))) 239 240 (defun ole-entry-name-to-string (octets n) 241 (multiple-value-bind (n m) (floor n 2) 242 (assert (zerop m)) 243 (string-from-achars octets (1- n)))) ;; minus #\null 244 245 (defun print-ole-entry (ole-entry stream) 246 (print-unreadable-object (ole-entry stream :type t :identity t) 247 (format stream "~s ~a ~a ~sB @~s" 248 (ole-entry-name-to-string (ole-entry.name ole-entry) 249 (ole-entry.name-length ole-entry)) 250 (ecase (ole-entry.object-type ole-entry) 251 (0 "unknown") 252 (1 "storage") 253 (2 "stream") 254 (5 "root")) 255 (ecase (ole-entry.color-flag ole-entry) 256 (0 "red") 257 (1 "black")) 258 (ole-entry.stream-size ole-entry) 259 (ole-entry.starting-sector-location ole-entry)))) 260 261 (defun location-position (location) 262 (* (1+ location) 512)) 263 264 (defun seek-sector (location stream) 265 (let ((position (location-position location))) 266 (assert (stream-position stream position)) 267 location)) 268 269 (defun check-ole-header (x) 270 (assert (equalp #(#xd0 #xcf #x11 #xe0 #xa1 #xb1 #x1a #xe1) (ole-header.signature x))) 271 ;;(assert (equalp clsid-null (ole-header.clsid x))) 272 (assert (eql #xfffe (ole-header.byte-order x))) 273 (assert (equalp #(0 0 0 0 0 0) (ole-header.reserved x))) 274 ;; TODO 275 (assert (eql 3 (ole-header.major-version x))) 276 (assert (eql 512 (ash 1 (ole-header.sector-shift x)))) 277 (assert (eql 64 (ash 1 (ole-header.mini-sector-shift x)))) 278 (assert (eql 0 (ole-header.number-of-directory-sectors x))) 279 ;;(assert (eql #xfffffffe (first-directory-sector-location x))) 280 (assert (eql 0 (ole-header.transaction-signature-number x))) 281 (assert (eql 4096 (ole-header.mini-stream-cutoff-size x))) 282 ;;(assert (eql #xfffffffe (first-mini-fat-sector-location x))) 283 (unless (plusp (ole-header.number-of-difat-sectors x)) 284 (assert (eql #xfffffffe (ole-header.first-difat-sector-location x))))) 285 286 (defun sector-chain (fat location) 287 (labels ((rec (x) 288 (unless (member x (list +difsect+ +fatsect+ +endofchain+ +freesect+)) 289 (assert (and #+nil(< +unused-sector+ x) (<= 0 x +maxregsect+))) 290 (cons x (rec (aref fat x)))))) 291 (rec location))) 292 293 (defun read-values (array reader stream &optional (start 0) end) 294 (loop 295 for i from start below (or end (length array)) 296 do (setf (aref array i) (funcall reader stream)))) 297 298 (defun read-difat (header stream) 299 (let ((x (make-array (+ 109 300 (* #.(/ (- 512 4) 4) 301 (ole-header.number-of-difat-sectors header))) 302 :element-type 'dword))) 303 (read-values x 'read-dword stream 0 109) 304 (loop 305 with m = #.(1- (/ 512 4)) 306 for n = (ole-header.first-difat-sector-location header) 307 then (read-dword stream) 308 for i = 109 then (+ m i) 309 until (= +endofchain+ n) 310 do (progn 311 (seek-sector n stream) 312 (read-values x 'read-dword stream i (+ m i)))) 313 x)) 314 315 (defun read-fat (difat stream) 316 (let* ((m #.(/ 512 4)) 317 (n (length difat)) 318 (x (make-array (* m n) :element-type 'dword))) 319 (dotimes (i n x) 320 (let ((s (aref difat i))) 321 (unless (= +freesect+ s) 322 (seek-sector s stream) 323 (read-values x 'read-dword stream (* m i) (* m (1+ i)))))))) 324 325 (defun read-directories (chain stream) 326 (let* ((m #.(/ 512 128)) 327 (x (make-array (* m (length chain)) 328 :element-type '(or null ole-entry) 329 :initial-element nil)) 330 (i -1)) 331 (dolist (s chain x) 332 (seek-sector s stream) 333 (dotimes (j m) 334 (setf (aref x (incf i)) (read-ole-entry stream)))))) 335 336 (defun read-mfat (chain stream) 337 (let* ((m #.(/ 512 4)) 338 (x (make-array (* m (length chain)) :element-type 'dword)) 339 (i -1)) 340 (dolist (s chain x) 341 (seek-sector s stream) 342 ;;TODO block read (read-values x 'read-dword stream (* m i) (* m (1+ i))) 343 (dotimes (j m) 344 (setf (aref x (incf i)) (read-dword stream)))))) 345 346 (defun %ole-entry-stream (header fat directories mfat stream ole-entry) 347 (let* ((offset 0) 348 (mini (< (ole-entry.stream-size ole-entry) 349 (ole-header.mini-stream-cutoff-size header))) 350 (chain (let ((x (sector-chain 351 fat 352 (ole-entry.starting-sector-location 353 (if mini 354 (aref directories 0) 355 ole-entry))))) 356 (when x 357 (coerce x 'vector)))) 358 (mchain (when mini 359 (let ((x (sector-chain 360 mfat 361 (ole-entry.starting-sector-location ole-entry)))) 362 (when x 363 (coerce x 'vector))))) 364 sector 365 (buffer (make-array 512 :element-type '(unsigned-byte 8))) 366 (size (ole-entry.stream-size ole-entry)) 367 self) 368 (setq self 369 (lambda (msg &rest args) 370 (assert stream) 371 (flet ((next-octet (consumep) 372 ;; (values <current-byte> <position-of-current-byte>) 373 ;; Advance the stream by a byte if CONSUMEP is true, except at eof. 374 (assert (not (minusp offset))) 375 (unless (< offset size) 376 (error 'end-of-file :stream self)) 377 (flet ((pick (q i) 378 (unless (eql sector q) 379 (seek-sector (aref chain q) stream) 380 (let ((n (read-sequence buffer stream))) 381 (assert (eql 512 n))) 382 (setq sector q)) 383 (multiple-value-prog1 384 (values (aref buffer i) 385 (+ i (location-position (aref chain sector)))) 386 (when consumep 387 (incf offset))))) 388 (if mchain 389 (multiple-value-bind (mq mr) (floor offset 64) 390 (multiple-value-bind (q r) (floor (aref mchain mq) (/ 512 64)) 391 (pick q (+ (* r 64) mr)))) 392 (multiple-value-bind (q r) (floor offset 512) 393 (pick q r)))))) 394 (ecase msg 395 (close (setq stream nil)) 396 (stream-position 397 (destructuring-bind (&optional newpos) args 398 (if newpos 399 (setf offset newpos 400 sector nil) 401 offset))) 402 (physical-stream-position (nth-value 1 (next-octet nil))) 403 (read-octet (values (next-octet t))))))))) 404 405 (defun ole-entry-stream (ole-file entry) 406 (funcall ole-file 'ole-entry-stream entry)) 407 408 (defun ole-directory-stream (ole-file) 409 (funcall ole-file 'ole-directory-stream)) 410 411 (defun find-ole-entry (ole-file &key name type) 412 (loop 413 with s = (ole-directory-stream ole-file) 414 with e = nil 415 while (setq e (funcall s)) 416 do (when (and (or (not type) 417 (let ((x (ole-entry.object-type e))) 418 (if (atom type) 419 (eql x type) 420 (member x type)))) 421 (or (not name) 422 (let ((x (ole-entry-name-to-string 423 (ole-entry.name e) 424 (ole-entry.name-length e)))) 425 (if (atom name) 426 (equal x name) 427 (member x name :test #'equal))))) 428 (return-from find-ole-entry e)))) 429 430 (defun %ole-directory-stream (directories) 431 (let ((pending (list (cons 0 0)))) 432 (lambda () 433 (block done 434 (loop 435 (if pending 436 (destructuring-bind (n &rest level) (pop pending) 437 (let ((e (aref directories n))) 438 (unless (zerop (ole-entry.object-type e)) 439 (let ((id (ole-entry.right-sibling-id e))) 440 (when (<= id +maxregsig+) 441 (push (cons id level) pending))) 442 (let ((id (ole-entry.child-id e))) 443 (when (<= id +maxregsig+) 444 (push (cons id (1+ level)) pending))) 445 (let ((id (ole-entry.left-sibling-id e))) 446 (when (<= id +maxregsig+) 447 (push (cons id level) pending))) 448 (return-from done (values e n level))))) 449 (return-from done))))))) 450 451 (defun ole-file-stream (filename) 452 (let* ((stream (open filename :element-type '(unsigned-byte 8))) 453 (header (read-ole-header stream)) 454 (difat (read-difat header stream)) 455 (fat (read-fat difat stream)) 456 (directory-chain (sector-chain 457 fat 458 (ole-header.first-directory-sector-location header))) 459 (directories (read-directories directory-chain stream)) 460 (mfat-chain (sector-chain 461 fat 462 (ole-header.first-mini-fat-sector-location header))) 463 (mfat (read-mfat mfat-chain stream))) 464 (check-ole-header header) 465 ;;(describe header) 466 ;;(terpri) 467 (lambda (msg &rest args) 468 (assert stream) 469 (ecase msg 470 (close 471 (close stream) 472 (setq stream nil)) 473 (ole-entry-stream 474 (destructuring-bind (entry) args 475 (%ole-entry-stream header fat directories mfat stream entry))) 476 (ole-directory-stream (%ole-directory-stream directories)))))) 477 478 (defun extract-pictures (ole-file dir html) 479 (walk-RecordHeader-tree 480 ole-file 481 (find-ole-entry ole-file :name "Pictures" :type 2) 482 (lambda (in level i h start end parents) 483 (declare (ignore level start end parents)) 484 (multiple-value-bind (blip kind) 485 (read-record-body 486 in 487 h 488 (lambda (blip in) 489 (with-open-file (out (format nil "~a/~d.~a" 490 dir 491 i 492 (blip-ext blip)) 493 :direction :output 494 :if-does-not-exist :create 495 :if-exists :supersede 496 :element-type '(unsigned-byte 8)) 497 (copy-stream in out)))) 498 (declare (ignore blip)) 499 (when html 500 (format html "<p><img src=\"~d.~(~a~)\">~%" i kind)))))) 501 502 (defun extract-ole-file (filename &optional (dir "/tmp")) ;; TODO extract audio files 503 (with-stream (ole-file (ole-file-stream filename)) 504 (with-open-file (html (format nil "~a/index.html" dir) 505 :direction :output 506 :if-does-not-exist :create 507 :if-exists :supersede 508 :element-type 'character) 509 (extract-pictures ole-file dir html)))) 510 511 ;;; MS-PPT PowerPoint (.ppt) Binary File Format 512 513 (define-structure RecordHeader () 514 (%dummy1 ubyte) 515 (%dummy2 ubyte) 516 (recVer t :compute (logand #x0f %dummy1)) 517 (recInstance t :compute (logior (ash %dummy2 4) (ash %dummy1 -4))) 518 (recType ushort) 519 (recLen dword)) 520 521 (define-structure CurrentUserAtom () 522 (size dword :always #x14) 523 (headerToken dword) 524 (offsetToCurrentEdit dword) 525 (lenUserName ushort) 526 (docFileVersion ushort) 527 (majorVersion ubyte) 528 (minorVersion ubyte) 529 (unused ushort) 530 (ansiUserName (achar lenUserName)) 531 (relVersion dword) 532 (unicodeUserName (wchar lenUserName))) 533 534 ;;; MS-ODRAW Office Drawing Binary File Format 535 536 (define-structure POINT () 537 (x dword) 538 (y dword)) 539 540 (define-structure RECT () 541 (left dword) 542 (top dword) 543 (right dword) 544 (bottom dword)) 545 546 (define-structure OfficeArtMetafileHeader () 547 (cbSize dword) 548 (rcBounds RECT) 549 (ptSize POINT) 550 (cbSave dword) 551 (compression ubyte :member '(#x00 #xfe)) 552 (filter ubyte :always #xfe)) 553 554 (define-structure PersistDirectoryEntry () 555 (%dummy dword) 556 (persistId t :compute (logand #xfffff %dummy)) 557 (cPersist t :compute (ash %dummy -20)) 558 (rgPersistOffset (dword cPersist))) 559 560 (defstruct blip header ext guid guid2 metafileHeader) 561 562 (defstruct PersistDirectoryAtom header entries) 563 564 (defun read-record-body (stream RecordHeader &optional fn) ;; TODO move up?! 565 (let ((x RecordHeader #+nil(read-RecordHeader stream))) 566 (with-slots (recVer recInstance recType recLen) x 567 (flet ((blip (ext guid2 &optional metafileHeader) 568 (with-stream (in (shorter-stream stream (RecordHeader.recLen x))) 569 (let* ((start (stream-position stream)) 570 (end (+ start (RecordHeader.recLen x))) 571 (y (make-blip 572 :header x 573 :ext ext 574 :guid (read-guid in) 575 :guid2 (when (member recInstance guid2) 576 (read-guid in)) 577 :metafileHeader (if metafileHeader 578 (read-OfficeArtMetafileHeader in) 579 (read-octet in))))) 580 (when fn 581 (funcall fn y in)) 582 (unless (eql end (stream-position stream)) 583 (stream-position stream end)) 584 y)))) 585 (ecase recType 586 (#.RT_CurrentUserAtom 587 (assert (zerop recVer)) 588 (assert (zerop recInstance)) 589 (list x (read-CurrentUserAtom stream)) 590 #+nil ;; why recLen too small? 591 (with-shorter-stream (in stream (RecordHeader.recLen x)) 592 (list x (read-CurrentUserAtom in)))) 593 (#.RT_UserEditAtom 594 (assert (zerop recVer)) 595 (assert (zerop recInstance)) 596 (with-stream (in (shorter-stream stream (RecordHeader.recLen x))) 597 (list x (read-UserEditAtom in)))) 598 (#.RT_PersistDirectoryAtom 599 (assert (zerop recVer)) 600 (assert (zerop recInstance)) 601 (let ((n (RecordHeader.recLen x))) 602 ;;(print n) 603 (with-stream (in (shorter-stream stream n)) 604 (make-PersistDirectoryAtom 605 :header x 606 :entries (loop 607 for fpos = 0 then (stream-position in) 608 while (< fpos n) 609 collect (progn 610 ;;(print fpos) 611 (read-PersistDirectoryEntry in))))))) 612 #+nil 613 (#.RT_Document ;; TODO 614 ) 615 (#.RT_OfficeArtBlipEMF 616 (assert (zerop recVer)) 617 (assert (member recInstance '(#x3d4 #x3d5))) 618 (values (blip "emf" '(#x3d5) t) :emf)) 619 (#.RT_OfficeArtBlipWMF 620 (assert (zerop recVer)) 621 (assert (member recInstance '(#x216 #x217))) 622 (values (blip "wmf" '(#x217) t) :wmf)) 623 (#.RT_OfficeArtBlipPICT 624 (assert (zerop recVer)) 625 (assert (member recInstance '(#x542 #x543))) 626 (values (blip "pict" '(#x543) t) :pict)) 627 (#.RT_OfficeArtBlipJPEG1 628 (assert (zerop recVer)) 629 (assert (member recInstance '(#x46A #x46B #x6E2 #x6E3))) 630 (values (blip "jpeg" '(#x46B #x6E3)) :jpeg)) 631 (#.RT_OfficeArtBlipPNG 632 (assert (zerop recVer)) 633 (assert (member recInstance '(#x6e0 #x6e1))) 634 (values (blip "png"'(#x6e1)) :png)) 635 (#.RT_OfficeArtBlipDIB 636 (assert (zerop recVer)) 637 (assert (member recInstance '(#x7a8 #x7a9))) 638 (values (blip "dib" '(#x7a9)) :dib)) 639 (#.RT_OfficeArtBlipTIFF 640 (assert (zerop recVer)) 641 (assert (member recInstance '(#x6e4 #x6e5))) 642 (values (blip "tiff" '(#x6e5)) :tiff)) 643 (#.RT_OfficeArtBlipJPEG2 644 (assert (zerop recVer)) 645 (assert (member recInstance '(#x46A #x46B #x6E2 #x6E3))) 646 (values (blip "jpeg" '(#x46B #x6E3)) :jpeg))))))) 647 648 (defun read-record (stream &optional fn) 649 (read-record-body stream (read-RecordHeader stream) fn)) 650 651 (defun walk-RecordHeader-tree (ole-file entry fn &optional post-fn) 652 (when entry 653 (with-stream (in (ole-entry-stream ole-file entry)) 654 (labels ((rec (level pos parents) 655 (handler-case 656 (loop 657 for i from 0 658 until (<= 1 pos (stream-position in)) 659 do (let* ((h (read-RecordHeader in)) 660 (start (stream-position in)) 661 (end (+ start (RecordHeader.recLen h)))) 662 (funcall fn in level i h start end parents) 663 (if (= #xf (RecordHeader.recVer h)) 664 (rec (1+ level) 665 (if (plusp pos) 666 (min pos end) 667 end) 668 (cons h parents)) 669 (stream-position in end)) 670 (when post-fn 671 (funcall post-fn in level i h start end parents)))) 672 (end-of-file () 673 (assert (zerop level)))))) 674 (rec 0 0 nil))))) 675 676 (defun print-RecordHeader-tree (ole-file entry) 677 (walk-RecordHeader-tree 678 ole-file 679 entry 680 (lambda (in level i h start end parents) 681 (declare (ignore in parents)) 682 (dotimes (j (* 2 level)) 683 (write-char #\space)) 684 (format t "~d #x~x #x~x #x~x ~d :: ~d ~d :: ~a~%" 685 i 686 (RecordHeader.recVer h) 687 (RecordHeader.recInstance h) 688 (RecordHeader.recType h) 689 (RecordHeader.recLen h) 690 start 691 end 692 (enum-by-value 'RecordType (RecordHeader.recType h)))))) 693 694 (defun print-RecordHeader-tree-from-ppt-file (filename) 695 (with-stream (ole-file (ole-file-stream filename)) 696 (print-RecordHeader-tree 697 ole-file 698 (find-ole-entry ole-file :name "PowerPoint Document" :type 2)))) 699 700 (defun utf-char (n) ;; TODO utf properly 701 (assert (plusp n)) 702 (if (member n '(#x0a #x0b #x0d)) ;; #x0b = vertical tab 703 "<br/>" 704 (code-char n))) 705 706 (defun ascii-char (n) 707 (assert (plusp n)) 708 (if (member n '(#x0a #x0b #x0d)) ;; #x0b = vertical tab 709 "<br/>" 710 (code-char n))) 711 712 (define-structure OfficeArtFOPTEOPID () 713 (%dummy ushort) 714 (opid t :compute (logand #x3fff %dummy)) 715 (fBid t :compute (not (zerop (logand #x4000 %dummy)))) 716 (fComplex t :compute (not (zerop (logand #x8000 %dummy))))) 717 718 (define-structure OfficeArtFBSE () 719 (btWin32 ubyte) 720 (btMacOS ubyte) 721 (rgbUid GUID) 722 (tag ushort) 723 (size dword) 724 (cRef dword) 725 (foDelay dword) 726 (unused1 ubyte) 727 (cbName ubyte) 728 (unused2 ubyte) 729 (unused3 ubyte) 730 #+nil(nameData (ubyte cbName)) 731 #+nil(embeddedBlip (ubyte size))) 732 733 (defun ppt-entry-to-html-naive (ole-file entry stream title pictures debug) 734 (macrolet ((out (&rest args) 735 `(format stream ,@args))) 736 (let ((slide-no 0) 737 (blip-no 0) 738 (blips nil) 739 ;; texts 740 (text-slide-no nil) 741 (text-no nil) 742 (texts nil)) 743 (out "<html>~%<head>~%") 744 (when title 745 (out "<title>~a</title>~%" title)) 746 (out "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>~%") 747 (out "<style>~%") 748 (out ".h {border-left:1px solid gray;padding-left:0.5em}~%") 749 (out ".m {color:gray}") 750 (out "</style>~%") 751 (out "</head>~%<body>~%") 752 (when title 753 (out "<a href=\"file://~a\">~a</a>~%" title title)) 754 (walk-RecordHeader-tree 755 ole-file 756 entry 757 (lambda (in level i h start end parents) 758 (declare (ignore end parents)) 759 (when debug 760 ;; pre 761 (when (and (zerop level) (plusp i)) 762 (out "<hr/>~%")) 763 ;; msg 764 (when debug 765 (out "<div class=\"h\">~%<pre class=\"m\">~a ~a #x~x ~a</pre>~%" 766 (- start 8) ;; - record header size 767 (RecordHeader.recType h) 768 (RecordHeader.recType h) 769 (enum-by-value 'RecordType (RecordHeader.recType h))))) 770 ;; post 771 (case (RecordHeader.recType h) 772 (#.RT_Document) 773 (#.RT_SlideListWithText 774 (setq text-slide-no 0)) 775 (#.RT_SlidePersistAtom 776 (incf text-slide-no) 777 (setq text-no 0)) 778 (#.RT_OfficeArtFBSE 779 (let* ((x (read-OfficeArtFBSE in)) 780 (y (find (OfficeArtFBSE.foDelay x) pictures :key #'cadr))) 781 (assert y) 782 (push (list (incf blip-no) (car y) (caddr y)) blips) 783 #+nil 784 (out "<div><p>@@@ ~a #x~x ~a === img ~s ~s</p>~%" 785 (RecordHeader.recType h) 786 (RecordHeader.recType h) 787 (enum-by-value 'RecordType (RecordHeader.recType h)) 788 blip-no 789 (OfficeArtFBSE.foDelay x)))) 790 (#.RT_Slide 791 (incf slide-no) 792 (unless debug 793 (when (< 1 slide-no) 794 (out "<hr/>~%"))) 795 (out "<div class=\"slide\">~%") 796 (out "<h1><a name=\"slide~d\">Slide ~d</a></h1>~%" slide-no slide-no) 797 (out "<pre><a href=\"#slide~d\"><</a> <a href=\"#slide~d\">></a></pre>~%" (1- slide-no) (1+ slide-no))) 798 ((#.RT_TextCharsAtom #.RT_CString) ;; utf16le 799 (unless nil #+nil(or (member #.RT_PROGTAGS parents :key 'RecordHeader.recType) 800 (member #.RT_NOTES parents :key 'RecordHeader.recType) 801 (member #.RT_MAINMASTER parents :key 'RecordHeader.recType)) 802 (cond 803 #+nil 804 ((member #.RT_SlideListWithText parents :key 'RecordHeader.recType) 805 (push ;; TODO also slide-no + text-no inside slide 806 (list 807 text-slide-no 808 (incf text-no) 809 (with-output-to-string (s) 810 (loop 811 for j from 0 below (RecordHeader.recLen h) by 2 812 do (format s "~a" (utf-char (read-ushort in)))))) 813 texts)) 814 (t 815 (out "<p>") 816 (loop 817 for j from 0 below (RecordHeader.recLen h) by 2 818 do (out "~a" (utf-char (read-ushort in)))) 819 (out "</p>~%"))))) 820 (#.RT_TextBytesAtom ;; ascii 821 (unless nil #+nil(or (member #.RT_PROGTAGS parents :key 'RecordHeader.recType) 822 (member #.RT_NOTES parents :key 'RecordHeader.recType) 823 (member #.RT_MAINMASTER parents :key 'RecordHeader.recType)) 824 (cond 825 #+nil 826 ((member #.RT_SlideListWithText parents :key 'RecordHeader.recType) 827 (push ;; TODO also slide-no + text-no inside slide 828 (list 829 text-slide-no 830 (incf text-no) 831 (with-output-to-string (s) 832 (loop 833 for j from 0 below (RecordHeader.recLen h) 834 do (format s "~a" (ascii-char (read-octet in)))))) 835 texts)) 836 (t 837 (out "<p>") 838 (loop 839 for j from 0 below (RecordHeader.recLen h) 840 do (out "~a" (ascii-char (read-octet in)))) 841 (out "</p>~%"))))) 842 (#.RT_OUTLINETEXTREFATOM 843 (let* ((index (1+ (read-dword in))) 844 (text (caddr 845 (find-if (lambda (x) 846 (and (= slide-no (car x)) 847 (= index (cadr x)))) 848 texts)))) 849 (if text 850 (out "<p>~a</p>~%" text) 851 (out "<p>!!!</p>~%")))) 852 ;; TODO RT_DOCUMENT / RT_SLIDELISTWITHTEXT / RT_TEXTBYTESATOM 853 (#.RT_OfficeArtFOPT 854 (with-stream (s (shorter-stream in (RecordHeader.recLen h))) 855 (let ((len (RecordHeader.recLen h))) 856 (loop 857 while (< (stream-position s) len) 858 do (let ((opid (read-OfficeArtFOPTEOPID s)) 859 (value (read-dword s))) 860 ;;(out "<p>...... ~s ~s</p>~%" opid value) 861 (when (OfficeArtFOPTEOPID.fComplex opid) 862 (decf len value)) 863 (case (OfficeArtFOPTEOPID.opid opid) 864 (#.pib 865 (assert (OfficeArtFOPTEOPID.fBid opid)) 866 (destructuring-bind (j n ext) (assoc value blips) 867 (assert (and j n ext)) 868 (out "<img src=\"~a.~(~a~)\"/>~%" n ext))))))))))) 869 (lambda (in level i h start end parents) 870 (declare (ignore in level i start end parents)) 871 (case (RecordHeader.recType h) 872 (#.RT_Slide 873 (out "</div>~%"))) 874 (when debug 875 (format stream "</div>~%")))) 876 ;;(out "~s~%" texts) 877 (out "</body>~%</html>~%")))) 878 879 (defun ppt-file-to-html-naive (filename &optional (stream *standard-output*)) 880 (with-stream (ole-file (ole-file-stream filename)) 881 (let ((pictures nil)) 882 ;;(extract-pictures ole-file dir html) ;; TODO mount olefs and traverse Pictures only once 883 (walk-RecordHeader-tree ole-file 884 (find-ole-entry ole-file :name "Pictures" :type 2) 885 (lambda (in level i h start end parents) 886 (declare (ignore level end parents)) 887 (multiple-value-bind (blip kind) 888 (read-record-body in h) 889 (declare (ignore blip)) 890 (push (list i (- start 8) kind) pictures)))) 891 (ppt-entry-to-html-naive ole-file 892 (find-ole-entry ole-file 893 :name "PowerPoint Document" 894 :type 2) 895 stream 896 filename 897 pictures 898 t)))) 899 900 (define-structure UserEditAtom () 901 (lastSlideIdRef dword) 902 (version ushort) 903 (minorVersion ubyte :always 0) 904 (majorVersion ubyte :always 3) 905 (offsetLastEdit dword) 906 (offsetPersistDirectory dword) 907 (docPersistIdRef dword :always 1) 908 (persistIdSeed dword) 909 (lastView ushort) 910 (unused ushort) 911 #+nil(encryptSessionPersistIdRef dword)) ;; TODO optional 912 913 (defun ppt-entry-to-html (ole-file entry stream title) 914 (macrolet ((out (&rest args) 915 `(format stream ,@args))) 916 (let ((slide-no 0)) 917 (out "<html>~%<head>~%") 918 (when title 919 (out "<title>~a</title>~%" title)) 920 (out "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\"/>~%") 921 (out "</head>~%<body>~%") 922 (walk-RecordHeader-tree 923 ole-file 924 entry 925 (lambda (in level i h start end parents) 926 (declare (ignore i level start end parents)) 927 (case (RecordHeader.recType h) 928 (#.RT_Document 929 (out "<div>~%")) 930 (#.RT_Slide 931 (out "<hr/>~%</div>~%<div class=\"slide\">~%<h1>Slide ~d</h1>~%" (incf slide-no))) 932 ((#.RT_TextCharsAtom #.RT_CString) ;; utf16le 933 (out "<p>") 934 (loop 935 for j from 0 below (RecordHeader.recLen h) by 2 936 do (out "~a" (utf-char (read-ushort in)))) 937 (out "</p>~%")) 938 (#.RT_TextBytesAtom ;; ascii 939 (out "<p>") 940 (loop 941 for j from 0 below (RecordHeader.recLen h) 942 do (out "~a" (ascii-char (read-octet in)))) 943 (out "</p>~%"))))) 944 (out "</div>~%</body>~%</html>~%")))) 945 946 (defun process-PersistDirectoryAtom (htab in) 947 (dolist (entry (PersistDirectoryAtom-entries (read-record in))) 948 (with-slots (persistId cPersist rgPersistOffset) entry 949 (loop 950 for n from 0 951 for o across rgPersistOffset 952 do (let ((k (+ persistId n))) 953 ;;(print (list :??? persistId :+ n := k :-> o)) 954 (unless (gethash k htab) 955 ;;(print (list persistId :+ n := k :-> o)) 956 (setf (gethash k htab) o))))))) 957 958 (defun ppt-file-to-html (filename &optional (stream *standard-output*)) 959 (with-stream (ole-file (ole-file-stream filename)) 960 (let ((u (block CurrentUser 961 (walk-RecordHeader-tree 962 ole-file 963 (find-ole-entry ole-file :name "Current User" :type 2) 964 (lambda (in level i h start end parents) 965 (declare (ignore level i start end parents)) 966 (return-from CurrentUser 967 (cadr (read-record-body in h)))))))) 968 ;;(describe u) 969 (let ((pictures nil)) 970 ;;(extract-pictures ole-file dir html) ;; TODO mount olefs and traverse Pictures only once 971 (walk-RecordHeader-tree ole-file 972 (find-ole-entry ole-file 973 :name "Pictures" 974 :type 2) 975 (lambda (in level i h start end parents) 976 (declare (ignore level end parents)) 977 (multiple-value-bind (blip kind) 978 (read-record-body in h) 979 (declare (ignore blip)) 980 (push (list i (- start 8) kind) pictures)))) 981 (print (list :pictures pictures)) 982 (with-stream (in (ole-entry-stream 983 ole-file 984 (find-ole-entry ole-file 985 :name "PowerPoint Document" 986 :type 2))) 987 (let ((htab (make-hash-table)) ;; persist oid -> fpos 988 (first-UserEditAtom nil)) 989 (stream-position in (CurrentUserAtom.offsetToCurrentEdit u)) 990 (loop 991 for e = (cadr (read-record in)) then (cadr (read-record in)) 992 do (progn 993 ;;(describe e) 994 (unless first-UserEditAtom 995 (setq first-UserEditAtom e)) 996 (stream-position in (UserEditAtom.offsetPersistDirectory e)) 997 (process-PersistDirectoryAtom htab in)) 998 until (zerop (UserEditAtom.offsetLastEdit e)) 999 do (stream-position in (UserEditAtom.offsetLastEdit e))) 1000 ;; live PersistDirectory 1001 (let ((persist-directory nil)) 1002 (maphash (lambda (k v) (push (cons k v) persist-directory)) htab) 1003 (setq persist-directory (sort persist-directory #'< :key #'car)) 1004 (print persist-directory)) 1005 ;; live DocumentContainer 1006 (print (gethash (UserEditAtom.docPersistIdRef first-UserEditAtom) htab))) 1007 #+nil(stream-position in 0) 1008 #+nil(print (read-record in))))))) 1009 1010 ;;; MS-DOC Word (.doc) Binary File Format 1011 1012 (define-structure FibBase () 1013 (wIdent ushort) 1014 (nFib ushort) 1015 (unused ushort) 1016 (lid ushort) 1017 (pnNext ushort) 1018 (flags1 ushort) ;; TODO 1019 (nFibBack ushort :member '(#xbf #xc1)) 1020 (lKey dword) 1021 (envr ubyte) ;; TODO :always 0? 1022 (flags2 ubyte) ;; TODO 1023 (reserved3 ushort) ;; TODO :always 0? 1024 (reserved4 ushort) ;; TODO :always 0? 1025 (reserved5 dword) 1026 (reserved6 dword)) 1027 1028 (define-structure FibRgFcLcb97 () 1029 (fcStshfOrig dword) 1030 (lcbStshfOrig dword) 1031 (fcStshf dword) 1032 (lcbStshf dword) 1033 (fcPlcffndRef dword) 1034 (lcbPlcffndRef dword) 1035 (fcPlcffndTxt dword) 1036 (lcbPlcffndTxt dword) 1037 (fcPlcfandRef dword) 1038 (lcbPlcfandRef dword) 1039 (fcPlcfandTxt dword) 1040 (lcbPlcfandTxt dword) 1041 (fcPlcfSed dword) 1042 (lcbPlcfSed dword) 1043 (fcPlcPad dword) 1044 (lcbPlcPad dword) 1045 (fcPlcfPhe dword) 1046 (lcbPlcfPhe dword) 1047 (fcSttbfGlsy dword) 1048 (lcbSttbfGlsy dword) 1049 (fcPlcfGlsy dword) 1050 (lcbPlcfGlsy dword) 1051 (fcPlcfHdd dword) 1052 (lcbPlcfHdd dword) 1053 (fcPlcfBteChpx dword) 1054 (lcbPlcfBteChpx dword) 1055 (fcPlcfBtePapx dword) 1056 (lcbPlcfBtePapx dword) 1057 (fcPlcfSea dword) 1058 (lcbPlcfSea dword) 1059 (fcSttbfFfn dword) 1060 (lcbSttbfFfn dword) 1061 (fcPlcfFldMom dword) 1062 (lcbPlcfFldMom dword) 1063 (fcPlcfFldHdr dword) 1064 (lcbPlcfFldHdr dword) 1065 (fcPlcfFldFtn dword) 1066 (lcbPlcfFldFtn dword) 1067 (fcPlcfFldAtn dword) 1068 (lcbPlcfFldAtn dword) 1069 (fcPlcfFldMcr dword) 1070 (lcbPlcfFldMcr dword) 1071 (fcSttbfBkmk dword) 1072 (lcbSttbfBkmk dword) 1073 (fcPlcfBkf dword) 1074 (lcbPlcfBkf dword) 1075 (fcPlcfBkl dword) 1076 (lcbPlcfBkl dword) 1077 (fcCmds dword) 1078 (lcbCmds dword) 1079 (fcUnused1 dword) 1080 (lcbUnused1 dword) 1081 (fcSttbfMcr dword) 1082 (lcbSttbfMcr dword) 1083 (fcPrDrvr dword) 1084 (lcbPrDrvr dword) 1085 (fcPrEnvPort dword) 1086 (lcbPrEnvPort dword) 1087 (fcPrEnvLand dword) 1088 (lcbPrEnvLand dword) 1089 (fcWss dword) 1090 (lcbWss dword) 1091 (fcDop dword) 1092 (lcbDop dword) 1093 (fcSttbfAssoc dword) 1094 (lcbSttbfAssoc dword) 1095 (fcClx dword) 1096 (lcbClx dword) 1097 (fcPlcfPgdFtn dword) 1098 (lcbPlcfPgdFtn dword) 1099 (fcAutosaveSource dword) 1100 (lcbAutosaveSource dword) 1101 (fcGrpXstAtnOwners dword) 1102 (lcbGrpXstAtnOwners dword) 1103 (fcSttbfAtnBkmk dword) 1104 (lcbSttbfAtnBkmk dword) 1105 (fcUnused2 dword) 1106 (lcbUnused2 dword) 1107 (fcUnused3 dword) 1108 (lcbUnused3 dword) 1109 (fcPlcSpaMom dword) 1110 (lcbPlcSpaMom dword) 1111 (fcPlcSpaHdr dword) 1112 (lcbPlcSpaHdr dword) 1113 (fcPlcfAtnBkf dword) 1114 (lcbPlcfAtnBkf dword) 1115 (fcPlcfAtnBkl dword) 1116 (lcbPlcfAtnBkl dword) 1117 (fcPms dword) 1118 (lcbPms dword) 1119 (fcFormFldSttbs dword) 1120 (lcbFormFldSttbs dword) 1121 (fcPlcfendRef dword) 1122 (lcbPlcfendRef dword) 1123 (fcPlcfendTxt dword) 1124 (lcbPlcfendTxt dword) 1125 (fcPlcfFldEdn dword) 1126 (lcbPlcfFldEdn dword) 1127 (fcUnused4 dword) 1128 (lcbUnused4 dword) 1129 (fcDggInfo dword) 1130 (lcbDggInfo dword) 1131 (fcSttbfRMark dword) 1132 (lcbSttbfRMark dword) 1133 (fcSttbfCaption dword) 1134 (lcbSttbfCaption dword) 1135 (fcSttbfAutoCaption dword) 1136 (lcbSttbfAutoCaption dword) 1137 (fcPlcfWkb dword) 1138 (lcbPlcfWkb dword) 1139 (fcPlcfSpl dword) 1140 (lcbPlcfSpl dword) 1141 (fcPlcftxbxTxt dword) 1142 (lcbPlcftxbxTxt dword) 1143 (fcPlcfFldTxbx dword) 1144 (lcbPlcfFldTxbx dword) 1145 (fcPlcfHdrtxbxTxt dword) 1146 (lcbPlcfHdrtxbxTxt dword) 1147 (fcPlcffldHdrTxbx dword) 1148 (lcbPlcffldHdrTxbx dword) 1149 (fcStwUser dword) 1150 (lcbStwUser dword) 1151 (fcSttbTtmbd dword) 1152 (lcbSttbTtmbd dword) 1153 (fcCookieData dword) 1154 (lcbCookieData dword) 1155 (fcPgdMotherOldOld dword) 1156 (lcbPgdMotherOldOld dword) 1157 (fcBkdMotherOldOld dword) 1158 (lcbBkdMotherOldOld dword) 1159 (fcPgdFtnOldOld dword) 1160 (lcbPgdFtnOldOld dword) 1161 (fcBkdFtnOldOld dword) 1162 (lcbBkdFtnOldOld dword) 1163 (fcPgdEdnOldOld dword) 1164 (lcbPgdEdnOldOld dword) 1165 (fcBkdEdnOldOld dword) 1166 (lcbBkdEdnOldOld dword) 1167 (fcSttbfIntlFld dword) 1168 (lcbSttbfIntlFld dword) 1169 (fcRouteSlip dword) 1170 (lcbRouteSlip dword) 1171 (fcSttbSavedBy dword) 1172 (lcbSttbSavedBy dword) 1173 (fcSttbFnm dword) 1174 (lcbSttbFnm dword) 1175 (fcPlfLst dword) 1176 (lcbPlfLst dword) 1177 (fcPlfLfo dword) 1178 (lcbPlfLfo dword) 1179 (fcPlcfTxbxBkd dword) 1180 (lcbPlcfTxbxBkd dword) 1181 (fcPlcfTxbxHdrBkd dword) 1182 (lcbPlcfTxbxHdrBkd dword) 1183 (fcDocUndoWord9 dword) 1184 (lcbDocUndoWord9 dword) 1185 (fcRgbUse dword) 1186 (lcbRgbUse dword) 1187 (fcUsp dword) 1188 (lcbUsp dword) 1189 (fcUskf dword) 1190 (lcbUskf dword) 1191 (fcPlcupcRgbUse dword) 1192 (lcbPlcupcRgbUse dword) 1193 (fcPlcupcUsp dword) 1194 (lcbPlcupcUsp dword) 1195 (fcSttbGlsyStyle dword) 1196 (lcbSttbGlsyStyle dword) 1197 (fcPlgosl dword) 1198 (lcbPlgosl dword) 1199 (fcPlcocx dword) 1200 (lcbPlcocx dword) 1201 (fcPlcfBteLvc dword) 1202 (lcbPlcfBteLvc dword) 1203 (dwLowDateTime dword) 1204 (dwHighDateTime dword) 1205 (fcPlcfLvcPre10 dword) 1206 (lcbPlcfLvcPre10 dword) 1207 (fcPlcfAsumy dword) 1208 (lcbPlcfAsumy dword) 1209 (fcPlcfGram dword) 1210 (lcbPlcfGram dword) 1211 (fcSttbListNames dword) 1212 (lcbSttbListNames dword) 1213 (fcSttbfUssr dword) 1214 (lcbSttbfUssr dword)) 1215 1216 #+nil 1217 (define-structure FibRgCswNew () 1218 (nFibNew ushort :member '(#x00D9 #x0101 #x010C #x0112)) 1219 rgCswNewData (variable): Depending on the value of nFibNew this is one of the following. 1220 Value of nFibNew 1221 Meaning 1222 0x00D9 1223 fibRgCswNewData2000 (2 bytes) 1224 0x0101 1225 fibRgCswNewData2000 (2 bytes) 1226 0x010C 1227 fibRgCswNewData2000 (2 bytes) 1228 0x0112 1229 fibRgCswNewData2007 (8 bytes) ) 1230 1231 (defstruct fib base csw fibRgW cslw fibRgLw cbRgFcLcb fibRgFcLcbBlob fibRgFcLcb 1232 cswNew fibRgCswNew) 1233 1234 (defun read-fib (stream) 1235 (let* ((base (read-fibbase stream)) 1236 (csw (let ((x (read-ushort stream))) 1237 (assert (= x #x0e)) 1238 x)) 1239 (fibRgW (read-vector stream 28 '(unsigned-byte 8) 'read-octet)) 1240 (cslw (let ((x (read-ushort stream))) 1241 (assert (= x #x16)) 1242 x)) 1243 (fibRgLw (read-vector stream 88 '(unsigned-byte 8) 'read-octet)) 1244 (cbRgFcLcb (read-ushort stream)) 1245 (fibRgFcLcbBlob-position (stream-position stream)) 1246 (fibRgFcLcbBlob (read-vector stream (* 8 cbRgFcLcb) '(unsigned-byte 8) 'read-octet)) 1247 (cswNew (read-ushort stream)) 1248 (fibRgCswNew (read-vector stream cswNew '(unsigned-byte 8) 'read-octet)) 1249 #+nil 1250 (nFib (if (zerop cswNew) 1251 (FibBase.nFib base) 1252 -1 #+nil(assert (zerop cswNew))))) ;; TODO implement this case 1253 (assert 1254 (member cbRgFcLcb '(#x5d #x6c #x88 #xa4 #xb7)) 1255 #+nil ;; spec says as bellow:-{ 1256 (= cbRgFcLcb (ecase nFib 1257 (#x0c1 #x5d) ;;;; < should be 1258 (#x0d9 #x6c) 1259 (#x101 #x88) 1260 (#x10c #xa4) ;;;; < actually is 1261 (#x112 #xb7)))) 1262 #+nil 1263 (assert (= cswNew (ecase nFib 1264 (#x0c1 0) 1265 (#x0d9 2) 1266 (#x101 2) 1267 (#x10c 2) 1268 (#x112 5)))) 1269 ;;(print (list :@@@-nfib nFib)) 1270 (make-fib :base base 1271 :csw csw 1272 :fibRgW fibRgW 1273 :cslw cslw 1274 :fibRgLw fibRgLw 1275 :cbRgFcLcb cbRgFcLcb 1276 :fibRgFcLcbBlob fibRgFcLcbBlob 1277 :fibRgFcLcb (with-stream (s (vector-stream fibRgFcLcbBlob 1278 fibRgFcLcbBlob-position)) 1279 (read-FibRgFcLcb97 s)) 1280 :cswNew cswNew 1281 :fibRgCswNew fibRgCswNew))) 1282 1283 (define-structure LSTF () 1284 (lsid dword) ;; TODO signed, not -1 (or #xffffffff) 1285 (tplc dword) 1286 (rgistdPara (ushort 9)) 1287 (flags ubyte) 1288 (fSimpleList t :compute (not (zerop (logand #x01 flags)))) 1289 (unused1 t :compute (not (zerop (logand #x02 flags)))) 1290 (fAutoNum t :compute (not (zerop (logand #x04 flags)))) 1291 (unused2 t :compute (not (zerop (logand #x08 flags)))) 1292 (fHybrid t :compute (not (zerop (logand #x10 flags)))) 1293 (reserved1 t :compute (logand #xe0 flags)) ;; TODO :always 0 1294 (grfhic ubyte)) 1295 1296 (defun read-PlfLst (stream) 1297 (let* ((cLst (read-ushort stream)) 1298 (z (make-array cLst))) 1299 (dotimes (i cLst z) 1300 (setf (aref z i) (read-lstf stream))))) 1301 1302 (define-structure LVLF () 1303 (iStartAt dword) ;; TODO signed 1304 (nfc ubyte) ;; TODO MUST not be equal to 0x08, 0x09, 0x0F, or 0x13 1305 (flags ubyte) 1306 (jc t :compute (logand #x03 flags)) 1307 (fLegal t :compute (not (zerop (logand #x04 flags)))) 1308 (fNoRestart t :compute (not (zerop (logand #x08 flags)))) 1309 (fIndentSav t :compute (not (zerop (logand #x10 flags)))) 1310 (fConverted t :compute (not (zerop (logand #x20 flags)))) 1311 (unused1 t :compute (not (zerop (logand #x40 flags)))) 1312 (fTentative t :compute (not (zerop (logand #x80 flags)))) 1313 (rgbxchNums (ubyte 9)) 1314 (ixchFollow ubyte) 1315 (dxaIndentSav dword) ;; TODO signed 1316 (unused2 dword) 1317 (cbGrpprlChpx ubyte) 1318 (cbGrpprlPapx ubyte) 1319 (ilvlRestartLim ubyte) 1320 (grfhic ubyte)) 1321 1322 (defstruct LVL lvlf grpprlPapx grpprlChpx xst) 1323 1324 (define-structure Sprm () 1325 (flags ushort) 1326 (ispmd t :compute (logand #x01ff flags)) 1327 (fSpec t :compute (not (zerop (logand #x0200 flags)))) 1328 (sgc t :compute (logand #x07 (ash flags -10))) 1329 (spra t :compute (logand #x07 (ash flags -13)))) 1330 1331 (defstruct PChgTabsDelClose cTabs rgdxaDel rgdxaClose) 1332 1333 (defun read-PChgTabsDelClose (stream) 1334 (let ((cTabs (read-octet stream))) 1335 (assert (<= 0 cTabs 64)) 1336 (let ((rgdxaDel (read-vector stream cTabs t 'read-ushort)) 1337 (rgdxaClose (read-vector stream cTabs t 'read-ushort))) 1338 (assert (equalp rgdxaDel (sort (copy-seq rgdxaDel) #'<=))) 1339 (make-PChgTabsDelClose :cTabs cTabs 1340 :rgdxaDel rgdxaDel 1341 :rgdxaClose rgdxaClose)))) 1342 1343 (defstruct PChgTabsAdd cTabs rgdxaAdd rgtbdAdd) 1344 1345 (defun read-PChgTabsAdd (stream) 1346 (let ((cTabs (read-octet stream))) 1347 (assert (<= 0 cTabs 64)) 1348 (let ((rgdxaAdd (read-vector stream cTabs t 'read-ushort)) 1349 (rgtbdAdd (read-vector stream cTabs t 'read-octet))) ;; TODO decode TBD struct 1350 (assert (equalp rgdxaAdd (sort (copy-seq rgdxaAdd) #'<=))) 1351 (make-PChgTabsAdd :cTabs cTabs 1352 :rgdxaAdd rgdxaAdd 1353 :rgtbdAdd rgtbdAdd)))) 1354 1355 (defstruct PChgTabsOperand cb DelClose Add) 1356 1357 (defun read-PChgTabsOperand (stream) 1358 (let ((cb (read-octet stream))) 1359 (assert (< 1 cb 255)) ;; TODO 255 1360 ;;(read-vector stream cb t 'read-octet) 1361 (make-PChgTabsOperand :cb cb 1362 :DelClose (read-PChgTabsDelClose stream) 1363 :Add (read-PChgTabsAdd stream)))) 1364 1365 (defstruct Prl sprm operand) 1366 1367 (defun read-Prl (stream) 1368 (let ((sprm (read-Sprm stream))) 1369 ;; (when (zerop (Sprm.sgc sprm)) 1370 ;; (print (list :@@@-!!! (read-vector stream 10 t 'read-octet)))) 1371 (assert (member (Sprm.sgc sprm) '(1 2 3 4 5))) 1372 (make-Prl 1373 :sprm sprm 1374 :operand (ecase (Sprm.spra sprm) 1375 (0 (read-octet stream)) 1376 (1 (read-octet stream)) 1377 (2 (read-ushort stream)) 1378 (3 (read-dword stream)) 1379 (4 (read-ushort stream)) 1380 (5 (read-ushort stream)) 1381 (6 (flet ((rd () 1382 (read-vector stream (read-octet stream) t 'read-octet))) 1383 (ecase (Sprm.sgc sprm) 1384 (1 (ecase (Sprm.flags sprm) ;; par 1385 (#xc615 (read-PChgTabsOperand stream)))) 1386 (2 (rd)) ;; char 1387 (3 (rd)) ;; pic 1388 (4 (rd)) ;; sec 1389 #+nil(5 )))) ;; tab 1390 (7 (read-vector stream 3 t 'read-octet)))))) 1391 1392 (defstruct Xst blob parsed) 1393 1394 (defun read-Xst (stream) 1395 ;;(read-vector stream (read-ushort stream) t 'read-ushort) 1396 (let* ((cch (read-ushort stream)) 1397 (blob (read-vector stream cch t 'read-ushort))) 1398 (make-Xst :blob blob 1399 :parsed nil 1400 #+nil(with-output-to-string (out) 1401 (dotimes (i cch) 1402 (format out "~a" (utf-char (aref blob i)))))))) 1403 1404 (defun read-LVL (stream) 1405 (let ((lvlf (read-lvlf stream))) 1406 ;;(describe lvlf) 1407 (make-LVL 1408 :lvlf lvlf 1409 :grpprlPapx (read-vector stream (LVLF.cbGrpprlPapx lvlf) t 'read-octet) 1410 :grpprlChpx (read-vector stream (LVLF.cbGrpprlChpx lvlf) t 'read-octet) 1411 ;; :grpprlPapx (read-vector stream (LVLF.cbGrpprlPapx lvlf) t 'read-prl) 1412 ;; :grpprlChpx (read-vector stream (LVLF.cbGrpprlChpx lvlf) t 'read-prl) 1413 :xst (read-Xst stream)))) 1414 1415 (defun fix-numbering (filename) 1416 (let (offsets) 1417 (with-stream (ole-file (ole-file-stream filename)) 1418 #+nil(break "~s" ole-file) 1419 (let (fcPlfLst lcbPlfLst) 1420 (with-stream (in (ole-entry-stream 1421 ole-file 1422 (find-ole-entry ole-file 1423 :name "WordDocument" 1424 :type 2))) 1425 (let ((fib (read-fib in))) 1426 ;;(describe fib) 1427 (let ((x (fib-fibRgFcLcb fib))) 1428 (setq fcPlfLst (FibRgFcLcb97.fcPlfLst x) 1429 lcbPlfLst (FibRgFcLcb97.lcbPlfLst x))) 1430 #+nil 1431 (multiple-value-bind (fcPlfLst lcbPlfLst) 1432 (with-stream (s (vector-stream (subseq (fib-fibRgFcLcbBlob fib) #.(* 4 146)))) 1433 (values (read-dword s) (read-dword s))) 1434 (print (list :@@@ fcPlfLst lcbPlfLst)) 1435 ))) 1436 (with-stream (in (ole-entry-stream 1437 ole-file 1438 (find-ole-entry ole-file 1439 :name '("0Table" "1Table") ;; TODO be sure which one? 1440 :type 2))) 1441 (stream-position in fcPlfLst) 1442 (let ((PlfLst (read-PlfLst in))) 1443 (let ((n 0)) 1444 (dotimes (i (length PlfLst)) 1445 (incf n (if (LSTF.fSimpleList (aref PlfLst i)) 1 9))) 1446 (let ((lvls (make-array n))) 1447 (dotimes (i n) 1448 (setf (aref lvls i) (read-lvl in))) 1449 ;; now I have lstf[] and lvl[] 1450 (let (anums ;; roughly like w:abstractNum 1451 (j 0)) 1452 (dotimes (i (length PlfLst)) 1453 (let ((lstf (aref PlfLst i))) 1454 (unless (LSTF.fSimpleList lstf) 1455 (push (list i #+nil lstf j) anums)) 1456 (incf j (if (LSTF.fSimpleList lstf) 1 9)))) 1457 (setq anums (nreverse anums)) 1458 ;;(print anums) 1459 (dolist (a anums) 1460 (destructuring-bind (i j) a ;; i_lstf j_lvl0 1461 (declare (ignore i)) 1462 (let* ((lvl (aref lvls (1+ j))) ;; hardcode second level 1463 (lvlf (LVL-lvlf lvl))) 1464 ;;(print (list :@@@ j (LVLF.fNoRestart lvlf) (LVLF.ilvlRestartLim lvlf))) 1465 (push (LVLF.%physical-stream-position lvlf) offsets))))) 1466 #+nil 1467 (dotimes (i n) 1468 (let* ((lvl (aref lvls i)) 1469 (lvlf (LVL-lvlf lvl))) 1470 (print (list :@@@ i (LVLF.fNoRestart lvlf) (LVLF.ilvlRestartLim lvlf))))))))) 1471 #+nil(values fcPlfLst lcbPlfLst))) 1472 (let ((fixed (format nil "~a.fixed.doc" filename))) 1473 (copy-file filename fixed) 1474 ;;(print (list :@@@-offsets offsets)) 1475 (with-open-file (s fixed 1476 :direction :io 1477 :if-exists :overwrite 1478 :if-does-not-exist :error 1479 :element-type '(unsigned-byte 8)) 1480 (dolist (o offsets) 1481 (stream-position s (+ 5 o)) 1482 (let ((flags (read-octet s))) 1483 (stream-position s (+ 5 o)) 1484 (write-byte (logior #x08 flags) s) 1485 #+nil(write-byte (logand #x07 flags) s)) 1486 (stream-position s (+ 26 o)) 1487 (write-byte 0 s)))))) 1488 1489 (defun extract-files (filename &optional (dir "/tmp")) 1490 (with-stream (ole-file (ole-file-stream filename)) 1491 (do ((s (ole-directory-stream ole-file)) 1492 e 1493 (i 0)) 1494 ((not (setq e (funcall s)))) 1495 (print-ole-entry e *standard-output*) 1496 (terpri) 1497 (ecase (ole-entry.object-type e) 1498 ((0 1 5)) 1499 (2 (with-stream (in (ole-entry-stream ole-file e)) 1500 (with-open-file (out (format nil "~a/XX-~d" dir (incf i)) 1501 :direction :output 1502 :if-does-not-exist :create 1503 :if-exists :supersede 1504 :element-type '(unsigned-byte 8)) 1505 (copy-stream in out)))))))) 1506 1507 ;;; MS-XLS Excel binary file 1508 1509 (define-structure BIFFRecordHeader () 1510 (tag ushort) 1511 (length ushort)) 1512 1513 (define-structure BIFF-ShortXLUnicodeString () 1514 (cch ubyte) 1515 (%dummy ubyte :member '(0 1)) 1516 (fHighByte t :compute (not (zerop (logand 1 %dummy)))) 1517 (reserved1 t :compute (assert (zerop (logand #xfe %dummy)))) 1518 (rgb (ubyte (if fHighByte (* 2 cch) cch))) 1519 (decoded t :compute (string-from-octets rgb fHighByte))) 1520 1521 (define-structure BIFF-BoundSheet8 () 1522 (lbPlyPos dword) 1523 (hsState ubyte :member '(0 1 2)) 1524 (dt ubyte :member '(0 1 2 6)) 1525 (stName BIFF-ShortXLUnicodeString)) 1526 1527 (define-structure BIFF-Cell () 1528 (rw ushort) 1529 (col ushort) 1530 (ixfe ushort)) 1531 1532 (define-structure BIFF-Blank () 1533 (cell BIFF-Cell)) 1534 1535 (define-structure BIFF-RkNumber () 1536 (%dummy dword) 1537 (percent t :compute (not (zerop (logand 1 %dummy)))) 1538 (signed t :compute (not (zerop (logand 2 %dummy)))) 1539 (value t :compute (let ((y (if signed 1540 (error "TODO") ;;(ash x -2) 1541 (double-float-from-bits 1542 (logand #xfffffffc %dummy) 0)))) 1543 (if percent (/ y 100) y)))) 1544 1545 (define-structure BIFF-RkRec () 1546 (ixfe ushort) 1547 (rk BIFF-RkNumber)) 1548 1549 (define-structure BIFF-RK () 1550 (rw ushort) 1551 (col ushort) 1552 (rkRec BIFF-RkRec)) 1553 1554 (define-structure BIFF-Bes () 1555 (bBoolErr ubyte) 1556 (fError ubyte :member '(0 1)) 1557 (decoded t :compute (if (zerop fError) 1558 (ecase bBoolErr 1559 (0 nil) 1560 (1 t)) 1561 (ecase fError 1562 (0 :#NULL!) 1563 (7 :#DIV/0!) 1564 (#xf :#VALUE!) 1565 (#x17 :#REF!) 1566 (#x1d :#NAME!) 1567 (#x24 :#NUM!) 1568 (#x2a :#N/A) 1569 (#x2b :#GETTING_DATA))))) 1570 1571 (define-structure BIFF-BoolErr () 1572 (cell BIFF-Cell) 1573 (bes BIFF-Bes)) 1574 1575 (define-structure BIFF-Number () 1576 (cell BIFF-Cell) 1577 (num ulonglong)) ;; TODO double 1578 1579 (define-structure BIFF-LabelSst () 1580 (cell BIFF-Cell) 1581 (isst dword)) 1582 1583 (define-structure BIFF-FormulaValue () ;; TODO 1584 (byte1 ubyte) 1585 (byte2 ubyte) 1586 (byte3 ubyte) 1587 (byte4 ubyte) 1588 (byte5 ubyte) 1589 (byte6 ubyte) 1590 (fExprO ushort)) 1591 1592 #+nil 1593 (define-structure BIFF-CellParsedFormula () ;; TODO 1594 (cce ushort) 1595 (rgce (ubyte cce)) 1596 (rgcb BIFF-RgbExtra)) 1597 1598 #+nil 1599 (define-structure BIFF-Formula () ;; TODO also probably wrong bit fiddling 1600 (cell BIFF-Cell) 1601 (val BIFF-FormulaValue) 1602 (%dummy ushort) 1603 (fAlwaysCalc t :compute (not (zerop (logand #x8000)))) 1604 (reserved1 t :compute (assert (zerop (logand #x4000)))) 1605 (fFill t :compute (not (zerop (logand #x2000)))) 1606 (fShrFmla t :compute (not (zerop (logand #x1000)))) 1607 (reserved2 t :compute (assert (zerop (logand #x800)))) 1608 (fClearErrors t :compute (not (zerop (logand #x400)))) 1609 (reserved3 t :compute (assert (zerop (logand #x3ff)))) 1610 (chn dword) 1611 (formula BIFF-CellParsedFormula)) 1612 1613 #+nil 1614 (define-structure BIFF-MulBlank () ;; TODO 1615 (rw ushort) 1616 (colFirst ushort)) 1617 1618 (define-structure BIFF-FormatRun () 1619 (ich ushort) 1620 (ifnt ushort)) 1621 1622 (define-structure BIFF-LPWideString () 1623 (cchCharacters ushort) 1624 (rgchData (wchar cchCharacters)) 1625 (decoded t :compute (string-from-wchars rgchData))) 1626 1627 (define-structure BIFF-RPHSSub () 1628 (crun ushort) 1629 (cch ushort) 1630 (st BIFF-LPWideString)) 1631 1632 (define-structure BIFF-PhRuns () 1633 (ichFirst ushort) ;; TODO signed 1634 (ichMom ushort) ;; TODO signed 1635 (cchMom ushort)) ;; TODO signed 1636 1637 (define-structure BIFF-ExtRst () 1638 (reserved ushort) 1639 (cb ushort) 1640 (phs dword) 1641 (rphssub BIFF-RPHSSub) 1642 (rgphruns (BIFF-PhRuns (BIFF-RPHSSub.crun rphssub)))) 1643 1644 (defvar *fHighByte*) ;; nil|0|1 ;; TODO clean up nil|t vs nil|0|1 1645 1646 (defun read-ustring (stream nchars fHighByte) 1647 (let ((*fHighByte* fHighByte) 1648 (b (make-array (* 2 nchars) 1649 :element-type 'character 1650 :fill-pointer 0))) 1651 (dotimes (i nchars (coerce b 'string)) 1652 (vector-push-extend 1653 (code-char (let ((c (ecase *fHighByte* 1654 (0 (read-octet stream)) 1655 (1 (logior (read-octet stream) 1656 (ash (read-octet stream) 8)))))) 1657 (assert (plusp c)) 1658 c)) 1659 b)))) 1660 1661 (define-structure BIFF-XLUnicodeRichExtendedString () 1662 (cch ushort) 1663 (%dummy ubyte) 1664 (fHighByte t :compute (logand 1 %dummy)) 1665 (reserved1 t :compute (assert (zerop (logand 2 %dummy)))) 1666 (fExtSt t :compute (not (zerop (logand 4 %dummy)))) 1667 (fRichSt t :compute (not (zerop (logand 8 %dummy)))) 1668 (reserved2 t :compute (assert (zerop (logand #xf0 %dummy)))) 1669 (cRun ushort :when fRichSt :default 0) 1670 (cbExtRst dword :when fExtSt :default 0) 1671 (rgb t :compute (read-ustring stream cch fHighByte)) 1672 (rgRun (BIFF-FormatRun cRun) :when fRichSt :default #()) 1673 (ExtRst (BIFF-ExtRst cbExtRst) :when fExtSt :default #())) 1674 1675 (define-structure BIFF-SST () 1676 (cstTotal dword) 1677 (cstUnique dword) 1678 (rgb (BIFF-XLUnicodeRichExtendedString cstUnique))) 1679 1680 (define-structure BIFF-DefColWidth () 1681 (cchdefColWidth ushort)) 1682 1683 (define-structure BIFF-Index () 1684 (reserved dword :always 0) 1685 (rwMic dword) 1686 (rwMac dword) 1687 (ibXF dword) 1688 (rgibRw (dword 1))) 1689 1690 (defun biff-continue-stream (stream size) 1691 ;; like SHORTER-STREAM but makes continue records transparent 1692 (let ((offset 0) 1693 self) 1694 (setq self 1695 (lambda (msg) 1696 (assert stream) 1697 (ecase msg 1698 (close (setq stream nil)) 1699 (stream-position offset) 1700 (physical-stream-position (physical-stream-position stream)) 1701 (read-octet 1702 (unless (< offset size) 1703 (when (eql #x3c (read-ushort stream)) ;; continue record 1704 (let ((n (read-ushort stream))) 1705 (assert (< 0 n 8225)) ;; TODO biff8 or 2081 biff2-5 1706 (incf size n) 1707 (when *fHighByte* 1708 (setq *fHighByte* (logand 1 (read-octet stream))) 1709 (decf size))))) 1710 (unless (< offset size) 1711 (error 'end-of-file :stream self)) 1712 (incf offset) 1713 (read-octet stream))))))) 1714 1715 (defun biff-substream (ole-entry-stream) 1716 (let ((in ole-entry-stream) 1717 end 1718 eof) 1719 (flet ((header () 1720 (let* ((h (read-BIFFRecordHeader in)) 1721 (nbytes (BIFFRecordHeader.length h))) 1722 (setq end (+ (stream-position in) nbytes)) 1723 (values (BIFFRecordHeader.tag h) 1724 (biff-continue-stream in nbytes))))) 1725 (assert (member (header) '(#x0009 #x0209 #x0409 #x0809))) ;; bof 1726 (lambda () 1727 (assert (not eof)) 1728 (stream-position in end) 1729 (multiple-value-bind (tag s) (header) 1730 (case tag ;; TODO more cell types 1731 (#x000a (not (setq eof t))) 1732 ;;(#x000b :index1) 1733 (#x0085 (read-BIFF-BoundSheet8 s)) 1734 (#x00fc (let (*fHighByte*) (read-BIFF-SST s))) 1735 (#x00fd (read-BIFF-LabelSst s)) 1736 ;;(#x020b (read-BIFF-Index s)) 1737 (#x027e (read-BIFF-Rk s)) 1738 (t tag))))))) 1739 1740 (defun princ-cell-value (x sst) 1741 (typecase x 1742 (BIFF-LabelSst 1743 (let ((c (BIFF-LabelSst.cell x))) 1744 `(:label ,(BIFF-Cell.rw c) 1745 ,(BIFF-Cell.col c) 1746 ,(BIFF-XLUnicodeRichExtendedString.rgb 1747 (aref (BIFF-SST.rgb sst) (BIFF-LabelSst.isst x)))))) 1748 (BIFF-RK 1749 `(:number ,(BIFF-RK.rw x) 1750 ,(BIFF-RK.col x) 1751 ,(BIFF-RkNumber.value (BIFF-RkRec.rk (BIFF-RK.RkRec x))))))) 1752 1753 (defun parse-sheet (BIFF-BoundSheet8 stream sst) 1754 (stream-position stream (BIFF-BoundSheet8.lbPlyPos BIFF-BoundSheet8)) 1755 (do (z x (s (biff-substream stream))) 1756 ((not (setq x (funcall s))) 1757 (nreverse z)) 1758 (let ((v (princ-cell-value x sst))) 1759 (when v 1760 (push v z)))) 1761 #+nil 1762 (let ((index (funcall (biff-substream stream)))) 1763 (etypecase index 1764 (BIFF-Index index #+nil(BIFF-Index.rgibRw ))))) 1765 1766 (defun parse-xls-file (filename) 1767 (with-stream (f (ole-file-stream filename)) 1768 (let ((e (find-ole-entry f :name "Workbook" :type 2))) 1769 (when e 1770 (with-stream (in (ole-entry-stream f e)) 1771 (let (sheets sst) 1772 (do (x (globals (biff-substream in))) 1773 ((not (setq x (funcall globals))) 1774 (setq sheets (nreverse sheets))) 1775 (typecase x 1776 (BIFF-BoundSheet8 (push x sheets)) 1777 (BIFF-SST (setq sst x)))) 1778 `(:workbook 1779 ,@(loop 1780 for x in sheets 1781 collect `(:sheet 1782 ,(BIFF-ShortXLUnicodeString.decoded 1783 (BIFF-BoundSheet8.stName x)) 1784 ,@(parse-sheet x in sst))))))))))