aboutsummaryrefslogblamecommitdiff
path: root/deskutils/bookworm/files/patch-data_scripts_mobi__lib_mobi__unpack.py
blob: dd3ca576da7326701f18d0d6866e6ad82119c391 (plain) (tree)

































































































































































































































































































































































                                                                                                                          
--- data/scripts/mobi_lib/mobi_unpack.py.orig	2021-08-16 04:42:50 UTC
+++ data/scripts/mobi_lib/mobi_unpack.py
@@ -256,7 +256,7 @@ class MobiHeader:
         self.header = self.sect.loadSection(self.start)
         self.records, = struct.unpack_from('>H', self.header, 0x8)
         self.length, self.type, self.codepage, self.unique_id, self.version = struct.unpack('>LLLLL', self.header[20:40])
-        print "Mobi Version: ", self.version
+        print("Mobi Version: ", self.version)
 
         # codec
         self.codec = 'windows-1252'
@@ -266,18 +266,18 @@ class MobiHeader:
         }
         if self.codepage in codec_map.keys():
             self.codec = codec_map[self.codepage]
-        print "Codec: ", self.codec
+        print("Codec: ", self.codec)
 
         # title
         toff, tlen = struct.unpack('>II', self.header[0x54:0x5c])
         tend = toff + tlen
         self.title=self.header[toff:tend]
-        print "Title: ", self.title
+        print("Title: ", self.title)
 
         # set up for decompression/unpacking
         compression, = struct.unpack_from('>H', self.header, 0x0)
         if compression == 0x4448:
-            print "Huffdic compression"
+            print("Huffdic compression")
             reader = HuffcdicReader()
             huffoff, huffnum = struct.unpack_from('>LL', self.header, 0x70)
             huffoff = huffoff + self.start
@@ -286,10 +286,10 @@ class MobiHeader:
                 reader.loadCdic(self.sect.loadSection(huffoff+i))
             self.unpack = reader.unpack
         elif compression == 2:
-            print "Palmdoc compression"
+            print("Palmdoc compression")
             self.unpack = PalmdocReader().unpack
         elif compression == 1:
-            print "No compression"
+            print("No compression")
             self.unpack = UncompressedReader().unpack
         else:
             raise unpackException('invalid compression type: 0x%4x' % compression)
@@ -376,14 +376,14 @@ class MobiHeader:
                 self.fdst += self.start
 
         if DEBUG:
-            print "firstaddl %0x" % self.firstaddl
-            print "ncxidx %0x" % self.ncxidx
-            print "exth flags %0x" % exth_flag
+            print("firstaddl %0x" % self.firstaddl)
+            print("ncxidx %0x" % self.ncxidx)
+            print("exth flags %0x" % exth_flag)
             if self.version == 8 or self.start != 0:
-                print "skelidx %0x" % self.skelidx
-                print "dividx %0x" % self.dividx
-                print "othidx %0x" % self.othidx
-                print "fdst %0x" % self.fdst
+                print("skelidx %0x" % self.skelidx)
+                print("dividx %0x" % self.dividx)
+                print("othidx %0x" % self.othidx)
+                print("fdst %0x" % self.fdst)
 
         # NOTE: See DumpMobiHeader.py for a complete set of header fields
 
@@ -464,7 +464,7 @@ class MobiHeader:
                         trailers += 1
                     flags = flags >> 1
         # get raw mobi markup languge
-        print "Unpack raw markup language"
+        print("Unpack raw markup language")
         dataList = []
         # offset = 0
         for i in xrange(1, self.records+1):
@@ -542,7 +542,7 @@ class MobiHeader:
             else:
                 metadata[name].append(value)
                 if DEBUG:
-                    print "multiple values: metadata[%s]=%s" % (name, metadata[name])
+                    print("multiple values: metadata[%s]=%s" % (name, metadata[name]))
         _length, num_items = struct.unpack('>LL', extheader[4:12])
         extheader = extheader[12:]
         pos = 0
@@ -564,12 +564,12 @@ class MobiHeader:
                     value, = struct.unpack('>L',content)
                     addValue(name, str(value))
                 else:
-                    print "Error: Value for %s has unexpected size of %s" % (name, size)
+                    print("Error: Value for %s has unexpected size of %s" % (name, size))
             elif id in id_map_hexstrings.keys():
                 name = id_map_hexstrings[id]
                 addValue(name, content.encode('hex'))
             else:
-                print "Warning: Unknown metadata with id %s found" % id
+                print("Warning: Unknown metadata with id %s found" % id)
                 name = str(id) + ' (hex)'
                 addValue(name, content.encode('hex'))
             pos += size
@@ -600,11 +600,11 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
     for mh in mhlst:
 
         if mh.isK8():
-            print "\n\nProcessing K8 format Ebook ..."
+            print("\n\nProcessing K8 format Ebook ...")
         elif mh.isPrintReplica():
-            print "\nProcessing PrintReplica (.azw4) format Ebook ..."
+            print("\nProcessing PrintReplica (.azw4) format Ebook ...")
         else:
-            print "\nProcessing Mobi format Ebook ..."
+            print("\nProcessing Mobi format Ebook ...")
 
         if DEBUG:
             # write out raw mobi header data
@@ -624,8 +624,8 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
         metadata['Codec'] = [mh.codec]
         metadata['UniqueID'] = [str(mh.unique_id)]
         if DEBUG:
-            print "MetaData from EXTH"
-            print metadata
+            print("MetaData from EXTH")
+            print(metadata)
 
         # save the raw markup language
         rawML = mh.getRawML()
@@ -643,12 +643,12 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
 
         # process additional sections that represent images, resources, fonts, and etc
         # build up a list of image names to use to postprocess the rawml
-        print "Unpacking images, resources, fonts, etc"
+        print("Unpacking images, resources, fonts, etc")
         firstaddl = mh.getfirstAddl()
         if DEBUG:
-            print "firstaddl is ", firstaddl
-            print "num_sections is ", sect.num_sections
-            print "K8Boundary is ", K8Boundary
+            print("firstaddl is ", firstaddl)
+            print("num_sections is ", sect.num_sections)
+            print("K8Boundary is ", K8Boundary)
         beg = firstaddl
         end = sect.num_sections
         if firstaddl < K8Boundary:
@@ -656,12 +656,12 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
         obfuscate_data = []
         for i in xrange(beg, end):
             if DEBUG:
-                print "Section is ", i
+                print("Section is ", i)
             data = sect.loadSection(i)
             type = data[0:4]
             if type in ["FLIS", "FCIS", "FDST", "DATP"]:
                 if DEBUG:
-                    print 'First 4 bytes: %s' % toHex(data[0:4])
+                    print('First 4 bytes: %s' % toHex(data[0:4]))
                     fname = "%05d" % (1+i-beg)
                     fname = type + fname
                     if mh.isK8():
@@ -669,13 +669,13 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
                     fname += '.dat'
                     outname= os.path.join(files.outdir, fname)
                     file(outname, 'wb').write(data)
-                    print "Skipping ", type, " section"
+                    print("Skipping ", type, " section")
                 imgnames.append(None)
                 continue
             elif type == "SRCS":
                 # The mobi file was created by kindlegen and contains a zip archive with all source files.
                 # Extract the archive and save it.
-                print "    Info: File contains kindlegen source archive, extracting as %s" % KINDLEGENSRC_FILENAME
+                print("    Info: File contains kindlegen source archive, extracting as %s" % KINDLEGENSRC_FILENAME)
                 srcname = os.path.join(files.outdir, KINDLEGENSRC_FILENAME)
                 file(srcname, 'wb').write(data[16:])
                 imgnames.append(None)
@@ -709,29 +709,29 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
                         adler32, = struct.unpack_from('>I', font_data, len(font_data) - 4)
                         font_data = zlib.decompress(font_data[2:-4], -wbits, usize)
                         if len(font_data) != usize:
-                            print 'Font Decompression Error: Uncompressed font size mismatch'
+                            print('Font Decompression Error: Uncompressed font size mismatch')
                         if False:
                             # For some reason these almost never match, probably Amazon has a
                             # buggy Adler32 implementation
                             sig = (zlib.adler32(font_data) & 0xffffffff)
                             if sig != adler32:
-                                print 'Font Decompression Error'
-                                print 'Adler checksum did not match. Stored: %d Calculated: %d' % (adler32, sig)
+                                print('Font Decompression Error')
+                                print('Adler checksum did not match. Stored: %d Calculated: %d' % (adler32, sig))
                     else:
-                        print "Error Decoding Font", str(err)
+                        print("Error Decoding Font", str(err))
                 hdr = font_data[0:4]
                 if hdr == '\0\1\0\0' or hdr == 'true' or hdr == 'ttcf':
                     ext = '.ttf'
                 elif hdr == 'OTTO':
                     ext = '.otf'
                 else:
-                    print "Warning: unknown font header %s" % hdr.encode('hex')
+                    print("Warning: unknown font header %s" % hdr.encode('hex'))
                     ext = '.dat'
                 fontname = "font%05d" % (1+i-beg)
                 fontname += ext
                 if (ext == '.ttf' or ext == '.otf') and (fflags & 0x0002):
                     obfuscate_data.append(fontname)
-                print "    extracting font: ", fontname
+                print("    extracting font: ", fontname)
                 outfnt = os.path.join(files.imgdir, fontname)
                 file(outfnt, 'wb').write(font_data)
                 imgnames.append(fontname)
@@ -746,7 +746,7 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
                 if DEBUG:
                     data = data[4:]
                     rescname = "resc%05d.dat" % (1+i-beg)
-                    print "    extracting resource: ", rescname
+                    print("    extracting resource: ", rescname)
                     outrsc = os.path.join(files.imgdir, rescname)
                     file(outrsc, 'wb').write(data)
                 imgnames.append(None)
@@ -754,7 +754,7 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
 
             if data == EOF_RECORD:
                 if DEBUG:
-                    print "Skip section %i as it contains the EOF record." % i
+                    print("Skip section %i as it contains the EOF record." % i)
                 imgnames.append(None)
                 continue
 
@@ -762,16 +762,16 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
             # Get the proper file extension
             imgtype = imghdr.what(None, data)
             if imgtype is None:
-                print "Warning: Section %s contains no image or an unknown image format" % i
+                print("Warning: Section %s contains no image or an unknown image format" % i)
                 imgnames.append(None)
                 if DEBUG:
-                    print 'First 4 bytes: %s' % toHex(data[0:4])
+                    print('First 4 bytes: %s' % toHex(data[0:4]))
                     fname = "unknown%05d.dat" % (1+i-beg)
                     outname= os.path.join(files.outdir, fname)
                     file(outname, 'wb').write(data)
             else:
                 imgname = "image%05d.%s" % (1+i-beg, imgtype)
-                print "    extracting image: ", imgname
+                print("    extracting image: ", imgname)
                 outimg = os.path.join(files.imgdir, imgname)
                 file(outimg, 'wb').write(data)
                 imgnames.append(imgname)
@@ -781,11 +781,11 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
         # Process print replica book.
         if mh.isPrintReplica() and not k8only:
             filenames = []
-            print "Print Replica ebook detected"
+            print("Print Replica ebook detected")
             try:
                 mh.processPrintReplica(files)
-            except Exception, e:
-                print 'Error processing Print Replica: ' + str(e)
+            except Exception as e:
+                print('Error processing Print Replica: ' + str(e))
             filenames.append(['', files.getInputFileBasename() + '.pdf'])
             usedmap = {}
             for name in imgnames:
@@ -915,7 +915,7 @@ def unpackBook(infile, outdir):
 
     # process the PalmDoc database header and verify it is a mobi
     sect = Sectionizer(infile)
-    print "Palm DB type: ", sect.ident
+    print("Palm DB type: ", sect.ident)
     if sect.ident != 'BOOKMOBI' and sect.ident != 'TEXtREAd':
         raise unpackException('invalid file format')
 
@@ -945,7 +945,7 @@ def unpackBook(infile, outdir):
             if (after - before) == 8:
                 data = sect.loadSection(i)
                 if data == K8_BOUNDARY:
-                    print "Mobi Ebook uses the new K8 file format"
+                    print("Mobi Ebook uses the new K8 file format")
                     mh = MobiHeader(sect,i+1)
                     hasK8 = hasK8 or mh.isK8()
                     mhlst.append(mh)
@@ -1010,32 +1010,32 @@ class Mobi8Reader:
 
 
 def usage(progname):
-    print ""
-    print "Description:"
-    print "  Unpacks an unencrypted Kindle/MobiPocket ebook to html and images"
-    print "  or an unencrypted Kindle/Print Replica ebook to PDF and images"
-    print "  into the specified output folder."
-    print "Usage:"
-    print "  %s -r -s -d -h infile [outdir]" % progname
-    print "Options:"
-    print "    -r           write raw data to the output folder"
-    print "    -s           split combination mobis into mobi7 and mobi8 ebooks"
-    print "    -d           enable verbose debugging"
-    print "    -h           print this help message"
+    print("")
+    print("Description:")
+    print("  Unpacks an unencrypted Kindle/MobiPocket ebook to html and images")
+    print("  or an unencrypted Kindle/Print Replica ebook to PDF and images")
+    print("  into the specified output folder.")
+    print("Usage:")
+    print("  %s -r -s -d -h infile [outdir]" % progname)
+    print("Options:")
+    print("    -r           write raw data to the output folder")
+    print("    -s           split combination mobis into mobi7 and mobi8 ebooks")
+    print("    -d           enable verbose debugging")
+    print("    -h           print this help message")
 
 
 def main(argv=sys.argv):
     global DEBUG
     global WRITE_RAW_DATA
     global SPLIT_COMBO_MOBIS
-    print "MobiUnpack 0.47"
-    print "  Copyright (c) 2009 Charles M. Hannum <root@ihack.net>"
-    print "  With Additions by P. Durrant, K. Hendricks, S. Siebert, fandrieu, DiapDealer, nickredding."
+    print("MobiUnpack 0.47")
+    print("  Copyright (c) 2009 Charles M. Hannum <root@ihack.net>")
+    print("  With Additions by P. Durrant, K. Hendricks, S. Siebert, fandrieu, DiapDealer, nickredding.")
     progname = os.path.basename(argv[0])
     try:
         opts, args = getopt.getopt(sys.argv[1:], "hdrs")
-    except getopt.GetoptError, err:
-        print str(err)
+    except getopt.GetoptError as err:
+        print(str(err))
         usage(progname)
         sys.exit(2)
 
@@ -1062,16 +1062,16 @@ def main(argv=sys.argv):
 
     infileext = os.path.splitext(infile)[1].upper()
     if infileext not in ['.MOBI', '.PRC', '.AZW', '.AZW4']:
-        print "Error: first parameter must be a Kindle/Mobipocket ebook or a Kindle/Print Replica ebook."
+        print("Error: first parameter must be a Kindle/Mobipocket ebook or a Kindle/Print Replica ebook.")
         return 1
 
     try:
-        print 'Unpacking Book...'
+        print('Unpacking Book...')
         unpackBook(infile, outdir)
-        print 'Completed'
+        print('Completed')
 
-    except ValueError, e:
-        print "Error: %s" % e
+    except ValueError as e:
+        print("Error: %s" % e)
         return 1
 
     return 0