--- data/scripts/mobi_lib/mobi_unpack.py.orig 2021-08-16 04:42:50 UTC
+++ data/scripts/mobi_lib/mobi_unpack.py
@@ -256,7 +256,7 @@ class MobiHeader:
self.header = self.sect.loadSection(self.start)
self.records, = struct.unpack_from('>H', self.header, 0x8)
self.length, self.type, self.codepage, self.unique_id, self.version = struct.unpack('>LLLLL', self.header[20:40])
- print "Mobi Version: ", self.version
+ print("Mobi Version: ", self.version)
# codec
self.codec = 'windows-1252'
@@ -266,18 +266,18 @@ class MobiHeader:
}
if self.codepage in codec_map.keys():
self.codec = codec_map[self.codepage]
- print "Codec: ", self.codec
+ print("Codec: ", self.codec)
# title
toff, tlen = struct.unpack('>II', self.header[0x54:0x5c])
tend = toff + tlen
self.title=self.header[toff:tend]
- print "Title: ", self.title
+ print("Title: ", self.title)
# set up for decompression/unpacking
compression, = struct.unpack_from('>H', self.header, 0x0)
if compression == 0x4448:
- print "Huffdic compression"
+ print("Huffdic compression")
reader = HuffcdicReader()
huffoff, huffnum = struct.unpack_from('>LL', self.header, 0x70)
huffoff = huffoff + self.start
@@ -286,10 +286,10 @@ class MobiHeader:
reader.loadCdic(self.sect.loadSection(huffoff+i))
self.unpack = reader.unpack
elif compression == 2:
- print "Palmdoc compression"
+ print("Palmdoc compression")
self.unpack = PalmdocReader().unpack
elif compression == 1:
- print "No compression"
+ print("No compression")
self.unpack = UncompressedReader().unpack
else:
raise unpackException('invalid compression type: 0x%4x' % compression)
@@ -376,14 +376,14 @@ class MobiHeader:
self.fdst += self.start
if DEBUG:
- print "firstaddl %0x" % self.firstaddl
- print "ncxidx %0x" % self.ncxidx
- print "exth flags %0x" % exth_flag
+ print("firstaddl %0x" % self.firstaddl)
+ print("ncxidx %0x" % self.ncxidx)
+ print("exth flags %0x" % exth_flag)
if self.version == 8 or self.start != 0:
- print "skelidx %0x" % self.skelidx
- print "dividx %0x" % self.dividx
- print "othidx %0x" % self.othidx
- print "fdst %0x" % self.fdst
+ print("skelidx %0x" % self.skelidx)
+ print("dividx %0x" % self.dividx)
+ print("othidx %0x" % self.othidx)
+ print("fdst %0x" % self.fdst)
# NOTE: See DumpMobiHeader.py for a complete set of header fields
@@ -464,7 +464,7 @@ class MobiHeader:
trailers += 1
flags = flags >> 1
# get raw mobi markup languge
- print "Unpack raw markup language"
+ print("Unpack raw markup language")
dataList = []
# offset = 0
for i in xrange(1, self.records+1):
@@ -542,7 +542,7 @@ class MobiHeader:
else:
metadata[name].append(value)
if DEBUG:
- print "multiple values: metadata[%s]=%s" % (name, metadata[name])
+ print("multiple values: metadata[%s]=%s" % (name, metadata[name]))
_length, num_items = struct.unpack('>LL', extheader[4:12])
extheader = extheader[12:]
pos = 0
@@ -564,12 +564,12 @@ class MobiHeader:
value, = struct.unpack('>L',content)
addValue(name, str(value))
else:
- print "Error: Value for %s has unexpected size of %s" % (name, size)
+ print("Error: Value for %s has unexpected size of %s" % (name, size))
elif id in id_map_hexstrings.keys():
name = id_map_hexstrings[id]
addValue(name, content.encode('hex'))
else:
- print "Warning: Unknown metadata with id %s found" % id
+ print("Warning: Unknown metadata with id %s found" % id)
name = str(id) + ' (hex)'
addValue(name, content.encode('hex'))
pos += size
@@ -600,11 +600,11 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
for mh in mhlst:
if mh.isK8():
- print "\n\nProcessing K8 format Ebook ..."
+ print("\n\nProcessing K8 format Ebook ...")
elif mh.isPrintReplica():
- print "\nProcessing PrintReplica (.azw4) format Ebook ..."
+ print("\nProcessing PrintReplica (.azw4) format Ebook ...")
else:
- print "\nProcessing Mobi format Ebook ..."
+ print("\nProcessing Mobi format Ebook ...")
if DEBUG:
# write out raw mobi header data
@@ -624,8 +624,8 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
metadata['Codec'] = [mh.codec]
metadata['UniqueID'] = [str(mh.unique_id)]
if DEBUG:
- print "MetaData from EXTH"
- print metadata
+ print("MetaData from EXTH")
+ print(metadata)
# save the raw markup language
rawML = mh.getRawML()
@@ -643,12 +643,12 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
# process additional sections that represent images, resources, fonts, and etc
# build up a list of image names to use to postprocess the rawml
- print "Unpacking images, resources, fonts, etc"
+ print("Unpacking images, resources, fonts, etc")
firstaddl = mh.getfirstAddl()
if DEBUG:
- print "firstaddl is ", firstaddl
- print "num_sections is ", sect.num_sections
- print "K8Boundary is ", K8Boundary
+ print("firstaddl is ", firstaddl)
+ print("num_sections is ", sect.num_sections)
+ print("K8Boundary is ", K8Boundary)
beg = firstaddl
end = sect.num_sections
if firstaddl < K8Boundary:
@@ -656,12 +656,12 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
obfuscate_data = []
for i in xrange(beg, end):
if DEBUG:
- print "Section is ", i
+ print("Section is ", i)
data = sect.loadSection(i)
type = data[0:4]
if type in ["FLIS", "FCIS", "FDST", "DATP"]:
if DEBUG:
- print 'First 4 bytes: %s' % toHex(data[0:4])
+ print('First 4 bytes: %s' % toHex(data[0:4]))
fname = "%05d" % (1+i-beg)
fname = type + fname
if mh.isK8():
@@ -669,13 +669,13 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
fname += '.dat'
outname= os.path.join(files.outdir, fname)
file(outname, 'wb').write(data)
- print "Skipping ", type, " section"
+ print("Skipping ", type, " section")
imgnames.append(None)
continue
elif type == "SRCS":
# The mobi file was created by kindlegen and contains a zip archive with all source files.
# Extract the archive and save it.
- print " Info: File contains kindlegen source archive, extracting as %s" % KINDLEGENSRC_FILENAME
+ print(" Info: File contains kindlegen source archive, extracting as %s" % KINDLEGENSRC_FILENAME)
srcname = os.path.join(files.outdir, KINDLEGENSRC_FILENAME)
file(srcname, 'wb').write(data[16:])
imgnames.append(None)
@@ -709,29 +709,29 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
adler32, = struct.unpack_from('>I', font_data, len(font_data) - 4)
font_data = zlib.decompress(font_data[2:-4], -wbits, usize)
if len(font_data) != usize:
- print 'Font Decompression Error: Uncompressed font size mismatch'
+ print('Font Decompression Error: Uncompressed font size mismatch')
if False:
# For some reason these almost never match, probably Amazon has a
# buggy Adler32 implementation
sig = (zlib.adler32(font_data) & 0xffffffff)
if sig != adler32:
- print 'Font Decompression Error'
- print 'Adler checksum did not match. Stored: %d Calculated: %d' % (adler32, sig)
+ print('Font Decompression Error')
+ print('Adler checksum did not match. Stored: %d Calculated: %d' % (adler32, sig))
else:
- print "Error Decoding Font", str(err)
+ print("Error Decoding Font", str(err))
hdr = font_data[0:4]
if hdr == '\0\1\0\0' or hdr == 'true' or hdr == 'ttcf':
ext = '.ttf'
elif hdr == 'OTTO':
ext = '.otf'
else:
- print "Warning: unknown font header %s" % hdr.encode('hex')
+ print("Warning: unknown font header %s" % hdr.encode('hex'))
ext = '.dat'
fontname = "font%05d" % (1+i-beg)
fontname += ext
if (ext == '.ttf' or ext == '.otf') and (fflags & 0x0002):
obfuscate_data.append(fontname)
- print " extracting font: ", fontname
+ print(" extracting font: ", fontname)
outfnt = os.path.join(files.imgdir, fontname)
file(outfnt, 'wb').write(font_data)
imgnames.append(fontname)
@@ -746,7 +746,7 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
if DEBUG:
data = data[4:]
rescname = "resc%05d.dat" % (1+i-beg)
- print " extracting resource: ", rescname
+ print(" extracting resource: ", rescname)
outrsc = os.path.join(files.imgdir, rescname)
file(outrsc, 'wb').write(data)
imgnames.append(None)
@@ -754,7 +754,7 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
if data == EOF_RECORD:
if DEBUG:
- print "Skip section %i as it contains the EOF record." % i
+ print("Skip section %i as it contains the EOF record." % i)
imgnames.append(None)
continue
@@ -762,16 +762,16 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
# Get the proper file extension
imgtype = imghdr.what(None, data)
if imgtype is None:
- print "Warning: Section %s contains no image or an unknown image format" % i
+ print("Warning: Section %s contains no image or an unknown image format" % i)
imgnames.append(None)
if DEBUG:
- print 'First 4 bytes: %s' % toHex(data[0:4])
+ print('First 4 bytes: %s' % toHex(data[0:4]))
fname = "unknown%05d.dat" % (1+i-beg)
outname= os.path.join(files.outdir, fname)
file(outname, 'wb').write(data)
else:
imgname = "image%05d.%s" % (1+i-beg, imgtype)
- print " extracting image: ", imgname
+ print(" extracting image: ", imgname)
outimg = os.path.join(files.imgdir, imgname)
file(outimg, 'wb').write(data)
imgnames.append(imgname)
@@ -781,11 +781,11 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou
# Process print replica book.
if mh.isPrintReplica() and not k8only:
filenames = []
- print "Print Replica ebook detected"
+ print("Print Replica ebook detected")
try:
mh.processPrintReplica(files)
- except Exception, e:
- print 'Error processing Print Replica: ' + str(e)
+ except Exception as e:
+ print('Error processing Print Replica: ' + str(e))
filenames.append(['', files.getInputFileBasename() + '.pdf'])
usedmap = {}
for name in imgnames:
@@ -915,7 +915,7 @@ def unpackBook(infile, outdir):
# process the PalmDoc database header and verify it is a mobi
sect = Sectionizer(infile)
- print "Palm DB type: ", sect.ident
+ print("Palm DB type: ", sect.ident)
if sect.ident != 'BOOKMOBI' and sect.ident != 'TEXtREAd':
raise unpackException('invalid file format')
@@ -945,7 +945,7 @@ def unpackBook(infile, outdir):
if (after - before) == 8:
data = sect.loadSection(i)
if data == K8_BOUNDARY:
- print "Mobi Ebook uses the new K8 file format"
+ print("Mobi Ebook uses the new K8 file format")
mh = MobiHeader(sect,i+1)
hasK8 = hasK8 or mh.isK8()
mhlst.append(mh)
@@ -1010,32 +1010,32 @@ class Mobi8Reader:
def usage(progname):
- print ""
- print "Description:"
- print " Unpacks an unencrypted Kindle/MobiPocket ebook to html and images"
- print " or an unencrypted Kindle/Print Replica ebook to PDF and images"
- print " into the specified output folder."
- print "Usage:"
- print " %s -r -s -d -h infile [outdir]" % progname
- print "Options:"
- print " -r write raw data to the output folder"
- print " -s split combination mobis into mobi7 and mobi8 ebooks"
- print " -d enable verbose debugging"
- print " -h print this help message"
+ print("")
+ print("Description:")
+ print(" Unpacks an unencrypted Kindle/MobiPocket ebook to html and images")
+ print(" or an unencrypted Kindle/Print Replica ebook to PDF and images")
+ print(" into the specified output folder.")
+ print("Usage:")
+ print(" %s -r -s -d -h infile [outdir]" % progname)
+ print("Options:")
+ print(" -r write raw data to the output folder")
+ print(" -s split combination mobis into mobi7 and mobi8 ebooks")
+ print(" -d enable verbose debugging")
+ print(" -h print this help message")
def main(argv=sys.argv):
global DEBUG
global WRITE_RAW_DATA
global SPLIT_COMBO_MOBIS
- print "MobiUnpack 0.47"
- print " Copyright (c) 2009 Charles M. Hannum <root@ihack.net>"
- print " With Additions by P. Durrant, K. Hendricks, S. Siebert, fandrieu, DiapDealer, nickredding."
+ print("MobiUnpack 0.47")
+ print(" Copyright (c) 2009 Charles M. Hannum <root@ihack.net>")
+ print(" With Additions by P. Durrant, K. Hendricks, S. Siebert, fandrieu, DiapDealer, nickredding.")
progname = os.path.basename(argv[0])
try:
opts, args = getopt.getopt(sys.argv[1:], "hdrs")
- except getopt.GetoptError, err:
- print str(err)
+ except getopt.GetoptError as err:
+ print(str(err))
usage(progname)
sys.exit(2)
@@ -1062,16 +1062,16 @@ def main(argv=sys.argv):
infileext = os.path.splitext(infile)[1].upper()
if infileext not in ['.MOBI', '.PRC', '.AZW', '.AZW4']:
- print "Error: first parameter must be a Kindle/Mobipocket ebook or a Kindle/Print Replica ebook."
+ print("Error: first parameter must be a Kindle/Mobipocket ebook or a Kindle/Print Replica ebook.")
return 1
try:
- print 'Unpacking Book...'
+ print('Unpacking Book...')
unpackBook(infile, outdir)
- print 'Completed'
+ print('Completed')
- except ValueError, e:
- print "Error: %s" % e
+ except ValueError as e:
+ print("Error: %s" % e)
return 1
return 0