diff options
Diffstat (limited to 'deskutils/bookworm/files/patch-data_scripts_mobi__lib_mobi__unpack.py')
-rw-r--r-- | deskutils/bookworm/files/patch-data_scripts_mobi__lib_mobi__unpack.py | 354 |
1 files changed, 354 insertions, 0 deletions
diff --git a/deskutils/bookworm/files/patch-data_scripts_mobi__lib_mobi__unpack.py b/deskutils/bookworm/files/patch-data_scripts_mobi__lib_mobi__unpack.py new file mode 100644 index 000000000000..dd3ca576da73 --- /dev/null +++ b/deskutils/bookworm/files/patch-data_scripts_mobi__lib_mobi__unpack.py @@ -0,0 +1,354 @@ +--- data/scripts/mobi_lib/mobi_unpack.py.orig 2021-08-16 04:42:50 UTC ++++ data/scripts/mobi_lib/mobi_unpack.py +@@ -256,7 +256,7 @@ class MobiHeader: + self.header = self.sect.loadSection(self.start) + self.records, = struct.unpack_from('>H', self.header, 0x8) + self.length, self.type, self.codepage, self.unique_id, self.version = struct.unpack('>LLLLL', self.header[20:40]) +- print "Mobi Version: ", self.version ++ print("Mobi Version: ", self.version) + + # codec + self.codec = 'windows-1252' +@@ -266,18 +266,18 @@ class MobiHeader: + } + if self.codepage in codec_map.keys(): + self.codec = codec_map[self.codepage] +- print "Codec: ", self.codec ++ print("Codec: ", self.codec) + + # title + toff, tlen = struct.unpack('>II', self.header[0x54:0x5c]) + tend = toff + tlen + self.title=self.header[toff:tend] +- print "Title: ", self.title ++ print("Title: ", self.title) + + # set up for decompression/unpacking + compression, = struct.unpack_from('>H', self.header, 0x0) + if compression == 0x4448: +- print "Huffdic compression" ++ print("Huffdic compression") + reader = HuffcdicReader() + huffoff, huffnum = struct.unpack_from('>LL', self.header, 0x70) + huffoff = huffoff + self.start +@@ -286,10 +286,10 @@ class MobiHeader: + reader.loadCdic(self.sect.loadSection(huffoff+i)) + self.unpack = reader.unpack + elif compression == 2: +- print "Palmdoc compression" ++ print("Palmdoc compression") + self.unpack = PalmdocReader().unpack + elif compression == 1: +- print "No compression" ++ print("No compression") + self.unpack = UncompressedReader().unpack + else: + raise unpackException('invalid compression type: 0x%4x' % compression) +@@ -376,14 +376,14 @@ class MobiHeader: + self.fdst += self.start + + if DEBUG: +- print "firstaddl %0x" % self.firstaddl +- print "ncxidx %0x" % self.ncxidx +- print "exth flags %0x" % exth_flag ++ print("firstaddl %0x" % self.firstaddl) ++ print("ncxidx %0x" % self.ncxidx) ++ print("exth flags %0x" % exth_flag) + if self.version == 8 or self.start != 0: +- print "skelidx %0x" % self.skelidx +- print "dividx %0x" % self.dividx +- print "othidx %0x" % self.othidx +- print "fdst %0x" % self.fdst ++ print("skelidx %0x" % self.skelidx) ++ print("dividx %0x" % self.dividx) ++ print("othidx %0x" % self.othidx) ++ print("fdst %0x" % self.fdst) + + # NOTE: See DumpMobiHeader.py for a complete set of header fields + +@@ -464,7 +464,7 @@ class MobiHeader: + trailers += 1 + flags = flags >> 1 + # get raw mobi markup languge +- print "Unpack raw markup language" ++ print("Unpack raw markup language") + dataList = [] + # offset = 0 + for i in xrange(1, self.records+1): +@@ -542,7 +542,7 @@ class MobiHeader: + else: + metadata[name].append(value) + if DEBUG: +- print "multiple values: metadata[%s]=%s" % (name, metadata[name]) ++ print("multiple values: metadata[%s]=%s" % (name, metadata[name])) + _length, num_items = struct.unpack('>LL', extheader[4:12]) + extheader = extheader[12:] + pos = 0 +@@ -564,12 +564,12 @@ class MobiHeader: + value, = struct.unpack('>L',content) + addValue(name, str(value)) + else: +- print "Error: Value for %s has unexpected size of %s" % (name, size) ++ print("Error: Value for %s has unexpected size of %s" % (name, size)) + elif id in id_map_hexstrings.keys(): + name = id_map_hexstrings[id] + addValue(name, content.encode('hex')) + else: +- print "Warning: Unknown metadata with id %s found" % id ++ print("Warning: Unknown metadata with id %s found" % id) + name = str(id) + ' (hex)' + addValue(name, content.encode('hex')) + pos += size +@@ -600,11 +600,11 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou + for mh in mhlst: + + if mh.isK8(): +- print "\n\nProcessing K8 format Ebook ..." ++ print("\n\nProcessing K8 format Ebook ...") + elif mh.isPrintReplica(): +- print "\nProcessing PrintReplica (.azw4) format Ebook ..." ++ print("\nProcessing PrintReplica (.azw4) format Ebook ...") + else: +- print "\nProcessing Mobi format Ebook ..." ++ print("\nProcessing Mobi format Ebook ...") + + if DEBUG: + # write out raw mobi header data +@@ -624,8 +624,8 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou + metadata['Codec'] = [mh.codec] + metadata['UniqueID'] = [str(mh.unique_id)] + if DEBUG: +- print "MetaData from EXTH" +- print metadata ++ print("MetaData from EXTH") ++ print(metadata) + + # save the raw markup language + rawML = mh.getRawML() +@@ -643,12 +643,12 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou + + # process additional sections that represent images, resources, fonts, and etc + # build up a list of image names to use to postprocess the rawml +- print "Unpacking images, resources, fonts, etc" ++ print("Unpacking images, resources, fonts, etc") + firstaddl = mh.getfirstAddl() + if DEBUG: +- print "firstaddl is ", firstaddl +- print "num_sections is ", sect.num_sections +- print "K8Boundary is ", K8Boundary ++ print("firstaddl is ", firstaddl) ++ print("num_sections is ", sect.num_sections) ++ print("K8Boundary is ", K8Boundary) + beg = firstaddl + end = sect.num_sections + if firstaddl < K8Boundary: +@@ -656,12 +656,12 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou + obfuscate_data = [] + for i in xrange(beg, end): + if DEBUG: +- print "Section is ", i ++ print("Section is ", i) + data = sect.loadSection(i) + type = data[0:4] + if type in ["FLIS", "FCIS", "FDST", "DATP"]: + if DEBUG: +- print 'First 4 bytes: %s' % toHex(data[0:4]) ++ print('First 4 bytes: %s' % toHex(data[0:4])) + fname = "%05d" % (1+i-beg) + fname = type + fname + if mh.isK8(): +@@ -669,13 +669,13 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou + fname += '.dat' + outname= os.path.join(files.outdir, fname) + file(outname, 'wb').write(data) +- print "Skipping ", type, " section" ++ print("Skipping ", type, " section") + imgnames.append(None) + continue + elif type == "SRCS": + # The mobi file was created by kindlegen and contains a zip archive with all source files. + # Extract the archive and save it. +- print " Info: File contains kindlegen source archive, extracting as %s" % KINDLEGENSRC_FILENAME ++ print(" Info: File contains kindlegen source archive, extracting as %s" % KINDLEGENSRC_FILENAME) + srcname = os.path.join(files.outdir, KINDLEGENSRC_FILENAME) + file(srcname, 'wb').write(data[16:]) + imgnames.append(None) +@@ -709,29 +709,29 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou + adler32, = struct.unpack_from('>I', font_data, len(font_data) - 4) + font_data = zlib.decompress(font_data[2:-4], -wbits, usize) + if len(font_data) != usize: +- print 'Font Decompression Error: Uncompressed font size mismatch' ++ print('Font Decompression Error: Uncompressed font size mismatch') + if False: + # For some reason these almost never match, probably Amazon has a + # buggy Adler32 implementation + sig = (zlib.adler32(font_data) & 0xffffffff) + if sig != adler32: +- print 'Font Decompression Error' +- print 'Adler checksum did not match. Stored: %d Calculated: %d' % (adler32, sig) ++ print('Font Decompression Error') ++ print('Adler checksum did not match. Stored: %d Calculated: %d' % (adler32, sig)) + else: +- print "Error Decoding Font", str(err) ++ print("Error Decoding Font", str(err)) + hdr = font_data[0:4] + if hdr == '\0\1\0\0' or hdr == 'true' or hdr == 'ttcf': + ext = '.ttf' + elif hdr == 'OTTO': + ext = '.otf' + else: +- print "Warning: unknown font header %s" % hdr.encode('hex') ++ print("Warning: unknown font header %s" % hdr.encode('hex')) + ext = '.dat' + fontname = "font%05d" % (1+i-beg) + fontname += ext + if (ext == '.ttf' or ext == '.otf') and (fflags & 0x0002): + obfuscate_data.append(fontname) +- print " extracting font: ", fontname ++ print(" extracting font: ", fontname) + outfnt = os.path.join(files.imgdir, fontname) + file(outfnt, 'wb').write(font_data) + imgnames.append(fontname) +@@ -746,7 +746,7 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou + if DEBUG: + data = data[4:] + rescname = "resc%05d.dat" % (1+i-beg) +- print " extracting resource: ", rescname ++ print(" extracting resource: ", rescname) + outrsc = os.path.join(files.imgdir, rescname) + file(outrsc, 'wb').write(data) + imgnames.append(None) +@@ -754,7 +754,7 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou + + if data == EOF_RECORD: + if DEBUG: +- print "Skip section %i as it contains the EOF record." % i ++ print("Skip section %i as it contains the EOF record." % i) + imgnames.append(None) + continue + +@@ -762,16 +762,16 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou + # Get the proper file extension + imgtype = imghdr.what(None, data) + if imgtype is None: +- print "Warning: Section %s contains no image or an unknown image format" % i ++ print("Warning: Section %s contains no image or an unknown image format" % i) + imgnames.append(None) + if DEBUG: +- print 'First 4 bytes: %s' % toHex(data[0:4]) ++ print('First 4 bytes: %s' % toHex(data[0:4])) + fname = "unknown%05d.dat" % (1+i-beg) + outname= os.path.join(files.outdir, fname) + file(outname, 'wb').write(data) + else: + imgname = "image%05d.%s" % (1+i-beg, imgtype) +- print " extracting image: ", imgname ++ print(" extracting image: ", imgname) + outimg = os.path.join(files.imgdir, imgname) + file(outimg, 'wb').write(data) + imgnames.append(imgname) +@@ -781,11 +781,11 @@ def process_all_mobi_headers(files, sect, mhlst, K8Bou + # Process print replica book. + if mh.isPrintReplica() and not k8only: + filenames = [] +- print "Print Replica ebook detected" ++ print("Print Replica ebook detected") + try: + mh.processPrintReplica(files) +- except Exception, e: +- print 'Error processing Print Replica: ' + str(e) ++ except Exception as e: ++ print('Error processing Print Replica: ' + str(e)) + filenames.append(['', files.getInputFileBasename() + '.pdf']) + usedmap = {} + for name in imgnames: +@@ -915,7 +915,7 @@ def unpackBook(infile, outdir): + + # process the PalmDoc database header and verify it is a mobi + sect = Sectionizer(infile) +- print "Palm DB type: ", sect.ident ++ print("Palm DB type: ", sect.ident) + if sect.ident != 'BOOKMOBI' and sect.ident != 'TEXtREAd': + raise unpackException('invalid file format') + +@@ -945,7 +945,7 @@ def unpackBook(infile, outdir): + if (after - before) == 8: + data = sect.loadSection(i) + if data == K8_BOUNDARY: +- print "Mobi Ebook uses the new K8 file format" ++ print("Mobi Ebook uses the new K8 file format") + mh = MobiHeader(sect,i+1) + hasK8 = hasK8 or mh.isK8() + mhlst.append(mh) +@@ -1010,32 +1010,32 @@ class Mobi8Reader: + + + def usage(progname): +- print "" +- print "Description:" +- print " Unpacks an unencrypted Kindle/MobiPocket ebook to html and images" +- print " or an unencrypted Kindle/Print Replica ebook to PDF and images" +- print " into the specified output folder." +- print "Usage:" +- print " %s -r -s -d -h infile [outdir]" % progname +- print "Options:" +- print " -r write raw data to the output folder" +- print " -s split combination mobis into mobi7 and mobi8 ebooks" +- print " -d enable verbose debugging" +- print " -h print this help message" ++ print("") ++ print("Description:") ++ print(" Unpacks an unencrypted Kindle/MobiPocket ebook to html and images") ++ print(" or an unencrypted Kindle/Print Replica ebook to PDF and images") ++ print(" into the specified output folder.") ++ print("Usage:") ++ print(" %s -r -s -d -h infile [outdir]" % progname) ++ print("Options:") ++ print(" -r write raw data to the output folder") ++ print(" -s split combination mobis into mobi7 and mobi8 ebooks") ++ print(" -d enable verbose debugging") ++ print(" -h print this help message") + + + def main(argv=sys.argv): + global DEBUG + global WRITE_RAW_DATA + global SPLIT_COMBO_MOBIS +- print "MobiUnpack 0.47" +- print " Copyright (c) 2009 Charles M. Hannum <root@ihack.net>" +- print " With Additions by P. Durrant, K. Hendricks, S. Siebert, fandrieu, DiapDealer, nickredding." ++ print("MobiUnpack 0.47") ++ print(" Copyright (c) 2009 Charles M. Hannum <root@ihack.net>") ++ print(" With Additions by P. Durrant, K. Hendricks, S. Siebert, fandrieu, DiapDealer, nickredding.") + progname = os.path.basename(argv[0]) + try: + opts, args = getopt.getopt(sys.argv[1:], "hdrs") +- except getopt.GetoptError, err: +- print str(err) ++ except getopt.GetoptError as err: ++ print(str(err)) + usage(progname) + sys.exit(2) + +@@ -1062,16 +1062,16 @@ def main(argv=sys.argv): + + infileext = os.path.splitext(infile)[1].upper() + if infileext not in ['.MOBI', '.PRC', '.AZW', '.AZW4']: +- print "Error: first parameter must be a Kindle/Mobipocket ebook or a Kindle/Print Replica ebook." ++ print("Error: first parameter must be a Kindle/Mobipocket ebook or a Kindle/Print Replica ebook.") + return 1 + + try: +- print 'Unpacking Book...' ++ print('Unpacking Book...') + unpackBook(infile, outdir) +- print 'Completed' ++ print('Completed') + +- except ValueError, e: +- print "Error: %s" % e ++ except ValueError as e: ++ print("Error: %s" % e) + return 1 + + return 0 |