-
Notifications
You must be signed in to change notification settings - Fork 631
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a feature to convert mobi to epub and push it to Amazon.
- Loading branch information
Showing
23 changed files
with
484 additions
and
162 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
122 changes: 122 additions & 0 deletions
122
application/lib/calibre/ebooks/compression/mobi_uncompress.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab | ||
import struct | ||
|
||
class unpackException(Exception):
    """Raised when MOBI compressed data or a HUFF/CDIC record is malformed."""
    pass
|
||
class UncompressedReader:
    """Reader for PalmDoc records stored without any compression."""

    def unpack(self, data):
        """Return *data* unchanged; uncompressed records need no decoding."""
        return data
|
||
class PalmdocReader: | ||
def unpack(self, i): | ||
o, p = b"", 0 | ||
while p < len(i): | ||
# for python 3 must use slice since i[p] returns int while slice returns character | ||
c = ord(i[p : p + 1]) | ||
p += 1 | ||
if c >= 1 and c <= 8: | ||
o += i[p : p + c] | ||
p += c | ||
elif c < 128: | ||
o += bytes([c]) | ||
elif c >= 192: | ||
o += b" " + bytes([c ^ 128]) | ||
else: | ||
if p < len(i): | ||
c = (c << 8) | ord(i[p : p + 1]) | ||
p += 1 | ||
m = (c >> 3) & 0x07FF | ||
n = (c & 7) + 3 | ||
if m > n: | ||
o += o[-m : n - m] | ||
else: | ||
for _ in range(n): | ||
# because of completely ass-backwards decision by python mainters for python 3 | ||
# we must use slice for bytes as i[p] returns int while slice returns character | ||
if m == 1: | ||
o += o[-m:] | ||
else: | ||
o += o[-m : -m + 1] | ||
return o | ||
|
||
|
||
class HuffcdicReader:
    """Decompressor for HUFF/CDIC (Huffman + phrase dictionary) MOBI records.

    Usage: call loadHuff() once with the HUFF record, then loadCdic() for
    every CDIC record, before calling unpack() on compressed text records.
    """

    # Pre-compiled reader for a big-endian 64-bit word; unpack() consumes the
    # bit stream 32 bits at a time out of an 8-byte window.
    q = struct.Struct(b">Q").unpack_from

    def loadHuff(self, huff):
        """Parse the HUFF record and build the canonical Huffman code tables.

        Raises unpackException if the record lacks the expected HUFF
        magic / header-length signature.
        """
        if huff[0:8] != b"HUFF\x00\x00\x00\x18":
            raise unpackException("invalid huff header")
        # Offsets of the two code tables inside the record.
        off1, off2 = struct.unpack_from(b">LL", huff, 8)

        def dict1_unpack(v):
            # Each 32-bit entry packs: low 5 bits = code length, bit 7 =
            # "terminal" flag (code fully resolved by its first byte),
            # high 24 bits = maximum code value for that first byte.
            codelen, term, maxcode = v & 0x1F, v & 0x80, v >> 8
            assert codelen != 0
            if codelen <= 8:
                assert term
            # Left-justify the max code in 32 bits so stream prefixes can be
            # compared against it directly.
            maxcode = ((maxcode + 1) << (32 - codelen)) - 1
            return (codelen, term, maxcode)

        # dict1: lookup table indexed by the first 8 bits of the bit stream.
        self.dict1 = list(map(dict1_unpack, struct.unpack_from(b">256L", huff, off1)))

        # dict2: interleaved (mincode, maxcode) pairs for code lengths 1..32,
        # consulted when the first byte alone does not fix the code length.
        dict2 = struct.unpack_from(b">64L", huff, off2)
        self.mincode, self.maxcode = (), ()
        # A dummy length-0 entry is prepended so the tuples are indexed
        # directly by code length.
        for codelen, mincode in enumerate((0,) + dict2[0::2]):
            self.mincode += (mincode << (32 - codelen),)
        for codelen, maxcode in enumerate((0,) + dict2[1::2]):
            self.maxcode += (((maxcode + 1) << (32 - codelen)) - 1,)

        # Phrase dictionary, filled in by subsequent loadCdic() calls.
        self.dictionary = []

    def loadCdic(self, cdic):
        """Parse one CDIC record and append its phrases to the dictionary.

        Raises unpackException if the record lacks the CDIC signature.
        """
        if cdic[0:8] != b"CDIC\x00\x00\x00\x10":
            raise unpackException("invalid cdic header")
        phrases, bits = struct.unpack_from(b">LL", cdic, 8)
        # Each CDIC record holds at most 2**bits entries; the last record may
        # hold fewer (whatever remains of the total phrase count).
        n = min(1 << bits, phrases - len(self.dictionary))
        h = struct.Struct(b">H").unpack_from

        def getslice(off):
            # Entry layout: 16-bit length word (top bit = "already expanded"
            # flag), followed by that many bytes of phrase data.
            (blen,) = h(cdic, 16 + off)
            slice = cdic[18 + off : 18 + off + (blen & 0x7FFF)]  # NOTE(review): shadows builtin slice()
            return (slice, blen & 0x8000)

        self.dictionary += list(map(getslice, struct.unpack_from(bytes(">%dH" % n, "latin-1"), cdic, 16)))

    def unpack(self, data):
        """Huffman-decode *data* and return the expanded bytes."""
        q = HuffcdicReader.q

        bitsleft = len(data) * 8
        # Pad with zeros so the 64-bit window reads never run past the end.
        data += b"\x00\x00\x00\x00\x00\x00\x00\x00"
        pos = 0
        (x,) = q(data, pos)
        n = 32  # unconsumed bits remaining in the current 64-bit window

        s = b""
        while True:
            if n <= 0:
                # Slide the 64-bit window forward by 32 bits.
                pos += 4
                (x,) = q(data, pos)
                n += 32
            # Next 32 bits of the stream, left-justified.
            code = (x >> n) & ((1 << 32) - 1)

            # First-byte lookup; when not terminal, grow the code length until
            # the prefix falls inside [mincode, maxcode] for that length.
            codelen, term, maxcode = self.dict1[code >> 24]
            if not term:
                while code < self.mincode[codelen]:
                    codelen += 1
                maxcode = self.maxcode[codelen]

            n -= codelen
            bitsleft -= codelen
            if bitsleft < 0:
                break

            # Symbol index within the canonical code block for this length.
            r = (maxcode - code) >> (32 - codelen)
            slice, flag = self.dictionary[r]
            if not flag:
                # Phrase is itself Huffman-compressed: expand it once,
                # recursively, and cache the expanded form (the interim None
                # guards against self-referential entries during recursion).
                self.dictionary[r] = None
                slice = self.unpack(slice)
                self.dictionary[r] = (slice, 1)
            s += slice
        return s
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
66 changes: 66 additions & 0 deletions
66
application/lib/calibre/ebooks/conversion/plugins/mobi_input.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
__license__ = 'GPL 3' | ||
__copyright__ = '2009, Kovid Goyal <[email protected]>' | ||
__docformat__ = 'restructuredtext en' | ||
|
||
import os | ||
|
||
from calibre.customize.conversion import InputFormatPlugin | ||
from calibre.ebooks import DRMError | ||
|
||
class MOBIInput(InputFormatPlugin):
    """Input plugin converting MOBI-family books into OEB/HTML for the pipeline."""

    name = 'MOBI Input'
    author = 'Kovid Goyal'
    description = _('Convert MOBI files (.mobi, .prc, .azw) to HTML')
    file_types = {'mobi', 'prc', 'azw', 'azw3', 'pobi'}
    commit_name = 'mobi_input'

    def convert(self, stream, opts, file_ext, log, output_dir, fs):
        """Extract the book from *stream* into *output_dir*.

        :param stream: open file-like object with the MOBI/AZW data
        :param opts: conversion options (reads input_encoding, debug_pipeline, user)
        :param file_ext: extension of the input file
        :param log: logger
        :param output_dir: directory the extracted content is written into
        :param fs: FsDictStub instance created by the plumber
        :raises DRMError: when the book is DRM protected
        :return: for KF8 books, the full path of the generated OPF file;
            otherwise *fs* (or ``mr.created_opf_path`` when *fs* is falsy)
        """
        self.user = opts.user
        self.is_kf8 = False
        self.mobi_is_joint = False

        from calibre.ebooks.mobi.reader.mobi6 import MobiReader
        from lxml import html
        parse_cache = {}
        try:
            mr = MobiReader(stream, log, opts.input_encoding, opts.debug_pipeline, fs=fs)
            if mr.kf8_type is None:
                mr.extract_content(output_dir, parse_cache)
        except DRMError:
            # DRM-protected books cannot be converted; let the caller report it.
            raise
        except Exception:
            # Some broken MOBI files need the extra-data fix; retry once with
            # it. (Was a bare ``except:``, which also swallowed
            # KeyboardInterrupt/SystemExit.)
            mr = MobiReader(stream, log, opts.input_encoding,
                        opts.debug_pipeline, try_extra_data_fix=True, fs=fs)
            if mr.kf8_type is None:
                mr.extract_content(output_dir, parse_cache)

        if mr.kf8_type is not None:
            # KF8 (AZW3) content: delegate to the dedicated KF8 reader.
            log('Found KF8 MOBI of type %r'%mr.kf8_type)
            if mr.kf8_type == 'joint':
                self.mobi_is_joint = True
            from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
            mr = Mobi8Reader(mr, log, fs=fs)
            opf = mr(output_dir)
            self.encrypted_fonts = mr.encrypted_fonts
            self.is_kf8 = True
            return opf

        # Optional raw-markup dump for pipeline debugging.
        raw = parse_cache.pop('calibre_raw_mobi_markup', False)
        if raw:
            if isinstance(raw, str):
                raw = raw.encode('utf-8')
            fs.write(os.path.join(output_dir, 'debug-raw.html'), raw, 'wb')
        from calibre.ebooks.oeb.base import close_self_closing_tags
        for f, root in parse_cache.items():
            # Serialize each parsed document back to XHTML bytes.
            raw = html.tostring(root, encoding='utf-8', method='xml',
                                include_meta_content_type=False)
            raw = close_self_closing_tags(raw)
            fs.write(os.path.join(output_dir, f), raw, 'wb')
        #accelerators['pagebreaks'] = '//h:div[@class="mbp_pagebreak"]'
        return fs if fs else mr.created_opf_path
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ | |
__copyright__ = '2009, Kovid Goyal <[email protected]>' | ||
__docformat__ = 'restructuredtext en' | ||
|
||
from PIL.Image import isImageType | ||
import os, re, sys, shutil, pprint, json, io, css_parser, logging, traceback | ||
from itertools import chain | ||
from functools import partial | ||
|
@@ -24,7 +25,7 @@ | |
from polyglot.builtins import string_or_bytes | ||
|
||
from filesystem_dict import FsDictStub | ||
from application.utils import get_directory_size | ||
from application.utils import get_directory_size, loc_exc_pos | ||
from application.base_handler import save_delivery_log | ||
|
||
DEBUG_README=b''' | ||
|
@@ -397,7 +398,7 @@ def run(self): | |
self.oeb = self.input_plugin(self.input_, self.opts, self.input_fmt, self.log, tdir, fs) | ||
except Exception as e: | ||
if 'All feeds are empty, aborting.' in str(e): | ||
self.log.warning('Failed to execute input plugin: {}'.format(str(e))) | ||
self.log.warning('Plumber: All feeds are empty, aborting.') | ||
else: | ||
self.log.warning('Failed to execute input plugin: {}'.format(traceback.format_exc())) | ||
fs.clear() | ||
|
@@ -416,11 +417,12 @@ def run(self): | |
# return | ||
self.opts_to_mi(self.opts, self.user_metadata) | ||
if not hasattr(self.oeb, 'manifest'): #从一堆文件里面创建OEBBook实例 | ||
fs.find_opf_path() | ||
try: | ||
self.oeb = create_oebbook(self.log, self.oeb, self.opts, encoding=self.input_plugin.output_encoding, | ||
self.oeb = create_oebbook(self.log, fs, self.opts, encoding=self.input_plugin.output_encoding, | ||
removed_items=getattr(self.input_plugin, 'removed_items_to_ignore', ())) | ||
except Exception as e: | ||
self.log.warning('Failed to create oebbook for recipes: {}'.format(str(e))) | ||
except: | ||
self.log.warning(loc_exc_pos('Failed to create oebbook')) | ||
fs.clear() | ||
return | ||
|
||
|
Oops, something went wrong.