-
Notifications
You must be signed in to change notification settings - Fork 1
/
DOCTranslator.cpp
315 lines (249 loc) · 6.68 KB
/
DOCTranslator.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
// DOCTranslator
// Copyright (c) 2014 Markus Himmel <[email protected]>
// This file is distributed under the terms of the MIT license.
#include "DOCTranslator.h"
#include <cstring>
#include <stdio.h>
#include <stdlib.h>
#include <fstream>
#include <Alert.h>
#include <Catalog.h>
#include <File.h>
#include <FindDirectory.h>
#undef B_TRANSLATION_CONTEXT
#define B_TRANSLATION_CONTEXT "DOCTranslator"
// Character mapping names handed to antiword's "-m" option. The value of the
// DOC_SETTING_CHARACTER_MAPPING setting is used as an index into this table,
// so the position of each entry is significant.
// 8859-12 intentionally left out because it does not exist.
const char * const mappings[] = { "8859-1", "8859-2", "8859-3", "8859-4",
"8859-5", "8859-6", "8859-7", "8859-8", "8859-9", "8859-10", "8859-11",
"8859-13", "8859-14", "8859-15", "8859-16", "cp437", "cp850", "cp852",
"cp862", "cp864", "cp866", "cp1250", "cp1251", "cp1252", "koi8-r",
"koi8-u", "MacCyrillic", "MacRoman", "roman", "UTF-8" };
// Paper size names handed to antiword's "-p" (PostScript) or "-a" (PDF)
// option. The value of the DOC_SETTING_PAPER setting is used as an index into
// this table.
const char * const paper[] = { "a4", "legal", "letter" };
// Formats this translator accepts as input; passed to the BaseTranslator
// constructor. The only entry describes Microsoft Word documents.
// NOTE(review): the initializer order is presumably type, group, quality,
// capability, MIME type, name (the same fields identify_msoffice_header()
// fills in on translator_info) -- confirm against translation_format.
static const translation_format sInputFormats[] = {
{
B_DOC_FORMAT,
B_TRANSLATOR_TEXT,
DOC_IN_QUALITY,
DOC_IN_CAPABILITY,
"application/msword",
"Microsoft Word document"
}
};
// Formats this translator can produce; passed to the BaseTranslator
// constructor and scanned by recognize_type() to validate a requested output
// type. All three entries are registered in the B_TRANSLATOR_TEXT group.
static const translation_format sOutputFormats[] = {
// Plain text
{
B_TRANSLATOR_TEXT,
B_TRANSLATOR_TEXT,
TEXT_OUT_QUALITY,
TEXT_OUT_CAPABILITY,
"text/plain",
"Plain text file"
},
// PostScript
{
B_PS_FORMAT,
B_TRANSLATOR_TEXT,
PS_OUT_QUALITY,
PS_OUT_CAPABILITY,
"application/postscript",
"PostScript file"
},
// PDF
{
B_PDF_FORMAT,
B_TRANSLATOR_TEXT,
PDF_OUT_QUALITY,
PDF_OUT_CAPABILITY,
"application/pdf",
"PDF document"
}
};
// Default values for the translator's settings; passed to the BaseTranslator
// constructor. The two integer settings default to 0, which selects the first
// entry of the mappings and paper tables respectively; every boolean setting
// defaults to false.
static const TranSetting sDefaultSettings[] = {
{ DOC_SETTING_CHARACTER_MAPPING, TRAN_SETTING_INT32, 0},
{ DOC_SETTING_LANDSCAPE, TRAN_SETTING_BOOL, false},
{ DOC_SETTING_PAPER, TRAN_SETTING_INT32, 0},
{ DOC_SETTING_HIDDEN, TRAN_SETTING_BOOL, false},
{ DOC_SETTING_REMOVED, TRAN_SETTING_BOOL, false}
};
// Element counts of the format and settings tables, derived from the arrays
// themselves so they stay correct when entries are added or removed.
const uint32 kNumInputFormats
	= sizeof(sInputFormats) / sizeof(sInputFormats[0]);
const uint32 kNumOutputFormats
	= sizeof(sOutputFormats) / sizeof(sOutputFormats[0]);
const uint32 kNumDefaultSettings
	= sizeof(sDefaultSettings) / sizeof(sDefaultSettings[0]);
// Returns the n-th translator provided by this file: a newly allocated
// DOCTranslator for n == 0 and NULL for every other index. The "you" and
// "flags" arguments are not used.
BTranslator*
make_nth_translator(int32 n, image_id you, uint32 flags, ...)
{
	// Only a single translator (index 0) is available.
	if (n != 0)
		return NULL;

	return new DOCTranslator();
}
// Reports whether "type" is one of the output types listed in sOutputFormats.
bool
recognize_type(uint32 type)
{
	const translation_format* const end = sOutputFormats + kNumOutputFormats;
	for (const translation_format* format = sOutputFormats; format != end;
			++format) {
		if (format->type == type)
			return true;
	}

	return false;
}
// Constructs the translator by forwarding its description to BaseTranslator:
// the localized name and info strings, the version, the input and output
// format tables with their element counts, the settings file name
// "DOCTranslator_Settings" and the default settings table with its count.
// NOTE(review): the trailing B_TRANSLATOR_TEXT / B_DOC_FORMAT arguments are
// presumably the translator's group and native type -- confirm against the
// BaseTranslator constructor.
DOCTranslator::DOCTranslator()
:
BaseTranslator(B_TRANSLATE("DOC documents"),
B_TRANSLATE("DOC document translator"),
DOC_TRANSLATOR_VERSION,
sInputFormats, kNumInputFormats,
sOutputFormats, kNumOutputFormats,
"DOCTranslator_Settings",
sDefaultSettings, kNumDefaultSettings,
B_TRANSLATOR_TEXT, B_DOC_FORMAT)
{
}
// Destructor; this class has nothing of its own to release here.
DOCTranslator::~DOCTranslator()
{
}
// Compares the first 8 bytes at "bytes" with the 8-byte signature
// D0 CF 11 E0 A1 B1 1A E1.
//
// bytes	Data to test; at least 8 bytes are read from it.
//
// Returns the result of memcmp(signature, bytes, 8): 0 when the bytes match,
// otherwise a non-zero value whose sign follows memcmp.
int32
msoffice_sig_cmp(const uint8 *bytes)
{
	static const uint8 kSignature[] = {
		0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1
	};

	const int difference = memcmp(kSignature, bytes, sizeof(kSignature));
	return static_cast<int32>(difference);
}
// Checks whether the data at the current position of inSource starts with
// the signature tested by msoffice_sig_cmp() and, on a match, optionally
// describes the data as a Microsoft Word document.
//
// inSource	Stream to probe. Eight bytes are read from its current position;
//			the position is not restored afterwards.
// outInfo	May be NULL. When non-NULL and the signature matches, its type,
//			group, quality, capability, MIME and name fields are filled in.
//
// Returns B_OK when the signature matches, B_NO_TRANSLATOR when the eight
// bytes could not be read or do not match.
status_t
identify_msoffice_header(BPositionIO *inSource, translator_info *outInfo)
{
	const int32 signatureSize = 8;
	uint8 buffer[signatureSize];

	if (inSource->Read(buffer, signatureSize) != signatureSize) {
		// Read error or fewer than eight bytes available
		return B_NO_TRANSLATOR;
	}

	if (msoffice_sig_cmp(buffer) != 0)
		return B_NO_TRANSLATOR;

	if (outInfo != NULL) {
		outInfo->type = B_DOC_FORMAT;
		outInfo->group = B_TRANSLATOR_TEXT;
		outInfo->quality = DOC_IN_QUALITY;
		// Report the capability constant here, matching the sInputFormats
		// entry; this used to be filled with the quality value by mistake.
		outInfo->capability = DOC_IN_CAPABILITY;
		// Bounded copies for both strings so neither can overrun its field.
		strlcpy(outInfo->MIME, "application/msword", sizeof(outInfo->MIME));
		strlcpy(outInfo->name, B_TRANSLATE("Microsoft Word document"),
			sizeof(outInfo->name));
	}

	return B_OK;
}
// Identifies the data in "source" by delegating to
// identify_msoffice_header(), which fills in outInfo on a match.
// inFormat, ioExtension and outType are not consulted.
// Returns whatever identify_msoffice_header() returns.
status_t
DOCTranslator::DerivedIdentify(BPositionIO *source,
const translation_format *inFormat, BMessage *ioExtension,
translator_info *outInfo, uint32 outType)
{
// This only checks if a given file has the header shared by all binary
// Microsoft office formats. I do not think it is easily possible to detect
// Word documents in particular.
return identify_msoffice_header(source, outInfo);
}
status_t
DOCTranslator::DerivedTranslate(BPositionIO *inSource,
const translator_info *inInfo, BMessage *ioExtension,
uint32 outType, BPositionIO *outDestination, int32 baseType)
{
// Note: BaseType will always be -1 because this is a B_TRANSLATOR_TEXT
if (!recognize_type(outType))
{
return B_NO_TRANSLATOR;
}
if (identify_msoffice_header(inSource, NULL) != B_OK)
{
return B_NO_TRANSLATOR;
}
// Reset the cursor as it was progressed while identifying the header.
inSource->Seek(0, SEEK_SET);
// Get the tmp folder name
BPath tmpDir;
if (find_directory(B_SYSTEM_TEMP_DIRECTORY, &tmpDir) != B_OK)
{
return B_ERROR;
}
BString tmpPath;
// Template for a temporary file name
tmpPath.Append(tmpDir.Path()).Append("/DOCTranslator.XXXXXX");
// This is just to get a temporary file name, we use an ofstream
int tempFileHandle = mkstemp(tmpPath.LockBuffer(0));
tmpPath.UnlockBuffer();
if (tempFileHandle == -1)
{
return B_ERROR;
}
// Get rid of the the C style file stream
close(tempFileHandle);
// Read the whole input into a buffer
off_t *bufferSize;
inSource->GetSize(bufferSize);
uint8 fileBuffer[*bufferSize];
inSource->Read(&fileBuffer, *bufferSize);
// And dump it into a file
std::ofstream inputFile;
inputFile.open(tmpPath, ios::out | ios::binary);
if (!inputFile)
{
return B_ERROR;
}
inputFile.write(&fileBuffer, *bufferSize);
inputFile.close();
// Now execute antiword and have the shell dump stdout into a file
BString cmd = BString("antiword ");
cmd << "-m " // Mapping
<< mappings[fSettings->SetGetInt32(DOC_SETTING_CHARACTER_MAPPING)]
<< " ";
if (outType == B_PS_FORMAT)
{
cmd << "-p " << paper[fSettings->SetGetInt32(DOC_SETTING_PAPER)] << " ";
if (fSettings->SetGetBool(DOC_SETTING_LANDSCAPE))
{
cmd << "-L ";
}
}
if (outType == B_PDF_FORMAT)
{
cmd << "-a " << paper[fSettings->SetGetInt32(DOC_SETTING_PAPER)] << " ";
}
if (fSettings->SetGetBool(DOC_SETTING_REMOVED))
{
cmd << "-r ";
}
if (fSettings->SetGetBool(DOC_SETTING_HIDDEN))
{
cmd << "-s ";
}
cmd << tmpPath // Source
<< " > " // Redirect
<< tmpPath << "1"; // Destination is source with appended 1
if (system(cmd.String()) == -1)
{
return B_ERROR;
}
// Read the whole output into a buffer
BFile returned(tmpPath << "1", O_RDONLY);
off_t fileSize;
returned.GetSize(&fileSize);
uint8 outputBuffer[fileSize];
returned.Read(outputBuffer, fileSize);
// And finally feed it to the destination
outDestination->Write(outputBuffer, fileSize);
return B_OK;
}
// Creates the configuration view for this translator: a newly allocated
// DOCView with the frame (0, 0) - (DOC_VIEW_WIDTH, DOC_VIEW_HEIGHT), the
// localized title "DOCTranslator Settings" and the given settings object.
// NOTE(review): the two literal 0 arguments are presumably the view's resize
// mode and flags -- confirm against the DOCView constructor.
BView*
DOCTranslator::NewConfigView(TranslatorSettings *settings)
{
return new DOCView(BRect(0, 0, DOC_VIEW_WIDTH, DOC_VIEW_HEIGHT),
B_TRANSLATE("DOCTranslator Settings"), 0, 0, settings);
}