forked from instructure/QTIMigrationTool
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfixwinchars.py
executable file
·109 lines (98 loc) · 2.86 KB
/
fixwinchars.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#! /usr/bin/env python
"""Copyright (c) 2008, University of Cambridge.
All rights reserved.
Redistribution and use of this software in source and binary forms
(where applicable), with or without modification, are permitted
provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions, and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions, and the following
disclaimer in the documentation and/or other materials provided with
the distribution.
* Neither the name of the University of Cambridge, nor the names of
any other contributors to the software, may be used to endorse or
promote products derived from this software without specific prior
written permission.
THIS SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE."""
import os, sys, string
# Replacement text for the bytes 0x80-0x9F that Windows code page 1252 uses
# for printable characters.  Each value is an ASCII-only numeric character
# reference for the corresponding Unicode code point, so this source file
# needs no encoding declaration and the replacements stay valid in output
# that is meant to be pure ASCII.
# The bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D deliberately have no entry.
CharFixMap = {
    0x80: "&#x20AC;",  # euro sign
    0x82: "&#x201A;",  # single low-9 quotation mark
    0x83: "&#x0192;",  # latin small letter f with hook
    0x84: "&#x201E;",  # double low-9 quotation mark
    0x85: "&#x2026;",  # horizontal ellipsis
    0x86: "&#x2020;",  # dagger
    0x87: "&#x2021;",  # double dagger
    0x88: "&#x02C6;",  # modifier letter circumflex accent
    0x89: "&#x2030;",  # per mille sign
    0x8A: "&#x0160;",  # latin capital letter S with caron
    0x8B: "&#x2039;",  # single left-pointing angle quotation mark
    0x8C: "&#x0152;",  # latin capital ligature OE
    0x8E: "&#x017D;",  # latin capital letter Z with caron
    0x91: "&#x2018;",  # left single quotation mark
    0x92: "&#x2019;",  # right single quotation mark
    0x93: "&#x201C;",  # left double quotation mark
    0x94: "&#x201D;",  # right double quotation mark
    0x95: "&#x2022;",  # bullet
    0x96: "&#x2013;",  # en dash
    0x97: "&#x2014;",  # em dash
    0x98: "&#x02DC;",  # small tilde
    0x99: "&#x2122;",  # trade mark sign
    0x9A: "&#x0161;",  # latin small letter s with caron
    0x9B: "&#x203A;",  # single right-pointing angle quotation mark
    0x9C: "&#x0153;",  # latin small ligature oe
    0x9E: "&#x017E;",  # latin small letter z with caron
    0x9F: "&#x0178;",  # latin capital letter Y with diaeresis
}
def FixFile(fname, asciiMode, forceMode=0):
    """Replace Windows-specific high bytes in the file *fname*, in place.

    The file is read in binary mode and left untouched when any of the
    following holds:

    * it is shorter than 4 bytes;
    * its first byte is 0x00, 0xFE, 0xFF or 0xEF, or its second byte is
      0x00 (it probably starts with a BOM or is in a 16-bit wide format);
    * its first byte is not 0x3C ('<') and *forceMode* is false;
    * it contains no byte that counts as needing a fix (see below).

    Otherwise every byte that has an entry in CharFixMap is replaced by
    that entry.  When *asciiMode* is true, every remaining byte >= 0x80 is
    replaced by a hexadecimal character reference of the form "&#xNN;".
    A summary line and one line per replacement are printed, and the file
    is rewritten with the result.

    fname     -- path of the file to fix
    asciiMode -- true to also replace bytes >= 0x80 that have no
                 CharFixMap entry
    forceMode -- true to process files that do not start with '<'

    Returns None.  Errors raised while opening, reading or writing the
    file are not caught here.
    """
    # The with-block closes the input file on every path, including the
    # early returns below.
    with open(fname, "rb") as f:
        # bytearray yields integers when indexed or iterated on both
        # Python 2 and Python 3, so no ord() calls are needed.
        header = bytearray(f.read(4))
        if len(header) < 4:
            # ignore very short files
            return
        if header[0] in (0x00, 0xFE, 0xFF, 0xEF) or header[1] == 0:
            # File probably starts with a BOM or is in a 16-bit wide
            # format, so skip it.
            return
        if header[0] != 0x3C and not forceMode:
            return
        data = header + bytearray(f.read())
    # Count the bytes that trigger a rewrite.  Note that the whole range
    # 0x80-0x9F is counted, including bytes with no CharFixMap entry, which
    # are written back unchanged unless asciiMode is set.
    if asciiMode:
        fix = sum(1 for b in data if b >= 0x80)
    else:
        fix = sum(1 for b in data if 0x80 <= b <= 0x9F)
    if not fix:
        return
    print("Fixing %i chars in file: %s" % (fix, fname))
    output = bytearray()
    for b in data:
        replacement = CharFixMap.get(b)
        if replacement is None and asciiMode and b >= 0x80:
            replacement = "&#x%2X;" % b
        if replacement is None:
            # Byte is kept exactly as it was read.
            output.append(b)
            continue
        print("chr(0x%2X) -> %s" % (b, replacement))
        if not isinstance(replacement, bytes):
            # Text replacements (Python 3 str) must become bytes before
            # they can be written; anything outside ASCII is emitted as a
            # numeric character reference rather than raising.
            replacement = replacement.encode("ascii", "xmlcharrefreplace")
        output += replacement
    with open(fname, "wb") as f:
        f.write(bytes(output))
if __name__ == '__main__':
    # Command line: any number of file names, plus the optional
    # case-insensitive flag "--ascii", which may appear anywhere and
    # applies to every file.
    arguments = sys.argv[1:]
    asciiMode = int(any(arg.lower() == "--ascii" for arg in arguments))
    fileNames = [arg for arg in arguments if arg.lower() != "--ascii"]
    # Fix the files in the order they were given.
    for fileName in fileNames:
        FixFile(fileName, asciiMode)