-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathstrutil.py
176 lines (149 loc) · 5.68 KB
/
strutil.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# (C) Copyright 2015-2024 Sei Lisa. All rights reserved.
#
# This file is part of LSL PyOptimizer.
#
# LSL PyOptimizer is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# LSL PyOptimizer is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LSL PyOptimizer. If not, see <http://www.gnu.org/licenses/>.
# String <-> Bytes conversion and output utilities
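# Microsoft again not following standards. Sigh.
# Windows names its UTF-8 code page 'cp65001'; register a codec search
# function that resolves that name to the standard utf8 codec.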
import codecs
codecs.register(lambda x: codecs.lookup('utf8') if x == 'cp65001' else None)
import sys
# Version-compatibility layer.
#
# After this block runs, the names unicode/unichr/xrange, the flags
# python2/python3/python2Narrow, the wrappers uniwrap/bytewrap and the
# converters str2u/str2b/u2str/b2str/any2str exist on both Python 2 and 3.
#
# In every converter, `enc` may be an encoding name, an object that exposes
# an `encoding` attribute (werr/wout pass a stream), or None. A missing or
# empty encoding falls back to 'utf8'.
python2Narrow = False
if sys.version_info.major >= 3:
    # Python 3: the native str type is Unicode, so alias the Python 2 names.
    unicode = str
    unichr = chr
    xrange = range
    python3 = True
    python2 = False
    uniwrap = unicode
    bytewrap = bytes

    def str2u(s, enc=None):
        """Convert a native Python3 str to Unicode. This is a NOP.

        enc is accepted for signature parity and ignored.
        """
        return s

    def str2b(s, enc=None):
        """Convert a native Python3 str to bytes, with the given encoding.

        Characters the encoding cannot represent are emitted as backslash
        escapes rather than raising.
        """
        return s.encode(getattr(enc, 'encoding', enc) or 'utf8',
                        'backslashreplace')

    def u2str(s, enc=None):
        """Convert a Unicode string to native Python 3 str. This is a NOP.

        enc is accepted for signature parity and ignored.
        """
        return s

    def b2str(s, enc=None):
        """Convert a Bytes string to native Python 3 str.

        Undecodable bytes are replaced with U+FFFD rather than raising.
        """
        return s.decode(getattr(enc, 'encoding', enc) or 'utf8',
                        'replace')

    def any2str(s, enc=None):
        """Convert Bytes or Unicode to native Python 3 str.

        A str (or str subclass) is returned unchanged; anything else is
        decoded with b2str.
        """
        # isinstance, not an exact type match: a str subclass has no
        # .decode() and must not be sent down the bytes path.
        return s if isinstance(s, str) else b2str(s, enc)

else:
    # Python 2: the native str type is a byte string.
    unicode = unicode
    unichr = unichr
    xrange = xrange
    python2 = True
    python3 = False
    uniwrap = unicode
    bytewrap = bytearray

    def str2u(s, enc=None):
        """Convert a native Python2 str to Unicode.

        Undecodable bytes are replaced with U+FFFD rather than raising.
        """
        return s.decode(getattr(enc, 'encoding', enc) or 'utf8',
                        'replace')

    def str2b(s, enc=None):
        """Convert a native Python2 str to bytes. This is a NOP.

        enc is accepted for signature parity and ignored.
        """
        return s

    def u2str(s, enc=None):
        """Convert a Unicode string to native Python 2 str.

        Characters the encoding cannot represent are emitted as backslash
        escapes rather than raising.
        """
        return s.encode(getattr(enc, 'encoding', enc) or 'utf8',
                        'backslashreplace')

    def b2str(s, enc=None):
        """Convert a Bytes string to native Python 2 str. This is a NOP.

        enc is accepted for signature parity and ignored.
        """
        return s

    def any2str(s, enc=None):
        """Convert Bytes or Unicode to native Python 2 str.

        A str (or str subclass) is returned unchanged; anything else is
        encoded with u2str.
        """
        return s if isinstance(s, str) else u2str(s, enc)

    if len(u'\U00010001') == 2:
        # Narrow character build (UTF-16 strings)
        # Monkey-patch the relevant functions
        python2Narrow = True
        _unichr = unichr
        _ord = ord
        _len = len

        def unichr(n):
            """Return the Unicode string for code point n.

            Code points from 65536 up to 0x10FFFF are built through a
            unicode-escape literal; everything else is delegated to the
            built-in unichr.
            """
            if not (65536 <= n < 0x110000):
                return _unichr(n)
            return ('\\U%08X' % n).decode('unicode-escape')

        def ord(x):
            """Return the code point of x, combining a surrogate pair.

            A two-unit Unicode string whose first unit is a high surrogate
            is combined into a single code point above 0xFFFF; any other
            input is delegated to the built-in ord.
            """
            if isinstance(x, unicode) and _len(x) == 2:
                # Drop any uniwrap subclass so x[0]/x[1] index UTF-16 units.
                x = unicode(x)
                if 0xD800 <= _ord(x[0]) < 0xDC00:
                    return 65536 + ((_ord(x[0]) & 0x3FF) << 10
                                    | (_ord(x[1]) & 0x3FF))
            return _ord(x)

        def len(x):
            """Return the length of x, counting code points for Unicode.

            Unicode strings are measured through their UTF-32 encoding
            (4 bytes per code point); other objects use the built-in len.
            """
            if isinstance(x, unicode):
                return _len(x.encode('utf-32le')) >> 2
            return _len(x)

        # Alas, we can't monkey-patch the unicode class' __getitem__ and
        # __getslice__ methods; we need a workaround.
        class uniwrap(unicode):
            """unicode subclass whose indexing and slicing use code points.

            All operations go through the UTF-32LE encoding, where every
            code point occupies exactly 4 bytes.
            """

            def __getslice__(self, start, stop):
                """Return self[start:stop] with code point indices."""
                # Scale indices to byte offsets, saturating at sys.maxint
                # so that the shift cannot overflow.
                lim = sys.maxint >> 2
                if start < 0: start = 0
                if stop < 0: stop = 0
                if start < lim:
                    start <<= 2
                else:
                    start = sys.maxint
                if stop < lim:
                    stop <<= 2
                else:
                    stop = sys.maxint
                return self.encode('utf-32le')[start:stop].decode(
                    'utf-32le')

            def __getitem__(self, item):
                """Return the code point or slice selected by item.

                Raises IndexError for an integer index that is out of
                range in either direction.
                """
                u = self.encode('utf-32le')
                if isinstance(item, slice):
                    if item.step is None:
                        # Contiguous slice: scale the bounds to bytes.
                        start = item.start
                        stop = item.stop
                        if start is not None:
                            start <<= 2
                        if stop is not None:
                            stop <<= 2
                        return u[start:stop].decode('utf-32le')
                    # Stepped slice: a byte-level step would pick single
                    # bytes, so slice a list of 4-byte code points instead.
                    chars = [u[i:i + 4] for i in xrange(0, _len(u), 4)]
                    return b''.join(chars[item]).decode('utf-32le')
                item <<= 2
                if not (-_len(u) <= item < _len(u)):
                    # Slicing never raises, so check the bounds explicitly.
                    raise IndexError('string index out of range')
                return u[item:(item + 4 if item != -4 else None)].decode(
                    'utf-32le')
def b2u(s, enc=None):
    """Convert a Bytes string to Unicode via the native str type.

    enc is forwarded unchanged to both conversion steps.
    """
    native = b2str(s, enc)
    return str2u(native, enc)
def u2b(s, enc=None):
    """Convert a Unicode string to Bytes via the native str type.

    enc is forwarded unchanged to both conversion steps.
    """
    native = u2str(s, enc)
    return str2b(native, enc)
def any2b(s, enc=None):
    """Bytes or Unicode to Bytes

    A bytes object (or bytes subclass) is returned unchanged; anything
    else is treated as Unicode and converted with u2b, forwarding enc.
    """
    # isinstance, not an exact type match, so bytes subclasses are not
    # mistaken for Unicode and pushed through the encoding path.
    return s if isinstance(s, bytes) else u2b(s, enc)
def any2u(s, enc=None):
    """Bytes or Unicode to Unicode

    A unicode object (or unicode subclass) is returned unchanged; anything
    else is treated as Bytes and converted with b2u, forwarding enc.
    """
    # isinstance, not an exact type match: unicode subclasses are already
    # Unicode and must not be sent down the bytes-decoding path.
    return s if isinstance(s, unicode) else b2u(s, enc)
def werr(s):
    """Write any string (Bytes or Unicode) to stderr.

    The string is converted to the native str type with any2str, passing
    the stderr stream as the encoding source.
    """
    stream = sys.stderr
    text = any2str(s, stream)
    stream.write(text)
def wout(s):
    """Write any string (Bytes or Unicode) to stdout.

    The string is converted to the native str type with any2str, passing
    the stdout stream as the encoding source.
    """
    stream = sys.stdout
    text = any2str(s, stream)
    stream.write(text)
# Module-level marker, set to True once the module body has executed.
# NOTE(review): nothing in this file reads it; presumably checked by
# importers -- confirm against callers.
strutil_used = True