-
Notifications
You must be signed in to change notification settings - Fork 0
/
transliterate.go
60 lines (55 loc) · 1.68 KB
/
transliterate.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
package inflect
import "regexp"
// transliterations maps an ASCII replacement string to the pattern matching
// every accented or special character that should be rewritten to it. The
// patterns are compiled once at package initialization.
var transliterations = map[string]*regexp.Regexp{
	// Upper-case letters.
	"A":  regexp.MustCompile(`À|Á|Â|Ã|Ä`),
	"AA": regexp.MustCompile(`Å`),
	"AE": regexp.MustCompile(`Æ`),
	"C":  regexp.MustCompile(`Ç`),
	"D":  regexp.MustCompile(`Ð`),
	"E":  regexp.MustCompile(`È|É|Ê|Ë`),
	"I":  regexp.MustCompile(`Ì|Í|Î|Ï`),
	"L":  regexp.MustCompile(`Ł`),
	"N":  regexp.MustCompile(`Ñ`),
	"O":  regexp.MustCompile(`Ò|Ó|Ô|Õ|Ö`),
	"OE": regexp.MustCompile(`Œ|Ø`),
	"Th": regexp.MustCompile(`Þ`),
	"U":  regexp.MustCompile(`Ù|Ú|Û|Ü`),
	"Y":  regexp.MustCompile(`Ý`),

	// Lower-case letters.
	"a":  regexp.MustCompile(`à|á|â|ã|ä`),
	"aa": regexp.MustCompile(`å`),
	"ae": regexp.MustCompile(`æ`),
	"c":  regexp.MustCompile(`ç`),
	"d":  regexp.MustCompile(`ð`),
	"e":  regexp.MustCompile(`è|é|ê|ë`),
	"i":  regexp.MustCompile(`ì|í|î|ï`),
	"l":  regexp.MustCompile(`ł`),
	"n":  regexp.MustCompile(`ñ|ń`),
	"o":  regexp.MustCompile(`ò|ó|ô|õ|ö|ō`),
	"oe": regexp.MustCompile(`œ|ø`),
	"s":  regexp.MustCompile(`ś`),
	"ss": regexp.MustCompile(`ß`),
	"th": regexp.MustCompile(`þ`),
	"u":  regexp.MustCompile(`ù|ú|û|ü|ũ|ū|ŭ|ů|ű|ų`),
	"y":  regexp.MustCompile(`ý|ÿ`),
	"z":  regexp.MustCompile(`ż`),
}
// Transliterate replaces non-ASCII characters in word with an ASCII
// approximation taken from the transliterations table. Any character that is
// still rejected by isAscii after that step (characters with no approximation,
// but also ASCII control characters) is replaced with "?".
func Transliterate(word string) string {
	// The patterns in the table match disjoint sets of characters and every
	// replacement is plain ASCII, so the random iteration order of the map
	// does not influence the result.
	for repl, regex := range transliterations {
		word = regex.ReplaceAllString(word, repl)
	}

	// Each rune of word occupies at least one byte and contributes exactly
	// one byte to the output, so len(word) is an upper bound for the result.
	// Appending to a pre-sized buffer avoids the repeated reallocation that
	// string concatenation in a loop would cause.
	safe := make([]byte, 0, len(word))
	for _, r := range word {
		if isAscii(r) {
			// isAscii only accepts runes in the single-byte range 32..126,
			// so the conversion to byte is lossless.
			safe = append(safe, byte(r))
		} else {
			safe = append(safe, '?')
		}
	}
	return string(safe)
}
// isAscii reports whether s lies in the printable ASCII range, from space
// (code point 32) through tilde (code point 126). Control characters and DEL
// are rejected just like non-ASCII runes.
func isAscii(s rune) bool {
	const (
		firstPrintable = ' ' // code point 32
		lastPrintable  = '~' // code point 126
	)
	if s < firstPrintable || s > lastPrintable {
		return false
	}
	return true
}