-
Notifications
You must be signed in to change notification settings - Fork 11
/
generate_strings.py
executable file
·152 lines (122 loc) · 6.34 KB
/
generate_strings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/env python
from constants import *
def get_string_variations(string, prefix_postfix_option, acronyms_only_option):
    """Build the sorted, de-duplicated list of name variations for a string.

    Args:
        string: The raw name to derive variations from.
        prefix_postfix_option: Passed through to
            add_with_prefix_postfix_domains ("prefix", "postfix" or "both"
            are the values that function checks for).
        acronyms_only_option: Passed through to add_acronyms.

    Returns:
        A sorted list of unique name variations.
    """
    # Names are not case sensitive, so work in lowercase throughout.
    # Junk characters (e.g. & , ") are stripped before anything else.
    candidates = remove_junk_chars(string.lower())

    # Each step below grows the candidate list; the add_with_* helpers
    # modify the list in place, add_acronyms hands back the list to keep.
    add_with_no_entity(candidates)
    candidates = add_acronyms(candidates, acronyms_only_option)
    add_with_space_replacements(candidates)
    add_with_prefix_postfix_domains(candidates, prefix_postfix_option)

    # De-duplicate, then sort.
    unique_candidates = set(candidates)
    return sorted(unique_candidates)
def remove_junk_chars(string):
    """Remove characters that shouldn't or won't be in a bucket name.

    Quotes and "!" are dropped, a fixed set of domain suffixes is removed,
    "," and "." become spaces, "&" becomes " and ", and runs of spaces are
    collapsed to a single space.

    Args:
        string: The name to clean up.

    Returns:
        A list holding the cleaned, stripped name. If the cleaned name
        contains " and ", a second entry with that word removed is added
        (e.g. "bob & sue" gives ["bob and sue", "bob sue"]).
    """
    # Local import so this block does not depend on the file's import lines.
    import re

    name = string

    # Remove junk chars outright.
    for junk_char in ("'", '"', "!"):
        name = name.replace(junk_char, "")

    # Remove domains (this can be added later).
    for domain in (".com", ".org", ".net", ".edu", ".gov"):
        name = name.replace(domain, "")

    # Replace separator chars with a space so it can later be swapped for a
    # replacement char.
    name = name.replace(",", " ")
    name = name.replace(".", " ")
    name = name.replace("&", " and ")

    # Collapse every run of two or more spaces into one. Done in a single
    # pass so it always terminates, whatever the input contains.
    name = re.sub(" {2,}", " ", name)

    # The " and " check is made before stripping, so a leading "&"
    # (now " and ...") also yields the variant without "and".
    names = [name.strip()]
    if " and " in name:
        names.append(name.replace(" and ", " ").strip())
    return names
def add_acronyms(names, acronyms_only_option):
    """Add acronyms for every multi-word name.

    For each name with more than one word, the acronym of the full name is
    collected. If the name starts with "the ", the acronym of the name
    without that leading article is collected as well.

    Args:
        names: List of names. Extended in place when acronyms_only_option
            is falsy.
        acronyms_only_option: When truthy, return only the acronyms
            (falling back to names if no acronym could be built).

    Returns:
        The list of unique acronyms, or names (extended with the acronyms
        when acronyms_only_option is falsy).
    """
    article = "the "
    acronyms = []
    chomped_strings = []
    for name in names:
        if len(name.split()) > 1:
            if name.startswith(article):
                # Slice off only the leading article. str.replace would
                # also delete every later "the " (e.g. inside "bathe ").
                new_name = name[len(article):]
                if new_name not in chomped_strings:
                    chomped_strings.append(new_name)
                    acronyms.append(get_abbreviated_string(new_name))
            acronyms.append(get_abbreviated_string(name))

    # Before going any further, drop repeats to save memory.
    acronyms = list(set(acronyms))

    if acronyms_only_option:
        # With nothing to abbreviate, fall back to the plain names.
        return acronyms if acronyms else names

    names.extend(acronyms)
    return names
def get_abbreviated_string(name):
    """Return the first character of each whitespace-separated word, joined.

    An empty or whitespace-only name yields an empty string.
    """
    initials = [word[0] for word in name.split()]
    return "".join(initials)
def add_with_no_entity(names):
    """Append variants of each name with a trailing entity removed.

    For every name and every entry of the module-level `entities` list
    (e.g. Inc. or Corp.) that occurs in the name, the result of
    rchop(name, entity), stripped of surrounding whitespace, is appended to
    names. rchop only trims a match at the end, so an entity that occurs
    elsewhere in the name leaves it unchanged and it is appended as-is.

    Args:
        names: List of names; extended in place.
    """
    without_entity = [
        rchop(candidate, entity).strip()
        for candidate in names
        for entity in entities
        if entity in candidate
    ]
    names.extend(without_entity)
def add_with_space_replacements(names):
    """Replace every name containing a space with its space-replaced variants.

    Each name that contains a space is removed from names and, for every
    entry of the module-level `space_replacements` (e.g. -, _ and null),
    a copy with all spaces swapped for that entry is appended instead.
    Names without a space keep their relative order at the front.

    Args:
        names: List of names; rewritten in place.
    """
    names_without_spaces = []
    space_replaced_names = []
    # One pass partitions the list, rather than repeatedly searching and
    # removing from it, which is quadratic in the number of names.
    for name in names:
        if " " in name:
            for space_replacement in space_replacements:
                space_replaced_names.append(
                    name.replace(" ", space_replacement).strip()
                )
        else:
            names_without_spaces.append(name)

    # Slice assignment keeps the caller's list object, which callers rely
    # on since nothing is returned.
    names[:] = names_without_spaces + space_replaced_names
def add_with_prefix_postfix_domains(names, prefix_postfix_option):
    """Append prefixed, postfixed and domain-suffixed variants of each name.

    Prefixes and postfixes (e.g. dev, www) come from the module-level
    `prefixes_postfixes` and are joined with each entry of
    `prefix_postfix_separators`. A prefix and a postfix are never combined
    on the same name, so results like internal-site-dev are not produced.
    Domain variants (with and without a leading "www.") are only built for
    names that do not already contain one of the module-level `domains`.

    Args:
        names: List of name variants; extended in place.
        prefix_postfix_option: "prefix", "postfix" or "both". Any other
            value adds only the plain domain variants.
    """
    want_prefix = prefix_postfix_option in ("prefix", "both")
    want_postfix = prefix_postfix_option in ("postfix", "both")

    additions = []
    for name in names:
        # Plain prefix / postfix variants, kept separate from each other.
        for affix in prefixes_postfixes:
            for separator in prefix_postfix_separators:
                fields = dict(
                    name=name,
                    prefix_postfix=affix,
                    prefix_postfix_separator=separator,
                )
                if want_prefix:
                    additions.append("{prefix_postfix}{prefix_postfix_separator}{name}".format(**fields))
                if want_postfix:
                    additions.append("{name}{prefix_postfix_separator}{prefix_postfix}".format(**fields))

        # Only add domains if none of them are in the string yet.
        if any(domain in name for domain in domains):
            continue

        for domain in domains:
            fields = dict(name=name, domain=domain)
            additions.append("{name}{domain}".format(**fields))
            additions.append("www.{name}{domain}".format(**fields))

            for affix in prefixes_postfixes:
                for separator in prefix_postfix_separators:
                    fields = dict(
                        name=name,
                        domain=domain,
                        prefix_postfix=affix,
                        prefix_postfix_separator=separator,
                    )
                    # Add as a prefix
                    if want_prefix:
                        additions.append("{prefix_postfix}{prefix_postfix_separator}{name}{domain}".format(**fields))
                        additions.append("{prefix_postfix}{prefix_postfix_separator}www.{name}{domain}".format(**fields))
                    # Add as a postfix
                    if want_postfix:
                        additions.append("{name}{domain}{prefix_postfix_separator}{prefix_postfix}".format(**fields))
                        additions.append("www.{name}{domain}{prefix_postfix_separator}{prefix_postfix}".format(**fields))

    names.extend(additions)
def rchop(thestring, ending):
    """Remove the given ending from the end of the string.

    Args:
        thestring: The string to trim.
        ending: The suffix to remove.

    Returns:
        thestring without the trailing ending, or thestring unchanged when
        it does not end with ending or ending is empty.
    """
    # An empty ending must be skipped: every string "ends with" "", and
    # thestring[:-0] is thestring[:0], which would wipe the whole string.
    if ending and thestring.endswith(ending):
        return thestring[:-len(ending)]
    return thestring