-
Notifications
You must be signed in to change notification settings - Fork 1
/
parser.py
319 lines (248 loc) · 11.6 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
import os
import sys
import re
from bs4 import BeautifulSoup
# Module-wide verbosity flag: when true, the writer functions print a line for
# every file they touch. main() overrides it from the optional CLI argument.
debug = False
def extract_method_info(soup):
    """Return ``(class_name, method_string)`` for a method documentation page.

    The lookup starts at the first ``<h1>`` in ``soup``: the element returned
    by ``find_next()`` on it supplies the class name, and that element's next
    sibling supplies the method string. Both texts are whitespace-stripped.

    Raises:
        AttributeError: if any of the three elements is missing, because the
            lookups are chained without ``None`` checks.
    """
    heading = soup.find('h1')
    owner_tag = heading.find_next()
    owner_name = owner_tag.text.strip()
    method_name = owner_tag.find_next_sibling().text.strip()
    return owner_name, method_name
def extract_class_and_inheritance(soup):
    """Return ``(class_name, inherited_objects)`` from a class index page.

    The class name is the text of the ``<a class="mod">`` link inside
    ``<h1 class="fqn">`` within ``<section id="main" class="content mod">``.
    The parents are the ``<a class="mod">`` links of the first direct-child
    ``<p>`` of that section whose text starts with
    ``'Inherits all methods from:'``.

    Returns:
        ``(None, [])`` when the section, the heading or the name link is
        missing; ``(class_name, [])`` when there is no inheritance paragraph;
        otherwise ``(class_name, [parent, ...])``.
    """
    main_section = soup.find('section', id='main', class_='content mod')
    if main_section is None:
        return None, []

    h1_tag = main_section.find('h1', class_='fqn')
    if h1_tag is None:
        return None, []

    # A heading without the name link is treated like every other missing
    # element instead of failing with AttributeError on ``None.text``.
    name_link = h1_tag.find('a', class_='mod')
    if name_link is None:
        return None, []
    class_name = name_link.text.strip()

    # Only direct children are inspected so paragraphs nested deeper in the
    # section cannot be mistaken for the inheritance line.
    inheritance_p_tag = next(
        (
            p_tag
            for p_tag in main_section.find_all('p', recursive=False)
            if p_tag.get_text(strip=True).startswith('Inherits all methods from:')
        ),
        None,
    )
    if inheritance_p_tag is None:
        return class_name, []

    inherited_objects = [
        a_tag.text.strip()
        for a_tag in inheritance_p_tag.find_all('a', class_='mod')
    ]
    return class_name, inherited_objects
def extract_arguments(soup):
    """Collect the arguments documented under the page's arguments heading.

    Looks up ``<h2 id="arguments">``, its next sibling ``<p>`` and every
    ``<dl>`` inside that paragraph. Entries without a ``<dt>`` or without a
    type link inside the ``<dt>`` are skipped.

    Returns:
        A list of dicts with the keys ``type``, ``name``, ``description``,
        ``optional`` and ``optional_val``; an empty list when the heading,
        the paragraph or the ``<dl>`` entries are missing.
    """
    arguments_section = soup.find('h2', id='arguments')
    if arguments_section is None:
        return []

    arguments_p_tag = arguments_section.find_next_sibling('p')
    if arguments_p_tag is None:
        return []

    arguments_list = []
    for dl in arguments_p_tag.find_all('dl'):
        dt_element = dl.find('dt')
        if dt_element is None:
            continue

        type_link = dt_element.find('a')
        if type_link is None:
            continue
        argument_type = type_link.text.strip()

        # Remove only the first occurrence of the type text. Replacing every
        # occurrence would also eat matching substrings of the argument name
        # (type "Map" with name "instanceMap" would leave just "instance").
        raw_name = dt_element.text.replace(argument_type, '', 1).strip()
        argument_name, argument_optional, argument_optional_value = extract_argument_name_opt(raw_name)

        # special case for function arguments
        if argument_type == "function":
            argument_name = "func"

        # special cases where vararg is used
        if argument_type == "...":
            argument_name = "..."
            argument_type = "any"

        # special case where argument name was reserved type
        if argument_name == "repeat":
            argument_name = "setRepeat"

        description_tag = dl.find('dd', class_='docblock')
        argument_description = description_tag.text.strip() if description_tag else ""
        # The placeholder text carries no information, so it is dropped.
        if argument_description == "See method description.":
            argument_description = ""

        arguments_list.append({
            'type': argument_type,
            'name': argument_name,
            'description': argument_description,
            'optional': argument_optional,
            'optional_val': argument_optional_value,
        })

    return arguments_list
def extract_argument_name_opt(argument_name):
    """Split ``"name (annotation)"`` into name, optional flag and default.

    Args:
        argument_name: Raw argument text, optionally followed by a
            parenthesised annotation such as ``(optional)`` or ``(5)``.

    Returns:
        A ``(name, optional, default)`` tuple. ``optional`` is ``True``
        whenever an annotation is present. ``default`` is the annotation text
        without its closing parenthesis, or ``None`` when there is no
        annotation or the annotation is the word ``optional`` (any case).
    """
    # Split at the first " (" only, so a default value that itself contains
    # " (" is kept whole instead of being cut at the inner parenthesis.
    name, separator, remainder = argument_name.partition(' (')
    name = name.strip()
    if not separator:
        return name, False, None

    annotation = remainder.strip()
    if annotation.endswith(')'):
        annotation = annotation[:-1]  # Remove the closing bracket

    if annotation.lower() == 'optional':
        return name, True, None
    return name, True, annotation
def extract_table(soup):
    """Convert the page's table rows into a list of ``{header: value}`` dicts.

    Column names are the texts of the ``<th>`` cells in the first ``<thead>``.
    Every ``<tr>`` after the first one in the document is a candidate row.
    A cell containing ``<span>`` elements is converted with
    ``extract_parameters``; any other cell contributes its stripped text.

    Returns:
        One dict per row whose ``<td>`` count equals the number of headers;
        an empty list when there is no ``<thead>`` or it has no ``<th>``.
    """
    header = soup.find('thead')
    if header is None:
        return []

    headers = [th.get_text(strip=True) for th in header.find_all('th')]
    if not headers:
        return []

    row_list = []
    # The first <tr> is skipped as the header row.
    for row in soup.find_all('tr')[1:]:
        cells = row.find_all('td')
        # Rows that do not line up with the header are skipped. This includes
        # rows with no <td> at all, which previously made the function return
        # [] and discard every row collected so far.
        if len(cells) != len(headers):
            continue

        row_list.append({
            column: (
                extract_parameters(cell)
                if cell.find_all('span')
                else cell.get_text(strip=True)
            )
            for column, cell in zip(headers, cells)
        })

    return row_list
def extract_parameters(cell):
    """Map the text of each titled ``<span>`` in ``cell`` to its title.

    The span's stripped text is the dictionary key and its ``title``
    attribute is the value. Spans with a missing or empty ``title`` are
    ignored; if two spans share the same text, the later one wins.
    """
    return {
        span.get_text(strip=True): span.get('title')
        for span in cell.find_all('span')
        if span.get('title')
    }
def extract_return_values(soup):
    """Collect the return values documented under the page's returns heading.

    Looks up ``<h2 id="returns">``, its next sibling ``<p>`` and every
    ``<dl>`` inside that paragraph. Entries without a ``<dt>`` or without a
    type link inside the ``<dt>`` are skipped.

    Returns:
        A list of dicts with the keys ``type``, ``name`` and ``description``;
        an empty list when the heading, the paragraph or the ``<dl>`` entries
        are missing.
    """
    returns_section = soup.find('h2', id='returns')
    if returns_section is None:
        return []

    returns_p_tag = returns_section.find_next_sibling('p')
    if returns_p_tag is None:
        return []

    return_values_list = []
    for dl in returns_p_tag.find_all('dl'):
        dt_element = dl.find('dt')
        if dt_element is None:
            continue

        type_link = dt_element.find('a')
        if type_link is None:
            continue
        return_type = type_link.text.strip()

        # Remove only the first occurrence of the type text so a value name
        # that happens to contain the type ("Unit" in "targetUnit") survives.
        return_name = dt_element.text.replace(return_type, '', 1).strip()

        description_tag = dl.find('dd', class_='docblock')
        return_description = description_tag.text.strip() if description_tag else ""
        # The placeholder text carries no information, so it is dropped.
        if return_description == "See method description.":
            return_description = ""

        return_values_list.append({
            'type': return_type,
            'name': return_name,
            'description': return_description,
        })

    return return_values_list
def write_lua_stub(class_name, method_string, arguments_list, table_list, return_values_list, output_directory):
    """Append one annotated Lua function stub to ``<class_name>.lua``.

    Args:
        class_name: Owning class; ``"Global"`` produces a free function
            instead of a ``Class:method`` definition.
        method_string: Name of the method to declare.
        arguments_list: Dicts with ``name``, ``type``, ``description``,
            ``optional`` and ``optional_val`` keys.
        table_list: Row dicts from the page's table. Only used for Global
            ``Register...Event`` methods, where each row with a string ``ID``
            and a dict under ``Parameters`` becomes an ``---@overload`` line.
        return_values_list: Dicts with ``type``, ``name`` and ``description``.
        output_directory: Directory containing the target ``.lua`` file.
    """
    lines = []

    # Write the parameter definitions
    for argument in arguments_list:
        line = "---@param " + argument['name']
        if argument['optional']:
            line += "?"
        line += " " + argument['type']
        # Arguments marked plain "(optional)" carry no default value
        # (optional_val is None); concatenating None would raise TypeError.
        if argument['optional'] and argument['optional_val'] is not None:
            line += " Default value: (" + argument['optional_val'] + ")"
        lines.append(line + " " + argument['description'] + "\n")

    # Hard code table parsing for register events for now (function overload definitions)
    if class_name == "Global" and re.match(r"^Register\w+Event$", method_string):
        for row in table_list:
            event_id = row.get('ID')
            parameters = row.get('Parameters')
            # Only create function overloads for properly formatted rows: a
            # textual event ID plus parsed parameter data.
            if not isinstance(event_id, str) or not isinstance(parameters, dict):
                continue
            # 'event' is declared as the concrete event ID rather than a type.
            callback_params = ', '.join(
                f'{key}: {event_id}' if key == 'event' else f'{key}: {value}'
                for key, value in parameters.items()
            )
            lines.append(
                "---@overload fun(event: " + event_id
                + ", func: fun(" + callback_params + "), shots?: number): function\n"
            )

    # Write the return value definitions
    for return_value in return_values_list:
        lines.append(
            "---@return " + return_value['type'] + " " + return_value['name']
            + " " + return_value['description'] + "\n"
        )

    # Append the function signature
    owner_prefix = "" if class_name == "Global" else class_name + ":"
    argument_names = ", ".join(argument['name'] for argument in arguments_list)
    lines.append("function " + owner_prefix + method_string + "(" + argument_names + ") end\n\n")

    # The stub is assembled first and written in one go, so a failure while
    # formatting never leaves a half-written stub in the file. UTF-8 keeps
    # non-ASCII description text writable regardless of the platform locale.
    stub_path = os.path.join(output_directory, class_name + ".lua")
    with open(stub_path, "a", encoding="utf-8") as lua_file:
        lua_file.writelines(lines)

    if debug:
        print(f"'{method_string}' stubs appended to '{class_name}.lua'")
def write_lua_class(class_name, inherited_objects, output_directory):
    """Create (or overwrite) ``<class_name>.lua`` with its header.

    Every file starts with ``---@meta``. For any class other than
    ``"Global"`` a ``---@class`` annotation follows, listing
    ``inherited_objects`` as parents when there are any, plus an empty table
    assignment for the class.
    """
    target_path = os.path.join(output_directory, class_name + ".lua")

    pieces = ["---@meta\n\n"]
    if class_name != "Global":
        declaration = f"---@class {class_name}"
        if inherited_objects:
            declaration += f": {', '.join(inherited_objects)}"
        pieces.append(declaration)
        pieces.append(f"\n{class_name} = {{}}\n\n")

    with open(target_path, "w") as lua_file:
        lua_file.write("".join(pieces))

    if debug:
        print(f"'{class_name}' class information appended to '{class_name}.lua'")
def process_html_file(html_file, filename, output_directory):
    """Parse one documentation page and write the matching Lua output.

    Args:
        html_file: Path of the HTML file to read.
        filename: Base name of the file; ``"index.html"`` marks a class
            definition page, anything else is treated as a method page.
        output_directory: Directory that receives the ``.lua`` files.

    Raises:
        ValueError: if an ``index.html`` page yields no class name.
    """
    # Hand Beautiful Soup the raw bytes so it can detect the document's
    # encoding itself instead of depending on the platform's locale.
    with open(html_file, "rb") as f:
        soup = BeautifulSoup(f, 'html.parser')

    # Special case for class definition files
    if filename == "index.html":
        class_name, inherited_objects = extract_class_and_inheritance(soup)
        if class_name is None:
            # Without a name there is no file to create; fail with a clear
            # message rather than a TypeError from building the file path.
            raise ValueError(f"No class definition found in '{html_file}'")
        write_lua_class(class_name, inherited_objects, output_directory)
        return

    # Otherwise process like normal
    class_name, method_string = extract_method_info(soup)
    arguments_list = extract_arguments(soup)
    table_list = extract_table(soup)
    return_values_list = extract_return_values(soup)
    write_lua_stub(class_name, method_string, arguments_list, table_list, return_values_list, output_directory)
def main():
    """Command-line entry point: convert a directory of HTML docs to Lua stubs.

    Expects ``sys.argv`` to hold the input directory, the output directory
    and, optionally, a debug flag (``true`` in any case enables it). Prints
    the usage line and exits with status 1 when fewer than two arguments are
    given. ``index.html`` is processed first, then every other ``.html`` file
    in the input directory.
    """
    global debug

    # Check if command-line arguments are provided
    if len(sys.argv) < 3:
        print("Usage: python parser.py <html_input_directory> <output_directory> <debug=false>")
        sys.exit(1)

    directory = sys.argv[1]
    output_directory = sys.argv[2]

    # Debug (Optional) - Default to false if not provided. The length check
    # matters: indexing sys.argv[3] directly raises IndexError when the
    # argument is omitted.
    if len(sys.argv) > 3:
        debug = sys.argv[3].lower() == 'true'

    # The writers open files inside this directory, so make sure it exists.
    os.makedirs(output_directory, exist_ok=True)

    # Special case to process our class definitions first: it creates the
    # file that the method stubs are appended to afterwards.
    html_file = os.path.join(directory, "index.html")
    process_html_file(html_file, "index.html", output_directory)

    # Iterate through HTML files in the directory. os.listdir() returns names
    # in arbitrary order; sorting makes the stub order reproducible.
    for filename in sorted(os.listdir(directory)):
        if filename.endswith(".html") and filename != "index.html":
            html_file = os.path.join(directory, filename)
            process_html_file(html_file, filename, output_directory)
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()