-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
1751 lines (1253 loc) · 62.4 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python
# coding: utf-8
# Data management imports
# In[ ]:
import pandas as kungfupanda # pandas for data manipulation and markdown
from pandas import DataFrame # export
import argparse # For command line arguments when calling py script with flags
import pickle # for saving/loading json records and file
# modification date history
import json
from questionDataclass import questionDataclass as Question
# OS and directory management imports
# In[ ]:
from os import listdir # for file retrieval and path calculations
from os.path import isfile, join
from os import stat
from os.path import isdir # for creation of topic markdown folder if
from os import mkdir # not present
from os import getcwd # gets current working DIR for calculating git
# root of submissions folder
from os import chdir # for changing the working directory to ensure
from os.path import abspath, dirname # relative paths used are from this script's
import sys # location rather than the calling location
# e.g. if you call `python someFolder/main.py`
# then it will still work.
# Environment variable imports + file log and creation time imports
# In[ ]:
from os import getenv, environ # for environment variables
from dotenv import load_dotenv, find_dotenv # for config purposes (.env file)
import subprocess # tracing git log history for ctimes and mtimes
# In[ ]:
from os.path import getmtime, getctime # retreiving file creation/modification times
from datetime import datetime, timedelta
import time
# QOL and anti-redundancy imports
# In[ ]:
from typing import Set, Dict, List, Tuple # misc. QOL imports
from collections import defaultdict
from icecream import ic # for debugging / outputs
import re # for regex file name matching / question number matching
from functools import cache # for redundancy protection
# TQDM import done separately below after checking if this is a .py or .ipynb file
# # Script Configuration
# #### `.env` variables and `working directories`
# 1. Loads `env` variables for reference.
# 1. Tries to retrieve it from `../` if found (prioritizing template).
# 2. If failure, use the `.env` found in the current script directory (in the updater).
# 2. If is a script run, denotes it as such for script flag references and ensures working directory is the script's location rather than the calling directory.
# In[ ]:
# loading env variables
# First load whichever .env find_dotenv() locates by default; override=True lets
# values from the file replace variables that are already set in the process.
print('Default .env activated from script directory (.readme_updater/)')
load_dotenv(find_dotenv(), override=True)
# A .env one directory up takes precedence over the default loaded above.
# NOTE(review): '../' is resolved against the current working directory, and the
# chdir to the script's location only happens further down - confirm this is the
# intended directory when the script is launched from elsewhere.
if '.env' in listdir('../') :
    print('.env found in ../ directory. Overriding default...')
    load_dotenv(find_dotenv('../.env'), override=True)
# In[ ]:
# NOTE: if the script is being run from a jupyter notebook, then it should
# already be in the correct directory.
# Start from "notebook" and flip to False once a plain-script run is detected.
IS_NOTEBOOK = True
try:
    # Absence of 'ipykernel' in sys.modules is treated as "running as a .py script".
    if 'ipykernel' not in sys.modules:
        print('Working directory being set to script location.')
        IS_NOTEBOOK = False
        # Make every relative path below resolve from this file's own directory,
        # regardless of where the script was invoked from.
        chdir(dirname(abspath(__file__)))
    else :
        print('Working directory already set to script location. No need for adjustment')
except NameError:
    # Presumably guards against `__file__` being undefined (e.g. an interactive
    # session). IS_NOTEBOOK has already been set to False by this point and the
    # working directory is left unchanged.
    print('NameError')
    pass
# In[ ]:
# TQDM import based off if current running script is a jupyter notebook
# or a python script
# Both branches bind the same name `tqdm`, so the rest of the file can use it
# without caring which variant was imported.
if IS_NOTEBOOK :
    print('Importing tqdm.notebook')
    from tqdm.notebook import tqdm
else :
    print('Importing tqdm (non-notebook)')
    from tqdm import tqdm
# In[ ]:
# README_ABS_DIR will get confirmed in if name==main prior to running
# Current working directory with Windows separators normalised to '/'.
README_ABS_DIR = getcwd().replace('\\', '/')
NOTEBOOK_ABS_DIR = README_ABS_DIR
# Final path component of the working directory (its folder name). The git-date
# helpers below chdir to '../' and use this name to step back in afterwards.
MAIN_DIRECTORY = NOTEBOOK_ABS_DIR[NOTEBOOK_ABS_DIR.rfind('/')+1:]
print(f'{NOTEBOOK_ABS_DIR = }')
# In[ ]:
# Locations are read from environment variables. getenv() returns None for an
# unset variable, in which case the join() below raises TypeError at import time.
README_PATH = getenv('README_PATH')
LEETCODE_PATH_FROM_README = getenv('QUESTIONS_PATH_FROM_README')
# Path from this script to the folder holding the solution files.
LEETCODE_PATH_REFERENCE = join(README_PATH, LEETCODE_PATH_FROM_README)
# ## Helper Methods
#
# AddCase $\rightarrow$ takes information for a new question file and formats it accordingly for a row.
#
# updateQuestion $\rightarrow$ if a question already has a solution, this is called instead to insert the new file link into the existing row details.
# In[ ]:
# Categories besides those in lists
# Category labels that are not derived from the list files
PRIMARY_CATEGORIES = {'Daily', 'Weekly Premium', 'Contest', 'Favourite'}
# Starts at "now"; getAllCTimesViaGit lowers it to the earliest creation date it finds
_oldest_date = datetime.now()
# _oldest_date = datetime(2024, 7, 23)
# In[ ]:
def individualCTimeViaGit(cmd: List[str]) -> Tuple[datetime, datetime] :
    '''
    Runs a command that prints one Unix timestamp per line (in this file: a
    `git log --format=%ct` call) and converts the earliest and latest of those
    timestamps into datetimes.

    ### Parameters :
    cmd : List[str]
        The full argument list to execute (no shell involved). The last element
        is treated as the path being inspected and is only used in messages.

    ### Returns :
    (creationDate, modifiedDate) : Tuple[datetime, datetime]
        The smallest and largest timestamps printed by the command, as naive
        local-time datetimes with second precision.

    ### Raises :
    SystemExit
        If the command printed no timestamps at all, or printed a line that is
        not an integer. The offending path and output are printed first.
    '''
    # run() waits for the child and drains both pipes, so the process is always
    # reaped and a large stderr cannot stall it.
    completed = subprocess.run(cmd,
                               shell=False,
                               stdin=None,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    result = completed.stdout.splitlines(keepends=True)

    # Blank lines (e.g. a redundant trailing newline) carry no timestamp
    modifiedTimes = [line.decode("utf-8").strip() for line in result]
    modifiedTimes = [entry for entry in modifiedTimes if entry]

    # Debugging
    if '1404' in cmd[-1] :
        print(f'{cmd = }')
        print(f'{modifiedTimes = }')
        print(f'{result = }')

    try :
        # Compare as integers: compared as text, '999999999' sorts after '1000000000'.
        timestamps = [int(entry) for entry in modifiedTimes]
        creationDate = datetime.fromtimestamp(min(timestamps))
        modifiedDate = datetime.fromtimestamp(max(timestamps))
    except ValueError as ve :
        # Raised by int() on malformed output and by min() on an empty list
        print(f'Error in parsing {cmd[-1]}')
        print(f'{modifiedTimes}')
        print(ve)
        sys.exit()

    return (creationDate, modifiedDate)
# In[ ]:
# Cache of git-derived (creation, modification) dates keyed by README-relative path
_ALL_GIT_CM_TIMES = {}
def getAllCTimesViaGit(paths: List[str]) -> Dict[str, Tuple[datetime, datetime]] :
    '''
    WARNING: DO NOT USE LOCALLY. SLOW IF RAN LOCALLY.
    GITHUB ACTIONS ARE ABLE TO PERFORM THIS QUICKLY (~10s for the script for ~700 files)
    BUT A LOCAL RUN OF `-g` CAN TAKE UPWARDS OF 10 MINUTES FOR THE SAME NUMBER OF FILES.

    To avoid having to constantly swap directories, this function parses all the ctimes and mtimes
    in one block of time. This gets activated with the `-g` flag. Default otherwise is to use the
    regular `getctime` and `getmtime` functions locally which is much much faster. This only exists
    to compensate for the inability for ctime and mtime checking with git actions.

    ### Parameters :
    paths : List[str]
        File paths relative to the questions folder; each one is joined onto
        `LEETCODE_PATH_FROM_README` before being handed to git.

    ### Returns :
    output : Dict[str, Tuple[datetime, datetime]]
        Maps each joined path to its (creation, modification) datetimes.

    ### Side effects :
    Adds the results to `_ALL_GIT_CM_TIMES` and sets the module-level
    `_oldest_date` to midnight of the earliest creation date found (or of
    today when no file is older than "now"). The working directory is moved
    to `../` for the duration of the call and then to `MAIN_DIRECTORY`
    again, also when the git parsing fails part-way through.
    '''
    global _oldest_date

    print('Beginning parsing of git logs for file creation and modification dates...')
    print(f'Script path: {getcwd() = }')
    chdir('../')
    try :
        print(f'README path: {getcwd() = }')

        cmd = r"git log -M --format=%ct --reverse --".split()

        output = {}
        oldest_date = datetime.now()

        with tqdm(total=len(paths)) as pbar :
            for path in paths :
                path = join(LEETCODE_PATH_FROM_README, path)
                output[path] = individualCTimeViaGit(cmd + [path])

                if output[path][0] < oldest_date :
                    oldest_date = output[path][0]

                pbar.update(1)

        _oldest_date = oldest_date.replace(hour=0, minute=0, second=0, microsecond=0)

        # Usually I'd avoid using global for this but this is a personal project so it should be fine.
        _ALL_GIT_CM_TIMES.update(output)
        print(f'{_ALL_GIT_CM_TIMES = }')
    finally :
        # Step back into the script folder even if a git call raised or exited
        chdir(MAIN_DIRECTORY)

    return output
# In[ ]:
@cache
def getCtimesMtimesGitHistory(path: str) -> Tuple[datetime, datetime] :
    '''
    WARNING: DO NOT USE LOCALLY. SLOW IF RAN LOCALLY RELATIVE TO THE REGULAR CTIME FUNCTION.
    IF RUNNING LOCALLY, RUN (getCtimeMtimesMain) I.E. WITHOUT THE `-g` FLAG.

    The cost for a single file isn't significant however when you reach ~100+ files,
    the cumulative wait can go into the minutes compared to the seconds it would take
    with the regular `getctime` and `getmtime` functions (without the `-g` flag)

    ### Parameters :
    path : str
        Path to the file. Everything up to and including the first `/` is
        dropped before the path is handed to git, which is run from `../`.

    ### Returns :
    cmDates : Tuple[datetime, datetime]
        The (creation, modification) datetimes taken from the file's git log.
        Results are memoised per `path`.
    '''
    # A path without any '/' is used unchanged (find() returns -1)
    path = path[path.find('/') + 1:]

    chdir('../')
    try :
        cmd = r"git log --follow --format=%ct --reverse --".split() + [path]
        return individualCTimeViaGit(cmd)
    finally :
        # Step back into the script folder even if the git call raised or exited
        chdir(MAIN_DIRECTORY)
# In[ ]:
USE_GIT_DATES = False
@cache
def getCtimeMtimesMain(path: str) -> Tuple[datetime, datetime] :
    '''
    Looks up when a file was created and when it was last modified.

    When the module-level `USE_GIT_DATES` flag is set, the dates are taken from
    the git history via `getCtimesMtimesGitHistory`; this mainly counters
    GitHub actions, where the file creation date is the time of the action.
    Otherwise the file system's ctime and mtime are used.

    ### Parameters :
    path : str
        Path of the file to inspect

    ### Returns :
    (earlier, later) : Tuple[datetime, datetime]
        Creation and modification datetimes at second precision. Results are
        memoised per `path`.
    '''
    if USE_GIT_DATES :
        return getCtimesMtimesGitHistory(path)

    ctimeLayout = "%a %b %d %H:%M:%S %Y"
    created = datetime.strptime(time.ctime(getctime(path)), ctimeLayout)
    modified = datetime.strptime(time.ctime(getmtime(path)), ctimeLayout)

    # File metadata occasionally reports a creation time later than the
    # modification time, so always hand back the pair in chronological order.
    return (min(created, modified), max(created, modified))
# In[ ]:
def getCtimeMtimes(path: str, *, preCalculated: Dict[str, Tuple[datetime, datetime]] = None) -> Tuple[datetime, datetime] :
    '''
    Resolves the (creation, modification) datetimes of a file, preferring
    already-known values over a fresh lookup.

    ### Parameters :
    path : str
        Path to the file, either README-relative or script-relative
    preCalculated : Dict[str, Tuple[datetime, datetime]]
        Optional table of known dates keyed by README-relative path

    ### Returns :
    Tuple[datetime, datetime]
        The entry from `_ALL_GIT_CM_TIMES` if present, else the entry from
        `preCalculated` if present, else whatever `getCtimeMtimesMain(path)` gives.
    '''
    # Lookup tables are keyed README-relative: keep only what follows the first '../'
    readme_path = path.partition('../')[2] if '../' in path else path

    for knownTimes in (_ALL_GIT_CM_TIMES, preCalculated) :
        if knownTimes and readme_path in knownTimes :
            return knownTimes[readme_path]

    return getCtimeMtimesMain(path)
# In[ ]:
def addCase(level: str,
            number: int,
            title: str,
            categories: Set[str],
            language: str,
            notebook_path: str,
            readme_path: str,
            fileLatestTimes: dict,
            contestTitle: str=None,
            contestQNo: str=None) -> dict :
    '''
    Builds the record for a question that is being seen for the first time.

    ### Parameters (Required) :
    level : str
        Difficulty indicator; only its first character is inspected
        (e / m / h, case-insensitive), anything else becomes 'Unknown'
    number : int
        The official LeetCode question number
    title : str
        The title of the question (colloquial name)
    categories : Set[str]
        The categories the question falls under (e.g. Contest, Daily, etc.).
        A falsy value is replaced with a new empty set.
    language : str
        The programming language used to solve the question
    notebook_path : str
        The path from the main.py/ipynb script to the code file in question
    readme_path : str
        The path from the README.md file to be exported to the code file in question
    fileLatestTimes : dict
        Latest known modification time per file; the entry for `readme_path`
        is written by this call

    ### Parameters (Optional) :
    contestTitle : str
        The title of the contest the question was a part of if applicable
    contestQNo : str
        The question number in the contest if applicable (e.g. q1, q2, etc.)

    ### Returns :
    output : dict
        The new question record. `bytes` holds the size of the code file, or
        0 when the file cannot be found.
    '''
    firstSeen, lastTouched = getCtimeMtimes(notebook_path)
    fileLatestTimes[readme_path] = lastTouched

    try :
        sizeInBytes = stat(notebook_path).st_size
    except FileNotFoundError as missingFile :
        # A missing file still gets a record, just with no size
        sizeInBytes = 0
        print(missingFile)

    categories = categories or set()

    difficultyNames = {'e': 'Easy', 'm': 'Medium', 'h': 'Hard'}
    level = difficultyNames.get(level[0].lower(), 'Unknown')

    return {
        'level':            level,
        'number':           number,
        'title':            title,
        'categories':       categories,
        'contestTitle':     contestTitle,
        'contestQNo':       contestQNo,
        'date_done':        firstSeen,              # First time completed
        'date_modified':    lastTouched,            # Most recent date
        'solution':         '',
        'solutions':        {language: [readme_path]},
        'languages':        {language},
        'bytes':            sizeInBytes
    }
# In[ ]:
def updateQuestion(orig: dict,
                   *,
                   language: str,
                   categories: Set[str],
                   notebook_path: str,
                   readme_path: str,
                   fileLatestTimes: dict,
                   contestTitle: str=None,
                   contestQNo: str=None) -> dict :
    '''
    Merges the details of one more solution file into the record of a
    question that has already been encountered. Counterpart of addCase.

    ### Parameters :
    orig : dict
        The existing question record; it is modified in place
    All other parameters have the same meaning as in addCase. Falsy values
    leave the corresponding part of the record untouched.

    ### Returns :
    orig : dict
        The same record object, after the updates
    '''
    if language and language not in orig['languages'] :
        orig['languages'].add(language)

    # Contest details are only overwritten when a new value is supplied
    for field, newValue in (('contestTitle', contestTitle), ('contestQNo', contestQNo)) :
        if newValue :
            orig[field] = newValue

    if categories :
        orig['categories'] |= categories

    # Everything below concerns the solution file itself
    if not (notebook_path and readme_path) :
        return orig

    firstSeen, lastTouched = getCtimeMtimes(notebook_path)

    # Widen the record's date range so it spans every solution file
    if firstSeen < orig['date_done'] :
        orig['date_done'] = firstSeen
    if lastTouched > orig['date_modified'] :
        orig['date_modified'] = lastTouched
    fileLatestTimes[readme_path] = lastTouched

    orig['solutions'].setdefault(language, []).append(readme_path)

    try :
        sizeInBytes = stat(notebook_path).st_size
    except FileNotFoundError as missingFile :
        sizeInBytes = 0
        print(missingFile)
    orig['bytes'] += sizeInBytes

    return orig
# # Pickle Processes
#
# In[ ]:
# NOTE: despite its name, QUESTION_DATA_FOLDER is the path of the pickle file itself
QUESTION_DATA_FOLDER = join(getenv('SUBMODULE_DATA_PATH'), getenv('LEETCODE_QUESTION_DETAILS'))
SUBMODULE_DATA_PATH = getenv('SUBMODULE_DATA_PATH')
@cache
def retrieveQuestionDetails() -> dict :
    '''
    Loads the pickled question details (i.e. title, acRates, difficulties, etc.)
    parsed from the official LeetCode json data.

    The file is read once; later calls reuse the cached result. If the pickle
    file does not exist, a message is printed and the script exits.

    ### Returns :
    questionDetailsDict : dict[int, details]
        The question details matched to the question's assigned number
    '''
    print(f'{getcwd() = }')
    print(f'{listdir(SUBMODULE_DATA_PATH)}')
    print(f'Question details path: {QUESTION_DATA_FOLDER = }')

    if not isfile(QUESTION_DATA_FOLDER) :
        print('\nError in parsing official question data. leetcode.pkl not found. Exiting...')
        print()
        exit()

    print('\nFiles found. Importing now...\n')

    # schema: key=int(questionNumber)   val=(title, titleSlug, paidOnly, difficulty, acRate)
    with open(QUESTION_DATA_FOLDER, 'rb') as detailsFile :
        return pickle.load(detailsFile)
# In[ ]:
# Pickle file that remembers each file's latest modification time between runs
HISTORY_PATH = join(getenv('USER_DATA_PATH'), getenv('FILE_MODIFICATION_NAME'))
# In[ ]:
def writeRecentFileTimes(fileLatestTimes: dict) -> bool :
    '''
    Saves the most recent modification time of every file to `HISTORY_PATH`
    so that the next run can compare against it.

    ### Parameters :
    fileLatestTimes : dict
        The table to pickle

    ### Returns :
    True once the table has been written
    '''
    with open(HISTORY_PATH, 'wb') as historyFile :
        pickle.dump(fileLatestTimes, historyFile)
        return True
# In[ ]:
def getRecentFileTimes() -> dict :
    '''
    Loads the table stored by a previous `writeRecentFileTimes()` call.

    ### Returns :
    The unpickled table, or an empty dict when no history file exists yet
    '''
    if not isfile(HISTORY_PATH) :
        return {}

    with open(HISTORY_PATH, 'rb') as historyFile :
        return pickle.load(historyFile)
# # Daily and Weekly Challenges
# In[ ]:
DAILIES_DATA_PATH = join(getenv('SUBMODULE_DATA_PATH'), getenv('DAILIES_FOLDER'), getenv('DAILIES_FILE'))
WEEKLIES_DATA_PATH = join(getenv('SUBMODULE_DATA_PATH'), getenv('DAILIES_FOLDER'), getenv('WEEKLIES_FILE'))
# In[ ]:
def getDailies(firstDate: datetime = None) -> List[Tuple[datetime, int]] :
    '''
    Reads the daily challenge json file and lists every daily question from
    `firstDate` onwards.

    ### Parameters :
    firstDate : datetime
        Entries dated before this are skipped. Defaults to the module-level
        `_oldest_date`.

    ### Returns :
    dailies : List[Tuple[date, questionNo]]
        (challenge date, question number) pairs, newest first
    '''
    global _oldest_date

    if not firstDate :
        firstDate = _oldest_date
        print('Oldest date found:', firstDate)

    with open(DAILIES_DATA_PATH, 'rb') as dailiesFile :
        rawDailies = json.load(dailiesFile)

    challenges = []
    for dateKey, details in rawDailies.items() :
        challengeDate = datetime.strptime(dateKey, '%Y-%m-%d')
        if challengeDate >= firstDate :
            challenges.append((challengeDate, int(details['question']['questionFrontendId'])))

    challenges.sort(key=lambda entry: entry[0], reverse=True)
    return challenges
# In[ ]:
def getWeeklies(firstDate: datetime = None) -> List[Tuple[datetime, int]] :
    '''
    Reads the weekly premium challenge json file and lists every weekly
    question from `firstDate` onwards.

    ### Parameters :
    firstDate : datetime
        Entries dated before this are skipped. Defaults to the module-level
        `_oldest_date`.

    ### Returns :
    weeklies : List[Tuple[date, questionNo]]
        (challenge date, question number) pairs, newest first
    '''
    global _oldest_date

    if not firstDate :
        firstDate = _oldest_date
        print('Oldest date found:', firstDate)

    with open(WEEKLIES_DATA_PATH, 'rb') as weekliesFile :
        rawWeeklies = json.load(weekliesFile)

    challenges = []
    for dateKey, details in rawWeeklies.items() :
        challengeDate = datetime.strptime(dateKey, '%Y-%m-%d')
        if challengeDate >= firstDate :
            challenges.append((challengeDate, int(details['question']['questionFrontendId'])))

    challenges.sort(key=lambda entry: entry[0], reverse=True)
    return challenges
# In[ ]:
# # NOTE: TESTING
# temp = getWeeklies(datetime(2022, 1, 1))
# ic(temp)
# In[ ]:
def parseQuestionsForDailies(questionData: dict) -> Dict[int, Question] :
    '''
    Matches solved questions against the daily challenges returned by
    `getDailies()`.

    A question counts for a given daily when its `date_done` is no later than
    1 day and 12 hours after the challenge date (leeway for forgetting to
    commit). Matching questions also get 'Daily' added to their categories
    in `questionData`.

    ### Parameters :
    questionData : dict
        Solved question records keyed by question number

    ### Returns :
    dailiesDict : dict
        Keyed by the challenge date; each value is a copy of the question's
        record with `date_done` replaced by that challenge date
    '''
    leeway = timedelta(days=1, hours=12)
    dailiesDict = {}

    for challengeDate, questionNo in getDailies() :
        if questionNo not in questionData :
            continue

        record = questionData[questionNo]
        if not (record['date_done'] <= challengeDate + leeway) :
            continue

        matched = record.copy()
        matched['date_done'] = challengeDate
        dailiesDict[challengeDate] = matched
        record['categories'].add('Daily')

    return dailiesDict
# In[ ]:
def parseQuestionsForWeeklies(questionData: dict) -> Dict[int, Question] :
    '''
    Matches solved questions against the weekly premium challenges returned
    by `getWeeklies()`.

    A question counts for a given weekly when its `date_done` is no later
    than 8 days after the challenge date (the week itself plus leeway for
    forgetting to commit). Matching questions also get 'Weekly Premium'
    added to their categories in `questionData`.

    ### Parameters :
    questionData : dict
        Solved question records keyed by question number

    ### Returns :
    weekliesDict : dict
        Keyed by the challenge date; each value is a copy of the question's
        record with `date_done` replaced by that challenge date
    '''
    leeway = timedelta(days=8)
    weekliesDict = {}

    for challengeDate, questionNo in getWeeklies() :
        if questionNo not in questionData :
            continue

        record = questionData[questionNo]
        if not (record['date_done'] <= challengeDate + leeway) :
            continue

        matched = record.copy()
        matched['date_done'] = challengeDate
        weekliesDict[challengeDate] = matched
        record['categories'].add('Weekly Premium')

    return weekliesDict
# # Parsing Files
# Question file parsing occurs here. It organizes it into 3 different lists, separated by difficulty and sorted by question number afterwards.
# In[ ]:
# Parse one leetcode answer file in the submissions folder
def parseCase(leetcodeFile:         str, # file name
              questionData:         dict, # dictionary of question data
              fileLatestTimes:      dict,
              reprocessMarkdown:    set,
              *,
              questionDetailsDict:  dict = retrieveQuestionDetails(),
              subFolderPath:        str = '',
              altTitle:             str = '',
              contest:              str = None,
              contestQNo:           str = None) -> bool:
    '''
    Parses one solution file from the submissions folder and records it in
    `questionData`, either as a new question (addCase) or as one more
    solution of a known question (updateQuestion).

    ### Parameters :
    leetcodeFile : str
        File name of the solution. The first run of 1-4 digits in it is taken
        as the question number (e.g. 'e123 v1.py' becomes 123).
    questionData : dict
        Question records keyed by question number; updated in place
    fileLatestTimes : dict
        Latest known modification time per README-relative path; updated in place
    reprocessMarkdown : set
        Receives the question number when the file is new or newer than the
        time stored in `fileLatestTimes`
    questionDetailsDict : dict
        Official question details keyed by question number
    subFolderPath : str
        Folder of the file relative to the questions folder, if any
    altTitle : str
        Optional prefix for the generated title
    contest : str
        Contest title when the file comes from a contest folder
    contestQNo : str
        Question number within the contest; overridden by a `q<digit>` found
        in the file name

    ### Returns :
    bool
        False when no question number could be extracted from the file name
        (the file is skipped), True otherwise
    '''
    path = join(LEETCODE_PATH_FROM_README, subFolderPath, leetcodeFile).replace("\\", "/")

    try :
        # Raw strings keep `\d` a regex escape instead of an invalid string escape
        number = int(re.search(r"\d{1,4}", leetcodeFile).group())   # Takes the first full number as the question
        level = questionDetailsDict[number].level                   # number and uses that as reference
                                                                    # e.g. 'e123 v1.py' becomes 123
    except (AttributeError) as ae :
        print(f'Error in parsing {leetcodeFile}: {ae.name} encountered while trying to extract question number int(...).',
              '\nparseCase(...)',
              '\nSkipping')
        return False
    except (KeyError) as ke :
        print(f'Error in parsing {leetcodeFile}: {ke} encountered while trying to extract question level from questionDetailsDict.',
              '\nparseCase(...)',
              '\nAttempting to pull from the name...')
        # Fall back to the difficulty letter that prefixes the file name
        level = leetcodeFile[0].lower()
        if level in ['e', 'm', 'h'] :
            print(f'Level found: {level}')
        else :
            print(f'Level not found. Defaulting to "Unknown"')
            level = 'Unknown'

    notebook_path = join(README_PATH, path)
    creationtime, modificationtime = getCtimeMtimes(notebook_path)
    latestTime = max(creationtime, modificationtime)

    # New or changed since the stored time: its markdown has to be regenerated
    if path not in fileLatestTimes or latestTime > fileLatestTimes[path] :
        reprocessMarkdown.add(number)
        fileLatestTimes[path] = latestTime

    if number in questionDetailsDict :
        title = f'[{questionDetailsDict[number].title}](<https://leetcode.com/problems/{questionDetailsDict[number].slug}>)'
    else :
        title = f'Question {number}'
    categories = set()
    language = leetcodeFile[leetcodeFile.rfind('.') + 1:]

    if altTitle :
        title = altTitle + ' - ' + title

    # Question is from a contest folder
    if contest :
        temp = re.findall(r'q\d{1}', leetcodeFile)      # Checking if file name has a question number (e.g. q1 of the contest)
        if temp :
            contestQNo = temp[0]

        categories.add('Contest')

    if number in questionData :                         # If solution already found for this question
        questionData[number] = updateQuestion(questionData[number],
                                              language=language,
                                              categories=categories,
                                              notebook_path=notebook_path,
                                              readme_path=path,
                                              contestTitle=contest,
                                              contestQNo=contestQNo,
                                              fileLatestTimes=fileLatestTimes)
    else :
        questionData[number] = addCase(level=level,
                                       number=number,
                                       title=title,
                                       categories=categories,
                                       language=language,
                                       notebook_path=notebook_path,
                                       readme_path=path,
                                       contestTitle=contest,
                                       contestQNo=contestQNo,
                                       fileLatestTimes=fileLatestTimes)
    return True
# In[ ]:
@cache
def getCodeFiles() -> List[str] :
    '''
    Lists the solution files that sit directly in the questions folder:
    regular files that have an extension other than `.txt`, `.md` or
    `.gitignore`. The listing is computed once and cached.
    '''
    skippedSuffixes = ('.txt', '.md', '.gitignore')
    codeFiles = []

    for entry in listdir(LEETCODE_PATH_REFERENCE) :
        if not isfile(join(LEETCODE_PATH_REFERENCE, entry)) :
            continue
        if entry.endswith(skippedSuffixes) or '.' not in entry :
            continue
        codeFiles.append(entry)

    return codeFiles
@cache
def getContestFolders() -> List[str] :
    '''
    Lists every entry of the questions folder that is not a regular file.
    The listing is computed once and cached.
    '''
    folders = []
    for entry in listdir(LEETCODE_PATH_REFERENCE) :
        if isfile(join(LEETCODE_PATH_REFERENCE, entry)) :
            continue
        folders.append(entry)
    return folders
def _isContextFileName(name: str) -> bool :
    '''True for note files: names ending in `.txt` or `.md`, or names without any extension.'''
    # Such names can never end in '.gitignore', so no separate exclusion is needed
    return name.endswith(('.txt', '.md')) or '.' not in name

@cache
def _collectContextFiles(contestFolders: Tuple[str, ...]) -> Tuple[str, ...] :
    '''Cached worker for getContextFiles; takes a tuple because cache keys must be hashable.'''
    found = [entry for entry in listdir(LEETCODE_PATH_REFERENCE)
             if isfile(join(LEETCODE_PATH_REFERENCE, entry))
             and _isContextFileName(entry)]

    for folder in contestFolders :
        found.extend(join(folder, entry) for entry in listdir(join(LEETCODE_PATH_REFERENCE, folder))
                     if isfile(join(LEETCODE_PATH_REFERENCE, folder, entry))
                     and _isContextFileName(entry))

    return tuple(found)

def getContextFiles(contestFolders: List[str] = getContestFolders()) -> List[str] :
    '''
    Lists the context (note) files: `.txt` / `.md` files and files without an
    extension, both directly in the questions folder and inside each of the
    given contest folders.

    ### Parameters :
    contestFolders : List[str]
        Folder names relative to the questions folder. Defaults to the result
        of `getContestFolders()` taken when this function was defined.

    ### Returns :
    output : List[str]
        File paths relative to the questions folder; files found in a contest
        folder are prefixed with that folder. A new list is returned on each
        call, while the directory scan itself is cached per set of folders.
    '''
    # A list cannot be used as a cache key, so the lookup is done with a tuple
    return list(_collectContextFiles(tuple(contestFolders)))
def getContestFiles(contestFolders: List[str]) -> List[Tuple[str, str]] :
    '''
    Lists the solution files found inside the given contest folders.

    ### Parameters :
    contestFolders : List[str]
        Folder names relative to the questions folder

    ### Returns :
    contestLeetcodeFiles : List[Tuple[str, str]]
        (folder, file name) pairs for every regular file that has an
        extension other than `.txt`, `.md` or `.gitignore`
    '''
    skippedSuffixes = ('.txt', '.md', '.gitignore')
    contestLeetcodeFiles = []

    for contestFolder in contestFolders :
        for fileName in listdir(join(LEETCODE_PATH_REFERENCE, contestFolder)) :
            if not isfile(join(LEETCODE_PATH_REFERENCE, contestFolder, fileName)) :
                continue
            if fileName.endswith(skippedSuffixes) or '.' not in fileName :
                continue
            contestLeetcodeFiles.append((contestFolder, fileName))

    return contestLeetcodeFiles
# # Sort TXT Context
# If .txt notes are placed, this adds them to their respective entry.
# In[ ]:
def parseContextFiles(txtFiles: List[str],
                      questionData: dict,
                      fileLatestTimes: dict,
                      reprocessMarkdown: Set[int]) -> None:
    '''
    Attaches context (note) files to the questions they belong to.

    ### Parameters :
    txtFiles : List[str]
        Context file paths relative to the questions folder. The first run of
        1-4 digits in the last path component is taken as the question number.
    questionData : dict
        Question records keyed by question number; matching records get a
        `contextFile` entry holding the README-relative path of the note
    fileLatestTimes : dict
        Latest known modification time per file; updated in place
    reprocessMarkdown : Set[int]
        Receives the question number when the note is new or newer than the
        time stored in `fileLatestTimes`

    Files without a question number, or whose question has no solution in
    `questionData`, are reported and skipped.
    '''
    for fileName in txtFiles :
        # Only look at the last path component so digits in a folder name are ignored
        if '\\' in fileName :
            lastComponent = fileName[fileName.rfind('\\'):]
        elif '/' in fileName :
            lastComponent = fileName[fileName.rfind('/'):]
        else :
            lastComponent = fileName

        # Raw string keeps `\d` a regex escape instead of an invalid string escape
        numberMatch = re.search(r"\d{1,4}", lastComponent)
        if numberMatch is None :
            print(f'Error in parsing {fileName}: Attribute Error encountered while trying to extract question number int(...).',
                  '\nparseContextFiles(...)',
                  '\nSkipping')
            continue
        number = int(numberMatch.group())

        if number not in questionData :
            print(f'Error. No question solution found for context file ({fileName = })')
            continue

        questionData[number]['contextFile'] = join(LEETCODE_PATH_FROM_README, fileName)

        path = join(LEETCODE_PATH_REFERENCE, fileName)
        creationtime, modificationtime = getCtimeMtimes(path)
        latestTime = max(creationtime, modificationtime)

        # New or changed since the stored time: the question's markdown has to be regenerated
        if path not in fileLatestTimes or latestTime > fileLatestTimes[path] :
            fileLatestTimes[path] = latestTime
            reprocessMarkdown.add(number)
# # List-Based Categories
# Updating `Category` columns based on the lists in the `Lists` directory.
# In[ ]:
# Folder holding the question-list files, taken from the environment
LISTSDIR = getenv('LISTS_LOCATION')
@cache
def getLists() -> List[str] :
    '''
    Lists the names of the question-list files in `LISTSDIR`: regular files
    whose name does not start with '.' and is not 'README.md'. The names are
    printed, and the listing is computed once and cached.
    '''
    listFileNames = []
    for entry in listdir(LISTSDIR) :
        if not isfile(join(LISTSDIR, entry)) :
            continue
        if entry.startswith('.') or entry == 'README.md' :
            continue
        listFileNames.append(entry)

    print(listFileNames)
    return listFileNames
# In[ ]:
''' Format for lists file is as follows:
[Question #]. [Question Name]
[Easy, Med., Hard]
Topic1
Topic2
Topic3
...
'''
@cache
def getList(fileName, filePath) -> Set[int] :
    '''
    Collects the question numbers named in one list file.

    ### Parameters :
    fileName :
        Name of the list; not used for reading, it only forms part of the cache key
    filePath :
        Path of the list file to read

    ### Returns :
    output : Set[int]
        The number from every line that starts with 1-4 digits followed by a '.'
    '''
    # can change to dict later if we want to output category info
    questionNumbers = set()

    with open(filePath, 'r') as listFile :
        for line in listFile.readlines() :
            header = re.match(r'(\d{1,4})\.', line)
            if header :
                questionNumbers.add(int(header.group(1)))

    return questionNumbers
# In[ ]:
def processListData(questionData: dict,
                    *,
                    listFileNames: List[str] = getLists()) -> dict :
    '''
    Reads every question list and tags the solved questions that appear in it.

    ### Parameters :
    questionData : dict
        Question records keyed by question number; each solved question found
        in a list gets that list's file name added to its categories
    listFileNames : List[str]
        Names of the list files inside `LISTSDIR`. Defaults to the result of
        `getLists()` taken when this function was defined.

    ### Returns :
    listData : dict
        Maps each list file name to the set of question numbers it contains
    '''
    listData = {}

    for listName in listFileNames :
        members = getList(listName, join(LISTSDIR, listName))
        listData[listName] = members

        for questionNo in members :
            if questionNo not in questionData :
                continue
            questionData[questionNo]['categories'].add(listName)

    return listData
# # Question Topic Grouping
# Parses the questions in `questionData` and adds their numbers to appropriate lists so that they can be parsed into their own lists as well as counted.
# In[ ]:
def getCompletedQuestionsTopicLists(questionData: dict,
                                    *,
                                    questionTopicsDict: dict = retrieveQuestionDetails()) -> defaultdict :
    '''
    Groups the solved questions by topic.

    ### Parameters :
    questionData : dict
        Solved question records keyed by question number
    questionTopicsDict : dict
        Official question details keyed by question number; each entry's
        `topics` attribute is iterated. Defaults to the result of
        `retrieveQuestionDetails()` taken when this function was defined.

    ### Returns :
    completedTopicLists : defaultdict(set)
        Maps each topic to the set of solved question numbers that carry it
    '''
    completedTopicLists = defaultdict(set)

    for questionNo in questionData :
        # Shouldn't occur but just in case
        if questionNo not in questionTopicsDict :
            continue

        for topic in questionTopicsDict[questionNo].topics :
            completedTopicLists[topic].add(questionNo)

    return completedTopicLists
# # Individual Markdown Generation
#
# In[ ]:
# Locations used for the per-question markdown files, all read from the environment.
# getenv() returns None for an unset variable; the join() and .count() calls below
# then fail at import time.
README_PATH = getenv('README_PATH')                                 # same variable as read earlier in this file
QUESTIONS_FOLDER_FROM_README = getenv('QUESTIONS_PATH_FROM_README')
QUESTIONS_FOLDER = join(README_PATH, QUESTIONS_FOLDER_FROM_README)
MARKDOWN_PATH = getenv('QUESTION_MARKDOWNS_PATH_FROM_README')
# One '../' per '/' in MARKDOWN_PATH, followed by the questions folder.
# NOTE(review): presumably the relative path from a generated markdown file back
# to the submissions folder - confirm MARKDOWN_PATH's slash convention.
MARKDOWN_TO_SUBMISSIONS = '../' * MARKDOWN_PATH.count('/') + QUESTIONS_FOLDER_FROM_README
QUESTION_DATA_FOLDER_PATH = getenv('QUESTION_DATA_PATH')
QUESTION_TOPICS_FILE = getenv('LEETCODE_QUESTION_TOPICS')
QUESTION_DETAILS_FILE = getenv('LEETCODE_QUESTION_DETAILS')