From 4ff87abda66ac8c6b988477c03fd7163f95e259f Mon Sep 17 00:00:00 2001 From: ionite34 Date: Tue, 16 Aug 2022 18:29:48 -0400 Subject: [PATCH 1/9] Added regex for leading https or www --- botcore/utils/regex.py | 1 + 1 file changed, 1 insertion(+) diff --git a/botcore/utils/regex.py b/botcore/utils/regex.py index 56c50dadd..e074a342e 100644 --- a/botcore/utils/regex.py +++ b/botcore/utils/regex.py @@ -3,6 +3,7 @@ import re DISCORD_INVITE = re.compile( + r"((https?://)?(www\.)?)?" r"(discord([.,]|dot)gg|" # Could be discord.gg/ r"discord([.,]|dot)com(/|slash)invite|" # or discord.com/invite/ r"discordapp([.,]|dot)com(/|slash)invite|" # or discordapp.com/invite/ From b4ea1d65118d111e58ca656925e80662971d7cc4 Mon Sep 17 00:00:00 2001 From: ionite34 Date: Tue, 16 Aug 2022 18:31:00 -0400 Subject: [PATCH 2/9] Expanded regex testing for discord invites - Added tests for both match and search, previously the fact that `re.match` did not work with http/https links was not made apparent by the tests as only `re.search` is tested. --- tests/botcore/utils/test_regex.py | 69 +++++++++++++++++++------------ 1 file changed, 42 insertions(+), 27 deletions(-) diff --git a/tests/botcore/utils/test_regex.py b/tests/botcore/utils/test_regex.py index 2ffd0e46c..4a7390a59 100644 --- a/tests/botcore/utils/test_regex.py +++ b/tests/botcore/utils/test_regex.py @@ -4,8 +4,18 @@ from botcore.utils.regex import DISCORD_INVITE -def use_regex(s: str) -> Optional[str]: - """Helper function to run the Regex on a string. +def match_regex(s: str) -> Optional[str]: + """Helper function to run re.search on a string. + + Return the invite capture group, if the string matches the pattern + else return None + """ + result = DISCORD_INVITE.match(s) + return result if result is None else result.group("invite") + + +def search_regex(s: str) -> Optional[str]: + """Helper function to run re.match on a string. 
Return the invite capture group, if the string matches the pattern else return None @@ -19,32 +29,37 @@ class UtilsRegexTests(unittest.TestCase): def test_discord_invite_positives(self): """Test the DISCORD_INVITE regex on a set of strings we would expect to capture.""" - self.assertEqual(use_regex("discord.gg/python"), "python") - self.assertEqual(use_regex("https://discord.gg/python"), "python") - self.assertEqual(use_regex("discord.com/invite/python"), "python") - self.assertEqual(use_regex("discordapp.com/invite/python"), "python") - self.assertEqual(use_regex("discord.me/python"), "python") - self.assertEqual(use_regex("discord.li/python"), "python") - self.assertEqual(use_regex("discord.io/python"), "python") - self.assertEqual(use_regex(".gg/python"), "python") - - self.assertEqual(use_regex("discord.gg/python/but/extra"), "python/but/extra") - self.assertEqual(use_regex("discord.me/this/isnt/python"), "this/isnt/python") - self.assertEqual(use_regex(".gg/a/a/a/a/a/a/a/a/a/a/a"), "a/a/a/a/a/a/a/a/a/a/a") - self.assertEqual(use_regex("discordapp.com/invite/python/snakescord"), "python/snakescord") - self.assertEqual(use_regex("http://discord.gg/python/%20/notpython"), "python/%20/notpython") - self.assertEqual(use_regex("discord.gg/python?=ts/notpython"), "python?=ts/notpython") - self.assertEqual(use_regex("https://discord.gg/python#fragment/notpython"), "python#fragment/notpython") - self.assertEqual(use_regex("https://discord.gg/python/~/notpython"), "python/~/notpython") - - self.assertEqual(use_regex("https://discord.gg/python with whitespace"), "python") - self.assertEqual(use_regex(" https://discord.gg/python "), "python") + self.assertEqual(match_regex("discord.gg/python"), "python") + self.assertEqual(match_regex("https://discord.gg/python"), "python") + self.assertEqual(match_regex("https://www.discord.gg/python"), "python") + self.assertEqual(match_regex("discord.com/invite/python"), "python") + 
self.assertEqual(match_regex("www.discord.com/invite/python"), "python") + self.assertEqual(match_regex("discordapp.com/invite/python"), "python") + self.assertEqual(match_regex("discord.me/python"), "python") + self.assertEqual(match_regex("discord.li/python"), "python") + self.assertEqual(match_regex("discord.io/python"), "python") + self.assertEqual(match_regex(".gg/python"), "python") + + self.assertEqual(match_regex("discord.gg/python/but/extra"), "python/but/extra") + self.assertEqual(match_regex("discord.me/this/isnt/python"), "this/isnt/python") + self.assertEqual(match_regex(".gg/a/a/a/a/a/a/a/a/a/a/a"), "a/a/a/a/a/a/a/a/a/a/a") + self.assertEqual(match_regex("discordapp.com/invite/python/snakescord"), "python/snakescord") + self.assertEqual(match_regex("http://discord.gg/python/%20/notpython"), "python/%20/notpython") + self.assertEqual(match_regex("discord.gg/python?=ts/notpython"), "python?=ts/notpython") + self.assertEqual(match_regex("https://discord.gg/python#fragment/notpython"), "python#fragment/notpython") + self.assertEqual(match_regex("https://discord.gg/python/~/notpython"), "python/~/notpython") + + self.assertEqual(search_regex("https://discord.gg/python with whitespace"), "python") + self.assertEqual(search_regex(" https://discord.gg/python "), "python") def test_discord_invite_negatives(self): """Test the DISCORD_INVITE regex on a set of strings we would expect to not capture.""" - self.assertEqual(use_regex("another string"), None) - self.assertEqual(use_regex("https://pythondiscord.com"), None) - self.assertEqual(use_regex("https://discord.com"), None) - self.assertEqual(use_regex("https://discord.gg"), None) - self.assertEqual(use_regex("https://discord.gg/ python"), None) + self.assertEqual(match_regex("another string"), None) + self.assertEqual(match_regex("https://pythondiscord.com"), None) + self.assertEqual(match_regex("https://discord.com"), None) + self.assertEqual(match_regex("https://discord.gg"), None) + 
self.assertEqual(match_regex("https://discord.gg/ python"), None) + + self.assertEqual(search_regex("https://discord.com with whitespace"), None) + self.assertEqual(search_regex(" https://discord.com "), None) From 360ef04dba45950c013e4aee9ab63d0dc5386000 Mon Sep 17 00:00:00 2001 From: ionite34 Date: Tue, 16 Aug 2022 18:31:38 -0400 Subject: [PATCH 3/9] Removed a redundant regex character class --- botcore/utils/regex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/botcore/utils/regex.py b/botcore/utils/regex.py index e074a342e..76311ca7e 100644 --- a/botcore/utils/regex.py +++ b/botcore/utils/regex.py @@ -11,7 +11,7 @@ r"discord([.,]|dot)li|" # or discord.li r"discord([.,]|dot)io|" # or discord.io. r"((?\S+)", # the invite code itself flags=re.IGNORECASE ) From 8b71cb416a6b27c4e7829a0b4e898c0441c5f84a Mon Sep 17 00:00:00 2001 From: ionite34 Date: Tue, 16 Aug 2022 18:40:56 -0400 Subject: [PATCH 4/9] Corrected docstrings --- tests/botcore/utils/test_regex.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/botcore/utils/test_regex.py b/tests/botcore/utils/test_regex.py index 4a7390a59..491e22bd8 100644 --- a/tests/botcore/utils/test_regex.py +++ b/tests/botcore/utils/test_regex.py @@ -5,7 +5,7 @@ def match_regex(s: str) -> Optional[str]: - """Helper function to run re.search on a string. + """Helper function to run re.match on a string. Return the invite capture group, if the string matches the pattern else return None @@ -15,7 +15,7 @@ def match_regex(s: str) -> Optional[str]: def search_regex(s: str) -> Optional[str]: - """Helper function to run re.match on a string. + """Helper function to run re.search on a string. 
Return the invite capture group, if the string matches the pattern else return None From 11327c6d9e9e702afe5e3dfa158fa78251d87e9b Mon Sep 17 00:00:00 2001 From: ionite34 Date: Tue, 16 Aug 2022 18:41:49 -0400 Subject: [PATCH 5/9] Unified usage of flags kwarg --- botcore/utils/regex.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/botcore/utils/regex.py b/botcore/utils/regex.py index 76311ca7e..b13b04084 100644 --- a/botcore/utils/regex.py +++ b/botcore/utils/regex.py @@ -33,7 +33,7 @@ r"(?P<code>.*?)" # extract all code inside the markup r"\s*" # any more whitespace before the end of the code markup r"(?P=delim)", # match the exact same delimiter from the start again - re.DOTALL | re.IGNORECASE # "." also matches newlines, case insensitive + flags=re.DOTALL | re.IGNORECASE # "." also matches newlines, case insensitive ) """ Regex for formatted code, using Discord's code blocks. @@ -45,7 +45,7 @@ r"^(?:[ \t]*\n)*" # any blank (empty or tabs/spaces only) lines before the code r"(?P<code>.*?)" # extract all the rest as code r"\s*$", # any trailing whitespace until the end of the string - re.DOTALL # "." also matches newlines + flags=re.DOTALL # "." also matches newlines ) """ Regex for raw code, *not* using Discord's code blocks. From d8735bc057d2100ecabcd49827b94ae0ceaf224f Mon Sep 17 00:00:00 2001 From: ionite34 Date: Tue, 16 Aug 2022 18:44:39 -0400 Subject: [PATCH 6/9] Removed redundant capture group --- botcore/utils/regex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/botcore/utils/regex.py b/botcore/utils/regex.py index b13b04084..4f91d3e0e 100644 --- a/botcore/utils/regex.py +++ b/botcore/utils/regex.py @@ -3,7 +3,7 @@ import re DISCORD_INVITE = re.compile( - r"((https?://)?(www\.)?)?" + r"(https?://)?(www\.)?" 
r"(discord([.,]|dot)gg|" # Could be discord.gg/ r"discord([.,]|dot)com(/|slash)invite|" # or discord.com/invite/ r"discordapp([.,]|dot)com(/|slash)invite|" # or discordapp.com/invite/ From ef4564cf9d78709135e51cb102a027cdd784634a Mon Sep 17 00:00:00 2001 From: ionite34 Date: Tue, 16 Aug 2022 19:06:16 -0400 Subject: [PATCH 7/9] Version bump and changelog --- docs/changelog.rst | 3 +++ pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index f944c8d6c..2bb68dbba 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -3,6 +3,9 @@ Changelog ========= +- :release:`8.1.0 <16th August 2022>` +- :support:`124` Updated Discord Invite regex to match leading http, https, www + - :release:`8.0.0 <27th July 2022>` - :breaking:`110` Bump async-rediscache to v1.0.0-rc2 diff --git a/pyproject.toml b/pyproject.toml index 3448c67f3..2680efab5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "bot-core" -version = "8.0.0" +version = "8.1.0" description = "Bot-Core provides the core functionality and utilities for the bots of the Python Discord community." authors = ["Python Discord "] license = "MIT" From 8330044eae8c7d40151538d7403f97ca22759640 Mon Sep 17 00:00:00 2001 From: ionite34 Date: Tue, 16 Aug 2022 19:11:29 -0400 Subject: [PATCH 8/9] Added comments for regex addition --- botcore/utils/regex.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/botcore/utils/regex.py b/botcore/utils/regex.py index 4f91d3e0e..de82a1ed1 100644 --- a/botcore/utils/regex.py +++ b/botcore/utils/regex.py @@ -3,7 +3,7 @@ import re DISCORD_INVITE = re.compile( - r"(https?://)?(www\.)?" + r"(https?://)?(www\.)?" # Optional http(s) and www. 
r"(discord([.,]|dot)gg|" # Could be discord.gg/ r"discord([.,]|dot)com(/|slash)invite|" # or discord.com/invite/ r"discordapp([.,]|dot)com(/|slash)invite|" # or discordapp.com/invite/ From e96d063b0c4c3895b433fdc76d156b5d3caab985 Mon Sep 17 00:00:00 2001 From: Ionite Date: Tue, 16 Aug 2022 19:11:48 -0400 Subject: [PATCH 9/9] Update docs/changelog.rst Co-authored-by: ChrisJL --- docs/changelog.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 2bb68dbba..643a54fc7 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -4,7 +4,7 @@ Changelog ========= - :release:`8.1.0 <16th August 2022>` -- :support:`124` Updated Discord Invite regex to match leading http, https, www +- :support:`124` Updated :obj:`botcore.utils.regex.DISCORD_INVITE` regex to optionally match leading "http[s]" and "www". - :release:`8.0.0 <27th July 2022>`