Skip to content

Commit

Permalink
Merge pull request #124 from python-discord/invite-regex
Browse files Browse the repository at this point in the history
  • Loading branch information
ChrisLovering authored Aug 16, 2022
2 parents db0ff48 + e96d063 commit 009a139
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 31 deletions.
7 changes: 4 additions & 3 deletions botcore/utils/regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
import re

# Matches a Discord invite link and captures the invite code in the named group ``invite``.
#
# The pattern tolerates common obfuscations: "," or the word "dot" in place of ".",
# and the word "slash" in place of "/". Matching is case-insensitive.
# A bare ".gg/<code>" is accepted only when the separator is not directly preceded
# by a word character (negative lookbehind), so e.g. "eggs.gg/x" style text embedded
# in a word does not match that alternative.
DISCORD_INVITE = re.compile(
    r"(https?://)?(www\.)?"                       # Optional http(s) and www.
    r"(discord([.,]|dot)gg|"                      # Could be discord.gg/
    r"discord([.,]|dot)com(/|slash)invite|"       # or discord.com/invite/
    r"discordapp([.,]|dot)com(/|slash)invite|"    # or discordapp.com/invite/
    r"discord([.,]|dot)me|"                       # or discord.me
    r"discord([.,]|dot)li|"                       # or discord.li
    r"discord([.,]|dot)io|"                       # or discord.io
    r"((?<!\w)([.,]|dot))gg"                      # or .gg/
    r")(/|slash)"                                 # closes the host alternation, then / or 'slash'
    r"(?P<invite>\S+)",                           # the invite code itself (runs to next whitespace)
    flags=re.IGNORECASE
)
Expand All @@ -32,7 +33,7 @@
r"(?P<code>.*?)" # extract all code inside the markup
r"\s*" # any more whitespace before the end of the code markup
r"(?P=delim)", # match the exact same delimiter from the start again
re.DOTALL | re.IGNORECASE # "." also matches newlines, case insensitive
flags=re.DOTALL | re.IGNORECASE # "." also matches newlines, case insensitive
)
"""
Regex for formatted code, using Discord's code blocks.
Expand All @@ -44,7 +45,7 @@
r"^(?:[ \t]*\n)*" # any blank (empty or tabs/spaces only) lines before the code
r"(?P<code>.*?)" # extract all the rest as code
r"\s*$", # any trailing whitespace until the end of the string
re.DOTALL # "." also matches newlines
flags=re.DOTALL # "." also matches newlines
)
"""
Regex for raw code, *not* using Discord's code blocks.
Expand Down
3 changes: 3 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
Changelog
=========
- :release:`8.1.0 <16th August 2022>`
- :support:`124` Updated the :obj:`botcore.utils.regex.DISCORD_INVITE` regex to optionally match a leading "http[s]://" and "www.".


- :release:`8.0.0 <27th July 2022>`
- :breaking:`110` Bump async-rediscache to v1.0.0-rc2
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "bot-core"
version = "8.0.0"
version = "8.1.0"
description = "Bot-Core provides the core functionality and utilities for the bots of the Python Discord community."
authors = ["Python Discord <[email protected]>"]
license = "MIT"
Expand Down
69 changes: 42 additions & 27 deletions tests/botcore/utils/test_regex.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,18 @@
from botcore.utils.regex import DISCORD_INVITE


def use_regex(s: str) -> Optional[str]:
"""Helper function to run the Regex on a string.
def match_regex(s: str) -> Optional[str]:
    """
    Apply DISCORD_INVITE to `s` with ``re.match`` (anchored at the start of the string).

    Return the text of the ``invite`` capture group when the pattern matches,
    otherwise return None.
    """
    found = DISCORD_INVITE.match(s)
    if found is None:
        return None
    return found.group("invite")


def search_regex(s: str) -> Optional[str]:
"""Helper function to run re.search on a string.
Return the invite capture group, if the string matches the pattern
else return None
Expand All @@ -19,32 +29,37 @@ class UtilsRegexTests(unittest.TestCase):
def test_discord_invite_positives(self):
    """Test the DISCORD_INVITE regex on a set of strings we would expect to capture."""

    # Plain invites on every supported host, with and without scheme / www prefix.
    self.assertEqual(match_regex("discord.gg/python"), "python")
    self.assertEqual(match_regex("https://discord.gg/python"), "python")
    self.assertEqual(match_regex("https://www.discord.gg/python"), "python")
    self.assertEqual(match_regex("discord.com/invite/python"), "python")
    self.assertEqual(match_regex("www.discord.com/invite/python"), "python")
    self.assertEqual(match_regex("discordapp.com/invite/python"), "python")
    self.assertEqual(match_regex("discord.me/python"), "python")
    self.assertEqual(match_regex("discord.li/python"), "python")
    self.assertEqual(match_regex("discord.io/python"), "python")
    self.assertEqual(match_regex(".gg/python"), "python")

    # The invite group captures everything up to the next whitespace,
    # including extra path segments, query strings and fragments.
    self.assertEqual(match_regex("discord.gg/python/but/extra"), "python/but/extra")
    self.assertEqual(match_regex("discord.me/this/isnt/python"), "this/isnt/python")
    self.assertEqual(match_regex(".gg/a/a/a/a/a/a/a/a/a/a/a"), "a/a/a/a/a/a/a/a/a/a/a")
    self.assertEqual(match_regex("discordapp.com/invite/python/snakescord"), "python/snakescord")
    self.assertEqual(match_regex("http://discord.gg/python/%20/notpython"), "python/%20/notpython")
    self.assertEqual(match_regex("discord.gg/python?=ts/notpython"), "python?=ts/notpython")
    self.assertEqual(match_regex("https://discord.gg/python#fragment/notpython"), "python#fragment/notpython")
    self.assertEqual(match_regex("https://discord.gg/python/~/notpython"), "python/~/notpython")

    # Invites surrounded by other text / whitespace are found with re.search.
    self.assertEqual(search_regex("https://discord.gg/python with whitespace"), "python")
    self.assertEqual(search_regex(" https://discord.gg/python "), "python")

def test_discord_invite_negatives(self):
    """Test the DISCORD_INVITE regex on a set of strings we would expect to not capture."""

    # Strings that are not invites at all, or that lack an invite code after the host.
    self.assertIsNone(match_regex("another string"))
    self.assertIsNone(match_regex("https://pythondiscord.com"))
    self.assertIsNone(match_regex("https://discord.com"))
    self.assertIsNone(match_regex("https://discord.gg"))
    self.assertIsNone(match_regex("https://discord.gg/ python"))

    # Searching anywhere in the string must not find an invite either.
    self.assertIsNone(search_regex("https://discord.com with whitespace"))
    self.assertIsNone(search_regex(" https://discord.com "))

0 comments on commit 009a139

Please sign in to comment.