Skip to content

Commit

Permalink
重构匹配逻辑 (#22)
Browse files Browse the repository at this point in the history
1. 添加事件预处理,用于提取小程序链接
2. 优化 链接/资源ID 提取逻辑
  • Loading branch information
fllesser authored Jan 1, 2025
1 parent 0d401d4 commit 95ab4f3
Show file tree
Hide file tree
Showing 18 changed files with 177 additions and 130 deletions.
2 changes: 1 addition & 1 deletion nonebot_plugin_resolver2/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from nonebot import get_driver, logger

from nonebot.plugin import PluginMetadata
from .matchers import resolvers, commands
from .matchers import resolvers
from .config import *
from .cookie import *

Expand Down
8 changes: 6 additions & 2 deletions nonebot_plugin_resolver2/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from nonebot import get_driver, require, get_plugin_config
from nonebot import (
get_driver,
require,
get_plugin_config
)
require("nonebot_plugin_localstore")
require("nonebot_plugin_apscheduler")
from nonebot_plugin_apscheduler import scheduler
Expand Down Expand Up @@ -30,6 +34,6 @@ class Config(BaseModel):
# 全局名称
NICKNAME: str = next(iter(get_driver().config.nickname), "")
# 根据是否为国外机器声明代理
PROXY: str = None if rconfig.r_is_oversea else rconfig.r_proxy
PROXY: str = "" if rconfig.r_is_oversea else rconfig.r_proxy
# 哔哩哔哩限制的最大视频时长(默认8分钟)单位:秒
DURATION_MAXIMUM: int = rconfig.r_video_duration_maximum
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions nonebot_plugin_resolver2/matchers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@

resolvers = {module: eval(module) for module in modules}

from .filter import *
commands = [enable_resolve, disable_resolve, check_resolve, enable_all_resolve, disable_all_resolve]
# from .filter import *
# commands = [enable_resolve, disable_resolve, check_resolve, enable_all_resolve, disable_all_resolve]
62 changes: 29 additions & 33 deletions nonebot_plugin_resolver2/matchers/bilibili.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from tqdm.asyncio import tqdm
from nonebot.log import logger
from nonebot.rule import Rule
from nonebot.typing import T_State
from nonebot.params import CommandArg
from nonebot.exception import ActionFailed
from nonebot.plugin.on import on_message, on_command
Expand All @@ -31,7 +31,12 @@
get_file_seg
)
from .filter import is_not_in_disable_group
from ..data_source.common import (
from .preprocess import (
r_keywords,
R_KEYWORD_KEY,
R_EXTRACT_KEY
)
from ..download.common import (
delete_boring_characters,
download_file_by_stream,
merge_av
Expand All @@ -54,52 +59,43 @@
'referer': 'https://www.bilibili.com'
}

def is_bilibili(event: MessageEvent) -> bool:
message = str(event.message).strip()
return any(key in message for key in {"bilibili.com", "b23.tv", "bili2233.cn", "BV"})

bilibili = on_message(
rule = Rule(
is_not_in_disable_group,
is_bilibili
)
rule = is_not_in_disable_group & r_keywords("bilibili", "b23", "bili2233", "BV")
)

bili_music = on_command(
cmd="bm",
block = True
)

@bilibili.handle()
async def _(bot: Bot, event: MessageEvent):
async def _(bot: Bot, state: T_State):
# 消息
message: str = str(event.message).strip()
text, keyword = state.get(R_EXTRACT_KEY), state.get(R_KEYWORD_KEY)
url, video_id = '', ''

url: str = ""
video_id: str = ""
# BV处理
if re.match(r'^BV[1-9a-zA-Z]{10}$', message):
# url = 'https://www.bilibili.com/video/' + message
video_id = message
# 处理短号、小程序问题
elif 'b23.tv' in message:
if keyword == 'BV':
if re.match(r'^BV[1-9a-zA-Z]{10}$', text):
video_id = text
elif keyword == 'b23':
# 处理短号、小程序
b_short_reg = r"(http:|https:)\/\/b23.tv\/[A-Za-z\d._?%&+\-=\/#]*"
if match := re.search(b_short_reg, message.replace("\\", "")):
if match := re.search(b_short_reg, text):
b_short_url = match.group(0)
async with httpx.AsyncClient() as client:
resp = await client.get(b_short_url, headers=BILIBILI_HEADERS, follow_redirects=True)
url = str(resp.url)
elif 'bili2233' in message:
elif keyword == 'bili2233':
# 处理新域名、小程序
b_new_reg = r"(http:|https:)\/\/bili2233.cn\/[A-Za-z\d._?%&+\-=\/#]*"
#await bilibili.send(message.replace("\\", ""))
if match := re.search(b_new_reg, message.replace("\\", "")):
if match := re.search(b_new_reg, text):
b_new_url = match.group(0)
async with httpx.AsyncClient() as client:
resp = await client.get(b_new_url, headers=BILIBILI_HEADERS, follow_redirects=True)
url = str(resp.url)

else:
url_reg = r"(http:|https:)\/\/(space|www|live).bilibili.com\/[A-Za-z\d._?%&+\-=\/#]*"
if match := re.search(url_reg, message):
url_reg = r"(http:|https:)\/\/(space|www|live|m)?.?bilibili.com\/[A-Za-z\d._?%&+\-=\/#]*"
if match := re.search(url_reg, text):
url = match.group(0)
if url:
# ===============发现解析的是动态,转移一下===============
Expand Down Expand Up @@ -174,8 +170,8 @@ async def _(bot: Bot, event: MessageEvent):

if video_id:
v = video.Video(bvid = video_id, credential=credential)
elif match := re.search(r"video\/[^\?\/ ]+", url):
video_id = match.group(0).split('/')[1]
elif match := re.search(r"(av\d+|BV[A-Za-z0-9]{10})", url):
video_id = match.group(1)
if "av" in video_id:
v = video.Video(aid=int(video_id.split("av")[1]), credential=credential)
else:
Expand All @@ -186,11 +182,11 @@ async def _(bot: Bot, event: MessageEvent):
segs: list[MessageSegment | str] = []
try:
video_info = await v.get_info()
if video_info is None:
await bilibili.finish(Message(f"{NICKNAME}解析 | 哔哩哔哩 - 出错,无法获取数据!"))
await bilibili.send(f'{NICKNAME}解析 | 哔哩哔哩 - 视频')
if not video_info:
raise Exception("video_info is None")
except Exception as e:
await bilibili.finish(Message(f"{NICKNAME}解析 | 哔哩哔哩 - 出错\n{e}"))
await bilibili.finish(f"{NICKNAME}解析 | 哔哩哔哩 - 出错 {e}")
await bilibili.send(f'{NICKNAME}解析 | 哔哩哔哩 - 视频')
video_title, video_cover, video_desc, video_duration = video_info['title'], video_info['pic'], video_info['desc'], video_info['duration']
# 校准 分 p 的情况
page_num = 0
Expand Down
2 changes: 1 addition & 1 deletion nonebot_plugin_resolver2/matchers/douyin.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from .utils import get_video_seg, construct_nodes
from .filter import is_not_in_disable_group

from ..data_source.common import download_img
from ..download.common import download_img
from ..parsers.base import VideoInfo
from ..parsers.douyin import DouYin
from ..config import NICKNAME
Expand Down
56 changes: 22 additions & 34 deletions nonebot_plugin_resolver2/matchers/kugou.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
import httpx
import asyncio

from nonebot import on_keyword
from nonebot.rule import Rule
from nonebot.log import logger
from nonebot.typing import T_State
from nonebot.plugin import on_message
from nonebot.adapters.onebot.v11 import (
Message,
MessageEvent,
Expand All @@ -14,52 +15,41 @@
)
from .utils import get_file_seg
from .filter import is_not_in_disable_group

from ..data_source.common import download_audio
from .preprocess import (
r_keywords,
R_KEYWORD_KEY,
R_EXTRACT_KEY
)
from ..download.common import download_audio
from ..constant import COMMON_HEADER
from ..config import NICKNAME

# KG临时接口
KUGOU_TEMP_API = "https://www.hhlqilongzhu.cn/api/dg_kugouSQ.php?msg={}&n=1&type=json"

kugou = on_keyword(
keywords = {"kugou.com"},
rule = Rule(is_not_in_disable_group)
kugou = on_message(
rule = is_not_in_disable_group & r_keywords("kugou.com")
)

@kugou.handle()
async def _(bot: Bot, event: MessageEvent):
message = event.message.extract_plain_text().strip()
# logger.info(message)
reg1 = r"https?://.*?kugou\.com.*?(?=\s|$|\n)"
reg2 = r'jumpUrl":\s*"(https?:\\/\\/[^"]+)"'
reg3 = r'jumpUrl":\s*"(https?://[^"]+)"'
async def _(bot: Bot, state: T_State):
text = state.get(R_EXTRACT_KEY)
pattern = r"https?://.*?kugou\.com.*?(?=\s|$|\n)"
# 处理卡片问题
if 'com.tencent.structmsg' in message:
if match := re.search(reg2, message):
get_url = match.group(1)
else:
if match := re.search(reg3, message):
get_url = match.group(1)
else:
await kugou.send(Message(f"{NICKNAME}解析 | 酷狗音乐 - 获取链接失败"))
get_url = None
return
if get_url:
url = json.loads('"' + get_url + '"')
if match := re.search(pattern, text):
url = match.group(0)
else:
match = re.search(reg1, message)
url = match.group()

# 使用 httpx 获取 URL 的标题
logger.info(f'无效链接,忽略 - {text}')
return
# 使用 httpx 获取 URL 的标题
async with httpx.AsyncClient() as client:
response = await client.get(url, follow_redirects=True)
if response.status_code != 200:
await kugou.finish(f"{NICKNAME}解析 | 酷狗音乐 - 获取链接失败")
title = response.text
get_name = r"<title>(.*?)_高音质在线试听"
if name := re.search(get_name, title):
kugou_title = name.group(1) # 只输出歌曲名和歌手名的部分
pattern = r"<title>(.*?)_高音质在线试听"
if match := re.search(pattern, title):
kugou_title = match.group(1) # 只输出歌曲名和歌手名的部分
async with httpx.AsyncClient() as client:
resp = await client.get(f"{KUGOU_TEMP_API.replace('{}', kugou_title)}", headers=COMMON_HEADER)
kugou_vip_data = resp.json()
Expand All @@ -80,5 +70,3 @@ async def _(bot: Bot, event: MessageEvent):
await kugou.finish(get_file_seg(audio_path, f'{kugou_name}-{kugou_singer}.{audio_path.name.split(".")[-1]}'))
else:
await kugou.send(f"{NICKNAME}解析 | 酷狗音乐 - 不支持当前外链,请重新分享再试")


38 changes: 19 additions & 19 deletions nonebot_plugin_resolver2/matchers/ncm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,23 @@
import httpx
import asyncio

from nonebot import on_message
from nonebot.rule import Rule
from nonebot.plugin import on_message
from nonebot.typing import T_State
from nonebot.adapters.onebot.v11 import (
Message,
MessageEvent,
Bot,
MessageSegment
)

from .filter import is_not_in_disable_group
from .utils import get_file_seg
from .preprocess import (
r_keywords,
R_KEYWORD_KEY,
R_EXTRACT_KEY
)
from ..constant import COMMON_HEADER
from ..data_source.common import download_audio
from ..download.common import download_audio
from ..config import *

# NCM获取歌曲信息链接
Expand All @@ -23,27 +27,23 @@
# NCM临时接口
NETEASE_TEMP_API = "https://www.hhlqilongzhu.cn/api/dg_wyymusic.php?id={}&br=7&type=json"

def is_ncm(event: MessageEvent) -> bool:
message = str(event.message).strip()
return any(key in message for key in {"music.163.com", "163cn.tv"})

ncm = on_message(
rule = Rule(is_ncm, is_not_in_disable_group)
rule = is_not_in_disable_group & r_keywords("music.163.com", "163cn.tv")
)

@ncm.handle()
async def ncm_handler(bot: Bot, event: MessageEvent):
message = str(event.message).strip()
async def _(bot: Bot, state: T_State):
text, keyword = state.get(R_EXTRACT_KEY), state.get(R_KEYWORD_KEY)
# 解析短链接
if "163cn.tv" in message:
if match := re.search(r"(http:|https:)\/\/163cn\.tv\/([a-zA-Z0-9]+)", message):
message = match.group(0)
# message = str(httpx.head(message, follow_redirects=True).url)
if keyword == "163cn.tv":
if match := re.search(r"(http:|https:)\/\/163cn\.tv\/([a-zA-Z0-9]+)", text):
url = match.group(0)
async with httpx.AsyncClient() as client:
resp = await client.head(message, follow_redirects=True)
message = str(resp.url)

if match := re.search(r"id=(\d+)", message):
resp = await client.head(url, follow_redirects=True)
url = str(resp.url)
else:
url = text
if match := re.search(r"id=(\d+)", url):
ncm_id = match.group(1)
else:
await ncm.finish(f"{NICKNAME}解析 | 网易云 - 获取链接失败")
Expand Down
63 changes: 63 additions & 0 deletions nonebot_plugin_resolver2/matchers/preprocess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import json

from typing import Literal
from nonebot.log import logger
from nonebot.message import event_preprocessor
from nonebot.typing import T_State
from nonebot.rule import Rule
from nonebot.adapters.onebot.v11 import MessageEvent


R_KEYWORD_KEY: Literal["_r_keyword"] = "_r_keyword"
R_EXTRACT_KEY: Literal["_r_extract"] = "_r_extract"

@event_preprocessor
def _(event: MessageEvent, state: T_State):
    """Store the text that resolver rules should match against in ``state``.

    The stripped plain text of the message is used by default.  When the
    message carries a ``json`` segment (share card / mini-program), the link
    embedded in the card replaces it: ``meta.detail_1.qqdocurl`` is tried
    first, then ``meta.news.jumpUrl``.

    The result is written to ``state[R_EXTRACT_KEY]``.  If the card cannot be
    parsed, or contains no usable link, the plain text is kept instead of
    being replaced by ``None``.

    Args:
        event: The incoming message event.
        state: Per-event state dict that later rules and handlers read.
    """
    message = event.get_message()
    text = message.extract_plain_text().strip()
    # Only the first JSON segment is inspected.
    json_seg = next((seg for seg in message if seg.type == 'json'), None)
    if json_seg is not None:
        try:
            data_str = json_seg.data.get('data') or ''
            # Commas arrive escaped as "&#44;" and must be restored before
            # the payload can be decoded as JSON.
            data = json.loads(data_str.replace('&#44;', ','))
            meta = data.get('meta') or {}
            url = None
            if detail := meta.get('detail_1'):
                url = detail.get('qqdocurl')
            elif news := meta.get('news'):
                url = news.get('jumpUrl')
            # Overwrite the plain text only with a real, non-empty link so a
            # card without the expected field cannot turn ``text`` into None.
            if isinstance(url, str) and url:
                # Drop escaping backslashes and decode "&amp;" so the URL
                # regexes used by the resolvers can match it.
                text = url.replace('\\', '').replace("&amp;", "&")
        except (json.JSONDecodeError, AttributeError, TypeError) as e:
            # Best effort: an unexpected card layout must not block the
            # event, but the failure should be visible when debugging.
            logger.debug(f"failed to extract link from json segment: {e!r}")
    state[R_EXTRACT_KEY] = text

class RKeywordsRule:
    """Rule that checks whether the extracted message text contains a keyword.

    Enhanced variant of a keyword rule: it reads the text stored under
    ``R_EXTRACT_KEY`` in the state and records the first matching keyword
    under ``R_KEYWORD_KEY``.
    """

    __slots__ = ("keywords",)

    def __init__(self, *keywords: str):
        # Kept as a tuple so the declaration order decides which keyword wins.
        self.keywords = keywords

    def __repr__(self) -> str:
        return f"RKeywords(keywords={self.keywords})"

    def __eq__(self, other: object) -> bool:
        # Two rules are equal when they hold the same set of keywords,
        # regardless of order or duplicates.
        if not isinstance(other, RKeywordsRule):
            return False
        return frozenset(self.keywords) == frozenset(other.keywords)

    def __hash__(self) -> int:
        # Hash on the same frozenset used by __eq__ to keep them consistent.
        return hash(frozenset(self.keywords))

    async def __call__(self, state: T_State) -> bool:
        """Return True and remember the keyword if the text contains one."""
        haystack = state.get(R_EXTRACT_KEY)
        # No extracted text means there is nothing to search in.
        hit = (
            next((kw for kw in self.keywords if kw in haystack), None)
            if haystack
            else None
        )
        if not hit:
            return False
        state[R_KEYWORD_KEY] = hit
        return True


def r_keywords(*keywords: str) -> Rule:
    """Build a ``Rule`` that wraps an ``RKeywordsRule`` for ``keywords``."""
    checker = RKeywordsRule(*keywords)
    return Rule(checker)
2 changes: 1 addition & 1 deletion nonebot_plugin_resolver2/matchers/tiktok.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from .filter import is_not_in_disable_group
from .utils import get_video_seg
from ..data_source.ytdlp import get_video_info, ytdlp_download_video
from ..download.ytdlp import get_video_info, ytdlp_download_video
from ..config import *


Expand Down
Loading

0 comments on commit 95ab4f3

Please sign in to comment.