630 lines
21 KiB
Python
630 lines
21 KiB
Python
"""
|
||
BBCode处理服务
|
||
基于 osu-web 官方实现的 BBCode 解析器
|
||
支持所有 osu! 官方 BBCode 标签
|
||
"""
|
||
|
||
import html
|
||
import re
|
||
from typing import ClassVar
|
||
|
||
from app.exceptions.userpage import (
|
||
ContentEmptyError,
|
||
ContentTooLongError,
|
||
ForbiddenTagError,
|
||
)
|
||
|
||
import bleach
|
||
from bleach.css_sanitizer import CSSSanitizer
|
||
|
||
|
||
class BBCodeService:
|
||
"""BBCode处理服务类 - 基于 osu-web 官方实现"""
|
||
|
||
# 允许的HTML标签和属性 - 基于官方实现
|
||
ALLOWED_TAGS: ClassVar[list[str]] = [
|
||
"a",
|
||
"audio",
|
||
"blockquote",
|
||
"br",
|
||
"button",
|
||
"center",
|
||
"code",
|
||
"del",
|
||
"div",
|
||
"em",
|
||
"h2",
|
||
"h4",
|
||
"iframe",
|
||
"img",
|
||
"li",
|
||
"ol",
|
||
"p",
|
||
"pre",
|
||
"span",
|
||
"strong",
|
||
"u",
|
||
"ul",
|
||
# imagemap 相关
|
||
"map",
|
||
"area",
|
||
# 自定义容器
|
||
"details",
|
||
"summary",
|
||
]
|
||
|
||
ALLOWED_ATTRIBUTES: ClassVar[dict[str, list[str]]] = {
|
||
"a": ["href", "rel", "class", "data-user-id", "target", "style", "title"],
|
||
"audio": ["controls", "preload", "src"],
|
||
"blockquote": [],
|
||
"button": ["type", "class", "style"],
|
||
"center": [],
|
||
"code": [],
|
||
"div": ["class", "style"],
|
||
"details": ["class"],
|
||
"h2": [],
|
||
"h4": [],
|
||
"iframe": ["class", "src", "allowfullscreen", "width", "height", "frameborder"],
|
||
"img": ["class", "loading", "src", "width", "height", "usemap", "alt", "style"],
|
||
"map": ["name"],
|
||
"area": ["href", "style", "title", "class"],
|
||
"ol": ["class"],
|
||
"span": ["class", "style", "title"],
|
||
"summary": [],
|
||
"ul": ["class"],
|
||
"*": ["class"],
|
||
}
|
||
|
||
# 危险的BBCode标签(不允许)
|
||
FORBIDDEN_TAGS: ClassVar[list[str]] = [
|
||
"script",
|
||
"iframe",
|
||
"object",
|
||
"embed",
|
||
"form",
|
||
"input",
|
||
"textarea",
|
||
"select",
|
||
"option",
|
||
"meta",
|
||
"link",
|
||
"style",
|
||
"title",
|
||
"head",
|
||
"html",
|
||
"body",
|
||
]
|
||
|
||
@classmethod
|
||
def parse_bbcode(cls, text: str) -> str:
|
||
"""
|
||
解析BBCode文本并转换为HTML
|
||
基于 osu-web BBCodeFromDB.php 的实现
|
||
|
||
Args:
|
||
text: 包含BBCode的原始文本
|
||
|
||
Returns:
|
||
转换后的HTML字符串
|
||
"""
|
||
if not text:
|
||
return ""
|
||
|
||
# 预处理:转义HTML实体
|
||
text = html.escape(text)
|
||
|
||
# 按照 osu-web 的解析顺序进行处理
|
||
# 块级标签处理
|
||
text = cls._parse_imagemap(text)
|
||
text = cls._parse_box(text)
|
||
text = cls._parse_code(text)
|
||
text = cls._parse_list(text)
|
||
text = cls._parse_notice(text)
|
||
text = cls._parse_quote(text)
|
||
text = cls._parse_heading(text)
|
||
|
||
# 行内标签处理
|
||
text = cls._parse_audio(text)
|
||
text = cls._parse_bold(text)
|
||
text = cls._parse_centre(text)
|
||
text = cls._parse_inline_code(text)
|
||
text = cls._parse_colour(text)
|
||
text = cls._parse_email(text)
|
||
text = cls._parse_image(text)
|
||
text = cls._parse_italic(text)
|
||
text = cls._parse_size(text)
|
||
text = cls._parse_smilies(text)
|
||
text = cls._parse_spoiler(text)
|
||
text = cls._parse_strike(text)
|
||
text = cls._parse_underline(text)
|
||
text = cls._parse_url(text)
|
||
text = cls._parse_youtube(text)
|
||
text = cls._parse_profile(text)
|
||
|
||
# 换行处理
|
||
text = text.replace("\n", "<br />")
|
||
|
||
return text
|
||
|
||
@classmethod
|
||
def _parse_audio(cls, text: str) -> str:
|
||
"""解析 [audio] 标签"""
|
||
pattern = r"\[audio\]([^\[]+)\[/audio\]"
|
||
|
||
def replace_audio(match):
|
||
url = match.group(1).strip()
|
||
return f'<audio controls preload="none" src="{url}"></audio>'
|
||
|
||
return re.sub(pattern, replace_audio, text, flags=re.IGNORECASE)
|
||
|
||
@classmethod
|
||
def _parse_bold(cls, text: str) -> str:
|
||
"""解析 [b] 标签"""
|
||
text = re.sub(r"\[b\]", "<strong>", text, flags=re.IGNORECASE)
|
||
text = re.sub(r"\[/b\]", "</strong>", text, flags=re.IGNORECASE)
|
||
return text
|
||
|
||
@classmethod
|
||
def _parse_box(cls, text: str) -> str:
|
||
"""解析 [box] 和 [spoilerbox] 标签"""
|
||
# [box=title] 格式
|
||
pattern = r"\[box=([^\]]+)\](.*?)\[/box\]"
|
||
|
||
def replace_box_with_title(match):
|
||
title = match.group(1)
|
||
content = match.group(2)
|
||
return (
|
||
f"<div class='js-spoilerbox bbcode-spoilerbox'>"
|
||
f"<button type='button' class='js-spoilerbox__link bbcode-spoilerbox__link' "
|
||
f"style='background: none; border: none; cursor: pointer; padding: 0; text-align: left; width: 100%;'>"
|
||
f"<span class='bbcode-spoilerbox__link-icon'></span>{title}</button>"
|
||
f"<div class='js-spoilerbox__body bbcode-spoilerbox__body'>{content}</div></div>"
|
||
)
|
||
|
||
text = re.sub(pattern, replace_box_with_title, text, flags=re.DOTALL | re.IGNORECASE)
|
||
|
||
# [spoilerbox] 格式
|
||
pattern = r"\[spoilerbox\](.*?)\[/spoilerbox\]"
|
||
|
||
def replace_spoilerbox(match):
|
||
content = match.group(1)
|
||
return (
|
||
f"<div class='js-spoilerbox bbcode-spoilerbox'>"
|
||
f"<button type='button' class='js-spoilerbox__link bbcode-spoilerbox__link' "
|
||
f"style='background: none; border: none; cursor: pointer; padding: 0; text-align: left; width: 100%;'>"
|
||
f"<span class='bbcode-spoilerbox__link-icon'></span>SPOILER</button>"
|
||
f"<div class='js-spoilerbox__body bbcode-spoilerbox__body'>{content}</div></div>"
|
||
)
|
||
|
||
return re.sub(pattern, replace_spoilerbox, text, flags=re.DOTALL | re.IGNORECASE)
|
||
|
||
@classmethod
|
||
def _parse_centre(cls, text: str) -> str:
|
||
"""解析 [centre] 标签"""
|
||
text = re.sub(r"\[centre\]", "<center>", text, flags=re.IGNORECASE)
|
||
text = re.sub(r"\[/centre\]", "</center>", text, flags=re.IGNORECASE)
|
||
text = re.sub(r"\[center\]", "<center>", text, flags=re.IGNORECASE)
|
||
text = re.sub(r"\[/center\]", "</center>", text, flags=re.IGNORECASE)
|
||
return text
|
||
|
||
@classmethod
|
||
def _parse_code(cls, text: str) -> str:
|
||
"""解析 [code] 标签"""
|
||
pattern = r"\[code\]\n*(.*?)\n*\[/code\]"
|
||
return re.sub(pattern, r"<pre>\1</pre>", text, flags=re.DOTALL | re.IGNORECASE)
|
||
|
||
@classmethod
|
||
def _parse_colour(cls, text: str) -> str:
|
||
"""解析 [color] 标签"""
|
||
pattern = r"\[color=([^\]]+)\](.*?)\[/color\]"
|
||
return re.sub(pattern, r'<span style="color:\1">\2</span>', text, flags=re.IGNORECASE)
|
||
|
||
@classmethod
|
||
def _parse_email(cls, text: str) -> str:
|
||
"""解析 [email] 标签"""
|
||
# [email]email@example.com[/email]
|
||
pattern1 = r"\[email\]([^\[]+)\[/email\]"
|
||
text = re.sub(pattern1, r'<a rel="nofollow" href="mailto:\1">\1</a>', text, flags=re.IGNORECASE)
|
||
|
||
# [email=email@example.com]text[/email]
|
||
pattern2 = r"\[email=([^\]]+)\](.*?)\[/email\]"
|
||
text = re.sub(pattern2, r'<a rel="nofollow" href="mailto:\1">\2</a>', text, flags=re.IGNORECASE)
|
||
|
||
return text
|
||
|
||
@classmethod
|
||
def _parse_heading(cls, text: str) -> str:
|
||
"""解析 [heading] 标签"""
|
||
pattern = r"\[heading\](.*?)\[/heading\]"
|
||
return re.sub(pattern, r"<h2>\1</h2>", text, flags=re.IGNORECASE)
|
||
|
||
@classmethod
|
||
def _parse_image(cls, text: str) -> str:
|
||
"""解析 [img] 标签"""
|
||
pattern = r"\[img\]([^\[]+)\[/img\]"
|
||
|
||
def replace_image(match):
|
||
url = match.group(1).strip()
|
||
# TODO: 可以在这里添加图片代理支持
|
||
# 生成带有懒加载的图片标签
|
||
return f'<img loading="lazy" src="{url}" alt="" style="max-width: 100%; height: auto;" />'
|
||
|
||
return re.sub(pattern, replace_image, text, flags=re.IGNORECASE)
|
||
|
||
@classmethod
|
||
def _parse_imagemap(cls, text: str) -> str:
|
||
"""
|
||
解析 [imagemap] 标签
|
||
基于 osu-web BBCodeFromDB.php 的实现
|
||
"""
|
||
pattern = r"\[imagemap\]\s*\n([^\s\n]+)\s*\n((?:[0-9.]+ [0-9.]+ [0-9.]+ [0-9.]+ (?:#|https?://[^\s]+|mailto:[^\s]+)[^\n]*\n?)+)\[/imagemap\]"
|
||
|
||
def replace_imagemap(match):
|
||
image_url = match.group(1).strip()
|
||
links_data = match.group(2).strip()
|
||
|
||
if not links_data:
|
||
return f'<img loading="lazy" src="{image_url}" alt="" style="max-width: 100%; height: auto;" />'
|
||
|
||
# 解析链接数据
|
||
links = []
|
||
for line in links_data.split("\n"):
|
||
line = line.strip()
|
||
if not line:
|
||
continue
|
||
|
||
# 按空格分割,最多分成6部分(前5个是数字和URL,第6个是标题)
|
||
parts = line.split(" ", 5)
|
||
if len(parts) >= 5:
|
||
try:
|
||
left = float(parts[0])
|
||
top = float(parts[1])
|
||
width = float(parts[2])
|
||
height = float(parts[3])
|
||
href = parts[4]
|
||
# 标题可能包含空格,所以重新组合
|
||
title = parts[5] if len(parts) > 5 else ""
|
||
|
||
# 构建样式
|
||
style = f"left: {left}%; top: {top}%; width: {width}%; height: {height}%;"
|
||
|
||
if href == "#":
|
||
# 无链接区域
|
||
links.append(f'<span class="imagemap__link" style="{style}" title="{title}"></span>')
|
||
else:
|
||
# 有链接区域
|
||
links.append(
|
||
f'<a class="imagemap__link" href="{href}" style="{style}" title="{title}"></a>'
|
||
)
|
||
except (ValueError, IndexError):
|
||
continue
|
||
|
||
if links:
|
||
links_html = "".join(links)
|
||
# 基于官方实现的图片标签
|
||
image_html = (
|
||
f'<img class="imagemap__image" loading="lazy" src="{image_url}" alt="" '
|
||
f'style="max-width: 100%; height: auto;" />'
|
||
)
|
||
# 使用imagemap容器
|
||
return f'<div class="imagemap">{image_html}{links_html}</div>'
|
||
else:
|
||
return f'<img loading="lazy" src="{image_url}" alt="" style="max-width: 100%; height: auto;" />'
|
||
|
||
return re.sub(pattern, replace_imagemap, text, flags=re.DOTALL | re.IGNORECASE)
|
||
|
||
@classmethod
|
||
def _parse_italic(cls, text: str) -> str:
|
||
"""解析 [i] 标签"""
|
||
text = re.sub(r"\[i\]", "<em>", text, flags=re.IGNORECASE)
|
||
text = re.sub(r"\[/i\]", "</em>", text, flags=re.IGNORECASE)
|
||
return text
|
||
|
||
@classmethod
|
||
def _parse_inline_code(cls, text: str) -> str:
|
||
"""解析 [c] 内联代码标签"""
|
||
text = re.sub(r"\[c\]", "<code>", text, flags=re.IGNORECASE)
|
||
text = re.sub(r"\[/c\]", "</code>", text, flags=re.IGNORECASE)
|
||
return text
|
||
|
||
@classmethod
|
||
def _parse_list(cls, text: str) -> str:
|
||
"""解析 [list] 标签"""
|
||
# 有序列表
|
||
pattern = r"\[list=1\](.*?)\[/list\]"
|
||
text = re.sub(pattern, r"<ol>\1</ol>", text, flags=re.DOTALL | re.IGNORECASE)
|
||
|
||
# 无序列表
|
||
pattern = r"\[list\](.*?)\[/list\]"
|
||
text = re.sub(pattern, r"<ol class='unordered'>\1</ol>", text, flags=re.DOTALL | re.IGNORECASE)
|
||
|
||
# 列表项
|
||
pattern = r"\[\*\]\s*(.*?)(?=\[\*\]|\[/list\]|$)"
|
||
text = re.sub(pattern, r"<li>\1</li>", text, flags=re.DOTALL | re.IGNORECASE)
|
||
|
||
return text
|
||
|
||
@classmethod
|
||
def _parse_notice(cls, text: str) -> str:
|
||
"""解析 [notice] 标签"""
|
||
pattern = r"\[notice\]\n*(.*?)\n*\[/notice\]"
|
||
return re.sub(pattern, r'<div class="well">\1</div>', text, flags=re.DOTALL | re.IGNORECASE)
|
||
|
||
@classmethod
|
||
def _parse_profile(cls, text: str) -> str:
|
||
"""解析 [profile] 标签"""
|
||
pattern = r"\[profile(?:=(\d+))?\](.*?)\[/profile\]"
|
||
|
||
def replace_profile(match):
|
||
user_id = match.group(1)
|
||
username = match.group(2)
|
||
|
||
if user_id:
|
||
return f'<a href="/users/{user_id}" class="user-profile-link" data-user-id="{user_id}">{username}</a>'
|
||
else:
|
||
return f'<a href="/users/@{username}" class="user-profile-link">@{username}</a>'
|
||
|
||
return re.sub(pattern, replace_profile, text, flags=re.IGNORECASE)
|
||
|
||
@classmethod
|
||
def _parse_quote(cls, text: str) -> str:
|
||
"""解析 [quote] 标签"""
|
||
# [quote="author"]content[/quote]
|
||
pattern1 = r'\[quote="([^"]+)"\]\s*(.*?)\s*\[/quote\]'
|
||
text = re.sub(pattern1, r"<blockquote><h4>\1 wrote:</h4>\2</blockquote>", text, flags=re.DOTALL | re.IGNORECASE)
|
||
|
||
# [quote]content[/quote]
|
||
pattern2 = r"\[quote\]\s*(.*?)\s*\[/quote\]"
|
||
text = re.sub(pattern2, r"<blockquote>\1</blockquote>", text, flags=re.DOTALL | re.IGNORECASE)
|
||
|
||
return text
|
||
|
||
@classmethod
|
||
def _parse_size(cls, text: str) -> str:
|
||
"""解析 [size] 标签"""
|
||
|
||
def replace_size(match):
|
||
size = int(match.group(1))
|
||
# 限制字体大小范围 (30-200%)
|
||
size = max(30, min(200, size))
|
||
return f'<span style="font-size:{size}%;">'
|
||
|
||
pattern = r"\[size=(\d+)\]"
|
||
text = re.sub(pattern, replace_size, text, flags=re.IGNORECASE)
|
||
text = re.sub(r"\[/size\]", "</span>", text, flags=re.IGNORECASE)
|
||
|
||
return text
|
||
|
||
@classmethod
|
||
def _parse_smilies(cls, text: str) -> str:
|
||
"""解析表情符号标签"""
|
||
# 处理 phpBB 风格的表情符号标记
|
||
pattern = r"<!-- s(.*?) --><img src=\"\{SMILIES_PATH\}/(.*?) /><!-- s\1 -->"
|
||
return re.sub(pattern, r'<img class="smiley" src="/smilies/\2 />', text)
|
||
|
||
@classmethod
|
||
def _parse_spoiler(cls, text: str) -> str:
|
||
"""解析 [spoiler] 标签"""
|
||
text = re.sub(r"\[spoiler\]", "<span class='spoiler'>", text, flags=re.IGNORECASE)
|
||
text = re.sub(r"\[/spoiler\]", "</span>", text, flags=re.IGNORECASE)
|
||
return text
|
||
|
||
@classmethod
|
||
def _parse_strike(cls, text: str) -> str:
|
||
"""解析 [s] 和 [strike] 标签"""
|
||
text = re.sub(r"\[s\]", "<del>", text, flags=re.IGNORECASE)
|
||
text = re.sub(r"\[/s\]", "</del>", text, flags=re.IGNORECASE)
|
||
text = re.sub(r"\[strike\]", "<del>", text, flags=re.IGNORECASE)
|
||
text = re.sub(r"\[/strike\]", "</del>", text, flags=re.IGNORECASE)
|
||
return text
|
||
|
||
@classmethod
|
||
def _parse_underline(cls, text: str) -> str:
|
||
"""解析 [u] 标签"""
|
||
text = re.sub(r"\[u\]", "<u>", text, flags=re.IGNORECASE)
|
||
text = re.sub(r"\[/u\]", "</u>", text, flags=re.IGNORECASE)
|
||
return text
|
||
|
||
@classmethod
|
||
def _parse_url(cls, text: str) -> str:
|
||
"""解析 [url] 标签"""
|
||
# [url]http://example.com[/url]
|
||
pattern1 = r"\[url\]([^\[]+)\[/url\]"
|
||
text = re.sub(pattern1, r'<a rel="nofollow" href="\1">\1</a>', text, flags=re.IGNORECASE)
|
||
|
||
# [url=http://example.com]text[/url]
|
||
pattern2 = r"\[url=([^\]]+)\](.*?)\[/url\]"
|
||
text = re.sub(pattern2, r'<a rel="nofollow" href="\1">\2</a>', text, flags=re.IGNORECASE)
|
||
|
||
return text
|
||
|
||
@classmethod
|
||
def _parse_youtube(cls, text: str) -> str:
|
||
"""解析 [youtube] 标签"""
|
||
pattern = r"\[youtube\]([a-zA-Z0-9_-]{11})\[/youtube\]"
|
||
|
||
def replace_youtube(match):
|
||
video_id = match.group(1)
|
||
return (
|
||
f"<iframe class='u-embed-wide u-embed-wide--bbcode' "
|
||
f"src='https://www.youtube.com/embed/{video_id}?rel=0' allowfullscreen></iframe>"
|
||
)
|
||
|
||
return re.sub(pattern, replace_youtube, text, flags=re.IGNORECASE)
|
||
|
||
@classmethod
|
||
def sanitize_html(cls, html_content: str) -> str:
|
||
"""
|
||
清理HTML内容,移除危险标签和属性
|
||
基于 osu-web 的安全策略
|
||
|
||
Args:
|
||
html_content: 要清理的HTML内容
|
||
|
||
Returns:
|
||
清理后的安全HTML
|
||
"""
|
||
if not html_content:
|
||
return ""
|
||
|
||
# 使用bleach清理HTML,配置CSS清理器以允许安全的样式
|
||
css_sanitizer = CSSSanitizer(
|
||
allowed_css_properties=[
|
||
"color",
|
||
"background",
|
||
"background-color",
|
||
"font-size",
|
||
"font-weight",
|
||
"font-style",
|
||
"text-decoration",
|
||
"text-align",
|
||
"left",
|
||
"top",
|
||
"width",
|
||
"height",
|
||
"position",
|
||
"margin",
|
||
"padding",
|
||
"max-width",
|
||
"max-height",
|
||
"aspect-ratio",
|
||
"z-index",
|
||
"display",
|
||
"border",
|
||
"border-none",
|
||
"cursor",
|
||
]
|
||
)
|
||
|
||
cleaned = bleach.clean(
|
||
html_content,
|
||
tags=cls.ALLOWED_TAGS,
|
||
attributes=cls.ALLOWED_ATTRIBUTES,
|
||
protocols=["http", "https", "mailto"],
|
||
css_sanitizer=css_sanitizer,
|
||
strip=True,
|
||
)
|
||
|
||
return cleaned
|
||
|
||
@classmethod
|
||
def process_userpage_content(cls, raw_content: str, max_length: int = 60000) -> dict[str, str]:
|
||
"""
|
||
处理用户页面内容
|
||
基于 osu-web 的处理流程
|
||
|
||
Args:
|
||
raw_content: 原始BBCode内容
|
||
max_length: 最大允许长度(字符数,支持多字节字符)
|
||
|
||
Returns:
|
||
包含raw和html两个版本的字典
|
||
"""
|
||
# 检查内容是否为空或仅包含空白字符
|
||
if not raw_content or not raw_content.strip():
|
||
raise ContentEmptyError()
|
||
|
||
# 检查长度限制(Python的len()本身支持Unicode字符计数)
|
||
content_length = len(raw_content)
|
||
if content_length > max_length:
|
||
raise ContentTooLongError(content_length, max_length)
|
||
|
||
# 检查是否包含禁止的标签
|
||
content_lower = raw_content.lower()
|
||
for forbidden_tag in cls.FORBIDDEN_TAGS:
|
||
if f"[{forbidden_tag}" in content_lower or f"<{forbidden_tag}" in content_lower:
|
||
raise ForbiddenTagError(forbidden_tag)
|
||
|
||
# 转换BBCode为HTML
|
||
html_content = cls.parse_bbcode(raw_content)
|
||
|
||
# 清理HTML
|
||
safe_html = cls.sanitize_html(html_content)
|
||
|
||
# 包装在 bbcode 容器中
|
||
final_html = f'<div class="bbcode">{safe_html}</div>'
|
||
|
||
return {"raw": raw_content, "html": final_html}
|
||
|
||
@classmethod
|
||
def validate_bbcode(cls, content: str) -> list[str]:
|
||
"""
|
||
验证BBCode语法并返回错误列表
|
||
基于 osu-web 的验证逻辑
|
||
|
||
Args:
|
||
content: 要验证的BBCode内容
|
||
|
||
Returns:
|
||
错误消息列表
|
||
"""
|
||
errors = []
|
||
|
||
# 检查内容是否仅包含引用(参考官方逻辑)
|
||
content_without_quotes = cls._remove_block_quotes(content)
|
||
if content.strip() and not content_without_quotes.strip():
|
||
errors.append("Content cannot contain only quotes")
|
||
|
||
# 检查标签配对
|
||
tag_stack = []
|
||
tag_pattern = r"\[(/?)(\w+)(?:=[^\]]+)?\]"
|
||
|
||
for match in re.finditer(tag_pattern, content, re.IGNORECASE):
|
||
is_closing = match.group(1) == "/"
|
||
tag_name = match.group(2).lower()
|
||
|
||
if is_closing:
|
||
if not tag_stack:
|
||
errors.append(f"Closing tag '[/{tag_name}]' without opening tag")
|
||
elif tag_stack[-1] != tag_name:
|
||
errors.append(f"Mismatched closing tag '[/{tag_name}]', expected '[/{tag_stack[-1]}]'")
|
||
else:
|
||
tag_stack.pop()
|
||
else:
|
||
# 特殊处理自闭合标签(只有列表项 * 是真正的自闭合)
|
||
if tag_name not in ["*"]:
|
||
tag_stack.append(tag_name)
|
||
|
||
# 检查未关闭的标签
|
||
for unclosed_tag in tag_stack:
|
||
errors.append(f"Unclosed tag '[{unclosed_tag}]'")
|
||
|
||
return errors
|
||
|
||
@classmethod
|
||
def _remove_block_quotes(cls, text: str) -> str:
|
||
"""
|
||
移除引用块(参考 osu-web BBCodeFromDB::removeBlockQuotes)
|
||
|
||
Args:
|
||
text: 原始文本
|
||
|
||
Returns:
|
||
移除引用后的文本
|
||
"""
|
||
# 基于官方实现的简化版本
|
||
# 移除 [quote]...[/quote] 和 [quote=author]...[/quote]
|
||
pattern = r"\[quote(?:=[^\]]+)?\].*?\[/quote\]"
|
||
result = re.sub(pattern, "", text, flags=re.DOTALL | re.IGNORECASE)
|
||
return result.strip()
|
||
|
||
@classmethod
|
||
def remove_bbcode_tags(cls, text: str) -> str:
|
||
"""
|
||
移除所有BBCode标签,只保留纯文本
|
||
用于搜索索引等场景
|
||
基于官方实现
|
||
"""
|
||
# 基于官方实现的完整BBCode标签模式
|
||
pattern = (
|
||
r"\[/?(\*|\*:m|audio|b|box|color|spoilerbox|centre|center|code|email|heading|i|img|"
|
||
r"list|list:o|list:u|notice|profile|quote|s|strike|u|spoiler|size|url|youtube|c)"
|
||
r"(=.*?(?=:))?(:[a-zA-Z0-9]{1,5})?\]"
|
||
)
|
||
|
||
return re.sub(pattern, "", text)
|
||
|
||
|
||
# 服务实例
|
||
bbcode_service = BBCodeService()
|