From 0b43f2aad815dd5d0a5209f6a64b49949060b0ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=92=95=E8=B0=B7=E9=85=B1?= Date: Tue, 16 Sep 2025 00:33:46 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8C=81=E4=B8=AA=E4=BA=BA=E4=BB=8B?= =?UTF-8?q?=E7=BB=8D=E7=BC=96=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + app/exceptions/userpage.py | 44 +++ app/models/userpage.py | 61 ++++ app/router/v2/me.py | 102 +++++- app/service/bbcode_service.py | 592 +++++++++++++++++++++++++++++++++ pyproject.toml | 2 + tools/achievement_images_dl.py | 3 +- uv.lock | 37 +++ 8 files changed, 840 insertions(+), 2 deletions(-) create mode 100644 app/exceptions/userpage.py create mode 100644 app/models/userpage.py create mode 100644 app/service/bbcode_service.py diff --git a/.gitignore b/.gitignore index 043fa6e..e491cc1 100644 --- a/.gitignore +++ b/.gitignore @@ -222,3 +222,4 @@ newrelic.ini logs/ osu-server-spectator-master/* spectator-server/ +.github/copilot-instructions.md \ No newline at end of file diff --git a/app/exceptions/userpage.py b/app/exceptions/userpage.py new file mode 100644 index 0000000..a0a8e60 --- /dev/null +++ b/app/exceptions/userpage.py @@ -0,0 +1,44 @@ +""" +用户页面相关的异常类 +""" + +from __future__ import annotations + + +class UserpageError(Exception): + """用户页面处理错误基类""" + def __init__(self, message: str, code: str = "userpage_error"): + self.message = message + self.code = code + super().__init__(message) + + +class ContentTooLongError(UserpageError): + """内容过长错误""" + def __init__(self, current_length: int, max_length: int): + message = f"Content too long. Maximum {max_length} characters allowed, got {current_length}." + super().__init__(message, "content_too_long") + self.current_length = current_length + self.max_length = max_length + + +class ContentEmptyError(UserpageError): + """内容为空错误""" + def __init__(self): + super().__init__("Content cannot be empty.", "content_empty") + + +class BBCodeValidationError(UserpageError): + """BBCode验证错误""" + def __init__(self, errors: list[str]): + message = f"BBCode validation failed: {'; '.join(errors)}" + super().__init__(message, "bbcode_validation_error") + self.errors = errors + + +class ForbiddenTagError(UserpageError): + """禁止标签错误""" + def __init__(self, tag: str): + message = f"Forbidden tag '{tag}' is not allowed." + super().__init__(message, "forbidden_tag") + self.tag = tag diff --git a/app/models/userpage.py b/app/models/userpage.py new file mode 100644 index 0000000..1981105 --- /dev/null +++ b/app/models/userpage.py @@ -0,0 +1,61 @@ +""" +用户页面编辑相关的API模型 +""" + +from __future__ import annotations + +from pydantic import BaseModel, Field, field_validator + + +class UpdateUserpageRequest(BaseModel): + """更新用户页面请求模型(匹配官方osu-web格式)""" + + body: str = Field( + description="用户页面的BBCode原始内容", + max_length=60000, + examples=["[b]Hello![/b] This is my profile page.\n[color=blue]Blue text[/color]"] + ) + + @field_validator("body") + @classmethod + def validate_body_content(cls, v: str) -> str: + """验证原始内容""" + if not v.strip(): + return "" + + # 基本长度验证 + if len(v) > 60000: + msg = "Content too long. Maximum 60000 characters allowed." + raise ValueError(msg) + + return v + + +class UpdateUserpageResponse(BaseModel): + """更新用户页面响应模型(匹配官方osu-web格式)""" + + html: str = Field(description="处理后的HTML内容") + + +class UserpageResponse(BaseModel): + """用户页面响应模型(包含html和raw,匹配官方格式)""" + + html: str = Field(description="处理后的HTML内容") + raw: str = Field(description="原始BBCode内容") + + +class ValidateBBCodeRequest(BaseModel): + """验证BBCode请求模型""" + + content: str = Field( + description="要验证的BBCode内容", + max_length=60000 + ) + + +class ValidateBBCodeResponse(BaseModel): + """验证BBCode响应模型""" + + valid: bool = Field(description="BBCode是否有效") + errors: list[str] = Field(default_factory=list, description="错误列表") + preview: dict[str, str] = Field(description="预览内容") diff --git a/app/router/v2/me.py b/app/router/v2/me.py index 16d0c40..3ff90e3 100644 --- a/app/router/v2/me.py +++ b/app/router/v2/me.py @@ -4,12 +4,21 @@ from app.database import User from app.database.lazer_user import ALL_INCLUDED from app.dependencies import get_current_user from app.dependencies.database import Database +from app.exceptions.userpage import UserpageError from app.models.api_me import APIMe from app.models.score import GameMode +from app.models.user import Page +from app.models.userpage import ( + UpdateUserpageRequest, + UpdateUserpageResponse, + ValidateBBCodeRequest, + ValidateBBCodeResponse, +) +from app.service.bbcode_service import bbcode_service from .router import router -from fastapi import Path, Security +from fastapi import HTTPException, Path, Security @router.get( @@ -51,3 +60,94 @@ async def get_user_info_default( None, ) return user_resp + + +# @router.get( +# "/users/{user_id}/page", +# response_model=UserpageResponse, +# name="获取用户页面", +# description="获取指定用户的个人页面内容。匹配官方osu-web API格式。", +# tags=["用户"], +# ) +# async def get_userpage( +# session: Database, +# user_id: int = Path(description="用户ID"), +# ): +# """获取用户页面内容""" +# # 查找用户 +# user = await session.get(User, user_id) +# if not user: +# raise HTTPException(status_code=404, detail={"error": "User not found"}) + +# # 返回页面内容 +# if user.page: +# return UserpageResponse(html=user.page.get("html", ""), raw=user.page.get("raw", "")) +# else: +# return UserpageResponse(html="", raw="") + + +@router.put( + "/users/{user_id}/page", + response_model=UpdateUserpageResponse, + name="更新用户页面", + description="更新指定用户的个人页面内容(支持BBCode)。匹配官方osu-web API格式。", + tags=["用户"], +) +async def update_userpage( + request: UpdateUserpageRequest, + session: Database, + user_id: int = Path(description="用户ID"), + current_user: User = Security(get_current_user, scopes=["edit"]), +): + """更新用户页面内容(匹配官方osu-web实现)""" + # 检查权限:只能编辑自己的页面(除非是管理员) + if user_id != current_user.id: + raise HTTPException(status_code=403, detail={"error": "Access denied"}) + + try: + # 处理BBCode内容 + processed_page = bbcode_service.process_userpage_content(request.body) + + # 更新数据库 - 直接更新用户对象 + current_user.page = Page(html=processed_page["html"], raw=processed_page["raw"]) + session.add(current_user) + await session.commit() + await session.refresh(current_user) + + # 返回官方格式的响应:只包含html + return UpdateUserpageResponse(html=processed_page["html"]) + + except UserpageError as e: + # 使用官方格式的错误响应:{'error': message} + raise HTTPException(status_code=422, detail={"error": e.message}) + except Exception: + raise HTTPException(status_code=500, detail={"error": "Failed to update user page"}) + + +@router.post( + "/me/validate-bbcode", + response_model=ValidateBBCodeResponse, + name="验证BBCode", + description="验证BBCode语法并返回预览。", + tags=["用户"], +) +async def validate_bbcode( + request: ValidateBBCodeRequest, +): + """验证BBCode语法""" + try: + # 验证BBCode语法 + errors = bbcode_service.validate_bbcode(request.content) + + # 生成预览(如果没有严重错误) + if len(errors) == 0: + preview = bbcode_service.process_userpage_content(request.content) + else: + preview = {"raw": request.content, "html": ""} + + return ValidateBBCodeResponse(valid=len(errors) == 0, errors=errors, preview=preview) + + except UserpageError as e: + return ValidateBBCodeResponse(valid=False, errors=[e.message], preview={"raw": request.content, "html": ""}) + except Exception: + raise HTTPException(status_code=500, detail={"error": "Failed to validate BBCode"}) diff --git a/app/service/bbcode_service.py b/app/service/bbcode_service.py new file mode 100644 index 0000000..3f4089b --- /dev/null +++ b/app/service/bbcode_service.py @@ -0,0 +1,592 @@ +""" +BBCode处理服务 +基于 osu-web 官方实现的 BBCode 解析器 +支持所有 osu! 官方 BBCode 标签 +""" + +from __future__ import annotations + +import html +import re +from typing import ClassVar + +from app.exceptions.userpage import ( + ContentEmptyError, + ContentTooLongError, + ForbiddenTagError, +) + +import bleach +from bleach.css_sanitizer import CSSSanitizer + + +class BBCodeService: + """BBCode处理服务类 - 基于 osu-web 官方实现""" + + # 允许的HTML标签和属性 - 基于官方实现 + ALLOWED_TAGS: ClassVar[list[str]] = [ + "a", "audio", "blockquote", "br", "center", "code", "del", "div", "em", "h2", "h4", + "iframe", "img", "li", "ol", "p", "pre", "span", "strong", "u", "ul", + # imagemap 相关 + "map", "area", + # 自定义容器 + "details", "summary", + ] + + ALLOWED_ATTRIBUTES: ClassVar[dict[str, list[str]]] = { + "a": ["href", "rel", "class", "data-user-id", "target", "style", "title"], + "audio": ["controls", "preload", "src"], + "blockquote": [], + "center": [], + "code": [], + "div": ["class", "style"], + "details": ["class"], + "h2": [], + "h4": [], + "iframe": ["class", "src", "allowfullscreen", "width", "height", "frameborder"], + "img": ["class", "loading", "src", "width", "height", "usemap", "alt", "style"], + "map": ["name"], + "area": ["href", "style", "title", "class"], + "ol": ["class"], + "span": ["class", "style", "title"], + "summary": [], + "ul": ["class"], + "*": ["class"], + } + + # 危险的BBCode标签(不允许) + FORBIDDEN_TAGS: ClassVar[list[str]] = [ + "script", "iframe", "object", "embed", "form", "input", "textarea", "button", + "select", "option", "meta", "link", "style", "title", "head", "html", "body", + ] + + @classmethod + def parse_bbcode(cls, text: str) -> str: + """ + 解析BBCode文本并转换为HTML + 基于 osu-web BBCodeFromDB.php 的实现 + + Args: + text: 包含BBCode的原始文本 + + Returns: + 转换后的HTML字符串 + """ + if not text: + return "" + + # 预处理:转义HTML实体 + text = html.escape(text) + + # 按照 osu-web 的解析顺序进行处理 + # 块级标签处理 + text = cls._parse_imagemap(text) + text = cls._parse_box(text) + text = cls._parse_code(text) + text = cls._parse_list(text) + text = cls._parse_notice(text) + text = cls._parse_quote(text) + text = cls._parse_heading(text) + + # 行内标签处理 + text = cls._parse_audio(text) + text = cls._parse_bold(text) + text = cls._parse_centre(text) + text = cls._parse_inline_code(text) + text = cls._parse_colour(text) + text = cls._parse_email(text) + text = cls._parse_image(text) + text = cls._parse_italic(text) + text = cls._parse_size(text) + text = cls._parse_smilies(text) + text = cls._parse_spoiler(text) + text = cls._parse_strike(text) + text = cls._parse_underline(text) + text = cls._parse_url(text) + text = cls._parse_youtube(text) + text = cls._parse_profile(text) + + # 换行处理 + text = text.replace("\n", "
") + + return text + + @classmethod + def _parse_audio(cls, text: str) -> str: + """解析 [audio] 标签""" + pattern = r"\[audio\]([^\[]+)\[/audio\]" + + def replace_audio(match): + url = match.group(1).strip() + return f'' + + return re.sub(pattern, replace_audio, text, flags=re.IGNORECASE) + + @classmethod + def _parse_bold(cls, text: str) -> str: + """解析 [b] 标签""" + text = re.sub(r"\[b\]", "", text, flags=re.IGNORECASE) + text = re.sub(r"\[/b\]", "", text, flags=re.IGNORECASE) + return text + + @classmethod + def _parse_box(cls, text: str) -> str: + """解析 [box] 和 [spoilerbox] 标签""" + # [box=title] 格式 + pattern = r"\[box=([^\]]+)\](.*?)\[/box\]" + + def replace_box_with_title(match): + title = match.group(1) + content = match.group(2) + return ( + f"
" + f"" + f"{title}" + f"
{content}
" + ) + + text = re.sub(pattern, replace_box_with_title, text, flags=re.DOTALL | re.IGNORECASE) + + # [spoilerbox] 格式 + pattern = r"\[spoilerbox\](.*?)\[/spoilerbox\]" + + def replace_spoilerbox(match): + content = match.group(1) + return ( + f"
" + f"" + f"SPOILER" + f"
{content}
" + ) + + return re.sub(pattern, replace_spoilerbox, text, flags=re.DOTALL | re.IGNORECASE) + + @classmethod + def _parse_centre(cls, text: str) -> str: + """解析 [centre] 标签""" + text = re.sub(r"\[centre\]", "
", text, flags=re.IGNORECASE) + text = re.sub(r"\[/centre\]", "
", text, flags=re.IGNORECASE) + text = re.sub(r"\[center\]", "
", text, flags=re.IGNORECASE) + text = re.sub(r"\[/center\]", "
", text, flags=re.IGNORECASE) + return text + + @classmethod + def _parse_code(cls, text: str) -> str: + """解析 [code] 标签""" + pattern = r"\[code\]\n*(.*?)\n*\[/code\]" + return re.sub(pattern, r"
\1
", text, flags=re.DOTALL | re.IGNORECASE) + + @classmethod + def _parse_colour(cls, text: str) -> str: + """解析 [color] 标签""" + pattern = r"\[color=([^\]]+)\](.*?)\[/color\]" + return re.sub(pattern, r'\2', text, flags=re.IGNORECASE) + + @classmethod + def _parse_email(cls, text: str) -> str: + """解析 [email] 标签""" + # [email]email@example.com[/email] + pattern1 = r"\[email\]([^\[]+)\[/email\]" + text = re.sub(pattern1, r'\1', text, flags=re.IGNORECASE) + + # [email=email@example.com]text[/email] + pattern2 = r"\[email=([^\]]+)\](.*?)\[/email\]" + text = re.sub(pattern2, r'\2', text, flags=re.IGNORECASE) + + return text + + @classmethod + def _parse_heading(cls, text: str) -> str: + """解析 [heading] 标签""" + pattern = r"\[heading\](.*?)\[/heading\]" + return re.sub(pattern, r"

\1

", text, flags=re.IGNORECASE) + + @classmethod + def _parse_image(cls, text: str) -> str: + """解析 [img] 标签""" + pattern = r"\[img\]([^\[]+)\[/img\]" + + def replace_image(match): + url = match.group(1).strip() + # TODO: 可以在这里添加图片代理支持 + # 生成带有懒加载的图片标签 + return f'' + + return re.sub(pattern, replace_image, text, flags=re.IGNORECASE) + + @classmethod + def _parse_imagemap(cls, text: str) -> str: + """ + 解析 [imagemap] 标签 + 基于 osu-web BBCodeFromDB.php 的实现 + """ + pattern = r"\[imagemap\]\s*\n([^\s\n]+)\s*\n((?:[0-9.]+ [0-9.]+ [0-9.]+ [0-9.]+ (?:#|https?://[^\s]+|mailto:[^\s]+)[^\n]*\n?)+)\[/imagemap\]" + + def replace_imagemap(match): + image_url = match.group(1).strip() + links_data = match.group(2).strip() + + if not links_data: + return f'' + + # 解析链接数据 + links = [] + for line in links_data.split("\n"): + line = line.strip() + if not line: + continue + + # 按空格分割,最多分成6部分(前5个是数字和URL,第6个是标题) + parts = line.split(" ", 5) + if len(parts) >= 5: + try: + left = float(parts[0]) + top = float(parts[1]) + width = float(parts[2]) + height = float(parts[3]) + href = parts[4] + # 标题可能包含空格,所以重新组合 + title = parts[5] if len(parts) > 5 else "" + + # 构建样式 + style = f"left: {left}%; top: {top}%; width: {width}%; height: {height}%;" + + if href == "#": + # 无链接区域 + links.append( + f'' + ) + else: + # 有链接区域 + links.append( + f'' + ) + except (ValueError, IndexError): + continue + + if links: + links_html = "".join(links) + # 基于官方实现的图片标签 + image_html = ( + f'' + ) + # 使用imagemap容器 + return f'
{image_html}{links_html}
' + else: + return f'' + + return re.sub(pattern, replace_imagemap, text, flags=re.DOTALL | re.IGNORECASE) + + @classmethod + def _parse_italic(cls, text: str) -> str: + """解析 [i] 标签""" + text = re.sub(r"\[i\]", "", text, flags=re.IGNORECASE) + text = re.sub(r"\[/i\]", "", text, flags=re.IGNORECASE) + return text + + @classmethod + def _parse_inline_code(cls, text: str) -> str: + """解析 [c] 内联代码标签""" + text = re.sub(r"\[c\]", "", text, flags=re.IGNORECASE) + text = re.sub(r"\[/c\]", "", text, flags=re.IGNORECASE) + return text + + @classmethod + def _parse_list(cls, text: str) -> str: + """解析 [list] 标签""" + # 有序列表 + pattern = r"\[list=1\](.*?)\[/list\]" + text = re.sub(pattern, r"
    \1
", text, flags=re.DOTALL | re.IGNORECASE) + + # 无序列表 + pattern = r"\[list\](.*?)\[/list\]" + text = re.sub(pattern, r"
    \1
", text, flags=re.DOTALL | re.IGNORECASE) + + # 列表项 + pattern = r"\[\*\]\s*(.*?)(?=\[\*\]|\[/list\]|$)" + text = re.sub(pattern, r"
  • \1
  • ", text, flags=re.DOTALL | re.IGNORECASE) + + return text + + @classmethod + def _parse_notice(cls, text: str) -> str: + """解析 [notice] 标签""" + pattern = r"\[notice\]\n*(.*?)\n*\[/notice\]" + return re.sub(pattern, r'
    \1
    ', text, flags=re.DOTALL | re.IGNORECASE) + + @classmethod + def _parse_profile(cls, text: str) -> str: + """解析 [profile] 标签""" + pattern = r"\[profile(?:=(\d+))?\](.*?)\[/profile\]" + + def replace_profile(match): + user_id = match.group(1) + username = match.group(2) + + if user_id: + return f'{username}' + else: + return f'@{username}' + + return re.sub(pattern, replace_profile, text, flags=re.IGNORECASE) + + @classmethod + def _parse_quote(cls, text: str) -> str: + """解析 [quote] 标签""" + # [quote="author"]content[/quote] + pattern1 = r'\[quote="([^"]+)"\]\s*(.*?)\s*\[/quote\]' + text = re.sub(pattern1, r"

    \1 wrote:

    \2
    ", text, + flags=re.DOTALL | re.IGNORECASE) + + # [quote]content[/quote] + pattern2 = r"\[quote\]\s*(.*?)\s*\[/quote\]" + text = re.sub(pattern2, r"
    \1
    ", text, flags=re.DOTALL | re.IGNORECASE) + + return text + + @classmethod + def _parse_size(cls, text: str) -> str: + """解析 [size] 标签""" + + def replace_size(match): + size = int(match.group(1)) + # 限制字体大小范围 (30-200%) + size = max(30, min(200, size)) + return f'' + + pattern = r"\[size=(\d+)\]" + text = re.sub(pattern, replace_size, text, flags=re.IGNORECASE) + text = re.sub(r"\[/size\]", "", text, flags=re.IGNORECASE) + + return text + + @classmethod + def _parse_smilies(cls, text: str) -> str: + """解析表情符号标签""" + # 处理 phpBB 风格的表情符号标记 + pattern = r"" + return re.sub(pattern, r'", text, flags=re.IGNORECASE) + text = re.sub(r"\[/spoiler\]", "", text, flags=re.IGNORECASE) + return text + + @classmethod + def _parse_strike(cls, text: str) -> str: + """解析 [s] 和 [strike] 标签""" + text = re.sub(r"\[s\]", "", text, flags=re.IGNORECASE) + text = re.sub(r"\[/s\]", "", text, flags=re.IGNORECASE) + text = re.sub(r"\[strike\]", "", text, flags=re.IGNORECASE) + text = re.sub(r"\[/strike\]", "", text, flags=re.IGNORECASE) + return text + + @classmethod + def _parse_underline(cls, text: str) -> str: + """解析 [u] 标签""" + text = re.sub(r"\[u\]", "", text, flags=re.IGNORECASE) + text = re.sub(r"\[/u\]", "", text, flags=re.IGNORECASE) + return text + + @classmethod + def _parse_url(cls, text: str) -> str: + """解析 [url] 标签""" + # [url]http://example.com[/url] + pattern1 = r"\[url\]([^\[]+)\[/url\]" + text = re.sub(pattern1, r'\1', text, flags=re.IGNORECASE) + + # [url=http://example.com]text[/url] + pattern2 = r"\[url=([^\]]+)\](.*?)\[/url\]" + text = re.sub(pattern2, r'\2', text, flags=re.IGNORECASE) + + return text + + @classmethod + def _parse_youtube(cls, text: str) -> str: + """解析 [youtube] 标签""" + pattern = r"\[youtube\]([a-zA-Z0-9_-]{11})\[/youtube\]" + + def replace_youtube(match): + video_id = match.group(1) + return ( + f"" + ) + + return re.sub(pattern, replace_youtube, text, flags=re.IGNORECASE) + + @classmethod + def sanitize_html(cls, html_content: str) -> str: + """ + 清理HTML内容,移除危险标签和属性 + 基于 osu-web 的安全策略 + + Args: + html_content: 要清理的HTML内容 + + Returns: + 清理后的安全HTML + """ + if not html_content: + return "" + + # 使用bleach清理HTML,配置CSS清理器以允许安全的样式 + css_sanitizer = CSSSanitizer( + allowed_css_properties=[ + "color", + "background-color", + "font-size", + "font-weight", + "font-style", + "text-decoration", + "text-align", + "left", + "top", + "width", + "height", + "position", + "margin", + "padding", + "max-width", + "max-height", + "aspect-ratio", + "z-index", + "display", + ] + ) + + cleaned = bleach.clean( + html_content, + tags=cls.ALLOWED_TAGS, + attributes=cls.ALLOWED_ATTRIBUTES, + protocols=["http", "https", "mailto"], + css_sanitizer=css_sanitizer, + strip=True, + ) + + return cleaned + + @classmethod + def process_userpage_content(cls, raw_content: str, max_length: int = 60000) -> dict[str, str]: + """ + 处理用户页面内容 + 基于 osu-web 的处理流程 + + Args: + raw_content: 原始BBCode内容 + max_length: 最大允许长度(字符数,支持多字节字符) + + Returns: + 包含raw和html两个版本的字典 + """ + # 检查内容是否为空或仅包含空白字符 + if not raw_content or not raw_content.strip(): + raise ContentEmptyError() + + # 检查长度限制(Python的len()本身支持Unicode字符计数) + content_length = len(raw_content) + if content_length > max_length: + raise ContentTooLongError(content_length, max_length) + + # 检查是否包含禁止的标签 + content_lower = raw_content.lower() + for forbidden_tag in cls.FORBIDDEN_TAGS: + if f"[{forbidden_tag}" in content_lower or f"<{forbidden_tag}" in content_lower: + raise ForbiddenTagError(forbidden_tag) + + # 转换BBCode为HTML + html_content = cls.parse_bbcode(raw_content) + + # 清理HTML + safe_html = cls.sanitize_html(html_content) + + # 包装在 bbcode 容器中 + final_html = f'
    {safe_html}
    ' + + return {"raw": raw_content, "html": final_html} + + @classmethod + def validate_bbcode(cls, content: str) -> list[str]: + """ + 验证BBCode语法并返回错误列表 + 基于 osu-web 的验证逻辑 + + Args: + content: 要验证的BBCode内容 + + Returns: + 错误消息列表 + """ + errors = [] + + # 检查内容是否仅包含引用(参考官方逻辑) + content_without_quotes = cls._remove_block_quotes(content) + if content.strip() and not content_without_quotes.strip(): + errors.append("Content cannot contain only quotes") + + # 检查标签配对 + tag_stack = [] + tag_pattern = r"\[(/?)(\w+)(?:=[^\]]+)?\]" + + for match in re.finditer(tag_pattern, content, re.IGNORECASE): + is_closing = match.group(1) == "/" + tag_name = match.group(2).lower() + + if is_closing: + if not tag_stack: + errors.append(f"Closing tag '[/{tag_name}]' without opening tag") + elif tag_stack[-1] != tag_name: + errors.append(f"Mismatched closing tag '[/{tag_name}]', expected '[/{tag_stack[-1]}]'") + else: + tag_stack.pop() + else: + # 特殊处理自闭合标签(只有列表项 * 是真正的自闭合) + if tag_name not in ["*"]: + tag_stack.append(tag_name) + + # 检查未关闭的标签 + for unclosed_tag in tag_stack: + errors.append(f"Unclosed tag '[{unclosed_tag}]'") + + return errors + + @classmethod + def _remove_block_quotes(cls, text: str) -> str: + """ + 移除引用块(参考 osu-web BBCodeFromDB::removeBlockQuotes) + + Args: + text: 原始文本 + + Returns: + 移除引用后的文本 + """ + # 基于官方实现的简化版本 + # 移除 [quote]...[/quote] 和 [quote=author]...[/quote] + pattern = r"\[quote(?:=[^\]]+)?\].*?\[/quote\]" + result = re.sub(pattern, "", text, flags=re.DOTALL | re.IGNORECASE) + return result.strip() + + @classmethod + def remove_bbcode_tags(cls, text: str) -> str: + """ + 移除所有BBCode标签,只保留纯文本 + 用于搜索索引等场景 + 基于官方实现 + """ + # 基于官方实现的完整BBCode标签模式 + pattern = ( + r"\[/?(\*|\*:m|audio|b|box|color|spoilerbox|centre|center|code|email|heading|i|img|" + r"list|list:o|list:u|notice|profile|quote|s|strike|u|spoiler|size|url|youtube|c)" + r"(=.*?(?=:))?(:[a-zA-Z0-9]{1,5})?\]" + ) + + return re.sub(pattern, "", text) + + +# 服务实例 +bbcode_service = BBCodeService() diff --git a/pyproject.toml b/pyproject.toml index eda1458..2a51831 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ dependencies = [ "alembic>=1.12.1", "apscheduler>=3.11.0", "bcrypt>=4.1.2", + "bleach>=6.1.0", "cryptography>=41.0.7", "fastapi>=0.104.1", "fastapi-limiter>=0.1.6", @@ -32,6 +33,7 @@ dependencies = [ "sentry-sdk[fastapi,httpx,loguru,sqlalchemy]>=2.34.1", "sqlalchemy>=2.0.23", "sqlmodel>=0.0.24", + "tinycss2>=1.4.0", "uvicorn[standard]>=0.24.0", ] authors = [{ name = "GooGuTeam" }] diff --git a/tools/achievement_images_dl.py b/tools/achievement_images_dl.py index 4ec7a0f..d7d7ade 100644 --- a/tools/achievement_images_dl.py +++ b/tools/achievement_images_dl.py @@ -7,9 +7,10 @@ download_achievements.py from __future__ import annotations -import httpx from pathlib import Path +import httpx + def download_achievement_images(achievements_path: Path) -> None: """Download all used achievement images (one by one, from osu!).""" diff --git a/uv.lock b/uv.lock index 799d3d0..29f2cd1 100644 --- a/uv.lock +++ b/uv.lock @@ -255,6 +255,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a9/cf/45fb5261ece3e6b9817d3d82b2f343a505fd58674a92577923bc500bd1aa/bcrypt-4.3.0-cp39-abi3-win_amd64.whl", hash = "sha256:e53e074b120f2877a35cc6c736b8eb161377caae8925c17688bd46ba56daaa5b", size = 152799, upload-time = "2025-02-28T01:23:53.139Z" }, ] +[[package]] +name = "bleach" +version = "6.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "webencodings" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/76/9a/0e33f5054c54d349ea62c277191c020c2d6ef1d65ab2cb1993f91ec846d1/bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f", size = 203083, upload-time = "2024-10-29T18:30:40.477Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/55/96142937f66150805c25c4d0f31ee4132fd33497753400734f9dfdcbdc66/bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e", size = 163406, upload-time = "2024-10-29T18:30:38.186Z" }, +] + [[package]] name = "boto3" version = "1.39.11" @@ -576,6 +588,7 @@ dependencies = [ { name = "alembic" }, { name = "apscheduler" }, { name = "bcrypt" }, + { name = "bleach" }, { name = "cryptography" }, { name = "fastapi" }, { name = "fastapi-limiter" }, @@ -597,6 +610,7 @@ dependencies = [ { name = "sentry-sdk", extra = ["fastapi", "httpx", "loguru", "sqlalchemy"] }, { name = "sqlalchemy" }, { name = "sqlmodel" }, + { name = "tinycss2" }, { name = "uvicorn", extra = ["standard"] }, ] @@ -617,6 +631,7 @@ requires-dist = [ { name = "alembic", specifier = ">=1.12.1" }, { name = "apscheduler", specifier = ">=3.11.0" }, { name = "bcrypt", specifier = ">=4.1.2" }, + { name = "bleach", specifier = ">=6.1.0" }, { name = "cryptography", specifier = ">=41.0.7" }, { name = "fastapi", specifier = ">=0.104.1" }, { name = "fastapi-limiter", specifier = ">=0.1.6" }, @@ -638,6 +653,7 @@ requires-dist = [ { name = "sentry-sdk", extras = ["fastapi", "httpx", "loguru", "sqlalchemy"], specifier = ">=2.34.1" }, { name = "sqlalchemy", specifier = ">=2.0.23" }, { name = "sqlmodel", specifier = ">=0.0.24" }, + { name = "tinycss2", specifier = ">=1.4.0" }, { name = "uvicorn", extras = ["standard"], specifier = ">=0.24.0" }, ] @@ -1542,6 +1558,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ce/fd/901cfa59aaa5b30a99e16876f11abe38b59a1a2c51ffb3d7142bb6089069/starlette-0.47.3-py3-none-any.whl", hash = "sha256:89c0778ca62a76b826101e7c709e70680a1699ca7da6b44d38eb0a7e61fe4b51", size = 72991, upload-time = "2025-08-24T13:36:40.887Z" }, ] +[[package]] +name = "tinycss2" +version = "1.4.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "webencodings" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7a/fd/7a5ee21fd08ff70d3d33a5781c255cbe779659bd03278feb98b19ee550f4/tinycss2-1.4.0.tar.gz", hash = "sha256:10c0972f6fc0fbee87c3edb76549357415e94548c1ae10ebccdea16fb404a9b7", size = 87085, upload-time = "2024-10-24T14:58:29.895Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/34/ebdc18bae6aa14fbee1a08b63c015c72b64868ff7dae68808ab500c492e2/tinycss2-1.4.0-py3-none-any.whl", hash = "sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289", size = 26610, upload-time = "2024-10-24T14:58:28.029Z" }, +] + [[package]] name = "types-aioboto3" version = "15.1.0" @@ -1839,6 +1867,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/fa/a4f5c2046385492b2273213ef815bf71a0d4c1943b784fb904e184e30201/watchfiles-1.1.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:af06c863f152005c7592df1d6a7009c836a247c9d8adb78fef8575a5a98699db", size = 623315, upload-time = "2025-06-15T19:06:29.076Z" }, ] +[[package]] +name = "webencodings" +version = "0.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/02/ae6ceac1baeda530866a85075641cec12989bd8d31af6d5ab4a3e8c92f47/webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923", size = 9721, upload-time = "2017-04-05T20:21:34.189Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", size = 11774, upload-time = "2017-04-05T20:21:32.581Z" }, +] + [[package]] name = "websockets" version = "15.0.1"