From d6587b933078b4d0e54fcdc4642333105a10c1f6 Mon Sep 17 00:00:00 2001 From: bacon Date: Tue, 4 Mar 2025 00:13:03 +0300 Subject: [PATCH] added rewrite guid link --- proxy/rss_proxy.py | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/proxy/rss_proxy.py b/proxy/rss_proxy.py index 5ea4aec..754ab03 100755 --- a/proxy/rss_proxy.py +++ b/proxy/rss_proxy.py @@ -3,9 +3,9 @@ import os import requests import redis import xml.etree.ElementTree as ET +import re from flask import request, Response - PROXY_URL = os.getenv("PROXY_URL") REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0") CACHE_TTL = int(os.getenv("CACHE_TTL", 3600)) @@ -13,10 +13,16 @@ CACHE_TTL = int(os.getenv("CACHE_TTL", 3600)) rdb = redis.from_url(REDIS_URL) +def extract_viewtopic_link(description): + """Search viewtopic.php in description""" + match = re.search(r'href="(http://tapochek\.net/viewtopic\.php\?t=\d+)"', description) + return match.group(1) if match else None + + def init_proxy(app): @app.route("/proxy") def proxy(): - """Proxy RSS feed with per-item caching.""" + """Proxy RSS feed with per-item caching and GUID replacement.""" raw_query = request.query_string.decode() if raw_query.startswith("url="): url = urllib.parse.unquote(raw_query[4:]) @@ -24,7 +30,6 @@ def init_proxy(app): return "Missing URL", 400 try: - # Получаем ленту proxies = {"http": PROXY_URL, "https": PROXY_URL} if PROXY_URL else None r = requests.get(url, timeout=10, proxies=proxies) _encode = r.apparent_encoding.lower() @@ -32,7 +37,6 @@ def init_proxy(app): xml_data = r.text.replace(f'', '') - # Разбираем XML root = ET.fromstring(xml_data) items = root.findall(".//item") @@ -40,19 +44,26 @@ def init_proxy(app): new_items = [] for item in items: - guid = item.find("guid").text if item.find("guid") is not None else None - if guid: - cache_key = f"rss:item:{guid}" - cached_item = rdb.get(cache_key) + guid = item.find("guid") + if guid is None or guid.get("isPermaLink") == "true": + continue - if cached_item: - cached_items.append(cached_item.decode()) - else: - item_str = ET.tostring(item, encoding="unicode") - rdb.setex(cache_key, CACHE_TTL, item_str) - new_items.append(item_str) + cache_key = f"rss:item:{guid.text}" + cached_item = rdb.get(cache_key) + + if cached_item: + cached_items.append(cached_item.decode()) + else: + description = item.find("description") + if description is not None: + new_guid = extract_viewtopic_link(description.text) + if new_guid: + guid.text = new_guid + + item_str = ET.tostring(item, encoding="unicode") + rdb.setex(cache_key, CACHE_TTL, item_str) + new_items.append(item_str) - # Собираем финальный RSS final_items = cached_items + new_items response_xml = f'{"".join(final_items)}'