import html
import urllib.parse
import os
import requests
import redis
import xml.etree.ElementTree as ET
import re
import unicodedata
from flask import request, Response
# Runtime configuration, read once from the environment at import time.

# Optional outbound proxy; the /proxy handler only passes it to requests
# when it is set.
PROXY_URL = os.environ.get("PROXY_URL")

# Redis connection string, falling back to a local instance.
REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0")

# Expiry handed to ``rdb.setex`` for every cached feed item.
CACHE_TTL = int(os.environ.get("CACHE_TTL", 3600))

# Shared Redis client used by the request handler.
rdb = redis.from_url(REDIS_URL)
# Text emitted in front of the serialized feed items by the /proxy handler.
# NOTE(review): the handler serves the result as application/xml, yet this
# prefix contains no XML markup (it reads like a channel title, link and a
# numeric value with their tags missing) - confirm against the intended
# RSS header before relying on the output being well-formed.
_head_html = (
    "\n"
    "Tapochek.net RSS\n"
    "http://tapochek.net/\n"
    "15"
)
def normalize_text(text, form="NFKC"):
    """Return *text* converted to a Unicode normalization form.

    Args:
        text: String to normalize. Falsy values (``None`` or ``""``) are
            returned unchanged so callers can pass optional XML text
            straight through.
        form: Normalization form forwarded to ``unicodedata.normalize``.
            Defaults to ``"NFKC"``, which replaces compatibility
            characters (ligatures, full-width letters, ...) with their
            canonical equivalents.

    Returns:
        The normalized string, or *text* itself when it is falsy.
    """
    if not text:
        return text
    return unicodedata.normalize(form, text)
# First alternative captures a double-quoted absolute http(s) href, the
# second a single-quoted one.
_HREF_RE = re.compile(r"""href=(?:"(https?://[^"]+)"|'(https?://[^']+)')""")


def extract_viewtopic_link(description):
    """Return the first absolute http(s) ``href`` found in *description*.

    HTML entities in *description* are decoded before searching, so both
    raw markup and entity-escaped markup (``&lt;a href=&quot;...``) work.

    Args:
        description: Item description text, possibly entity-escaped.
            ``None`` or an empty string yields ``None`` instead of raising.

    Returns:
        The URL of the first matching ``href`` attribute (double- or
        single-quoted), or ``None`` when there is no such attribute.
    """
    if not description:
        return None
    match = _HREF_RE.search(html.unescape(description))
    if match is None:
        return None
    # Exactly one of the two groups is populated, depending on quote style.
    return match.group(1) or match.group(2)
def normalize_xml_texts(elem):
    """Apply ``normalize_text`` to every text node under *elem*, in place.

    Both the ``text`` and the ``tail`` of *elem* and of all its
    descendants are rewritten. The tree is walked with ``Element.iter()``
    rather than recursion, so deeply nested documents cannot exhaust the
    interpreter's recursion limit.

    Args:
        elem: Root ``xml.etree.ElementTree.Element`` to process. ``None``
            is accepted and ignored.
    """
    if elem is None:
        return
    # iter() yields elem itself first, then every descendant in document
    # order - the same visiting order as a pre-order recursive walk.
    for node in elem.iter():
        if node.text:
            node.text = normalize_text(node.text)
        if node.tail:
            node.tail = normalize_text(node.tail)
# Matches either a whole CDATA section (which must be left untouched) or an
# ampersand that does not begin one of XML's predefined entities or a
# numeric character reference.
_BARE_AMP_RE = re.compile(
    r"<!\[CDATA\[.*?\]\]>"
    r"|&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#[xX][0-9A-Fa-f]+);)",
    re.DOTALL,
)

# Captures the encoding pseudo-attribute of a leading XML declaration.
_DECLARED_ENCODING_RE = re.compile(
    r"""\A(\s*<\?xml[^>]*?\bencoding=)(["'])[^"']*\2"""
)

# Pattern used to rewrite item titles.
_TITLE_DIGITS_RE = re.compile(r'\d+')


def _escape_bare_ampersands(xml_text):
    """Escape stray ``&`` characters so *xml_text* can be parsed as XML."""
    return _BARE_AMP_RE.sub(
        # CDATA sections start with "<" and are passed through unchanged.
        lambda m: m.group(0) if m.group(0).startswith("<") else "&amp;",
        xml_text,
    )


def _force_utf8_declaration(xml_text):
    """Rewrite the encoding named in a leading XML declaration to UTF-8."""
    return _DECLARED_ENCODING_RE.sub(r"\g<1>\g<2>UTF-8\g<2>", xml_text, count=1)


def init_proxy(app):
    """Register the ``/proxy`` endpoint on *app*.

    The endpoint fetches the feed named by the ``url`` query parameter,
    serializes every ``<item>`` that carries a ``<guid>``, caches each
    serialized item in Redis under ``rss:item:<guid>`` with ``CACHE_TTL``
    as expiry, and responds with ``_head_html`` followed by the cached
    items and then the freshly serialized ones.

    Args:
        app: Object exposing a Flask-style ``route`` decorator.
    """

    @app.route("/proxy")
    def proxy():
        """Proxy an RSS feed with per-item caching keyed by item GUID.

        Returns:
            The assembled feed as ``application/xml``; ``400`` when the
            ``url`` parameter is missing or is not an http(s) URL; ``500``
            with a plain-text message when fetching or parsing fails.
        """
        raw_query = request.query_string.decode()
        if not raw_query.startswith("url="):
            return "Missing URL", 400
        # Everything after "url=" is taken as the target, which keeps any
        # "&"-separated parameters of the target URL itself.
        url = html.unescape(urllib.parse.unquote(raw_query[4:]))
        if not url:
            return "Missing URL", 400

        # The URL is caller-controlled: only fetch http(s) targets.
        # NOTE(review): this does not stop requests to internal hosts;
        # add an allow-list if the endpoint is publicly reachable.
        try:
            target = urllib.parse.urlsplit(url)
        except ValueError:
            return "Invalid URL", 400
        if target.scheme not in ("http", "https") or not target.netloc:
            return "Invalid URL", 400

        try:
            proxies = {"http": PROXY_URL, "https": PROXY_URL} if PROXY_URL else None
            upstream = requests.get(url, timeout=10, proxies=proxies)
            # Do not try to parse an upstream error page as a feed.
            upstream.raise_for_status()

            # upstream.text is already decoded, so the encoding named in
            # the XML declaration no longer describes the data.
            # NOTE(review): the reviewed code replaced "&" with "&" (a
            # no-op) and had an unparseable declaration rewrite plus an
            # always-false `'' not in xml_data` wrapper; the two helpers
            # below are a reconstruction of the apparent intent - confirm.
            xml_data = _force_utf8_declaration(
                _escape_bare_ampersands(upstream.text)
            )
            root = ET.fromstring(xml_data)

            cached_items = []
            new_items = []
            for item in root.findall(".//item"):
                guid = item.find("guid")
                if guid is None or not guid.text:
                    # Items without a GUID cannot be cached and are dropped.
                    continue

                cache_key = f"rss:item:{guid.text}"
                cached_item = rdb.get(cache_key)
                if cached_item:
                    cached_items.append(cached_item.decode())
                    continue

                title = item.find("title")
                if title is not None and title.text:
                    # NOTE(review): this removes every run of digits from
                    # the title - confirm that is the intended pattern.
                    title.text = _TITLE_DIGITS_RE.sub('', title.text)

                normalize_xml_texts(item)
                # The serialized item is emitted as-is: un-escaping it
                # would turn "&amp;"/"&lt;" back into raw "&"/"<" and
                # produce malformed XML.
                item_str = ET.tostring(item, encoding="unicode")
                rdb.setex(cache_key, CACHE_TTL, item_str)
                new_items.append(item_str)

            # NOTE(review): the document is `_head_html` plus the items and
            # nothing is appended after them - confirm that this yields a
            # complete RSS document.
            final_items = cached_items + new_items
            response_xml = f"""{_head_html}{"".join(final_items)}"""
            return Response(response_xml, content_type="application/xml; charset=utf-8")
        except Exception as e:
            # Request-handler boundary: report any failure as a 500. The
            # message can contain the caller-supplied URL, so it is sent
            # as plain text rather than HTML.
            return Response(
                f"Error: {e}",
                status=500,
                content_type="text/plain; charset=utf-8",
            )