118 lines
4.2 KiB
Python
Executable File
118 lines
4.2 KiB
Python
Executable File
import html
|
|
import urllib.parse
|
|
import os
|
|
import requests
|
|
import redis
|
|
import xml.etree.ElementTree as ET
|
|
import re
|
|
import unicodedata
|
|
from flask import request, Response
|
|
|
|
# Optional proxy used for outgoing feed requests (both http and https);
# when unset, requests are made without an explicit proxy mapping.
PROXY_URL = os.environ.get("PROXY_URL")

# Connection URL of the Redis instance backing the per-item cache.
REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0")

# Expiry passed to Redis SETEX for every cached feed item, in seconds.
CACHE_TTL = int(os.environ.get("CACHE_TTL", 3600))

# Module-wide Redis client shared by the proxy view.
rdb = redis.from_url(REDIS_URL)
|
|
|
|
|
|
def normalize_text(text):
    """Return *text* converted to Unicode NFKC form.

    Falsy input (``None`` or an empty string) is returned unchanged, so the
    function can be applied to optional XML text without a prior check.
    """
    return unicodedata.normalize("NFKC", text) if text else text
|
|
|
|
|
|
def extract_viewtopic_link(description):
    """Return the first absolute http(s) link found in *description*.

    The description is HTML-unescaped first, so both raw markup
    (``<a href="...">``) and entity-escaped markup (``&lt;a href=...``)
    are handled. Only double-quoted ``href`` values that start with
    ``http://`` or ``https://`` are considered.

    Args:
        description: Text of an RSS ``<description>`` element. May be
            ``None`` or empty when the element has no content.

    Returns:
        The URL of the first matching ``href``, or ``None`` when there is
        no description or no matching link.
    """
    # An empty <description/> yields text=None; html.unescape() would raise
    # TypeError on it, so treat "nothing to search" as "no link".
    if not description:
        return None

    decoded_description = html.unescape(description)
    match = re.search(r'href="(https?://[^"]+)"', decoded_description)
    return match.group(1) if match else None
|
|
|
|
|
|
def normalize_xml_texts(elem):
    """Apply normalize_text to every text node under *elem*, in place.

    Both the ``text`` and the ``tail`` of *elem* itself and of each of its
    descendants are rewritten; empty or missing values are left alone.
    """
    # Element.iter() yields elem first and then all descendants, which covers
    # exactly the nodes a depth-first recursive walk would visit.
    for node in elem.iter():
        if node.text:
            node.text = normalize_text(node.text)
        if node.tail:
            node.tail = normalize_text(node.tail)
|
|
|
|
|
|
# Matches an XML declaration (plus any leading whitespace) at document start.
_XML_DECLARATION_RE = re.compile(r"^\s*<\?xml[^>]*\?>")

# Matches either a complete CDATA section (group 1, which must be left as is)
# or an ampersand that does not begin a reference XML resolves on its own
# (the five predefined entities and numeric character references).
_BARE_AMPERSAND_RE = re.compile(
    r"(<!\[CDATA\[.*?\]\]>)"
    r"|&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9A-Fa-f]+);)",
    re.DOTALL,
)


def _prepare_feed_xml(xml_data):
    """Return already-decoded feed text in a form ``ET.fromstring`` accepts.

    * Drops a leading byte-order mark and any existing XML declaration, then
      prepends a single UTF-8 declaration. The text is already a ``str``, so
      the originally declared encoding no longer applies, and a document must
      not carry two declarations.
    * Escapes ampersands that are neither predefined XML entities nor numeric
      character references (bare ``&`` or HTML entities such as ``&nbsp;``),
      which would otherwise abort parsing. CDATA sections are skipped because
      their content is literal.
    """
    xml_data = xml_data.lstrip("\ufeff")
    xml_data = _XML_DECLARATION_RE.sub("", xml_data, count=1)
    xml_data = _BARE_AMPERSAND_RE.sub(
        lambda m: m.group(1) or "&amp;", xml_data
    )
    return f'<?xml version="1.0" encoding="UTF-8"?>{xml_data}'


def init_proxy(app):
    """Register the ``/proxy`` RSS endpoint on the Flask application *app*.

    The endpoint expects a query string of the form ``url=<feed url>``,
    downloads that feed, and returns a rebuilt RSS 2.0 document. Each
    ``<item>`` that has a non-empty ``<guid>`` is cached in Redis under its
    original GUID for ``CACHE_TTL`` seconds; items without a GUID are dropped.
    For items not yet cached, the GUID is replaced by the first http(s) link
    found in the item's description (when there is one) and all text nodes
    are NFKC-normalized before the item is serialized and cached.
    """

    @app.route("/proxy")
    def proxy():
        """Proxy RSS feed with per-item caching and GUID replacement.

        Returns:
            The rebuilt feed as ``application/xml``; ``400`` when the query
            string does not start with ``url=`` or the URL is empty or not
            http(s); ``500`` with the error text when fetching, parsing, or
            caching fails.
        """
        # Everything after "url=" is taken as the target, so a feed URL that
        # itself contains unescaped "&" is not split into separate arguments.
        raw_query = request.query_string.decode()
        if not raw_query.startswith("url="):
            return "Missing URL", 400

        url = urllib.parse.unquote(raw_query[4:])
        if not url:
            return "Missing URL", 400

        # NOTE(security): the target URL is caller-controlled. Only the scheme
        # is validated here; there is no host allow-list, so this endpoint can
        # still be pointed at internal services. Restrict it at deployment
        # level if it is reachable by untrusted clients.
        try:
            scheme = urllib.parse.urlsplit(url).scheme
        except ValueError:
            scheme = ""
        if scheme not in ("http", "https"):
            return "Unsupported URL scheme", 400

        try:
            proxies = (
                {"http": PROXY_URL, "https": PROXY_URL} if PROXY_URL else None
            )
            upstream = requests.get(url, timeout=10, proxies=proxies)
            # Do not try to parse an upstream error page as a feed.
            upstream.raise_for_status()

            # NOTE(security): the stdlib XML parser is used on remote content;
            # see the "XML vulnerabilities" section of the Python docs.
            root = ET.fromstring(_prepare_feed_xml(upstream.text))

            # Serialized items in upstream order, cached and fresh alike.
            feed_items = []
            for item in root.findall(".//item"):
                guid = item.find("guid")
                if guid is None or not guid.text:
                    continue

                # The cache key always uses the ORIGINAL guid, so a later
                # request finds the entry even though the stored item carries
                # the replaced guid.
                cache_key = f"rss:item:{guid.text}"
                cached_item = rdb.get(cache_key)
                if cached_item:
                    feed_items.append(cached_item.decode())
                    continue

                description = item.find("description")
                if description is not None and description.text:
                    new_guid = extract_viewtopic_link(description.text)
                    if new_guid:
                        print(f"Заменяю GUID: {guid.text} → {new_guid}")
                        # Drop attributes such as isPermaLink that described
                        # the old value.
                        guid.attrib.clear()
                        guid.text = new_guid

                # Normalize after the guid swap so the new guid is covered.
                normalize_xml_texts(item)

                # Keep the serializer's XML escaping intact: unescaping the
                # result would put raw "<", ">" and "&" back into the output
                # and make the response malformed.
                item_str = ET.tostring(item, encoding="unicode")
                rdb.setex(cache_key, CACHE_TTL, item_str)
                feed_items.append(item_str)

            response_xml = "\n".join(
                [
                    '<?xml version="1.0" encoding="UTF-8"?>',
                    '<rss version="2.0">',
                    "<channel><title>Tapochek.net RSS</title>",
                    "<link>http://tapochek.net/</link>",
                    "<ttl>15</ttl>",
                    "".join(feed_items),
                    "</channel></rss>",
                ]
            )

            return Response(
                response_xml, content_type="application/xml; charset=utf-8"
            )

        except Exception as e:
            # Boundary handler for the view: record the traceback, then report
            # the failure to the client in the same shape as before.
            app.logger.exception("Failed to proxy feed %s", url)
            return f"Error: {e}", 500
|
|
|