mirror of
https://github.com/ArtistGrid/archiver.git
synced 2025-11-01 17:21:49 +00:00
139 lines
3.8 KiB
Python
139 lines
3.8 KiB
Python
import os
|
|
import threading
|
|
import time
|
|
from flask import Flask, request, abort
|
|
import requests
|
|
from urllib.parse import unquote
|
|
|
|
app = Flask(__name__)

# Globals
# Guards `logs` and `current_job`. Re-entrant so that helpers which take the
# lock themselves (such as log()) are safe to call from code that already
# holds it; a plain threading.Lock would self-deadlock in that situation.
lock = threading.RLock()
current_job = None  # threading.Thread of the most recent archive request, or None
logs = []  # store logs as list of strings, limited size
MAX_LOG_LINES = 1000  # limit log length
|
|
|
|
|
|
def log(msg):
    """Print a timestamped message and keep it in the in-memory log buffer.

    The entry is formatted as ``[YYYY-MM-DD HH:MM:SS] msg`` using local time.
    The shared ``logs`` list is only modified while holding ``lock``; once it
    holds more than ``MAX_LOG_LINES`` entries the oldest one is discarded.
    """
    # strftime() without an explicit time tuple formats the current local time.
    stamp = time.strftime("%Y-%m-%d %H:%M:%S")
    line = f"[{stamp}] {msg}"
    print(line)
    with lock:
        logs.append(line)
        if len(logs) > MAX_LOG_LINES:
            del logs[0]
|
|
|
|
|
|
def archive_url(target_url, *, timeout=120):
    """Ask the Wayback Machine to archive ``target_url``.

    Sends a GET request to ``https://web.archive.org/save/<target_url>`` and
    logs the outcome.

    Args:
        target_url: URL to archive; appended verbatim to the save endpoint.
        timeout: Timeout in seconds handed to ``requests.get``. Without a
            timeout a stalled connection would block the calling thread
            indefinitely.

    Returns:
        True if the save endpoint answered with HTTP 200, False for any other
        status code or if the request raised an exception.
    """
    log(f"Starting archive request for: {target_url}")
    try:
        save_url = f"https://web.archive.org/save/{target_url}"
        response = requests.get(save_url, timeout=timeout)
        if response.status_code == 200:
            log(f"Archive request successful for: {target_url}")
            return True
        log(f"Archive request failed with status {response.status_code} for: {target_url}")
        return False
    except Exception as e:
        # Deliberately broad: this runs in a background thread and callers
        # rely on always getting a bool back rather than an exception.
        log(f"Exception during archive request: {e}")
        return False
|
|
|
|
|
|
def archive_job(target_url, password):
    """Background worker: wait ten minutes, then archive ``target_url``.

    The job only proceeds if, after the waiting period, it is still the job
    registered in the module-level ``current_job``; otherwise a newer request
    has superseded it and it exits without archiving.

    Args:
        target_url: URL to submit to the archive.
        password: Password supplied with the request; re-checked against the
            ``ARCHIVE_PASSWORD`` environment variable before doing any work.

    Returns:
        None.
    """
    global current_job
    expected_password = os.getenv("ARCHIVE_PASSWORD")

    log(f"Received archive request for '{target_url}'")

    if password != expected_password:
        log("Password mismatch detected. Aborting archive request.")
        return

    log("Password matched successfully.")
    log("Waiting 10 minutes before archiving...")
    time.sleep(600)

    this_thread = threading.current_thread()

    # Decide under the lock, but log only after releasing it: log() acquires
    # the same lock, so calling it while holding a non-reentrant lock would
    # deadlock this thread and every later log() caller.
    with lock:
        superseded = current_job is not this_thread
    if superseded:
        log("Detected newer archive request. Cancelling this one.")
        return

    success = archive_url(target_url)
    if success:
        log("Archiving process completed successfully.")
    else:
        log("Archiving process failed.")

    with lock:
        # Only clear the slot if it still belongs to this job. A newer request
        # may have registered itself while archive_url() was running, and
        # resetting the slot then would make that newer job cancel itself.
        if current_job is this_thread:
            current_job = None
|
|
|
|
|
|
@app.route('/')
@app.route('/index.html')
def index():
    """Serve the buffered log lines as a simple HTML page.

    The page shows white monospace text on a black background. Log entries
    include request-supplied text (for example the requested URL), so the
    content is HTML-escaped before being placed into the page; otherwise
    anyone able to get a line logged could inject markup or script.

    Returns:
        The HTML document as a string.
    """
    from html import escape

    # Copy the lines out while holding the lock; build the page afterwards.
    with lock:
        content = "\n".join(logs[-MAX_LOG_LINES:])

    safe_content = escape(content)
    page = f"""
<html>
<head>
<title>Archive Logs</title>
<style>
body {{
    background-color: black;
    color: white;
    font-family: monospace;
    white-space: pre-wrap;
    padding: 20px;
}}
</style>
</head>
<body>
{safe_content}
</body>
</html>
"""
    return page
|
|
|
|
|
|
@app.route('/archive/<path:url_to_archive>', methods=['GET'])
def archive_endpoint(url_to_archive):
    """Accept an archive request and schedule it on a background thread.

    The target URL is taken from the request path and the password from the
    ``password`` query parameter. A newly accepted request replaces any job
    that is still waiting; the replaced job notices this after its delay and
    exits without archiving.

    Args:
        url_to_archive: Path segment captured by the route; URL-decoded before
            use.

    Returns:
        A ``(body, 202, headers)`` tuple with a plain-text confirmation.

    Raises:
        HTTP 500 (via ``abort``) if ``ARCHIVE_PASSWORD`` is not set.
        HTTP 401 (via ``abort``) if the supplied password does not match.
    """
    import hmac

    global current_job

    # URL decode the target URL from the path
    target_url = unquote(url_to_archive)

    # Get password from query parameter
    password = request.args.get('password', '')

    log(f"Incoming archive request for URL: {target_url}")

    expected_password = os.getenv("ARCHIVE_PASSWORD")
    if not expected_password:
        log("No ARCHIVE_PASSWORD set in environment. Rejecting request.")
        abort(500, "Server configuration error.")

    # Constant-time comparison so response timing does not reveal how much of
    # the password was correct. Compared as bytes because compare_digest()
    # rejects non-ASCII str arguments.
    password_ok = hmac.compare_digest(
        password.encode("utf-8"), expected_password.encode("utf-8")
    )
    if not password_ok:
        log("Password mismatch for archive request.")
        abort(401, "Unauthorized: Password mismatch.")

    worker = threading.Thread(target=archive_job, args=(target_url, password))

    # Swap the job and start it under the lock, but do the logging afterwards:
    # log() acquires the same lock, so calling it while holding a
    # non-reentrant lock would deadlock the request.
    with lock:
        replaced_running_job = current_job is not None and current_job.is_alive()
        current_job = worker
        worker.start()

    if replaced_running_job:
        log("Cancelling previous archive job due to new request.")
    log(f"Started new archive job for: {target_url}")

    body = (
        f"Archive request for {target_url} accepted. "
        "Archiving will start after 10 minutes if no newer request is received.\n"
    )
    # The body echoes request-supplied text, so serve it as plain text rather
    # than Flask's default text/html.
    return body, 202, {"Content-Type": "text/plain; charset=utf-8"}
|
|
|
|
|
|
# Script entry point: announce startup in the log, then serve on every
# network interface on port 8080.
if __name__ == "__main__":
    log("Starting Archive Server...")
    app.run(
        host="0.0.0.0",
        port=8080,
    )
|