From b9d8c518674794b8d017e982de3f24347dc8f492 Mon Sep 17 00:00:00 2001 From: Ari Archer Date: Thu, 2 Jan 2025 11:51:11 +0200 Subject: [PATCH] Use Flask-NoAI for AI blocking. Signed-off-by: Ari Archer --- requirements.txt | 1 + src/aw/__init__.py | 37 +++---------------------------------- 2 files changed, 4 insertions(+), 34 deletions(-) diff --git a/requirements.txt b/requirements.txt index 406998f..31202b8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ web-mini flask-limiter flask-ishuman pymemcache +flask-noai diff --git a/src/aw/__init__.py b/src/aw/__init__.py index ec9bd19..f8de403 100644 --- a/src/aw/__init__.py +++ b/src/aw/__init__.py @@ -13,6 +13,7 @@ from typing import Any import flask import web_mini from werkzeug.middleware.proxy_fix import ProxyFix +from flask_noai import noai from . import util @@ -35,39 +36,7 @@ def assign_http(app: flask.Flask) -> flask.Flask: robots: str = ( f"""User-agent: * Sitemap: {app.config['PREFERRED_URL_SCHEME']}://{app.config['DOMAIN']}/sitemap.xml -Disallow: /vote/*/* - -# We are not slaves for machines. - -User-agent: Amazonbot -User-agent: anthropic-ai -User-agent: Applebot-Extended -User-agent: Bytespider -User-agent: CCBot -User-agent: ChatGPT-User -User-agent: ClaudeBot -User-agent: Claude-Web -User-agent: cohere-ai -User-agent: Diffbot -User-agent: FacebookBot -User-agent: facebookexternalhit -User-agent: FriendlyCrawler -User-agent: Google-Extended -User-agent: GPTBot -User-agent: ICC-Crawler -User-agent: ImagesiftBot -User-agent: img2dataset -User-agent: meta-externalagent -User-agent: OAI-SearchBot -User-agent: Omgili -User-agent: Omgilibot -User-agent: PerplexityBot -User-agent: PetalBot -User-agent: Scrapy -User-agent: Timpibot -User-agent: VelenPublicWebCrawler -User-agent: YouBot -Disallow: /""" +Disallow: /vote/*/*""" ) return flask.Response(robots, mimetype="text/plain") @@ -210,4 +179,4 @@ def create_app(name: str) -> flask.Flask: "b64encode": b64encode, } - return assign_http(app) + return noai(assign_http(app))