From c8e10f487b155c0e3d97b81b5a8640b1f4228b08 Mon Sep 17 00:00:00 2001 From: Ari Archer Date: Thu, 2 Feb 2023 15:57:41 +0200 Subject: [PATCH] update @ Thu 2 Feb 15:57:41 EET 2023 Signed-off-by: Ari Archer --- blog.json | 7 +++++++ scripts/blog.py | 53 ++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/blog.json b/blog.json index 3c5922b..a600ad3 100644 --- a/blog.json +++ b/blog.json @@ -555,6 +555,13 @@ "version": 1, "time": 1675084012.897419, "keywords": "trans transgender transfem fem woman transwoman transgirl lgbt psychologist psychology mental health happiness comingout out closet lithuania" + }, + "doml-2-2023-02-02": { + "title": "RE9NTCAjMiAtLSAyMDIzLzAyLzAy", + "content": "aGVsbG8gd29ybGQKCnRvZGF5IHdhcyBraW5kYSBjb29sLCBpIGxpdGVyYWxseSB3YXNudCBhdCBzY2hvb2wgYW5kIHdhcyBqdXN0IHByb2dyYW1taW5nIGZvciA1IGhvdXJzLAphbHRob3VnaCBpdHMgbm90IHdoYXQgeW91IHRoaW5rIGl0IGlzLCBtb3N0IGxpa2VseQoKdG9kYXkgaSB3ZW50IHRvIGEgcHJvZ3JhbW1pbmcgb2x5bXBpYyBhZ2FpbiwgdGhlIGNvdW50cnkgcGFydCwgcGFydCAxLCBhbHRob3VnaCBpIGRvbnQKdGhpbmsgaWxsIGdvIHRvIHRoZSBmaW5hbGUsIHJlYXNvbiBiZWluZyBpIGdvdCBgMzUgLyAzMDBgIHBvaW50cywgaWsgaXQgc291bmRzIGhvcnJpYmxlLApiZWNhdXNlIGl0IGlzLCBpdHMgYDExLig2KSAlYCBidXQgcGVyIGF2ZXJhZ2UgaSBmZWVsIGxpa2UgaSBkaWQgb2theSBpZywsLCwgaSBtZWFuLCBpIGdvdAptb3JlIHBvaW50cyB0aGFuIGFub3RoZXIgZ3V5IGZvciBzdXJlLCB0aGVyZSB3ZXJlIDQgb2YgdXMsIGl0IHdhcyB2ZXJ5IGhhcmQsIGFsdGhvdWdoCmlsbCB0cnkgdG8gZG8gaXQgYXQgaG9tZSB0b2RheSBvciB3aGVuZXZlciBpIGhhdmUgdGltZSwgaSBzdGlsbCB3YW5uYSBzZWUgaWYgaW0gYWJsZSB0bwpkbyBpdCBpbiBwZWFjZSwgYmVjYXVzZSBpbiB0aGUgZW52aXJvbm1lbnQgaSB3YXMgd29ya2luZyBpIHdhcyBhIGJpdCBzY2FyZWQgYmVjYXVzZSBpIHdhc250CmV2ZW4gZG9pbmcgaXQgaW4gbXkgb3duIHNjaG9vbCBsb2wsIG5ldyBwZW9wbGUsIHRlYWNoZXJzLCBhbHRob3VnaCBnb3QgdG8ga25vdyBhIGNvdXBsZSBvZgpndXlzIGluIG15IHByb2dyYW1taW5nIGNsdWIgdGhpbmcgd2hpY2ggd2FzIHByZXR0eSBuaWNlLCB0aGV5cmUgbmljZQoKaSB0aGluayB0aGUgcHJvZ3JhbW1pbmcgc3R1ZmYgd2FzIGZ1biwgaSBtZWFuIGl0IGRpZCBodXJ0IHNpdHRpbmcgdGhlcmUgYW5kIHN0YXJpbmcgYXQKYSBjb21wdXRlciBzY3JlZW4gZm9yIDUgaG91cnMsIGJ1dCBlaCwgZG9lc250IG1hdHRlciwgYXQgbGVhc3QgaSBkaWQgdmVyeSB3ZWxsIGluIHRoZSBmaXJzdAoyIHBhcnRzLCBnb3QgbWF4IHBvaW50cywgMiBuZCBhbG1vc3QgbWF4LCBidXQgaSBmaXhlZCBpdCBhZnRlcgoKc28geWVhaCwgdGhhdHMgdGhhdCBwYXJ0IG9mIHRoZSBkYXksIGkgb25seSB3ZW50IHRvIGhhbGYgb2YgcGh5c2ljcyBsb2wsIHRoYXRzIGFsbCwKZGlkbnQgZG8gYW55dGhpbmcgZWxzZSBiZXNpZGVzIGNvZGUgYWZ0ZXIKCm5vdyBpbSBob21lLCBpIGNhbWUgYmFjaywgaW0gdG9vIHRpcmVkIHRvIGRvIG11Y2ggcm4gdGJoLCBpIHJlYWxseSB3YW50ZWQgdG8gc2hhcmUgd2hhdCBpCmRpZCB0b2RheSwgZXZlbiB0aG91Z2ggaXQgaXNudCBtdWNoIGZvciBub3cgYXQgbGVhc3QsIGFmdGVyIHRoaXMgaW0gcHJvYmFibHkgZ29pbmcgb24gYSB3YWxrLAptYXliZSB0byBteSBncmFuZG1hLCB3ZWxsIHNlZSwgc28geWUKCmhhdmUgYSBuaWNlIGRheSA8Mwo=", + "version": 1, + "time": 1675346248.689833, + "keywords": "doml web programming day of my life 2023 2023\/02\/02" } } } \ No newline at end of file diff --git a/scripts/blog.py b/scripts/blog.py index 0657c41..9666642 100755 --- a/scripts/blog.py +++ b/scripts/blog.py @@ -9,6 +9,7 @@ import string import sys import xml.etree.ElementTree as etree from base64 import b64decode, b64encode +from collections.abc import Collection from datetime import datetime from glob import iglob from html import escape as html_escape @@ -18,7 +19,7 @@ from shutil import rmtree from tempfile import gettempdir from threading import Thread from timeit import default_timer as code_timer -from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple +from typing import Any, Callable, Dict, List, Optional, Set, Tuple from warnings import filterwarnings as filter_warnings import ujson # type: ignore @@ -81,6 +82,35 @@ DEFAULT_CONFIG: Dict[str, Any] = { DEFAULT_CONFIG_FILE: str = "blog.json" HISTORY_FILE: str = ".blog_history" BLOG_VERSION: int = 1 +CONTEXT_WORDS: Tuple[str, ...] = ( + "the", + "a", + "about", + "etc", + "on", + "at", + "in", + "by", + "its", + "i", + "to", + "my", + "of", + "between", + "because", + "of", + "or", + "how", + "to", + "begin", + "is", + "this", + "person", + "important", + "homework", + "and", + "cause", +) BLOG_MARKDOWN_TEMPLATE: str = """

%s

@@ -222,7 +252,20 @@ HOME_PAGE_HTML_TEMPLATE: str = f""" """ -def sanitise_title(title: str, titleset: Iterable[str], _nosep: bool = False) -> str: +def remove_basic_punct(s: str) -> str: + return "".join(c for c in s if c not in "'\"()[]{}:;.,?!=#") + + +def sanitise_title( + title: str, titleset: Collection[str], /, nosep: bool = False +) -> str: + title = " ".join( + [ + w + for w in remove_basic_punct(title).lower().split() + if w not in CONTEXT_WORDS + ][:8] + ) _title: str = "" for char in title: @@ -234,13 +277,13 @@ def sanitise_title(title: str, titleset: Iterable[str], _nosep: bool = False) -> else "" ) - _title = _title.lower().rstrip("-") + _title = _title.rstrip("-") return ( _title if _title not in titleset and _title.strip() else sanitise_title( - _title + ("" if _nosep else "-") + random.choice(string.digits), + _title + ("" if nosep else "-") + random.choice(string.digits), titleset, True, ) @@ -439,7 +482,7 @@ def new_blog(config: Dict[str, Any]) -> Tuple[int, Dict[str, Any]]: us_title: str = title s_title: str = sanitise_title(us_title, config["blogs"]) else: - raise RuntimeError("Unreachable") + return EXIT_ERR, config blog: Dict[str, Any] = { "title": b64encode(us_title.encode()).decode(),