From 05d2ca3089ec00d9fe208413a34e59ae160349d1 Mon Sep 17 00:00:00 2001 From: Ari Archer Date: Thu, 11 Jul 2024 04:41:40 +0300 Subject: [PATCH] update @ Thu Jul 11 04:41:40 EEST 2024 Signed-off-by: Ari Archer --- scripts/blog.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/blog.py b/scripts/blog.py index 3c7dedc..f38628d 100755 --- a/scripts/blog.py +++ b/scripts/blog.py @@ -959,6 +959,9 @@ def build(config: dict[str, typing.Any]) -> int: pd: Counter[int] = Counter() ph: Counter[int] = Counter() + w_regex: re.Pattern[str] = re.compile(r"\b[a-zA-Z']+\b") + url_regex: re.Pattern[str] = re.compile(r"https?://\S+|www\.\S+") + def build_post(slug: str, post: dict[str, typing.Any]) -> None: ct: float = ctimer() @@ -966,11 +969,11 @@ def build(config: dict[str, typing.Any]) -> int: os.makedirs(post_dir) rtm: MarkdownResult = read_time_of_markdown(post["content"], config["read-wpm"]) - cont: str = post["content"] + " " + post["title"] + cont: str = url_regex.sub("", post["content"]) + " " + post["title"] rt.append(rtm.seconds) cc.append(len(cont)) - ws.update(Counter(cont.lower().split())) + ws.update(Counter(w_regex.findall(cont.lower().strip()))) tgs.update(Counter(list(map(str.lower, post["keywords"])))) dt, s = rf_format_time(post["created"])