From 0d10b72cc8c4ac9133ca4869c8f751753512cd6b Mon Sep 17 00:00:00 2001 From: Ari Archer Date: Sun, 29 Oct 2023 17:41:58 +0200 Subject: [PATCH] update @ Sun Oct 29 17:41:58 EET 2023 Signed-off-by: Ari Archer --- .gitignore | 1 + blog.json | 4 +- scripts/blog.py | 283 ++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 279 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index 184abe7..38afcb9 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ venv/ /rss.xml /sitemap.xml /robots.txt +/stats/ diff --git a/blog.json b/blog.json index f3abb3b..9d90767 100644 --- a/blog.json +++ b/blog.json @@ -46,7 +46,7 @@ "website": "https://ari-web.xyz", "blog": "https://blog.ari-web.xyz", "source": "/git", - "visitor-count": "https://server.ari-web.xyz/visit", + "visitor-count": "/visit", "comment": "/c", "theme": { "primary": "#262220", @@ -124,6 +124,8 @@ "server-port": 8080, "post-preview-size": 196, "read-wpm": 150, + "top-words": 64, + "top-tags": 64, "posts": { "bluey": { "title": "bluey", diff --git a/scripts/blog.py b/scripts/blog.py index fa2a3eb..d8e5687 100755 --- a/scripts/blog.py +++ b/scripts/blog.py @@ -16,6 +16,7 @@ import sys import tempfile import typing import xml.etree.ElementTree as etree +from collections import Counter from glob import iglob from html import escape as html_escape from threading import Thread @@ -29,6 +30,7 @@ import mistune.plugins import unidecode import web_mini from readtime import of_markdown as read_time_of_markdown # type: ignore +from readtime.result import Result as MarkdownResult # type: ignore __version__: typing.Final[int] = 2 GEN: typing.Final[str] = f"ari-web blog generator version {__version__}" @@ -137,6 +139,8 @@ DEFAULT_CONFIG: dict[str, typing.Any] = { "server-port": 8080, "post-preview-size": 196, "read-wpm": 150, + "top-words": 64, + "top-tags": 64, "posts": {}, } @@ -238,6 +242,9 @@ POST_TEMPLATE: typing.Final[str] = ( home + stats + + comment @@ -286,6 +293,9 @@ INDEX_TEMPLATE: typing.Final[str] = ( /> + stats + + comment @@ -308,6 +318,109 @@ INDEX_TEMPLATE: typing.Final[str] = ( """ ) +STATS_TEMPLATE: typing.Final[str] = ( + HTML_BEGIN + + """ +{blog_title} -> stats + + + + + +
+

stats of {blog_header}

+ + +
+
+
+
    +
  • total count of blog posts : {post_count}
  • +
  • edited post count : {edited_post_count}, {edited_post_count_p:.2f}%
  • +
  • + total read time : +
      +
    • average read time :
    • +
    +
  • + +
  • + content + +
      +
    • characters : {char_count}
    • +
        +
      • average count of characters : {avg_chars:.2f}
      • +
      +
    + +
      +
    • words : {word_count}
    • +
        +
      • average count of words : {avg_words:.2f}
      • +
      • average word length : {avg_word_len:.2f}
      • +
      • + top {top_words} used words +
          {word_most_used}
        +
      • +
      +
    + +
      +
    • tags : {tag_count}
    • +
        +
      • average count of tags : {avg_tags}
      • +
      • + top {top_tags} used tags +
          {tags_most_used}
        +
      • +
      +
    +
  • + +
  • + time ( GMT ) +
      +
    • average posts by year : {posts_by_yr_avg}
        {posts_by_yr}
    • +
    • average posts by month : {posts_by_month_avg}
        {posts_by_month}
    • +
    • average posts by day : {posts_by_day_avg}
        {posts_by_day}
    • +
    • average posts by hour : {posts_by_hr_avg}
        {posts_by_hr}
    • +
    +
  • +
+
+
+ + +""" +) + if NCI: import http.server @@ -390,6 +503,11 @@ def slugify( ) +def rf_format_time(ts: float) -> typing.Tuple[datetime.datetime, str]: + d: datetime.datetime = datetime.datetime.utcfromtimestamp(ts) + return d, d.strftime("%Y-%m-%d %H:%M:%S") + + def rformat_time(ts: float) -> str: return datetime.datetime.utcfromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S") @@ -485,6 +603,53 @@ def min_css_file(file: str, out: str) -> None: ocss.write(web_mini.css.minify_css(icss.read())) +def sorted_post_counter( + c: Counter[int], + pcount: int, + fix: str, +) -> typing.Dict[str, typing.Any]: + s: int = sum(c.values()) + avg: float = s / len(c) + + return { + f"posts_by_{fix}": " ".join( + f"
  • -- {p} post{'' if p == 1 else 's'}, {p / pcount * 100:.2f}%
  • " + for v, p in c.most_common() + ), + f"posts_by_{fix}_avg": f"{round(avg, 2)}, {round(avg / s * 100, 2)}%", + } + + +def s_to_str(seconds: float) -> str: + minutes, sec = divmod(seconds, 60) + hours, minutes = divmod(minutes, 60) + days, hours = divmod(hours, 24) + + periods: typing.Tuple[typing.Tuple[float, str, str], ...] = ( + (round(days, 2), "day", "days"), + (round(hours, 2), "hour", "hours"), + (round(minutes, 2), "minute", "minutes"), + (round(sec, 2), "second", "seconds"), + ) + + time_periods: typing.List[str] = [] + + for period in periods: + if period[0] != 0: + time_periods.append( + "{} {}".format(period[0], period[1] if period[0] == 1 else period[2]) + ) + + readable_text: str = ", ".join(time_periods[:-1]) + + if len(time_periods) > 1: + readable_text += " and " + time_periods[-1] + else: + readable_text = time_periods[0] + + return f"{readable_text} ( {round(seconds, 2)} second{'' if seconds == 1 else 's'} )" + + # markdown TITLE_LINKS_RE: typing.Final[str] = r"<#:[^>]+?>" @@ -747,7 +912,11 @@ def build(config: dict[str, typing.Any]) -> int: if os.path.exists(config["posts-dir"]): shutil.rmtree(config["posts-dir"]) + if os.path.exists("stats"): + shutil.rmtree("stats") + os.makedirs(config["posts-dir"], exist_ok=True) + os.makedirs("stats", exist_ok=True) llog("building blog") @@ -769,12 +938,37 @@ def build(config: dict[str, typing.Any]) -> int: with open(critp, "r") as fp: post_crit_css = web_mini.css.minify_css(fp.read()) + rt: typing.List[int] = [] + cc: typing.List[int] = [] + ws: Counter[str] = Counter() + tgs: Counter[str] = Counter() + + py: Counter[int] = Counter() + pm: Counter[int] = Counter() + pd: Counter[int] = Counter() + ph: Counter[int] = Counter() + def build_post(slug: str, post: dict[str, typing.Any]) -> None: ct: float = ctimer() post_dir: str = f"{config['posts-dir']}/{slug}" os.makedirs(post_dir) + rtm: MarkdownResult = read_time_of_markdown(post["content"], config["read-wpm"]) + cont: str = post["content"] + " " + post["title"] + + rt.append(rtm.seconds) + cc.append(len(cont)) + ws.update(Counter(cont.split())) + tgs.update(Counter(post["keywords"])) + + dt, s = rf_format_time(post["created"]) + + py[dt.year] += 1 + pm[dt.month] += 1 + pd[dt.day] += 1 + ph[dt.hour] += 1 + with open(f"{post_dir}/index.html", "w") as html: html.write( web_mini.html.minify_html( @@ -797,12 +991,9 @@ def build(config: dict[str, typing.Any]) -> int: post_title=html_escape(post["title"]), author=author, locale=config["locale"], - post_creation_time=rformat_time(post["created"]), + post_creation_time=s, post_description=html_escape(post["description"]), - post_read_time=read_time_of_markdown( - post["content"], - config["read-wpm"], - ).text, + post_read_time=rtm.text, post_edit_time=( "" if "edited" not in post @@ -838,7 +1029,7 @@ def build(config: dict[str, typing.Any]) -> int: web_mini.html.minify_html( INDEX_TEMPLATE.format( # type: ignore lang=lang, - keywords=html_escape(", ".join(config["blog-keywords"])), + keywords=(bkw := html_escape(", ".join(config["blog-keywords"]))), theme_type=config["theme"]["type"], theme_primary=config["theme"]["primary"], theme_secondary=config["theme"]["secondary"], @@ -852,8 +1043,8 @@ def build(config: dict[str, typing.Any]) -> int: author=author, locale=config["locale"], license=config["license"], - blog_description=html_escape(config["description"]), - blog_header=html_escape(config["header"]), + blog_description=(bd := html_escape(config["description"])), + blog_header=(bh := html_escape(config["header"])), latest_post_path=f"{config['posts-dir']}/{latest_post[0]}", latest_post_title_trunc=html_escape( trunc(latest_post[1]["title"], config["recent-title-trunc"]) @@ -877,6 +1068,80 @@ def build(config: dict[str, typing.Any]) -> int: for t in ts: t.join() + char_count: int = sum(cc) + post_count: int = len(config["posts"]) + epost_count: int = sum("edited" in p for p in config["posts"].values()) + + rts: int = sum(rt) + + wcs: int = sum(ws.values()) + wcl: int = len(ws) + + tcs: int = sum(tgs.values()) + tcl: int = len(tgs) + + avg_chars: float = char_count / post_count + avg_words: float = wcs / post_count + avg_tags: float = tcs / post_count + + with open("stats/index.html", "w") as stats: + stats.write( + web_mini.html.minify_html( + STATS_TEMPLATE.format( + lang=lang, + keywords=bkw + ", stats, statistics", + theme_type=config["theme"]["type"], + theme_primary=config["theme"]["primary"], + theme_secondary=config["theme"]["secondary"], + blog=config["blog"], + path="", + styles=styles, + critical_css=crit_css, + gen=GEN, + locale=config["locale"], + blog_title=blog_title, + blog_description=bd, + blog_header=bh, + visitor_count=config["visitor-count"], + comment=config["comment"], + website=config["website"], + source=config["source"], + rss=config["rss-file"], + post_count=post_count, + edited_post_count=epost_count, + edited_post_count_p=epost_count / post_count * 100, + read_time=s_to_str(rts), + avg_read_time=s_to_str(rts / post_count), + char_count=char_count, + avg_chars=avg_chars, + word_count=wcs, + avg_words=avg_words, + avg_word_len=avg_chars / avg_words, + top_words=config["top-words"], + word_most_used=" ".join( + f"
  • {html_escape(w)}, {u} use{'' if u == 1 else 's'}, {u / wcl * 100:.2f}%
  • " + for w, u in ws.most_common(config["top-words"]) + ), + tag_count=tcs, + avg_tags=avg_tags, + top_tags=config["top-tags"], + tags_most_used=" ".join( + f"
  • {html_escape(w)}, {u} use{'' if u == 1 else 's'}, {u / tcl * 100:.2f}%
  • " + for w, u in tgs.most_common(config["top-tags"]) + ), + **sorted_post_counter(py, post_count, "yr"), + **sorted_post_counter(pm, post_count, "month"), + **sorted_post_counter(pd, post_count, "day"), + **sorted_post_counter(ph, post_count, "hr"), + author=config["author"], + email=config["email"], + license=config["license"], + ) + ) + ) + + lnew(f"generated {stats.name!r}") + return 0 @@ -979,6 +1244,7 @@ def sitemap(config: dict[str, typing.Any]) -> int: ("", config["website"]), ("", config["blog"]), ("", f"{config['blog']}/{config['rss-file']}"), + ("", f'{config["blog"]}/stats'), ) + tuple(config["posts"].items()): llog(f"adding {slug or post!r} to sitemap") @@ -1111,6 +1377,7 @@ def clean(config: dict[str, typing.Any]) -> int: config["rss-file"], "robots.txt", "sitemap.xml", + "stats", ): if os.path.exists(pattern): remove(pattern)