1
0
mirror of https://github.com/squidfunk/mkdocs-material.git synced 2024-11-23 23:21:00 +01:00

Removed readtime and lxml dependencies

This commit is contained in:
squidfunk 2023-08-31 11:13:06 +02:00
parent 1cb61a9a6f
commit 0d3cd8a433
No known key found for this signature in database
GPG Key ID: 5ED40BC4F9C436DF
6 changed files with 182 additions and 30 deletions

View File

@ -47,16 +47,12 @@ RUN \
git \ git \
git-fast-import \ git-fast-import \
jpeg-dev \ jpeg-dev \
libxml2 \
libxslt \
openssh \ openssh \
zlib-dev \ zlib-dev \
&& \ && \
apk add --no-cache --virtual .build \ apk add --no-cache --virtual .build \
gcc \ gcc \
libffi-dev \ libffi-dev \
libxml2-dev \
libxslt-dev \
musl-dev \ musl-dev \
&& \ && \
pip install --no-cache-dir --upgrade pip \ pip install --no-cache-dir --upgrade pip \

View File

@ -23,7 +23,6 @@ from __future__ import annotations
import logging import logging
import os import os
import posixpath import posixpath
import readtime
import yaml import yaml
from babel.dates import format_date from babel.dates import format_date
@ -43,6 +42,7 @@ from yaml import SafeLoader
from .author import Authors from .author import Authors
from .config import BlogConfig from .config import BlogConfig
from .readtime import readtime
from .structure import Archive, Category, Excerpt, Post, View from .structure import Archive, Category, Excerpt, Post, View
from .templates import url_filter from .templates import url_filter
@ -260,17 +260,6 @@ class BlogPlugin(BasePlugin[BlogConfig]):
# Append to list of authors # Append to list of authors
page.authors.append(self.authors[name]) page.authors.append(self.authors[name])
# Compute readtime of post, if enabled and not explicitly set
if self.config.post_readtime:
rate = self.config.post_readtime_words_per_minute
# There's a bug in the readtime library which causes it to fail if
# the input string contains emojis - see https://t.ly/qEoHq
if not page.config.readtime:
data = markdown.encode("unicode_escape")
read = readtime.of_markdown(data, rate)
page.config.readtime = read.minutes
# Extract settings for excerpts # Extract settings for excerpts
separator = self.config.post_excerpt_separator separator = self.config.post_excerpt_separator
max_authors = self.config.post_excerpt_max_authors max_authors = self.config.post_excerpt_max_authors
@ -295,6 +284,22 @@ class BlogPlugin(BasePlugin[BlogConfig]):
page.excerpt.authors = page.authors[:max_authors] page.excerpt.authors = page.authors[:max_authors]
page.excerpt.categories = page.categories[:max_categories] page.excerpt.categories = page.categories[:max_categories]
# Process posts
def on_page_content(self, html, *, page, config, files):
    # Nothing to do if the plugin is disabled, or if the page is not a post
    # managed by this instance - the plugin supports multiple instances, so
    # each one must only touch its own posts
    if not self.config.enabled or page not in self.blog.posts:
        return

    # Readtime computation is optional
    if not self.config.post_readtime:
        return

    # Only compute readtime when the post does not already carry a value,
    # so a readtime set explicitly on the post is kept
    rate = self.config.post_readtime_words_per_minute
    if page.config.readtime:
        return
    page.config.readtime = readtime(html, rate)
# Register template filters for plugin # Register template filters for plugin
def on_env(self, env, *, config, files): def on_env(self, env, *, config, files):
if not self.config.enabled: if not self.config.enabled:

View File

@ -0,0 +1,74 @@
# Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import re
from html.parser import HTMLParser
from math import ceil
# -----------------------------------------------------------------------------
# Functions
# -----------------------------------------------------------------------------
# Compute readtime - we first used the original readtime library, but the list
# of dependencies it brings with it increased the size of the Docker image by
# 20 MB (packed), which is an increase of 50%. For this reason, we adapt the
# original readtime algorithm to our needs - see https://t.ly/fPZ7L
def readtime(html: str, words_per_minute: int) -> int:
    """Estimate how long it takes to read the given HTML, in whole minutes.

    The HTML is fed through ``ReadtimeParser``, which collects the text
    chunks and counts the images. Reading time is the number of words divided
    by the reading rate, plus a per-image allowance, rounded up to minutes.

    Args:
        html: HTML markup to analyze.
        words_per_minute: Reading speed used to convert words to time.

    Returns:
        The estimated reading time in minutes, rounded up. Content without
        any words or images yields 0.

    Raises:
        ZeroDivisionError: If ``words_per_minute`` is 0.
    """
    parser = ReadtimeParser()
    parser.feed(html)
    parser.close()

    # Count runs of word characters. Splitting on non-word characters instead
    # yields an empty string whenever the text starts or ends with
    # punctuation or whitespace (and for empty text), which inflated the word
    # count by one or two and could push the result up by a whole minute.
    words = len(re.findall(r"\w+", "".join(parser.text)))
    seconds = ceil(words / words_per_minute * 60)

    # Account for additional images: the first one adds 12 seconds, each
    # following one adds a second less, but never fewer than 3 seconds
    delta = 12
    for _ in range(parser.images):
        seconds += delta
        if delta > 3:
            delta -= 1

    # Return readtime in minutes
    return ceil(seconds / 60)
# -----------------------------------------------------------------------------
# Classes
# -----------------------------------------------------------------------------
# Readtime parser
class ReadtimeParser(HTMLParser):
    """HTML parser that gathers text chunks and counts ``img`` tags."""

    def __init__(self):
        """Set up the parser with character references converted to text."""
        super().__init__(convert_charrefs=True)

        # Number of images seen and text chunks in order of appearance
        self.images = 0
        self.text = []

    def handle_starttag(self, tag, attrs):
        """Count the tag if it is an image, ignore every other tag."""
        if tag != "img":
            return
        self.images += 1

    def handle_data(self, data):
        """Record a chunk of text exactly as delivered by the parser."""
        self.text.append(data)

View File

@ -29,8 +29,6 @@ pymdown-extensions>=9.9.1
# Requirements for plugins # Requirements for plugins
babel>=2.10.3 babel>=2.10.3
colorama>=0.4 colorama>=0.4
lxml>=4.6
paginate>=0.5.6 paginate>=0.5.6
readtime>=2.0
regex>=2022.4.24 regex>=2022.4.24
requests>=2.26 requests>=2.26

View File

@ -23,7 +23,6 @@ from __future__ import annotations
import logging import logging
import os import os
import posixpath import posixpath
import readtime
import yaml import yaml
from babel.dates import format_date from babel.dates import format_date
@ -43,6 +42,7 @@ from yaml import SafeLoader
from .author import Authors from .author import Authors
from .config import BlogConfig from .config import BlogConfig
from .readtime import readtime
from .structure import Archive, Category, Excerpt, Post, View from .structure import Archive, Category, Excerpt, Post, View
from .templates import url_filter from .templates import url_filter
@ -260,17 +260,6 @@ class BlogPlugin(BasePlugin[BlogConfig]):
# Append to list of authors # Append to list of authors
page.authors.append(self.authors[name]) page.authors.append(self.authors[name])
# Compute readtime of post, if enabled and not explicitly set
if self.config.post_readtime:
rate = self.config.post_readtime_words_per_minute
# There's a bug in the readtime library which causes it to fail if
# the input string contains emojis - see https://t.ly/qEoHq
if not page.config.readtime:
data = markdown.encode("unicode_escape")
read = readtime.of_markdown(data, rate)
page.config.readtime = read.minutes
# Extract settings for excerpts # Extract settings for excerpts
separator = self.config.post_excerpt_separator separator = self.config.post_excerpt_separator
max_authors = self.config.post_excerpt_max_authors max_authors = self.config.post_excerpt_max_authors
@ -295,6 +284,22 @@ class BlogPlugin(BasePlugin[BlogConfig]):
page.excerpt.authors = page.authors[:max_authors] page.excerpt.authors = page.authors[:max_authors]
page.excerpt.categories = page.categories[:max_categories] page.excerpt.categories = page.categories[:max_categories]
# Process posts
def on_page_content(self, html, *, page, config, files):
    # Nothing to do if the plugin is disabled, or if the page is not a post
    # managed by this instance - the plugin supports multiple instances, so
    # each one must only touch its own posts
    if not self.config.enabled or page not in self.blog.posts:
        return

    # Readtime computation is optional
    if not self.config.post_readtime:
        return

    # Only compute readtime when the post does not already carry a value,
    # so a readtime set explicitly on the post is kept
    rate = self.config.post_readtime_words_per_minute
    if page.config.readtime:
        return
    page.config.readtime = readtime(html, rate)
# Register template filters for plugin # Register template filters for plugin
def on_env(self, env, *, config, files): def on_env(self, env, *, config, files):
if not self.config.enabled: if not self.config.enabled:

View File

@ -0,0 +1,74 @@
# Copyright (c) 2016-2023 Martin Donath <martin.donath@squidfunk.com>
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
import re
from html.parser import HTMLParser
from math import ceil
# -----------------------------------------------------------------------------
# Functions
# -----------------------------------------------------------------------------
# Compute readtime - we first used the original readtime library, but the list
# of dependencies it brings with it increased the size of the Docker image by
# 20 MB (packed), which is an increase of 50%. For this reason, we adapt the
# original readtime algorithm to our needs - see https://t.ly/fPZ7L
def readtime(html: str, words_per_minute: int) -> int:
    """Estimate how long it takes to read the given HTML, in whole minutes.

    The HTML is fed through ``ReadtimeParser``, which collects the text
    chunks and counts the images. Reading time is the number of words divided
    by the reading rate, plus a per-image allowance, rounded up to minutes.

    Args:
        html: HTML markup to analyze.
        words_per_minute: Reading speed used to convert words to time.

    Returns:
        The estimated reading time in minutes, rounded up. Content without
        any words or images yields 0.

    Raises:
        ZeroDivisionError: If ``words_per_minute`` is 0.
    """
    parser = ReadtimeParser()
    parser.feed(html)
    parser.close()

    # Count runs of word characters. Splitting on non-word characters instead
    # yields an empty string whenever the text starts or ends with
    # punctuation or whitespace (and for empty text), which inflated the word
    # count by one or two and could push the result up by a whole minute.
    words = len(re.findall(r"\w+", "".join(parser.text)))
    seconds = ceil(words / words_per_minute * 60)

    # Account for additional images: the first one adds 12 seconds, each
    # following one adds a second less, but never fewer than 3 seconds
    delta = 12
    for _ in range(parser.images):
        seconds += delta
        if delta > 3:
            delta -= 1

    # Return readtime in minutes
    return ceil(seconds / 60)
# -----------------------------------------------------------------------------
# Classes
# -----------------------------------------------------------------------------
# Readtime parser
class ReadtimeParser(HTMLParser):
    """HTML parser that gathers text chunks and counts ``img`` tags."""

    def __init__(self):
        """Set up the parser with character references converted to text."""
        super().__init__(convert_charrefs=True)

        # Number of images seen and text chunks in order of appearance
        self.images = 0
        self.text = []

    def handle_starttag(self, tag, attrs):
        """Count the tag if it is an image, ignore every other tag."""
        if tag != "img":
            return
        self.images += 1

    def handle_data(self, data):
        """Record a chunk of text exactly as delivered by the parser."""
        self.text.append(data)