Use cPython to speed up some rendering by about 10%.

2025-02-18 19:54:21 +01:00 · 2021-05-16 00:19:50 +00:00 · 2021-05-16 00:19:50 +00:00 · 187783696b
commit 187783696b
parent 68feebc78e
3 changed files with 174 additions and 157 deletions
--- a/bemani/format/afp/blend.py
+++ b/bemani/format/afp/blend.py
@ -0,0 +1,155 @@
+from typing import Tuple
+
+from .types.generic import Color
+
+
+def clamp(color: float) -> int:
+    return min(max(0, round(color)), 255)
+
+
+def blend_normal(
+    # RGBA color tuple representing what's already at the dest.
+    dest: Tuple[int, int, int, int],
+    # RGBA color tuple representing the source we want to blend to the dest.
+    src: Tuple[int, int, int, int],
+    # A pre-scaled color where all values are 0.0-1.0, used to calculate the final color.
+    mult_color: Color,
+    # A RGBA color tuple where all values are 0-255, used to calculate the final color.
+    add_color: Tuple[int, int, int, int],
+) -> Tuple[int, int, int, int]:
+    # "Normal" blend mode, which is just alpha blending. Various games use the DX
+    # equation Src * As + Dst * (1 - As). We premultiply Dst by Ad as well, since
+    # we are blitting onto a destination that could have transparency.
+
+    # Calculate multiplicative and additive colors against the source.
+    src = (
+        clamp((src[0] * mult_color.r) + add_color[0]),
+        clamp((src[1] * mult_color.g) + add_color[1]),
+        clamp((src[2] * mult_color.b) + add_color[2]),
+        clamp((src[3] * mult_color.a) + add_color[3]),
+    )
+
+    # Short circuit for speed.
+    if src[3] == 0:
+        return dest
+    if src[3] == 255:
+        return src
+
+    # Calculate alpha blending.
+    srcpercent = src[3] / 255.0
+    destpercent = dest[3] / 255.0
+    destremainder = 1.0 - srcpercent
+    return (
+        clamp((dest[0] * destpercent * destremainder) + (src[0] * srcpercent)),
+        clamp((dest[1] * destpercent * destremainder) + (src[1] * srcpercent)),
+        clamp((dest[2] * destpercent * destremainder) + (src[2] * srcpercent)),
+        clamp(255 * (srcpercent + destpercent * destremainder)),
+    )
+
+
+def blend_addition(
+    # RGBA color tuple representing what's already at the dest.
+    dest: Tuple[int, int, int, int],
+    # RGBA color tuple representing the source we want to blend to the dest.
+    src: Tuple[int, int, int, int],
+    # A pre-scaled color where all values are 0.0-1.0, used to calculate the final color.
+    mult_color: Color,
+    # A RGBA color tuple where all values are 0-255, used to calculate the final color.
+    add_color: Tuple[int, int, int, int],
+) -> Tuple[int, int, int, int]:
+    # "Addition" blend mode, which is used for fog/clouds/etc. Various games use the DX
+    # equation Src * As + Dst * 1. It appears jubeat does not premultiply the source
+    # by its alpha component.
+
+    # Calculate multiplicative and additive colors against the source.
+    src = (
+        clamp((src[0] * mult_color.r) + add_color[0]),
+        clamp((src[1] * mult_color.g) + add_color[1]),
+        clamp((src[2] * mult_color.b) + add_color[2]),
+        clamp((src[3] * mult_color.a) + add_color[3]),
+    )
+
+    # Short circuit for speed.
+    if src[3] == 0:
+        return dest
+
+    # Calculate alpha blending.
+    srcpercent = src[3] / 255.0
+    return (
+        clamp(dest[0] + (src[0] * srcpercent)),
+        clamp(dest[1] + (src[1] * srcpercent)),
+        clamp(dest[2] + (src[2] * srcpercent)),
+        clamp(dest[3] + (255 * srcpercent)),
+    )
+
+
+def blend_subtraction(
+    # RGBA color tuple representing what's already at the dest.
+    dest: Tuple[int, int, int, int],
+    # RGBA color tuple representing the source we want to blend to the dest.
+    src: Tuple[int, int, int, int],
+    # A pre-scaled color where all values are 0.0-1.0, used to calculate the final color.
+    mult_color: Color,
+    # A RGBA color tuple where all values are 0-255, used to calculate the final color.
+    add_color: Tuple[int, int, int, int],
+) -> Tuple[int, int, int, int]:
+    # "Subtraction" blend mode, used for darkening an image. Various games use the DX
+    # equation Dst * 1 - Src * As. It appears jubeat does not premultiply the source
+    # by its alpha component much like the "additive" blend above..
+
+    # Calculate multiplicative and additive colors against the source.
+    src = (
+        clamp((src[0] * mult_color.r) + add_color[0]),
+        clamp((src[1] * mult_color.g) + add_color[1]),
+        clamp((src[2] * mult_color.b) + add_color[2]),
+        clamp((src[3] * mult_color.a) + add_color[3]),
+    )
+
+    # Short circuit for speed.
+    if src[3] == 0:
+        return dest
+
+    # Calculate alpha blending.
+    srcpercent = src[3] / 255.0
+    return (
+        clamp(dest[0] - (src[0] * srcpercent)),
+        clamp(dest[1] - (src[1] * srcpercent)),
+        clamp(dest[2] - (src[2] * srcpercent)),
+        clamp(dest[3] - (255 * srcpercent)),
+    )
+
+
+def blend_multiply(
+    # RGBA color tuple representing what's already at the dest.
+    dest: Tuple[int, int, int, int],
+    # RGBA color tuple representing the source we want to blend to the dest.
+    src: Tuple[int, int, int, int],
+    # A pre-scaled color where all values are 0.0-1.0, used to calculate the final color.
+    mult_color: Color,
+    # A RGBA color tuple where all values are 0-255, used to calculate the final color.
+    add_color: Tuple[int, int, int, int],
+) -> Tuple[int, int, int, int]:
+    # "Multiply" blend mode, used for darkening an image. Various games use the DX
+    # equation Src * 0 + Dst * Src. It appears jubeat uses the alternative formula
+    # Src * Dst + Dst * (1 - As) which reduces to the first equation as long as the
+    # source alpha is always 255.
+
+    # Calculate multiplicative and additive colors against the source.
+    src = (
+        clamp((src[0] * mult_color.r) + add_color[0]),
+        clamp((src[1] * mult_color.g) + add_color[1]),
+        clamp((src[2] * mult_color.b) + add_color[2]),
+        clamp((src[3] * mult_color.a) + add_color[3]),
+    )
+
+    # Short circuit for speed.
+    if src[3] == 0:
+        return dest
+
+    # Calculate alpha blending.
+    return (
+        clamp(255 * ((dest[0] / 255.0) * (src[0] / 255.0))),
+        clamp(255 * ((dest[1] / 255.0) * (src[1] / 255.0))),
+        clamp(255 * ((dest[2] / 255.0) * (src[2] / 255.0))),
+        clamp(255 * ((dest[3] / 255.0) * (src[3] / 255.0))),
+    )
--- a/bemani/format/afp/render.py
+++ b/bemani/format/afp/render.py
@ -1,6 +1,7 @@
 from typing import Dict, List, Tuple, Optional, Union
 from PIL import Image  # type: ignore

+from .blend import blend_normal, blend_addition, blend_subtraction, blend_multiply
 from .swf import SWF, Frame, Tag, AP2ShapeTag, AP2DefineSpriteTag, AP2PlaceObjectTag, AP2RemoveObjectTag, AP2DoActionTag, AP2DefineFontTag, AP2DefineEditTextTag
 from .types import Color, Matrix, Point
 from .geo import Shape, DrawParams
@ -488,9 +489,9 @@ class AFPRenderer(VerboseOutput):
                                texoff = texx + (texy * texwidth)

                                if blend == 0 or blend == 2:
-                                    imgmap[imgoff] = self.__blend_normal(imgmap[imgoff], texmap[texoff], mult_color, add_color)
+                                    imgmap[imgoff] = blend_normal(imgmap[imgoff], texmap[texoff], mult_color, add_color)
                                elif blend == 3:
-                                    imgmap[imgoff] = self.__blend_multiply(imgmap[imgoff], texmap[texoff], mult_color, add_color)
+                                    imgmap[imgoff] = blend_multiply(imgmap[imgoff], texmap[texoff], mult_color, add_color)
                                # TODO: blend mode 4, which is "screen" blending according to SWF references. I've only seen this
                                # in Jubeat and it implements it using OpenGL equation Src * (1 - Dst) + Dst * 1.
                                # TODO: blend mode 5, which is "lighten" blending according to SWF references. Jubeat does not
@ -502,9 +503,9 @@ class AFPRenderer(VerboseOutput):
                                # TODO: blend mode 13, which is "overlay" according to SWF references. The equation seems to be
                                # Src * Dst + Dst * Src but Jubeat thinks it should be Src * Dst + Dst * (1 - As).
                                elif blend == 8:
-                                    imgmap[imgoff] = self.__blend_addition(imgmap[imgoff], texmap[texoff], mult_color, add_color)
+                                    imgmap[imgoff] = blend_addition(imgmap[imgoff], texmap[texoff], mult_color, add_color)
                                elif blend == 9 or blend == 70:
-                                    imgmap[imgoff] = self.__blend_subtraction(imgmap[imgoff], texmap[texoff], mult_color, add_color)
+                                    imgmap[imgoff] = blend_subtraction(imgmap[imgoff], texmap[texoff], mult_color, add_color)
                                # TODO: blend mode 75, which is not in the SWF spec and appears to have the equation
                                # Src * (1 - Dst) + Dst * (1 - Src).
                                else:
@ -512,163 +513,12 @@ class AFPRenderer(VerboseOutput):
                                        # Don't print it for every pixel.
                                        print(f"WARNING: Unsupported blend {blend}")
                                        announced = True
-                                    imgmap[imgoff] = self.__blend_normal(imgmap[imgoff], texmap[texoff], mult_color, add_color)
+                                    imgmap[imgoff] = blend_normal(imgmap[imgoff], texmap[texoff], mult_color, add_color)

                        img.putdata(imgmap)
        else:
            raise Exception(f"Unknown placed object type to render {renderable}!")

-    def __clamp(self, color: Union[float, int]) -> int:
-        return min(max(0, round(color)), 255)
-
-    def __blend_normal(
-        self,
-        # RGBA color tuple representing what's already at the dest.
-        dest: Tuple[int, int, int, int],
-        # RGBA color tuple representing the source we want to blend to the dest.
-        src: Tuple[int, int, int, int],
-        # A pre-scaled color where all values are 0.0-1.0, used to calculate the final color.
-        mult_color: Color,
-        # A RGBA color tuple where all values are 0-255, used to calculate the final color.
-        add_color: Tuple[int, int, int, int],
-    ) -> Tuple[int, int, int, int]:
-        # "Normal" blend mode, which is just alpha blending. Various games use the DX
-        # equation Src * As + Dst * (1 - As). We premultiply Dst by Ad as well, since
-        # we are blitting onto a destination that could have transparency.
-
-        # Calculate multiplicative and additive colors against the source.
-        src = (
-            self.__clamp((src[0] * mult_color.r) + add_color[0]),
-            self.__clamp((src[1] * mult_color.g) + add_color[1]),
-            self.__clamp((src[2] * mult_color.b) + add_color[2]),
-            self.__clamp((src[3] * mult_color.a) + add_color[3]),
-        )
-
-        # Short circuit for speed.
-        if src[3] == 0:
-            return dest
-        if src[3] == 255:
-            return src
-
-        # Calculate alpha blending.
-        srcpercent = src[3] / 255.0
-        destpercent = dest[3] / 255.0
-        destremainder = 1.0 - srcpercent
-        return (
-            self.__clamp((dest[0] * destpercent * destremainder) + (src[0] * srcpercent)),
-            self.__clamp((dest[1] * destpercent * destremainder) + (src[1] * srcpercent)),
-            self.__clamp((dest[2] * destpercent * destremainder) + (src[2] * srcpercent)),
-            self.__clamp(255 * (srcpercent + destpercent * destremainder)),
-        )
-
-    def __blend_addition(
-        self,
-        # RGBA color tuple representing what's already at the dest.
-        dest: Tuple[int, int, int, int],
-        # RGBA color tuple representing the source we want to blend to the dest.
-        src: Tuple[int, int, int, int],
-        # A pre-scaled color where all values are 0.0-1.0, used to calculate the final color.
-        mult_color: Color,
-        # A RGBA color tuple where all values are 0-255, used to calculate the final color.
-        add_color: Tuple[int, int, int, int],
-    ) -> Tuple[int, int, int, int]:
-        # "Addition" blend mode, which is used for fog/clouds/etc. Various games use the DX
-        # equation Src * As + Dst * 1. It appears jubeat does not premultiply the source
-        # by its alpha component.
-
-        # Calculate multiplicative and additive colors against the source.
-        src = (
-            self.__clamp((src[0] * mult_color.r) + add_color[0]),
-            self.__clamp((src[1] * mult_color.g) + add_color[1]),
-            self.__clamp((src[2] * mult_color.b) + add_color[2]),
-            self.__clamp((src[3] * mult_color.a) + add_color[3]),
-        )
-
-        # Short circuit for speed.
-        if src[3] == 0:
-            return dest
-
-        # Calculate alpha blending.
-        srcpercent = src[3] / 255.0
-        return (
-            self.__clamp(dest[0] + (src[0] * srcpercent)),
-            self.__clamp(dest[1] + (src[1] * srcpercent)),
-            self.__clamp(dest[2] + (src[2] * srcpercent)),
-            self.__clamp(dest[3] + (255 * srcpercent)),
-        )
-
-    def __blend_subtraction(
-        self,
-        # RGBA color tuple representing what's already at the dest.
-        dest: Tuple[int, int, int, int],
-        # RGBA color tuple representing the source we want to blend to the dest.
-        src: Tuple[int, int, int, int],
-        # A pre-scaled color where all values are 0.0-1.0, used to calculate the final color.
-        mult_color: Color,
-        # A RGBA color tuple where all values are 0-255, used to calculate the final color.
-        add_color: Tuple[int, int, int, int],
-    ) -> Tuple[int, int, int, int]:
-        # "Subtraction" blend mode, used for darkening an image. Various games use the DX
-        # equation Dst * 1 - Src * As. It appears jubeat does not premultiply the source
-        # by its alpha component much like the "additive" blend above..
-
-        # Calculate multiplicative and additive colors against the source.
-        src = (
-            self.__clamp((src[0] * mult_color.r) + add_color[0]),
-            self.__clamp((src[1] * mult_color.g) + add_color[1]),
-            self.__clamp((src[2] * mult_color.b) + add_color[2]),
-            self.__clamp((src[3] * mult_color.a) + add_color[3]),
-        )
-
-        # Short circuit for speed.
-        if src[3] == 0:
-            return dest
-
-        # Calculate alpha blending.
-        srcpercent = src[3] / 255.0
-        return (
-            self.__clamp(dest[0] - (src[0] * srcpercent)),
-            self.__clamp(dest[1] - (src[1] * srcpercent)),
-            self.__clamp(dest[2] - (src[2] * srcpercent)),
-            self.__clamp(dest[3] - (255 * srcpercent)),
-        )
-
-    def __blend_multiply(
-        self,
-        # RGBA color tuple representing what's already at the dest.
-        dest: Tuple[int, int, int, int],
-        # RGBA color tuple representing the source we want to blend to the dest.
-        src: Tuple[int, int, int, int],
-        # A pre-scaled color where all values are 0.0-1.0, used to calculate the final color.
-        mult_color: Color,
-        # A RGBA color tuple where all values are 0-255, used to calculate the final color.
-        add_color: Tuple[int, int, int, int],
-    ) -> Tuple[int, int, int, int]:
-        # "Multiply" blend mode, used for darkening an image. Various games use the DX
-        # equation Src * 0 + Dst * Src. It appears jubeat uses the alternative formula
-        # Src * Dst + Dst * (1 - As) which reduces to the first equation as long as the
-        # source alpha is always 255.
-
-        # Calculate multiplicative and additive colors against the source.
-        src = (
-            self.__clamp((src[0] * mult_color.r) + add_color[0]),
-            self.__clamp((src[1] * mult_color.g) + add_color[1]),
-            self.__clamp((src[2] * mult_color.b) + add_color[2]),
-            self.__clamp((src[3] * mult_color.a) + add_color[3]),
-        )
-
-        # Short circuit for speed.
-        if src[3] == 0:
-            return dest
-
-        # Calculate alpha blending.
-        return (
-            self.__clamp(255 * ((dest[0] / 255.0) * (src[0] / 255.0))),
-            self.__clamp(255 * ((dest[1] / 255.0) * (src[1] / 255.0))),
-            self.__clamp(255 * ((dest[2] / 255.0) * (src[2] / 255.0))),
-            self.__clamp(255 * ((dest[3] / 255.0) * (src[3] / 255.0))),
-        )
-
    def __process_tags(self, clip: PlacedClip, prefix: str = "  ") -> bool:
        self.vprint(f"{prefix}Handling placed clip {clip.object_id} at depth {clip.depth}")

@ -767,7 +617,7 @@ class AFPRenderer(VerboseOutput):
                    # Now, render out the placed objects. We sort by depth so that we can
                    # get the layering correct, but its important to preserve the original
                    # insertion order for delete requests.
-                    curimage = Image.new("RGBA", (swf.location.width, swf.location.height), color=color.as_tuple())
+                    curimage = Image.new("RGBA", (int(swf.location.width), int(swf.location.height)), color=color.as_tuple())

                    clip = self.__find_renderable(root_clip, visible_tag)
                    if clip:
--- a/setup.py
+++ b/setup.py
@ -117,6 +117,18 @@ setup(
                    "bemani/protocol/xml.py",
                ]
            ),
+            Extension(
+                "bemani.format.afp.blend",
+                [
+                    "bemani/format/afp/blend.py",
+                ]
+            ),
+            Extension(
+                "bemani.format.afp.types.generic",
+                [
+                    "bemani/format/afp/types/generic.py",
+                ]
+            ),
        ],
        language_level=3,
    ),