1
0
mirror of synced 2025-01-31 12:13:49 +01:00

Shave off another 33% of rendering time by using better picklable data in the multithreaded case.

This commit is contained in:
Jennifer Taylor 2021-05-16 15:15:06 +00:00
parent fdf7e70bfd
commit 3db9cc9a58
3 changed files with 73 additions and 47 deletions

View File

@ -1,6 +1,6 @@
import multiprocessing
from PIL import Image # type: ignore
from typing import List, Tuple
from typing import List, Sequence, Tuple
from .types.generic import Color, Matrix, Point
@ -11,14 +11,14 @@ def clamp(color: float) -> int:
def blend_normal(
# RGBA color tuple representing what's already at the dest.
dest: Tuple[int, int, int, int],
dest: Sequence[int],
# RGBA color tuple representing the source we want to blend to the dest.
src: Tuple[int, int, int, int],
src: Sequence[int],
# A pre-scaled color where all values are 0.0-1.0, used to calculate the final color.
mult_color: Color,
# A RGBA color tuple where all values are 0-255, used to calculate the final color.
add_color: Tuple[int, int, int, int],
) -> Tuple[int, int, int, int]:
) -> Sequence[int]:
# "Normal" blend mode, which is just alpha blending. Various games use the DX
# equation Src * As + Dst * (1 - As). We premultiply Dst by Ad as well, since
# we are blitting onto a destination that could have transparency.
@ -51,14 +51,14 @@ def blend_normal(
def blend_addition(
# RGBA color tuple representing what's already at the dest.
dest: Tuple[int, int, int, int],
dest: Sequence[int],
# RGBA color tuple representing the source we want to blend to the dest.
src: Tuple[int, int, int, int],
src: Sequence[int],
# A pre-scaled color where all values are 0.0-1.0, used to calculate the final color.
mult_color: Color,
# A RGBA color tuple where all values are 0-255, used to calculate the final color.
add_color: Tuple[int, int, int, int],
) -> Tuple[int, int, int, int]:
) -> Sequence[int]:
# "Addition" blend mode, which is used for fog/clouds/etc. Various games use the DX
# equation Src * As + Dst * 1. It appears jubeat does not premultiply the source
# by its alpha component.
@ -87,14 +87,14 @@ def blend_addition(
def blend_subtraction(
# RGBA color tuple representing what's already at the dest.
dest: Tuple[int, int, int, int],
dest: Sequence[int],
# RGBA color tuple representing the source we want to blend to the dest.
src: Tuple[int, int, int, int],
src: Sequence[int],
# A pre-scaled color where all values are 0.0-1.0, used to calculate the final color.
mult_color: Color,
# A RGBA color tuple where all values are 0-255, used to calculate the final color.
add_color: Tuple[int, int, int, int],
) -> Tuple[int, int, int, int]:
) -> Sequence[int]:
# "Subtraction" blend mode, used for darkening an image. Various games use the DX
# equation Dst * 1 - Src * As. It appears jubeat does not premultiply the source
# by its alpha component, much like the "additive" blend above.
@ -123,14 +123,14 @@ def blend_subtraction(
def blend_multiply(
# RGBA color tuple representing what's already at the dest.
dest: Tuple[int, int, int, int],
dest: Sequence[int],
# RGBA color tuple representing the source we want to blend to the dest.
src: Tuple[int, int, int, int],
src: Sequence[int],
# A pre-scaled color where all values are 0.0-1.0, used to calculate the final color.
mult_color: Color,
# A RGBA color tuple where all values are 0-255, used to calculate the final color.
add_color: Tuple[int, int, int, int],
) -> Tuple[int, int, int, int]:
) -> Sequence[int]:
# "Multiply" blend mode, used for darkening an image. Various games use the DX
# equation Src * 0 + Dst * Src. It appears jubeat uses the alternative formula
# Src * Dst + Dst * (1 - As) which reduces to the first equation as long as the
@ -166,12 +166,8 @@ def affine_composite(
origin: Point,
blendfunc: int,
texture: Image.Image,
) -> List[Tuple[int, int, int, int]]:
# Get the data in an easier to manipulate and faster to update fashion.
imgmap = list(img.getdata())
texmap = list(texture.getdata())
cores = multiprocessing.cpu_count()
single_threaded: bool = False,
) -> Image.Image:
# Warn if we have an unsupported blend.
if blendfunc not in {0, 2, 3, 8, 9, 70}:
print(f"WARNING: Unsupported blend {blendfunc}")
@ -195,7 +191,12 @@ def affine_composite(
miny = max(int(min(pix1.y, pix2.y, pix3.y, pix4.y)), 0)
maxy = min(int(max(pix1.y, pix2.y, pix3.y, pix4.y)) + 1, imgheight)
if cores < 2:
cores = multiprocessing.cpu_count()
if single_threaded or cores < 2:
# Get the data in an easier to manipulate and faster to update fashion.
imgmap = list(img.getdata())
texmap = list(texture.getdata())
# We don't have enough CPU cores to bother multiprocessing.
for imgy in range(miny, maxy):
for imgx in range(minx, maxx):
@ -213,7 +214,12 @@ def affine_composite(
# Blend it.
texoff = texx + (texy * texwidth)
imgmap[imgoff] = affine_blend_impl(add_color, mult_color, texmap[texoff], imgmap[imgoff], blendfunc)
img.putdata(imgmap)
else:
imgbytes = img.tobytes('raw', 'RGBA')
texbytes = texture.tobytes('raw', 'RGBA')
# Let's spread the load across multiple processors.
procs: List[multiprocessing.Process] = []
work: multiprocessing.Queue = multiprocessing.Queue()
@ -236,8 +242,8 @@ def affine_composite(
add_color,
mult_color,
blendfunc,
imgmap,
texmap,
imgbytes,
texbytes,
),
)
procs.append(proc)
@ -247,25 +253,25 @@ def affine_composite(
work.put(imgy)
expected += 1
lines: List[List[Tuple[int, int, int, int]]] = [
imgmap[x:(x + imgwidth)]
lines: List[bytes] = [
imgbytes[x:(x + (imgwidth * 4))]
for x in range(
0,
imgwidth * imgheight,
imgwidth,
imgwidth * imgheight * 4,
imgwidth * 4,
)
]
for _ in range(expected):
imgy, result = results.get()
lines[imgy] = result
imgmap = [pixel for line in lines for pixel in line]
for proc in procs:
work.put(None)
for proc in procs:
proc.join()
return imgmap
img = Image.frombytes('RGBA', (imgwidth, imgheight), b''.join(lines))
return img
def pixel_renderer(
@ -281,20 +287,20 @@ def pixel_renderer(
add_color: Tuple[int, int, int, int],
mult_color: Color,
blendfunc: int,
imgmap: List[Tuple[int, int, int, int]],
texmap: List[Tuple[int, int, int, int]],
imgbytes: bytes,
texbytes: bytes,
) -> None:
while True:
imgy = work.get()
if imgy is None:
return
result: List[Tuple[int, int, int, int]] = []
result: List[Sequence[int]] = []
for imgx in range(imgwidth):
# Determine offset
imgoff = imgx + (imgy * imgwidth)
if imgx < minx or imgx >= maxx:
result.append(imgmap[imgoff])
result.append(imgbytes[(imgoff * 4):((imgoff + 1) * 4)])
continue
# Calculate what texture pixel data goes here.
@ -303,23 +309,26 @@ def pixel_renderer(
# If we're out of bounds, don't update.
if texx < 0 or texy < 0 or texx >= texwidth or texy >= texheight:
result.append(imgmap[imgoff])
result.append(imgbytes[(imgoff * 4):((imgoff + 1) * 4)])
continue
# Blend it.
texoff = texx + (texy * texwidth)
result.append(affine_blend_impl(add_color, mult_color, texmap[texoff], imgmap[imgoff], blendfunc))
result.append(affine_blend_impl(add_color, mult_color, texbytes[(texoff * 4):((texoff + 1) * 4)], imgbytes[(imgoff * 4):((imgoff + 1) * 4)], blendfunc))
results.put((imgy, result))
linebytes = bytes([channel for pixel in result for channel in pixel])
results.put((imgy, linebytes))
def affine_blend_impl(
add_color: Tuple[int, int, int, int],
mult_color: Color,
src_color: Tuple[int, int, int, int],
dest_color: Tuple[int, int, int, int],
# This should be a sequence of exactly 4 values, either bytes or a tuple.
src_color: Sequence[int],
# This should be a sequence of exactly 4 values, either bytes or a tuple.
dest_color: Sequence[int],
blendfunc: int,
) -> Tuple[int, int, int, int]:
) -> Sequence[int]:
if blendfunc == 3:
return blend_multiply(dest_color, src_color, mult_color, add_color)
# TODO: blend mode 4, which is "screen" blending according to SWF references. I've only seen this

View File

@ -110,9 +110,12 @@ class PlacedClip(PlacedObject):
class AFPRenderer(VerboseOutput):
def __init__(self, shapes: Dict[str, Shape] = {}, textures: Dict[str, Image.Image] = {}, swfs: Dict[str, SWF] = {}) -> None:
def __init__(self, shapes: Dict[str, Shape] = {}, textures: Dict[str, Image.Image] = {}, swfs: Dict[str, SWF] = {}, single_threaded: bool = False) -> None:
super().__init__()
# Options for rendering
self.__single_threaded = single_threaded
self.shapes: Dict[str, Shape] = shapes
self.textures: Dict[str, Image.Image] = textures
self.swfs: Dict[str, SWF] = swfs
@ -363,7 +366,7 @@ class AFPRenderer(VerboseOutput):
else:
raise Exception(f"Failed to process tag: {tag}")
def __render_object(self, img: Image.Image, renderable: PlacedObject, parent_transform: Matrix, parent_origin: Point) -> None:
def __render_object(self, img: Image.Image, renderable: PlacedObject, parent_transform: Matrix, parent_origin: Point) -> Image.Image:
# Compute the affine transformation matrix for this object.
transform = parent_transform.multiply(renderable.transform)
@ -375,7 +378,7 @@ class AFPRenderer(VerboseOutput):
# this object invisible. We can ignore this since the object should not
# be drawn.
print(f"WARNING: Transform Matrix {transform} has zero scaling factor, making it non-invertible!")
return
return img
# Render individual shapes if this is a sprite.
if isinstance(renderable, PlacedClip):
@ -386,7 +389,7 @@ class AFPRenderer(VerboseOutput):
)
for obj in objs:
self.vprint(f" Rendering placed object ID {obj.object_id} from sprite {obj.source.tag_id} onto Depth {obj.depth}")
self.__render_object(img, obj, transform, parent_origin.add(renderable.rotation_offset))
img = self.__render_object(img, obj, transform, parent_origin.add(renderable.rotation_offset))
elif isinstance(renderable, PlacedShape):
# This is a shape draw reference.
shape = renderable.source
@ -400,7 +403,7 @@ class AFPRenderer(VerboseOutput):
for params in shape.draw_params:
if not (params.flags & 0x1):
# Not instantiable, don't render.
return
return img
if params.flags & 0x8:
# TODO: Need to support blending and UV coordinate colors here.
@ -449,10 +452,12 @@ class AFPRenderer(VerboseOutput):
img.alpha_composite(texture, cutin.as_tuple(), cutoff.as_tuple())
else:
# We can't, so do the slow render that's correct.
img.putdata(affine_composite(img, add_color, mult_color, transform, inverse, origin, blend, texture))
img = affine_composite(img, add_color, mult_color, transform, inverse, origin, blend, texture, single_threaded=self.__single_threaded)
else:
raise Exception(f"Unknown placed object type to render {renderable}!")
return img
def __process_tags(self, clip: PlacedClip, prefix: str = " ") -> bool:
self.vprint(f"{prefix}Handling placed clip {clip.object_id} at depth {clip.depth}")
@ -557,7 +562,7 @@ class AFPRenderer(VerboseOutput):
if clip:
for obj in sorted(clip.placed_objects, key=lambda obj: obj.depth):
self.vprint(f" Rendering placed object ID {obj.object_id} from sprite {obj.source.tag_id} onto Depth {obj.depth}")
self.__render_object(curimage, obj, root_clip.transform, root_clip.rotation_offset)
curimage = self.__render_object(curimage, obj, root_clip.transform, root_clip.rotation_offset)
else:
# Nothing changed, make a copy of the previous render.
self.vprint(" Using previous frame render")

View File

@ -265,7 +265,12 @@ def main() -> int:
"--background-color",
type=str,
default=None,
help="Set the background color of the animation, overriding a default if present in the SWF.",
help="Set the background color of the animation as a comma-separated RGB or RGBA color, overriding a default if present in the SWF.",
)
render_parser.add_argument(
"--disable-threads",
action="store_true",
help="Disable multi-threaded rendering.",
)
list_parser = subparsers.add_parser('list', help='List out the possible paths to render from a series of SWFs')
@ -282,6 +287,12 @@ def main() -> int:
action="store_true",
help="Display verbuse debugging output",
)
list_parser.add_argument(
"--disable-threads",
action="store_true",
help="Disable multi-threaded rendering.",
)
args = parser.parse_args()
@ -560,7 +571,7 @@ def main() -> int:
if args.action in ["render", "list"]:
# This is a complicated one, as we need to be able to specify multiple
# directories of files as well as support IFS files and TXP2 files.
renderer = AFPRenderer()
renderer = AFPRenderer(single_threaded=args.disable_threads)
# TODO: Allow specifying individual folders and such.
for container in args.container:
@ -700,6 +711,7 @@ def main() -> int:
# Render the gif/webp frames.
duration, images = renderer.render_path(args.path, verbose=args.verbose, background_color=color)
if len(images) == 0:
raise Exception("Did not render any frames!")