Initial implementation of C++ affine renderer for massive speed boost.
This commit is contained in:
parent
b02c4292be
commit
48e9c59513
@ -6,203 +6,279 @@ from typing import Any, List, Sequence, Tuple
|
||||
from .types.generic import Color, Matrix, Point
|
||||
|
||||
|
||||
def clamp(color: float) -> int:
|
||||
return min(max(0, round(color)), 255)
|
||||
# If we compiled the faster cython code, we can use it instead!
|
||||
try:
|
||||
from .blendalt import affine_composite
|
||||
except ImportError:
|
||||
def clamp(color: float) -> int:
|
||||
return min(max(0, round(color)), 255)
|
||||
|
||||
def blend_normal(
|
||||
# RGBA color tuple representing what's already at the dest.
|
||||
dest: Sequence[int],
|
||||
# RGBA color tuple representing the source we want to blend to the dest.
|
||||
src: Sequence[int],
|
||||
) -> Sequence[int]:
|
||||
# "Normal" blend mode, which is just alpha blending. Various games use the DX
|
||||
# equation Src * As + Dst * (1 - As). We premultiply Dst by Ad as well, since
|
||||
# we are blitting onto a destination that could have transparency. Once we are
|
||||
# done, we divide out the premultiplied Ad in order to put the pixels back to
|
||||
# their full blended values since we are not setting the destination alpha to 1.0.
|
||||
# This enables partial transparent backgrounds to work properly.
|
||||
|
||||
def blend_normal(
|
||||
# RGBA color tuple representing what's already at the dest.
|
||||
dest: Sequence[int],
|
||||
# RGBA color tuple representing the source we want to blend to the dest.
|
||||
src: Sequence[int],
|
||||
# A pre-scaled color where all values are 0.0-1.0, used to calculate the final color.
|
||||
mult_color: Color,
|
||||
# A RGBA color tuple where all values are 0-255, used to calculate the final color.
|
||||
add_color: Tuple[int, int, int, int],
|
||||
) -> Sequence[int]:
|
||||
# "Normal" blend mode, which is just alpha blending. Various games use the DX
|
||||
# equation Src * As + Dst * (1 - As). We premultiply Dst by Ad as well, since
|
||||
# we are blitting onto a destination that could have transparency. Once we are
|
||||
# done, we divide out the premultiplied Ad in order to put the pixels back to
|
||||
# their full blended values since we are not setting the destination alpha to 1.0.
|
||||
# This enables partial transparent backgrounds to work properly.
|
||||
# Short circuit for speed.
|
||||
if src[3] == 0:
|
||||
return dest
|
||||
if src[3] == 255:
|
||||
return src
|
||||
|
||||
# Calculate multiplicative and additive colors against the source.
|
||||
src = (
|
||||
clamp((src[0] * mult_color.r) + add_color[0]),
|
||||
clamp((src[1] * mult_color.g) + add_color[1]),
|
||||
clamp((src[2] * mult_color.b) + add_color[2]),
|
||||
clamp((src[3] * mult_color.a) + add_color[3]),
|
||||
)
|
||||
# Calculate alpha blending.
|
||||
srcpercent = src[3] / 255.0
|
||||
destpercent = dest[3] / 255.0
|
||||
srcremaineder = 1.0 - srcpercent
|
||||
new_alpha = (srcpercent + destpercent * srcremaineder)
|
||||
return (
|
||||
clamp(((dest[0] * destpercent * srcremaineder) + (src[0] * srcpercent)) / new_alpha),
|
||||
clamp(((dest[1] * destpercent * srcremaineder) + (src[1] * srcpercent)) / new_alpha),
|
||||
clamp(((dest[2] * destpercent * srcremaineder) + (src[2] * srcpercent)) / new_alpha),
|
||||
clamp(255 * new_alpha)
|
||||
)
|
||||
|
||||
# Short circuit for speed.
|
||||
if src[3] == 0:
|
||||
return dest
|
||||
if src[3] == 255:
|
||||
return src
|
||||
def blend_addition(
|
||||
# RGBA color tuple representing what's already at the dest.
|
||||
dest: Sequence[int],
|
||||
# RGBA color tuple representing the source we want to blend to the dest.
|
||||
src: Sequence[int],
|
||||
) -> Sequence[int]:
|
||||
# "Addition" blend mode, which is used for fog/clouds/etc. Various games use the DX
|
||||
# equation Src * As + Dst * 1. It appears jubeat does not premultiply the source
|
||||
# by its alpha component.
|
||||
|
||||
# Calculate alpha blending.
|
||||
srcpercent = src[3] / 255.0
|
||||
destpercent = dest[3] / 255.0
|
||||
srcremaineder = 1.0 - srcpercent
|
||||
new_alpha = (srcpercent + destpercent * srcremaineder)
|
||||
return (
|
||||
clamp(((dest[0] * destpercent * srcremaineder) + (src[0] * srcpercent)) / new_alpha),
|
||||
clamp(((dest[1] * destpercent * srcremaineder) + (src[1] * srcpercent)) / new_alpha),
|
||||
clamp(((dest[2] * destpercent * srcremaineder) + (src[2] * srcpercent)) / new_alpha),
|
||||
clamp(255 * new_alpha)
|
||||
)
|
||||
# Short circuit for speed.
|
||||
if src[3] == 0:
|
||||
return dest
|
||||
|
||||
# Calculate final color blending.
|
||||
srcpercent = src[3] / 255.0
|
||||
return (
|
||||
clamp(dest[0] + (src[0] * srcpercent)),
|
||||
clamp(dest[1] + (src[1] * srcpercent)),
|
||||
clamp(dest[2] + (src[2] * srcpercent)),
|
||||
dest[3],
|
||||
)
|
||||
|
||||
def blend_addition(
|
||||
# RGBA color tuple representing what's already at the dest.
|
||||
dest: Sequence[int],
|
||||
# RGBA color tuple representing the source we want to blend to the dest.
|
||||
src: Sequence[int],
|
||||
# A pre-scaled color where all values are 0.0-1.0, used to calculate the final color.
|
||||
mult_color: Color,
|
||||
# A RGBA color tuple where all values are 0-255, used to calculate the final color.
|
||||
add_color: Tuple[int, int, int, int],
|
||||
) -> Sequence[int]:
|
||||
# "Addition" blend mode, which is used for fog/clouds/etc. Various games use the DX
|
||||
# equation Src * As + Dst * 1. It appears jubeat does not premultiply the source
|
||||
# by its alpha component.
|
||||
def blend_subtraction(
|
||||
# RGBA color tuple representing what's already at the dest.
|
||||
dest: Sequence[int],
|
||||
# RGBA color tuple representing the source we want to blend to the dest.
|
||||
src: Sequence[int],
|
||||
) -> Sequence[int]:
|
||||
# "Subtraction" blend mode, used for darkening an image. Various games use the DX
|
||||
# equation Dst * 1 - Src * As. It appears jubeat does not premultiply the source
|
||||
# by its alpha component much like the "additive" blend above.
|
||||
|
||||
# Calculate multiplicative and additive colors against the source.
|
||||
src = (
|
||||
clamp((src[0] * mult_color.r) + add_color[0]),
|
||||
clamp((src[1] * mult_color.g) + add_color[1]),
|
||||
clamp((src[2] * mult_color.b) + add_color[2]),
|
||||
clamp((src[3] * mult_color.a) + add_color[3]),
|
||||
)
|
||||
# Short circuit for speed.
|
||||
if src[3] == 0:
|
||||
return dest
|
||||
|
||||
# Short circuit for speed.
|
||||
if src[3] == 0:
|
||||
return dest
|
||||
# Calculate final color blending.
|
||||
srcpercent = src[3] / 255.0
|
||||
return (
|
||||
clamp(dest[0] - (src[0] * srcpercent)),
|
||||
clamp(dest[1] - (src[1] * srcpercent)),
|
||||
clamp(dest[2] - (src[2] * srcpercent)),
|
||||
dest[3],
|
||||
)
|
||||
|
||||
# Calculate final color blending.
|
||||
srcpercent = src[3] / 255.0
|
||||
return (
|
||||
clamp(dest[0] + (src[0] * srcpercent)),
|
||||
clamp(dest[1] + (src[1] * srcpercent)),
|
||||
clamp(dest[2] + (src[2] * srcpercent)),
|
||||
dest[3],
|
||||
)
|
||||
def blend_multiply(
|
||||
# RGBA color tuple representing what's already at the dest.
|
||||
dest: Sequence[int],
|
||||
# RGBA color tuple representing the source we want to blend to the dest.
|
||||
src: Sequence[int],
|
||||
) -> Sequence[int]:
|
||||
# "Multiply" blend mode, used for darkening an image. Various games use the DX
|
||||
# equation Src * 0 + Dst * Src. It appears jubeat uses the alternative formula
|
||||
# Src * Dst + Dst * (1 - As) which reduces to the first equation as long as the
|
||||
# source alpha is always 255.
|
||||
|
||||
# Calculate final color blending.
|
||||
return (
|
||||
clamp(255 * ((dest[0] / 255.0) * (src[0] / 255.0))),
|
||||
clamp(255 * ((dest[1] / 255.0) * (src[1] / 255.0))),
|
||||
clamp(255 * ((dest[2] / 255.0) * (src[2] / 255.0))),
|
||||
dest[3],
|
||||
)
|
||||
|
||||
def blend_subtraction(
|
||||
# RGBA color tuple representing what's already at the dest.
|
||||
dest: Sequence[int],
|
||||
# RGBA color tuple representing the source we want to blend to the dest.
|
||||
src: Sequence[int],
|
||||
# A pre-scaled color where all values are 0.0-1.0, used to calculate the final color.
|
||||
mult_color: Color,
|
||||
# A RGBA color tuple where all values are 0-255, used to calculate the final color.
|
||||
add_color: Tuple[int, int, int, int],
|
||||
) -> Sequence[int]:
|
||||
# "Subtraction" blend mode, used for darkening an image. Various games use the DX
|
||||
# equation Dst * 1 - Src * As. It appears jubeat does not premultiply the source
|
||||
# by its alpha component much like the "additive" blend above.
|
||||
def affine_composite(
|
||||
img: Image.Image,
|
||||
add_color: Tuple[int, int, int, int],
|
||||
mult_color: Color,
|
||||
transform: Matrix,
|
||||
origin: Point,
|
||||
blendfunc: int,
|
||||
texture: Image.Image,
|
||||
single_threaded: bool = False,
|
||||
) -> Image.Image:
|
||||
# Calculate the inverse so we can map canvas space back to texture space.
|
||||
try:
|
||||
inverse = transform.inverse()
|
||||
except ZeroDivisionError:
|
||||
# If this happens, that means one of the scaling factors was zero, making
|
||||
# this object invisible. We can ignore this since the object should not
|
||||
# be drawn.
|
||||
print(f"WARNING: Transform Matrix {transform} has zero scaling factor, making it non-invertible!")
|
||||
return img
|
||||
|
||||
# Calculate multiplicative and additive colors against the source.
|
||||
src = (
|
||||
clamp((src[0] * mult_color.r) + add_color[0]),
|
||||
clamp((src[1] * mult_color.g) + add_color[1]),
|
||||
clamp((src[2] * mult_color.b) + add_color[2]),
|
||||
clamp((src[3] * mult_color.a) + add_color[3]),
|
||||
)
|
||||
# Warn if we have an unsupported blend.
|
||||
if blendfunc not in {0, 2, 3, 8, 9, 70}:
|
||||
print(f"WARNING: Unsupported blend {blendfunc}")
|
||||
return img
|
||||
|
||||
# Short circuit for speed.
|
||||
if src[3] == 0:
|
||||
return dest
|
||||
# These are calculated properties and caching them outside of the loop
|
||||
# speeds things up a bit.
|
||||
imgwidth = img.width
|
||||
imgheight = img.height
|
||||
texwidth = texture.width
|
||||
texheight = texture.height
|
||||
|
||||
# Calculate final color blending.
|
||||
srcpercent = src[3] / 255.0
|
||||
return (
|
||||
clamp(dest[0] - (src[0] * srcpercent)),
|
||||
clamp(dest[1] - (src[1] * srcpercent)),
|
||||
clamp(dest[2] - (src[2] * srcpercent)),
|
||||
dest[3],
|
||||
)
|
||||
# Calculate the maximum range of update this texture can possibly reside in.
|
||||
pix1 = transform.multiply_point(Point.identity().subtract(origin))
|
||||
pix2 = transform.multiply_point(Point.identity().subtract(origin).add(Point(texwidth, 0)))
|
||||
pix3 = transform.multiply_point(Point.identity().subtract(origin).add(Point(0, texheight)))
|
||||
pix4 = transform.multiply_point(Point.identity().subtract(origin).add(Point(texwidth, texheight)))
|
||||
|
||||
# Map this to the rectangle we need to sweep in the rendering image.
|
||||
minx = max(int(min(pix1.x, pix2.x, pix3.x, pix4.x)), 0)
|
||||
maxx = min(int(max(pix1.x, pix2.x, pix3.x, pix4.x)) + 1, imgwidth)
|
||||
miny = max(int(min(pix1.y, pix2.y, pix3.y, pix4.y)), 0)
|
||||
maxy = min(int(max(pix1.y, pix2.y, pix3.y, pix4.y)) + 1, imgheight)
|
||||
|
||||
def blend_multiply(
|
||||
# RGBA color tuple representing what's already at the dest.
|
||||
dest: Sequence[int],
|
||||
# RGBA color tuple representing the source we want to blend to the dest.
|
||||
src: Sequence[int],
|
||||
# A pre-scaled color where all values are 0.0-1.0, used to calculate the final color.
|
||||
mult_color: Color,
|
||||
# A RGBA color tuple where all values are 0-255, used to calculate the final color.
|
||||
add_color: Tuple[int, int, int, int],
|
||||
) -> Sequence[int]:
|
||||
# "Multiply" blend mode, used for darkening an image. Various games use the DX
|
||||
# equation Src * 0 + Dst * Src. It appears jubeat uses the alternative formula
|
||||
# Src * Dst + Dst * (1 - As) which reduces to the first equation as long as the
|
||||
# source alpha is always 255.
|
||||
if maxx <= 0 or maxy <= 0:
|
||||
# This image is entirely off the screen!
|
||||
return img
|
||||
|
||||
# Calculate multiplicative and additive colors against the source.
|
||||
src = (
|
||||
clamp((src[0] * mult_color.r) + add_color[0]),
|
||||
clamp((src[1] * mult_color.g) + add_color[1]),
|
||||
clamp((src[2] * mult_color.b) + add_color[2]),
|
||||
clamp((src[3] * mult_color.a) + add_color[3]),
|
||||
)
|
||||
cores = multiprocessing.cpu_count()
|
||||
if single_threaded or cores < 2:
|
||||
# Get the data in an easier to manipulate and faster to update fashion.
|
||||
imgmap = list(img.getdata())
|
||||
texmap = list(texture.getdata())
|
||||
|
||||
# Calculate final color blending.
|
||||
return (
|
||||
clamp(255 * ((dest[0] / 255.0) * (src[0] / 255.0))),
|
||||
clamp(255 * ((dest[1] / 255.0) * (src[1] / 255.0))),
|
||||
clamp(255 * ((dest[2] / 255.0) * (src[2] / 255.0))),
|
||||
dest[3],
|
||||
)
|
||||
# We don't have enough CPU cores to bother multiprocessing.
|
||||
for imgy in range(miny, maxy):
|
||||
for imgx in range(minx, maxx):
|
||||
# Determine offset
|
||||
imgoff = imgx + (imgy * imgwidth)
|
||||
|
||||
# Calculate what texture pixel data goes here.
|
||||
texloc = inverse.multiply_point(Point(float(imgx), float(imgy))).add(origin)
|
||||
texx, texy = texloc.as_tuple()
|
||||
|
||||
def affine_composite(
|
||||
img: Image.Image,
|
||||
add_color: Tuple[int, int, int, int],
|
||||
mult_color: Color,
|
||||
transform: Matrix,
|
||||
inverse: Matrix,
|
||||
origin: Point,
|
||||
blendfunc: int,
|
||||
texture: Image.Image,
|
||||
single_threaded: bool = False,
|
||||
) -> Image.Image:
|
||||
# Warn if we have an unsupported blend.
|
||||
if blendfunc not in {0, 2, 3, 8, 9, 70}:
|
||||
print(f"WARNING: Unsupported blend {blendfunc}")
|
||||
# If we're out of bounds, don't update.
|
||||
if texx < 0 or texy < 0 or texx >= texwidth or texy >= texheight:
|
||||
continue
|
||||
|
||||
# These are calculated properties and caching them outside of the loop
|
||||
# speeds things up a bit.
|
||||
imgwidth = img.width
|
||||
imgheight = img.height
|
||||
texwidth = texture.width
|
||||
texheight = texture.height
|
||||
# Blend it.
|
||||
texoff = texx + (texy * texwidth)
|
||||
imgmap[imgoff] = blend_point(add_color, mult_color, texmap[texoff], imgmap[imgoff], blendfunc)
|
||||
|
||||
# Calculate the maximum range of update this texture can possibly reside in.
|
||||
pix1 = transform.multiply_point(Point.identity().subtract(origin))
|
||||
pix2 = transform.multiply_point(Point.identity().subtract(origin).add(Point(texwidth, 0)))
|
||||
pix3 = transform.multiply_point(Point.identity().subtract(origin).add(Point(0, texheight)))
|
||||
pix4 = transform.multiply_point(Point.identity().subtract(origin).add(Point(texwidth, texheight)))
|
||||
img.putdata(imgmap)
|
||||
else:
|
||||
imgbytes = img.tobytes('raw', 'RGBA')
|
||||
texbytes = texture.tobytes('raw', 'RGBA')
|
||||
|
||||
# Map this to the rectangle we need to sweep in the rendering image.
|
||||
minx = max(int(min(pix1.x, pix2.x, pix3.x, pix4.x)), 0)
|
||||
maxx = min(int(max(pix1.x, pix2.x, pix3.x, pix4.x)) + 1, imgwidth)
|
||||
miny = max(int(min(pix1.y, pix2.y, pix3.y, pix4.y)), 0)
|
||||
maxy = min(int(max(pix1.y, pix2.y, pix3.y, pix4.y)) + 1, imgheight)
|
||||
# Let's spread the load across multiple processors.
|
||||
procs: List[multiprocessing.Process] = []
|
||||
work: multiprocessing.Queue = multiprocessing.Queue()
|
||||
results: multiprocessing.Queue = multiprocessing.Queue()
|
||||
expected: int = 0
|
||||
interrupted: bool = False
|
||||
|
||||
cores = multiprocessing.cpu_count()
|
||||
if single_threaded or cores < 2:
|
||||
# Get the data in an easier to manipulate and faster to update fashion.
|
||||
imgmap = list(img.getdata())
|
||||
texmap = list(texture.getdata())
|
||||
def ctrlc(sig: Any, frame: Any) -> None:
|
||||
nonlocal interrupted
|
||||
interrupted = True
|
||||
|
||||
# We don't have enough CPU cores to bother multiprocessing.
|
||||
for imgy in range(miny, maxy):
|
||||
for imgx in range(minx, maxx):
|
||||
original_handler = signal.getsignal(signal.SIGINT)
|
||||
signal.signal(signal.SIGINT, ctrlc)
|
||||
|
||||
for _ in range(cores):
|
||||
proc = multiprocessing.Process(
|
||||
target=pixel_renderer,
|
||||
args=(
|
||||
work,
|
||||
results,
|
||||
minx,
|
||||
maxx,
|
||||
imgwidth,
|
||||
texwidth,
|
||||
texheight,
|
||||
inverse,
|
||||
origin,
|
||||
add_color,
|
||||
mult_color,
|
||||
blendfunc,
|
||||
imgbytes,
|
||||
texbytes,
|
||||
),
|
||||
)
|
||||
procs.append(proc)
|
||||
proc.start()
|
||||
|
||||
for imgy in range(miny, maxy):
|
||||
work.put(imgy)
|
||||
expected += 1
|
||||
|
||||
lines: List[bytes] = [
|
||||
imgbytes[x:(x + (imgwidth * 4))]
|
||||
for x in range(
|
||||
0,
|
||||
imgwidth * imgheight * 4,
|
||||
imgwidth * 4,
|
||||
)
|
||||
]
|
||||
for _ in range(expected):
|
||||
imgy, result = results.get()
|
||||
lines[imgy] = result
|
||||
|
||||
for proc in procs:
|
||||
work.put(None)
|
||||
for proc in procs:
|
||||
proc.join()
|
||||
|
||||
signal.signal(signal.SIGINT, original_handler)
|
||||
if interrupted:
|
||||
raise KeyboardInterrupt()
|
||||
|
||||
img = Image.frombytes('RGBA', (imgwidth, imgheight), b''.join(lines))
|
||||
return img
|
||||
|
||||
def pixel_renderer(
|
||||
work: multiprocessing.Queue,
|
||||
results: multiprocessing.Queue,
|
||||
minx: int,
|
||||
maxx: int,
|
||||
imgwidth: int,
|
||||
texwidth: int,
|
||||
texheight: int,
|
||||
inverse: Matrix,
|
||||
origin: Point,
|
||||
add_color: Tuple[int, int, int, int],
|
||||
mult_color: Color,
|
||||
blendfunc: int,
|
||||
imgbytes: bytes,
|
||||
texbytes: bytes,
|
||||
) -> None:
|
||||
while True:
|
||||
imgy = work.get()
|
||||
if imgy is None:
|
||||
return
|
||||
|
||||
result: List[Sequence[int]] = []
|
||||
for imgx in range(imgwidth):
|
||||
# Determine offset
|
||||
imgoff = imgx + (imgy * imgwidth)
|
||||
if imgx < minx or imgx >= maxx:
|
||||
result.append(imgbytes[(imgoff * 4):((imgoff + 1) * 4)])
|
||||
continue
|
||||
|
||||
# Calculate what texture pixel data goes here.
|
||||
texloc = inverse.multiply_point(Point(float(imgx), float(imgy))).add(origin)
|
||||
@ -210,155 +286,50 @@ def affine_composite(
|
||||
|
||||
# If we're out of bounds, don't update.
|
||||
if texx < 0 or texy < 0 or texx >= texwidth or texy >= texheight:
|
||||
result.append(imgbytes[(imgoff * 4):((imgoff + 1) * 4)])
|
||||
continue
|
||||
|
||||
# Blend it.
|
||||
texoff = texx + (texy * texwidth)
|
||||
imgmap[imgoff] = affine_blend_impl(add_color, mult_color, texmap[texoff], imgmap[imgoff], blendfunc)
|
||||
result.append(blend_point(add_color, mult_color, texbytes[(texoff * 4):((texoff + 1) * 4)], imgbytes[(imgoff * 4):((imgoff + 1) * 4)], blendfunc))
|
||||
|
||||
img.putdata(imgmap)
|
||||
else:
|
||||
imgbytes = img.tobytes('raw', 'RGBA')
|
||||
texbytes = texture.tobytes('raw', 'RGBA')
|
||||
linebytes = bytes([channel for pixel in result for channel in pixel])
|
||||
results.put((imgy, linebytes))
|
||||
|
||||
# Let's spread the load across multiple processors.
|
||||
procs: List[multiprocessing.Process] = []
|
||||
work: multiprocessing.Queue = multiprocessing.Queue()
|
||||
results: multiprocessing.Queue = multiprocessing.Queue()
|
||||
expected: int = 0
|
||||
interrupted: bool = False
|
||||
def blend_point(
|
||||
add_color: Tuple[int, int, int, int],
|
||||
mult_color: Color,
|
||||
# This should be a sequence of exactly 4 values, either bytes or a tuple.
|
||||
src_color: Sequence[int],
|
||||
# This should be a sequence of exactly 4 values, either bytes or a tuple.
|
||||
dest_color: Sequence[int],
|
||||
blendfunc: int,
|
||||
) -> Sequence[int]:
|
||||
# Calculate multiplicative and additive colors against the source.
|
||||
src_color = (
|
||||
clamp((src_color[0] * mult_color.r) + add_color[0]),
|
||||
clamp((src_color[1] * mult_color.g) + add_color[1]),
|
||||
clamp((src_color[2] * mult_color.b) + add_color[2]),
|
||||
clamp((src_color[3] * mult_color.a) + add_color[3]),
|
||||
)
|
||||
|
||||
def ctrlc(sig: Any, frame: Any) -> None:
|
||||
nonlocal interrupted
|
||||
interrupted = True
|
||||
|
||||
original_handler = signal.getsignal(signal.SIGINT)
|
||||
signal.signal(signal.SIGINT, ctrlc)
|
||||
|
||||
for _ in range(cores):
|
||||
proc = multiprocessing.Process(
|
||||
target=pixel_renderer,
|
||||
args=(
|
||||
work,
|
||||
results,
|
||||
minx,
|
||||
maxx,
|
||||
imgwidth,
|
||||
texwidth,
|
||||
texheight,
|
||||
inverse,
|
||||
origin,
|
||||
add_color,
|
||||
mult_color,
|
||||
blendfunc,
|
||||
imgbytes,
|
||||
texbytes,
|
||||
),
|
||||
)
|
||||
procs.append(proc)
|
||||
proc.start()
|
||||
|
||||
for imgy in range(miny, maxy):
|
||||
work.put(imgy)
|
||||
expected += 1
|
||||
|
||||
lines: List[bytes] = [
|
||||
imgbytes[x:(x + (imgwidth * 4))]
|
||||
for x in range(
|
||||
0,
|
||||
imgwidth * imgheight * 4,
|
||||
imgwidth * 4,
|
||||
)
|
||||
]
|
||||
for _ in range(expected):
|
||||
imgy, result = results.get()
|
||||
lines[imgy] = result
|
||||
|
||||
for proc in procs:
|
||||
work.put(None)
|
||||
for proc in procs:
|
||||
proc.join()
|
||||
|
||||
signal.signal(signal.SIGINT, original_handler)
|
||||
if interrupted:
|
||||
raise KeyboardInterrupt()
|
||||
|
||||
img = Image.frombytes('RGBA', (imgwidth, imgheight), b''.join(lines))
|
||||
return img
|
||||
|
||||
|
||||
def pixel_renderer(
    work: multiprocessing.Queue,
    results: multiprocessing.Queue,
    minx: int,
    maxx: int,
    imgwidth: int,
    texwidth: int,
    texheight: int,
    inverse: Matrix,
    origin: Point,
    add_color: Tuple[int, int, int, int],
    mult_color: Color,
    blendfunc: int,
    imgbytes: bytes,
    texbytes: bytes,
) -> None:
    """
    Worker loop that renders one destination scanline per work item.

    Row indices are pulled from ``work`` until a ``None`` sentinel is received.
    For every row a full-width line is produced and pushed onto ``results`` as
    a ``(row, line_bytes)`` tuple. Pixels are addressed as 4 consecutive bytes
    in both ``imgbytes`` and ``texbytes``.

    Columns outside ``[minx, maxx)``, and columns whose inverse-mapped texture
    coordinate falls outside the texture, are copied through from ``imgbytes``
    unchanged. Everything else is blended via ``affine_blend_impl``.
    """
    # iter() with a sentinel stops as soon as work.get() hands back None.
    for row in iter(work.get, None):
        pixels: List[Sequence[int]] = []

        for column in range(imgwidth):
            # Byte offset of this destination pixel (4 bytes per pixel).
            dest_begin = (column + (row * imgwidth)) * 4
            dest_pixel = imgbytes[dest_begin:(dest_begin + 4)]

            # Outside the horizontal sweep range: keep the existing pixel.
            if column < minx or column >= maxx:
                pixels.append(dest_pixel)
                continue

            # Map the canvas coordinate back into texture space.
            mapped = inverse.multiply_point(Point(float(column), float(row)))
            texx, texy = mapped.add(origin).as_tuple()

            # Texture coordinate is out of bounds: keep the existing pixel.
            if texx < 0 or texy < 0 or texx >= texwidth or texy >= texheight:
                pixels.append(dest_pixel)
                continue

            # Blend the texture pixel onto the destination pixel.
            tex_begin = (texx + (texy * texwidth)) * 4
            pixels.append(
                affine_blend_impl(
                    add_color,
                    mult_color,
                    texbytes[tex_begin:(tex_begin + 4)],
                    dest_pixel,
                    blendfunc,
                )
            )

        # Flatten the per-pixel channel sequences into one line of raw bytes.
        results.put((row, bytes([channel for pixel in pixels for channel in pixel])))
|
||||
|
||||
|
||||
def affine_blend_impl(
    add_color: Tuple[int, int, int, int],
    mult_color: Color,
    # This should be a sequence of exactly 4 values, either bytes or a tuple.
    src_color: Sequence[int],
    # This should be a sequence of exactly 4 values, either bytes or a tuple.
    dest_color: Sequence[int],
    blendfunc: int,
) -> Sequence[int]:
    """
    Blend one source pixel onto one destination pixel.

    The blend routine is chosen from ``blendfunc``: 3 is multiply, 8 is
    addition, 9 and 70 are subtraction, and every other value falls back to
    normal alpha blending. The chosen routine receives the destination and
    source pixels followed by the multiplicative and additive colors, and its
    result is returned as-is.
    """
    if blendfunc == 3:
        blend = blend_multiply
    # TODO: blend mode 4, which is "screen" blending according to SWF references. I've only seen this
    # in Jubeat and it implements it using OpenGL equation Src * (1 - Dst) + Dst * 1.
    # TODO: blend mode 5, which is "lighten" blending according to SWF references. Jubeat does not
    # premultiply by alpha, but the GL/DX equation is max(Src * As, Dst * 1).
    # TODO: blend mode 6, which is "darken" blending according to SWF references. Jubeat does not
    # premultiply by alpha, but the GL/DX equation is min(Src * As, Dst * 1).
    # TODO: blend mode 10, which is "invert" according to SWF references. The only game I could find
    # that implemented this had equation Src * (1 - Dst) + Dst * (1 - As).
    # TODO: blend mode 13, which is "overlay" according to SWF references. The equation seems to be
    # Src * Dst + Dst * Src but Jubeat thinks it should be Src * Dst + Dst * (1 - As).
    elif blendfunc == 8:
        blend = blend_addition
    elif blendfunc in (9, 70):
        blend = blend_subtraction
    # TODO: blend mode 75, which is not in the SWF spec and appears to have the equation
    # Src * (1 - Dst) + Dst * (1 - Src).
    else:
        blend = blend_normal

    return blend(dest_color, src_color, mult_color, add_color)
|
||||
if blendfunc == 3:
|
||||
return blend_multiply(dest_color, src_color)
|
||||
# TODO: blend mode 4, which is "screen" blending according to SWF references. I've only seen this
|
||||
# in Jubeat and it implements it using OpenGL equation Src * (1 - Dst) + Dst * 1.
|
||||
# TODO: blend mode 5, which is "lighten" blending according to SWF references. Jubeat does not
|
||||
# premultiply by alpha, but the GL/DX equation is max(Src * As, Dst * 1).
|
||||
# TODO: blend mode 6, which is "darken" blending according to SWF references. Jubeat does not
|
||||
# premultiply by alpha, but the GL/DX equation is min(Src * As, Dst * 1).
|
||||
# TODO: blend mode 10, which is "invert" according to SWF references. The only game I could find
|
||||
# that implemented this had equation Src * (1 - Dst) + Dst * (1 - As).
|
||||
# TODO: blend mode 13, which is "overlay" according to SWF references. The equation seems to be
|
||||
# Src * Dst + Dst * Src but Jubeat thinks it should be Src * Dst + Dst * (1 - As).
|
||||
elif blendfunc == 8:
|
||||
return blend_addition(dest_color, src_color)
|
||||
elif blendfunc == 9 or blendfunc == 70:
|
||||
return blend_subtraction(dest_color, src_color)
|
||||
# TODO: blend mode 75, which is not in the SWF spec and appears to have the equation
|
||||
# Src * (1 - Dst) + Dst * (1 - Src).
|
||||
else:
|
||||
return blend_normal(dest_color, src_color)
|
||||
|
16
bemani/format/afp/blendalt.pyi
Normal file
16
bemani/format/afp/blendalt.pyi
Normal file
@ -0,0 +1,16 @@
|
||||
from PIL import Image # type: ignore
|
||||
from typing import Tuple
|
||||
|
||||
from .types.generic import Color, Matrix, Point
|
||||
|
||||
def affine_composite(
    img: Image.Image,
    add_color: Tuple[int, int, int, int],
    mult_color: Color,
    transform: Matrix,
    origin: Point,
    blendfunc: int,
    texture: Image.Image,
    single_threaded: bool = False,
) -> Image.Image:
    # Type stub for the compiled renderer implemented in blendalt.pyx. It
    # composites `texture` onto `img` through the affine `transform` and
    # returns the resulting image. The parameter list mirrors the Cython
    # definition one-for-one.
    ...
|
129
bemani/format/afp/blendalt.pyx
Normal file
129
bemani/format/afp/blendalt.pyx
Normal file
@ -0,0 +1,129 @@
|
||||
from PIL import Image # type: ignore
|
||||
from typing import Tuple
|
||||
|
||||
from .types.generic import Color, Matrix, Point
|
||||
|
||||
# The structs below are declared extern: their definitions live in the C++
# translation unit (blendaltimpl.cpp), and the field lists here mirror those
# typedefs.

# Color with one unsigned byte per r/g/b/a channel.
cdef extern struct intcolor_t:
    unsigned char r;
    unsigned char g;
    unsigned char b;
    unsigned char a;

# Color with one float per r/g/b/a channel.
cdef extern struct floatcolor_t:
    float r;
    float g;
    float b;
    float a;

# Affine matrix coefficients (a, b, c, d) plus translation (tx, ty).
cdef extern struct matrix_t:
    float a;
    float b;
    float c;
    float d;
    float tx;
    float ty;

# 2D point.
cdef extern struct point_t:
    float x;
    float y;
|
||||
|
||||
# Native renderer entry point, defined outside this file. It takes the raw
# image and texture buffers with their dimensions, the sweep rectangle
# (minx/maxx/miny/maxy), the color modifiers, the inverse transform, the origin
# and the blend function. The caller in this module treats any nonzero return
# value as an error.
cdef extern int affine_composite_fast(
    unsigned char *imgdata,
    unsigned int imgwidth,
    unsigned int imgheight,
    unsigned int minx,
    unsigned int maxx,
    unsigned int miny,
    unsigned int maxy,
    intcolor_t add_color,
    floatcolor_t mult_color,
    matrix_t inverse,
    point_t origin,
    int blendfunc,
    unsigned char *texdata,
    unsigned int texwidth,
    unsigned int texheight,
    int single_threaded
)
|
||||
|
||||
def affine_composite(
    img: Image.Image,
    add_color: Tuple[int, int, int, int],
    mult_color: Color,
    transform: Matrix,
    origin: Point,
    blendfunc: int,
    texture: Image.Image,
    single_threaded: bool = False,
) -> Image.Image:
    """
    Composite ``texture`` onto ``img`` through an affine transform using the
    native renderer.

    The transform is inverted so canvas pixels can be mapped back to texture
    space, the bounding rectangle of the transformed texture is clipped to the
    canvas, and the raw RGBA buffers are handed to ``affine_composite_fast``.

    Returns ``img`` itself, untouched, when the transform is not invertible,
    when ``blendfunc`` is unsupported, or when the clipped rectangle is empty.
    Otherwise returns a new image built from the rendered buffer.

    Raises ``Exception`` when the native renderer reports a nonzero result.
    """
    # Calculate the inverse so we can map canvas space back to texture space.
    try:
        inverse = transform.inverse()
    except ZeroDivisionError:
        # If this happens, that means one of the scaling factors was zero, making
        # this object invisible. We can ignore this since the object should not
        # be drawn.
        print(f"WARNING: Transform Matrix {transform} has zero scaling factor, making it non-invertible!")
        return img

    if blendfunc not in {0, 2, 3, 8, 9, 70}:
        print(f"WARNING: Unsupported blend {blendfunc}")
        return img

    # These are calculated properties and caching them outside of the loop
    # speeds things up a bit.
    imgwidth = img.width
    imgheight = img.height
    texwidth = texture.width
    texheight = texture.height

    # Calculate the maximum range of update this texture can possibly reside in.
    pix1 = transform.multiply_point(Point.identity().subtract(origin))
    pix2 = transform.multiply_point(Point.identity().subtract(origin).add(Point(texwidth, 0)))
    pix3 = transform.multiply_point(Point.identity().subtract(origin).add(Point(0, texheight)))
    pix4 = transform.multiply_point(Point.identity().subtract(origin).add(Point(texwidth, texheight)))

    # Map this to the rectangle we need to sweep in the rendering image.
    minx = max(int(min(pix1.x, pix2.x, pix3.x, pix4.x)), 0)
    maxx = min(int(max(pix1.x, pix2.x, pix3.x, pix4.x)) + 1, imgwidth)
    miny = max(int(min(pix1.y, pix2.y, pix3.y, pix4.y)), 0)
    maxy = min(int(max(pix1.y, pix2.y, pix3.y, pix4.y)) + 1, imgheight)

    if minx >= maxx or miny >= maxy:
        # The clipped rectangle is empty, so this image is entirely off the
        # screen. Since minx/miny are clamped to >= 0 this also covers the
        # maxx <= 0 / maxy <= 0 case, and it additionally catches textures
        # that land past the right or bottom edge. Bailing out here avoids
        # copying both images and handing inverted bounds to the unsigned
        # parameters of the native call.
        return img

    # Grab the raw image data.
    # NOTE(review): the native call below is documented as blitting in place,
    # which means it writes into this bytes object. That presumably relies on
    # tobytes() handing back a fresh, unshared buffer -- confirm.
    imgbytes = img.tobytes('raw', 'RGBA')
    texbytes = texture.tobytes('raw', 'RGBA')

    # Convert classes to C structs.
    cdef intcolor_t c_addcolor = intcolor_t(r=add_color[0], g=add_color[1], b=add_color[2], a=add_color[3])
    cdef floatcolor_t c_multcolor = floatcolor_t(r=mult_color.r, g=mult_color.g, b=mult_color.b, a=mult_color.a)
    cdef matrix_t c_inverse = matrix_t(a=inverse.a, b=inverse.b, c=inverse.c, d=inverse.d, tx=inverse.tx, ty=inverse.ty)
    cdef point_t c_origin = point_t(x=origin.x, y=origin.y)

    # Call the C++ function.
    errors = affine_composite_fast(
        imgbytes,
        imgwidth,
        imgheight,
        minx,
        maxx,
        miny,
        maxy,
        c_addcolor,
        c_multcolor,
        c_inverse,
        c_origin,
        blendfunc,
        texbytes,
        texwidth,
        texheight,
        single_threaded,
    )
    if errors != 0:
        raise Exception("Error raised in C++!")

    # We blitted in-place, return that.
    return Image.frombytes('RGBA', (imgwidth, imgheight), imgbytes)
|
232
bemani/format/afp/blendaltimpl.cpp
Normal file
232
bemani/format/afp/blendaltimpl.cpp
Normal file
@ -0,0 +1,232 @@
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
|
||||
extern "C"
|
||||
{
|
||||
// Color with one unsigned byte per channel; `a` is the alpha channel that the
// blend functions in this file read as src.a / dest.a.
typedef struct intcolor {
    unsigned char r;
    unsigned char g;
    unsigned char b;
    unsigned char a;
} intcolor_t;
|
||||
|
||||
// Color with one float per r/g/b/a channel.
typedef struct floatcolor {
    float r;
    float g;
    float b;
    float a;
} floatcolor_t;
|
||||
|
||||
// 2D point with float coordinates.
typedef struct point {
    float x;
    float y;

    // Component-wise sum of this point and `other`, returned as a new point.
    struct point add(struct point other) {
        struct point sum;
        sum.x = x + other.x;
        sum.y = y + other.y;
        return sum;
    }
} point_t;
|
||||
|
||||
typedef struct matrix {
|
||||
float a;
|
||||
float b;
|
||||
float c;
|
||||
float d;
|
||||
float tx;
|
||||
float ty;
|
||||
|
||||
point_t multiply_point(point_t point) {
|
||||
return (point_t){
|
||||
(a * point.x) + (c * point.y) + tx,
|
||||
(b * point.x) + (d * point.y) + ty,
|
||||
};
|
||||
}
|
||||
} matrix_t;
|
||||
|
||||
// Round `color` to the nearest integer (halves round away from zero, as roundf
// does) and saturate the result to the 0-255 range of a color channel.
inline unsigned char clamp(float color) {
    const float rounded = roundf(color);

    // Written as a negated comparison so that NaN also lands on 0.
    if (!(rounded > 0.0f)) {
        return 0;
    }
    if (rounded >= 255.0f) {
        return 255;
    }
    return (unsigned char)rounded;
}
|
||||
|
||||
intcolor_t blend_normal(
    intcolor_t dest,
    intcolor_t src
) {
    // "Normal" blend mode, i.e. plain alpha blending. Various games use the DX
    // equation Src * As + Dst * (1 - As). Dst is additionally premultiplied by Ad
    // because the destination we blit onto may itself be partially transparent.
    // Afterwards the premultiplied alpha is divided back out so the pixels return
    // to their full blended values, since the destination alpha is not forced to
    // 1.0. This is what lets partially transparent backgrounds work properly.

    // Short circuits for speed: an invisible source changes nothing, and a fully
    // opaque source simply replaces the destination.
    if (src.a == 0) {
        return dest;
    }
    if (src.a == 255) {
        return src;
    }

    // Alpha values as 0.0-1.0 fractions.
    float src_alpha = src.a / 255.0;
    float dest_alpha = dest.a / 255.0;
    float src_remainder = 1.0 - src_alpha;
    float out_alpha = (src_alpha + dest_alpha * src_remainder);

    intcolor_t blended;
    blended.r = clamp(((dest.r * dest_alpha * src_remainder) + (src.r * src_alpha)) / out_alpha);
    blended.g = clamp(((dest.g * dest_alpha * src_remainder) + (src.g * src_alpha)) / out_alpha);
    blended.b = clamp(((dest.b * dest_alpha * src_remainder) + (src.b * src_alpha)) / out_alpha);
    blended.a = clamp(255 * out_alpha);
    return blended;
}
|
||||
|
||||
intcolor_t blend_addition(
    intcolor_t dest,
    intcolor_t src
) {
    // "Addition" blend mode, used for fog/clouds/etc. Various games use the DX
    // equation Src * As + Dst * 1. It appears jubeat does not premultiply the
    // source by its alpha component.

    // Short circuit for speed: an invisible source adds nothing.
    if (src.a == 0) {
        return dest;
    }

    // Source alpha as a 0.0-1.0 fraction; destination alpha is carried through.
    float src_alpha = src.a / 255.0;

    intcolor_t blended;
    blended.r = clamp(dest.r + (src.r * src_alpha));
    blended.g = clamp(dest.g + (src.g * src_alpha));
    blended.b = clamp(dest.b + (src.b * src_alpha));
    blended.a = dest.a;
    return blended;
}
|
||||
|
||||
intcolor_t blend_subtraction(
    intcolor_t dest,
    intcolor_t src
) {
    // "Subtraction" blend mode, used for darkening an image. Various games use the
    // DX equation Dst * 1 - Src * As. It appears jubeat does not premultiply the
    // source by its alpha component, much like the "addition" blend mode.

    // Short circuit for speed: an invisible source subtracts nothing.
    if (src.a == 0) {
        return dest;
    }

    // Source alpha as a 0.0-1.0 fraction; destination alpha is carried through.
    float src_alpha = src.a / 255.0;

    intcolor_t blended;
    blended.r = clamp(dest.r - (src.r * src_alpha));
    blended.g = clamp(dest.g - (src.g * src_alpha));
    blended.b = clamp(dest.b - (src.b * src_alpha));
    blended.a = dest.a;
    return blended;
}
|
||||
|
||||
intcolor_t blend_multiply(
    intcolor_t dest,
    intcolor_t src
) {
    // "Multiply" blend mode, used for darkening an image. Various games use the DX
    // equation Src * 0 + Dst * Src. It appears jubeat uses the alternative formula
    // Src * Dst + Dst * (1 - As), which reduces to the first equation as long as
    // the source alpha is always 255.

    // Multiply each channel pair as 0.0-1.0 fractions, then scale back to 0-255.
    // The destination alpha is carried through unchanged.
    intcolor_t blended;
    blended.r = clamp(255 * ((dest.r / 255.0) * (src.r / 255.0)));
    blended.g = clamp(255 * ((dest.g / 255.0) * (src.g / 255.0)));
    blended.b = clamp(255 * ((dest.b / 255.0) * (src.b / 255.0)));
    blended.a = dest.a;
    return blended;
}
|
||||
|
||||
// Tint one source pixel and blend it onto one destination pixel.
//
// The source is first transformed per channel as clamp(src * mult_color + add_color),
// then combined with `dest_color` by the blend routine selected through `blendfunc`.
// Any blendfunc without a dedicated routine falls back to normal alpha blending.
intcolor_t blend_point(
    intcolor_t add_color,
    floatcolor_t mult_color,
    intcolor_t src_color,
    intcolor_t dest_color,
    int blendfunc
) {
    // Calculate multiplicative and additive colors against the source.
    intcolor_t tinted;
    tinted.r = clamp((src_color.r * mult_color.r) + add_color.r);
    tinted.g = clamp((src_color.g * mult_color.g) + add_color.g);
    tinted.b = clamp((src_color.b * mult_color.b) + add_color.b);
    tinted.a = clamp((src_color.a * mult_color.a) + add_color.a);

    // TODO: blend mode 4, which is "screen" blending according to SWF references. I've only seen this
    // in Jubeat and it implements it using OpenGL equation Src * (1 - Dst) + Dst * 1.
    // TODO: blend mode 5, which is "lighten" blending according to SWF references. Jubeat does not
    // premultiply by alpha, but the GL/DX equation is max(Src * As, Dst * 1).
    // TODO: blend mode 6, which is "darken" blending according to SWF references. Jubeat does not
    // premultiply by alpha, but the GL/DX equation is min(Src * As, Dst * 1).
    // TODO: blend mode 10, which is "invert" according to SWF references. The only game I could find
    // that implemented this had equation Src * (1 - Dst) + Dst * (1 - As).
    // TODO: blend mode 13, which is "overlay" according to SWF references. The equation seems to be
    // Src * Dst + Dst * Src but Jubeat thinks it should be Src * Dst + Dst * (1 - As).
    // TODO: blend mode 75, which is not in the SWF spec and appears to have the equation
    // Src * (1 - Dst) + Dst * (1 - Src).
    switch (blendfunc) {
        case 3:
            return blend_multiply(dest_color, tinted);
        case 8:
            return blend_addition(dest_color, tinted);
        case 9:
        case 70:
            return blend_subtraction(dest_color, tinted);
        default:
            return blend_normal(dest_color, tinted);
    }
}
|
||||
|
||||
int affine_composite_fast(
|
||||
unsigned char *imgbytes,
|
||||
unsigned int imgwidth,
|
||||
unsigned int imgheight,
|
||||
unsigned int minx,
|
||||
unsigned int maxx,
|
||||
unsigned int miny,
|
||||
unsigned int maxy,
|
||||
intcolor_t add_color,
|
||||
floatcolor_t mult_color,
|
||||
matrix_t inverse,
|
||||
point_t origin,
|
||||
int blendfunc,
|
||||
unsigned char *texbytes,
|
||||
unsigned int texwidth,
|
||||
unsigned int texheight,
|
||||
int single_threaded
|
||||
) {
|
||||
// Cast to a usable type.
|
||||
intcolor_t *imgdata = (intcolor_t *)imgbytes;
|
||||
intcolor_t *texdata = (intcolor_t *)texbytes;
|
||||
|
||||
for (unsigned int imgy = miny; imgy < maxy; imgy++) {
|
||||
for (unsigned int imgx = minx; imgx < maxx; imgx++) {
|
||||
// Determine offset.
|
||||
unsigned int imgoff = imgx + (imgy * imgwidth);
|
||||
|
||||
// Calculate what texture pixel data goes here.
|
||||
point_t texloc = inverse.multiply_point((point_t){(float)imgx, (float)imgy}).add(origin);
|
||||
int texx = roundf(texloc.x);
|
||||
int texy = roundf(texloc.y);
|
||||
|
||||
// If we're out of bounds, don't update.
|
||||
if (texx < 0 or texy < 0 or texx >= (int)texwidth or texy >= (int)texheight) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Blend it.
|
||||
unsigned int texoff = texx + (texy * texwidth);
|
||||
imgdata[imgoff] = blend_point(add_color, mult_color, texdata[texoff], imgdata[imgoff], blendfunc);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
}
|
@ -374,16 +374,6 @@ class AFPRenderer(VerboseOutput):
|
||||
# Compute the affine transformation matrix for this object.
|
||||
transform = parent_transform.multiply(renderable.transform)
|
||||
|
||||
# Calculate the inverse so we can map canvas space back to texture space.
|
||||
try:
|
||||
inverse = transform.inverse()
|
||||
except ZeroDivisionError:
|
||||
# If this happens, that means one of the scaling factors was zero, making
|
||||
# this object invisible. We can ignore this since the object should not
|
||||
# be drawn.
|
||||
print(f"WARNING: Transform Matrix {transform} has zero scaling factor, making it non-invertible!")
|
||||
return img
|
||||
|
||||
# Render individual shapes if this is a sprite.
|
||||
if isinstance(renderable, PlacedClip):
|
||||
# This is a sprite placement reference.
|
||||
@ -458,7 +448,7 @@ class AFPRenderer(VerboseOutput):
|
||||
img.alpha_composite(texture, cutin.as_tuple(), cutoff.as_tuple())
|
||||
else:
|
||||
# We can't, so do the slow render that's correct.
|
||||
img = affine_composite(img, add_color, mult_color, transform, inverse, origin, blend, texture, single_threaded=self.__single_threaded)
|
||||
img = affine_composite(img, add_color, mult_color, transform, origin, blend, texture, single_threaded=self.__single_threaded)
|
||||
else:
|
||||
raise Exception(f"Unknown placed object type to render {renderable}!")
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user