diff --git a/bemani/format/afp/blend/blend.py b/bemani/format/afp/blend/blend.py index 2425eb9..67b8b8a 100644 --- a/bemani/format/afp/blend/blend.py +++ b/bemani/format/afp/blend/blend.py @@ -1,9 +1,10 @@ import multiprocessing import signal from PIL import Image # type: ignore -from typing import Any, List, Optional, Sequence, Union +from typing import Any, Callable, List, Optional, Sequence, Union from ..types import Color, Matrix, Point +from .perspective import perspective_calculate def clamp(color: float) -> int: @@ -192,6 +193,218 @@ def blend_point( return blend_normal(dest_color, src_color) +def pixel_renderer( + imgx: int, + imgy: int, + imgwidth: int, + texwidth: int, + texheight: int, + xscale: float, + yscale: float, + callback: Callable[[Point], Optional[Point]], + add_color: Color, + mult_color: Color, + blendfunc: int, + imgbytes: Union[bytes, bytearray], + texbytes: Union[bytes, bytearray], + maskbytes: Optional[Union[bytes, bytearray]], + enable_aa: bool, +) -> Sequence[int]: + # Determine offset + maskoff = imgx + (imgy * imgwidth) + imgoff = maskoff * 4 + + if maskbytes is not None and maskbytes[maskoff] == 0: + # This pixel is masked off! + return imgbytes[imgoff:(imgoff + 4)] + + if enable_aa: + r = 0 + g = 0 + b = 0 + a = 0 + count = 0 + denom = 0 + + # Essentially what we're doing here is calculating the scale, clamping it at 1.0 as the + # minimum and then setting the AA sample swing accordingly. This has the effect of anti-aliasing + # scaled up images a bit softer than would otherwise be achieved. + xswing = 0.5 * max(1.0, xscale) + yswing = 0.5 * max(1.0, yscale) + + xpoints = [0.5 - xswing, 0.5 - (xswing / 2.0), 0.5, 0.5 + (xswing / 2.0), 0.5 + xswing] + ypoints = [0.5 - yswing, 0.5 - (yswing / 2.0), 0.5, 0.5 + (yswing / 2.0), 0.5 + yswing] + + # First, figure out if we can use bilinear resampling. 
+ bilinear = False + if xscale >= 1.0 and yscale >= 1.0: + aaloc = callback(Point(imgx + 0.5, imgy + 0.5)) + if aaloc is not None: + aax, aay, _ = aaloc.as_tuple() + if not (aax <= 0 or aay <= 0 or aax >= (texwidth - 1) or aay >= (texheight - 1)): + bilinear = True + + # Now perform the desired AA operation. + if bilinear: + # Calculate the pixel we're after, and what percentage into the pixel we are. + texloc = callback(Point(imgx + 0.5, imgy + 0.5)) + if texloc is None: + raise Exception("Logic error!") + aax, aay, _ = texloc.as_tuple() + aaxrem = texloc.x - aax + aayrem = texloc.y - aay + + # Find the four pixels that we can interpolate from. The first number is the x, and second is y. + tex00 = (aax + (aay * texwidth)) * 4 + tex10 = tex00 + 4 + tex01 = (aax + ((aay + 1) * texwidth)) * 4 + tex11 = tex01 + 4 + + # Calculate various scaling factors based on alpha and percentage. + tex00percent = texbytes[tex00 + 3] / 255.0 + tex10percent = texbytes[tex10 + 3] / 255.0 + tex01percent = texbytes[tex01 + 3] / 255.0 + tex11percent = texbytes[tex11 + 3] / 255.0 + + y0percent = (tex00percent * (1.0 - aaxrem)) + (tex10percent * aaxrem) + y1percent = (tex01percent * (1.0 - aaxrem)) + (tex11percent * aaxrem) + finalpercent = (y0percent * (1.0 - aayrem)) + (y1percent * aayrem) + + if finalpercent <= 0.0: + # This pixel would be blank, so we avoid dividing by zero. + average = [255, 255, 255, 0] + else: + # Interpolate in the X direction on both Y axis. 
+ y0r = ((texbytes[tex00] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10] * tex10percent * aaxrem)) + y0g = ((texbytes[tex00 + 1] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10 + 1] * tex10percent * aaxrem)) + y0b = ((texbytes[tex00 + 2] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10 + 2] * tex10percent * aaxrem)) + + y1r = ((texbytes[tex01] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11] * tex11percent * aaxrem)) + y1g = ((texbytes[tex01 + 1] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11 + 1] * tex11percent * aaxrem)) + y1b = ((texbytes[tex01 + 2] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11 + 2] * tex11percent * aaxrem)) + + # Now interpolate the Y direction to get the final pixel value. + average = [ + int(((y0r * (1.0 - aayrem)) + (y1r * aayrem)) / finalpercent), + int(((y0g * (1.0 - aayrem)) + (y1g * aayrem)) / finalpercent), + int(((y0b * (1.0 - aayrem)) + (y1b * aayrem)) / finalpercent), + int(finalpercent * 255), + ] + else: + for addy in ypoints: + for addx in xpoints: + texloc = callback(Point(imgx + addx, imgy + addy)) + denom += 1 + + if texloc is None: + continue + + aax, aay, _ = texloc.as_tuple() + + # If we're out of bounds, don't update. Factor this in, however, so we can get partial + # transparency to the pixel that is already there. + if aax < 0 or aay < 0 or aax >= texwidth or aay >= texheight: + continue + + # Grab the values to average, for SSAA. Make sure to factor in alpha as a poor-man's + # blend to ensure that partial transparency pixel values don't unnecessarily factor + # into average calculations. + texoff = (aax + (aay * texwidth)) * 4 + + # If this is a fully transparent pixel, the below formulas work out to adding nothing + # so we should skip this altogether. 
+ if texbytes[texoff + 3] == 0: + continue + + apercent = texbytes[texoff + 3] / 255.0 + r += int(texbytes[texoff] * apercent) + g += int(texbytes[texoff + 1] * apercent) + b += int(texbytes[texoff + 2] * apercent) + a += texbytes[texoff + 3] + count += 1 + + if count == 0: + # None of the samples existed in-bounds. + return imgbytes[imgoff:(imgoff + 4)] + + # Average the pixels. Make sure to divide out the alpha in preparation for blending. + alpha = a // denom + + if alpha == 0: + average = [255, 255, 255, alpha] + else: + apercent = alpha / 255.0 + average = [int((r / denom) / apercent), int((g / denom) / apercent), int((b / denom) / apercent), alpha] + + # Finally, blend it with the destination. + return blend_point(add_color, mult_color, average, imgbytes[imgoff:(imgoff + 4)], blendfunc) + else: + # Calculate what texture pixel data goes here. + texloc = callback(Point(imgx + 0.5, imgy + 0.5)) + if texloc is None: + return imgbytes[imgoff:(imgoff + 4)] + + texx, texy, _ = texloc.as_tuple() + + # If we're out of bounds, don't update. + if texx < 0 or texy < 0 or texx >= texwidth or texy >= texheight: + return imgbytes[imgoff:(imgoff + 4)] + + # Blend it. + texoff = (texx + (texy * texwidth)) * 4 + return blend_point(add_color, mult_color, texbytes[texoff:(texoff + 4)], imgbytes[imgoff:(imgoff + 4)], blendfunc) + + +def affine_line_renderer( + work: multiprocessing.Queue, + results: multiprocessing.Queue, + minx: int, + maxx: int, + imgwidth: int, + texwidth: int, + texheight: int, + inverse: Matrix, + add_color: Color, + mult_color: Color, + blendfunc: int, + imgbytes: Union[bytes, bytearray], + texbytes: Union[bytes, bytearray], + maskbytes: Optional[Union[bytes, bytearray]], + enable_aa: bool, +) -> None: + while True: + imgy = work.get() + if imgy is None: + return + + rowbytes = bytearray(imgbytes[(imgy * imgwidth * 4):((imgy + 1) * imgwidth * 4)]) + for imgx in range(imgwidth): + if imgx < minx or imgx >= maxx: + # No need to even consider this pixel. 
+ continue + else: + # Blit new pixel into the correct range. + rowbytes[(imgx * 4):((imgx + 1) * 4)] = pixel_renderer( + imgx, + imgy, + imgwidth, + texwidth, + texheight, + 1.0 / inverse.xscale, + 1.0 / inverse.yscale, + lambda point: inverse.multiply_point(point), + add_color, + mult_color, + blendfunc, + imgbytes, + texbytes, + maskbytes, + enable_aa, + ) + + results.put((imgy, bytes(rowbytes))) + + def affine_composite( img: Image.Image, add_color: Color, @@ -262,7 +475,9 @@ def affine_composite( imgwidth, texwidth, texheight, - inverse, + 1.0 / inverse.xscale, + 1.0 / inverse.yscale, + lambda point: inverse.multiply_point(point), add_color, mult_color, blendfunc, @@ -298,7 +513,7 @@ def affine_composite( for _ in range(cores): proc = multiprocessing.Process( - target=line_renderer, + target=affine_line_renderer, args=( work, results, @@ -349,7 +564,7 @@ def affine_composite( return img -def line_renderer( +def perspective_line_renderer( work: multiprocessing.Queue, results: multiprocessing.Queue, minx: int, @@ -357,6 +572,8 @@ def line_renderer( imgwidth: int, texwidth: int, texheight: int, + xscale: float, + yscale: float, inverse: Matrix, add_color: Color, mult_color: Color, @@ -366,6 +583,14 @@ def line_renderer( maskbytes: Optional[Union[bytes, bytearray]], enable_aa: bool, ) -> None: + def perspective_inverse(imgpoint: Point) -> Optional[Point]: + # Calculate the texture coordinate with our perspective interpolation. 
+ texdiv = inverse.multiply_point(imgpoint) + if texdiv.z <= 0.0: + return None + + return Point(texdiv.x / texdiv.z, texdiv.y / texdiv.z) + while True: imgy = work.get() if imgy is None: @@ -384,7 +609,9 @@ def line_renderer( imgwidth, texwidth, texheight, - inverse, + xscale, + yscale, + perspective_inverse, add_color, mult_color, blendfunc, @@ -397,160 +624,6 @@ def line_renderer( results.put((imgy, bytes(rowbytes))) -def pixel_renderer( - imgx: int, - imgy: int, - imgwidth: int, - texwidth: int, - texheight: int, - inverse: Matrix, - add_color: Color, - mult_color: Color, - blendfunc: int, - imgbytes: Union[bytes, bytearray], - texbytes: Union[bytes, bytearray], - maskbytes: Optional[Union[bytes, bytearray]], - enable_aa: bool, -) -> Sequence[int]: - # Determine offset - maskoff = imgx + (imgy * imgwidth) - imgoff = maskoff * 4 - - if maskbytes is not None and maskbytes[maskoff] == 0: - # This pixel is masked off! - return imgbytes[imgoff:(imgoff + 4)] - - if enable_aa: - r = 0 - g = 0 - b = 0 - a = 0 - count = 0 - denom = 0 - - # Essentially what we're doing here is calculating the scale, clamping it at 1.0 as the - # minimum and then setting the AA sample swing accordingly. This has the effect of anti-aliasing - # scaled up images a bit softer than would otherwise be achieved. - xscale = 1.0 / inverse.xscale - yscale = 1.0 / inverse.yscale - - # These are used for picking the various sample points for SSAA method below. - xswing = 0.5 * max(1.0, xscale) - yswing = 0.5 * max(1.0, yscale) - - xpoints = [0.5 - xswing, 0.5 - (xswing / 2.0), 0.5, 0.5 + (xswing / 2.0), 0.5 + xswing] - ypoints = [0.5 - yswing, 0.5 - (yswing / 2.0), 0.5, 0.5 + (yswing / 2.0), 0.5 + yswing] - - # First, figure out if we can use bilinear resampling. 
- bilinear = False - if xscale >= 1.0 and yscale >= 1.0: - aaloc = inverse.multiply_point(Point(imgx + 0.5, imgy + 0.5)) - aax, aay, _ = aaloc.as_tuple() - if not (aax <= 0 or aay <= 0 or aax >= (texwidth - 1) or aay >= (texheight - 1)): - bilinear = True - - # Now perform the desired AA operation. - if bilinear: - # Calculate the pixel we're after, and what percentage into the pixel we are. - texloc = inverse.multiply_point(Point(imgx + 0.5, imgy + 0.5)) - aax, aay, _ = texloc.as_tuple() - aaxrem = texloc.x - aax - aayrem = texloc.y - aay - - # Find the four pixels that we can interpolate from. The first number is the x, and second is y. - tex00 = (aax + (aay * texwidth)) * 4 - tex10 = tex00 + 4 - tex01 = (aax + ((aay + 1) * texwidth)) * 4 - tex11 = tex01 + 4 - - # Calculate various scaling factors based on alpha and percentage. - tex00percent = texbytes[tex00 + 3] / 255.0 - tex10percent = texbytes[tex10 + 3] / 255.0 - tex01percent = texbytes[tex01 + 3] / 255.0 - tex11percent = texbytes[tex11 + 3] / 255.0 - - y0percent = (tex00percent * (1.0 - aaxrem)) + (tex10percent * aaxrem) - y1percent = (tex01percent * (1.0 - aaxrem)) + (tex11percent * aaxrem) - finalpercent = (y0percent * (1.0 - aayrem)) + (y1percent * aayrem) - - if finalpercent <= 0.0: - # This pixel would be blank, so we avoid dividing by zero. - average = [255, 255, 255, 0] - else: - # Interpolate in the X direction on both Y axis. 
- y0r = ((texbytes[tex00] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10] * tex10percent * aaxrem)) - y0g = ((texbytes[tex00 + 1] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10 + 1] * tex10percent * aaxrem)) - y0b = ((texbytes[tex00 + 2] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10 + 2] * tex10percent * aaxrem)) - - y1r = ((texbytes[tex01] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11] * tex11percent * aaxrem)) - y1g = ((texbytes[tex01 + 1] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11 + 1] * tex11percent * aaxrem)) - y1b = ((texbytes[tex01 + 2] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11 + 2] * tex11percent * aaxrem)) - - # Now interpolate the Y direction to get the final pixel value. - average = [ - int(((y0r * (1.0 - aayrem)) + (y1r * aayrem)) / finalpercent), - int(((y0g * (1.0 - aayrem)) + (y1g * aayrem)) / finalpercent), - int(((y0b * (1.0 - aayrem)) + (y1b * aayrem)) / finalpercent), - int(finalpercent * 255), - ] - else: - for addy in ypoints: - for addx in xpoints: - texloc = inverse.multiply_point(Point(imgx + addx, imgy + addy)) - aax, aay, _ = texloc.as_tuple() - - # If we're out of bounds, don't update. Factor this in, however, so we can get partial - # transparency to the pixel that is already there. - denom += 1 - if aax < 0 or aay < 0 or aax >= texwidth or aay >= texheight: - continue - - # Grab the values to average, for SSAA. Make sure to factor in alpha as a poor-man's - # blend to ensure that partial transparency pixel values don't unnecessarily factor - # into average calculations. - texoff = (aax + (aay * texwidth)) * 4 - - # If this is a fully transparent pixel, the below formulas work out to adding nothing - # so we should skip this altogether. 
- if texbytes[texoff + 3] == 0: - continue - - apercent = texbytes[texoff + 3] / 255.0 - r += int(texbytes[texoff] * apercent) - g += int(texbytes[texoff + 1] * apercent) - b += int(texbytes[texoff + 2] * apercent) - a += texbytes[texoff + 3] - count += 1 - - if count == 0: - # None of the samples existed in-bounds. - return imgbytes[imgoff:(imgoff + 4)] - - # Average the pixels. Make sure to divide out the alpha in preparation for blending. - alpha = a // denom - - if alpha == 0: - average = [255, 255, 255, alpha] - else: - apercent = alpha / 255.0 - average = [int((r / denom) / apercent), int((g / denom) / apercent), int((b / denom) / apercent), alpha] - - # Finally, blend it with the destination. - return blend_point(add_color, mult_color, average, imgbytes[imgoff:(imgoff + 4)], blendfunc) - else: - # Calculate what texture pixel data goes here. - texloc = inverse.multiply_point(Point(imgx + 0.5, imgy + 0.5)) - texx, texy, _ = texloc.as_tuple() - - # If we're out of bounds, don't update. - if texx < 0 or texy < 0 or texx >= texwidth or texy >= texheight: - return imgbytes[imgoff:(imgoff + 4)] - - # Blend it. - texoff = (texx + (texy * texwidth)) * 4 - return blend_point(add_color, mult_color, texbytes[texoff:(texoff + 4)], imgbytes[imgoff:(imgoff + 4)], blendfunc) - - def perspective_composite( img: Image.Image, add_color: Color, @@ -576,6 +649,12 @@ def perspective_composite( texwidth = texture.width texheight = texture.height + # Get the perspective-correct inverse matrix for looking up texture coordinates. + inverse_matrix, minx, miny, maxx, maxy = perspective_calculate(imgwidth, imgheight, texwidth, texheight, transform, camera, focal_length) + if inverse_matrix is None: + # This texture is entirely off of the screen. + return img + # Get the data in an easier to manipulate and faster to update fashion. 
imgbytes = bytearray(img.tobytes('raw', 'RGBA')) texbytes = texture.tobytes('raw', 'RGBA') @@ -585,29 +664,122 @@ def perspective_composite( else: maskbytes = None - for texy in range(texheight): - for texx in range(texwidth): - # Calculate perspective projection. - imgloc = transform.multiply_point(Point(texx, texy)) - perspective = focal_length / (imgloc.z - camera.z) - imgx = int(((imgloc.x - camera.x) * perspective) + camera.x) - imgy = int(((imgloc.y - camera.y) * perspective) + camera.y) + def perspective_inverse(imgpoint: Point) -> Optional[Point]: + # Calculate the texture coordinate with our perspective interpolation. + texdiv = inverse_matrix.multiply_point(imgpoint) + if texdiv.z <= 0.0: + return None - # Check clipping. - if imgx < 0 or imgx >= imgwidth: - continue - if imgy < 0 or imgy >= imgheight: - continue + return Point(texdiv.x / texdiv.z, texdiv.y / texdiv.z) - # Check mask rectangle. - maskoff = imgx + (imgy * imgwidth) - imgoff = maskoff * 4 - if maskbytes is not None and maskbytes[maskoff] == 0: - continue + cores = multiprocessing.cpu_count() + if single_threaded or cores < 2: + # Get the data in an easier to manipulate and faster to update fashion. + imgbytes = bytearray(img.tobytes('raw', 'RGBA')) + texbytes = texture.tobytes('raw', 'RGBA') + if mask: + alpha = mask.split()[-1] + maskbytes = alpha.tobytes('raw', 'L') + else: + maskbytes = None - # Blend it. - texoff = (texx + (texy * texwidth)) * 4 - imgbytes[imgoff:(imgoff + 4)] = blend_point(add_color, mult_color, texbytes[texoff:(texoff + 4)], imgbytes[imgoff:(imgoff + 4)], blendfunc) + # We don't have enough CPU cores to bother multiprocessing. 
+ for imgy in range(miny, maxy): + for imgx in range(minx, maxx): + # Determine offset + imgoff = (imgx + (imgy * imgwidth)) * 4 + imgbytes[imgoff:(imgoff + 4)] = pixel_renderer( + imgx, + imgy, + imgwidth, + texwidth, + texheight, + transform.xscale, + transform.yscale, + perspective_inverse, + add_color, + mult_color, + blendfunc, + imgbytes, + texbytes, + maskbytes, + enable_aa, + ) img = Image.frombytes('RGBA', (imgwidth, imgheight), bytes(imgbytes)) + else: + imgbytes = img.tobytes('raw', 'RGBA') + texbytes = texture.tobytes('raw', 'RGBA') + if mask: + alpha = mask.split()[-1] + maskbytes = alpha.tobytes('raw', 'L') + else: + maskbytes = None + + # Let's spread the load across multiple processors. + procs: List[multiprocessing.Process] = [] + work: multiprocessing.Queue = multiprocessing.Queue() + results: multiprocessing.Queue = multiprocessing.Queue() + expected: int = 0 + interrupted: bool = False + + def ctrlc(sig: Any, frame: Any) -> None: + nonlocal interrupted + interrupted = True + + previous_handler = signal.getsignal(signal.SIGINT) + signal.signal(signal.SIGINT, ctrlc) + + for _ in range(cores): + proc = multiprocessing.Process( + target=perspective_line_renderer, + args=( + work, + results, + minx, + maxx, + imgwidth, + texwidth, + texheight, + transform.xscale, + transform.yscale, + inverse_matrix, + add_color, + mult_color, + blendfunc, + imgbytes, + texbytes, + maskbytes, + enable_aa, + ), + ) + procs.append(proc) + proc.start() + + for imgy in range(miny, maxy): + work.put(imgy) + expected += 1 + + lines: List[bytes] = [ + imgbytes[x:(x + (imgwidth * 4))] + for x in range( + 0, + imgwidth * imgheight * 4, + imgwidth * 4, + ) + ] + for _ in range(expected): + imgy, result = results.get() + lines[imgy] = result + + for _proc in procs: + work.put(None) + for proc in procs: + proc.join() + + signal.signal(signal.SIGINT, previous_handler) + if interrupted: + raise KeyboardInterrupt() + + img = Image.frombytes('RGBA', (imgwidth, imgheight), 
b''.join(lines)) return img diff --git a/bemani/format/afp/blend/blendcpp.pyx b/bemani/format/afp/blend/blendcpp.pyx index f186554..71eb7b1 100644 --- a/bemani/format/afp/blend/blendcpp.pyx +++ b/bemani/format/afp/blend/blendcpp.pyx @@ -3,6 +3,7 @@ from PIL import Image # type: ignore from typing import Optional, Tuple from ..types import Color, Matrix, Point +from .perspective import perspective_calculate cdef extern struct floatcolor_t: float r; @@ -24,9 +25,9 @@ cdef extern struct matrix_t: float a42; float a43; -cdef extern int affine_composite_fast( - unsigned char *imgdata, - unsigned char *maskdata, +cdef extern int composite_fast( + unsigned char *imgbytes, + unsigned char *maskbytes, unsigned int imgwidth, unsigned int imgheight, unsigned int minx, @@ -35,27 +36,10 @@ cdef extern int affine_composite_fast( unsigned int maxy, floatcolor_t add_color, floatcolor_t mult_color, + float xscale, + float yscale, matrix_t inverse, - int blendfunc, - unsigned char *texdata, - unsigned int texwidth, - unsigned int texheight, - unsigned int threads, - unsigned int enable_aa, -) - -cdef extern int perspective_composite_fast( - unsigned char *imgbytes, - unsigned char *maskbytes, - unsigned int imgwidth, - unsigned int imgheight, - float camera_x, - float camera_y, - float camera_z, - float focal_length, - floatcolor_t add_color, - floatcolor_t mult_color, - matrix_t transform, + int use_perspective, int blendfunc, unsigned char *texbytes, unsigned int texwidth, @@ -137,7 +121,7 @@ def affine_composite( cdef unsigned int threads = 1 if single_threaded else multiprocessing.cpu_count() # Call the C++ function. 
- errors = affine_composite_fast( + errors = composite_fast( imgbytes, maskbytes, imgwidth, @@ -148,7 +132,10 @@ def affine_composite( maxy, c_addcolor, c_multcolor, + transform.xscale, + transform.yscale, c_inverse, + 0, blendfunc, texbytes, texwidth, @@ -190,6 +177,12 @@ def perspective_composite( texwidth = texture.width texheight = texture.height + # Get the perspective-correct inverse matrix for looking up texture coordinates. + inverse_matrix, minx, miny, maxx, maxy = perspective_calculate(imgwidth, imgheight, texwidth, texheight, transform, camera, focal_length) + if inverse_matrix is None: + # This texture is entirely off of the screen. + return img + # Grab the raw image data. imgbytes = img.tobytes('raw', 'RGBA') texbytes = texture.tobytes('raw', 'RGBA') @@ -207,27 +200,30 @@ def perspective_composite( # Convert classes to C structs. cdef floatcolor_t c_addcolor = floatcolor_t(r=add_color.r, g=add_color.g, b=add_color.b, a=add_color.a) cdef floatcolor_t c_multcolor = floatcolor_t(r=mult_color.r, g=mult_color.g, b=mult_color.b, a=mult_color.a) - cdef matrix_t c_transform = matrix_t( - a11=transform.a11, a12=transform.a12, a13=transform.a13, - a21=transform.a21, a22=transform.a22, a23=transform.a23, - a31=transform.a31, a32=transform.a32, a33=transform.a33, - a41=transform.a41, a42=transform.a42, a43=transform.a43, + cdef matrix_t c_inverse = matrix_t( + a11=inverse_matrix.a11, a12=inverse_matrix.a12, a13=inverse_matrix.a13, + a21=inverse_matrix.a21, a22=inverse_matrix.a22, a23=inverse_matrix.a23, + a31=inverse_matrix.a31, a32=inverse_matrix.a32, a33=inverse_matrix.a33, + a41=inverse_matrix.a41, a42=inverse_matrix.a42, a43=inverse_matrix.a43, ) cdef unsigned int threads = 1 if single_threaded else multiprocessing.cpu_count() # Call the C++ function. 
- errors = perspective_composite_fast( + errors = composite_fast( imgbytes, maskbytes, imgwidth, imgheight, - camera.x, - camera.y, - camera.z, - focal_length, + minx, + maxx, + miny, + maxy, c_addcolor, c_multcolor, - c_transform, + transform.xscale, + transform.yscale, + c_inverse, + 1, blendfunc, texbytes, texwidth, diff --git a/bemani/format/afp/blend/blendcppimpl.cxx b/bemani/format/afp/blend/blendcppimpl.cxx index 6b454fc..1247329 100644 --- a/bemani/format/afp/blend/blendcppimpl.cxx +++ b/bemani/format/afp/blend/blendcppimpl.cxx @@ -56,14 +56,6 @@ extern "C" (a13 * point.x) + (a23 * point.y) + (a33 * point.z) + a43, }; } - - float xscale() { - return sqrt((a11 * a11) + (a12 * a12) + (a13 * a13)); - } - - float yscale() { - return sqrt((a21 * a21) + (a22 * a22) + (a23 * a23)); - } } matrix_t; typedef struct work { @@ -77,7 +69,10 @@ extern "C" intcolor_t *texdata; unsigned int texwidth; unsigned int texheight; + float xscale; + float yscale; matrix_t inverse; + int use_perspective; floatcolor_t add_color; floatcolor_t mult_color; int blendfunc; @@ -270,12 +265,8 @@ extern "C" // costs us almost nothing. Essentially what we're doing here is calculating the scale, clamping it at 1.0 as the // minimum and then setting the AA sample swing accordingly. This has the effect of anti-aliasing scaled up images // a bit softer than would otherwise be achieved. - float xscale = 1.0 / work->inverse.xscale(); - float yscale = 1.0 / work->inverse.yscale(); - - // These are used for picking the various sample points for SSAA method below. - float xswing = 0.5 * fmax(1.0, xscale); - float yswing = 0.5 * fmax(1.0, yscale); + float xswing = 0.5 * fmax(1.0, work->xscale); + float yswing = 0.5 * fmax(1.0, work->yscale); for (unsigned int imgy = work->miny; imgy < work->maxy; imgy++) { for (unsigned int imgx = work->minx; imgx < work->maxx; imgx++) { @@ -300,10 +291,21 @@ extern "C" // First, figure out if we can use bilinear resampling. 
int bilinear = 0; - if (xscale >= 1.0 && yscale >= 1.0) { - point_t aaloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)}); - int aax = aaloc.x; - int aay = aaloc.y; + if (work->xscale >= 1.0 && work->yscale >= 1.0) { + int aax = -1; + int aay = -1; + + if (work->use_perspective) { + point_t aaloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)}); + if (aaloc.z > 0.0) { + aax = aaloc.x / aaloc.z; + aay = aaloc.y / aaloc.z; + } + } else { + point_t aaloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)}); + aax = aaloc.x; + aay = aaloc.y; + } if (!(aax <= 0 || aay <= 0 || aax >= ((int)work->texwidth - 1) || aay >= ((int)work->texheight - 1))) { bilinear = 1; @@ -314,11 +316,28 @@ extern "C" intcolor_t average; if (bilinear) { // Calculate the pixel we're after, and what percentage into the pixel we are. - point_t texloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)}); - int aax = texloc.x; - int aay = texloc.y; - float aaxrem = texloc.x - (float)aax; - float aayrem = texloc.y - (float)aay; + int aax; + int aay; + float aaxrem; + float aayrem; + + if (work->use_perspective) { + // We don't check for negative here, because we already checked it above and wouldn't + // have enabled bilinear interpolation. + point_t texloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)}); + float fx = texloc.x / texloc.z; + float fy = texloc.y / texloc.z; + aax = fx; + aay = fy; + aaxrem = fx - (float)aax; + aayrem = fy - (float)aay; + } else { + point_t texloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)}); + aax = texloc.x; + aay = texloc.y; + aaxrem = texloc.x - (float)aax; + aayrem = texloc.y - (float)aay; + } // Find the four pixels that we can interpolate from. The first number is the x, and second is y. 
unsigned int tex00 = aax + (aay * work->texwidth); @@ -366,9 +385,20 @@ extern "C" } else { for (float addy = 0.5 - yswing; addy <= 0.5 + yswing; addy += yswing / 2.0) { for (float addx = 0.5 - xswing; addx <= 0.5 + xswing; addx += xswing / 2.0) { - point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + addx, (float)imgy + addy}); - int aax = texloc.x; - int aay = texloc.y; + int aax = -1; + int aay = -1; + + if (work->use_perspective) { + point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + addx, (float)imgy + addy}); + if (texloc.z > 0.0) { + aax = texloc.x / texloc.z; + aay = texloc.y / texloc.z; + } + } else { + point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + addx, (float)imgy + addy}); + aax = texloc.x; + aay = texloc.y; + } // If we're out of bounds, don't update. Factor this in, however, so we can get partial // transparency to the pixel that is already there. @@ -429,9 +459,20 @@ extern "C" work->imgdata[imgoff] = blend_point(work->add_color, work->mult_color, average, work->imgdata[imgoff], work->blendfunc); } else { // Grab the center of the pixel to get the color. - point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + (float)0.5, (float)imgy + (float)0.5}); - int texx = texloc.x; - int texy = texloc.y; + int texx = -1; + int texy = -1; + + if (work->use_perspective) { + point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + (float)0.5, (float)imgy + (float)0.5}); + if (texloc.z > 0.0) { + texx = texloc.x / texloc.z; + texy = texloc.y / texloc.z; + } + } else { + point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + (float)0.5, (float)imgy + (float)0.5}); + texx = texloc.x; + texy = texloc.y; + } // If we're out of bounds, don't update. 
if (texx < 0 || texy < 0 || texx >= (int)work->texwidth || texy >= (int)work->texheight) { @@ -452,7 +493,7 @@ extern "C" return NULL; } - int affine_composite_fast( + int composite_fast( unsigned char *imgbytes, unsigned char *maskbytes, unsigned int imgwidth, @@ -463,7 +504,10 @@ extern "C" unsigned int maxy, floatcolor_t add_color, floatcolor_t mult_color, + float xscale, + float yscale, matrix_t inverse, + int use_perspective, int blendfunc, unsigned char *texbytes, unsigned int texwidth, @@ -488,11 +532,14 @@ extern "C" work.texdata = texdata; work.texwidth = texwidth; work.texheight = texheight; + work.xscale = xscale; + work.yscale = yscale; work.inverse = inverse; work.add_color = add_color; work.mult_color = mult_color; work.blendfunc = blendfunc; work.enable_aa = enable_aa; + work.use_perspective = use_perspective; chunk_composite_fast(&work); } else { @@ -531,12 +578,15 @@ extern "C" work->texdata = texdata; work->texwidth = texwidth; work->texheight = texheight; + work->xscale = xscale; + work->yscale = yscale; work->inverse = inverse; work->add_color = add_color; work->mult_color = mult_color; work->blendfunc = blendfunc; work->thread = thread; work->enable_aa = enable_aa; + work->use_perspective = use_perspective; if (me) { @@ -584,58 +634,4 @@ extern "C" return 0; } - - int perspective_composite_fast( - unsigned char *imgbytes, - unsigned char *maskbytes, - unsigned int imgwidth, - unsigned int imgheight, - float camera_x, - float camera_y, - float camera_z, - float focal_length, - floatcolor_t add_color, - floatcolor_t mult_color, - matrix_t transform, - int blendfunc, - unsigned char *texbytes, - unsigned int texwidth, - unsigned int texheight, - unsigned int threads, - unsigned int enable_aa - ) { - // Cast to a usable type. 
- intcolor_t *imgdata = (intcolor_t *)imgbytes; - intcolor_t *texdata = (intcolor_t *)texbytes; - - for (unsigned int texy = 0; texy < texheight; texy++) { - for (unsigned int texx = 0; texx < texwidth; texx++) { - // Calculate perspective projection. - point_t imgloc = transform.multiply_point((point_t){(float)texx, (float)texy}); - float perspective = focal_length / (imgloc.z - camera_z); - int imgx = ((imgloc.x - camera_x) * perspective) + camera_x; - int imgy = ((imgloc.y - camera_y) * perspective) + camera_y; - - // Check clipping. - if (imgx < 0 || imgx >= (int)imgwidth) { - continue; - } - if (imgy < 0 || imgy >= (int)imgheight) { - continue; - } - - // Check mask rectangle. - unsigned int imgoff = imgx + (imgy * imgwidth); - if (maskbytes != NULL && maskbytes[imgoff] == 0) { - continue; - } - - // Blend it. - unsigned int texoff = (texx + (texy * texwidth)); - imgdata[imgoff] = blend_point(add_color, mult_color, texdata[texoff], imgdata[imgoff], blendfunc); - } - } - - return 0; - } } diff --git a/bemani/format/afp/blend/perspective.py b/bemani/format/afp/blend/perspective.py new file mode 100644 index 0000000..65dd1a1 --- /dev/null +++ b/bemani/format/afp/blend/perspective.py @@ -0,0 +1,95 @@ +from typing import Dict, List, Optional, Tuple + +from ..types import Matrix, Point + + +def perspective_calculate( + imgwidth: int, + imgheight: int, + texwidth: int, + texheight: int, + transform: Matrix, + camera: Point, + focal_length: float, +) -> Tuple[Optional[Matrix], int, int, int, int]: + # Arbitrarily choose three points on the texture to create a pair of vectors + # so that we can interpolate backwards. This isn't as simple as inverting the + # view matrix like in affine compositing because dividing by Z makes the + # perspective transform non-linear. So instead we interpolate 1/Z, u/Z and + # v/Z since those ARE linear, and work backwards from there. 
+ xy: List[Point] = [] + uvz: Dict[Point, Point] = {} + for (texx, texy) in [ + (0, 0), + (texwidth, 0), + (0, texheight), + # Include this just to get a good upper bound for where the texture + # will be drawn. + (texwidth, texheight), + ]: + imgloc = transform.multiply_point(Point(texx, texy)) + distance = imgloc.z - camera.z + imgx = int(((imgloc.x - camera.x) * (focal_length / distance)) + camera.x) + imgy = int(((imgloc.y - camera.y) * (focal_length / distance)) + camera.y) + + xy_point = Point(imgx, imgy) + xy.append(xy_point) + uvz[xy_point] = Point( + focal_length * texx / distance, + focal_length * texy / distance, + focal_length / distance, + ) + + # Calculate the maximum range of update this texture can possibly reside in. + minx = max(int(min(p.x for p in xy)), 0) + maxx = min(int(max(p.x for p in xy)) + 1, imgwidth) + miny = max(int(min(p.y for p in xy)), 0) + maxy = min(int(max(p.y for p in xy)) + 1, imgheight) + + if maxx <= minx or maxy <= miny: + # This image is entirely off the screen! + return (None, minx, miny, maxx, maxy) + + # Now that we have three points, construct a matrix that allows us to calculate + # what amount of each u/z, v/z and 1/z vector we need to interpolate values. The + # below matrix gives us an affine transform that will convert a point that's in + # the range 0, 0 to 1, 1 to a point inside the parallelogram that is made by + # projecting the two vectors we got from calculating the three texture points above. + xy_matrix = Matrix.affine( + a=xy[1].x - xy[0].x, + b=xy[1].y - xy[0].y, + c=xy[2].x - xy[0].x, + d=xy[2].y - xy[0].y, + tx=xy[0].x, + ty=xy[0].y, + ) + + # We invert that above, which gives us a matrix that can take screen space (imgx, + # imgy) and gives us instead those ratios, which allows us to then interpolate the + # u/z, v/z and 1/z values. + try: + xy_matrix = xy_matrix.inverse() + except ZeroDivisionError: + # This can't be inverted, so this shouldn't be displayed. 
+ return (None, minx, miny, maxx, maxy) + + # We construct a second matrix, which interpolates coordinates in the range of + # 0, 0 to 1, 1 and gives us back the u/z, v/z and 1/z values. + uvz_matrix = Matrix( + a11=uvz[xy[1]].x - uvz[xy[0]].x, + a12=uvz[xy[1]].y - uvz[xy[0]].y, + a13=uvz[xy[1]].z - uvz[xy[0]].z, + a21=uvz[xy[2]].x - uvz[xy[0]].x, + a22=uvz[xy[2]].y - uvz[xy[0]].y, + a23=uvz[xy[2]].z - uvz[xy[0]].z, + a31=0.0, + a32=0.0, + a33=0.0, + a41=uvz[xy[0]].x, + a42=uvz[xy[0]].y, + a43=uvz[xy[0]].z, + ) + + # Finally, we can combine the two matrices to do the interpolation all at once. + inverse_matrix = xy_matrix.multiply(uvz_matrix) + return (inverse_matrix, minx, miny, maxx, maxy)