1
0
mirror of synced 2024-11-24 06:20:12 +01:00

Switch perspective engine to using existing blitter with correct perspective-based texture mapping.

This commit is contained in:
Jennifer Taylor 2021-08-05 17:32:00 +00:00
parent 75bc9f975c
commit 74838e698d
4 changed files with 557 additions and 298 deletions

View File

@ -1,9 +1,10 @@
import multiprocessing
import signal
from PIL import Image # type: ignore
from typing import Any, List, Optional, Sequence, Union
from typing import Any, Callable, List, Optional, Sequence, Union
from ..types import Color, Matrix, Point
from .perspective import perspective_calculate
def clamp(color: float) -> int:
@ -192,6 +193,218 @@ def blend_point(
return blend_normal(dest_color, src_color)
def pixel_renderer(
    imgx: int,
    imgy: int,
    imgwidth: int,
    texwidth: int,
    texheight: int,
    xscale: float,
    yscale: float,
    callback: Callable[[Point], Optional[Point]],
    add_color: Color,
    mult_color: Color,
    blendfunc: int,
    imgbytes: Union[bytes, bytearray],
    texbytes: Union[bytes, bytearray],
    maskbytes: Optional[Union[bytes, bytearray]],
    enable_aa: bool,
) -> Sequence[int]:
    """
    Compute the blended value of a single destination pixel.

    The destination pixel is mapped back into texture space using `callback`,
    the texture is sampled there (optionally with anti-aliasing) and the sample
    is blended over the existing destination pixel using `blend_point`.

    Parameters:
        imgx, imgy - Coordinate of the destination pixel being rendered.
        imgwidth - Width of the destination in pixels, used to locate the pixel in `imgbytes`.
        texwidth, texheight - Size of the texture in pixels, used for bounds checks and offsets.
        xscale, yscale - Scale of the texture on screen. Values of 1.0 or more on both axes
                         allow bilinear sampling, and larger values widen the SSAA sample swing.
        callback - Maps a destination-space point to a texture-space point, or returns None
                   when the destination point has no corresponding texture point.
        add_color, mult_color, blendfunc - Forwarded untouched to `blend_point`.
        imgbytes - Destination pixel data, four bytes per pixel.
        texbytes - Texture pixel data, four bytes per pixel with alpha as the fourth byte.
        maskbytes - Optional mask, one byte per destination pixel. A zero byte masks the pixel off.
        enable_aa - When True, use bilinear/SSAA sampling instead of a single center sample.

    Returns:
        The four values for the destination pixel. When the pixel is masked off or no
        texture sample lands in bounds, the existing four bytes from `imgbytes` are returned.
    """
    # Determine offset
    maskoff = imgx + (imgy * imgwidth)
    imgoff = maskoff * 4

    if maskbytes is not None and maskbytes[maskoff] == 0:
        # This pixel is masked off!
        return imgbytes[imgoff:(imgoff + 4)]

    if not enable_aa:
        # Calculate what texture pixel data goes here.
        texloc = callback(Point(imgx + 0.5, imgy + 0.5))
        if texloc is None:
            return imgbytes[imgoff:(imgoff + 4)]

        # NOTE(review): as_tuple() is relied on to give the integer texel coordinate
        # (presumably by truncation) -- confirm against the Point implementation.
        texx, texy, _ = texloc.as_tuple()

        # If we're out of bounds, don't update.
        if texx < 0 or texy < 0 or texx >= texwidth or texy >= texheight:
            return imgbytes[imgoff:(imgoff + 4)]

        # Blend it.
        texoff = (texx + (texy * texwidth)) * 4
        return blend_point(add_color, mult_color, texbytes[texoff:(texoff + 4)], imgbytes[imgoff:(imgoff + 4)], blendfunc)

    # First, figure out if we can use bilinear resampling. We keep hold of the mapped
    # center point when we can, so that the (potentially expensive) callback is not
    # run a second time for the exact same destination point.
    bilinear_loc: Optional[Point] = None
    if xscale >= 1.0 and yscale >= 1.0:
        aaloc = callback(Point(imgx + 0.5, imgy + 0.5))
        if aaloc is not None:
            aax, aay, _ = aaloc.as_tuple()
            # Bilinear needs the texel to the right and below, so the sample must be
            # strictly inside the texture with a one pixel border.
            if not (aax <= 0 or aay <= 0 or aax >= (texwidth - 1) or aay >= (texheight - 1)):
                bilinear_loc = aaloc

    # Now perform the desired AA operation.
    if bilinear_loc is not None:
        # Calculate the pixel we're after, and what percentage into the pixel we are.
        aax, aay, _ = bilinear_loc.as_tuple()
        aaxrem = bilinear_loc.x - aax
        aayrem = bilinear_loc.y - aay

        # Find the four pixels that we can interpolate from. The first number is the x, and second is y.
        tex00 = (aax + (aay * texwidth)) * 4
        tex10 = tex00 + 4
        tex01 = (aax + ((aay + 1) * texwidth)) * 4
        tex11 = tex01 + 4

        # Calculate various scaling factors based on alpha and percentage.
        tex00percent = texbytes[tex00 + 3] / 255.0
        tex10percent = texbytes[tex10 + 3] / 255.0
        tex01percent = texbytes[tex01 + 3] / 255.0
        tex11percent = texbytes[tex11 + 3] / 255.0

        y0percent = (tex00percent * (1.0 - aaxrem)) + (tex10percent * aaxrem)
        y1percent = (tex01percent * (1.0 - aaxrem)) + (tex11percent * aaxrem)
        finalpercent = (y0percent * (1.0 - aayrem)) + (y1percent * aayrem)

        if finalpercent <= 0.0:
            # This pixel would be blank, so we avoid dividing by zero.
            average = [255, 255, 255, 0]
        else:
            # Interpolate in the X direction on both rows, weighting each texel by its alpha.
            y0r = ((texbytes[tex00] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10] * tex10percent * aaxrem))
            y0g = ((texbytes[tex00 + 1] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10 + 1] * tex10percent * aaxrem))
            y0b = ((texbytes[tex00 + 2] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10 + 2] * tex10percent * aaxrem))

            y1r = ((texbytes[tex01] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11] * tex11percent * aaxrem))
            y1g = ((texbytes[tex01 + 1] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11 + 1] * tex11percent * aaxrem))
            y1b = ((texbytes[tex01 + 2] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11 + 2] * tex11percent * aaxrem))

            # Now interpolate the Y direction to get the final pixel value.
            average = [
                int(((y0r * (1.0 - aayrem)) + (y1r * aayrem)) / finalpercent),
                int(((y0g * (1.0 - aayrem)) + (y1g * aayrem)) / finalpercent),
                int(((y0b * (1.0 - aayrem)) + (y1b * aayrem)) / finalpercent),
                int(finalpercent * 255),
            ]
    else:
        # Essentially what we're doing here is calculating the scale, clamping it at 1.0 as the
        # minimum and then setting the AA sample swing accordingly. This has the effect of anti-aliasing
        # scaled up images a bit softer than would otherwise be achieved.
        xswing = 0.5 * max(1.0, xscale)
        yswing = 0.5 * max(1.0, yscale)

        xpoints = [0.5 - xswing, 0.5 - (xswing / 2.0), 0.5, 0.5 + (xswing / 2.0), 0.5 + xswing]
        ypoints = [0.5 - yswing, 0.5 - (yswing / 2.0), 0.5, 0.5 + (yswing / 2.0), 0.5 + yswing]

        r = 0
        g = 0
        b = 0
        a = 0
        count = 0
        denom = 0

        for addy in ypoints:
            for addx in xpoints:
                texloc = callback(Point(imgx + addx, imgy + addy))
                # Every attempted sample counts toward the denominator, even ones that miss,
                # so that misses show up as partial transparency in the averaged result.
                denom += 1

                if texloc is None:
                    continue

                aax, aay, _ = texloc.as_tuple()

                # If we're out of bounds, don't update. Factor this in, however, so we can get partial
                # transparency to the pixel that is already there.
                if aax < 0 or aay < 0 or aax >= texwidth or aay >= texheight:
                    continue

                # Grab the values to average, for SSAA. Make sure to factor in alpha as a poor-man's
                # blend to ensure that partial transparency pixel values don't unnecessarily factor
                # into average calculations.
                texoff = (aax + (aay * texwidth)) * 4

                # If this is a fully transparent pixel, the below formulas work out to adding nothing
                # so we should skip this altogether.
                if texbytes[texoff + 3] == 0:
                    continue

                apercent = texbytes[texoff + 3] / 255.0
                r += int(texbytes[texoff] * apercent)
                g += int(texbytes[texoff + 1] * apercent)
                b += int(texbytes[texoff + 2] * apercent)
                a += texbytes[texoff + 3]
                count += 1

        if count == 0:
            # None of the samples existed in-bounds.
            return imgbytes[imgoff:(imgoff + 4)]

        # Average the pixels. Make sure to divide out the alpha in preparation for blending.
        alpha = a // denom

        if alpha == 0:
            average = [255, 255, 255, alpha]
        else:
            apercent = alpha / 255.0
            average = [int((r / denom) / apercent), int((g / denom) / apercent), int((b / denom) / apercent), alpha]

    # Finally, blend it with the destination.
    return blend_point(add_color, mult_color, average, imgbytes[imgoff:(imgoff + 4)], blendfunc)
def affine_line_renderer(
    work: multiprocessing.Queue,
    results: multiprocessing.Queue,
    minx: int,
    maxx: int,
    imgwidth: int,
    texwidth: int,
    texheight: int,
    inverse: Matrix,
    add_color: Color,
    mult_color: Color,
    blendfunc: int,
    imgbytes: Union[bytes, bytearray],
    texbytes: Union[bytes, bytearray],
    maskbytes: Optional[Union[bytes, bytearray]],
    enable_aa: bool,
) -> None:
    """
    Worker loop that renders whole rows of an affine composite.

    Row indexes are pulled from `work` until a None sentinel arrives. For each row,
    the pixels in the half-open column range [minx, maxx) are recomputed with
    `pixel_renderer` and the finished row is pushed onto `results` as a
    (row index, row bytes) tuple. Columns outside that range keep their original
    bytes from `imgbytes`.
    """
    # Only columns that are both inside the update window and inside the image
    # are ever rendered, so work out that window once.
    first_column = max(minx, 0)
    end_column = min(maxx, imgwidth)
    stride = imgwidth * 4

    # A None pulled from the queue is the signal to shut down.
    for imgy in iter(work.get, None):
        # Start from a mutable copy of the existing row so untouched pixels pass through.
        row_start = imgy * stride
        rowbytes = bytearray(imgbytes[row_start:(row_start + stride)])

        for imgx in range(first_column, end_column):
            # Blit new pixel into the correct range.
            pixel_start = imgx * 4
            rowbytes[pixel_start:(pixel_start + 4)] = pixel_renderer(
                imgx,
                imgy,
                imgwidth,
                texwidth,
                texheight,
                1.0 / inverse.xscale,
                1.0 / inverse.yscale,
                inverse.multiply_point,
                add_color,
                mult_color,
                blendfunc,
                imgbytes,
                texbytes,
                maskbytes,
                enable_aa,
            )

        results.put((imgy, bytes(rowbytes)))
def affine_composite(
img: Image.Image,
add_color: Color,
@ -262,7 +475,9 @@ def affine_composite(
imgwidth,
texwidth,
texheight,
inverse,
1.0 / inverse.xscale,
1.0 / inverse.yscale,
lambda point: inverse.multiply_point(point),
add_color,
mult_color,
blendfunc,
@ -298,7 +513,7 @@ def affine_composite(
for _ in range(cores):
proc = multiprocessing.Process(
target=line_renderer,
target=affine_line_renderer,
args=(
work,
results,
@ -349,7 +564,7 @@ def affine_composite(
return img
def line_renderer(
def perspective_line_renderer(
work: multiprocessing.Queue,
results: multiprocessing.Queue,
minx: int,
@ -357,6 +572,8 @@ def line_renderer(
imgwidth: int,
texwidth: int,
texheight: int,
xscale: float,
yscale: float,
inverse: Matrix,
add_color: Color,
mult_color: Color,
@ -366,6 +583,14 @@ def line_renderer(
maskbytes: Optional[Union[bytes, bytearray]],
enable_aa: bool,
) -> None:
def perspective_inverse(imgpoint: Point) -> Optional[Point]:
    # Map a screen-space point to texture space. The inverse matrix yields the
    # interpolated u/z, v/z and 1/z values, so dividing x and y by z recovers
    # the texture coordinate.
    interpolated = inverse.multiply_point(imgpoint)
    if interpolated.z <= 0.0:
        # A non-positive z has no valid texture coordinate, so report no mapping.
        return None

    texu = interpolated.x / interpolated.z
    texv = interpolated.y / interpolated.z
    return Point(texu, texv)
while True:
imgy = work.get()
if imgy is None:
@ -384,7 +609,9 @@ def line_renderer(
imgwidth,
texwidth,
texheight,
inverse,
xscale,
yscale,
perspective_inverse,
add_color,
mult_color,
blendfunc,
@ -397,160 +624,6 @@ def line_renderer(
results.put((imgy, bytes(rowbytes)))
def pixel_renderer(
    imgx: int,
    imgy: int,
    imgwidth: int,
    texwidth: int,
    texheight: int,
    inverse: Matrix,
    add_color: Color,
    mult_color: Color,
    blendfunc: int,
    imgbytes: Union[bytes, bytearray],
    texbytes: Union[bytes, bytearray],
    maskbytes: Optional[Union[bytes, bytearray]],
    enable_aa: bool,
) -> Sequence[int]:
    """
    Compute the blended value of a single destination pixel using an inverse matrix.

    The destination pixel is mapped back into texture space by multiplying with
    `inverse`, the texture is sampled there (optionally with anti-aliasing) and the
    sample is blended over the existing destination pixel using `blend_point`.

    Parameters:
        imgx, imgy - Coordinate of the destination pixel being rendered.
        imgwidth - Width of the destination in pixels, used to locate the pixel in `imgbytes`.
        texwidth, texheight - Size of the texture in pixels, used for bounds checks and offsets.
        inverse - Matrix taking destination-space points to texture-space points. Its
                  `xscale`/`yscale` are inverted to get the on-screen scale for anti-aliasing.
        add_color, mult_color, blendfunc - Forwarded untouched to `blend_point`.
        imgbytes - Destination pixel data, four bytes per pixel.
        texbytes - Texture pixel data, four bytes per pixel with alpha as the fourth byte.
        maskbytes - Optional mask, one byte per destination pixel. A zero byte masks the pixel off.
        enable_aa - When True, use bilinear/SSAA sampling instead of a single center sample.

    Returns:
        The four values for the destination pixel. When the pixel is masked off or no
        texture sample lands in bounds, the existing four bytes from `imgbytes` are returned.
    """
    # Determine offset
    maskoff = imgx + (imgy * imgwidth)
    imgoff = maskoff * 4

    if maskbytes is not None and maskbytes[maskoff] == 0:
        # This pixel is masked off!
        return imgbytes[imgoff:(imgoff + 4)]

    if not enable_aa:
        # Calculate what texture pixel data goes here.
        texloc = inverse.multiply_point(Point(imgx + 0.5, imgy + 0.5))

        # NOTE(review): as_tuple() is relied on to give the integer texel coordinate
        # (presumably by truncation) -- confirm against the Point implementation.
        texx, texy, _ = texloc.as_tuple()

        # If we're out of bounds, don't update.
        if texx < 0 or texy < 0 or texx >= texwidth or texy >= texheight:
            return imgbytes[imgoff:(imgoff + 4)]

        # Blend it.
        texoff = (texx + (texy * texwidth)) * 4
        return blend_point(add_color, mult_color, texbytes[texoff:(texoff + 4)], imgbytes[imgoff:(imgoff + 4)], blendfunc)

    # Essentially what we're doing here is calculating the scale, clamping it at 1.0 as the
    # minimum and then setting the AA sample swing accordingly. This has the effect of anti-aliasing
    # scaled up images a bit softer than would otherwise be achieved.
    xscale = 1.0 / inverse.xscale
    yscale = 1.0 / inverse.yscale

    # First, figure out if we can use bilinear resampling. We keep hold of the mapped
    # center point when we can, so that the matrix multiply is not repeated for the
    # exact same destination point.
    bilinear_loc: Optional[Point] = None
    if xscale >= 1.0 and yscale >= 1.0:
        aaloc = inverse.multiply_point(Point(imgx + 0.5, imgy + 0.5))
        aax, aay, _ = aaloc.as_tuple()
        # Bilinear needs the texel to the right and below, so the sample must be
        # strictly inside the texture with a one pixel border.
        if not (aax <= 0 or aay <= 0 or aax >= (texwidth - 1) or aay >= (texheight - 1)):
            bilinear_loc = aaloc

    # Now perform the desired AA operation.
    if bilinear_loc is not None:
        # Calculate the pixel we're after, and what percentage into the pixel we are.
        aax, aay, _ = bilinear_loc.as_tuple()
        aaxrem = bilinear_loc.x - aax
        aayrem = bilinear_loc.y - aay

        # Find the four pixels that we can interpolate from. The first number is the x, and second is y.
        tex00 = (aax + (aay * texwidth)) * 4
        tex10 = tex00 + 4
        tex01 = (aax + ((aay + 1) * texwidth)) * 4
        tex11 = tex01 + 4

        # Calculate various scaling factors based on alpha and percentage.
        tex00percent = texbytes[tex00 + 3] / 255.0
        tex10percent = texbytes[tex10 + 3] / 255.0
        tex01percent = texbytes[tex01 + 3] / 255.0
        tex11percent = texbytes[tex11 + 3] / 255.0

        y0percent = (tex00percent * (1.0 - aaxrem)) + (tex10percent * aaxrem)
        y1percent = (tex01percent * (1.0 - aaxrem)) + (tex11percent * aaxrem)
        finalpercent = (y0percent * (1.0 - aayrem)) + (y1percent * aayrem)

        if finalpercent <= 0.0:
            # This pixel would be blank, so we avoid dividing by zero.
            average = [255, 255, 255, 0]
        else:
            # Interpolate in the X direction on both rows, weighting each texel by its alpha.
            y0r = ((texbytes[tex00] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10] * tex10percent * aaxrem))
            y0g = ((texbytes[tex00 + 1] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10 + 1] * tex10percent * aaxrem))
            y0b = ((texbytes[tex00 + 2] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10 + 2] * tex10percent * aaxrem))

            y1r = ((texbytes[tex01] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11] * tex11percent * aaxrem))
            y1g = ((texbytes[tex01 + 1] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11 + 1] * tex11percent * aaxrem))
            y1b = ((texbytes[tex01 + 2] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11 + 2] * tex11percent * aaxrem))

            # Now interpolate the Y direction to get the final pixel value.
            average = [
                int(((y0r * (1.0 - aayrem)) + (y1r * aayrem)) / finalpercent),
                int(((y0g * (1.0 - aayrem)) + (y1g * aayrem)) / finalpercent),
                int(((y0b * (1.0 - aayrem)) + (y1b * aayrem)) / finalpercent),
                int(finalpercent * 255),
            ]
    else:
        # These are used for picking the various sample points for SSAA method below.
        xswing = 0.5 * max(1.0, xscale)
        yswing = 0.5 * max(1.0, yscale)

        xpoints = [0.5 - xswing, 0.5 - (xswing / 2.0), 0.5, 0.5 + (xswing / 2.0), 0.5 + xswing]
        ypoints = [0.5 - yswing, 0.5 - (yswing / 2.0), 0.5, 0.5 + (yswing / 2.0), 0.5 + yswing]

        r = 0
        g = 0
        b = 0
        a = 0
        count = 0
        denom = 0

        for addy in ypoints:
            for addx in xpoints:
                texloc = inverse.multiply_point(Point(imgx + addx, imgy + addy))
                aax, aay, _ = texloc.as_tuple()

                # If we're out of bounds, don't update. Factor this in, however, so we can get partial
                # transparency to the pixel that is already there.
                denom += 1
                if aax < 0 or aay < 0 or aax >= texwidth or aay >= texheight:
                    continue

                # Grab the values to average, for SSAA. Make sure to factor in alpha as a poor-man's
                # blend to ensure that partial transparency pixel values don't unnecessarily factor
                # into average calculations.
                texoff = (aax + (aay * texwidth)) * 4

                # If this is a fully transparent pixel, the below formulas work out to adding nothing
                # so we should skip this altogether.
                if texbytes[texoff + 3] == 0:
                    continue

                apercent = texbytes[texoff + 3] / 255.0
                r += int(texbytes[texoff] * apercent)
                g += int(texbytes[texoff + 1] * apercent)
                b += int(texbytes[texoff + 2] * apercent)
                a += texbytes[texoff + 3]
                count += 1

        if count == 0:
            # None of the samples existed in-bounds.
            return imgbytes[imgoff:(imgoff + 4)]

        # Average the pixels. Make sure to divide out the alpha in preparation for blending.
        alpha = a // denom

        if alpha == 0:
            average = [255, 255, 255, alpha]
        else:
            apercent = alpha / 255.0
            average = [int((r / denom) / apercent), int((g / denom) / apercent), int((b / denom) / apercent), alpha]

    # Finally, blend it with the destination.
    return blend_point(add_color, mult_color, average, imgbytes[imgoff:(imgoff + 4)], blendfunc)
def perspective_composite(
img: Image.Image,
add_color: Color,
@ -576,6 +649,12 @@ def perspective_composite(
texwidth = texture.width
texheight = texture.height
# Get the perspective-correct inverse matrix for looking up texture coordinates.
inverse_matrix, minx, miny, maxx, maxy = perspective_calculate(imgwidth, imgheight, texwidth, texheight, transform, camera, focal_length)
if inverse_matrix is None:
# This texture is entirely off of the screen.
return img
# Get the data in an easier to manipulate and faster to update fashion.
imgbytes = bytearray(img.tobytes('raw', 'RGBA'))
texbytes = texture.tobytes('raw', 'RGBA')
@ -585,29 +664,122 @@ def perspective_composite(
else:
maskbytes = None
for texy in range(texheight):
for texx in range(texwidth):
# Calculate perspective projection.
imgloc = transform.multiply_point(Point(texx, texy))
perspective = focal_length / (imgloc.z - camera.z)
imgx = int(((imgloc.x - camera.x) * perspective) + camera.x)
imgy = int(((imgloc.y - camera.y) * perspective) + camera.y)
def perspective_inverse(imgpoint: Point) -> Optional[Point]:
# Calculate the texture coordinate with our perspective interpolation.
texdiv = inverse_matrix.multiply_point(imgpoint)
if texdiv.z <= 0.0:
return None
# Check clipping.
if imgx < 0 or imgx >= imgwidth:
continue
if imgy < 0 or imgy >= imgheight:
continue
return Point(texdiv.x / texdiv.z, texdiv.y / texdiv.z)
# Check mask rectangle.
maskoff = imgx + (imgy * imgwidth)
imgoff = maskoff * 4
if maskbytes is not None and maskbytes[maskoff] == 0:
continue
cores = multiprocessing.cpu_count()
if single_threaded or cores < 2:
# Get the data in an easier to manipulate and faster to update fashion.
imgbytes = bytearray(img.tobytes('raw', 'RGBA'))
texbytes = texture.tobytes('raw', 'RGBA')
if mask:
alpha = mask.split()[-1]
maskbytes = alpha.tobytes('raw', 'L')
else:
maskbytes = None
# Blend it.
texoff = (texx + (texy * texwidth)) * 4
imgbytes[imgoff:(imgoff + 4)] = blend_point(add_color, mult_color, texbytes[texoff:(texoff + 4)], imgbytes[imgoff:(imgoff + 4)], blendfunc)
# We don't have enough CPU cores to bother multiprocessing.
for imgy in range(miny, maxy):
for imgx in range(minx, maxx):
# Determine offset
imgoff = (imgx + (imgy * imgwidth)) * 4
imgbytes[imgoff:(imgoff + 4)] = pixel_renderer(
imgx,
imgy,
imgwidth,
texwidth,
texheight,
transform.xscale,
transform.yscale,
perspective_inverse,
add_color,
mult_color,
blendfunc,
imgbytes,
texbytes,
maskbytes,
enable_aa,
)
img = Image.frombytes('RGBA', (imgwidth, imgheight), bytes(imgbytes))
else:
imgbytes = img.tobytes('raw', 'RGBA')
texbytes = texture.tobytes('raw', 'RGBA')
if mask:
alpha = mask.split()[-1]
maskbytes = alpha.tobytes('raw', 'L')
else:
maskbytes = None
# Let's spread the load across multiple processors.
procs: List[multiprocessing.Process] = []
work: multiprocessing.Queue = multiprocessing.Queue()
results: multiprocessing.Queue = multiprocessing.Queue()
expected: int = 0
interrupted: bool = False
def ctrlc(sig: Any, frame: Any) -> None:
nonlocal interrupted
interrupted = True
previous_handler = signal.getsignal(signal.SIGINT)
signal.signal(signal.SIGINT, ctrlc)
for _ in range(cores):
proc = multiprocessing.Process(
target=perspective_line_renderer,
args=(
work,
results,
minx,
maxx,
imgwidth,
texwidth,
texheight,
transform.xscale,
transform.yscale,
inverse_matrix,
add_color,
mult_color,
blendfunc,
imgbytes,
texbytes,
maskbytes,
enable_aa,
),
)
procs.append(proc)
proc.start()
for imgy in range(miny, maxy):
work.put(imgy)
expected += 1
lines: List[bytes] = [
imgbytes[x:(x + (imgwidth * 4))]
for x in range(
0,
imgwidth * imgheight * 4,
imgwidth * 4,
)
]
for _ in range(expected):
imgy, result = results.get()
lines[imgy] = result
for _proc in procs:
work.put(None)
for proc in procs:
proc.join()
signal.signal(signal.SIGINT, previous_handler)
if interrupted:
raise KeyboardInterrupt()
img = Image.frombytes('RGBA', (imgwidth, imgheight), b''.join(lines))
return img

View File

@ -3,6 +3,7 @@ from PIL import Image # type: ignore
from typing import Optional, Tuple
from ..types import Color, Matrix, Point
from .perspective import perspective_calculate
cdef extern struct floatcolor_t:
float r;
@ -24,9 +25,9 @@ cdef extern struct matrix_t:
float a42;
float a43;
cdef extern int affine_composite_fast(
unsigned char *imgdata,
unsigned char *maskdata,
cdef extern int composite_fast(
unsigned char *imgbytes,
unsigned char *maskbytes,
unsigned int imgwidth,
unsigned int imgheight,
unsigned int minx,
@ -35,27 +36,10 @@ cdef extern int affine_composite_fast(
unsigned int maxy,
floatcolor_t add_color,
floatcolor_t mult_color,
float xscale,
float yscale,
matrix_t inverse,
int blendfunc,
unsigned char *texdata,
unsigned int texwidth,
unsigned int texheight,
unsigned int threads,
unsigned int enable_aa,
)
cdef extern int perspective_composite_fast(
unsigned char *imgbytes,
unsigned char *maskbytes,
unsigned int imgwidth,
unsigned int imgheight,
float camera_x,
float camera_y,
float camera_z,
float focal_length,
floatcolor_t add_color,
floatcolor_t mult_color,
matrix_t transform,
int use_perspective,
int blendfunc,
unsigned char *texbytes,
unsigned int texwidth,
@ -137,7 +121,7 @@ def affine_composite(
cdef unsigned int threads = 1 if single_threaded else multiprocessing.cpu_count()
# Call the C++ function.
errors = affine_composite_fast(
errors = composite_fast(
imgbytes,
maskbytes,
imgwidth,
@ -148,7 +132,10 @@ def affine_composite(
maxy,
c_addcolor,
c_multcolor,
transform.xscale,
transform.yscale,
c_inverse,
0,
blendfunc,
texbytes,
texwidth,
@ -190,6 +177,12 @@ def perspective_composite(
texwidth = texture.width
texheight = texture.height
# Get the perspective-correct inverse matrix for looking up texture coordinates.
inverse_matrix, minx, miny, maxx, maxy = perspective_calculate(imgwidth, imgheight, texwidth, texheight, transform, camera, focal_length)
if inverse_matrix is None:
# This texture is entirely off of the screen.
return img
# Grab the raw image data.
imgbytes = img.tobytes('raw', 'RGBA')
texbytes = texture.tobytes('raw', 'RGBA')
@ -207,27 +200,30 @@ def perspective_composite(
# Convert classes to C structs.
cdef floatcolor_t c_addcolor = floatcolor_t(r=add_color.r, g=add_color.g, b=add_color.b, a=add_color.a)
cdef floatcolor_t c_multcolor = floatcolor_t(r=mult_color.r, g=mult_color.g, b=mult_color.b, a=mult_color.a)
cdef matrix_t c_transform = matrix_t(
a11=transform.a11, a12=transform.a12, a13=transform.a13,
a21=transform.a21, a22=transform.a22, a23=transform.a23,
a31=transform.a31, a32=transform.a32, a33=transform.a33,
a41=transform.a41, a42=transform.a42, a43=transform.a43,
cdef matrix_t c_inverse = matrix_t(
a11=inverse_matrix.a11, a12=inverse_matrix.a12, a13=inverse_matrix.a13,
a21=inverse_matrix.a21, a22=inverse_matrix.a22, a23=inverse_matrix.a23,
a31=inverse_matrix.a31, a32=inverse_matrix.a32, a33=inverse_matrix.a33,
a41=inverse_matrix.a41, a42=inverse_matrix.a42, a43=inverse_matrix.a43,
)
cdef unsigned int threads = 1 if single_threaded else multiprocessing.cpu_count()
# Call the C++ function.
errors = perspective_composite_fast(
errors = composite_fast(
imgbytes,
maskbytes,
imgwidth,
imgheight,
camera.x,
camera.y,
camera.z,
focal_length,
minx,
maxx,
miny,
maxy,
c_addcolor,
c_multcolor,
c_transform,
transform.xscale,
transform.yscale,
c_inverse,
1,
blendfunc,
texbytes,
texwidth,

View File

@ -56,14 +56,6 @@ extern "C"
(a13 * point.x) + (a23 * point.y) + (a33 * point.z) + a43,
};
}
float xscale() {
    // Euclidean length of the (a11, a12, a13) components of this matrix.
    const float x = a11;
    const float y = a12;
    const float z = a13;
    return sqrt((x * x) + (y * y) + (z * z));
}
float yscale() {
    // Euclidean length of the (a21, a22, a23) components of this matrix.
    const float x = a21;
    const float y = a22;
    const float z = a23;
    return sqrt((x * x) + (y * y) + (z * z));
}
} matrix_t;
typedef struct work {
@ -77,7 +69,10 @@ extern "C"
intcolor_t *texdata;
unsigned int texwidth;
unsigned int texheight;
float xscale;
float yscale;
matrix_t inverse;
int use_perspective;
floatcolor_t add_color;
floatcolor_t mult_color;
int blendfunc;
@ -270,12 +265,8 @@ extern "C"
// costs us almost nothing. Essentially what we're doing here is calculating the scale, clamping it at 1.0 as the
// minimum and then setting the AA sample swing accordingly. This has the effect of anti-aliasing scaled up images
// a bit softer than would otherwise be achieved.
float xscale = 1.0 / work->inverse.xscale();
float yscale = 1.0 / work->inverse.yscale();
// These are used for picking the various sample points for SSAA method below.
float xswing = 0.5 * fmax(1.0, xscale);
float yswing = 0.5 * fmax(1.0, yscale);
float xswing = 0.5 * fmax(1.0, work->xscale);
float yswing = 0.5 * fmax(1.0, work->yscale);
for (unsigned int imgy = work->miny; imgy < work->maxy; imgy++) {
for (unsigned int imgx = work->minx; imgx < work->maxx; imgx++) {
@ -300,10 +291,21 @@ extern "C"
// First, figure out if we can use bilinear resampling.
int bilinear = 0;
if (xscale >= 1.0 && yscale >= 1.0) {
point_t aaloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)});
int aax = aaloc.x;
int aay = aaloc.y;
if (work->xscale >= 1.0 && work->yscale >= 1.0) {
int aax = -1;
int aay = -1;
if (work->use_perspective) {
point_t aaloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)});
if (aaloc.z > 0.0) {
aax = aaloc.x / aaloc.z;
aay = aaloc.y / aaloc.z;
}
} else {
point_t aaloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)});
aax = aaloc.x;
aay = aaloc.y;
}
if (!(aax <= 0 || aay <= 0 || aax >= ((int)work->texwidth - 1) || aay >= ((int)work->texheight - 1))) {
bilinear = 1;
@ -314,11 +316,28 @@ extern "C"
intcolor_t average;
if (bilinear) {
// Calculate the pixel we're after, and what percentage into the pixel we are.
point_t texloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)});
int aax = texloc.x;
int aay = texloc.y;
float aaxrem = texloc.x - (float)aax;
float aayrem = texloc.y - (float)aay;
int aax;
int aay;
float aaxrem;
float aayrem;
if (work->use_perspective) {
// We don't check for negative here, because we already checked it above and wouldn't
// have enabled bilinear interpolation.
point_t texloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)});
float fx = texloc.x / texloc.z;
float fy = texloc.y / texloc.z;
aax = fx;
aay = fy;
aaxrem = fx - (float)aax;
aayrem = fy - (float)aay;
} else {
point_t texloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)});
aax = texloc.x;
aay = texloc.y;
aaxrem = texloc.x - (float)aax;
aayrem = texloc.y - (float)aay;
}
// Find the four pixels that we can interpolate from. The first number is the x, and second is y.
unsigned int tex00 = aax + (aay * work->texwidth);
@ -366,9 +385,20 @@ extern "C"
} else {
for (float addy = 0.5 - yswing; addy <= 0.5 + yswing; addy += yswing / 2.0) {
for (float addx = 0.5 - xswing; addx <= 0.5 + xswing; addx += xswing / 2.0) {
point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + addx, (float)imgy + addy});
int aax = texloc.x;
int aay = texloc.y;
int aax = -1;
int aay = -1;
if (work->use_perspective) {
point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + addx, (float)imgy + addy});
if (texloc.z > 0.0) {
aax = texloc.x / texloc.z;
aay = texloc.y / texloc.z;
}
} else {
point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + addx, (float)imgy + addy});
aax = texloc.x;
aay = texloc.y;
}
// If we're out of bounds, don't update. Factor this in, however, so we can get partial
// transparency to the pixel that is already there.
@ -429,9 +459,20 @@ extern "C"
work->imgdata[imgoff] = blend_point(work->add_color, work->mult_color, average, work->imgdata[imgoff], work->blendfunc);
} else {
// Grab the center of the pixel to get the color.
point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + (float)0.5, (float)imgy + (float)0.5});
int texx = texloc.x;
int texy = texloc.y;
int texx = -1;
int texy = -1;
if (work->use_perspective) {
point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + (float)0.5, (float)imgy + (float)0.5});
if (texloc.z > 0.0) {
texx = texloc.x / texloc.z;
texy = texloc.y / texloc.z;
}
} else {
point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + (float)0.5, (float)imgy + (float)0.5});
texx = texloc.x;
texy = texloc.y;
}
// If we're out of bounds, don't update.
if (texx < 0 || texy < 0 || texx >= (int)work->texwidth || texy >= (int)work->texheight) {
@ -452,7 +493,7 @@ extern "C"
return NULL;
}
int affine_composite_fast(
int composite_fast(
unsigned char *imgbytes,
unsigned char *maskbytes,
unsigned int imgwidth,
@ -463,7 +504,10 @@ extern "C"
unsigned int maxy,
floatcolor_t add_color,
floatcolor_t mult_color,
float xscale,
float yscale,
matrix_t inverse,
int use_perspective,
int blendfunc,
unsigned char *texbytes,
unsigned int texwidth,
@ -488,11 +532,14 @@ extern "C"
work.texdata = texdata;
work.texwidth = texwidth;
work.texheight = texheight;
work.xscale = xscale;
work.yscale = yscale;
work.inverse = inverse;
work.add_color = add_color;
work.mult_color = mult_color;
work.blendfunc = blendfunc;
work.enable_aa = enable_aa;
work.use_perspective = use_perspective;
chunk_composite_fast(&work);
} else {
@ -531,12 +578,15 @@ extern "C"
work->texdata = texdata;
work->texwidth = texwidth;
work->texheight = texheight;
work->xscale = xscale;
work->yscale = yscale;
work->inverse = inverse;
work->add_color = add_color;
work->mult_color = mult_color;
work->blendfunc = blendfunc;
work->thread = thread;
work->enable_aa = enable_aa;
work->use_perspective = use_perspective;
if (me)
{
@ -584,58 +634,4 @@ extern "C"
return 0;
}
int perspective_composite_fast(
unsigned char *imgbytes,
unsigned char *maskbytes,
unsigned int imgwidth,
unsigned int imgheight,
float camera_x,
float camera_y,
float camera_z,
float focal_length,
floatcolor_t add_color,
floatcolor_t mult_color,
matrix_t transform,
int blendfunc,
unsigned char *texbytes,
unsigned int texwidth,
unsigned int texheight,
unsigned int threads,
unsigned int enable_aa
) {
// Cast to a usable type.
intcolor_t *imgdata = (intcolor_t *)imgbytes;
intcolor_t *texdata = (intcolor_t *)texbytes;
for (unsigned int texy = 0; texy < texheight; texy++) {
for (unsigned int texx = 0; texx < texwidth; texx++) {
// Calculate perspective projection.
point_t imgloc = transform.multiply_point((point_t){(float)texx, (float)texy});
float perspective = focal_length / (imgloc.z - camera_z);
int imgx = ((imgloc.x - camera_x) * perspective) + camera_x;
int imgy = ((imgloc.y - camera_y) * perspective) + camera_y;
// Check clipping.
if (imgx < 0 || imgx >= (int)imgwidth) {
continue;
}
if (imgy < 0 || imgy >= (int)imgheight) {
continue;
}
// Check mask rectangle.
unsigned int imgoff = imgx + (imgy * imgwidth);
if (maskbytes != NULL && maskbytes[imgoff] == 0) {
continue;
}
// Blend it.
unsigned int texoff = (texx + (texy * texwidth));
imgdata[imgoff] = blend_point(add_color, mult_color, texdata[texoff], imgdata[imgoff], blendfunc);
}
}
return 0;
}
}

View File

@ -0,0 +1,95 @@
from typing import Dict, List, Optional, Tuple
from ..types import Matrix, Point
def perspective_calculate(
    imgwidth: int,
    imgheight: int,
    texwidth: int,
    texheight: int,
    transform: Matrix,
    camera: Point,
    focal_length: float,
) -> Tuple[Optional[Matrix], int, int, int, int]:
    """
    Work out how to map screen pixels back to texture coordinates under perspective.

    Parameters:
        imgwidth, imgheight - Size of the destination image, used to clamp the update rectangle.
        texwidth, texheight - Size of the texture whose corners are projected.
        transform - Matrix placing texture coordinates into 3D space.
        camera - Camera position used for the perspective projection.
        focal_length - Focal length used for the perspective projection.

    Returns:
        A tuple of (inverse matrix, minx, miny, maxx, maxy). Multiplying a screen point by
        the inverse matrix yields (u/z, v/z, 1/z) scaled by the focal length, so dividing
        x and y by z gives the texture coordinate. The matrix is None when the texture is
        entirely off screen or the projection cannot be inverted. The min/max values are
        the clamped, half-open rectangle of destination pixels that could be touched.
    """
    # Arbitrarily choose three points on the texture to create a pair of vectors
    # so that we can interpolate backwards. This isn't as simple as inverting the
    # view matrix like in affine compositing because dividing by Z makes the
    # perspective transform non-linear. So instead we interpolate 1/Z, u/Z and
    # v/Z since those ARE linear, and work backwards from there.
    #
    # The projected screen points and their u/z, v/z, 1/z values are kept in two
    # parallel lists indexed by corner. Keying the latter by the projected point
    # would let two corners that truncate to the same pixel overwrite each other.
    xy: List[Point] = []
    uvz: List[Point] = []
    for (texx, texy) in [
        (0, 0),
        (texwidth, 0),
        (0, texheight),
        # Include this just to get a good upper bounds for where the texture
        # will be drawn.
        (texwidth, texheight),
    ]:
        imgloc = transform.multiply_point(Point(texx, texy))
        # NOTE(review): a corner exactly on the camera plane makes distance zero and the
        # divisions below raise ZeroDivisionError -- confirm callers never produce that.
        distance = imgloc.z - camera.z
        imgx = int(((imgloc.x - camera.x) * (focal_length / distance)) + camera.x)
        imgy = int(((imgloc.y - camera.y) * (focal_length / distance)) + camera.y)

        xy.append(Point(imgx, imgy))
        uvz.append(
            Point(
                focal_length * texx / distance,
                focal_length * texy / distance,
                focal_length / distance,
            )
        )

    # Calculate the maximum range of update this texture can possibly reside in.
    minx = max(int(min(p.x for p in xy)), 0)
    maxx = min(int(max(p.x for p in xy)) + 1, imgwidth)
    miny = max(int(min(p.y for p in xy)), 0)
    maxy = min(int(max(p.y for p in xy)) + 1, imgheight)

    if maxx <= minx or maxy <= miny:
        # This image is entirely off the screen!
        return (None, minx, miny, maxx, maxy)

    # Now that we have three points, construct a matrix that allows us to calculate
    # what amount of each u/z, v/z and 1/z vector we need to interpolate values. The
    # below matrix gives us an affine transform that will convert a point that's in
    # the range 0, 0 to 1, 1 to a point inside the parallelogram that is made by
    # projecting the two vectors we got from calculating the three texture points above.
    xy_matrix = Matrix.affine(
        a=xy[1].x - xy[0].x,
        b=xy[1].y - xy[0].y,
        c=xy[2].x - xy[0].x,
        d=xy[2].y - xy[0].y,
        tx=xy[0].x,
        ty=xy[0].y,
    )

    # We invert that above, which gives us a matrix that can take screen space (imgx,
    # imgy) and gives us instead those ratios, which allows us to then interpolate the
    # u/z, v/z and 1/z values.
    try:
        xy_matrix = xy_matrix.inverse()
    except ZeroDivisionError:
        # This can't be inverted, so this shouldn't be displayed.
        return (None, minx, miny, maxx, maxy)

    # We construct a second matrix, which interpolates coordinates in the range of
    # 0, 0 to 1, 1 and gives us back the u/z, v/z and 1/z values.
    uvz_matrix = Matrix(
        a11=uvz[1].x - uvz[0].x,
        a12=uvz[1].y - uvz[0].y,
        a13=uvz[1].z - uvz[0].z,
        a21=uvz[2].x - uvz[0].x,
        a22=uvz[2].y - uvz[0].y,
        a23=uvz[2].z - uvz[0].z,
        a31=0.0,
        a32=0.0,
        a33=0.0,
        a41=uvz[0].x,
        a42=uvz[0].y,
        a43=uvz[0].z,
    )

    # Finally, we can combine the two matrices to do the interpolation all at once.
    inverse_matrix = xy_matrix.multiply(uvz_matrix)
    return (inverse_matrix, minx, miny, maxx, maxy)