1
0
mirror of synced 2024-11-28 07:50:51 +01:00

Switch perspective engine to using existing blitter with correct perspective-based texture mapping.

This commit is contained in:
Jennifer Taylor 2021-08-05 17:32:00 +00:00
parent 75bc9f975c
commit 74838e698d
4 changed files with 557 additions and 298 deletions

View File

@ -1,9 +1,10 @@
import multiprocessing import multiprocessing
import signal import signal
from PIL import Image # type: ignore from PIL import Image # type: ignore
from typing import Any, List, Optional, Sequence, Union from typing import Any, Callable, List, Optional, Sequence, Union
from ..types import Color, Matrix, Point from ..types import Color, Matrix, Point
from .perspective import perspective_calculate
def clamp(color: float) -> int: def clamp(color: float) -> int:
@ -192,6 +193,218 @@ def blend_point(
return blend_normal(dest_color, src_color) return blend_normal(dest_color, src_color)
def pixel_renderer(
    imgx: int,
    imgy: int,
    imgwidth: int,
    texwidth: int,
    texheight: int,
    xscale: float,
    yscale: float,
    callback: Callable[[Point], Optional[Point]],
    add_color: Color,
    mult_color: Color,
    blendfunc: int,
    imgbytes: Union[bytes, bytearray],
    texbytes: Union[bytes, bytearray],
    maskbytes: Optional[Union[bytes, bytearray]],
    enable_aa: bool,
) -> Sequence[int]:
    """
    Compute the final RGBA value for one destination pixel.

    Maps the destination pixel (imgx, imgy) back into texture space via
    ``callback`` (an image-space -> texture-space transform; returns None when
    the pixel has no texture coverage, e.g. behind the camera for perspective),
    samples the texture (with optional anti-aliasing), and blends the sample
    over the existing destination pixel.

    Args:
        imgx, imgy: Destination pixel coordinate.
        imgwidth: Destination image width in pixels (used for row offsets).
        texwidth, texheight: Texture dimensions in pixels.
        xscale, yscale: Effective texture-to-image scale factors; only used to
            size the anti-aliasing sample swing below.
        callback: Inverse-transform from an image-space Point to a
            texture-space Point, or None when unmapped.
        add_color, mult_color: Color adjustments applied during blending.
        blendfunc: Blend mode selector passed through to blend_point().
        imgbytes: Destination image as raw RGBA bytes.
        texbytes: Texture as raw RGBA bytes.
        maskbytes: Optional 1-byte-per-pixel mask; 0 means "masked off".
        enable_aa: Whether to anti-alias (bilinear or supersampled).

    Returns:
        A 4-element RGBA sequence for this pixel (either the blended result or
        the unchanged destination bytes when masked/unmapped/out of bounds).
    """
    # Determine offset
    maskoff = imgx + (imgy * imgwidth)
    imgoff = maskoff * 4

    if maskbytes is not None and maskbytes[maskoff] == 0:
        # This pixel is masked off!
        return imgbytes[imgoff:(imgoff + 4)]

    if enable_aa:
        # Accumulators for the supersampling (SSAA) fallback path.
        r = 0
        g = 0
        b = 0
        a = 0
        count = 0
        denom = 0

        # Essentially what we're doing here is calculating the scale, clamping it at 1.0 as the
        # minimum and then setting the AA sample swing accordingly. This has the effect of anti-aliasing
        # scaled up images a bit softer than would otherwise be achieved.
        xswing = 0.5 * max(1.0, xscale)
        yswing = 0.5 * max(1.0, yscale)

        # 5x5 grid of sub-pixel sample positions centered on the pixel middle.
        xpoints = [0.5 - xswing, 0.5 - (xswing / 2.0), 0.5, 0.5 + (xswing / 2.0), 0.5 + xswing]
        ypoints = [0.5 - yswing, 0.5 - (yswing / 2.0), 0.5, 0.5 + (yswing / 2.0), 0.5 + yswing]

        # First, figure out if we can use bilinear resampling.
        # Bilinear needs the sample and its +1 neighbors fully inside the texture.
        bilinear = False
        if xscale >= 1.0 and yscale >= 1.0:
            aaloc = callback(Point(imgx + 0.5, imgy + 0.5))
            if aaloc is not None:
                aax, aay, _ = aaloc.as_tuple()
                if not (aax <= 0 or aay <= 0 or aax >= (texwidth - 1) or aay >= (texheight - 1)):
                    bilinear = True

        # Now perform the desired AA operation.
        if bilinear:
            # Calculate the pixel we're after, and what percentage into the pixel we are.
            texloc = callback(Point(imgx + 0.5, imgy + 0.5))
            if texloc is None:
                # Unreachable: the same callback call just succeeded above.
                raise Exception("Logic error!")

            # NOTE(review): as_tuple() appears to return integer-truncated
            # coordinates — the fractional remainders below are computed
            # against the raw .x/.y floats. Confirm against Point.as_tuple().
            aax, aay, _ = texloc.as_tuple()
            aaxrem = texloc.x - aax
            aayrem = texloc.y - aay

            # Find the four pixels that we can interpolate from. The first number is the x, and second is y.
            tex00 = (aax + (aay * texwidth)) * 4
            tex10 = tex00 + 4
            tex01 = (aax + ((aay + 1) * texwidth)) * 4
            tex11 = tex01 + 4

            # Calculate various scaling factors based on alpha and percentage.
            tex00percent = texbytes[tex00 + 3] / 255.0
            tex10percent = texbytes[tex10 + 3] / 255.0
            tex01percent = texbytes[tex01 + 3] / 255.0
            tex11percent = texbytes[tex11 + 3] / 255.0
            y0percent = (tex00percent * (1.0 - aaxrem)) + (tex10percent * aaxrem)
            y1percent = (tex01percent * (1.0 - aaxrem)) + (tex11percent * aaxrem)
            finalpercent = (y0percent * (1.0 - aayrem)) + (y1percent * aayrem)

            if finalpercent <= 0.0:
                # This pixel would be blank, so we avoid dividing by zero.
                average = [255, 255, 255, 0]
            else:
                # Interpolate in the X direction on both Y axis.
                # Color channels are weighted by alpha so transparent texels
                # don't darken the interpolated result.
                y0r = ((texbytes[tex00] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10] * tex10percent * aaxrem))
                y0g = ((texbytes[tex00 + 1] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10 + 1] * tex10percent * aaxrem))
                y0b = ((texbytes[tex00 + 2] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10 + 2] * tex10percent * aaxrem))
                y1r = ((texbytes[tex01] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11] * tex11percent * aaxrem))
                y1g = ((texbytes[tex01 + 1] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11 + 1] * tex11percent * aaxrem))
                y1b = ((texbytes[tex01 + 2] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11 + 2] * tex11percent * aaxrem))

                # Now interpolate the Y direction to get the final pixel value.
                average = [
                    int(((y0r * (1.0 - aayrem)) + (y1r * aayrem)) / finalpercent),
                    int(((y0g * (1.0 - aayrem)) + (y1g * aayrem)) / finalpercent),
                    int(((y0b * (1.0 - aayrem)) + (y1b * aayrem)) / finalpercent),
                    int(finalpercent * 255),
                ]
        else:
            # Supersampling fallback: average up to 25 sub-pixel samples.
            for addy in ypoints:
                for addx in xpoints:
                    texloc = callback(Point(imgx + addx, imgy + addy))

                    # Unmapped samples still count toward the denominator so
                    # partial coverage fades toward the existing pixel.
                    denom += 1
                    if texloc is None:
                        continue

                    aax, aay, _ = texloc.as_tuple()

                    # If we're out of bounds, don't update. Factor this in, however, so we can get partial
                    # transparency to the pixel that is already there.
                    if aax < 0 or aay < 0 or aax >= texwidth or aay >= texheight:
                        continue

                    # Grab the values to average, for SSAA. Make sure to factor in alpha as a poor-man's
                    # blend to ensure that partial transparency pixel values don't unnecessarily factor
                    # into average calculations.
                    texoff = (aax + (aay * texwidth)) * 4

                    # If this is a fully transparent pixel, the below formulas work out to adding nothing
                    # so we should skip this altogether.
                    if texbytes[texoff + 3] == 0:
                        continue

                    apercent = texbytes[texoff + 3] / 255.0
                    r += int(texbytes[texoff] * apercent)
                    g += int(texbytes[texoff + 1] * apercent)
                    b += int(texbytes[texoff + 2] * apercent)
                    a += texbytes[texoff + 3]
                    count += 1

            if count == 0:
                # None of the samples existed in-bounds.
                return imgbytes[imgoff:(imgoff + 4)]

            # Average the pixels. Make sure to divide out the alpha in preparation for blending.
            alpha = a // denom

            if alpha == 0:
                average = [255, 255, 255, alpha]
            else:
                apercent = alpha / 255.0
                average = [int((r / denom) / apercent), int((g / denom) / apercent), int((b / denom) / apercent), alpha]

        # Finally, blend it with the destination.
        return blend_point(add_color, mult_color, average, imgbytes[imgoff:(imgoff + 4)], blendfunc)
    else:
        # No AA: single point-sample at the pixel center.
        # Calculate what texture pixel data goes here.
        texloc = callback(Point(imgx + 0.5, imgy + 0.5))
        if texloc is None:
            return imgbytes[imgoff:(imgoff + 4)]

        texx, texy, _ = texloc.as_tuple()

        # If we're out of bounds, don't update.
        if texx < 0 or texy < 0 or texx >= texwidth or texy >= texheight:
            return imgbytes[imgoff:(imgoff + 4)]

        # Blend it.
        texoff = (texx + (texy * texwidth)) * 4
        return blend_point(add_color, mult_color, texbytes[texoff:(texoff + 4)], imgbytes[imgoff:(imgoff + 4)], blendfunc)
def affine_line_renderer(
    work: multiprocessing.Queue,
    results: multiprocessing.Queue,
    minx: int,
    maxx: int,
    imgwidth: int,
    texwidth: int,
    texheight: int,
    inverse: Matrix,
    add_color: Color,
    mult_color: Color,
    blendfunc: int,
    imgbytes: Union[bytes, bytearray],
    texbytes: Union[bytes, bytearray],
    maskbytes: Optional[Union[bytes, bytearray]],
    enable_aa: bool,
) -> None:
    """
    Multiprocessing worker: render full scanlines for an affine composite.

    Pulls row indices off the ``work`` queue until it receives the ``None``
    sentinel. For each row, renders every pixel in the horizontal range
    [minx, maxx) through pixel_renderer() using the inverse affine matrix,
    and pushes ``(row_index, row_rgba_bytes)`` onto ``results``.
    """
    pixel_scale_x = 1.0 / inverse.xscale
    pixel_scale_y = 1.0 / inverse.yscale

    while True:
        row = work.get()
        if row is None:
            # Sentinel received: no more rows to render.
            return

        rowstart = row * imgwidth * 4
        rowbytes = bytearray(imgbytes[rowstart:(rowstart + (imgwidth * 4))])

        # Pixels outside [minx, maxx) keep their original bytes, so only
        # walk the columns the texture can actually touch.
        for col in range(max(0, minx), min(imgwidth, maxx)):
            pixstart = col * 4
            rowbytes[pixstart:(pixstart + 4)] = pixel_renderer(
                col,
                row,
                imgwidth,
                texwidth,
                texheight,
                pixel_scale_x,
                pixel_scale_y,
                inverse.multiply_point,
                add_color,
                mult_color,
                blendfunc,
                imgbytes,
                texbytes,
                maskbytes,
                enable_aa,
            )

        results.put((row, bytes(rowbytes)))
def affine_composite( def affine_composite(
img: Image.Image, img: Image.Image,
add_color: Color, add_color: Color,
@ -262,7 +475,9 @@ def affine_composite(
imgwidth, imgwidth,
texwidth, texwidth,
texheight, texheight,
inverse, 1.0 / inverse.xscale,
1.0 / inverse.yscale,
lambda point: inverse.multiply_point(point),
add_color, add_color,
mult_color, mult_color,
blendfunc, blendfunc,
@ -298,7 +513,7 @@ def affine_composite(
for _ in range(cores): for _ in range(cores):
proc = multiprocessing.Process( proc = multiprocessing.Process(
target=line_renderer, target=affine_line_renderer,
args=( args=(
work, work,
results, results,
@ -349,7 +564,7 @@ def affine_composite(
return img return img
def line_renderer( def perspective_line_renderer(
work: multiprocessing.Queue, work: multiprocessing.Queue,
results: multiprocessing.Queue, results: multiprocessing.Queue,
minx: int, minx: int,
@ -357,6 +572,8 @@ def line_renderer(
imgwidth: int, imgwidth: int,
texwidth: int, texwidth: int,
texheight: int, texheight: int,
xscale: float,
yscale: float,
inverse: Matrix, inverse: Matrix,
add_color: Color, add_color: Color,
mult_color: Color, mult_color: Color,
@ -366,6 +583,14 @@ def line_renderer(
maskbytes: Optional[Union[bytes, bytearray]], maskbytes: Optional[Union[bytes, bytearray]],
enable_aa: bool, enable_aa: bool,
) -> None: ) -> None:
def perspective_inverse(imgpoint: Point) -> Optional[Point]:
    # Calculate the texture coordinate with our perspective interpolation.
    # ``inverse`` is closed over from the enclosing worker; it maps an
    # image-space point to homogeneous texture coordinates.
    texdiv = inverse.multiply_point(imgpoint)
    if texdiv.z <= 0.0:
        # Point is at or behind the camera plane: no texture coverage here.
        return None

    # Perspective divide to get the final 2D texture coordinate.
    return Point(texdiv.x / texdiv.z, texdiv.y / texdiv.z)
while True: while True:
imgy = work.get() imgy = work.get()
if imgy is None: if imgy is None:
@ -384,7 +609,9 @@ def line_renderer(
imgwidth, imgwidth,
texwidth, texwidth,
texheight, texheight,
inverse, xscale,
yscale,
perspective_inverse,
add_color, add_color,
mult_color, mult_color,
blendfunc, blendfunc,
@ -397,160 +624,6 @@ def line_renderer(
results.put((imgy, bytes(rowbytes))) results.put((imgy, bytes(rowbytes)))
def pixel_renderer(
    imgx: int,
    imgy: int,
    imgwidth: int,
    texwidth: int,
    texheight: int,
    inverse: Matrix,
    add_color: Color,
    mult_color: Color,
    blendfunc: int,
    imgbytes: Union[bytes, bytearray],
    texbytes: Union[bytes, bytearray],
    maskbytes: Optional[Union[bytes, bytearray]],
    enable_aa: bool,
) -> Sequence[int]:
    """
    Compute the final RGBA value for one destination pixel (affine path).

    Maps the destination pixel (imgx, imgy) into texture space with the
    ``inverse`` affine matrix, samples the texture (with optional
    anti-aliasing), and blends the sample over the existing destination pixel.

    Args:
        imgx, imgy: Destination pixel coordinate.
        imgwidth: Destination image width in pixels (used for row offsets).
        texwidth, texheight: Texture dimensions in pixels.
        inverse: Inverse affine transform (image space -> texture space).
        add_color, mult_color: Color adjustments applied during blending.
        blendfunc: Blend mode selector passed through to blend_point().
        imgbytes: Destination image as raw RGBA bytes.
        texbytes: Texture as raw RGBA bytes.
        maskbytes: Optional 1-byte-per-pixel mask; 0 means "masked off".
        enable_aa: Whether to anti-alias (bilinear or supersampled).

    Returns:
        A 4-element RGBA sequence for this pixel (either the blended result or
        the unchanged destination bytes when masked or out of bounds).
    """
    # Determine offset
    maskoff = imgx + (imgy * imgwidth)
    imgoff = maskoff * 4

    if maskbytes is not None and maskbytes[maskoff] == 0:
        # This pixel is masked off!
        return imgbytes[imgoff:(imgoff + 4)]

    if enable_aa:
        # Accumulators for the supersampling (SSAA) fallback path.
        r = 0
        g = 0
        b = 0
        a = 0
        count = 0
        denom = 0

        # Essentially what we're doing here is calculating the scale, clamping it at 1.0 as the
        # minimum and then setting the AA sample swing accordingly. This has the effect of anti-aliasing
        # scaled up images a bit softer than would otherwise be achieved.
        xscale = 1.0 / inverse.xscale
        yscale = 1.0 / inverse.yscale

        # These are used for picking the various sample points for SSAA method below.
        xswing = 0.5 * max(1.0, xscale)
        yswing = 0.5 * max(1.0, yscale)

        xpoints = [0.5 - xswing, 0.5 - (xswing / 2.0), 0.5, 0.5 + (xswing / 2.0), 0.5 + xswing]
        ypoints = [0.5 - yswing, 0.5 - (yswing / 2.0), 0.5, 0.5 + (yswing / 2.0), 0.5 + yswing]

        # First, figure out if we can use bilinear resampling.
        # Bilinear needs the sample and its +1 neighbors fully inside the texture.
        bilinear = False
        if xscale >= 1.0 and yscale >= 1.0:
            aaloc = inverse.multiply_point(Point(imgx + 0.5, imgy + 0.5))
            aax, aay, _ = aaloc.as_tuple()
            if not (aax <= 0 or aay <= 0 or aax >= (texwidth - 1) or aay >= (texheight - 1)):
                bilinear = True

        # Now perform the desired AA operation.
        if bilinear:
            # Calculate the pixel we're after, and what percentage into the pixel we are.
            # NOTE(review): as_tuple() appears to return integer-truncated
            # coordinates — the fractional remainders below are computed
            # against the raw .x/.y floats. Confirm against Point.as_tuple().
            texloc = inverse.multiply_point(Point(imgx + 0.5, imgy + 0.5))
            aax, aay, _ = texloc.as_tuple()
            aaxrem = texloc.x - aax
            aayrem = texloc.y - aay

            # Find the four pixels that we can interpolate from. The first number is the x, and second is y.
            tex00 = (aax + (aay * texwidth)) * 4
            tex10 = tex00 + 4
            tex01 = (aax + ((aay + 1) * texwidth)) * 4
            tex11 = tex01 + 4

            # Calculate various scaling factors based on alpha and percentage.
            tex00percent = texbytes[tex00 + 3] / 255.0
            tex10percent = texbytes[tex10 + 3] / 255.0
            tex01percent = texbytes[tex01 + 3] / 255.0
            tex11percent = texbytes[tex11 + 3] / 255.0
            y0percent = (tex00percent * (1.0 - aaxrem)) + (tex10percent * aaxrem)
            y1percent = (tex01percent * (1.0 - aaxrem)) + (tex11percent * aaxrem)
            finalpercent = (y0percent * (1.0 - aayrem)) + (y1percent * aayrem)

            if finalpercent <= 0.0:
                # This pixel would be blank, so we avoid dividing by zero.
                average = [255, 255, 255, 0]
            else:
                # Interpolate in the X direction on both Y axis.
                # Color channels are weighted by alpha so transparent texels
                # don't darken the interpolated result.
                y0r = ((texbytes[tex00] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10] * tex10percent * aaxrem))
                y0g = ((texbytes[tex00 + 1] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10 + 1] * tex10percent * aaxrem))
                y0b = ((texbytes[tex00 + 2] * tex00percent * (1.0 - aaxrem)) + (texbytes[tex10 + 2] * tex10percent * aaxrem))
                y1r = ((texbytes[tex01] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11] * tex11percent * aaxrem))
                y1g = ((texbytes[tex01 + 1] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11 + 1] * tex11percent * aaxrem))
                y1b = ((texbytes[tex01 + 2] * tex01percent * (1.0 - aaxrem)) + (texbytes[tex11 + 2] * tex11percent * aaxrem))

                # Now interpolate the Y direction to get the final pixel value.
                average = [
                    int(((y0r * (1.0 - aayrem)) + (y1r * aayrem)) / finalpercent),
                    int(((y0g * (1.0 - aayrem)) + (y1g * aayrem)) / finalpercent),
                    int(((y0b * (1.0 - aayrem)) + (y1b * aayrem)) / finalpercent),
                    int(finalpercent * 255),
                ]
        else:
            # Supersampling fallback: average up to 25 sub-pixel samples.
            for addy in ypoints:
                for addx in xpoints:
                    texloc = inverse.multiply_point(Point(imgx + addx, imgy + addy))
                    aax, aay, _ = texloc.as_tuple()

                    # If we're out of bounds, don't update. Factor this in, however, so we can get partial
                    # transparency to the pixel that is already there.
                    denom += 1
                    if aax < 0 or aay < 0 or aax >= texwidth or aay >= texheight:
                        continue

                    # Grab the values to average, for SSAA. Make sure to factor in alpha as a poor-man's
                    # blend to ensure that partial transparency pixel values don't unnecessarily factor
                    # into average calculations.
                    texoff = (aax + (aay * texwidth)) * 4

                    # If this is a fully transparent pixel, the below formulas work out to adding nothing
                    # so we should skip this altogether.
                    if texbytes[texoff + 3] == 0:
                        continue

                    apercent = texbytes[texoff + 3] / 255.0
                    r += int(texbytes[texoff] * apercent)
                    g += int(texbytes[texoff + 1] * apercent)
                    b += int(texbytes[texoff + 2] * apercent)
                    a += texbytes[texoff + 3]
                    count += 1

            if count == 0:
                # None of the samples existed in-bounds.
                return imgbytes[imgoff:(imgoff + 4)]

            # Average the pixels. Make sure to divide out the alpha in preparation for blending.
            alpha = a // denom

            if alpha == 0:
                average = [255, 255, 255, alpha]
            else:
                apercent = alpha / 255.0
                average = [int((r / denom) / apercent), int((g / denom) / apercent), int((b / denom) / apercent), alpha]

        # Finally, blend it with the destination.
        return blend_point(add_color, mult_color, average, imgbytes[imgoff:(imgoff + 4)], blendfunc)
    else:
        # No AA: single point-sample at the pixel center.
        # Calculate what texture pixel data goes here.
        texloc = inverse.multiply_point(Point(imgx + 0.5, imgy + 0.5))
        texx, texy, _ = texloc.as_tuple()

        # If we're out of bounds, don't update.
        if texx < 0 or texy < 0 or texx >= texwidth or texy >= texheight:
            return imgbytes[imgoff:(imgoff + 4)]

        # Blend it.
        texoff = (texx + (texy * texwidth)) * 4
        return blend_point(add_color, mult_color, texbytes[texoff:(texoff + 4)], imgbytes[imgoff:(imgoff + 4)], blendfunc)
def perspective_composite( def perspective_composite(
img: Image.Image, img: Image.Image,
add_color: Color, add_color: Color,
@ -576,6 +649,12 @@ def perspective_composite(
texwidth = texture.width texwidth = texture.width
texheight = texture.height texheight = texture.height
# Get the perspective-correct inverse matrix for looking up texture coordinates.
inverse_matrix, minx, miny, maxx, maxy = perspective_calculate(imgwidth, imgheight, texwidth, texheight, transform, camera, focal_length)
if inverse_matrix is None:
# This texture is entirely off of the screen.
return img
# Get the data in an easier to manipulate and faster to update fashion. # Get the data in an easier to manipulate and faster to update fashion.
imgbytes = bytearray(img.tobytes('raw', 'RGBA')) imgbytes = bytearray(img.tobytes('raw', 'RGBA'))
texbytes = texture.tobytes('raw', 'RGBA') texbytes = texture.tobytes('raw', 'RGBA')
@ -585,29 +664,122 @@ def perspective_composite(
else: else:
maskbytes = None maskbytes = None
for texy in range(texheight): def perspective_inverse(imgpoint: Point) -> Optional[Point]:
for texx in range(texwidth): # Calculate the texture coordinate with our perspective interpolation.
# Calculate perspective projection. texdiv = inverse_matrix.multiply_point(imgpoint)
imgloc = transform.multiply_point(Point(texx, texy)) if texdiv.z <= 0.0:
perspective = focal_length / (imgloc.z - camera.z) return None
imgx = int(((imgloc.x - camera.x) * perspective) + camera.x)
imgy = int(((imgloc.y - camera.y) * perspective) + camera.y)
# Check clipping. return Point(texdiv.x / texdiv.z, texdiv.y / texdiv.z)
if imgx < 0 or imgx >= imgwidth:
continue
if imgy < 0 or imgy >= imgheight:
continue
# Check mask rectangle. cores = multiprocessing.cpu_count()
maskoff = imgx + (imgy * imgwidth) if single_threaded or cores < 2:
imgoff = maskoff * 4 # Get the data in an easier to manipulate and faster to update fashion.
if maskbytes is not None and maskbytes[maskoff] == 0: imgbytes = bytearray(img.tobytes('raw', 'RGBA'))
continue texbytes = texture.tobytes('raw', 'RGBA')
if mask:
alpha = mask.split()[-1]
maskbytes = alpha.tobytes('raw', 'L')
else:
maskbytes = None
# Blend it. # We don't have enough CPU cores to bother multiprocessing.
texoff = (texx + (texy * texwidth)) * 4 for imgy in range(miny, maxy):
imgbytes[imgoff:(imgoff + 4)] = blend_point(add_color, mult_color, texbytes[texoff:(texoff + 4)], imgbytes[imgoff:(imgoff + 4)], blendfunc) for imgx in range(minx, maxx):
# Determine offset
imgoff = (imgx + (imgy * imgwidth)) * 4
imgbytes[imgoff:(imgoff + 4)] = pixel_renderer(
imgx,
imgy,
imgwidth,
texwidth,
texheight,
transform.xscale,
transform.yscale,
perspective_inverse,
add_color,
mult_color,
blendfunc,
imgbytes,
texbytes,
maskbytes,
enable_aa,
)
img = Image.frombytes('RGBA', (imgwidth, imgheight), bytes(imgbytes)) img = Image.frombytes('RGBA', (imgwidth, imgheight), bytes(imgbytes))
else:
imgbytes = img.tobytes('raw', 'RGBA')
texbytes = texture.tobytes('raw', 'RGBA')
if mask:
alpha = mask.split()[-1]
maskbytes = alpha.tobytes('raw', 'L')
else:
maskbytes = None
# Let's spread the load across multiple processors.
procs: List[multiprocessing.Process] = []
work: multiprocessing.Queue = multiprocessing.Queue()
results: multiprocessing.Queue = multiprocessing.Queue()
expected: int = 0
interrupted: bool = False
def ctrlc(sig: Any, frame: Any) -> None:
nonlocal interrupted
interrupted = True
previous_handler = signal.getsignal(signal.SIGINT)
signal.signal(signal.SIGINT, ctrlc)
for _ in range(cores):
proc = multiprocessing.Process(
target=perspective_line_renderer,
args=(
work,
results,
minx,
maxx,
imgwidth,
texwidth,
texheight,
transform.xscale,
transform.yscale,
inverse_matrix,
add_color,
mult_color,
blendfunc,
imgbytes,
texbytes,
maskbytes,
enable_aa,
),
)
procs.append(proc)
proc.start()
for imgy in range(miny, maxy):
work.put(imgy)
expected += 1
lines: List[bytes] = [
imgbytes[x:(x + (imgwidth * 4))]
for x in range(
0,
imgwidth * imgheight * 4,
imgwidth * 4,
)
]
for _ in range(expected):
imgy, result = results.get()
lines[imgy] = result
for _proc in procs:
work.put(None)
for proc in procs:
proc.join()
signal.signal(signal.SIGINT, previous_handler)
if interrupted:
raise KeyboardInterrupt()
img = Image.frombytes('RGBA', (imgwidth, imgheight), b''.join(lines))
return img return img

View File

@ -3,6 +3,7 @@ from PIL import Image # type: ignore
from typing import Optional, Tuple from typing import Optional, Tuple
from ..types import Color, Matrix, Point from ..types import Color, Matrix, Point
from .perspective import perspective_calculate
cdef extern struct floatcolor_t: cdef extern struct floatcolor_t:
float r; float r;
@ -24,9 +25,9 @@ cdef extern struct matrix_t:
float a42; float a42;
float a43; float a43;
cdef extern int affine_composite_fast( cdef extern int composite_fast(
unsigned char *imgdata, unsigned char *imgbytes,
unsigned char *maskdata, unsigned char *maskbytes,
unsigned int imgwidth, unsigned int imgwidth,
unsigned int imgheight, unsigned int imgheight,
unsigned int minx, unsigned int minx,
@ -35,27 +36,10 @@ cdef extern int affine_composite_fast(
unsigned int maxy, unsigned int maxy,
floatcolor_t add_color, floatcolor_t add_color,
floatcolor_t mult_color, floatcolor_t mult_color,
float xscale,
float yscale,
matrix_t inverse, matrix_t inverse,
int blendfunc, int use_perspective,
unsigned char *texdata,
unsigned int texwidth,
unsigned int texheight,
unsigned int threads,
unsigned int enable_aa,
)
cdef extern int perspective_composite_fast(
unsigned char *imgbytes,
unsigned char *maskbytes,
unsigned int imgwidth,
unsigned int imgheight,
float camera_x,
float camera_y,
float camera_z,
float focal_length,
floatcolor_t add_color,
floatcolor_t mult_color,
matrix_t transform,
int blendfunc, int blendfunc,
unsigned char *texbytes, unsigned char *texbytes,
unsigned int texwidth, unsigned int texwidth,
@ -137,7 +121,7 @@ def affine_composite(
cdef unsigned int threads = 1 if single_threaded else multiprocessing.cpu_count() cdef unsigned int threads = 1 if single_threaded else multiprocessing.cpu_count()
# Call the C++ function. # Call the C++ function.
errors = affine_composite_fast( errors = composite_fast(
imgbytes, imgbytes,
maskbytes, maskbytes,
imgwidth, imgwidth,
@ -148,7 +132,10 @@ def affine_composite(
maxy, maxy,
c_addcolor, c_addcolor,
c_multcolor, c_multcolor,
transform.xscale,
transform.yscale,
c_inverse, c_inverse,
0,
blendfunc, blendfunc,
texbytes, texbytes,
texwidth, texwidth,
@ -190,6 +177,12 @@ def perspective_composite(
texwidth = texture.width texwidth = texture.width
texheight = texture.height texheight = texture.height
# Get the perspective-correct inverse matrix for looking up texture coordinates.
inverse_matrix, minx, miny, maxx, maxy = perspective_calculate(imgwidth, imgheight, texwidth, texheight, transform, camera, focal_length)
if inverse_matrix is None:
# This texture is entirely off of the screen.
return img
# Grab the raw image data. # Grab the raw image data.
imgbytes = img.tobytes('raw', 'RGBA') imgbytes = img.tobytes('raw', 'RGBA')
texbytes = texture.tobytes('raw', 'RGBA') texbytes = texture.tobytes('raw', 'RGBA')
@ -207,27 +200,30 @@ def perspective_composite(
# Convert classes to C structs. # Convert classes to C structs.
cdef floatcolor_t c_addcolor = floatcolor_t(r=add_color.r, g=add_color.g, b=add_color.b, a=add_color.a) cdef floatcolor_t c_addcolor = floatcolor_t(r=add_color.r, g=add_color.g, b=add_color.b, a=add_color.a)
cdef floatcolor_t c_multcolor = floatcolor_t(r=mult_color.r, g=mult_color.g, b=mult_color.b, a=mult_color.a) cdef floatcolor_t c_multcolor = floatcolor_t(r=mult_color.r, g=mult_color.g, b=mult_color.b, a=mult_color.a)
cdef matrix_t c_transform = matrix_t( cdef matrix_t c_inverse = matrix_t(
a11=transform.a11, a12=transform.a12, a13=transform.a13, a11=inverse_matrix.a11, a12=inverse_matrix.a12, a13=inverse_matrix.a13,
a21=transform.a21, a22=transform.a22, a23=transform.a23, a21=inverse_matrix.a21, a22=inverse_matrix.a22, a23=inverse_matrix.a23,
a31=transform.a31, a32=transform.a32, a33=transform.a33, a31=inverse_matrix.a31, a32=inverse_matrix.a32, a33=inverse_matrix.a33,
a41=transform.a41, a42=transform.a42, a43=transform.a43, a41=inverse_matrix.a41, a42=inverse_matrix.a42, a43=inverse_matrix.a43,
) )
cdef unsigned int threads = 1 if single_threaded else multiprocessing.cpu_count() cdef unsigned int threads = 1 if single_threaded else multiprocessing.cpu_count()
# Call the C++ function. # Call the C++ function.
errors = perspective_composite_fast( errors = composite_fast(
imgbytes, imgbytes,
maskbytes, maskbytes,
imgwidth, imgwidth,
imgheight, imgheight,
camera.x, minx,
camera.y, maxx,
camera.z, miny,
focal_length, maxy,
c_addcolor, c_addcolor,
c_multcolor, c_multcolor,
c_transform, transform.xscale,
transform.yscale,
c_inverse,
1,
blendfunc, blendfunc,
texbytes, texbytes,
texwidth, texwidth,

View File

@ -56,14 +56,6 @@ extern "C"
(a13 * point.x) + (a23 * point.y) + (a33 * point.z) + a43, (a13 * point.x) + (a23 * point.y) + (a33 * point.z) + a43,
}; };
} }
float xscale() {
return sqrt((a11 * a11) + (a12 * a12) + (a13 * a13));
}
float yscale() {
return sqrt((a21 * a21) + (a22 * a22) + (a23 * a23));
}
} matrix_t; } matrix_t;
typedef struct work { typedef struct work {
@ -77,7 +69,10 @@ extern "C"
intcolor_t *texdata; intcolor_t *texdata;
unsigned int texwidth; unsigned int texwidth;
unsigned int texheight; unsigned int texheight;
float xscale;
float yscale;
matrix_t inverse; matrix_t inverse;
int use_perspective;
floatcolor_t add_color; floatcolor_t add_color;
floatcolor_t mult_color; floatcolor_t mult_color;
int blendfunc; int blendfunc;
@ -270,12 +265,8 @@ extern "C"
// costs us almost nothing. Essentially what we're doing here is calculating the scale, clamping it at 1.0 as the // costs us almost nothing. Essentially what we're doing here is calculating the scale, clamping it at 1.0 as the
// minimum and then setting the AA sample swing accordingly. This has the effect of anti-aliasing scaled up images // minimum and then setting the AA sample swing accordingly. This has the effect of anti-aliasing scaled up images
// a bit softer than would otherwise be achieved. // a bit softer than would otherwise be achieved.
float xscale = 1.0 / work->inverse.xscale(); float xswing = 0.5 * fmax(1.0, work->xscale);
float yscale = 1.0 / work->inverse.yscale(); float yswing = 0.5 * fmax(1.0, work->yscale);
// These are used for picking the various sample points for SSAA method below.
float xswing = 0.5 * fmax(1.0, xscale);
float yswing = 0.5 * fmax(1.0, yscale);
for (unsigned int imgy = work->miny; imgy < work->maxy; imgy++) { for (unsigned int imgy = work->miny; imgy < work->maxy; imgy++) {
for (unsigned int imgx = work->minx; imgx < work->maxx; imgx++) { for (unsigned int imgx = work->minx; imgx < work->maxx; imgx++) {
@ -300,10 +291,21 @@ extern "C"
// First, figure out if we can use bilinear resampling. // First, figure out if we can use bilinear resampling.
int bilinear = 0; int bilinear = 0;
if (xscale >= 1.0 && yscale >= 1.0) { if (work->xscale >= 1.0 && work->yscale >= 1.0) {
point_t aaloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)}); int aax = -1;
int aax = aaloc.x; int aay = -1;
int aay = aaloc.y;
if (work->use_perspective) {
point_t aaloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)});
if (aaloc.z > 0.0) {
aax = aaloc.x / aaloc.z;
aay = aaloc.y / aaloc.z;
}
} else {
point_t aaloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)});
aax = aaloc.x;
aay = aaloc.y;
}
if (!(aax <= 0 || aay <= 0 || aax >= ((int)work->texwidth - 1) || aay >= ((int)work->texheight - 1))) { if (!(aax <= 0 || aay <= 0 || aax >= ((int)work->texwidth - 1) || aay >= ((int)work->texheight - 1))) {
bilinear = 1; bilinear = 1;
@ -314,11 +316,28 @@ extern "C"
intcolor_t average; intcolor_t average;
if (bilinear) { if (bilinear) {
// Calculate the pixel we're after, and what percentage into the pixel we are. // Calculate the pixel we're after, and what percentage into the pixel we are.
point_t texloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)}); int aax;
int aax = texloc.x; int aay;
int aay = texloc.y; float aaxrem;
float aaxrem = texloc.x - (float)aax; float aayrem;
float aayrem = texloc.y - (float)aay;
if (work->use_perspective) {
// We don't check for negative here, because we already checked it above and wouldn't
// have enabled bilinear interpoliation.
point_t texloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)});
float fx = texloc.x / texloc.z;
float fy = texloc.y / texloc.z;
aax = fx;
aay = fy;
aaxrem = fx - (float)aax;
aayrem = fy - (float)aay;
} else {
point_t texloc = work->inverse.multiply_point((point_t){(float)(imgx + 0.5), (float)(imgy + 0.5)});
aax = texloc.x;
aay = texloc.y;
aaxrem = texloc.x - (float)aax;
aayrem = texloc.y - (float)aay;
}
// Find the four pixels that we can interpolate from. The first number is the x, and second is y. // Find the four pixels that we can interpolate from. The first number is the x, and second is y.
unsigned int tex00 = aax + (aay * work->texwidth); unsigned int tex00 = aax + (aay * work->texwidth);
@ -366,9 +385,20 @@ extern "C"
} else { } else {
for (float addy = 0.5 - yswing; addy <= 0.5 + yswing; addy += yswing / 2.0) { for (float addy = 0.5 - yswing; addy <= 0.5 + yswing; addy += yswing / 2.0) {
for (float addx = 0.5 - xswing; addx <= 0.5 + xswing; addx += xswing / 2.0) { for (float addx = 0.5 - xswing; addx <= 0.5 + xswing; addx += xswing / 2.0) {
point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + addx, (float)imgy + addy}); int aax = -1;
int aax = texloc.x; int aay = -1;
int aay = texloc.y;
if (work->use_perspective) {
point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + addx, (float)imgy + addy});
if (texloc.z > 0.0) {
aax = texloc.x / texloc.z;
aay = texloc.y / texloc.z;
}
} else {
point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + addx, (float)imgy + addy});
aax = texloc.x;
aay = texloc.y;
}
// If we're out of bounds, don't update. Factor this in, however, so we can get partial // If we're out of bounds, don't update. Factor this in, however, so we can get partial
// transparency to the pixel that is already there. // transparency to the pixel that is already there.
@ -429,9 +459,20 @@ extern "C"
work->imgdata[imgoff] = blend_point(work->add_color, work->mult_color, average, work->imgdata[imgoff], work->blendfunc); work->imgdata[imgoff] = blend_point(work->add_color, work->mult_color, average, work->imgdata[imgoff], work->blendfunc);
} else { } else {
// Grab the center of the pixel to get the color. // Grab the center of the pixel to get the color.
point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + (float)0.5, (float)imgy + (float)0.5}); int texx = -1;
int texx = texloc.x; int texy = -1;
int texy = texloc.y;
if (work->use_perspective) {
point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + (float)0.5, (float)imgy + (float)0.5});
if (texloc.z > 0.0) {
texx = texloc.x / texloc.z;
texy = texloc.y / texloc.z;
}
} else {
point_t texloc = work->inverse.multiply_point((point_t){(float)imgx + (float)0.5, (float)imgy + (float)0.5});
texx = texloc.x;
texy = texloc.y;
}
// If we're out of bounds, don't update. // If we're out of bounds, don't update.
if (texx < 0 || texy < 0 || texx >= (int)work->texwidth || texy >= (int)work->texheight) { if (texx < 0 || texy < 0 || texx >= (int)work->texwidth || texy >= (int)work->texheight) {
@ -452,7 +493,7 @@ extern "C"
return NULL; return NULL;
} }
int affine_composite_fast( int composite_fast(
unsigned char *imgbytes, unsigned char *imgbytes,
unsigned char *maskbytes, unsigned char *maskbytes,
unsigned int imgwidth, unsigned int imgwidth,
@ -463,7 +504,10 @@ extern "C"
unsigned int maxy, unsigned int maxy,
floatcolor_t add_color, floatcolor_t add_color,
floatcolor_t mult_color, floatcolor_t mult_color,
float xscale,
float yscale,
matrix_t inverse, matrix_t inverse,
int use_perspective,
int blendfunc, int blendfunc,
unsigned char *texbytes, unsigned char *texbytes,
unsigned int texwidth, unsigned int texwidth,
@ -488,11 +532,14 @@ extern "C"
work.texdata = texdata; work.texdata = texdata;
work.texwidth = texwidth; work.texwidth = texwidth;
work.texheight = texheight; work.texheight = texheight;
work.xscale = xscale;
work.yscale = yscale;
work.inverse = inverse; work.inverse = inverse;
work.add_color = add_color; work.add_color = add_color;
work.mult_color = mult_color; work.mult_color = mult_color;
work.blendfunc = blendfunc; work.blendfunc = blendfunc;
work.enable_aa = enable_aa; work.enable_aa = enable_aa;
work.use_perspective = use_perspective;
chunk_composite_fast(&work); chunk_composite_fast(&work);
} else { } else {
@ -531,12 +578,15 @@ extern "C"
work->texdata = texdata; work->texdata = texdata;
work->texwidth = texwidth; work->texwidth = texwidth;
work->texheight = texheight; work->texheight = texheight;
work->xscale = xscale;
work->yscale = yscale;
work->inverse = inverse; work->inverse = inverse;
work->add_color = add_color; work->add_color = add_color;
work->mult_color = mult_color; work->mult_color = mult_color;
work->blendfunc = blendfunc; work->blendfunc = blendfunc;
work->thread = thread; work->thread = thread;
work->enable_aa = enable_aa; work->enable_aa = enable_aa;
work->use_perspective = use_perspective;
if (me) if (me)
{ {
@ -584,58 +634,4 @@ extern "C"
return 0; return 0;
} }
// NOTE(review): forward-mapping perspective blitter — walks the texture
// texel-by-texel, projects each texel into screen space, and blends it into
// the destination image. Because exactly one destination pixel is written per
// texel, magnified textures can leave unfilled gaps between projected texels.
int perspective_composite_fast(
    unsigned char *imgbytes,    // destination RGBA pixel buffer
    unsigned char *maskbytes,   // optional one-byte-per-pixel mask; 0 means "don't draw here" (may be NULL)
    unsigned int imgwidth,
    unsigned int imgheight,
    float camera_x,             // projection pivots around the camera position,
    float camera_y,             // so on-axis points stay put on screen
    float camera_z,
    float focal_length,         // numerator of the perspective divide below
    floatcolor_t add_color,     // additive color adjustment, passed through to blend_point
    floatcolor_t mult_color,    // multiplicative color adjustment, passed through to blend_point
    matrix_t transform,         // maps texture coordinates into 3D space
    int blendfunc,              // blend mode selector, passed through to blend_point
    unsigned char *texbytes,    // source RGBA texture
    unsigned int texwidth,
    unsigned int texheight,
    unsigned int threads,       // unused in this implementation
    unsigned int enable_aa      // unused in this implementation
) {
    // Cast to a usable type.
    intcolor_t *imgdata = (intcolor_t *)imgbytes;
    intcolor_t *texdata = (intcolor_t *)texbytes;

    for (unsigned int texy = 0; texy < texheight; texy++) {
        for (unsigned int texx = 0; texx < texwidth; texx++) {
            // Calculate perspective projection.
            point_t imgloc = transform.multiply_point((point_t){(float)texx, (float)texy});
            // NOTE(review): divides by zero when imgloc.z == camera_z (texel
            // exactly on the camera plane); no guard exists here.
            float perspective = focal_length / (imgloc.z - camera_z);
            int imgx = ((imgloc.x - camera_x) * perspective) + camera_x;
            int imgy = ((imgloc.y - camera_y) * perspective) + camera_y;

            // Check clipping: skip texels that project outside the image.
            if (imgx < 0 || imgx >= (int)imgwidth) {
                continue;
            }
            if (imgy < 0 || imgy >= (int)imgheight) {
                continue;
            }

            // Check mask rectangle.
            unsigned int imgoff = imgx + (imgy * imgwidth);
            if (maskbytes != NULL && maskbytes[imgoff] == 0) {
                continue;
            }

            // Blend it.
            unsigned int texoff = (texx + (texy * texwidth));
            imgdata[imgoff] = blend_point(add_color, mult_color, texdata[texoff], imgdata[imgoff], blendfunc);
        }
    }
    return 0;
}
} }

View File

@ -0,0 +1,95 @@
from typing import Dict, List, Optional, Tuple
from ..types import Matrix, Point
def perspective_calculate(
    imgwidth: int,
    imgheight: int,
    texwidth: int,
    texheight: int,
    transform: Matrix,
    camera: Point,
    focal_length: float,
) -> Tuple[Optional[Matrix], int, int, int, int]:
    """
    Work out how to map screen pixels back onto a perspective-projected texture.

    Projects the corners of a texwidth x texheight texture through "transform"
    and a pinhole projection defined by "camera" and "focal_length", then
    constructs a matrix that takes a screen coordinate (imgx, imgy) and yields
    the interpolated (u/z, v/z, 1/z) values for that pixel. Dividing u/z and
    v/z by 1/z recovers the texture coordinate, which lets a renderer
    inverse-map each destination pixel. This isn't as simple as inverting the
    view matrix like in affine compositing because dividing by Z makes the
    perspective transform non-linear; u/z, v/z and 1/z, however, ARE linear in
    screen space, so those are what we interpolate.

    Args:
        imgwidth: Width in pixels of the destination image.
        imgheight: Height in pixels of the destination image.
        texwidth: Width in pixels of the texture being drawn.
        texheight: Height in pixels of the texture being drawn.
        transform: Matrix mapping texture coordinates into 3D space.
        camera: Camera location the projection is relative to.
        focal_length: Scale of the perspective divide; larger values flatten
            the projection.

    Returns:
        A (matrix, minx, miny, maxx, maxy) tuple. The matrix is None when
        nothing should be drawn (texture entirely off-screen, degenerate
        projection, or a corner exactly on the camera plane). The remaining
        four values bound the screen region the texture can touch, clamped
        to the image dimensions.
    """
    # Projected screen positions of each corner, and the matching
    # (u/z, v/z, 1/z) values, kept in parallel lists indexed by corner.
    # (A dict keyed by projected Point would silently clobber an entry if two
    # corners landed on the same integer pixel.)
    xy: List[Point] = []
    uvz: List[Point] = []
    for texx, texy in [
        (0, 0),
        (texwidth, 0),
        (0, texheight),
        # The fourth corner isn't needed for the interpolation vectors, but
        # including it gives a correct upper bound for where the texture
        # will be drawn.
        (texwidth, texheight),
    ]:
        imgloc = transform.multiply_point(Point(texx, texy))
        distance = imgloc.z - camera.z
        if distance == 0.0:
            # This corner sits exactly on the camera plane; the projection
            # below would divide by zero, so draw nothing.
            return (None, 0, 0, 0, 0)
        imgx = int(((imgloc.x - camera.x) * (focal_length / distance)) + camera.x)
        imgy = int(((imgloc.y - camera.y) * (focal_length / distance)) + camera.y)
        xy.append(Point(imgx, imgy))
        uvz.append(
            Point(
                focal_length * texx / distance,
                focal_length * texy / distance,
                focal_length / distance,
            )
        )

    # Calculate the maximum range of update this texture can possibly reside in.
    minx = max(int(min(p.x for p in xy)), 0)
    maxx = min(int(max(p.x for p in xy)) + 1, imgwidth)
    miny = max(int(min(p.y for p in xy)), 0)
    maxy = min(int(max(p.y for p in xy)) + 1, imgheight)

    if maxx <= minx or maxy <= miny:
        # This image is entirely off the screen!
        return (None, minx, miny, maxx, maxy)

    # Construct an affine transform that converts a point in the range
    # (0, 0)-(1, 1) to a point inside the parallelogram spanned by the two
    # vectors we got from projecting the first three texture corners above.
    xy_matrix = Matrix.affine(
        a=xy[1].x - xy[0].x,
        b=xy[1].y - xy[0].y,
        c=xy[2].x - xy[0].x,
        d=xy[2].y - xy[0].y,
        tx=xy[0].x,
        ty=xy[0].y,
    )

    # Inverting that gives us a matrix that takes screen space (imgx, imgy)
    # to those parallelogram ratios, which is what we interpolate with.
    try:
        xy_matrix = xy_matrix.inverse()
    except ZeroDivisionError:
        # This can't be inverted, so this shouldn't be displayed.
        return (None, minx, miny, maxx, maxy)

    # A second matrix interpolates coordinates in the range (0, 0)-(1, 1)
    # into the corresponding (u/z, v/z, 1/z) values.
    uvz_matrix = Matrix(
        a11=uvz[1].x - uvz[0].x,
        a12=uvz[1].y - uvz[0].y,
        a13=uvz[1].z - uvz[0].z,
        a21=uvz[2].x - uvz[0].x,
        a22=uvz[2].y - uvz[0].y,
        a23=uvz[2].z - uvz[0].z,
        a31=0.0,
        a32=0.0,
        a33=0.0,
        a41=uvz[0].x,
        a42=uvz[0].y,
        a43=uvz[0].z,
    )

    # Finally, combine the two matrices to do the interpolation all at once.
    inverse_matrix = xy_matrix.multiply(uvz_matrix)
    return (inverse_matrix, minx, miny, maxx, maxy)