early-access version 2167

This commit is contained in:
pineappleEA 2021-10-30 03:43:57 +02:00
parent 5df35c4982
commit b46aaf17ee
12 changed files with 85 additions and 76 deletions

View File

@ -1,7 +1,7 @@
yuzu emulator early access
=============
This is the source code for early-access 2166.
This is the source code for early-access 2167.
## Legal Notice

View File

@ -106,30 +106,28 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
auto& event = events_interface.events[event_id];
auto& gpu = system.GPU();
// This is mostly to take into account unimplemented features. As synced
// gpu is always synced.
if (!gpu.IsAsync()) {
event.event->GetWritableEvent().Signal();
return NvResult::Success;
}
const u32 current_syncpoint_value = event.fence.value;
const s32 diff = current_syncpoint_value - params.threshold;
if (diff >= 0) {
event.event->GetWritableEvent().Signal();
params.value = current_syncpoint_value;
std::memcpy(output.data(), &params, sizeof(params));
events_interface.failed[event_id] = false;
return NvResult::Success;
}
const u32 target_value = current_syncpoint_value - diff;
const u32 target_value = params.value;
if (!is_async) {
params.value = 0;
}
if (params.timeout == 0) {
const auto check_failing = [&]() {
if (events_interface.failed[event_id]) {
gpu.WaitFence(params.syncpt_id, target_value);
std::memcpy(output.data(), &params, sizeof(params));
events_interface.failed[event_id] = false;
return true;
}
return false;
};
if (params.timeout == 0) {
if (check_failing()) {
return NvResult::Success;
}
return NvResult::Timeout;
}
@ -148,15 +146,7 @@ NvResult nvhost_ctrl::IocCtrlEventWait(const std::vector<u8>& input, std::vector
params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000;
}
params.value |= event_id;
event.event->GetWritableEvent().Clear();
if (events_interface.failed[event_id]) {
{
auto lk = system.StallCPU();
gpu.WaitFence(params.syncpt_id, target_value);
system.UnstallCPU();
}
std::memcpy(output.data(), &params, sizeof(params));
events_interface.failed[event_id] = false;
if (check_failing()) {
return NvResult::Success;
}
gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value);

View File

@ -30,7 +30,7 @@ namespace {
return false;
}
void VisitMark(const IR::Inst& inst) {
void VisitMark(IR::Block& block, IR::Inst& inst) {
switch (inst.GetOpcode()) {
case IR::Opcode::ShuffleIndex:
case IR::Opcode::ShuffleUp:
@ -49,19 +49,30 @@ void VisitMark(const IR::Inst& inst) {
break;
}
IR::Inst* const bitcast_inst{bitcast_arg.InstRecursive()};
bool must_patch_outside = false;
if (bitcast_inst->GetOpcode() == IR::Opcode::GetAttribute) {
const IR::Attribute attr{bitcast_inst->Arg(0).Attribute()};
switch (attr) {
case IR::Attribute::PositionX:
case IR::Attribute::PositionY:
bitcast_inst->SetFlags<u32>(0xDEADBEEF);
must_patch_outside = true;
break;
default:
break;
}
}
if (must_patch_outside) {
const auto it{IR::Block::InstructionList::s_iterator_to(inst)};
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
const IR::F32 new_inst{&*block.PrependNewInst(it, inst)};
const IR::F32 up_factor{ir.FPRecip(ir.ResolutionDownFactor())};
const IR::Value converted{ir.FPMul(new_inst, up_factor)};
inst.ReplaceUsesWith(converted);
}
break;
}
default:
break;
}
@ -75,6 +86,14 @@ void PatchFragCoord(IR::Block& block, IR::Inst& inst) {
inst.ReplaceUsesWith(downscaled_frag_coord);
}
/// Rewrites the value operand (argument 1) of `inst` so that it is multiplied by the
/// reciprocal of the resolution down factor before being stored.
///
/// Used by the rescaling pass for writes to the PointSize attribute.
///
/// @param block Block that contains `inst`; the helper instructions are emitted into it.
/// @param inst  Instruction whose argument 1 is replaced with the rescaled value.
void PatchPointSize(IR::Block& block, IR::Inst& inst) {
    // The emitter is anchored at `inst`
    // NOTE(review): presumably new instructions land right before it - confirm.
    const auto insertion_point{IR::Block::InstructionList::s_iterator_to(inst)};
    IR::IREmitter emitter{block, insertion_point};

    // Value the shader originally wanted to write.
    const IR::F32 guest_size{inst.Arg(1)};

    // up factor = 1 / down factor
    const auto down_factor{emitter.ResolutionDownFactor()};
    const IR::F32 inverse_down_factor{emitter.FPRecip(down_factor)};

    const IR::F32 host_size{emitter.FPMul(guest_size, inverse_down_factor)};
    inst.SetArg(1, host_size);
}
[[nodiscard]] IR::U32 Scale(IR::IREmitter& ir, const IR::U1& is_scaled, const IR::U32& value) {
IR::U32 scaled_value{value};
if (const u32 up_scale = Settings::values.resolution_info.up_scale; up_scale != 1) {
@ -253,6 +272,19 @@ void Visit(const IR::Program& program, IR::Block& block, IR::Inst& inst) {
}
break;
}
case IR::Opcode::SetAttribute: {
const IR::Attribute attr{inst.Arg(0).Attribute()};
switch (attr) {
case IR::Attribute::PointSize:
if (inst.Flags<u32>() != 0xDEADBEEF) {
PatchPointSize(block, inst);
}
break;
default:
break;
}
break;
}
case IR::Opcode::ImageQueryDimensions:
PatchImageQueryDimensions(block, inst);
break;
@ -281,7 +313,7 @@ void RescalingPass(IR::Program& program) {
if (is_fragment_shader) {
for (IR::Block* const block : program.post_order_blocks) {
for (IR::Inst& inst : block->Instructions()) {
VisitMark(inst);
VisitMark(*block, inst);
}
}
}

View File

@ -63,8 +63,8 @@ void main() {
// TODO(Blinkhawk): This code can be optimized through shader group instructions.
vec3 horizontal = blurHorizontal(color_texture, frag_tex_coord, tex_offset).rgb;
vec3 vertical = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb;
vec3 diagonalA = blurVertical(color_texture, frag_tex_coord, tex_offset).rgb;
vec3 diagonalB = blurVertical(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb;
vec3 diagonalA = blurDiagonal(color_texture, frag_tex_coord, tex_offset).rgb;
vec3 diagonalB = blurDiagonal(color_texture, frag_tex_coord, tex_offset * vec2(1.0, -1.0)).rgb;
vec3 combination = mix(mix(horizontal, vertical, 0.5f), mix(diagonalA, diagonalB, 0.5f), 0.5f);
color = vec4(combination + base, 1.0f);
}

View File

@ -978,8 +978,9 @@ void RasterizerOpenGL::SyncPointState() {
oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable);
glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
const bool is_rescaling{texture_cache.IsRescaling()};
const float scale = is_rescaling ? Settings::values.resolution_info.up_factor : 1.0f;
glPointSize(std::max(1.0f, maxwell3d.regs.point_size * scale));
}
void RasterizerOpenGL::SyncLineState() {

View File

@ -1201,7 +1201,14 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const TSCEntry& config) {
glSamplerParameterfv(handle, GL_TEXTURE_BORDER_COLOR, config.BorderColor().data());
if (GLAD_GL_ARB_texture_filter_anisotropic || GLAD_GL_EXT_texture_filter_anisotropic) {
glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, config.MaxAnisotropy());
const f32 setting_anisotropic =
static_cast<f32>(1U << Settings::values.max_anisotropy.GetValue());
const f32 game_anisotropic = std::clamp(config.MaxAnisotropy(), 1.0f, 16.0f);
const bool aument_anisotropic =
game_anisotropic > 1.0f || config.mipmap_filter == TextureMipmapFilter::Linear;
const f32 max_anisotropy =
aument_anisotropic ? std::max(game_anisotropic, setting_anisotropic) : game_anisotropic;
glSamplerParameterf(handle, GL_TEXTURE_MAX_ANISOTROPY, max_anisotropy);
} else {
LOG_WARNING(Render_OpenGL, "GL_ARB_texture_filter_anisotropic is required");
}

View File

@ -1448,7 +1448,14 @@ Sampler::Sampler(TextureCacheRuntime& runtime, const Tegra::Texture::TSCEntry& t
LOG_WARNING(Render_Vulkan, "VK_EXT_sampler_filter_minmax is required");
}
// Some games have samplers with garbage. Sanitize them here.
const float max_anisotropy = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f);
const f32 setting_anisotropic =
static_cast<f32>(1U << Settings::values.max_anisotropy.GetValue());
const f32 game_anisotropic = std::clamp(tsc.MaxAnisotropy(), 1.0f, 16.0f);
const bool aument_anisotropic =
game_anisotropic > 1.0f || tsc.mipmap_filter == TextureMipmapFilter::Linear;
const f32 max_anisotropy =
aument_anisotropic ? std::max(game_anisotropic, setting_anisotropic) : game_anisotropic;
sampler = device.GetLogical().CreateSampler(VkSamplerCreateInfo{
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
.pNext = pnext,

View File

@ -38,7 +38,6 @@ enum class ImageFlagBits : u32 {
Rescaled = 1 << 12,
CheckingRescalable = 1 << 13,
IsRescalable = 1 << 14,
Blacklisted = 1 << 15,
};
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)

View File

@ -16,6 +16,7 @@ namespace VideoCommon {
using Tegra::Texture::TextureType;
using Tegra::Texture::TICEntry;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceType;
ImageInfo::ImageInfo(const TICEntry& config) noexcept {
format = PixelFormatFromTextureInfo(config.format, config.r_type, config.g_type, config.b_type,
@ -102,6 +103,7 @@ ImageInfo::ImageInfo(const TICEntry& config) noexcept {
layer_stride = CalculateLayerStride(*this);
maybe_unaligned_layer_stride = CalculateLayerSize(*this);
rescaleable &= (block.depth == 0) && resources.levels == 1;
rescaleable &= size.height > 256 || GetFormatType(format) != SurfaceType::ColorTexture;
downscaleable = size.height > 512;
}
}
@ -135,7 +137,8 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index)
type = ImageType::e3D;
size.depth = rt.depth;
} else {
rescaleable = block.depth == 0 && size.height > 256;
rescaleable = block.depth == 0;
rescaleable &= size.height > 256;
downscaleable = size.height > 512;
type = ImageType::e2D;
resources.layers = rt.depth;
@ -165,7 +168,7 @@ ImageInfo::ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept {
type = ImageType::e3D;
size.depth = regs.zeta_depth;
} else {
rescaleable = block.depth == 0 && size.height > 256;
rescaleable = block.depth == 0;
downscaleable = size.height > 512;
type = ImageType::e2D;
resources.layers = regs.zeta_depth;
@ -199,7 +202,8 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept {
.height = config.height,
.depth = 1,
};
rescaleable = block.depth == 0 && size.height > 256;
rescaleable = block.depth == 0;
rescaleable &= size.height > 256;
downscaleable = size.height > 512;
}
}

View File

@ -53,8 +53,8 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
const auto device_memory = runtime.GetDeviceLocalMemory();
const u64 possible_expected_memory = (device_memory * 4) / 10;
const u64 possible_critical_memory = (device_memory * 7) / 10;
expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY);
critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY);
expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY - 256_MiB);
critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY - 512_MiB);
minimum_memory = 0;
} else {
// On OpenGL we can be more conservative as the driver takes care.
@ -355,7 +355,6 @@ void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
if (view.blacklist && view.id != NULL_IMAGE_VIEW_ID) {
const ImageViewBase& image_view{slot_image_views[view.id]};
auto& image = slot_images[image_view.image_id];
image.flags |= ImageFlagBits::Blacklisted;
has_blacklisted |= ScaleDown(image);
image.scale_rating = 0;
}
@ -985,7 +984,6 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
bool can_rescale = info.rescaleable;
bool any_rescaled = false;
bool any_blacklisted = false;
for (const ImageId sibling_id : all_siblings) {
if (!can_rescale) {
break;
@ -993,7 +991,6 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
Image& sibling = slot_images[sibling_id];
can_rescale &= ImageCanRescale(sibling);
any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled);
any_blacklisted |= True(sibling.flags & ImageFlagBits::Blacklisted);
}
can_rescale &= any_rescaled;
@ -1007,9 +1004,6 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
for (const ImageId sibling_id : all_siblings) {
Image& sibling = slot_images[sibling_id];
ScaleDown(sibling);
if (any_blacklisted) {
sibling.flags |= ImageFlagBits::Blacklisted;
}
}
}
@ -1644,7 +1638,6 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
boost::container::small_vector<const AliasedImage*, 1> aliased_images;
Image& image = slot_images[image_id];
bool any_rescaled = True(image.flags & ImageFlagBits::Rescaled);
bool any_blacklisted = True(image.flags & ImageFlagBits::Blacklisted);
u64 most_recent_tick = image.modification_tick;
for (const AliasedImage& aliased : image.aliased_images) {
ImageBase& aliased_image = slot_images[aliased.id];
@ -1652,7 +1645,6 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
most_recent_tick = std::max(most_recent_tick, aliased_image.modification_tick);
aliased_images.push_back(&aliased);
any_rescaled |= True(aliased_image.flags & ImageFlagBits::Rescaled);
any_blacklisted |= True(aliased_image.flags & ImageFlagBits::Blacklisted);
}
}
if (aliased_images.empty()) {
@ -1664,9 +1656,6 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
ScaleUp(image);
} else {
ScaleDown(image);
if (any_blacklisted) {
image.flags |= ImageFlagBits::Blacklisted;
}
}
}
image.modification_tick = most_recent_tick;
@ -1684,9 +1673,6 @@ void TextureCache<P>::SynchronizeAliases(ImageId image_id) {
Image& aliased_image = slot_images[aliased->id];
if (!can_rescale) {
ScaleDown(aliased_image);
if (any_blacklisted) {
aliased_image.flags |= ImageFlagBits::Blacklisted;
}
CopyImage(image_id, aliased->id, aliased->copies);
continue;
}

View File

@ -6,7 +6,6 @@
#include <array>
#include "common/cityhash.h"
#include "common/settings.h"
#include "video_core/textures/texture.h"
using Tegra::Texture::TICEntry;
@ -51,22 +50,6 @@ constexpr std::array<float, 256> SRGB_CONVERSION_LUT = {
0.917104f, 0.929242f, 0.941493f, 0.953859f, 0.966338f, 1.000000f, 1.000000f, 1.000000f,
};
/// Translates the user's `max_anisotropy` setting into a sample count.
///
/// @return 2, 4, 8 or 16 for the matching `Anisotropy::FilterNx` value; 1 for
///         `Anisotropy::Default` and for any value that is not recognized.
unsigned SettingsMinimumAnisotropy() noexcept {
    const auto configured{static_cast<Anisotropy>(Settings::values.max_anisotropy.GetValue())};
    if (configured == Anisotropy::Filter2x) {
        return 2U;
    }
    if (configured == Anisotropy::Filter4x) {
        return 4U;
    }
    if (configured == Anisotropy::Filter8x) {
        return 8U;
    }
    if (configured == Anisotropy::Filter16x) {
        return 16U;
    }
    // Anisotropy::Default and every unknown value fall back to no extra filtering.
    return 1U;
}
} // Anonymous namespace
std::array<float, 4> TSCEntry::BorderColor() const noexcept {
@ -78,7 +61,7 @@ std::array<float, 4> TSCEntry::BorderColor() const noexcept {
}
float TSCEntry::MaxAnisotropy() const noexcept {
return static_cast<float>(std::max(1U << max_anisotropy, SettingsMinimumAnisotropy()));
return static_cast<float>(1U << max_anisotropy);
}
} // namespace Tegra::Texture

View File

@ -130,22 +130,22 @@
</item>
<item>
<property name="text">
<string>2x (WILL BREAK THINGS)</string>
<string>2x</string>
</property>
</item>
<item>
<property name="text">
<string>4x (WILL BREAK THINGS)</string>
<string>4x</string>
</property>
</item>
<item>
<property name="text">
<string>8x (WILL BREAK THINGS)</string>
<string>8x</string>
</property>
</item>
<item>
<property name="text">
<string>16x (WILL BREAK THINGS)</string>
<string>16x</string>
</property>
</item>
</widget>