From ef3cffea23b893c7a1adc7fbc9d26c5cef05c516 Mon Sep 17 00:00:00 2001 From: Mefiresu <15063879+Mefiresu@users.noreply.github.com> Date: Fri, 2 May 2025 17:30:55 +0200 Subject: [PATCH 1/2] wiiu/render: Fix missing max texture size According to https://www.x.org/docs/AMD/old/R6xx_R7xx_3D.pdf in section 1.3 (page 6), GX2 supports textures of up to 8192x8192 resolution. --- src/render/wiiu/SDL_render_wiiu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/render/wiiu/SDL_render_wiiu.c b/src/render/wiiu/SDL_render_wiiu.c index c046b0711a226..51ff5b321930d 100644 --- a/src/render/wiiu/SDL_render_wiiu.c +++ b/src/render/wiiu/SDL_render_wiiu.c @@ -360,8 +360,8 @@ SDL_RenderDriver WIIU_RenderDriver = SDL_PIXELFORMAT_ARGB2101010, }, - .max_texture_width = 0, - .max_texture_height = 0, + .max_texture_width = 8192, + .max_texture_height = 8192, }, }; From 76cd0e2885b729346f94e490c3ecd7be3223cf20 Mon Sep 17 00:00:00 2001 From: Mefiresu <15063879+Mefiresu@users.noreply.github.com> Date: Sat, 17 May 2025 08:50:49 +0200 Subject: [PATCH 2/2] wiiu/render: Improve SDL_UpdateTexture performance When possible, use the DMA to copy the whole texture data at once; this improves performance by nearly 15x in the best case. In all other cases, use OSBlockMove instead of the base memcpy implementation for a ~1.5x speedup in the slowest path. 
--- src/render/wiiu/SDL_rtexture_wiiu.c | 34 +++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/src/render/wiiu/SDL_rtexture_wiiu.c b/src/render/wiiu/SDL_rtexture_wiiu.c index 1bb4755435c47..b592bd129b21e 100644 --- a/src/render/wiiu/SDL_rtexture_wiiu.c +++ b/src/render/wiiu/SDL_rtexture_wiiu.c @@ -33,6 +33,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -170,8 +174,10 @@ int WIIU_SDL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture, WIIU_VideoData *videodata = (WIIU_VideoData *) SDL_GetVideoDevice()->driverdata; Uint32 BytesPerPixel = SDL_BYTESPERPIXEL(texture->format); size_t length = rect->w * BytesPerPixel; + size_t total_size = length * rect->h; Uint8 *src = (Uint8 *) pixels, *dst; int row, dst_pitch; + bool src_aligned, dst_aligned; if (!videodata->hasForeground) { return 0; @@ -180,10 +186,30 @@ int WIIU_SDL_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture, /* We write the rules, and we say all textures are streaming */ WIIU_SDL_LockTexture(renderer, texture, rect, (void**)&dst, &dst_pitch); - for (row = 0; row < rect->h; ++row) { - SDL_memcpy(dst, src, length); - src += pitch; - dst += dst_pitch; + if (length == pitch && length == dst_pitch) { + /* DMA requires 8 byte alignment */ + src_aligned = ((uintptr_t)src & 7U) == 0; + dst_aligned = ((uintptr_t)dst & 7U) == 0; + /* DMA works best on textures bigger than 5KiB */ + if ((total_size > 5120) && (src_aligned && dst_aligned)) { + DCFlushRange(src, total_size); + /* Run a single DMA transfer and wait until transfer is done */ + while (!DMAEWaitDone(DMAECopyMem(dst, src, total_size >> 2, DMAE_SWAP_NONE))); + } else { + /* Otherwise, fallback to a single memory copy */ + OSBlockMove(dst, src, total_size, true); + } + } else { + /* Flush source first */ + DCFlushRange(src, total_size); + for (row = 0; row < rect->h; ++row) { + /* Do not flush per line here */ + OSBlockMove(dst, src, length, false); + 
src += pitch; + dst += dst_pitch; + } + /* Now that we're done, we can flush the entire destination at once */ + DCFlushRange(dst, dst_pitch * rect->h); } WIIU_SDL_UnlockTexture(renderer, texture);