From 2236f4b2c372e3764d14997f25ae2dee0de6f1ad Mon Sep 17 00:00:00 2001 From: emmauss Date: Wed, 18 Jul 2018 22:05:17 +0300 Subject: [PATCH 01/15] Implement IFileSystem:CleanDirectoryRecursively (#283) * implement ifilesys:cleandirectoryrecursively * clean up Ifilesystem --- .../OsHle/Services/FspSrv/IFileSystem.cs | 51 ++++++++++++------- 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/Ryujinx.HLE/OsHle/Services/FspSrv/IFileSystem.cs b/Ryujinx.HLE/OsHle/Services/FspSrv/IFileSystem.cs index 441b7e8ad..61c6d1150 100644 --- a/Ryujinx.HLE/OsHle/Services/FspSrv/IFileSystem.cs +++ b/Ryujinx.HLE/OsHle/Services/FspSrv/IFileSystem.cs @@ -35,7 +35,7 @@ namespace Ryujinx.HLE.OsHle.Services.FspSrv { 10, Commit }, { 11, GetFreeSpaceSize }, { 12, GetTotalSpaceSize }, - //{ 13, CleanDirectoryRecursively }, + { 13, CleanDirectoryRecursively }, //{ 14, GetFileTimeStampRaw } }; @@ -46,8 +46,6 @@ namespace Ryujinx.HLE.OsHle.Services.FspSrv public long CreateFile(ServiceCtx Context) { - long Position = Context.Request.PtrBuff[0].Position; - string Name = ReadUtf8String(Context); long Mode = Context.RequestData.ReadInt64(); @@ -80,8 +78,6 @@ namespace Ryujinx.HLE.OsHle.Services.FspSrv public long DeleteFile(ServiceCtx Context) { - long Position = Context.Request.PtrBuff[0].Position; - string Name = ReadUtf8String(Context); string FileName = Context.Ns.VFs.GetFullPath(Path, Name); @@ -103,8 +99,6 @@ namespace Ryujinx.HLE.OsHle.Services.FspSrv public long CreateDirectory(ServiceCtx Context) { - long Position = Context.Request.PtrBuff[0].Position; - string Name = ReadUtf8String(Context); string DirName = Context.Ns.VFs.GetFullPath(Path, Name); @@ -141,8 +135,6 @@ namespace Ryujinx.HLE.OsHle.Services.FspSrv private long DeleteDirectory(ServiceCtx Context, bool Recursive) { - long Position = Context.Request.PtrBuff[0].Position; - string Name = ReadUtf8String(Context); string DirName = Context.Ns.VFs.GetFullPath(Path, Name); @@ -220,8 +212,6 @@ namespace Ryujinx.HLE.OsHle.Services.FspSrv public long GetEntryType(ServiceCtx Context) { - long Position = Context.Request.PtrBuff[0].Position; - string Name = ReadUtf8String(Context); string FileName = Context.Ns.VFs.GetFullPath(Path, Name); @@ -246,8 +236,6 @@ namespace Ryujinx.HLE.OsHle.Services.FspSrv public long OpenFile(ServiceCtx Context) { - long Position = Context.Request.PtrBuff[0].Position; - int FilterFlags = Context.RequestData.ReadInt32(); string Name = ReadUtf8String(Context); @@ -282,8 +270,6 @@ namespace Ryujinx.HLE.OsHle.Services.FspSrv public long OpenDirectory(ServiceCtx Context) { - long Position = Context.Request.PtrBuff[0].Position; - int FilterFlags = Context.RequestData.ReadInt32(); string Name = ReadUtf8String(Context); @@ -321,8 +307,6 @@ namespace Ryujinx.HLE.OsHle.Services.FspSrv public long GetFreeSpaceSize(ServiceCtx Context) { - long Position = Context.Request.PtrBuff[0].Position; - string Name = ReadUtf8String(Context); Context.ResponseData.Write(Context.Ns.VFs.GetDrive().AvailableFreeSpace); @@ -332,8 +316,6 @@ namespace Ryujinx.HLE.OsHle.Services.FspSrv public long GetTotalSpaceSize(ServiceCtx Context) { - long Position = Context.Request.PtrBuff[0].Position; - string Name = ReadUtf8String(Context); Context.ResponseData.Write(Context.Ns.VFs.GetDrive().TotalSize); @@ -341,6 +323,37 @@ namespace Ryujinx.HLE.OsHle.Services.FspSrv return 0; } + public long CleanDirectoryRecursively(ServiceCtx Context) + { + string Name = ReadUtf8String(Context); + + string DirName = Context.Ns.VFs.GetFullPath(Path, Name); + + if (!Directory.Exists(DirName)) + { + return MakeError(ErrorModule.Fs, FsErr.PathDoesNotExist); + } + + if (IsPathAlreadyInUse(DirName)) + { + return MakeError(ErrorModule.Fs, FsErr.PathAlreadyInUse); + } + + foreach (string Entry in Directory.EnumerateFileSystemEntries(DirName)) + { + if (Directory.Exists(Entry)) + { + Directory.Delete(Entry, true); + } + else if (File.Exists(Entry)) + { + File.Delete(Entry); + } + } + + return 0; + } + private bool IsPathAlreadyInUse(string Path) { lock (OpenPaths) From 0a13900bc93066dcb453f86d9154d52020255d32 Mon Sep 17 00:00:00 2001 From: greggameplayer <33609333+greggameplayer@users.noreply.github.com> Date: Thu, 19 Jul 2018 01:19:37 +0200 Subject: [PATCH 02/15] Implement Z24S8 TextureFormat (#247) * add Z24S8 TextureFormat * return correct PixelFormat & PixelType * return correct texture size * return correct Bytes Per Pixel * Correct PixelType --- Ryujinx.Graphics/Gal/GalTextureFormat.cs | 1 + Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs | 1 + Ryujinx.HLE/Gpu/Texture/TextureHelper.cs | 1 + Ryujinx.HLE/Gpu/Texture/TextureReader.cs | 3 ++- 4 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Ryujinx.Graphics/Gal/GalTextureFormat.cs b/Ryujinx.Graphics/Gal/GalTextureFormat.cs index 30f5c2297..f1e025f2a 100644 --- a/Ryujinx.Graphics/Gal/GalTextureFormat.cs +++ b/Ryujinx.Graphics/Gal/GalTextureFormat.cs @@ -19,6 +19,7 @@ namespace Ryujinx.Graphics.Gal BC3 = 0x26, BC4 = 0x27, BC5 = 0x28, + Z24S8 = 0x29, ZF32 = 0x2f, Astc2D4x4 = 0x40, Astc2D5x5 = 0x41, diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs index 5d20c931e..2b0b7403a 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs @@ -139,6 +139,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL case GalTextureFormat.R16: return (PixelFormat.Red, PixelType.HalfFloat); case GalTextureFormat.R8: return (PixelFormat.Red, PixelType.UnsignedByte); case GalTextureFormat.ZF32: return (PixelFormat.DepthComponent, PixelType.Float); + case GalTextureFormat.Z24S8: return (PixelFormat.DepthStencil, PixelType.UnsignedInt248); } throw new NotImplementedException(Format.ToString()); diff --git a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs index dbbc87d40..e934e363d 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs @@ -37,6 +37,7 @@ namespace Ryujinx.HLE.Gpu.Texture case GalTextureFormat.A8B8G8R8: case GalTextureFormat.R32: case GalTextureFormat.ZF32: + case GalTextureFormat.Z24S8: return Texture.Width * Texture.Height * 4; case GalTextureFormat.A1B5G5R5: diff --git a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs index 1d8c8056a..3a5037bbb 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs @@ -14,6 +14,7 @@ namespace Ryujinx.HLE.Gpu.Texture case GalTextureFormat.R16G16B16A16: return Read8Bpp (Memory, Texture); case GalTextureFormat.A8B8G8R8: return Read4Bpp (Memory, Texture); case GalTextureFormat.R32: return Read4Bpp (Memory, Texture); + case GalTextureFormat.Z24S8: return Read4Bpp (Memory, Texture); case GalTextureFormat.A1B5G5R5: return Read5551 (Memory, Texture); case GalTextureFormat.B5G6R5: return Read565 (Memory, Texture); case GalTextureFormat.G8R8: return Read2Bpp (Memory, Texture); @@ -42,7 +43,7 @@ namespace Ryujinx.HLE.Gpu.Texture case GalTextureFormat.Astc2D8x5: return Read16BptCompressedTexture(Memory, Texture, 8, 5); case GalTextureFormat.Astc2D10x5: return Read16BptCompressedTexture(Memory, Texture, 10, 5); case GalTextureFormat.Astc2D10x6: return Read16BptCompressedTexture(Memory, Texture, 10, 6); - } + } throw new NotImplementedException(Texture.Format.ToString()); } From 120fe6b74a0d2903471bfaeb25ef8265712fc576 Mon Sep 17 00:00:00 2001 From: greggameplayer <33609333+greggameplayer@users.noreply.github.com> Date: Thu, 19 Jul 2018 01:26:28 +0200 Subject: [PATCH 03/15] Implement BF10GF11RF11 TextureFormat (#246) * add BF10GF11RF11 TextureFormat * return correct PixelFormat & PixelType * return correct texture size * return correct Bytes Per Pixel * correct PixelType --- Ryujinx.Graphics/Gal/GalTextureFormat.cs | 1 + Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs | 1 + Ryujinx.HLE/Gpu/Texture/TextureHelper.cs | 1 + Ryujinx.HLE/Gpu/Texture/TextureReader.cs | 1 + 4 files changed, 4 insertions(+) diff --git a/Ryujinx.Graphics/Gal/GalTextureFormat.cs b/Ryujinx.Graphics/Gal/GalTextureFormat.cs index f1e025f2a..c47842832 100644 --- a/Ryujinx.Graphics/Gal/GalTextureFormat.cs +++ b/Ryujinx.Graphics/Gal/GalTextureFormat.cs @@ -14,6 +14,7 @@ namespace Ryujinx.Graphics.Gal G8R8 = 0x18, R16 = 0x1b, R8 = 0x1d, + BF10GF11RF11 = 0x21, BC1 = 0x24, BC2 = 0x25, BC3 = 0x26, diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs index 2b0b7403a..9004e9bae 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLEnumConverter.cs @@ -139,6 +139,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL case GalTextureFormat.R16: return (PixelFormat.Red, PixelType.HalfFloat); case GalTextureFormat.R8: return (PixelFormat.Red, PixelType.UnsignedByte); case GalTextureFormat.ZF32: return (PixelFormat.DepthComponent, PixelType.Float); + case GalTextureFormat.BF10GF11RF11: return (PixelFormat.Rgb, PixelType.UnsignedInt10F11F11FRev); case GalTextureFormat.Z24S8: return (PixelFormat.DepthStencil, PixelType.UnsignedInt248); } diff --git a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs index e934e363d..de26c397d 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs @@ -37,6 +37,7 @@ namespace Ryujinx.HLE.Gpu.Texture case GalTextureFormat.A8B8G8R8: case GalTextureFormat.R32: case GalTextureFormat.ZF32: + case GalTextureFormat.BF10GF11RF11: case GalTextureFormat.Z24S8: return Texture.Width * Texture.Height * 4; diff --git a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs index 3a5037bbb..26129877a 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs @@ -14,6 +14,7 @@ namespace Ryujinx.HLE.Gpu.Texture case GalTextureFormat.R16G16B16A16: return Read8Bpp (Memory, Texture); case GalTextureFormat.A8B8G8R8: return Read4Bpp (Memory, Texture); case GalTextureFormat.R32: return Read4Bpp (Memory, Texture); + case GalTextureFormat.BF10GF11RF11: return Read4Bpp (Memory, Texture); case GalTextureFormat.Z24S8: return Read4Bpp (Memory, Texture); case GalTextureFormat.A1B5G5R5: return Read5551 (Memory, Texture); case GalTextureFormat.B5G6R5: return Read565 (Memory, Texture); From fa5545aab80c056fa7e1f8d516a5add79eb30d8b Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Thu, 19 Jul 2018 02:06:28 +0200 Subject: [PATCH 04/15] Implement Ssubw_V and Usubw_V instructions. (#287) * Update AOpCodeTable.cs * Update AInstEmitSimdHelper.cs * Update AInstEmitSimdArithmetic.cs * Update AInstEmitSimdMove.cs * Update AInstEmitSimdCmp.cs * Update Instructions.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs --- ChocolArm64/AOpCodeTable.cs | 2 + .../Instruction/AInstEmitSimdArithmetic.cs | 10 + ChocolArm64/Instruction/AInstEmitSimdCmp.cs | 6 +- .../Instruction/AInstEmitSimdHelper.cs | 77 ++++-- ChocolArm64/Instruction/AInstEmitSimdMove.cs | 15 +- Ryujinx.Tests/Cpu/CpuTestSimd.cs | 54 ++++ Ryujinx.Tests/Cpu/CpuTestSimdReg.cs | 248 ++++++++++++++++++ Ryujinx.Tests/Cpu/Tester/Instructions.cs | 243 ++++++++++++++++- 8 files changed, 620 insertions(+), 35 deletions(-) diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index 0e979aa44..c69de38f5 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -387,6 +387,7 @@ namespace ChocolArm64 SetA64("0100111101xxxxxx000001xxxxxxxxxx", AInstEmit.Sshr_V, typeof(AOpCodeSimdShImm)); SetA64("0x00111100>>>xxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm)); SetA64("0100111101xxxxxx000101xxxxxxxxxx", AInstEmit.Ssra_V, typeof(AOpCodeSimdShImm)); + SetA64("0x001110<<1xxxxx001100xxxxxxxxxx", AInstEmit.Ssubw_V, typeof(AOpCodeSimdReg)); SetA64("0x00110000000000xxxxxxxxxxxxxxxx", AInstEmit.St__Vms, typeof(AOpCodeSimdMemMs)); SetA64("0x001100100xxxxxxxxxxxxxxxxxxxxx", AInstEmit.St__Vms, typeof(AOpCodeSimdMemMs)); SetA64("0x00110100x00000xxxxxxxxxxxxxxxx", AInstEmit.St__Vss, typeof(AOpCodeSimdMemSs)); @@ -430,6 +431,7 @@ namespace ChocolArm64 SetA64("0110111101xxxxxx000001xxxxxxxxxx", AInstEmit.Ushr_V, typeof(AOpCodeSimdShImm)); SetA64("0x10111100>>>xxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm)); SetA64("0110111101xxxxxx000101xxxxxxxxxx", AInstEmit.Usra_V, typeof(AOpCodeSimdShImm)); + SetA64("0x101110<<1xxxxx001100xxxxxxxxxx", AInstEmit.Usubw_V, typeof(AOpCodeSimdReg)); SetA64("0>001110<<0xxxxx000110xxxxxxxxxx", AInstEmit.Uzp1_V, typeof(AOpCodeSimdReg)); SetA64("0>001110<<0xxxxx010110xxxxxxxxxx", AInstEmit.Uzp2_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<100001001010xxxxxxxxxx", AInstEmit.Xtn_V, typeof(AOpCodeSimd)); diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index 36bb1cbf1..8e7418611 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -1072,6 +1072,11 @@ namespace ChocolArm64.Instruction EmitVectorSaturatingNarrowOpSxZx(Context, () => { }); } + public static void Ssubw_V(AILEmitterCtx Context) + { + EmitVectorWidenRmBinaryOpSx(Context, () => Context.Emit(OpCodes.Sub)); + } + public static void Sub_S(AILEmitterCtx Context) { EmitScalarBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub)); @@ -1225,5 +1230,10 @@ namespace ChocolArm64.Instruction { EmitVectorSaturatingNarrowOpZxZx(Context, () => { }); } + + public static void Usubw_V(AILEmitterCtx Context) + { + EmitVectorWidenRmBinaryOpZx(Context, () => Context.Emit(OpCodes.Sub)); + } } } diff --git a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs index 773d98944..c2d47747e 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdCmp.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdCmp.cs @@ -364,7 +364,7 @@ namespace ChocolArm64.Instruction AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; int Bytes = Op.GetBitsCount() >> 3; - int Elems = (!Scalar ? Bytes >> Op.Size : 1); + int Elems = !Scalar ? Bytes >> Op.Size : 1; ulong SzMask = ulong.MaxValue >> (64 - (8 << Op.Size)); @@ -408,7 +408,7 @@ namespace ChocolArm64.Instruction AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; int Bytes = Op.GetBitsCount() >> 3; - int Elems = (!Scalar ? Bytes >> Op.Size : 1); + int Elems = !Scalar ? Bytes >> Op.Size : 1; ulong SzMask = ulong.MaxValue >> (64 - (8 << Op.Size)); @@ -522,4 +522,4 @@ namespace ChocolArm64.Instruction Context.MarkLabel(LblEnd); } } -} \ No newline at end of file +} diff --git a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs index 7716e2987..0bd1a6292 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdHelper.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdHelper.cs @@ -419,20 +419,25 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> SizeF + 2; - for (int Index = 0; Index < (Bytes >> SizeF + 2); Index++) + bool Rd = (Opers & OperFlags.Rd) != 0; + bool Rn = (Opers & OperFlags.Rn) != 0; + bool Rm = (Opers & OperFlags.Rm) != 0; + + for (int Index = 0; Index < Elems; Index++) { - if (Opers.HasFlag(OperFlags.Rd)) + if (Rd) { EmitVectorExtractF(Context, Op.Rd, Index, SizeF); } - if (Opers.HasFlag(OperFlags.Rn)) + if (Rn) { EmitVectorExtractF(Context, Op.Rn, Index, SizeF); } - if (Opers.HasFlag(OperFlags.Rm)) + if (Rm) { EmitVectorExtractF(Context, ((AOpCodeSimdReg)Op).Rm, Index, SizeF); } @@ -469,8 +474,9 @@ namespace ChocolArm64.Instruction int SizeF = Op.Size & 1; int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> SizeF + 2; - for (int Index = 0; Index < (Bytes >> SizeF + 2); Index++) + for (int Index = 0; Index < Elems; Index++) { if (Ternary) { @@ -531,19 +537,23 @@ namespace ChocolArm64.Instruction int Bytes = Op.GetBitsCount() >> 3; int Elems = Bytes >> Op.Size; + bool Rd = (Opers & OperFlags.Rd) != 0; + bool Rn = (Opers & OperFlags.Rn) != 0; + bool Rm = (Opers & OperFlags.Rm) != 0; + for (int Index = 0; Index < Elems; Index++) { - if (Opers.HasFlag(OperFlags.Rd)) + if (Rd) { EmitVectorExtract(Context, Op.Rd, Index, Op.Size, Signed); } - if (Opers.HasFlag(OperFlags.Rn)) + if (Rn) { EmitVectorExtract(Context, Op.Rn, Index, Op.Size, Signed); } - if (Opers.HasFlag(OperFlags.Rm)) + if (Rm) { EmitVectorExtract(Context, ((AOpCodeSimdReg)Op).Rm, Index, Op.Size, Signed); } @@ -662,9 +672,6 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - Context.EmitLdvec(Op.Rd); - Context.EmitStvectmp(); - int Elems = 8 >> Op.Size; int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0; @@ -707,9 +714,6 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - Context.EmitLdvec(Op.Rd); - Context.EmitStvectmp(); - int Elems = 8 >> Op.Size; int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0; @@ -747,21 +751,25 @@ namespace ChocolArm64.Instruction { AOpCodeSimdReg Op = (AOpCodeSimdReg)Context.CurrOp; - int Bytes = Op.GetBitsCount() >> 3; + int Words = Op.GetBitsCount() >> 4; + int Pairs = Words >> Op.Size; - int Elems = Bytes >> Op.Size; - int Half = Elems >> 1; - - for (int Index = 0; Index < Elems; Index++) + for (int Index = 0; Index < Pairs; Index++) { - int Elem = (Index & (Half - 1)) << 1; + int Idx = Index << 1; - EmitVectorExtract(Context, Index < Half ? Op.Rn : Op.Rm, Elem + 0, Op.Size, Signed); - EmitVectorExtract(Context, Index < Half ? Op.Rn : Op.Rm, Elem + 1, Op.Size, Signed); + EmitVectorExtract(Context, Op.Rn, Idx, Op.Size, Signed); + EmitVectorExtract(Context, Op.Rn, Idx + 1, Op.Size, Signed); Emit(); - EmitVectorInsertTmp(Context, Index, Op.Size); + EmitVectorExtract(Context, Op.Rm, Idx, Op.Size, Signed); + EmitVectorExtract(Context, Op.Rm, Idx + 1, Op.Size, Signed); + + Emit(); + + EmitVectorInsertTmp(Context, Pairs + Index, Op.Size); + EmitVectorInsertTmp(Context, Index, Op.Size); } Context.EmitLdvectmp(); @@ -818,7 +826,7 @@ namespace ChocolArm64.Instruction int Part = !Scalar && (Op.RegisterSize == ARegisterSize.SIMD128) ? Elems : 0; - long TMaxValue = SignedDst ? (1 << (ESize - 1)) - 1 : (1L << ESize) - 1L; + long TMaxValue = SignedDst ? (1 << (ESize - 1)) - 1 : (long)(~0UL >> (64 - ESize)); long TMinValue = SignedDst ? -((1 << (ESize - 1))) : 0; Context.EmitLdc_I8(0L); @@ -871,7 +879,7 @@ namespace ChocolArm64.Instruction if (Scalar) { - EmitVectorZeroLower(Context, Op.Rd); + EmitVectorZeroLowerTmp(Context); } EmitVectorInsertTmp(Context, Part + Index, Op.Size); @@ -963,6 +971,11 @@ namespace ChocolArm64.Instruction EmitVectorInsert(Context, Rd, 0, 3, 0); } + public static void EmitVectorZeroLowerTmp(AILEmitterCtx Context) + { + EmitVectorInsertTmp(Context, 0, 3, 0); + } + public static void EmitVectorZeroUpper(AILEmitterCtx Context, int Rd) { EmitVectorInsert(Context, Rd, 1, 3, 0); @@ -1008,6 +1021,20 @@ namespace ChocolArm64.Instruction Context.EmitStvec(Reg); } + public static void EmitVectorInsertTmp(AILEmitterCtx Context, int Index, int Size, long Value) + { + ThrowIfInvalid(Index, Size); + + Context.EmitLdc_I8(Value); + Context.EmitLdvectmp(); + Context.EmitLdc_I4(Index); + Context.EmitLdc_I4(Size); + + AVectorHelper.EmitCall(Context, nameof(AVectorHelper.VectorInsertInt)); + + Context.EmitStvectmp(); + } + public static void EmitVectorInsertF(AILEmitterCtx Context, int Reg, int Index, int Size) { ThrowIfInvalidF(Index, Size); diff --git a/ChocolArm64/Instruction/AInstEmitSimdMove.cs b/ChocolArm64/Instruction/AInstEmitSimdMove.cs index 592cab733..3bf1e4635 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdMove.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdMove.cs @@ -295,13 +295,22 @@ namespace ChocolArm64.Instruction int Part = Op.RegisterSize == ARegisterSize.SIMD128 ? Elems : 0; + if (Part != 0) + { + Context.EmitLdvec(Op.Rd); + Context.EmitStvectmp(); + } + for (int Index = 0; Index < Elems; Index++) { EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size + 1); - EmitVectorInsert(Context, Op.Rd, Part + Index, Op.Size); + EmitVectorInsertTmp(Context, Part + Index, Op.Size); } + Context.EmitLdvectmp(); + Context.EmitStvec(Op.Rd); + if (Part == 0) { EmitVectorZeroUpper(Context, Op.Rd); @@ -342,7 +351,7 @@ namespace ChocolArm64.Instruction EmitVectorExtractZx(Context, Op.Rm, Idx + Part, Op.Size); EmitVectorInsertTmp(Context, Idx + 1, Op.Size); - EmitVectorInsertTmp(Context, Idx , Op.Size); + EmitVectorInsertTmp(Context, Idx, Op.Size); } Context.EmitLdvectmp(); @@ -398,7 +407,7 @@ namespace ChocolArm64.Instruction EmitVectorExtractZx(Context, Op.Rm, Base + Index, Op.Size); EmitVectorInsertTmp(Context, Idx + 1, Op.Size); - EmitVectorInsertTmp(Context, Idx , Op.Size); + EmitVectorInsertTmp(Context, Idx, Op.Size); } Context.EmitLdvectmp(); diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index b84d29575..82591edae 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -1377,6 +1377,60 @@ namespace Ryujinx.Tests.Cpu }); Assert.That(((ThreadState.Fpsr >> 27) & 1) != 0, Is.EqualTo(Shared.FPSR[27])); } + + [Test, Description("XTN{2} ., .")] + public void Xtn_V_8H8B_4S4H_2D2S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H8B, 4S4H, 2D2S> + { + uint Opcode = 0x0E212800; // XTN V0.8B, V0.8H + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + SimdFp.Xtn_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Description("XTN{2} ., .")] + public void Xtn_V_8H16B_4S8H_2D4S([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8H16B, 4S8H, 2D4S> + { + uint Opcode = 0x4E212800; // XTN2 V0.16B, V0.8H + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + SimdFp.Xtn_V(Op[30], Op[23, 22], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } #endif } } diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs index 51db857c3..c67348d1d 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs @@ -1659,6 +1659,130 @@ namespace Ryujinx.Tests.Cpu }); } + [Test, Pairwise, Description("SADDW{2} ., ., .")] + public void Saddw_V_8B8H8H_4H4S4S_2S2D2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H8H, 4H4S4S, 2S2D2D> + { + uint Opcode = 0x0E201000; // SADDW V0.8H, V0.8H, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); + SimdFp.Saddw_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("SADDW{2} ., ., .")] + public void Saddw_V_16B8H8H_8H4S4S_4S2D2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H8H, 8H4S4S, 4S2D2D> + { + uint Opcode = 0x4E201000; // SADDW2 V0.8H, V0.8H, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE1(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Saddw_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("SSUBW{2} ., ., .")] + public void Ssubw_V_8B8H8H_4H4S4S_2S2D2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H8H, 4H4S4S, 2S2D2D> + { + uint Opcode = 0x0E203000; // SSUBW V0.8H, V0.8H, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); + SimdFp.Ssubw_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("SSUBW{2} ., ., .")] + public void Ssubw_V_16B8H8H_8H4S4S_4S2D2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H8H, 8H4S4S, 4S2D2D> + { + uint Opcode = 0x4E203000; // SSUBW2 V0.8H, V0.8H, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE1(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Ssubw_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + [Test, Pairwise, Description("SUB , , ")] public void Sub_S_D([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, @@ -2184,6 +2308,130 @@ namespace Ryujinx.Tests.Cpu }); } + [Test, Pairwise, Description("UADDW{2} ., ., .")] + public void Uaddw_V_8B8H8H_4H4S4S_2S2D2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H8H, 4H4S4S, 2S2D2D> + { + uint Opcode = 0x2E201000; // UADDW V0.8H, V0.8H, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); + SimdFp.Uaddw_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("UADDW{2} ., ., .")] + public void Uaddw_V_16B8H8H_8H4S4S_4S2D2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H8H, 8H4S4S, 4S2D2D> + { + uint Opcode = 0x6E201000; // UADDW2 V0.8H, V0.8H, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE1(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Uaddw_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("USUBW{2} ., ., .")] + public void Usubw_V_8B8H8H_4H4S4S_2S2D2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <8B8H8H, 4H4S4S, 2S2D2D> + { + uint Opcode = 0x2E203000; // USUBW V0.8H, V0.8H, V0.8B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 0, new Bits(B)); + SimdFp.Usubw_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + + [Test, Pairwise, Description("USUBW{2} ., ., .")] + public void Usubw_V_16B8H8H_8H4S4S_4S2D2D([Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_8B4H2S1D_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H2S1D_")] [Random(RndCnt)] ulong A, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong B, + [Values(0b00u, 0b01u, 0b10u)] uint size) // <16B8H8H, 8H4S4S, 4S2D2D> + { + uint Opcode = 0x6E203000; // USUBW2 V0.8H, V0.8H, V0.16B + Opcode |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= ((size & 3) << 22); + Bits Op = new Bits(Opcode); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE1(B); + AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + + AArch64.Vpart(0, 0, new Bits(Z)); AArch64.Vpart(0, 1, new Bits(Z)); + AArch64.Vpart(1, 0, new Bits(A)); AArch64.Vpart(1, 1, new Bits(A)); + AArch64.Vpart(2, 1, new Bits(B)); + SimdFp.Usubw_V(Op[30], Op[23, 22], Op[20, 16], Op[9, 5], Op[4, 0]); + + Assert.Multiple(() => + { + Assert.That(GetVectorE0(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 0).ToUInt64())); + Assert.That(GetVectorE1(ThreadState.V0), Is.EqualTo(AArch64.Vpart(64, 0, 1).ToUInt64())); + }); + } + [Test, Pairwise, Description("UZP1 ., ., .")] public void Uzp1_V_8B_4H_2S([Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, diff --git a/Ryujinx.Tests/Cpu/Tester/Instructions.cs b/Ryujinx.Tests/Cpu/Tester/Instructions.cs index 68f83423b..2f52dcbf5 100644 --- a/Ryujinx.Tests/Cpu/Tester/Instructions.cs +++ b/Ryujinx.Tests/Cpu/Tester/Instructions.cs @@ -3315,6 +3315,37 @@ namespace Ryujinx.Tests.Cpu.Tester Vpart(d, part, result); } + + // xtn_advsimd.html + public static void Xtn_V(bool Q, Bits size, Bits Rn, Bits Rd) + { + /* Decode Vector */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = 64; + int part = (int)UInt(Q); + int elements = datasize / esize; + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(datasize); + Bits operand = V(2 * datasize, n); + Bits element; + + for (int e = 0; e <= elements - 1; e++) + { + element = Elem(operand, e, 2 * esize); + + Elem(result, e, esize, element[esize - 1, 0]); + } + + Vpart(d, part, result); + } #endregion #region "SimdReg" @@ -4395,8 +4426,8 @@ namespace Ryujinx.Tests.Cpu.Tester int part = (int)UInt(Q); int elements = datasize / esize; - bool unsigned = (U == true); bool accumulate = (op == false); + bool unsigned = (U == true); /* Operation */ /* CheckFPAdvSIMDEnabled64(); */ @@ -4484,8 +4515,8 @@ namespace Ryujinx.Tests.Cpu.Tester int part = (int)UInt(Q); int elements = datasize / esize; - bool unsigned = (U == true); bool accumulate = (op == false); + bool unsigned = (U == true); /* Operation */ /* CheckFPAdvSIMDEnabled64(); */ @@ -4511,6 +4542,108 @@ namespace Ryujinx.Tests.Cpu.Tester V(d, result); } + // saddw_advsimd.html + public static void Saddw_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool U = false; + const bool o1 = false; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = 64; + int part = (int)UInt(Q); + int elements = datasize / esize; + + bool sub_op = (o1 == true); + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(2 * datasize); + Bits operand1 = V(2 * datasize, n); + Bits operand2 = Vpart(datasize, m, part); + BigInteger element1; + BigInteger element2; + BigInteger sum; + + for (int e = 0; e <= elements - 1; e++) + { + element1 = Int(Elem(operand1, e, 2 * esize), unsigned); + element2 = Int(Elem(operand2, e, esize), unsigned); + + if (sub_op) + { + sum = element1 - element2; + } + else + { + sum = element1 + element2; + } + + Elem(result, e, 2 * esize, sum.SubBigInteger(2 * esize - 1, 0)); + } + + V(d, result); + } + + // ssubw_advsimd.html + public static void Ssubw_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool U = false; + const bool o1 = true; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = 64; + int part = (int)UInt(Q); + int elements = datasize / esize; + + bool sub_op = (o1 == true); + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(2 * datasize); + Bits operand1 = V(2 * datasize, n); + Bits operand2 = Vpart(datasize, m, part); + BigInteger element1; + BigInteger element2; + BigInteger sum; + + for (int e = 0; e <= elements - 1; e++) + { + element1 = Int(Elem(operand1, e, 2 * esize), unsigned); + element2 = Int(Elem(operand2, e, esize), unsigned); + + if (sub_op) + { + sum = element1 - element2; + } + else + { + sum = element1 + element2; + } + + Elem(result, e, 2 * esize, sum.SubBigInteger(2 * esize - 1, 0)); + } + + V(d, result); + } + // sub_advsimd.html#SUB_asisdsame_only public static void Sub_S(Bits size, Bits Rm, Bits Rn, Bits Rd) { @@ -4785,8 +4918,8 @@ namespace Ryujinx.Tests.Cpu.Tester int part = (int)UInt(Q); int elements = datasize / esize; - bool unsigned = (U == true); bool accumulate = (op == false); + bool unsigned = (U == true); /* Operation */ /* CheckFPAdvSIMDEnabled64(); */ @@ -4874,8 +5007,8 @@ namespace Ryujinx.Tests.Cpu.Tester int part = (int)UInt(Q); int elements = datasize / esize; - bool unsigned = (U == true); bool accumulate = (op == false); + bool unsigned = (U == true); /* Operation */ /* CheckFPAdvSIMDEnabled64(); */ @@ -4901,6 +5034,108 @@ namespace Ryujinx.Tests.Cpu.Tester V(d, result); } + // uaddw_advsimd.html + public static void Uaddw_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool U = true; + const bool o1 = false; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = 64; + int part = (int)UInt(Q); + int elements = datasize / esize; + + bool sub_op = (o1 == true); + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(2 * datasize); + Bits operand1 = V(2 * datasize, n); + Bits operand2 = Vpart(datasize, m, part); + BigInteger element1; + BigInteger element2; + BigInteger sum; + + for (int e = 0; e <= elements - 1; e++) + { + element1 = Int(Elem(operand1, e, 2 * esize), unsigned); + element2 = Int(Elem(operand2, e, esize), unsigned); + + if (sub_op) + { + sum = element1 - element2; + } + else + { + sum = element1 + element2; + } + + Elem(result, e, 2 * esize, sum.SubBigInteger(2 * esize - 1, 0)); + } + + V(d, result); + } + + // usubw_advsimd.html + public static void Usubw_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) + { + const bool U = true; + const bool o1 = true; + + /* Decode */ + int d = (int)UInt(Rd); + int n = (int)UInt(Rn); + int m = (int)UInt(Rm); + + /* if size == '11' then ReservedValue(); */ + + int esize = 8 << (int)UInt(size); + int datasize = 64; + int part = (int)UInt(Q); + int elements = datasize / esize; + + bool sub_op = (o1 == true); + bool unsigned = (U == true); + + /* Operation */ + /* CheckFPAdvSIMDEnabled64(); */ + + Bits result = new Bits(2 * datasize); + Bits operand1 = V(2 * datasize, n); + Bits operand2 = Vpart(datasize, m, part); + BigInteger element1; + BigInteger element2; + BigInteger sum; + + for (int e = 0; e <= elements - 1; e++) + { + element1 = Int(Elem(operand1, e, 2 * esize), unsigned); + element2 = Int(Elem(operand2, e, esize), unsigned); + + if (sub_op) + { + sum = element1 - element2; + } + else + { + sum = element1 + element2; + } + + Elem(result, e, 2 * esize, sum.SubBigInteger(2 * esize - 1, 0)); + } + + V(d, result); + } + // uzp1_advsimd.html public static void Uzp1_V(bool Q, Bits size, Bits Rm, Bits Rn, Bits Rd) { From 2795af038df06d2f8e0dbbf0fd271bbac5da59a2 Mon Sep 17 00:00:00 2001 From: simonmkwii-dev <40786398+simonmkwii-dev@users.noreply.github.com> Date: Thu, 19 Jul 2018 11:06:45 +1000 Subject: [PATCH 05/15] Implement GetCurrentIpAddress() and stub GetCurrentNetworkProfile() (#271) * Implement GetCurrentIpAddress() ...and stub GetCurrentNetworkProfile() * Update IGeneralService.cs * Actually implement it properly this time... * * Made some requested changes * Added requested changes. * Added more requested changes. * oof * Local > Address * Cyuubumped * Change PrintInfo > PrintDebug * Revert change --- .../OsHle/Services/Nifm/IGeneralService.cs | 29 +++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/Ryujinx.HLE/OsHle/Services/Nifm/IGeneralService.cs b/Ryujinx.HLE/OsHle/Services/Nifm/IGeneralService.cs index e289a8db8..66b1f1b61 100644 --- a/Ryujinx.HLE/OsHle/Services/Nifm/IGeneralService.cs +++ b/Ryujinx.HLE/OsHle/Services/Nifm/IGeneralService.cs @@ -1,6 +1,11 @@ using Ryujinx.HLE.Logging; using Ryujinx.HLE.OsHle.Ipc; +using System; using System.Collections.Generic; +using System.Linq; +using System.Net; +using System.Net.Sockets; +using System.Net.NetworkInformation; namespace Ryujinx.HLE.OsHle.Services.Nifm { @@ -14,10 +19,13 @@ namespace Ryujinx.HLE.OsHle.Services.Nifm { m_Commands = new Dictionary() { - { 4, CreateRequest } + { 4, CreateRequest }, + { 12, GetCurrentIpAddress } }; } + public const int NoInternetConnection = 0x2586e; + //CreateRequest(i32) public long CreateRequest(ServiceCtx Context) { @@ -29,5 +37,22 @@ namespace Ryujinx.HLE.OsHle.Services.Nifm return 0; } + + public long GetCurrentIpAddress(ServiceCtx Context) + { + if (!NetworkInterface.GetIsNetworkAvailable()) + { + return NoInternetConnection; + } + + IPHostEntry Host = Dns.GetHostEntry(Dns.GetHostName()); + IPAddress Address = Host.AddressList.FirstOrDefault(A => A.AddressFamily == AddressFamily.InterNetwork); + + Context.ResponseData.Write(BitConverter.ToUInt32(Address.GetAddressBytes())); + + Context.Ns.Log.PrintInfo(LogClass.ServiceNifm, "Console's local IP is " + Address.ToString()); + + return 0; + } } -} \ No newline at end of file +} From 6a69001aa26fe6f3688fa98901ca40e641743d55 Mon Sep 17 00:00:00 2001 From: Ac_K Date: Thu, 19 Jul 2018 03:10:51 +0200 Subject: [PATCH 06/15] Update IGeneralService.cs Fixed little mistake on the debug string. --- Ryujinx.HLE/OsHle/Services/Nifm/IGeneralService.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Ryujinx.HLE/OsHle/Services/Nifm/IGeneralService.cs b/Ryujinx.HLE/OsHle/Services/Nifm/IGeneralService.cs index 66b1f1b61..83bb9f370 100644 --- a/Ryujinx.HLE/OsHle/Services/Nifm/IGeneralService.cs +++ b/Ryujinx.HLE/OsHle/Services/Nifm/IGeneralService.cs @@ -50,7 +50,7 @@ namespace Ryujinx.HLE.OsHle.Services.Nifm Context.ResponseData.Write(BitConverter.ToUInt32(Address.GetAddressBytes())); - Context.Ns.Log.PrintInfo(LogClass.ServiceNifm, "Console's local IP is " + Address.ToString()); + Context.Ns.Log.PrintInfo(LogClass.ServiceNifm, $"Console's local IP is {Address.ToString()}"); return 0; } From 8b685b12f0b7a901139999dff17b24b049b9084b Mon Sep 17 00:00:00 2001 From: Ac_K Date: Thu, 19 Jul 2018 06:03:53 +0200 Subject: [PATCH 07/15] Implement SvcWaitForAddress 0x34 (#289) * Implement SvcWaitForAddress 0x34 Currently needed by Sonic Mania Plus * Fix mistake * read-decrement-write locked --- Ryujinx.HLE/OsHle/Handles/KThread.cs | 6 +- Ryujinx.HLE/OsHle/Kernel/AddressArbiter.cs | 112 +++++++++++++++++++++ Ryujinx.HLE/OsHle/Kernel/KernelErr.cs | 2 +- Ryujinx.HLE/OsHle/Kernel/SvcHandler.cs | 3 +- Ryujinx.HLE/OsHle/Kernel/SvcSystem.cs | 2 +- Ryujinx.HLE/OsHle/Kernel/SvcThreadSync.cs | 51 ++++++++++ 6 files changed, 171 insertions(+), 5 deletions(-) create mode 100644 Ryujinx.HLE/OsHle/Kernel/AddressArbiter.cs diff --git a/Ryujinx.HLE/OsHle/Handles/KThread.cs b/Ryujinx.HLE/OsHle/Handles/KThread.cs index 3db46f3d6..2b980d17b 100644 --- a/Ryujinx.HLE/OsHle/Handles/KThread.cs +++ b/Ryujinx.HLE/OsHle/Handles/KThread.cs @@ -9,10 +9,12 @@ namespace Ryujinx.HLE.OsHle.Handles public int CoreMask { get; set; } - public long MutexAddress { get; set; } - public long CondVarAddress { get; set; } + public long MutexAddress { get; set; } + public long CondVarAddress { get; set; } + public long ArbiterWaitAddress { get; set; } public bool CondVarSignaled { get; set; } + public bool ArbiterSignaled { get; set; } private Process Process; diff --git a/Ryujinx.HLE/OsHle/Kernel/AddressArbiter.cs b/Ryujinx.HLE/OsHle/Kernel/AddressArbiter.cs new file mode 100644 index 000000000..ce9ef0cd8 --- /dev/null +++ b/Ryujinx.HLE/OsHle/Kernel/AddressArbiter.cs @@ -0,0 +1,112 @@ +using ChocolArm64.Memory; +using ChocolArm64.State; +using Ryujinx.HLE.OsHle.Handles; + +using static Ryujinx.HLE.OsHle.ErrorCode; + +namespace Ryujinx.HLE.OsHle.Kernel +{ + static class AddressArbiter + { + static ulong WaitForAddress(Process Process, AThreadState ThreadState, long Address, ulong Timeout) + { + KThread CurrentThread = Process.GetThread(ThreadState.Tpidr); + + Process.Scheduler.SetReschedule(CurrentThread.ProcessorId); + + CurrentThread.ArbiterWaitAddress = Address; + CurrentThread.ArbiterSignaled = false; + + Process.Scheduler.EnterWait(CurrentThread, NsTimeConverter.GetTimeMs(Timeout)); + + if (!CurrentThread.ArbiterSignaled) + { + return MakeError(ErrorModule.Kernel, KernelErr.Timeout); + } + + return 0; + } + + public static ulong WaitForAddressIfLessThan(Process Process, + AThreadState ThreadState, + AMemory Memory, + long Address, + int Value, + ulong Timeout, + bool ShouldDecrement) + { + Memory.SetExclusive(ThreadState, Address); + + int CurrentValue = Memory.ReadInt32(Address); + + while (true) + { + if (Memory.TestExclusive(ThreadState, Address)) + { + if (CurrentValue < Value) + { + if (ShouldDecrement) + { + Memory.WriteInt32(Address, CurrentValue - 1); + } + + Memory.ClearExclusiveForStore(ThreadState); + } + else + { + Memory.ClearExclusiveForStore(ThreadState); + + return MakeError(ErrorModule.Kernel, KernelErr.InvalidState); + } + + break; + } + + Memory.SetExclusive(ThreadState, Address); + + CurrentValue = Memory.ReadInt32(Address); + } + + if (Timeout == 0) + { + return MakeError(ErrorModule.Kernel, KernelErr.Timeout); + } + + return WaitForAddress(Process, ThreadState, Address, Timeout); + } + + public static ulong WaitForAddressIfEqual(Process Process, + AThreadState ThreadState, + AMemory Memory, + long Address, + int Value, + ulong Timeout) + { + if (Memory.ReadInt32(Address) != Value) + { + return MakeError(ErrorModule.Kernel, KernelErr.InvalidState); + } + + if (Timeout == 0) + { + return MakeError(ErrorModule.Kernel, KernelErr.Timeout); + } + + return WaitForAddress(Process, ThreadState, Address, Timeout); + } + } + + enum ArbitrationType : int + { + WaitIfLessThan, + DecrementAndWaitIfLessThan, + WaitIfEqual + } + + enum SignalType : int + { + Signal, + IncrementAndSignalIfEqual, + ModifyByWaitingCountAndSignalIfEqual + } +} diff --git a/Ryujinx.HLE/OsHle/Kernel/KernelErr.cs b/Ryujinx.HLE/OsHle/Kernel/KernelErr.cs index ad4fdfb6b..bbae53255 100644 --- a/Ryujinx.HLE/OsHle/Kernel/KernelErr.cs +++ b/Ryujinx.HLE/OsHle/Kernel/KernelErr.cs @@ -12,7 +12,7 @@ namespace Ryujinx.HLE.OsHle.Kernel public const int Timeout = 117; public const int Canceled = 118; public const int CountOutOfRange = 119; - public const int InvalidInfo = 120; + public const int InvalidEnumValue = 120; public const int InvalidThread = 122; public const int InvalidState = 125; } diff --git a/Ryujinx.HLE/OsHle/Kernel/SvcHandler.cs b/Ryujinx.HLE/OsHle/Kernel/SvcHandler.cs index e05073fda..e816c44ec 100644 --- a/Ryujinx.HLE/OsHle/Kernel/SvcHandler.cs +++ b/Ryujinx.HLE/OsHle/Kernel/SvcHandler.cs @@ -73,7 +73,8 @@ namespace Ryujinx.HLE.OsHle.Kernel { 0x2c, SvcMapPhysicalMemory }, { 0x2d, SvcUnmapPhysicalMemory }, { 0x32, SvcSetThreadActivity }, - { 0x33, SvcGetThreadContext3 } + { 0x33, SvcGetThreadContext3 }, + { 0x34, SvcWaitForAddress } }; this.Ns = Ns; diff --git a/Ryujinx.HLE/OsHle/Kernel/SvcSystem.cs b/Ryujinx.HLE/OsHle/Kernel/SvcSystem.cs index a32b2d86f..08305522f 100644 --- a/Ryujinx.HLE/OsHle/Kernel/SvcSystem.cs +++ b/Ryujinx.HLE/OsHle/Kernel/SvcSystem.cs @@ -294,7 +294,7 @@ namespace Ryujinx.HLE.OsHle.Kernel InfoType == 19 || InfoType == 20) { - ThreadState.X0 = MakeError(ErrorModule.Kernel, KernelErr.InvalidInfo); + ThreadState.X0 = MakeError(ErrorModule.Kernel, KernelErr.InvalidEnumValue); return; } diff --git a/Ryujinx.HLE/OsHle/Kernel/SvcThreadSync.cs b/Ryujinx.HLE/OsHle/Kernel/SvcThreadSync.cs index ec9c40e08..9fc426176 100644 --- a/Ryujinx.HLE/OsHle/Kernel/SvcThreadSync.cs +++ b/Ryujinx.HLE/OsHle/Kernel/SvcThreadSync.cs @@ -197,6 +197,57 @@ namespace Ryujinx.HLE.OsHle.Kernel Process.Scheduler.EnterWait(CurrThread); } + private void SvcWaitForAddress(AThreadState ThreadState) + { + long Address = (long)ThreadState.X0; + ArbitrationType Type = (ArbitrationType)ThreadState.X1; + int Value = (int)ThreadState.X2; + ulong Timeout = ThreadState.X3; + + Ns.Log.PrintDebug(LogClass.KernelSvc, + "Address = " + Address.ToString("x16") + ", " + + "ArbitrationType = " + Type .ToString() + ", " + + "Value = " + Value .ToString("x8") + ", " + + "Timeout = " + Timeout.ToString("x16")); + + if (IsPointingInsideKernel(Address)) + { + Ns.Log.PrintWarning(LogClass.KernelSvc, $"Invalid address 0x{Address:x16}!"); + + ThreadState.X0 = MakeError(ErrorModule.Kernel, KernelErr.InvalidAddress); + + return; + } + + if (IsWordAddressUnaligned(Address)) + { + Ns.Log.PrintWarning(LogClass.KernelSvc, $"Unaligned address 0x{Address:x16}!"); + + ThreadState.X0 = MakeError(ErrorModule.Kernel, KernelErr.InvalidAlignment); + + return; + } + + switch (Type) + { + case ArbitrationType.WaitIfLessThan: + ThreadState.X0 = AddressArbiter.WaitForAddressIfLessThan(Process, ThreadState, Memory, Address, Value, Timeout, false); + break; + + case ArbitrationType.DecrementAndWaitIfLessThan: + ThreadState.X0 = AddressArbiter.WaitForAddressIfLessThan(Process, ThreadState, Memory, Address, Value, Timeout, true); + break; + + case ArbitrationType.WaitIfEqual: + ThreadState.X0 = AddressArbiter.WaitForAddressIfEqual(Process, ThreadState, Memory, Address, Value, Timeout); + break; + + default: + ThreadState.X0 = MakeError(ErrorModule.Kernel, KernelErr.InvalidEnumValue); + break; + } + } + private void MutexUnlock(KThread CurrThread, long MutexAddress) { lock (Process.ThreadSyncLock) From 60f2198a1e8e61fe1cfb8da30a6afcd86a672a85 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Thu, 19 Jul 2018 02:30:21 -0300 Subject: [PATCH 08/15] Support deswizzle of sparse tiled textures and some frame buffer fixes (#275) * Attempt to support deswizzle of sparse tiled textures * Use correct frame buffer and viewport sizes, started to clean up the copy engine * Correct texture width alignment * Use Scale/Translate registers to calculate viewport rect * Allow texture copy between frame buffers --- Ryujinx.Graphics/Gal/IGalFrameBuffer.cs | 19 +++ Ryujinx.Graphics/Gal/OpenGL/OGLFrameBuffer.cs | 82 +++++++++-- Ryujinx.HLE/Gpu/Engines/NvGpuEngine2d.cs | 130 ++++++++++++------ Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs | 26 +++- Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs | 14 +- Ryujinx.HLE/Gpu/Texture/TextureFactory.cs | 5 +- Ryujinx.HLE/Gpu/Texture/TextureHelper.cs | 8 +- Ryujinx.HLE/Gpu/Texture/TextureInfo.cs | 19 ++- Ryujinx.HLE/Gpu/Texture/TextureReader.cs | 18 +-- Ryujinx.HLE/Gpu/Texture/TextureWriter.cs | 28 +--- Ryujinx.HLE/OsHle/Services/Vi/NvFlinger.cs | 4 +- 11 files changed, 237 insertions(+), 116 deletions(-) diff --git a/Ryujinx.Graphics/Gal/IGalFrameBuffer.cs b/Ryujinx.Graphics/Gal/IGalFrameBuffer.cs index eaae0a492..1f62bdb37 100644 --- a/Ryujinx.Graphics/Gal/IGalFrameBuffer.cs +++ b/Ryujinx.Graphics/Gal/IGalFrameBuffer.cs @@ -22,6 +22,25 @@ namespace Ryujinx.Graphics.Gal void Render(); + void Copy( + long SrcKey, + long DstKey, + int SrcX0, + int SrcY0, + int SrcX1, + int SrcY1, + int DstX0, + int DstY0, + int DstX1, + int DstY1); + void GetBufferData(long Key, Action Callback); + + void SetBufferData( + long Key, + int Width, + int Height, + GalTextureFormat Format, + byte[] Buffer); } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLFrameBuffer.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLFrameBuffer.cs index 305fa37d8..cd52762c7 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLFrameBuffer.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLFrameBuffer.cs @@ -78,11 +78,6 @@ namespace Ryujinx.Graphics.Gal.OpenGL public void Create(long Key, int Width, int Height) { - //TODO: We should either use the original frame buffer size, - //or just remove the Width/Height arguments. - Width = Window.Width; - Height = Window.Height; - if (Fbs.TryGetValue(Key, out FrameBuffer Fb)) { if (Fb.Width != Width || @@ -125,8 +120,6 @@ namespace Ryujinx.Graphics.Gal.OpenGL GL.DrawBuffer(DrawBufferMode.ColorAttachment0); - GL.Viewport(0, 0, Width, Height); - Fbs.Add(Key, Fb); } @@ -230,7 +223,16 @@ namespace Ryujinx.Graphics.Gal.OpenGL { Viewport = new Rect(X, Y, Width, Height); - //TODO + SetViewport(Viewport); + } + + private void SetViewport(Rect Viewport) + { + GL.Viewport( + Viewport.X, + Viewport.Y, + Viewport.Width, + Viewport.Height); } public void Render() @@ -300,10 +302,38 @@ namespace Ryujinx.Graphics.Gal.OpenGL GL.Enable(EnableCap.Blend); } - //GL.Viewport(0, 0, 1280, 720); + SetViewport(Viewport); } } + public void Copy( + long SrcKey, + long DstKey, + int SrcX0, + int SrcY0, + int SrcX1, + int SrcY1, + int DstX0, + int DstY0, + int DstX1, + int DstY1) + { + if (Fbs.TryGetValue(SrcKey, out FrameBuffer SrcFb) && + Fbs.TryGetValue(DstKey, out FrameBuffer DstFb)) + { + GL.BindFramebuffer(FramebufferTarget.ReadFramebuffer, SrcFb.Handle); + GL.BindFramebuffer(FramebufferTarget.DrawFramebuffer, DstFb.Handle); + + GL.Clear(ClearBufferMask.ColorBufferBit); + + GL.BlitFramebuffer( + SrcX0, SrcY0, SrcX1, SrcY1, + DstX0, DstY0, DstX1, DstY1, + ClearBufferMask.ColorBufferBit, + BlitFramebufferFilter.Linear); + } +} + public void GetBufferData(long Key, Action Callback) { if (Fbs.TryGetValue(Key, out FrameBuffer Fb)) @@ -329,13 +359,35 @@ namespace Ryujinx.Graphics.Gal.OpenGL } } - private void SetViewport(Rect Viewport) + public void SetBufferData( + long Key, + int Width, + int Height, + GalTextureFormat Format, + byte[] Buffer) { - GL.Viewport( - Viewport.X, - Viewport.Y, - Viewport.Width, - Viewport.Height); + if (Fbs.TryGetValue(Key, out FrameBuffer Fb)) + { + GL.BindTexture(TextureTarget.Texture2D, Fb.TexHandle); + + const int Level = 0; + const int Border = 0; + + const PixelInternalFormat InternalFmt = PixelInternalFormat.Rgba; + + (PixelFormat GlFormat, PixelType Type) = OGLEnumConverter.GetTextureFormat(Format); + + GL.TexImage2D( + TextureTarget.Texture2D, + Level, + InternalFmt, + Width, + Height, + Border, + GlFormat, + Type, + Buffer); + } } private void EnsureInitialized() diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine2d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine2d.cs index f150b3f5e..d2c5f1262 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine2d.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine2d.cs @@ -1,6 +1,7 @@ using Ryujinx.Graphics.Gal; using Ryujinx.HLE.Gpu.Memory; using Ryujinx.HLE.Gpu.Texture; +using System; using System.Collections.Generic; namespace Ryujinx.HLE.Gpu.Engines @@ -64,6 +65,8 @@ namespace Ryujinx.HLE.Gpu.Engines bool SrcLinear = ReadRegister(NvGpuEngine2dReg.SrcLinear) != 0; int SrcWidth = ReadRegister(NvGpuEngine2dReg.SrcWidth); int SrcHeight = ReadRegister(NvGpuEngine2dReg.SrcHeight); + int SrcPitch = ReadRegister(NvGpuEngine2dReg.SrcPitch); + int SrcBlkDim = ReadRegister(NvGpuEngine2dReg.SrcBlockDimensions); bool DstLinear = ReadRegister(NvGpuEngine2dReg.DstLinear) != 0; int DstWidth = ReadRegister(NvGpuEngine2dReg.DstWidth); @@ -71,75 +74,114 @@ namespace Ryujinx.HLE.Gpu.Engines int DstPitch = ReadRegister(NvGpuEngine2dReg.DstPitch); int DstBlkDim = ReadRegister(NvGpuEngine2dReg.DstBlockDimensions); + TextureSwizzle SrcSwizzle = SrcLinear + ? TextureSwizzle.Pitch + : TextureSwizzle.BlockLinear; + TextureSwizzle DstSwizzle = DstLinear ? TextureSwizzle.Pitch : TextureSwizzle.BlockLinear; + int SrcBlockHeight = 1 << ((SrcBlkDim >> 4) & 0xf); int DstBlockHeight = 1 << ((DstBlkDim >> 4) & 0xf); - long Key = Vmm.GetPhysicalAddress(MakeInt64From2xInt32(NvGpuEngine2dReg.SrcAddress)); - long SrcAddress = MakeInt64From2xInt32(NvGpuEngine2dReg.SrcAddress); long DstAddress = MakeInt64From2xInt32(NvGpuEngine2dReg.DstAddress); - bool IsFbTexture = Gpu.Engine3d.IsFrameBufferPosition(Key); + long SrcKey = Vmm.GetPhysicalAddress(SrcAddress); + long DstKey = Vmm.GetPhysicalAddress(DstAddress); - if (IsFbTexture && DstLinear) + bool IsSrcFb = Gpu.Engine3d.IsFrameBufferPosition(SrcKey); + bool IsDstFb = Gpu.Engine3d.IsFrameBufferPosition(DstKey); + + TextureInfo SrcTexture() { - DstSwizzle = TextureSwizzle.BlockLinear; + return new TextureInfo( + SrcAddress, + SrcWidth, + SrcHeight, + SrcPitch, + SrcBlockHeight, 1, + SrcSwizzle, + GalTextureFormat.A8B8G8R8); } - TextureInfo DstTexture = new TextureInfo( - DstAddress, - DstWidth, - DstHeight, - DstBlockHeight, - DstBlockHeight, - DstSwizzle, - GalTextureFormat.A8B8G8R8); - - if (IsFbTexture) + TextureInfo DstTexture() { - //TODO: Change this when the correct frame buffer resolution is used. - //Currently, the frame buffer size is hardcoded to 1280x720. - SrcWidth = 1280; - SrcHeight = 720; + return new TextureInfo( + DstAddress, + DstWidth, + DstHeight, + DstPitch, + DstBlockHeight, 1, + DstSwizzle, + GalTextureFormat.A8B8G8R8); + } - Gpu.Renderer.FrameBuffer.GetBufferData(Key, (byte[] Buffer) => + //TODO: fb -> fb copies, tex -> fb copies, formats other than RGBA8, + //make it throw for unimpl stuff (like the copy mode)... + if (IsSrcFb && IsDstFb) + { + //Frame Buffer -> Frame Buffer copy. + Gpu.Renderer.FrameBuffer.Copy( + SrcKey, + DstKey, + 0, + 0, + SrcWidth, + SrcHeight, + 0, + 0, + DstWidth, + DstHeight); + } + if (IsSrcFb) + { + //Frame Buffer -> Texture copy. + Gpu.Renderer.FrameBuffer.GetBufferData(SrcKey, (byte[] Buffer) => { - CopyTexture( - Vmm, - DstTexture, - Buffer, - SrcWidth, - SrcHeight); + TextureInfo Src = SrcTexture(); + TextureInfo Dst = DstTexture(); + + if (Src.Width != Dst.Width || + Src.Height != Dst.Height) + { + throw new NotImplementedException("Texture resizing is not supported"); + } + + TextureWriter.Write(Vmm, Dst, Buffer); }); } + else if (IsDstFb) + { + //Texture -> Frame Buffer copy. + const GalTextureFormat Format = GalTextureFormat.A8B8G8R8; + + byte[] Buffer = TextureReader.Read(Vmm, SrcTexture()); + + Gpu.Renderer.FrameBuffer.SetBufferData( + DstKey, + DstWidth, + DstHeight, + Format, + Buffer); + } else { - long Size = SrcWidth * SrcHeight * 4; + //Texture -> Texture copy. + TextureInfo Src = SrcTexture(); + TextureInfo Dst = DstTexture(); - byte[] Buffer = Vmm.ReadBytes(SrcAddress, Size); + if (Src.Width != Dst.Width || + Src.Height != Dst.Height) + { + throw new NotImplementedException("Texture resizing is not supported"); + } - CopyTexture( - Vmm, - DstTexture, - Buffer, - SrcWidth, - SrcHeight); + TextureWriter.Write(Vmm, Dst, TextureReader.Read(Vmm, Src)); } } - private void CopyTexture( - NvGpuVmm Vmm, - TextureInfo Texture, - byte[] Buffer, - int Width, - int Height) - { - TextureWriter.Write(Vmm, Texture, Buffer, Width, Height); - } - private long MakeInt64From2xInt32(NvGpuEngine2dReg Reg) { return diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs index 5c474ab0b..dce25a5e9 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs @@ -132,10 +132,22 @@ namespace Ryujinx.HLE.Gpu.Engines int Width = ReadRegister(NvGpuEngine3dReg.FrameBufferNWidth + FbIndex * 0x10); int Height = ReadRegister(NvGpuEngine3dReg.FrameBufferNHeight + FbIndex * 0x10); - //Note: Using the Width/Height results seems to give incorrect results. - //Maybe the size of all frame buffers is hardcoded to screen size? This seems unlikely. - Gpu.Renderer.FrameBuffer.Create(Key, 1280, 720); + float TX = ReadRegisterFloat(NvGpuEngine3dReg.ViewportNTranslateX + FbIndex * 4); + float TY = ReadRegisterFloat(NvGpuEngine3dReg.ViewportNTranslateY + FbIndex * 4); + + float SX = ReadRegisterFloat(NvGpuEngine3dReg.ViewportNScaleX + FbIndex * 4); + float SY = ReadRegisterFloat(NvGpuEngine3dReg.ViewportNScaleY + FbIndex * 4); + + int VpX = (int)MathF.Max(0, TX - MathF.Abs(SX)); + int VpY = (int)MathF.Max(0, TY - MathF.Abs(SY)); + + int VpW = (int)(TX + MathF.Abs(SX)) - VpX; + int VpH = (int)(TY + MathF.Abs(SY)) - VpY; + + Gpu.Renderer.FrameBuffer.Create(Key, Width, Height); Gpu.Renderer.FrameBuffer.Bind(Key); + + Gpu.Renderer.FrameBuffer.SetViewport(VpX, VpY, VpW, VpH); } private long[] UploadShaders(NvGpuVmm Vmm) @@ -195,8 +207,8 @@ namespace Ryujinx.HLE.Gpu.Engines Gpu.Renderer.Shader.Bind(Key); } - float SignX = GetFlipSign(NvGpuEngine3dReg.ViewportScaleX); - float SignY = GetFlipSign(NvGpuEngine3dReg.ViewportScaleY); + float SignX = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleX); + float SignY = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleY); Gpu.Renderer.Shader.SetFlip(SignX, SignY); @@ -220,8 +232,8 @@ namespace Ryujinx.HLE.Gpu.Engines private void SetFrontFace() { - float SignX = GetFlipSign(NvGpuEngine3dReg.ViewportScaleX); - float SignY = GetFlipSign(NvGpuEngine3dReg.ViewportScaleY); + float SignX = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleX); + float SignY = GetFlipSign(NvGpuEngine3dReg.ViewportNScaleY); GalFrontFace FrontFace = (GalFrontFace)ReadRegister(NvGpuEngine3dReg.FrontFace); diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs index 3de2885ef..e7dabe44a 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3dReg.cs @@ -6,12 +6,14 @@ namespace Ryujinx.HLE.Gpu.Engines FrameBufferNWidth = 0x202, FrameBufferNHeight = 0x203, FrameBufferNFormat = 0x204, - ViewportScaleX = 0x280, - ViewportScaleY = 0x281, - ViewportScaleZ = 0x282, - ViewportTranslateX = 0x283, - ViewportTranslateY = 0x284, - ViewportTranslateZ = 0x285, + ViewportNScaleX = 0x280, + ViewportNScaleY = 0x281, + ViewportNScaleZ = 0x282, + ViewportNTranslateX = 0x283, + ViewportNTranslateY = 0x284, + ViewportNTranslateZ = 0x285, + ViewportNHoriz = 0x300, + ViewportNVert = 0x301, VertexArrayFirst = 0x35d, VertexArrayCount = 0x35e, ClearDepth = 0x364, diff --git a/Ryujinx.HLE/Gpu/Texture/TextureFactory.cs b/Ryujinx.HLE/Gpu/Texture/TextureFactory.cs index 9df0b6000..4db0b6f10 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureFactory.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureFactory.cs @@ -55,9 +55,11 @@ namespace Ryujinx.HLE.Gpu.Texture int Pitch = (Tic[3] & 0xffff) << 5; - int BlockHeightLog2 = (Tic[3] >> 3) & 7; + int BlockHeightLog2 = (Tic[3] >> 3) & 7; + int TileWidthLog2 = (Tic[3] >> 10) & 7; int BlockHeight = 1 << BlockHeightLog2; + int TileWidth = 1 << TileWidthLog2; int Width = (Tic[4] & 0xffff) + 1; int Height = (Tic[5] & 0xffff) + 1; @@ -68,6 +70,7 @@ namespace Ryujinx.HLE.Gpu.Texture Height, Pitch, BlockHeight, + TileWidth, Swizzle, Format); diff --git a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs index de26c397d..ecf2b6bf5 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureHelper.cs @@ -7,8 +7,14 @@ namespace Ryujinx.HLE.Gpu.Texture { static class TextureHelper { - public static ISwizzle GetSwizzle(TextureInfo Texture, int Width, int Bpp) + public static ISwizzle GetSwizzle(TextureInfo Texture, int BlockWidth, int Bpp) { + int Width = (Texture.Width + (BlockWidth - 1)) / BlockWidth; + + int AlignMask = Texture.TileWidth * (64 / Bpp) - 1; + + Width = (Width + AlignMask) & ~AlignMask; + switch (Texture.Swizzle) { case TextureSwizzle._1dBuffer: diff --git a/Ryujinx.HLE/Gpu/Texture/TextureInfo.cs b/Ryujinx.HLE/Gpu/Texture/TextureInfo.cs index 31784bbc5..2a98ce00f 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureInfo.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureInfo.cs @@ -11,6 +11,7 @@ namespace Ryujinx.HLE.Gpu.Texture public int Pitch { get; private set; } public int BlockHeight { get; private set; } + public int TileWidth { get; private set; } public TextureSwizzle Swizzle { get; private set; } @@ -29,6 +30,8 @@ namespace Ryujinx.HLE.Gpu.Texture BlockHeight = 16; + TileWidth = 1; + Swizzle = TextureSwizzle.BlockLinear; Format = GalTextureFormat.A8B8G8R8; @@ -40,16 +43,18 @@ namespace Ryujinx.HLE.Gpu.Texture int Height, int Pitch, int BlockHeight, + int TileWidth, TextureSwizzle Swizzle, GalTextureFormat Format) { - this.Position = Position; - this.Width = Width; - this.Height = Height; - this.Pitch = Pitch; - this.BlockHeight = BlockHeight; - this.Swizzle = Swizzle; - this.Format = Format; + this.Position = Position; + this.Width = Width; + this.Height = Height; + this.Pitch = Pitch; + this.BlockHeight = BlockHeight; + this.TileWidth = TileWidth; + this.Swizzle = Swizzle; + this.Format = Format; } } } \ No newline at end of file diff --git a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs index 26129877a..350ab825f 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureReader.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureReader.cs @@ -56,7 +56,7 @@ namespace Ryujinx.HLE.Gpu.Texture byte[] Output = new byte[Width * Height]; - ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 1); + ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 1, 1); (AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition( Memory, @@ -89,7 +89,7 @@ namespace Ryujinx.HLE.Gpu.Texture byte[] Output = new byte[Width * Height * 2]; - ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 2); + ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 1, 2); (AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition( Memory, @@ -127,7 +127,7 @@ namespace Ryujinx.HLE.Gpu.Texture byte[] Output = new byte[Width * Height * 2]; - ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 2); + ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 1, 2); (AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition( Memory, @@ -164,7 +164,7 @@ namespace Ryujinx.HLE.Gpu.Texture byte[] Output = new byte[Width * Height * 2]; - ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 2); + ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 1, 2); (AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition( Memory, @@ -197,7 +197,7 @@ namespace Ryujinx.HLE.Gpu.Texture byte[] Output = new byte[Width * Height * 4]; - ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 4); + ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 1, 4); (AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition( Memory, @@ -230,7 +230,7 @@ namespace Ryujinx.HLE.Gpu.Texture byte[] Output = new byte[Width * Height * 8]; - ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 8); + ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 1, 8); (AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition( Memory, @@ -263,7 +263,7 @@ namespace Ryujinx.HLE.Gpu.Texture byte[] Output = new byte[Width * Height * 16]; - ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 16); + ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 1, 16); (AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition( Memory, @@ -298,7 +298,7 @@ namespace Ryujinx.HLE.Gpu.Texture byte[] Output = new byte[Width * Height * 8]; - ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 8); + ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 4, 8); (AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition( Memory, @@ -331,7 +331,7 @@ namespace Ryujinx.HLE.Gpu.Texture byte[] Output = new byte[Width * Height * 16]; - ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 16); + ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, BlockWidth, 16); (AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition( Memory, diff --git a/Ryujinx.HLE/Gpu/Texture/TextureWriter.cs b/Ryujinx.HLE/Gpu/Texture/TextureWriter.cs index b64302a5a..a87d4545b 100644 --- a/Ryujinx.HLE/Gpu/Texture/TextureWriter.cs +++ b/Ryujinx.HLE/Gpu/Texture/TextureWriter.cs @@ -6,29 +6,9 @@ namespace Ryujinx.HLE.Gpu.Texture { static class TextureWriter { - public static void Write( - IAMemory Memory, - TextureInfo Texture, - byte[] Data, - int Width, - int Height) + public unsafe static void Write(IAMemory Memory, TextureInfo Texture, byte[] Data) { - switch (Texture.Format) - { - case GalTextureFormat.A8B8G8R8: Write4Bpp(Memory, Texture, Data, Width, Height); break; - - default: throw new NotImplementedException(Texture.Format.ToString()); - } - } - - private unsafe static void Write4Bpp( - IAMemory Memory, - TextureInfo Texture, - byte[] Data, - int Width, - int Height) - { - ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, Width, 4); + ISwizzle Swizzle = TextureHelper.GetSwizzle(Texture, 1, 4); (AMemory CpuMem, long Position) = TextureHelper.GetMemoryAndPosition( Memory, @@ -38,8 +18,8 @@ namespace Ryujinx.HLE.Gpu.Texture { long InOffs = 0; - for (int Y = 0; Y < Height; Y++) - for (int X = 0; X < Width; X++) + for (int Y = 0; Y < Texture.Height; Y++) + for (int X = 0; X < Texture.Width; X++) { long Offset = (uint)Swizzle.GetSwizzleOffset(X, Y); diff --git a/Ryujinx.HLE/OsHle/Services/Vi/NvFlinger.cs b/Ryujinx.HLE/OsHle/Services/Vi/NvFlinger.cs index a3ed3ab51..5307127be 100644 --- a/Ryujinx.HLE/OsHle/Services/Vi/NvFlinger.cs +++ b/Ryujinx.HLE/OsHle/Services/Vi/NvFlinger.cs @@ -279,8 +279,8 @@ namespace Ryujinx.HLE.OsHle.Services.Android private void SendFrameBuffer(ServiceCtx Context, int Slot) { - int FbWidth = 1280; - int FbHeight = 720; + int FbWidth = BufferQueue[Slot].Data.Width; + int FbHeight = BufferQueue[Slot].Data.Height; int NvMapHandle = BitConverter.ToInt32(BufferQueue[Slot].Data.RawData, 0x4c); int BufferOffset = BitConverter.ToInt32(BufferQueue[Slot].Data.RawData, 0x50); From bdb6cbb43514f6d8eb96847a22b70709ae705827 Mon Sep 17 00:00:00 2001 From: Merry Date: Thu, 19 Jul 2018 06:32:37 +0100 Subject: [PATCH 09/15] AOpCodeTable: Speed up instruction decoding (#284) --- ChocolArm64/AOpCodeTable.cs | 71 +++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index c69de38f5..689e03923 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -4,6 +4,7 @@ using ChocolArm64.Instruction; using ChocolArm64.Instruction32; using ChocolArm64.State; using System; +using System.Collections.Generic; namespace ChocolArm64 { @@ -438,18 +439,43 @@ namespace ChocolArm64 SetA64("0>001110<<0xxxxx001110xxxxxxxxxx", AInstEmit.Zip1_V, typeof(AOpCodeSimdReg)); SetA64("0>001110<<0xxxxx011110xxxxxxxxxx", AInstEmit.Zip2_V, typeof(AOpCodeSimdReg)); #endregion + +#region "Generate InstA64FastLookup Table (AArch64)" + var Tmp = new List[FastLookupSize]; + for (int i = 0; i < FastLookupSize; i++) + { + Tmp[i] = new List(); + } + + foreach (var Inst in AllInstA64) + { + int Mask = ToFastLookupIndex(Inst.Mask); + int Value = ToFastLookupIndex(Inst.Value); + + for (int i = 0; i < FastLookupSize; i++) + { + if ((i & Mask) == Value) + { + Tmp[i].Add(Inst); + } + } + } + + for (int i = 0; i < FastLookupSize; i++) + { + InstA64FastLookup[i] = Tmp[i].ToArray(); + } +#endregion } - private class TreeNode + private class InstInfo { public int Mask; public int Value; - public TreeNode Next; - public AInst Inst; - public TreeNode(int Mask, int Value, AInst Inst) + public InstInfo(int Mask, int Value, AInst Inst) { this.Mask = Mask; this.Value = Value; @@ -457,8 +483,11 @@ namespace ChocolArm64 } } - private static TreeNode InstHeadA32; - private static TreeNode InstHeadA64; + private static List AllInstA32 = new List(); + private static List AllInstA64 = new List(); + + private static int FastLookupSize = 0x1000; + private static InstInfo[][] InstA64FastLookup = new InstInfo[FastLookupSize][]; private static void SetA32(string Encoding, AInstInterpreter Interpreter, Type Type) { @@ -519,7 +548,7 @@ namespace ChocolArm64 if (XBits == 0) { - InsertTop(XMask, Value, Inst, Mode); + InsertInst(XMask, Value, Inst, Mode); return; } @@ -535,55 +564,53 @@ namespace ChocolArm64 if (Mask != Blacklisted) { - InsertTop(XMask, Value | Mask, Inst, Mode); + InsertInst(XMask, Value | Mask, Inst, Mode); } } } - private static void InsertTop( + private static void InsertInst( int XMask, int Value, AInst Inst, AExecutionMode Mode) { - TreeNode Node = new TreeNode(XMask, Value, Inst); + InstInfo Info = new InstInfo(XMask, Value, Inst); if (Mode == AExecutionMode.AArch64) { - Node.Next = InstHeadA64; - - InstHeadA64 = Node; + AllInstA64.Add(Info); } else { - Node.Next = InstHeadA32; - - InstHeadA32 = Node; + AllInstA32.Add(Info); } } public static AInst GetInstA32(int OpCode) { - return GetInst(InstHeadA32, OpCode); + return GetInstFromList(AllInstA32, OpCode); } public static AInst GetInstA64(int OpCode) { - return GetInst(InstHeadA64, OpCode); + return GetInstFromList(InstA64FastLookup[ToFastLookupIndex(OpCode)], OpCode); } - private static AInst GetInst(TreeNode Head, int OpCode) + private static int ToFastLookupIndex(int Value) { - TreeNode Node = Head; + return ((Value >> 10) & 0x00F) | ((Value >> 18) & 0xFF0); + } - do + private static AInst GetInstFromList(IEnumerable InstList, int OpCode) + { + foreach (var Node in InstList) { if ((OpCode & Node.Mask) == Node.Value) { return Node.Inst; } } - while ((Node = Node.Next) != null); return AInst.Undefined; } From cd203e98f2bed076798972da1d108bb64b1884ec Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 19 Jul 2018 02:33:27 -0300 Subject: [PATCH 10/15] Implement Geometry shaders (#280) * Implement Geometry shaders * Add EmitVertex() and EndPrimitive() * Read output geometry data from header * Stub Vmad * Add Iadd_I32 * Stub Mov_S (S2R) * Stub Isberd * Change vertex index to gpr39 in Abuf * Add stub messages for consistency * Do not print input block when there is no attributes * Use GL_ARB_enhanced_layouts * Skip geometry shaders when there's no GL_ARB_enhanced_layouts * Address feedback * Address feedback --- Ryujinx.Graphics/Gal/OpenGL/OGLExtension.cs | 43 +++++ Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs | 22 ++- Ryujinx.Graphics/Gal/Shader/GlslDecl.cs | 7 +- Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs | 156 ++++++++++++++++-- .../Gal/Shader/ShaderDecodeAlu.cs | 37 +++++ .../Gal/Shader/ShaderDecodeHelper.cs | 8 +- .../Gal/Shader/ShaderDecodeMem.cs | 3 + .../Gal/Shader/ShaderDecodeMove.cs | 20 +++ .../Gal/Shader/ShaderDecodeSpecial.cs | 29 ++++ Ryujinx.Graphics/Gal/Shader/ShaderDecoder.cs | 12 +- Ryujinx.Graphics/Gal/Shader/ShaderHeader.cs | 73 ++++++++ Ryujinx.Graphics/Gal/Shader/ShaderIrInst.cs | 5 +- .../Gal/Shader/ShaderIrOperAbuf.cs | 11 +- .../Gal/Shader/ShaderOpCodeTable.cs | 4 + Ryujinx.Graphics/Gal/ShaderDumper.cs | 44 ++++- 15 files changed, 426 insertions(+), 48 deletions(-) create mode 100644 Ryujinx.Graphics/Gal/OpenGL/OGLExtension.cs create mode 100644 Ryujinx.Graphics/Gal/Shader/ShaderDecodeSpecial.cs create mode 100644 Ryujinx.Graphics/Gal/Shader/ShaderHeader.cs diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLExtension.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLExtension.cs new file mode 100644 index 000000000..69fce6d31 --- /dev/null +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLExtension.cs @@ -0,0 +1,43 @@ +using OpenTK.Graphics.OpenGL; + +namespace Ryujinx.Graphics.Gal.OpenGL +{ + static class OGLExtension + { + private static bool Initialized = false; + + private static bool EnhancedLayouts; + + public static bool HasEnhancedLayouts() + { + EnsureInitialized(); + + return EnhancedLayouts; + } + + private static void EnsureInitialized() + { + if (Initialized) + { + return; + } + + EnhancedLayouts = HasExtension("GL_ARB_enhanced_layouts"); + } + + private static bool HasExtension(string Name) + { + int NumExtensions = GL.GetInteger(GetPName.NumExtensions); + + for (int Extension = 0; Extension < NumExtensions; Extension++) + { + if (GL.GetString(StringNameIndexed.Extensions, Extension) == Name) + { + return true; + } + } + + return false; + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs index ad7177550..fe98aa091 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs @@ -118,20 +118,20 @@ namespace Ryujinx.Graphics.Gal.OpenGL if (IsDualVp) { - ShaderDumper.Dump(Memory, Position + 0x50, Type, "a"); - ShaderDumper.Dump(Memory, PositionB + 0x50, Type, "b"); + ShaderDumper.Dump(Memory, Position, Type, "a"); + ShaderDumper.Dump(Memory, PositionB, Type, "b"); Program = Decompiler.Decompile( Memory, - Position + 0x50, - PositionB + 0x50, + Position, + PositionB, Type); } else { - ShaderDumper.Dump(Memory, Position + 0x50, Type); + ShaderDumper.Dump(Memory, Position, Type); - Program = Decompiler.Decompile(Memory, Position + 0x50, Type); + Program = Decompiler.Decompile(Memory, Position, Type); } return new ShaderStage( @@ -198,6 +198,16 @@ namespace Ryujinx.Graphics.Gal.OpenGL private void Bind(ShaderStage Stage) { + if (Stage.Type == GalShaderType.Geometry) + { + //Enhanced layouts are required for Geometry shaders + //skip this stage if current driver has no ARB_enhanced_layouts + if (!OGLExtension.HasEnhancedLayouts()) + { + return; + } + } + switch (Stage.Type) { case GalShaderType.Vertex: Current.Vertex = Stage; break; diff --git a/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs b/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs index d3284f9f5..7688545c0 100644 --- a/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs +++ b/Ryujinx.Graphics/Gal/Shader/GlslDecl.cs @@ -4,13 +4,13 @@ namespace Ryujinx.Graphics.Gal.Shader { class GlslDecl { + public const int LayerAttr = 0x064; public const int TessCoordAttrX = 0x2f0; public const int TessCoordAttrY = 0x2f4; public const int TessCoordAttrZ = 0x2f8; public const int InstanceIdAttr = 0x2f8; public const int VertexIdAttr = 0x2fc; public const int FaceAttr = 0x3fc; - public const int GlPositionWAttr = 0x7c; public const int MaxUboSize = 1024; @@ -210,7 +210,8 @@ namespace Ryujinx.Graphics.Gal.Shader //This is a built-in input variable. if (Abuf.Offs == VertexIdAttr || Abuf.Offs == InstanceIdAttr || - Abuf.Offs == FaceAttr) + Abuf.Offs == FaceAttr || + Abuf.Offs == LayerAttr) { break; } @@ -254,6 +255,8 @@ namespace Ryujinx.Graphics.Gal.Shader m_Attributes.Add(Index, DeclInfo); } + + Traverse(Abuf, Abuf.Vertex); break; } diff --git a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs index 575fb72f9..a338f4041 100644 --- a/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs +++ b/Ryujinx.Graphics/Gal/Shader/GlslDecompiler.cs @@ -21,10 +21,14 @@ namespace Ryujinx.Graphics.Gal.Shader private const string IdentationStr = " "; + private const int MaxVertexInput = 3; + private static string[] ElemTypes = new string[] { "float", "vec2", "vec3", "vec4" }; private GlslDecl Decl; + private ShaderHeader Header, HeaderB; + private ShaderIrBlock[] Blocks, BlocksB; private StringBuilder SB; @@ -50,6 +54,7 @@ namespace Ryujinx.Graphics.Gal.Shader { ShaderIrInst.Cle, GetCleExpr }, { ShaderIrInst.Clt, GetCltExpr }, { ShaderIrInst.Cne, GetCneExpr }, + { ShaderIrInst.Cut, GetCutExpr }, { ShaderIrInst.Exit, GetExitExpr }, { ShaderIrInst.Fabs, GetAbsExpr }, { ShaderIrInst.Fadd, GetAddExpr }, @@ -110,6 +115,9 @@ namespace Ryujinx.Graphics.Gal.Shader long VpBPosition, GalShaderType ShaderType) { + Header = new ShaderHeader(Memory, VpAPosition); + HeaderB = new ShaderHeader(Memory, VpBPosition); + Blocks = ShaderDecoder.Decode(Memory, VpAPosition); BlocksB = ShaderDecoder.Decode(Memory, VpBPosition); @@ -123,6 +131,9 @@ namespace Ryujinx.Graphics.Gal.Shader public GlslProgram Decompile(IGalMemory Memory, long Position, GalShaderType ShaderType) { + Header = new ShaderHeader(Memory, Position); + HeaderB = null; + Blocks = ShaderDecoder.Decode(Memory, Position); BlocksB = null; @@ -137,6 +148,7 @@ namespace Ryujinx.Graphics.Gal.Shader SB.AppendLine("#version 410 core"); + PrintDeclHeader(); PrintDeclTextures(); PrintDeclUniforms(); PrintDeclAttributes(); @@ -170,6 +182,37 @@ namespace Ryujinx.Graphics.Gal.Shader Decl.Uniforms.Values); } + private void PrintDeclHeader() + { + if (Decl.ShaderType == GalShaderType.Geometry) + { + int MaxVertices = Header.MaxOutputVertexCount; + + string OutputTopology; + + switch (Header.OutputTopology) + { + case ShaderHeader.PointList: OutputTopology = "points"; break; + case ShaderHeader.LineStrip: OutputTopology = "line_strip"; break; + case ShaderHeader.TriangleStrip: OutputTopology = "triangle_strip"; break; + + default: throw new InvalidOperationException(); + } + + SB.AppendLine("#extension GL_ARB_enhanced_layouts : require"); + + SB.AppendLine(); + + SB.AppendLine("// Stubbed. Maxwell geometry shaders don't inform input geometry type"); + + SB.AppendLine("layout(triangles) in;" + Environment.NewLine); + + SB.AppendLine($"layout({OutputTopology}, max_vertices = {MaxVertices}) out;"); + + SB.AppendLine(); + } + } + private void PrintDeclTextures() { PrintDecls(Decl.Textures, "uniform sampler2D"); @@ -201,7 +244,9 @@ namespace Ryujinx.Graphics.Gal.Shader private void PrintDeclAttributes() { - PrintDecls(Decl.Attributes); + string GeometryArray = (Decl.ShaderType == GalShaderType.Geometry) ? "[" + MaxVertexInput + "]" : ""; + + PrintDecls(Decl.Attributes, Suffix: GeometryArray); } private void PrintDeclInAttributes() @@ -211,7 +256,27 @@ namespace Ryujinx.Graphics.Gal.Shader SB.AppendLine("layout (location = " + GlslDecl.PositionOutAttrLocation + ") in vec4 " + GlslDecl.PositionOutAttrName + ";"); } - PrintDeclAttributes(Decl.InAttributes.Values, "in"); + if (Decl.ShaderType == GalShaderType.Geometry) + { + if (Decl.InAttributes.Count > 0) + { + SB.AppendLine("in Vertex {"); + + foreach (ShaderDeclInfo DeclInfo in Decl.InAttributes.Values.OrderBy(DeclKeySelector)) + { + if (DeclInfo.Index >= 0) + { + SB.AppendLine(IdentationStr + "layout (location = " + DeclInfo.Index + ") " + GetDecl(DeclInfo) + "; "); + } + } + + SB.AppendLine("} block_in[];" + Environment.NewLine); + } + } + else + { + PrintDeclAttributes(Decl.InAttributes.Values, "in"); + } } private void PrintDeclOutAttributes() @@ -254,7 +319,7 @@ namespace Ryujinx.Graphics.Gal.Shader PrintDecls(Decl.Preds, "bool"); } - private void PrintDecls(IReadOnlyDictionary Dict, string CustomType = null) + private void PrintDecls(IReadOnlyDictionary Dict, string CustomType = null, string Suffix = "") { foreach (ShaderDeclInfo DeclInfo in Dict.Values.OrderBy(DeclKeySelector)) { @@ -262,15 +327,15 @@ namespace Ryujinx.Graphics.Gal.Shader if (CustomType != null) { - Name = CustomType + " " + DeclInfo.Name + ";"; + Name = CustomType + " " + DeclInfo.Name + Suffix + ";"; } else if (DeclInfo.Name == GlslDecl.FragmentOutputName) { - Name = "layout (location = 0) out " + GetDecl(DeclInfo) + ";" + Environment.NewLine; + Name = "layout (location = 0) out " + GetDecl(DeclInfo) + Suffix + ";" + Environment.NewLine; } else { - Name = GetDecl(DeclInfo) + ";"; + Name = GetDecl(DeclInfo) + Suffix + ";"; } SB.AppendLine(Name); @@ -307,7 +372,21 @@ namespace Ryujinx.Graphics.Gal.Shader string Swizzle = ".xyzw".Substring(0, DeclInfo.Size + 1); - SB.AppendLine(IdentationStr + Attr.Name + Swizzle + " = " + DeclInfo.Name + ";"); + if (Decl.ShaderType == GalShaderType.Geometry) + { + for (int Vertex = 0; Vertex < MaxVertexInput; Vertex++) + { + string Dst = Attr.Name + "[" + Vertex + "]" + Swizzle; + + string Src = "block_in[" + Vertex + "]." + DeclInfo.Name; + + SB.AppendLine(IdentationStr + Dst + " = " + Src + ";"); + } + } + else + { + SB.AppendLine(IdentationStr + Attr.Name + Swizzle + " = " + DeclInfo.Name + ";"); + } } if (BlocksB != null) @@ -320,6 +399,16 @@ namespace Ryujinx.Graphics.Gal.Shader SB.AppendLine(IdentationStr + GlslDecl.ProgramName + "();"); } + if (Decl.ShaderType != GalShaderType.Geometry) + { + PrintAttrToOutput(); + } + + SB.AppendLine("}"); + } + + private void PrintAttrToOutput(string Identation = IdentationStr) + { foreach (KeyValuePair KV in Decl.OutAttributes) { if (!Decl.Attributes.TryGetValue(KV.Key, out ShaderDeclInfo Attr)) @@ -331,21 +420,26 @@ namespace Ryujinx.Graphics.Gal.Shader string Swizzle = ".xyzw".Substring(0, DeclInfo.Size + 1); - SB.AppendLine(IdentationStr + DeclInfo.Name + " = " + Attr.Name + Swizzle + ";"); + string Name = Attr.Name; + + if (Decl.ShaderType == GalShaderType.Geometry) + { + Name += "[0]"; + } + + SB.AppendLine(Identation + DeclInfo.Name + " = " + Name + Swizzle + ";"); } if (Decl.ShaderType == GalShaderType.Vertex) { - SB.AppendLine(IdentationStr + "gl_Position.xy *= " + GlslDecl.FlipUniformName + ";"); + SB.AppendLine(Identation + "gl_Position.xy *= " + GlslDecl.FlipUniformName + ";"); } if (Decl.ShaderType != GalShaderType.Fragment) { - SB.AppendLine(IdentationStr + GlslDecl.PositionOutAttrName + " = gl_Position;"); - SB.AppendLine(IdentationStr + GlslDecl.PositionOutAttrName + ".w = 1;"); + SB.AppendLine(Identation + GlslDecl.PositionOutAttrName + " = gl_Position;"); + SB.AppendLine(Identation + GlslDecl.PositionOutAttrName + ".w = 1;"); } - - SB.AppendLine("}"); } private void PrintBlockScope( @@ -484,11 +578,17 @@ namespace Ryujinx.Graphics.Gal.Shader { SB.AppendLine(Identation + "continue;"); } - - continue; } + else if (Op.Inst == ShaderIrInst.Emit) + { + PrintAttrToOutput(Identation); - SB.AppendLine(Identation + GetSrcExpr(Op, true) + ";"); + SB.AppendLine(Identation + "EmitVertex();"); + } + else + { + SB.AppendLine(Identation + GetSrcExpr(Op, true) + ";"); + } } else if (Node is ShaderIrCmnt Cmnt) { @@ -634,6 +734,14 @@ namespace Ryujinx.Graphics.Gal.Shader private string GetOutAbufName(ShaderIrOperAbuf Abuf) { + if (Decl.ShaderType == GalShaderType.Geometry) + { + switch (Abuf.Offs) + { + case GlslDecl.LayerAttr: return "gl_Layer"; + } + } + return GetAttrTempName(Abuf); } @@ -692,7 +800,16 @@ namespace Ryujinx.Graphics.Gal.Shader throw new InvalidOperationException(); } - return DeclInfo.Name + Swizzle; + if (Decl.ShaderType == GalShaderType.Geometry) + { + string Vertex = "floatBitsToInt(" + GetSrcExpr(Abuf.Vertex) + ")"; + + return DeclInfo.Name + "[" + Vertex + "]" + Swizzle; + } + else + { + return DeclInfo.Name + Swizzle; + } } private string GetName(ShaderIrOperGpr Gpr) @@ -805,6 +922,8 @@ namespace Ryujinx.Graphics.Gal.Shader private string GetCneExpr(ShaderIrOp Op) => GetBinaryExpr(Op, "!="); + private string GetCutExpr(ShaderIrOp Op) => "EndPrimitive()"; + private string GetCneuExpr(ShaderIrOp Op) => GetBinaryExprWithNaN(Op, "!="); private string GetCnumExpr(ShaderIrOp Op) => GetUnaryCall(Op, "!isnan"); @@ -1104,8 +1223,9 @@ namespace Ryujinx.Graphics.Gal.Shader switch (Node) { case ShaderIrOperAbuf Abuf: - return Abuf.Offs == GlslDecl.VertexIdAttr || + return Abuf.Offs == GlslDecl.LayerAttr || Abuf.Offs == GlslDecl.InstanceIdAttr || + Abuf.Offs == GlslDecl.VertexIdAttr || Abuf.Offs == GlslDecl.FaceAttr ? OperType.I32 : OperType.F32; diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeAlu.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeAlu.cs index a44073513..00f072f19 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeAlu.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeAlu.cs @@ -442,6 +442,41 @@ namespace Ryujinx.Graphics.Gal.Shader return Signed ? ShaderIrInst.Asr : ShaderIrInst.Lsr; } + public static void Vmad(ShaderIrBlock Block, long OpCode) + { + ShaderIrNode OperA = GetOperGpr8(OpCode); + + ShaderIrNode OperB; + + if (((OpCode >> 50) & 1) != 0) + { + OperB = GetOperGpr20(OpCode); + } + else + { + OperB = GetOperImm19_20(OpCode); + } + + ShaderIrOperGpr OperC = GetOperGpr39(OpCode); + + ShaderIrNode Tmp = new ShaderIrOp(ShaderIrInst.Mul, OperA, OperB); + + ShaderIrNode Final = new ShaderIrOp(ShaderIrInst.Add, Tmp, OperC); + + int Shr = (int)((OpCode >> 51) & 3); + + if (Shr != 0) + { + int Shift = (Shr == 2) ? 15 : 7; + + Final = new ShaderIrOp(ShaderIrInst.Lsr, Final, new ShaderIrOperImm(Shift)); + } + + Block.AddNode(new ShaderIrCmnt("Stubbed. Instruction is reduced to a * b + c")); + + Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), Final), OpCode)); + } + public static void Xmad_CR(ShaderIrBlock Block, long OpCode) { EmitXmad(Block, OpCode, ShaderOper.CR); @@ -819,6 +854,8 @@ namespace Ryujinx.Graphics.Gal.Shader OperA = GetAluFabsFneg(OperA, AbsA, NegA); + Block.AddNode(new ShaderIrCmnt("Stubbed.")); + Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), OperA), OpCode)); } diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeHelper.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeHelper.cs index 1f1b158ef..7d7b2f6c6 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeHelper.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeHelper.cs @@ -7,14 +7,15 @@ namespace Ryujinx.Graphics.Gal.Shader public static ShaderIrOperAbuf[] GetOperAbuf20(long OpCode) { int Abuf = (int)(OpCode >> 20) & 0x3ff; - int Reg = (int)(OpCode >> 39) & 0xff; int Size = (int)(OpCode >> 47) & 3; + ShaderIrOperGpr Vertex = GetOperGpr39(OpCode); + ShaderIrOperAbuf[] Opers = new ShaderIrOperAbuf[Size + 1]; for (int Index = 0; Index <= Size; Index++) { - Opers[Index] = new ShaderIrOperAbuf(Abuf + Index * 4, Reg); + Opers[Index] = new ShaderIrOperAbuf(Abuf + Index * 4, Vertex); } return Opers; @@ -23,9 +24,8 @@ namespace Ryujinx.Graphics.Gal.Shader public static ShaderIrOperAbuf GetOperAbuf28(long OpCode) { int Abuf = (int)(OpCode >> 28) & 0x3ff; - int Reg = (int)(OpCode >> 39) & 0xff; - return new ShaderIrOperAbuf(Abuf, Reg); + return new ShaderIrOperAbuf(Abuf, GetOperGpr39(OpCode)); } public static ShaderIrOperCbuf GetOperCbuf34(long OpCode) diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMem.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMem.cs index 083b0c63a..aea7e744d 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMem.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMem.cs @@ -35,6 +35,9 @@ namespace Ryujinx.Graphics.Gal.Shader { ShaderIrNode[] Opers = GetOperAbuf20(OpCode); + //Used by GS + ShaderIrOperGpr Vertex = GetOperGpr39(OpCode); + int Index = 0; foreach (ShaderIrNode OperA in Opers) diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMove.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMove.cs index 4c9e59cf0..c6b71fb01 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMove.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeMove.cs @@ -85,6 +85,16 @@ namespace Ryujinx.Graphics.Gal.Shader EmitI2i(Block, OpCode, ShaderOper.RR); } + public static void Isberd(ShaderIrBlock Block, long OpCode) + { + //This instruction seems to be used to translate from an address to a vertex index in a GS + //Stub it as such + + Block.AddNode(new ShaderIrCmnt("Stubbed.")); + + Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), GetOperGpr8(OpCode)), OpCode)); + } + public static void Mov_C(ShaderIrBlock Block, long OpCode) { ShaderIrOperCbuf Cbuf = GetOperCbuf34(OpCode); @@ -128,6 +138,16 @@ namespace Ryujinx.Graphics.Gal.Shader EmitSel(Block, OpCode, ShaderOper.RR); } + public static void Mov_S(ShaderIrBlock Block, long OpCode) + { + Block.AddNode(new ShaderIrCmnt("Stubbed.")); + + //Zero is used as a special number to get a valid "0 * 0 + VertexIndex" in a GS + ShaderIrNode Source = new ShaderIrOperImm(0); + + Block.AddNode(GetPredNode(new ShaderIrAsg(GetOperGpr0(OpCode), Source), OpCode)); + } + private static void EmitF2f(ShaderIrBlock Block, long OpCode, ShaderOper Oper) { bool NegA = ((OpCode >> 45) & 1) != 0; diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecodeSpecial.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeSpecial.cs new file mode 100644 index 000000000..591631ff9 --- /dev/null +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecodeSpecial.cs @@ -0,0 +1,29 @@ +using System; + +using static Ryujinx.Graphics.Gal.Shader.ShaderDecodeHelper; + +namespace Ryujinx.Graphics.Gal.Shader +{ + static partial class ShaderDecode + { + public static void Out_R(ShaderIrBlock Block, long OpCode) + { + //TODO: Those registers have to be used for something + ShaderIrOperGpr Gpr0 = GetOperGpr0(OpCode); + ShaderIrOperGpr Gpr8 = GetOperGpr8(OpCode); + ShaderIrOperGpr Gpr20 = GetOperGpr20(OpCode); + + int Type = (int)((OpCode >> 39) & 3); + + if ((Type & 1) != 0) + { + Block.AddNode(GetPredNode(new ShaderIrOp(ShaderIrInst.Emit), OpCode)); + } + + if ((Type & 2) != 0) + { + Block.AddNode(GetPredNode(new ShaderIrOp(ShaderIrInst.Cut), OpCode)); + } + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderDecoder.cs b/Ryujinx.Graphics/Gal/Shader/ShaderDecoder.cs index 85522ff95..98f371b57 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderDecoder.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderDecoder.cs @@ -4,6 +4,8 @@ namespace Ryujinx.Graphics.Gal.Shader { static class ShaderDecoder { + private const long HeaderSize = 0x50; + private const bool AddDbgComments = true; public static ShaderIrBlock[] Decode(IGalMemory Memory, long Start) @@ -32,13 +34,13 @@ namespace Ryujinx.Graphics.Gal.Shader return Output; } - ShaderIrBlock Entry = Enqueue(Start); + ShaderIrBlock Entry = Enqueue(Start + HeaderSize); while (Blocks.Count > 0) { ShaderIrBlock Current = Blocks.Dequeue(); - FillBlock(Memory, Current); + FillBlock(Memory, Current, Start + HeaderSize); //Set child blocks. "Branch" is the block the branch instruction //points to (when taken), "Next" is the block at the next address, @@ -122,14 +124,14 @@ namespace Ryujinx.Graphics.Gal.Shader return Graph; } - private static void FillBlock(IGalMemory Memory, ShaderIrBlock Block) + private static void FillBlock(IGalMemory Memory, ShaderIrBlock Block, long Beginning) { long Position = Block.Position; do { //Ignore scheduling instructions, which are written every 32 bytes. - if ((Position & 0x1f) == 0) + if (((Position - Beginning) & 0x1f) == 0) { Position += 8; @@ -147,7 +149,7 @@ namespace Ryujinx.Graphics.Gal.Shader if (AddDbgComments) { - string DbgOpCode = $"0x{(Position - 8):x16}: 0x{OpCode:x16} "; + string DbgOpCode = $"0x{(Position - Beginning - 8):x16}: 0x{OpCode:x16} "; DbgOpCode += (Decode?.Method.Name ?? "???"); diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderHeader.cs b/Ryujinx.Graphics/Gal/Shader/ShaderHeader.cs new file mode 100644 index 000000000..8e5057ed9 --- /dev/null +++ b/Ryujinx.Graphics/Gal/Shader/ShaderHeader.cs @@ -0,0 +1,73 @@ +namespace Ryujinx.Graphics.Gal.Shader +{ + class ShaderHeader + { + public const int PointList = 1; + public const int LineStrip = 6; + public const int TriangleStrip = 7; + + public int SphType { get; private set; } + public int Version { get; private set; } + public int ShaderType { get; private set; } + public bool MrtEnable { get; private set; } + public bool KillsPixels { get; private set; } + public bool DoesGlobalStore { get; private set; } + public int SassVersion { get; private set; } + public bool DoesLoadOrStore { get; private set; } + public bool DoesFp64 { get; private set; } + public int StreamOutMask { get; private set; } + + public int ShaderLocalMemoryLowSize { get; private set; } + public int PerPatchAttributeCount { get; private set; } + + public int ShaderLocalMemoryHighSize { get; private set; } + public int ThreadsPerInputPrimitive { get; private set; } + + public int ShaderLocalMemoryCrsSize { get; private set; } + public int OutputTopology { get; private set; } + + public int MaxOutputVertexCount { get; private set; } + public int StoreReqStart { get; private set; } + public int StoreReqEnd { get; private set; } + + public ShaderHeader(IGalMemory Memory, long Position) + { + uint CommonWord0 = (uint)Memory.ReadInt32(Position + 0); + uint CommonWord1 = (uint)Memory.ReadInt32(Position + 4); + uint CommonWord2 = (uint)Memory.ReadInt32(Position + 8); + uint CommonWord3 = (uint)Memory.ReadInt32(Position + 12); + uint CommonWord4 = (uint)Memory.ReadInt32(Position + 16); + + SphType = ReadBits(CommonWord0, 0, 5); + Version = ReadBits(CommonWord0, 5, 5); + ShaderType = ReadBits(CommonWord0, 10, 4); + MrtEnable = ReadBits(CommonWord0, 14, 1) != 0; + KillsPixels = ReadBits(CommonWord0, 15, 1) != 0; + DoesGlobalStore = ReadBits(CommonWord0, 16, 1) != 0; + SassVersion = ReadBits(CommonWord0, 17, 4); + DoesLoadOrStore = ReadBits(CommonWord0, 26, 1) != 0; + DoesFp64 = ReadBits(CommonWord0, 27, 1) != 0; + StreamOutMask = ReadBits(CommonWord0, 28, 4); + + ShaderLocalMemoryLowSize = ReadBits(CommonWord1, 0, 24); + PerPatchAttributeCount = ReadBits(CommonWord1, 24, 8); + + ShaderLocalMemoryHighSize = ReadBits(CommonWord2, 0, 24); + ThreadsPerInputPrimitive = ReadBits(CommonWord2, 24, 8); + + ShaderLocalMemoryCrsSize = ReadBits(CommonWord3, 0, 24); + OutputTopology = ReadBits(CommonWord3, 24, 4); + + MaxOutputVertexCount = ReadBits(CommonWord4, 0, 12); + StoreReqStart = ReadBits(CommonWord4, 12, 8); + StoreReqEnd = ReadBits(CommonWord4, 24, 8); + } + + private static int ReadBits(uint Word, int Offset, int BitWidth) + { + uint Mask = (1u << BitWidth) - 1u; + + return (int)((Word >> Offset) & Mask); + } + } +} \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderIrInst.cs b/Ryujinx.Graphics/Gal/Shader/ShaderIrInst.cs index 9841f58ff..fd86cadb1 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderIrInst.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderIrInst.cs @@ -82,6 +82,9 @@ namespace Ryujinx.Graphics.Gal.Shader Bra, Exit, - Kil + Kil, + + Emit, + Cut } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderIrOperAbuf.cs b/Ryujinx.Graphics/Gal/Shader/ShaderIrOperAbuf.cs index fa612de76..f17d9c0e6 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderIrOperAbuf.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderIrOperAbuf.cs @@ -2,13 +2,14 @@ namespace Ryujinx.Graphics.Gal.Shader { class ShaderIrOperAbuf : ShaderIrNode { - public int Offs { get; private set; } - public int GprIndex { get; private set; } + public int Offs { get; private set; } - public ShaderIrOperAbuf(int Offs, int GprIndex) + public ShaderIrNode Vertex { get; private set; } + + public ShaderIrOperAbuf(int Offs, ShaderIrNode Vertex) { - this.Offs = Offs; - this.GprIndex = GprIndex; + this.Offs = Offs; + this.Vertex = Vertex; } } } \ No newline at end of file diff --git a/Ryujinx.Graphics/Gal/Shader/ShaderOpCodeTable.cs b/Ryujinx.Graphics/Gal/Shader/ShaderOpCodeTable.cs index 1ac117851..3f20dc446 100644 --- a/Ryujinx.Graphics/Gal/Shader/ShaderOpCodeTable.cs +++ b/Ryujinx.Graphics/Gal/Shader/ShaderOpCodeTable.cs @@ -74,6 +74,7 @@ namespace Ryujinx.Graphics.Gal.Shader Set("0100110000100x", ShaderDecode.Imnmx_C); Set("0011100x00100x", ShaderDecode.Imnmx_I); Set("0101110000100x", ShaderDecode.Imnmx_R); + Set("1110111111010x", ShaderDecode.Isberd); Set("11100000xxxxxx", ShaderDecode.Ipa); Set("0100110000011x", ShaderDecode.Iscadd_C); Set("0011100x00011x", ShaderDecode.Iscadd_I); @@ -95,7 +96,9 @@ namespace Ryujinx.Graphics.Gal.Shader Set("0011100x10011x", ShaderDecode.Mov_I); Set("000000010000xx", ShaderDecode.Mov_I32); Set("0101110010011x", ShaderDecode.Mov_R); + Set("1111000011001x", ShaderDecode.Mov_S); Set("0101000010000x", ShaderDecode.Mufu); + Set("1111101111100x", ShaderDecode.Out_R); Set("0101000010010x", ShaderDecode.Psetp); Set("0100110010010x", ShaderDecode.Rro_C); Set("0011100x10010x", ShaderDecode.Rro_I); @@ -114,6 +117,7 @@ namespace Ryujinx.Graphics.Gal.Shader Set("1101111101001x", ShaderDecode.Texq); Set("1101100xxxxxxx", ShaderDecode.Texs); Set("1101101xxxxxxx", ShaderDecode.Tlds); + Set("01011111xxxxxx", ShaderDecode.Vmad); Set("0100111xxxxxxx", ShaderDecode.Xmad_CR); Set("0011011x00xxxx", ShaderDecode.Xmad_I); Set("010100010xxxxx", ShaderDecode.Xmad_RC); diff --git a/Ryujinx.Graphics/Gal/ShaderDumper.cs b/Ryujinx.Graphics/Gal/ShaderDumper.cs index 7cd56b21e..541368e89 100644 --- a/Ryujinx.Graphics/Gal/ShaderDumper.cs +++ b/Ryujinx.Graphics/Gal/ShaderDumper.cs @@ -18,13 +18,21 @@ namespace Ryujinx.Graphics.Gal string FileName = "Shader" + DumpIndex.ToString("d4") + "." + ShaderExtension(Type) + ExtSuffix + ".bin"; - string FilePath = Path.Combine(DumpDir(), FileName); + string FullPath = Path.Combine(FullDir(), FileName); + string CodePath = Path.Combine(CodeDir(), FileName); DumpIndex++; - using (FileStream Output = File.Create(FilePath)) - using (BinaryWriter Writer = new BinaryWriter(Output)) + using (FileStream FullFile = File.Create(FullPath)) + using (FileStream CodeFile = File.Create(CodePath)) + using (BinaryWriter FullWriter = new BinaryWriter(FullFile)) + using (BinaryWriter CodeWriter = new BinaryWriter(CodeFile)) { + for (long i = 0; i < 0x50; i += 4) + { + FullWriter.Write(Memory.ReadInt32(Position + i)); + } + long Offset = 0; ulong Instruction = 0; @@ -32,8 +40,8 @@ namespace Ryujinx.Graphics.Gal //Dump until a NOP instruction is found while ((Instruction >> 52 & 0xfff8) != 0x50b0) { - uint Word0 = (uint)Memory.ReadInt32(Position + Offset + 0); - uint Word1 = (uint)Memory.ReadInt32(Position + Offset + 4); + uint Word0 = (uint)Memory.ReadInt32(Position + 0x50 + Offset + 0); + uint Word1 = (uint)Memory.ReadInt32(Position + 0x50 + Offset + 4); Instruction = Word0 | (ulong)Word1 << 32; @@ -44,7 +52,8 @@ namespace Ryujinx.Graphics.Gal break; } - Writer.Write(Instruction); + FullWriter.Write(Instruction); + CodeWriter.Write(Instruction); Offset += 8; } @@ -52,13 +61,24 @@ namespace Ryujinx.Graphics.Gal //Align to meet nvdisasm requeriments while (Offset % 0x20 != 0) { - Writer.Write(0); + FullWriter.Write(0); + CodeWriter.Write(0); Offset += 4; } } } + private static string FullDir() + { + return CreateAndReturn(Path.Combine(DumpDir(), "Full")); + } + + private static string CodeDir() + { + return CreateAndReturn(Path.Combine(DumpDir(), "Code")); + } + private static string DumpDir() { if (string.IsNullOrEmpty(RuntimeDir)) @@ -79,6 +99,16 @@ namespace Ryujinx.Graphics.Gal return RuntimeDir; } + private static string CreateAndReturn(string Dir) + { + if (!Directory.Exists(Dir)) + { + Directory.CreateDirectory(Dir); + } + + return Dir; + } + private static string ShaderExtension(GalShaderType Type) { switch (Type) From ee064a2fb8d5e6167122477d5014beae509f92ed Mon Sep 17 00:00:00 2001 From: mailwl Date: Thu, 19 Jul 2018 19:45:50 +0300 Subject: [PATCH 11/15] .gitignore: ignore autogenerated launchSettings.json (#292) thanks to @Cyuubi --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 82d9719b5..123f46184 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,6 @@ $RECYCLE.BIN/ # Mac desktop service store files .DS_Store + +# VS Launch Settings +launchSettings.json From 8b67297711003dfac432acb3278c5a406617e662 Mon Sep 17 00:00:00 2001 From: emmauss Date: Thu, 19 Jul 2018 20:49:34 +0300 Subject: [PATCH 12/15] Added appveyor configuration file (#277) --- appveyor.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 appveyor.yml diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 000000000..eced37d77 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,28 @@ +version: 1.0.{build} +branches: + only: + - master +image: Visual Studio 2017 +configuration: Release +build_script: +- ps: >- + dotnet --version + + dotnet publish -c Release -r win-x64 + + dotnet publish -c Release -r linux-x64 + + dotnet publish -c Release -r osx-x64 + + 7z a ryujinx-$env:APPVEYOR_BUILD_VERSION-win_x64.zip $env:APPVEYOR_BUILD_FOLDER\Ryujinx\bin\Release\netcoreapp2.1\win-x64\publish\ + + 7z a ryujinx-$env:APPVEYOR_BUILD_VERSION-linux_x64.tar $env:APPVEYOR_BUILD_FOLDER\Ryujinx\bin\Release\netcoreapp2.1\linux-x64\publish\ + + 7z a ryujinx-$env:APPVEYOR_BUILD_VERSION-linux_x64.tar.gz ryujinx-$env:APPVEYOR_BUILD_VERSION-linux_x64.tar + + 7z a ryujinx-$env:APPVEYOR_BUILD_VERSION-osx_x64.zip $env:APPVEYOR_BUILD_FOLDER\Ryujinx\bin\Release\netcoreapp2.1\osx-x64\publish\ + +artifacts: +- path: ryujinx-%APPVEYOR_BUILD_VERSION%-win_x64.zip +- path: ryujinx-%APPVEYOR_BUILD_VERSION%-linux_x64.tar.gz +- path: ryujinx-%APPVEYOR_BUILD_VERSION%-osx_x64.zip From c9fc52edb6abc014d5d5671c1634b01ace48de2f Mon Sep 17 00:00:00 2001 From: Thomas Guillemard Date: Thu, 19 Jul 2018 20:44:52 +0200 Subject: [PATCH 13/15] Fix SystemPathToSwitchPath platform issues and make sure to delete temporary NRO after sessions dispose (#293) --- Ryujinx.HLE/OsHle/Horizon.cs | 1 - Ryujinx.HLE/OsHle/Process.cs | 10 +++++----- Ryujinx.HLE/VirtualFileSystem.cs | 4 ++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Ryujinx.HLE/OsHle/Horizon.cs b/Ryujinx.HLE/OsHle/Horizon.cs index 9d8a937ff..70ae24be3 100644 --- a/Ryujinx.HLE/OsHle/Horizon.cs +++ b/Ryujinx.HLE/OsHle/Horizon.cs @@ -96,7 +96,6 @@ namespace Ryujinx.HLE.OsHle if (IsNro && (SwitchFilePath == null || !SwitchFilePath.StartsWith("sdmc:/"))) { - // TODO: avoid copying the file if we are already inside a sdmc directory string SwitchPath = $"sdmc:/switch/{Name}{Homebrew.TemporaryNroSuffix}"; string TempPath = Ns.VFs.SwitchPathToSystemPath(SwitchPath); diff --git a/Ryujinx.HLE/OsHle/Process.cs b/Ryujinx.HLE/OsHle/Process.cs index be27dcc28..c7606dc90 100644 --- a/Ryujinx.HLE/OsHle/Process.cs +++ b/Ryujinx.HLE/OsHle/Process.cs @@ -403,11 +403,6 @@ namespace Ryujinx.HLE.OsHle { if (Disposing && !Disposed) { - if (NeedsHbAbi && Executables[0].FilePath.EndsWith(Homebrew.TemporaryNroSuffix)) - { - File.Delete(Executables[0].FilePath); - } - //If there is still some thread running, disposing the objects is not //safe as the thread may try to access those resources. Instead, we set //the flag to have the Process disposed when all threads finishes. @@ -431,6 +426,11 @@ namespace Ryujinx.HLE.OsHle } } + if (NeedsHbAbi && Executables.Count > 0 && Executables[0].FilePath.EndsWith(Homebrew.TemporaryNroSuffix)) + { + File.Delete(Executables[0].FilePath); + } + INvDrvServices.UnloadProcess(this); AppletState.Dispose(); diff --git a/Ryujinx.HLE/VirtualFileSystem.cs b/Ryujinx.HLE/VirtualFileSystem.cs index 38df81f87..df1fc9db1 100644 --- a/Ryujinx.HLE/VirtualFileSystem.cs +++ b/Ryujinx.HLE/VirtualFileSystem.cs @@ -57,11 +57,11 @@ namespace Ryujinx.HLE public string SystemPathToSwitchPath(string SystemPath) { - string BaseSystemPath = GetBasePath() + "/"; + string BaseSystemPath = GetBasePath() + Path.DirectorySeparatorChar; if (SystemPath.StartsWith(BaseSystemPath)) { string RawPath = SystemPath.Replace(BaseSystemPath, ""); - int FirstSeparatorOffset = RawPath.IndexOf('/'); + int FirstSeparatorOffset = RawPath.IndexOf(Path.DirectorySeparatorChar); if (FirstSeparatorOffset == -1) { return $"{RawPath}:/"; From 45bb24dbae7b4fb4118036aa74024605995510fd Mon Sep 17 00:00:00 2001 From: emmauss Date: Thu, 19 Jul 2018 18:53:49 +0000 Subject: [PATCH 14/15] fix extra space --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index eced37d77..539212813 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -18,7 +18,7 @@ build_script: 7z a ryujinx-$env:APPVEYOR_BUILD_VERSION-linux_x64.tar $env:APPVEYOR_BUILD_FOLDER\Ryujinx\bin\Release\netcoreapp2.1\linux-x64\publish\ - 7z a ryujinx-$env:APPVEYOR_BUILD_VERSION-linux_x64.tar.gz ryujinx-$env:APPVEYOR_BUILD_VERSION-linux_x64.tar + 7z a ryujinx-$env:APPVEYOR_BUILD_VERSION-linux_x64.tar.gz ryujinx-$env:APPVEYOR_BUILD_VERSION-linux_x64.tar 7z a ryujinx-$env:APPVEYOR_BUILD_VERSION-osx_x64.zip $env:APPVEYOR_BUILD_FOLDER\Ryujinx\bin\Release\netcoreapp2.1\osx-x64\publish\ From 5fe0bc584b21c660e083a9cb37aa0a8be4719f95 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 19 Jul 2018 16:02:51 -0300 Subject: [PATCH 15/15] Send data to OpenGL host without client-side copies (#285) * Directly send host address to buffer data * Cleanup OGLShader * Directly copy vertex and index data too * Revert shader bind "cache" * Address feedback --- ChocolArm64/Memory/AMemory.cs | 7 ++ Ryujinx.Graphics/Gal/IGalRasterizer.cs | 6 +- Ryujinx.Graphics/Gal/IGalShader.cs | 3 +- Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs | 16 ++-- Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs | 22 ++--- .../Gal/OpenGL/OGLStreamBuffer.cs | 93 +++---------------- Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs | 12 +-- Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs | 6 ++ 8 files changed, 55 insertions(+), 110 deletions(-) diff --git a/ChocolArm64/Memory/AMemory.cs b/ChocolArm64/Memory/AMemory.cs index da5cf0074..054277b29 100644 --- a/ChocolArm64/Memory/AMemory.cs +++ b/ChocolArm64/Memory/AMemory.cs @@ -204,6 +204,13 @@ namespace ChocolArm64.Memory return Modified; } + public IntPtr GetHostAddress(long Position, long Size) + { + EnsureRangeIsValid(Position, Size, AMemoryPerm.Read); + + return (IntPtr)(RamPtr + (ulong)Position); + } + public sbyte ReadSByte(long Position) { return (sbyte)ReadByte(Position); diff --git a/Ryujinx.Graphics/Gal/IGalRasterizer.cs b/Ryujinx.Graphics/Gal/IGalRasterizer.cs index 0c5d37e40..a87d36c38 100644 --- a/Ryujinx.Graphics/Gal/IGalRasterizer.cs +++ b/Ryujinx.Graphics/Gal/IGalRasterizer.cs @@ -1,3 +1,5 @@ +using System; + namespace Ryujinx.Graphics.Gal { public interface IGalRasterizer @@ -45,9 +47,9 @@ namespace Ryujinx.Graphics.Gal void SetPrimitiveRestartIndex(uint Index); - void CreateVbo(long Key, byte[] Buffer); + void CreateVbo(long Key, int DataSize, IntPtr HostAddress); - void CreateIbo(long Key, byte[] Buffer); + void CreateIbo(long Key, int DataSize, IntPtr HostAddress); void SetVertexArray(int Stride, long VboKey, GalVertexAttrib[] Attribs); diff --git a/Ryujinx.Graphics/Gal/IGalShader.cs b/Ryujinx.Graphics/Gal/IGalShader.cs index 9adaceaf5..56235a070 100644 --- a/Ryujinx.Graphics/Gal/IGalShader.cs +++ b/Ryujinx.Graphics/Gal/IGalShader.cs @@ -1,3 +1,4 @@ +using System; using System.Collections.Generic; namespace Ryujinx.Graphics.Gal @@ -10,7 +11,7 @@ namespace Ryujinx.Graphics.Gal IEnumerable GetTextureUsage(long Key); - void SetConstBuffer(long Key, int Cbuf, byte[] Data); + void SetConstBuffer(long Key, int Cbuf, int DataSize, IntPtr HostAddress); void EnsureTextureBinding(string UniformName, int Value); diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs index 0dc56966b..f2e5859e5 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLRasterizer.cs @@ -211,28 +211,28 @@ namespace Ryujinx.Graphics.Gal.OpenGL GL.PrimitiveRestartIndex(Index); } - public void CreateVbo(long Key, byte[] Buffer) + public void CreateVbo(long Key, int DataSize, IntPtr HostAddress) { int Handle = GL.GenBuffer(); - VboCache.AddOrUpdate(Key, Handle, (uint)Buffer.Length); + VboCache.AddOrUpdate(Key, Handle, (uint)DataSize); - IntPtr Length = new IntPtr(Buffer.Length); + IntPtr Length = new IntPtr(DataSize); GL.BindBuffer(BufferTarget.ArrayBuffer, Handle); - GL.BufferData(BufferTarget.ArrayBuffer, Length, Buffer, BufferUsageHint.StreamDraw); + GL.BufferData(BufferTarget.ArrayBuffer, Length, HostAddress, BufferUsageHint.StreamDraw); } - public void CreateIbo(long Key, byte[] Buffer) + public void CreateIbo(long Key, int DataSize, IntPtr HostAddress) { int Handle = GL.GenBuffer(); - IboCache.AddOrUpdate(Key, Handle, (uint)Buffer.Length); + IboCache.AddOrUpdate(Key, Handle, (uint)DataSize); - IntPtr Length = new IntPtr(Buffer.Length); + IntPtr Length = new IntPtr(DataSize); GL.BindBuffer(BufferTarget.ElementArrayBuffer, Handle); - GL.BufferData(BufferTarget.ElementArrayBuffer, Length, Buffer, BufferUsageHint.StreamDraw); + GL.BufferData(BufferTarget.ElementArrayBuffer, Length, HostAddress, BufferUsageHint.StreamDraw); } public void SetVertexArray(int Stride, long VboKey, GalVertexAttrib[] Attribs) diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs index fe98aa091..37213d8ed 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLShader.cs @@ -5,6 +5,8 @@ using System.Collections.Concurrent; using System.Collections.Generic; using System.Linq; +using Buffer = System.Buffer; + namespace Ryujinx.Graphics.Gal.OpenGL { public class OGLShader : IGalShader @@ -151,7 +153,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL return Enumerable.Empty(); } - public void SetConstBuffer(long Key, int Cbuf, byte[] Data) + public void SetConstBuffer(long Key, int Cbuf, int DataSize, IntPtr HostAddress) { if (Stages.TryGetValue(Key, out ShaderStage Stage)) { @@ -159,13 +161,9 @@ namespace Ryujinx.Graphics.Gal.OpenGL { OGLStreamBuffer Buffer = GetConstBuffer(Stage.Type, Cbuf); - int Size = Math.Min(Data.Length, Buffer.Size); + int Size = Math.Min(DataSize, Buffer.Size); - byte[] Destiny = Buffer.Map(Size); - - Array.Copy(Data, Destiny, Size); - - Buffer.Unmap(Size); + Buffer.SetData(Size, HostAddress); } } } @@ -278,7 +276,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL { int FreeBinding = 0; - int BindUniformBlocksIfNotNull(ShaderStage Stage) + void BindUniformBlocksIfNotNull(ShaderStage Stage) { if (Stage != null) { @@ -297,8 +295,6 @@ namespace Ryujinx.Graphics.Gal.OpenGL FreeBinding++; } } - - return FreeBinding; } BindUniformBlocksIfNotNull(Current.Vertex); @@ -312,7 +308,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL { int FreeBinding = 0; - int BindUniformBuffersIfNotNull(ShaderStage Stage) + void BindUniformBuffersIfNotNull(ShaderStage Stage) { if (Stage != null) { @@ -325,8 +321,6 @@ namespace Ryujinx.Graphics.Gal.OpenGL FreeBinding++; } } - - return FreeBinding; } BindUniformBuffersIfNotNull(Current.Vertex); @@ -347,7 +341,7 @@ namespace Ryujinx.Graphics.Gal.OpenGL //Allocate a maximum of 64 KiB int Size = Math.Min(GL.GetInteger(GetPName.MaxUniformBlockSize), 64 * 1024); - Buffer = OGLStreamBuffer.Create(BufferTarget.UniformBuffer, Size); + Buffer = new OGLStreamBuffer(BufferTarget.UniformBuffer, Size); ConstBuffers[StageIndex][Cbuf] = Buffer; } diff --git a/Ryujinx.Graphics/Gal/OpenGL/OGLStreamBuffer.cs b/Ryujinx.Graphics/Gal/OpenGL/OGLStreamBuffer.cs index 329c5b5df..0d5dee93f 100644 --- a/Ryujinx.Graphics/Gal/OpenGL/OGLStreamBuffer.cs +++ b/Ryujinx.Graphics/Gal/OpenGL/OGLStreamBuffer.cs @@ -1,9 +1,9 @@ -using System; using OpenTK.Graphics.OpenGL; +using System; namespace Ryujinx.Graphics.Gal.OpenGL { - abstract class OGLStreamBuffer : IDisposable + class OGLStreamBuffer : IDisposable { public int Handle { get; protected set; } @@ -11,53 +11,25 @@ namespace Ryujinx.Graphics.Gal.OpenGL protected BufferTarget Target { get; private set; } - private bool Mapped = false; - - public OGLStreamBuffer(BufferTarget Target, int MaxSize) + public OGLStreamBuffer(BufferTarget Target, int Size) { - Handle = 0; - Mapped = false; - this.Target = Target; - this.Size = MaxSize; + this.Size = Size; + + Handle = GL.GenBuffer(); + + GL.BindBuffer(Target, Handle); + + GL.BufferData(Target, Size, IntPtr.Zero, BufferUsageHint.StreamDraw); } - public static OGLStreamBuffer Create(BufferTarget Target, int MaxSize) + public void SetData(int Size, IntPtr HostAddress) { - //TODO: Query here for ARB_buffer_storage and use when available - return new SubDataBuffer(Target, MaxSize); + GL.BindBuffer(Target, Handle); + + GL.BufferSubData(Target, IntPtr.Zero, Size, HostAddress); } - public byte[] Map(int Size) - { - if (Handle == 0 || Mapped || Size > this.Size) - { - throw new InvalidOperationException(); - } - - byte[] Memory = InternMap(Size); - - Mapped = true; - - return Memory; - } - - public void Unmap(int UsedSize) - { - if (Handle == 0 || !Mapped) - { - throw new InvalidOperationException(); - } - - InternUnmap(UsedSize); - - Mapped = false; - } - - protected abstract byte[] InternMap(int Size); - - protected abstract void InternUnmap(int UsedSize); - public void Dispose() { Dispose(true); @@ -73,41 +45,4 @@ namespace Ryujinx.Graphics.Gal.OpenGL } } } - - class SubDataBuffer : OGLStreamBuffer - { - private byte[] Memory; - - public SubDataBuffer(BufferTarget Target, int MaxSize) - : base(Target, MaxSize) - { - Memory = new byte[MaxSize]; - - GL.GenBuffers(1, out int Handle); - - GL.BindBuffer(Target, Handle); - - GL.BufferData(Target, Size, IntPtr.Zero, BufferUsageHint.StreamDraw); - - this.Handle = Handle; - } - - protected override byte[] InternMap(int Size) - { - return Memory; - } - - protected override void InternUnmap(int UsedSize) - { - GL.BindBuffer(Target, Handle); - - unsafe - { - fixed (byte* MemoryPtr = Memory) - { - GL.BufferSubData(Target, IntPtr.Zero, UsedSize, (IntPtr)MemoryPtr); - } - } - } - } } diff --git a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs index dce25a5e9..c3e7a77fc 100644 --- a/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs +++ b/Ryujinx.HLE/Gpu/Engines/NvGpuEngine3d.cs @@ -560,9 +560,9 @@ namespace Ryujinx.HLE.Gpu.Engines if (Cb.Enabled) { - byte[] Data = Vmm.ReadBytes(Cb.Position, (uint)Cb.Size); + IntPtr DataAddress = Vmm.GetHostAddress(Cb.Position, Cb.Size); - Gpu.Renderer.Shader.SetConstBuffer(BasePosition + (uint)Offset, Cbuf, Data); + Gpu.Renderer.Shader.SetConstBuffer(BasePosition + (uint)Offset, Cbuf, Cb.Size, DataAddress); } } } @@ -595,9 +595,9 @@ namespace Ryujinx.HLE.Gpu.Engines if (!IboCached || Vmm.IsRegionModified(IboKey, (uint)IbSize, NvGpuBufferType.Index)) { - byte[] Data = Vmm.ReadBytes(IndexPosition, (uint)IbSize); + IntPtr DataAddress = Vmm.GetHostAddress(IndexPosition, IbSize); - Gpu.Renderer.Rasterizer.CreateIbo(IboKey, Data); + Gpu.Renderer.Rasterizer.CreateIbo(IboKey, IbSize, DataAddress); } Gpu.Renderer.Rasterizer.SetIndexArray(IbSize, IndexFormat); @@ -659,9 +659,9 @@ namespace Ryujinx.HLE.Gpu.Engines if (!VboCached || Vmm.IsRegionModified(VboKey, VbSize, NvGpuBufferType.Vertex)) { - byte[] Data = Vmm.ReadBytes(VertexPosition, VbSize); + IntPtr DataAddress = Vmm.GetHostAddress(VertexPosition, VbSize); - Gpu.Renderer.Rasterizer.CreateVbo(VboKey, Data); + Gpu.Renderer.Rasterizer.CreateVbo(VboKey, (int)VbSize, DataAddress); } Gpu.Renderer.Rasterizer.SetVertexArray(Stride, VboKey, Attribs[Index].ToArray()); diff --git a/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs b/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs index 0c81dd150..7b23e49fa 100644 --- a/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs +++ b/Ryujinx.HLE/Gpu/Memory/NvGpuVmm.cs @@ -1,5 +1,6 @@ using ChocolArm64.Memory; using Ryujinx.Graphics.Gal; +using System; using System.Collections.Concurrent; namespace Ryujinx.HLE.Gpu.Memory @@ -279,6 +280,11 @@ namespace Ryujinx.HLE.Gpu.Memory return Cache.IsRegionModified(Memory, BufferType, PA, Size); } + public IntPtr GetHostAddress(long Position, long Size) + { + return Memory.GetHostAddress(GetPhysicalAddress(Position), Size); + } + public byte ReadByte(long Position) { Position = GetPhysicalAddress(Position);