Files
project-reset/Assets/Plugins/HTraceSSGI/Resources/HTraceSSGI/Computes/HDenoiserSSGI.compute
2025-12-31 12:44:11 -05:00

344 lines
14 KiB
Plaintext

// Kernel entry points. The last two lines use Unity's per-kernel macro syntax: the identifier
// SpatialFilter is redefined so the single SpatialFilter function body below is compiled under the
// names SpatialFilter1 and SpatialFilter2; the SpatialFilter2 variant additionally defines SECOND_PASS.
#pragma kernel TemporalAccumulation
#pragma kernel TemporalStabilization
#pragma kernel PointDistributionFill
#pragma kernel SpatialFilter
#pragma kernel SpatialFilter1 SpatialFilter = SpatialFilter1
#pragma kernel SpatialFilter2 SpatialFilter = SpatialFilter2 SECOND_PASS
#include "../Includes/HReservoirSSGI.hlsl"
// _GBUFFER_NORMALS_OCT is not referenced directly in this file; presumably consumed by the includes - TODO confirm.
#pragma multi_compile _ _GBUFFER_NORMALS_OCT
#include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/ShaderVariablesFunctions.hlsl"
// Feature keywords tested by the kernels below:
//   USE_SPATIAL_OCCLUSION   - adds an occlusion-difference term to the spatial filter weights.
//   USE_TEMPORAL_INVALIDITY - makes both temporal kernels read _TemporalInvalidity.
//   INTERPOLATION_OUTPUT    - second spatial pass writes packed radiance + normal/depth instead of plain radiance.
#pragma multi_compile _ USE_SPATIAL_OCCLUSION
#pragma multi_compile _ USE_TEMPORAL_INVALIDITY
#pragma multi_compile _ INTERPOLATION_OUTPUT
// Upper bounds for the accumulated sample count in TemporalAccumulation / TemporalStabilization.
// The history blend weight is 1 - 1/samplecount, so 16 caps it at 0.9375.
#define TEMPORAL_DENOISING_SAMPLECOUNT 16
#define TEMPORAL_STABILIZATION_SAMPLECOUNT 16
// ---- Inputs ----
// Current radiance; read by all three filtering kernels.
H_TEXTURE(_Radiance);
// .x is reinterpreted as a uint and unpacked with UnpackNormalDepth (normal in xyz, raw depth in w).
H_TEXTURE(_NormalDepth);
// .x drives the minimum filter radius of the spatial filter.
H_TEXTURE(_AmbientOcclusion);
// .xy are reinterpreted as uints: x is an 8-bit per-sample mask, y is unpacked into the adaptive filter scale.
H_TEXTURE(_SpatialGuidance);
// Reprojected history radiance (xyz); TemporalStabilization also reads the history sample count from w.
H_TEXTURE(_RadianceReprojected);
// .x widens the history clamp box, .y scales the history blend weight (see the temporal kernels).
H_TEXTURE(_TemporalInvalidity);
// .x is the reprojected sample count used by TemporalAccumulation.
H_TEXTURE(_SamplecountReprojected);
// .x is compared between centre and sample in the spatial filter when USE_SPATIAL_OCCLUSION is on.
H_TEXTURE(_SpatialOcclusion);
// ---- Outputs ----
// Written by both temporal kernels (w = 0 from accumulation, w = sample count from stabilization).
H_RW_TEXTURE(float4, _Radiance_TemporalOutput);
// Written by TemporalAccumulation with the same radiance as _Radiance_TemporalOutput.xyz.
H_RW_TEXTURE(float3, _Radiance_SpatialOutput);
// Written by SpatialFilter (all passes except the second pass with INTERPOLATION_OUTPUT).
H_RW_TEXTURE(float3, _Radiance_Output);
// Written by TemporalAccumulation with the clamped sample count.
H_RW_TEXTURE(float, _Samplecount_Output);
// Written by the second spatial pass when INTERPOLATION_OUTPUT is on: x = R11G11B10 radiance, y = packed normal/depth.
H_RW_TEXTURE(uint2, _RadianceNormalDepth_Output);
// Sample pattern consumed by SpatialFilter (entries 8..15) and produced by PointDistributionFill (entries 0..15).
StructuredBuffer<float3> _PointDistribution;
RWStructuredBuffer<float3> _PointDistribution_Output;
// Base radius of the spatial filter; the first pass uses half of it.
float _FilterRadius;
// Blend factor between the fixed radius (0) and the guidance-scaled radius (1).
float _FilterAdaptivity;
// ------------------------ TEMPORAL FUNCTIONS -------------------------
// Clips History towards the centre of the axis-aligned box spanned by Min and Max.
// If History lies outside the box it is pulled back along the line to the box centre until
// its largest normalised component reaches 1; otherwise it is returned unchanged.
//
//   History  - value to constrain (here: reprojected radiance).
//   Min, Max - per-channel bounds of the box.
//   returns  - History, or the clipped point on the History -> centre segment.
//
// The half-extent magnitude is floored at a tiny epsilon so that a degenerate box (Min == Max on
// some channel, e.g. a zero-variance neighbourhood) cannot produce a division by zero; without
// the floor the result relied on how the target handles inf/NaN in max() and comparisons.
float3 DirectClipToAABB(float3 History, float3 Min, float3 Max)
{
    // Only |Offset / Extents| is used below, so taking abs() of the extents before flooring
    // leaves every non-degenerate box with exactly the previous result.
    static const float MinHalfExtent = 1e-8;

    float3 Center = 0.5 * (Max + Min);
    float3 Extents = max(abs(0.5 * (Max - Min)), MinHalfExtent);
    float3 Offset = History - Center;

    // Offset expressed in units of the box half-extents; a component above 1 is outside the box.
    float3 AbsUnit = abs(Offset / Extents);
    float MaxUnit = max(max(AbsUnit.x, AbsUnit.y), AbsUnit.z);

    if (MaxUnit > 1.0)
        return Center + (Offset / MaxUnit);
    return History;
}
// ------------------------ POINT DISTRIBUTION BUFFER --------------------
// Fills the first 16 entries of _PointDistribution_Output with the 8-tap disk pattern below:
//   entries 0..7  - the pattern with a frame-dependent jitter added to xy,
//   entries 8..15 - the same pattern without jitter.
// Threads with an index of 16 or more write nothing.
[numthreads(128, 1, 1)]
void PointDistributionFill(uint pixCoord : SV_DispatchThreadID)
{
    // Alternative generator kept for reference:
    // _PointDistribution_Output[pixCoord.x] = HSampleDiskCubic(GetLDSequenceSampleFloat(pixCoord.x, 0), GetLDSequenceSampleFloat(pixCoord.x, 1));
    static const float3 k_PoissonDiskSamples[8] =
    {
        // https://www.desmos.com/calculator/abaqyvswem
        float3( -1.00 , 0.00 , 1.0 ),
        float3( 0.00 , 1.00 , 1.0 ),
        float3( 1.00 , 0.00 , 1.0 ),
        float3( 0.00 , -1.00 , 1.0 ),
        float3( -0.25 * sqrt(2.0) , 0.25 * sqrt(2.0) , 0.5 ),
        float3( 0.25 * sqrt(2.0) , 0.25 * sqrt(2.0) , 0.5 ),
        float3( 0.25 * sqrt(2.0) , -0.25 * sqrt(2.0) , 0.5 ),
        float3( -0.25 * sqrt(2.0) , -0.25 * sqrt(2.0) , 0.5 )
    };

    const uint Slot = pixCoord;
    if (Slot >= 16)
        return;

    // Slots 0..7 and 8..15 both map onto pattern entries 0..7.
    float3 Sample = k_PoissonDiskSamples[Slot & 7];

    // Only the first eight slots receive the jitter, which changes with _FrameCount modulo 8.
    if (Slot < 8)
    {
        uint JitterIndex = (uint(_FrameCount) % 8) + Slot;
        Sample.xy += SampleUnitDisk(JitterIndex) * 0.15;
    }

    _PointDistribution_Output[Slot] = Sample;
}
// ------------------------ TEMPORAL ACCUMULATION ------------------------
// Temporal accumulation: blends the current radiance with the reprojected history.
//
// Steps, per pixel:
//   1. Gather weighted first/second moments and the min/max-luminance samples of the 8 neighbours.
//   2. Clip the history to a box around the blended centre/mean, sized by the standard deviation
//      (and widened by _TemporalInvalidity.x when USE_TEMPORAL_INVALIDITY is on).
//   3. Optionally replace an outlier centre by the brightest/darkest neighbour (ENABLE_RCRS_FILTER).
//   4. Optionally drop the history when exposure changed by more than 2x (ENABLE_EXPOSURE_CONTROL).
//   5. Blend with weight 1 - 1/samplecount and write radiance and sample count.
//
// The ENABLE_* switches and the H_* / exposure macros come from outside the visible part of this file.
[numthreads(8, 8, 1)]
void TemporalAccumulation(uint3 pixCoord : SV_DispatchThreadID)
{
    float3 Radiance = H_LOAD(_Radiance, pixCoord.xy).xyz;
    float3 RadianceHistory = H_LOAD(_RadianceReprojected, pixCoord.xy).xyz;

    // Moments are seeded with the centre pixel at weight 1.
    float3 Moment1 = Radiance;
    float3 Moment2 = Radiance * Radiance;
    float WeightTotal = 1.0;

    float CenterLuminance = Luminance(Radiance);
    float MaxLuma = -9999;
    float MinLuma = 9999;
    // Seeded with the centre radiance so the outlier replacement below never reads an
    // uninitialised value when no neighbour beats the sentinels (NaN or extreme luminance).
    float3 MaxLumaSample = Radiance;
    float3 MinLumaSample = Radiance;

    const static int2 SampleOffsets[8] = {int2(1, 0), int2(0, 1), int2(-1, 0), int2(0, -1), int2(1, -1), int2(-1, 1), int2(-1, -1), int2(1, 1)};
    for (int i = 0; i < 8; i++)
    {
        int2 Offset = SampleOffsets[i];
        // Falls off with squared pixel distance: exp(-0.75) for edge taps, exp(-1.5) for corner taps.
        float Weight = exp(-3.0 * float(Offset.x * Offset.x + Offset.y * Offset.y) / 4.0f);
        float3 Sample = H_LOAD(_Radiance, pixCoord.xy + Offset).xyz;

        Moment1 += Sample * Weight;
        Moment2 += Sample * Sample * Weight;
        WeightTotal += Weight;

        // Track the brightest and darkest neighbour (centre excluded).
        float SampleLuma = Luminance(Sample);
        if (MaxLuma < SampleLuma) { MaxLuma = SampleLuma; MaxLumaSample = Sample; }
        if (MinLuma > SampleLuma) { MinLuma = SampleLuma; MinLumaSample = Sample; }
    }
    Moment1 /= WeightTotal;
    Moment2 /= WeightTotal;
    float3 StdDev = sqrt(max(0.0, Moment2 - Moment1 * Moment1));

    // Defaults of 1 mean: widest clamp box and full history weight.
    float2 TemporalInvalidity = 1;
    if (USE_TEMPORAL_INVALIDITY)
    {
        TemporalInvalidity = H_LOAD(_TemporalInvalidity, pixCoord.xy).xy;
        TemporalInvalidity.x = pow(TemporalInvalidity.x, 5);
        // Anything below 0.95 rejects the history completely.
        TemporalInvalidity.y = TemporalInvalidity.y < 0.95f ? 0 : TemporalInvalidity.y;
    }

    // History clamp box: centred between the pixel and the neighbourhood mean, half-size proportional to StdDev.
    float ClampBoxSize = 0.5;
    float ClampBoxMultiplier = lerp(1, 5, TemporalInvalidity.x);
    float3 ClampCenter = lerp(Radiance, Moment1, ClampBoxSize * ClampBoxSize);
    float3 ClampHalfSize = StdDev * ClampBoxSize * ClampBoxMultiplier;
    float3 Min = ClampCenter - ClampHalfSize;
    float3 Max = ClampCenter + ClampHalfSize;
    RadianceHistory = DirectClipToAABB(RadianceHistory, Min, Max);

    float SamplecountReprojected = H_LOAD(_SamplecountReprojected, pixCoord.xy).x;
    float Samplecount = min(TEMPORAL_DENOISING_SAMPLECOUNT, SamplecountReprojected + 1);
    float TemporalWeight = 1.0f - (1.0f / float(Samplecount));

    if (ENABLE_RCRS_FILTER)
    {
        // A centre whose luminance lies outside the neighbours' range is replaced by the nearest extreme.
        bool CenterInsideRange = CenterLuminance >= MinLuma && CenterLuminance <= MaxLuma;
        if (!CenterInsideRange)
        {
            if (CenterLuminance > MaxLuma) { Radiance = MaxLumaSample; }
            else { Radiance = MinLumaSample; }
        }
    }

    if (ENABLE_EXPOSURE_CONTROL)
    {
        float ExposurePrevious = HGetPreviousExposureMultiplier;
        float ExposureCurrent = HGetCurrentExposureMultiplier;
        // A zero exposure on either side is treated as a large change (ratio 100).
        float ExposureRatio = (ExposurePrevious * ExposureCurrent) != 0.0 ? ExposureCurrent / ExposurePrevious : 100.0;
        if (max(ExposureRatio, 1.0 / ExposureRatio) > 2.0)
            TemporalWeight = 0;
    }

    Radiance = lerp(Radiance, RadianceHistory, TemporalWeight * TemporalInvalidity.y * ENABLE_TEMPORAL_DENOISING);

    _Radiance_TemporalOutput[H_COORD(pixCoord.xy)] = float4(Radiance, 0);
    _Radiance_SpatialOutput[H_COORD(pixCoord.xy)] = Radiance;
    _Samplecount_Output[H_COORD(pixCoord.xy)] = Samplecount;
}
// ------------------------ TEMPORAL STABILIZATION ------------------------
// Temporal stabilization: a second history blend applied to _Radiance.
// The history texture carries radiance in xyz and its accumulated sample count in w; the
// updated count is written back to w of _Radiance_TemporalOutput.
// _TemporalInvalidity is read at pixCoord / _HScaleFactorSSGI, unlike TemporalAccumulation.
[numthreads(8, 8, 1)]
void TemporalStabilization(uint3 pixCoord : SV_DispatchThreadID)
{
    float3 Radiance = H_LOAD(_Radiance, pixCoord.xy).xyz;
    float4 History = H_LOAD(_RadianceReprojected, pixCoord.xy);

    // Weighted neighbourhood moments, starting from the centre pixel at weight 1.
    float3 Mean = Radiance;
    float3 MeanOfSquares = Radiance * Radiance;
    float WeightSum = 1.0;

    const static int2 NeighbourOffsets[8] = {int2(1, 0), int2(0, 1), int2(-1, 0), int2(0, -1), int2(1, -1), int2(-1, 1), int2(-1, -1), int2(1, 1)};
    for (int Tap = 0; Tap < 8; Tap++)
    {
        int2 Offset = NeighbourOffsets[Tap];
        float TapWeight = exp(-3.0 * float(Offset.x * Offset.x + Offset.y * Offset.y) / 4.0f);
        float3 TapRadiance = H_LOAD(_Radiance, pixCoord.xy + Offset * 1).xyz;

        Mean += TapRadiance * TapWeight;
        MeanOfSquares += TapRadiance * TapRadiance * TapWeight;
        WeightSum += TapWeight;
    }
    Mean /= WeightSum;
    MeanOfSquares /= WeightSum;
    float3 StdDev = sqrt(max(0.0, MeanOfSquares - Mean * Mean));

    // x widens the clamp box, y gates the history weight; both default to 1.
    float2 TemporalInvalidity = 1;
    if (USE_TEMPORAL_INVALIDITY)
    {
        TemporalInvalidity = H_LOAD(_TemporalInvalidity, pixCoord.xy / _HScaleFactorSSGI).xy;
        TemporalInvalidity.x = pow(TemporalInvalidity.x, 5);
        TemporalInvalidity.y = TemporalInvalidity.y < 0.95f ? 0 : TemporalInvalidity.y;
    }

    // Clamp box around a blend of the pixel and the neighbourhood mean.
    // The lerp factor is not saturated, so the multiplier can exceed 6.
    float ClampBoxSize = 0.5;
    float ClampBoxMultiplier = lerp(3, 6, TemporalInvalidity.x * 2);
    float3 BoxCenter = lerp(Radiance, Mean, ClampBoxSize * ClampBoxSize);
    float3 BoxHalfSize = StdDev * ClampBoxSize * ClampBoxMultiplier;
    History.xyz = DirectClipToAABB(History.xyz, BoxCenter - BoxHalfSize, BoxCenter + BoxHalfSize);

    float Samplecount = min(TEMPORAL_STABILIZATION_SAMPLECOUNT, History.w + 1);
    float TemporalWeight = 1.0f - (1.0f / float(Samplecount));

    if (ENABLE_EXPOSURE_CONTROL)
    {
        float ExposurePrevious = HGetPreviousExposureMultiplier;
        float ExposureCurrent = HGetCurrentExposureMultiplier;
        bool BothExposuresNonZero = (ExposurePrevious * ExposureCurrent) != 0.0;
        float ExposureRatio = BothExposuresNonZero ? ExposureCurrent / ExposurePrevious : 100.0;
        // Drop the history when exposure changed by more than a factor of two in either direction.
        if (max(ExposureRatio, 1.0 / ExposureRatio) > 2.0)
            TemporalWeight = 0;
    }

    float HistoryBlend = TemporalWeight * TemporalInvalidity.y * ENABLE_TEMPORAL_STABILIZATION;
    Radiance = lerp(Radiance, History.xyz, HistoryBlend);

    _Radiance_TemporalOutput[H_COORD(pixCoord.xy)] = float4(Radiance, Samplecount);
}
// ------------------------ SPATIAL FILTER ------------------------
// Edge-aware spatial filter. Compiled three times (SpatialFilter, SpatialFilter1, SpatialFilter2);
// the SpatialFilter2 variant defines SECOND_PASS, which uses the full _FilterRadius instead of half
// and may write the packed interpolation output.
//
// Per pixel: 8 taps from _PointDistribution[8..15] are placed on the surface tangent plane in
// world space, projected back to the screen, and accumulated in tonemapped space with a weight
// built from normal similarity, a plane-distance term, a Gaussian falloff and (optionally) an
// occlusion-difference term. The centre pixel always contributes with weight 1.
// Pixels whose raw depth is at or below UNITY_RAW_FAR_CLIP_VALUE return early and write nothing.
[numthreads(8, 8, 1)]
void SpatialFilter(uint3 pixCoord : SV_DispatchThreadID)
{
    uint2 pixCoordUnscaled = GetUnscaledCoords(pixCoord.xy);
    int2 pixCoordMax = _ScreenSize.xy / _HScaleFactorSSGI.xx - 1;
    float2 pixCoordNDC = (float2(pixCoordUnscaled) + 0.5f) * _ScreenSize.zw;

    uint NormalDepthPacked = asuint(H_LOAD(_NormalDepth, pixCoord.xy).x);
    float4 NormalDepth = UnpackNormalDepth(NormalDepthPacked);
    if (NormalDepth.w <= UNITY_RAW_FAR_CLIP_VALUE) { return; }

    float3 Radiance = H_LOAD(_Radiance, pixCoord.xy).xyz;
    Radiance = SpatialDenoisingTonemap(Radiance);

    // Centre surface description: linear depth, view-space normal/position and the plane through them.
    float DepthCenterLinear = H_LINEAR_EYE_DEPTH(NormalDepth.w);
    float3 NormalCenterVS = H_TRANSFORM_WORLD_TO_VIEW_NORMAL(NormalDepth.xyz);
    float3 PositionCenterWS = H_COMPUTE_POSITION_WS(pixCoordNDC, NormalDepth.w, H_MATRIX_I_VP);
    float3 PositionCenterVS = ComputeFastViewSpacePosition(pixCoordNDC, NormalDepth.w, DepthCenterLinear);
    float4 NormalPlaneVS = float4(NormalCenterVS.xyz, dot(PositionCenterVS, NormalCenterVS.xyz));

    float SpatialOcclusionCenter = H_LOAD(_SpatialOcclusion, pixCoord.xy).x;

    // Guidance: y scales the radius and the plane-weight strength, x is a per-tap bit mask.
    uint2 SpatialGuidancePacked = asuint(H_LOAD(_SpatialGuidance, pixCoord.xy).xy);
    float AdaptiveFilterScale = UnpackAmbientOcclusion(SpatialGuidancePacked.y);
    float AdaptivePlaneWeight = lerp(500.0f, 100.0f, AdaptiveFilterScale);
    uint SpatialGuidance = SpatialGuidancePacked.x;

    float FilterRadius = _FilterRadius / 2.0f;
#ifdef SECOND_PASS
    FilterRadius = _FilterRadius;
#endif
    float MinFilterRadius = lerp(0.01f, 0.1f, H_LOAD(_AmbientOcclusion, pixCoord.xy).x);
    FilterRadius = max(MinFilterRadius, lerp(FilterRadius, FilterRadius * AdaptiveFilterScale, _FilterAdaptivity));

    // World-space radius grows with camera distance; the divisor rises from 5 to 50 over 500 units.
    float DistanceToPoint = length(H_GET_ABSOLUTE_POSITION_WS(PositionCenterWS) - H_GET_CAMERA_POSITION_WS());
    float3x3 OrthoBasis = HGetLocalFrame(NormalDepth.xyz);
    float RadiusScale = lerp(5.0f, 50.0f, saturate(DistanceToPoint / 500.0f));
    float Radius = DistanceToPoint * FilterRadius / RadiusScale;
    float Sigma = 0.9f * Radius;

    // Centre tap has weight 1.
    float WeightAccumulated = 1;

    // Small per-pixel offset in [-0.15, 0.15] applied to every tap.
    float2 PerPixelJitter;
    PerPixelJitter.x = HInterleavedGradientNoise(pixCoord.xy, 0);
    PerPixelJitter.y = HInterleavedGradientNoise(pixCoord.yx, 1);
    PerPixelJitter = (PerPixelJitter * 2 - 1) * 0.15;

    UNITY_UNROLL
    for (int i = 0; i < 8; i++)
    {
        // A set guidance bit shrinks this tap's offset to a quarter.
        float GuidanceAdaptivity = 1 - (0.75 * ((SpatialGuidance >> i) & 0x1));
        float2 Point = (_PointDistribution[i + 8].xy + PerPixelJitter) * Radius * GuidanceAdaptivity;

        float3 PositionPointWS = PositionCenterWS + OrthoBasis[0] * Point.x + OrthoBasis[1] * Point.y;
        float2 SampleCoordNDC = H_COMPUTE_NDC_Z(PositionPointWS, H_MATRIX_VP).xy;
        int2 SampleCoord = SampleCoordNDC * _ScreenSize.xy / _HScaleFactorSSGI;

        // Mirror taps that leave the screen back inside on every side. Previously only the
        // right/bottom border was mirrored, so taps past the left/top border were loaded at
        // negative coordinates. The final clamp covers taps further out than one screen size.
        SampleCoord = abs(SampleCoord);
        int2 Overshoot = max(SampleCoord - pixCoordMax, 0);
        SampleCoord -= 2 * Overshoot;
        SampleCoord = clamp(SampleCoord, int2(0, 0), pixCoordMax);

        uint NormalDepthSamplePacked = asuint(H_LOAD(_NormalDepth, SampleCoord).x);
        float4 NormalDepthSample = UnpackNormalDepth(NormalDepthSamplePacked);
        float SpatialOcclusionSample = H_LOAD(_SpatialOcclusion, SampleCoord).x;

        // NOTE: the position is rebuilt from the unmirrored NDC with the depth of the loaded pixel.
        float3 PositionSampleVS = ComputeFastViewSpacePosition(SampleCoordNDC, NormalDepthSample.w, H_LINEAR_EYE_DEPTH(NormalDepthSample.w));

        float PlaneWeight = ProbePlaneWeighting(NormalPlaneVS, PositionSampleVS, DepthCenterLinear, AdaptivePlaneWeight);
        float NormalWeight = saturate(dot(NormalDepth.xyz, NormalDepthSample.xyz));
        float GaussianWeight = GaussianWeighting(length(Point), Sigma);

        float OcclusionWeight = 1.0f;
        if (USE_SPATIAL_OCCLUSION)
        { OcclusionWeight = exp2(-max(5, 10 * (1 - SpatialOcclusionCenter)) * abs(SpatialOcclusionCenter - SpatialOcclusionSample)); }

        float SampleWeight = NormalWeight * PlaneWeight * OcclusionWeight * GaussianWeight * ENABLE_SPATIAL_DENOISING;
        WeightAccumulated += SampleWeight;

        float3 RadianceSample = H_LOAD(_Radiance, SampleCoord).xyz;
        RadianceSample = SpatialDenoisingTonemap(RadianceSample);
        Radiance += RadianceSample * SampleWeight;
    }

    Radiance /= WeightAccumulated;
    Radiance = SpatialDenoisingTonemapInverse(Radiance);

    // Do not let invalid values reach the output.
    if (AnyIsNaN(Radiance) || AnyIsInf(Radiance))
        Radiance = 0;

    // Debug view: Radiance = AdaptiveFilterScale;
#ifdef SECOND_PASS
    if (INTERPOLATION_OUTPUT) _RadianceNormalDepth_Output[H_COORD(pixCoord.xy)] = uint2(PackToR11G11B10f(Radiance), PackNormalDepth(NormalDepth.xyz, NormalDepth.w));
    else _Radiance_Output[H_COORD(pixCoord.xy)] = Radiance;
#else
    _Radiance_Output[H_COORD(pixCoord.xy)] = Radiance;
#endif
}