#[compute]
#version 450

// Separable blur (box or Gaussian) with optional dithering, run as three
// dispatches selected by the `pass` push constant:
//   1 = horizontal blur, 2 = vertical blur, 3 = plain copy.
// Every invocation reads from `sampler1` and writes one pixel to `image1`.

// Each workgroup covers a 16x16 tile of pixels (one pixel per invocation).
// The tile size was chosen arbitrarily; the optimal value is not known.
layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in;

// The documentation recommends using 'restrict' whenever possible.
// It cannot be used with the sampler.
layout(rgba16f, binding = 0, set = 0) restrict uniform image2D image1; // write
layout(binding = 0, set = 1) uniform sampler2D sampler1; // read

layout(push_constant, std430) uniform Params {
    vec2 screen_size;  // dimensions of the buffer we're writing to
    float dither;      // [0.0 - 0.5] amount of dithering
    int is_gaussian;   // [0 - 1] 0 is box blur, 1 is gaussian weighting
    int kern_samples;  // number of sample intervals per pass (kern_samples + 1 taps are taken)
    int kern_width;    // width of the blur, in pixels of the target buffer
    int pass;          // [1,2,3] 1 horizontal blur, 2 vertical blur, 3 copy to screen
} p;

// Hash-style pseudo random number in [0, 1) derived from a 2D seed.
float rng(vec2 seed) {
    return fract(sin(dot(seed, vec2(12.9898, 78.233))) * 43758.5453123);
}

// Random offset in [-ks * dither, +ks * dither] used to jitter the kernel.
// The seed depends only on the invocation ID, so the value is constant for
// a given pixel and only needs to be evaluated once per invocation.
float get_dither(float ks) {
    if (p.dither <= 0.0) {
        return 0.0; // avoid some math if we don't need dithering
    }
    return (rng(vec2(gl_GlobalInvocationID.xy) / p.screen_size) * 2.0 - 1.0) * ks * p.dither;
}

// Unnormalized Gaussian falloff for distance `x` and standard deviation `sigma`.
// The usual 1 / (sqrt(2*pi) * sigma) factor is omitted on purpose: the caller
// divides by the sum of all weights, which cancels any constant factor.
float gaussian(float x, float sigma) {
    return exp(-(x * x) / (2.0 * sigma * sigma));
}

// Blurs along one axis around `center` (in pixel units of the target buffer).
// `axis` is vec2(1, 0) for the horizontal pass and vec2(0, 1) for the vertical one.
// Returns the weighted average of kern_samples + 1 taps spread evenly over
// kern_width pixels. All four channels are normalized, including alpha.
vec4 blur_along(vec2 center, vec2 axis) {
    // Guard against degenerate parameters: with no sample intervals the kernel
    // collapses to a single tap at the center instead of dividing by zero.
    int samples = max(p.kern_samples, 0);
    float width = samples > 0 ? float(p.kern_width) : 0.0;
    float spacing = width / float(max(samples, 1));
    float half_width = width * 0.5;

    // Weights are evaluated in tap-index space, so sigma is expressed in taps:
    // the kernel spans +/- 3 sigma (samples / 6). This is what the original
    // width / (6 * width / samples) expression reduces to, without the 0/0
    // that occurred when kern_width was zero.
    float sigma = float(max(samples, 1)) / 6.0;
    float mid = float(samples) * 0.5;

    float jitter = get_dither(spacing); // loop invariant, see get_dither()
    bool use_gaussian = p.is_gaussian == 1;

    // Accumulate weights on the fly and normalize by their sum at the end.
    // This needs no fixed-size weight table, so kern_samples has no hidden
    // upper bound that could be overrun.
    vec4 sum = vec4(0.0);
    float total_weight = 0.0;
    for (int i = 0; i <= samples; i++) {
        float weight = use_gaussian ? gaussian(float(i) - mid, sigma) : 1.0;
        vec2 coord = center + axis * (float(i) * spacing - half_width + jitter);
        sum += texture(sampler1, clamp(coord / p.screen_size, 0.0, 1.0)) * weight;
        total_weight += weight;
    }

    // total_weight is always > 0: box weights are 1 and the Gaussian is
    // evaluated within +/- 3 sigma, where it is strictly positive.
    return sum / total_weight;
}

void main() {
    ivec2 pixel = ivec2(gl_GlobalInvocationID.xy);

    // Early exit if the pixel is outside the screen bounds (the dispatch is
    // rounded up to whole 16x16 tiles).
    if (pixel.x >= p.screen_size.x || pixel.y >= p.screen_size.y) {
        return;
    }

    // Address the texel center rather than its corner. Sampling at the corner
    // shifts the result by half a pixel per pass and, with a filtering
    // sampler, softens the pass 3 copy.
    vec2 center = vec2(pixel) + 0.5;

    if (p.pass == 1) {
        // Horizontal pass
        imageStore(image1, pixel, blur_along(center, vec2(1.0, 0.0)));
    } else if (p.pass == 2) {
        // Vertical pass
        imageStore(image1, pixel, blur_along(center, vec2(0.0, 1.0)));
    } else if (p.pass == 3) {
        // Copy buffer to screen
        imageStore(image1, pixel, texture(sampler1, center / p.screen_size));
    }
    // Any other pass value writes nothing.
}