// Per-workgroup bitonic sort of (grid hash, index) pairs.
//
// Each workgroup of WORKGROUP_SIZE invocations loads one contiguous tile of
// `gridHashes` / `indices` into workgroup memory, sorts that tile in ascending
// hash order, and writes it back in place. Tiles are sorted independently of
// each other: this shader does NOT merge across workgroups.

// Number of invocations per workgroup and number of elements per sorted tile.
// The bitonic network below requires this to be a power of two.
const WORKGROUP_SIZE: u32 = 256u;

// Key/index value given to padding slots (globalIndex >= totalCount) so that
// they sort to the end of the tile.
const PADDING: u32 = 0xffffffffu;

// Sort keys; rewritten in place with the sorted keys of each tile.
@group(0) @binding(0) var<storage, read_write> gridHashes: array<u32>;

// Payload carried along with each key; permuted exactly like `gridHashes`.
@group(0) @binding(1) var<storage, read_write> indices: array<u32>;

// Number of valid elements in `gridHashes` / `indices`.
// NOTE(review): the address space was missing in the original text; `uniform`
// is assumed here. Confirm against the host-side bind group layout (switch to
// `var<storage, read>` if the host binds a storage buffer).
@group(0) @binding(2) var<uniform> totalCount: u32;

// Workgroup-local staging copies of the tile being sorted.
var<workgroup> sharedData: array<u32, WORKGROUP_SIZE>;
var<workgroup> sharedIndices: array<u32, WORKGROUP_SIZE>;

// Entry point: one invocation per element.
//   local_id.x  - slot of this invocation inside the workgroup tile.
//   global_id.x - element position in the global buffers.
@compute @workgroup_size(WORKGROUP_SIZE)
fn main(
    @builtin(local_invocation_id) local_id: vec3<u32>,
    @builtin(global_invocation_id) global_id: vec3<u32>,
) {
    let localIndex = local_id.x;
    let globalIndex = global_id.x;
    let inRange = globalIndex < totalCount;

    // Out-of-range invocations must not return early: every invocation has to
    // reach each workgroupBarrier() below. They stage padding values instead.
    if (inRange) {
        sharedData[localIndex] = gridHashes[globalIndex];
        sharedIndices[localIndex] = indices[globalIndex];
    } else {
        sharedData[localIndex] = PADDING;
        sharedIndices[localIndex] = PADDING;
    }
    workgroupBarrier();

    // Bitonic sorting network over the WORKGROUP_SIZE staged elements.
    // `size` is the length of the bitonic sequences being merged and `j` is
    // the compare-exchange distance within the current merge step. Both loop
    // variables take the same values in every invocation, so the barrier
    // inside the loops is reached uniformly.
    for (var size = 2u; size <= WORKGROUP_SIZE; size = size << 1u) {
        for (var j = size >> 1u; j > 0u; j = j >> 1u) {
            let partner = localIndex ^ j;

            // Each pair is handled once, by its lower-numbered invocation.
            if (partner > localIndex) {
                let ascending = (localIndex & size) == 0u;

                let keyLocal = sharedData[localIndex];
                let keyPartner = sharedData[partner];
                let idxLocal = sharedIndices[localIndex];
                let idxPartner = sharedIndices[partner];

                // Order by (key, index). The index tie-break makes the result
                // deterministic for equal hashes and guarantees that a valid
                // element whose hash equals PADDING still sorts before the
                // padding slots (which carry index PADDING), so the guarded
                // write-back below never replaces it with a padding entry.
                let localAfter = keyLocal > keyPartner
                    || (keyLocal == keyPartner && idxLocal > idxPartner);
                let localBefore = keyLocal < keyPartner
                    || (keyLocal == keyPartner && idxLocal < idxPartner);

                // select(f, t, cond) yields `t` when `cond` is true.
                let swap = select(localBefore, localAfter, ascending);

                if (swap) {
                    sharedData[localIndex] = keyPartner;
                    sharedData[partner] = keyLocal;
                    sharedIndices[localIndex] = idxPartner;
                    sharedIndices[partner] = idxLocal;
                }
            }

            // All exchanges of this step must be visible before the next one.
            workgroupBarrier();
        }
    }

    // Write the sorted tile back; padding slots have no global counterpart.
    if (inRange) {
        gridHashes[globalIndex] = sharedData[localIndex];
        indices[globalIndex] = sharedIndices[localIndex];
    }
}