WebGPU is a new way to access the GPU using javascript API. The most exciting thing is this will be accessible to everyone starting today (April 26th 2023) in the stable branch of Google Chrome 113 and Microsoft Edge 113.
The main way to access the compute and rendering capabilities of the GPU is via a shader language. WebGPU comes with a rust like shader language.
I am interested in computing for deep learning. Lets see how we can do a basic Matrix addition using a shader language.
Below is a complete javascript for the addition. Name it matrix_addition.js
The different variables and their relationships are defined in this diagram
const rows = 2
const cols = 2
// Define global buffer size
const BUFFER_SIZE = new Float32Array(rows*cols).byteLength;
// Compute shader
const shader = `
@group(0) @binding(0)
var<storage, read> input1: array<f32>;
@group(0) @binding(1)
var<storage, read> input2: array<f32>;
@group(0) @binding(2)
var<storage, read_write> output: array<f32>;
@compute @workgroup_size(64)
fn main(
@builtin(global_invocation_id)
global_id : vec3u,
@builtin(local_invocation_id)
local_id : vec3u,
) {
// Avoid accessing the buffer out of bounds
if (global_id.x >= ${BUFFER_SIZE}) {
return;
}
output[global_id.x] = input1[global_id.x]+input2[global_id.x];
}
`;
// Main function
async function init() {
// 1: request adapter and device
if (!navigator.gpu) {
throw Error('WebGPU not supported.');
}
const adapter = await navigator.gpu.requestAdapter();
if (!adapter) {
throw Error('Couldn\'t request WebGPU adapter.');
}
const device = await adapter.requestDevice();
// 2: Create a shader module from the shader template literal
const shaderModule = device.createShaderModule({
code: shader
});
// 3: Create input (input1, input2) and output (output) buffers to read GPU calculations to,
// and a staging (stagingBuffer) buffer to be mapped for JavaScript access
const matrixA = new Float32Array(rows*cols).map((_,i) => 1);
const matrixB = new Float32Array(rows*cols).map((_,i) => 2);
const input1 = device.createBuffer({
size: BUFFER_SIZE,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
});
device.queue.writeBuffer(input1, 0, matrixA, 0, matrixA.length);
const input2 = device.createBuffer({
size: BUFFER_SIZE,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST,
});
device.queue.writeBuffer(input2, 0, matrixB, 0, matrixB.length);
const output = device.createBuffer({
size: BUFFER_SIZE,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC
});
const stagingBuffer = device.createBuffer({
size: BUFFER_SIZE,
usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST
});
// 4: Create a GPUBindGroupLayout to define the bind group structure, create a GPUBindGroup from it,
// then use it to create a GPUComputePipeline
const bindGroupLayout =
device.createBindGroupLayout({
entries: [{
binding: 0,
visibility: GPUShaderStage.COMPUTE,
buffer: {
type: "read-only-storage"
}
},{
binding: 1,
visibility: GPUShaderStage.COMPUTE,
buffer: {
type: "read-only-storage"
}
},{
binding: 2,
visibility: GPUShaderStage.COMPUTE,
buffer: {
type: "storage"
}
}]
});
const bindGroup = device.createBindGroup({
layout: bindGroupLayout,
entries: [{
binding: 0,
resource: {
buffer: input1,
}
},{
binding: 1,
resource: {
buffer: input2,
}
},{
binding: 2,
resource: {
buffer: output,
}
}]
});
const computePipeline = device.createComputePipeline({
layout: device.createPipelineLayout({
bindGroupLayouts: [bindGroupLayout]
}),
compute: {
module: shaderModule,
entryPoint: 'main'
}
});
// 5: Create GPUCommandEncoder to issue commands to the GPU
const commandEncoder = device.createCommandEncoder();
// 6: Initiate render pass
const passEncoder = commandEncoder.beginComputePass();
// 7: Issue commands
passEncoder.setPipeline(computePipeline);
passEncoder.setBindGroup(0, bindGroup);
passEncoder.dispatchWorkgroups(Math.ceil(BUFFER_SIZE / 64));
// End the render pass
passEncoder.end();
// Copy output buffer to staging buffer
commandEncoder.copyBufferToBuffer(
output,
0, // Source offset
stagingBuffer,
0, // Destination offset
BUFFER_SIZE
);
// 8: End frame by passing array of command buffers to command queue for execution
device.queue.submit([commandEncoder.finish()]);
// map staging buffer to read results back to JS
await stagingBuffer.mapAsync(
GPUMapMode.READ,
0, // Offset
BUFFER_SIZE // Length
);
const copyArrayBuffer = stagingBuffer.getMappedRange(0, BUFFER_SIZE);
const data = copyArrayBuffer.slice();
stagingBuffer.unmap();
console.log(new Float32Array(data));
}
init().catch((e)=>{
alert(e)
});
HTML is standard boiler plate
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>WebGPU Matrix Addition</title>
</head>
<body>
<script type="module" src="./matrix_addition.js"></script>
</body>
</html>
Open Chrome developer tools to see the output
Top comments (2)
Recently, I learned about a new project, the Hyperdust Protocol. It supports multi-chain deployment and offers cost-effective, sustainable GPU computing power and cloud rendering services for Web3 applications such as the metaverse, AI training, and top-tier AAA games. It's reported that the mainnet is about to launch, and you can now claim airdrops for free trial. Interested friends can check it out. Here is their official website: hyperdust.io/
Great simplified example. Can’t wait interactive demo !