Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bitmask lookup table based supersampling #64

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion piet-gpu-hal/src/hub.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::vulkan;
use crate::DescriptorSetBuilder as DescriptorSetBuilderTrait;
use crate::PipelineBuilder as PipelineBuilderTrait;
use crate::{Device, Error, SamplerParams};
use crate::vulkan::Format;

pub type MemFlags = <vulkan::VkDevice as Device>::MemFlags;
pub type Semaphore = <vulkan::VkDevice as Device>::Semaphore;
Expand Down Expand Up @@ -152,9 +153,10 @@ impl Session {
&self,
width: u32,
height: u32,
format: Format,
mem_flags: MemFlags,
) -> Result<Image, Error> {
let image = self.0.device.create_image2d(width, height, mem_flags)?;
let image = self.0.device.create_image2d(width, height, format, mem_flags)?;
Ok(Image(Arc::new(ImageInner {
image,
session: Arc::downgrade(&self.0),
Expand Down
3 changes: 3 additions & 0 deletions piet-gpu-hal/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use crate::vulkan::Format;

/// The cross-platform abstraction for a GPU device.
///
/// This abstraction is inspired by gfx-hal, but is specialized to the needs of piet-gpu.
Expand Down Expand Up @@ -57,6 +59,7 @@ pub trait Device: Sized {
&self,
width: u32,
height: u32,
format: Format,
mem_flags: Self::MemFlags,
) -> Result<Self::Image, Error>;

Expand Down
6 changes: 4 additions & 2 deletions piet-gpu-hal/src/vulkan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use ash::{vk, Device, Entry, Instance};
use once_cell::sync::Lazy;

use crate::{Device as DeviceTrait, Error, ImageLayout, SamplerParams};
pub use ash::vk::Format;

pub struct VkInstance {
/// Retain the dynamic lib.
Expand Down Expand Up @@ -460,6 +461,7 @@ impl crate::Device for VkDevice {
&self,
width: u32,
height: u32,
format: Format,
mem_flags: Self::MemFlags,
) -> Result<Self::Image, Error> {
let device = &self.device.device;
Expand All @@ -476,7 +478,7 @@ impl crate::Device for VkDevice {
let image = device.create_image(
&vk::ImageCreateInfo::builder()
.image_type(vk::ImageType::TYPE_2D)
.format(vk::Format::R8G8B8A8_UNORM)
.format(format)
.extent(extent)
.mip_levels(1)
.array_layers(1)
Expand Down Expand Up @@ -505,7 +507,7 @@ impl crate::Device for VkDevice {
&vk::ImageViewCreateInfo::builder()
.view_type(vk::ImageViewType::TYPE_2D)
.image(image)
.format(vk::Format::R8G8B8A8_UNORM)
.format(format)
.subresource_range(vk::ImageSubresourceRange {
aspect_mask: vk::ImageAspectFlags::COLOR,
base_mip_level: 0,
Expand Down
6 changes: 2 additions & 4 deletions piet-gpu-types/src/ptcl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ piet_gpu! {
index: u32,
offset: [i16; 2],
}
struct CmdAlpha {
alpha: f32,
}
struct CmdJump {
new_ref: u32,
}
Expand All @@ -32,12 +29,13 @@ piet_gpu! {
Fill(CmdFill),
Stroke(CmdStroke),
Solid,
Alpha(CmdAlpha),
Color(CmdColor),
Image(CmdImage),
BeginClip,
EndClip,
Jump(CmdJump),
SaveStencil,
RestoreStencil,
}
}
}
1 change: 0 additions & 1 deletion piet-gpu-types/src/scene.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ piet_gpu! {
}
struct Clip {
bbox: [f32; 4],
// TODO: add alpha?
}
struct SetFillMode {
fill_mode: u32,
Expand Down
1 change: 1 addition & 0 deletions piet-gpu-types/src/tile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ piet_gpu! {
struct TileSeg {
origin: [f32; 2],
vector: [f32; 2],
len: f32,
y_edge: f32,
next: Ref<TileSeg>,
}
Expand Down
171 changes: 106 additions & 65 deletions piet-gpu/shader/coarse.comp
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,16 @@ Alloc read_tile_alloc(uint el_ix) {
#endif

// The maximum number of commands per annotated element.
#define ANNO_COMMANDS 2
#define ANNO_COMMANDS 3

// Perhaps cmd_alloc should be a global? This is a style question.
bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit) {
Alloc cmd_alloc;
Alloc alpha_cmd_alloc;
CmdRef cmd_ref;
CmdRef alpha_cmd_ref;
uint cmd_limit;
uint alpha_cmd_limit;

bool alloc_cmd() {
if (cmd_ref.offset < cmd_limit) {
return true;
}
Expand All @@ -91,6 +97,62 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit
return true;
}

// Ensure the reverse-written (alpha) command stream has room for the next
// annotated element, chaining in a freshly allocated chunk when it does not.
//
// This stream is filled from high offsets toward low ones: callers decrement
// alpha_cmd_ref.offset before each write. alpha_cmd_limit is the lowest
// offset at which a new element may still begin; it sits
// ANNO_COMMANDS * Cmd_size above the start of the current chunk.
//
// Returns false only when malloc() reports failure; in that case none of the
// alpha stream state has been modified.
bool alloc_cmd_rev() {
    if (alpha_cmd_ref.offset < alpha_cmd_limit) {
        // Current chunk is exhausted; grab another one.
        MallocResult fresh = malloc(PTCL_INITIAL_ALLOC);
        if (fresh.failed) {
            return false;
        }
        // Capture the current write position before it is replaced, so the
        // new chunk can link back to it.
        CmdJump link = CmdJump(alpha_cmd_ref.offset);
        alpha_cmd_alloc = fresh.alloc;
        // The last Cmd slot of the new chunk holds the jump into the
        // previously written commands.
        alpha_cmd_ref = CmdRef(alpha_cmd_alloc.offset + PTCL_INITIAL_ALLOC - Cmd_size);
        Cmd_Jump_write(alpha_cmd_alloc, alpha_cmd_ref, link);
        alpha_cmd_limit = alpha_cmd_alloc.offset + ANNO_COMMANDS * Cmd_size;
    }
    return true;
}

// Append the coverage command for one tile to the forward command stream
// (cmd_alloc / cmd_ref), advancing cmd_ref past what was written.
//
//   - fill mode is not MODE_NONZERO   -> Stroke, with half of stroke_width
//   - MODE_NONZERO, tile has segments -> Fill, with the tile's backdrop
//   - MODE_NONZERO, no segments       -> Solid
//
// The caller is expected to have reserved space (see alloc_cmd) beforehand.
// NOTE(review): the constant 4 added to each payload size is presumably the
// size of the command tag word; confirm against the generated ptcl header.
void write_fill(AnnotatedTag tag, Tile tile, float stroke_width) {
    if (fill_mode_from_flags(tag.flags) != MODE_NONZERO) {
        Cmd_Stroke_write(cmd_alloc, cmd_ref, CmdStroke(tile.tile.offset, 0.5 * stroke_width));
        cmd_ref.offset += 4 + CmdStroke_size;
        return;
    }
    if (tile.tile.offset == 0) {
        // No segment list for this tile: emit a payload-free Solid command.
        Cmd_Solid_write(cmd_alloc, cmd_ref);
        cmd_ref.offset += 4;
        return;
    }
    Cmd_Fill_write(cmd_alloc, cmd_ref, CmdFill(tile.tile.offset, tile.backdrop));
    cmd_ref.offset += 4 + CmdFill_size;
}

// Reverse-stream counterpart of write_fill: emits the coverage command for
// one tile into the alpha command stream (alpha_cmd_alloc / alpha_cmd_ref),
// which is written from high offsets toward low ones. The write position is
// therefore moved down *before* each command is stored.
//
// After the coverage command, a RestoreStencil command is placed below it in
// the alpha stream, and a matching SaveStencil command is appended to the
// forward stream (cmd_alloc / cmd_ref).
//
// NOTE(review): the forward-stream SaveStencil write is not preceded by an
// alloc_cmd() check inside this function; confirm callers reserve that space.
void write_fill_rev(AnnotatedTag tag, Tile tile, float stroke_width) {
    bool nonzero_fill = fill_mode_from_flags(tag.flags) == MODE_NONZERO;
    if (!nonzero_fill) {
        alpha_cmd_ref.offset -= 4 + CmdStroke_size;
        Cmd_Stroke_write(alpha_cmd_alloc, alpha_cmd_ref, CmdStroke(tile.tile.offset, 0.5 * stroke_width));
    } else if (tile.tile.offset == 0) {
        // No segment list for this tile: a payload-free Solid command.
        alpha_cmd_ref.offset -= 4;
        Cmd_Solid_write(alpha_cmd_alloc, alpha_cmd_ref);
    } else {
        alpha_cmd_ref.offset -= 4 + CmdFill_size;
        Cmd_Fill_write(alpha_cmd_alloc, alpha_cmd_ref, CmdFill(tile.tile.offset, tile.backdrop));
    }

    // Reverse stream: RestoreStencil goes just below the coverage command.
    alpha_cmd_ref.offset -= 4;
    Cmd_RestoreStencil_write(alpha_cmd_alloc, alpha_cmd_ref);

    // Forward stream: the paired SaveStencil.
    Cmd_SaveStencil_write(cmd_alloc, cmd_ref);
    cmd_ref.offset += 4;
}

void main() {
if (mem_error != NO_ERROR) {
return;
Expand All @@ -112,12 +174,18 @@ void main() {
uint tile_x = gl_LocalInvocationID.x % N_TILE_X;
uint tile_y = gl_LocalInvocationID.x / N_TILE_X;
uint this_tile_ix = (bin_tile_y + tile_y) * conf.width_in_tiles + bin_tile_x + tile_x;
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, this_tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
CmdRef cmd_ref = CmdRef(cmd_alloc.offset);
cmd_alloc = slice_mem(conf.ptcl_alloc, this_tile_ix * PTCL_INITIAL_ALLOC * 2, PTCL_INITIAL_ALLOC);
cmd_ref = CmdRef(cmd_alloc.offset);
// Reserve space for the maximum number of commands and a potential jump.
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - (ANNO_COMMANDS + 1) * Cmd_size;
cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - (ANNO_COMMANDS + 1) * Cmd_size;
alpha_cmd_alloc = slice_mem(conf.ptcl_alloc, PTCL_INITIAL_ALLOC * (this_tile_ix * 2 + 1), PTCL_INITIAL_ALLOC);
alpha_cmd_ref = CmdRef(alpha_cmd_alloc.offset + PTCL_INITIAL_ALLOC - Cmd_size);
if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) {
Cmd_End_write(alpha_cmd_alloc, alpha_cmd_ref);
}
alpha_cmd_limit = alpha_cmd_ref.offset + ANNO_COMMANDS * Cmd_size;
// The nesting depth of the clip stack
uint clip_depth = 0;
uint clip_depth = 1;
// State for the "clip zero" optimization. If it's nonzero, then we are
// currently in a clip for which the entire tile has an alpha of zero, and
// the value is the depth after the "begin clip" of that element.
Expand Down Expand Up @@ -277,7 +345,7 @@ void main() {
}
if (include_tile) {
uint el_slice = el_ix / 32;
uint el_mask = 1 << (el_ix & 31);
uint el_mask = 1 << (el_ix & 31u);
atomicOr(sh_bitmaps[el_slice][y * N_TILE_X + x], el_mask);
}
}
Expand Down Expand Up @@ -317,94 +385,66 @@ void main() {
Tile tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoColor fill = Annotated_Color_read(conf.anno_alloc, ref);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
if (fill_mode_from_flags(tag.flags) == MODE_NONZERO) {
if (tile.tile.offset != 0) {
CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop);
Cmd_Fill_write(cmd_alloc, cmd_ref, cmd_fill);
cmd_ref.offset += 4 + CmdFill_size;
} else {
Cmd_Solid_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
if (unpackUnorm4x8(fill.rgba_color).wzyx.a == 1.0) {
if (!alloc_cmd()) {
break;
}
write_fill(tag, tile, fill.linewidth);
Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(fill.rgba_color));
cmd_ref.offset += 4 + CmdColor_size;
if (tile.tile.offset == 0) {
// Tile is fully occluded. See include_tile logic above for the invariant.
clip_zero_depth = clip_depth;
}
} else {
CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * fill.linewidth);
Cmd_Stroke_write(cmd_alloc, cmd_ref, cmd_stroke);
cmd_ref.offset += 4 + CmdStroke_size;
if (!alloc_cmd_rev()) {
break;
}
alpha_cmd_ref.offset -= 4 + CmdColor_size;
Cmd_Color_write(alpha_cmd_alloc, alpha_cmd_ref, CmdColor(fill.rgba_color));
write_fill_rev(tag, tile, fill.linewidth);
}
Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(fill.rgba_color));
cmd_ref.offset += 4 + CmdColor_size;
break;
case Annotated_Image:
tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoImage fill_img = Annotated_Image_read(conf.anno_alloc, ref);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
// For now, use back-to-front drawing for all images.
// Optimizations for opaque images can be done in the future.
if (!alloc_cmd_rev()) {
break;
}
if (fill_mode_from_flags(tag.flags) == MODE_NONZERO) {
if (tile.tile.offset != 0) {
CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop);
Cmd_Fill_write(cmd_alloc, cmd_ref, cmd_fill);
cmd_ref.offset += 4 + CmdFill_size;
} else {
Cmd_Solid_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
}
} else {
CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * fill_img.linewidth);
Cmd_Stroke_write(cmd_alloc, cmd_ref, cmd_stroke);
cmd_ref.offset += 4 + CmdStroke_size;
}
Cmd_Image_write(cmd_alloc, cmd_ref, CmdImage(fill_img.index, fill_img.offset));
cmd_ref.offset += 4 + CmdImage_size;
alpha_cmd_ref.offset -= 4 + CmdImage_size;
Cmd_Image_write(alpha_cmd_alloc, alpha_cmd_ref, CmdImage(fill_img.index, fill_img.offset));
write_fill_rev(tag, tile, fill_img.linewidth);
break;
case Annotated_BeginClip:
tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
if (tile.tile.offset == 0 && tile.backdrop == 0) {
clip_zero_depth = clip_depth + 1;
} else if (tile.tile.offset == 0 && clip_depth < 32) {
clip_one_mask |= (1 << clip_depth);
clip_one_mask |= (1u << clip_depth);
} else {
AnnoBeginClip begin_clip = Annotated_BeginClip_read(conf.anno_alloc, ref);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
if (!alloc_cmd()) {
break;
}
if (fill_mode_from_flags(tag.flags) == MODE_NONZERO) {
if (tile.tile.offset != 0) {
CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop);
Cmd_Fill_write(cmd_alloc, cmd_ref, cmd_fill);
cmd_ref.offset += 4 + CmdFill_size;
} else {
// TODO: here is where a bunch of optimization magic should happen
float alpha = tile.backdrop == 0 ? 0.0 : 1.0;
Cmd_Alpha_write(cmd_alloc, cmd_ref, CmdAlpha(alpha));
cmd_ref.offset += 4 + CmdAlpha_size;
}
} else {
CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * begin_clip.linewidth);
Cmd_Stroke_write(cmd_alloc, cmd_ref, cmd_stroke);
cmd_ref.offset += 4 + CmdStroke_size;
}
write_fill(tag, tile, begin_clip.linewidth);
Cmd_BeginClip_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
if (clip_depth < 32) {
clip_one_mask &= ~(1 << clip_depth);
clip_one_mask &= ~(1u << clip_depth);
}
}
clip_depth++;
break;
case Annotated_EndClip:
clip_depth--;
if (clip_depth >= 32 || (clip_one_mask & (1 << clip_depth)) == 0) {
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
if (clip_depth >= 32 || (clip_one_mask & (1u << clip_depth)) == 0) {
if (!alloc_cmd()) {
break;
}
Cmd_Solid_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
Cmd_EndClip_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
}
Expand All @@ -431,6 +471,7 @@ void main() {
if (rd_ix >= ready_ix && partition_ix >= n_partitions) break;
}
if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) {
Cmd_End_write(cmd_alloc, cmd_ref);
CmdJump jump = CmdJump(alpha_cmd_ref.offset);
Cmd_Jump_write(cmd_alloc, cmd_ref, jump);
}
}
Binary file modified piet-gpu/shader/coarse.spv
Binary file not shown.
Loading