Skip to content

Commit

Permalink
Merge pull request #36 from milankl/fastfloat16
Browse files Browse the repository at this point in the history
fastfloat16sr first implementation
  • Loading branch information
milankl authored Feb 7, 2021
2 parents 69d6c85 + 5eb5d87 commit 75565a8
Show file tree
Hide file tree
Showing 10 changed files with 1,340 additions and 12 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Continuous integration: run the package tests on pushes and pull requests to main.
name: CI
on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - main
jobs:
  test:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # Versions are quoted where YAML would otherwise read them as numbers.
        julia-version: ["1.0", "1.5", nightly]
        julia-arch: [x64]
        os: [ubuntu-latest, windows-latest, macOS-latest]

    steps:
      - uses: actions/checkout@v2
      - name: "Set up Julia"
        uses: julia-actions/setup-julia@latest
        with:
          version: ${{ matrix.julia-version }}
          arch: ${{ matrix.julia-arch }}
      - uses: julia-actions/julia-runtest@latest
      - uses: julia-actions/julia-uploadcodecov@latest
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
6 changes: 4 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
name = "StochasticRounding"
uuid = "3843c9a1-1f18-49ff-9d99-1b4c8a8e97ed"
authors = ["Milan Kloewer"]
version = "0.4.1"
version = "0.5.0"

[deps]
BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
FastFloat16s = "ecdfd59a-7c14-409c-ab7a-3704f8e92dd6"
RandomNumbers = "e6cf234a-135c-5ec9-84dd-332b85af5143"

[compat]
BFloat16s = "^0.1"
FastFloat16s = "^0.1"
RandomNumbers = "^1.4"
julia = "^1"
BFloat16s = "^0.1"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Expand Down
19 changes: 11 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

Stochastic rounding for floating-point arithmetic.

This package exports `Float32sr`,`Float16sr` and `BFloat16sr`. Three number formats that behave like their deterministic counterparts but
with stochastic rounding that is proportional to the distance of the next representable numbers and therefore
[exact in expectation](https://en.wikipedia.org/wiki/Rounding#Stochastic_rounding) (see also example below in "Usage").
Although there is currently no known hardware implementation available,
This package exports `Float32sr`,`Float16sr`,`FastFloat16sr` and `BFloat16sr`, four number formats that behave
like their deterministic counterparts but with stochastic rounding that is proportional to the distance of the
next representable numbers and therefore [exact in expectation](https://en.wikipedia.org/wiki/Rounding#Stochastic_rounding)
(see also example below in "Usage"). Although there is currently no known hardware implementation available,
[Graphcore is working on IPUs with stochastic rounding](https://www.graphcore.ai/posts/directions-of-ai-research).
Stochastic rounding makes the number formats considerably slower, but e.g. Float32+stochastic rounding is only
about 2x slower than Float64. [Xoroshiro128Plus](https://sunoru.github.io/RandomNumbers.jl/stable/man/xorshifts/#Xorshift-Family-1),
Expand All @@ -16,6 +16,9 @@ a random number generator from the [Xorshift family](https://en.wikipedia.org/wi

You are welcome to raise [issues](https://github.com/milankl/StochasticRounding.jl/issues), ask questions or suggest any changes or new features.

`BFloat16sr` is based on [BFloat16s.jl](https://github.com/JuliaMath/BFloat16s.jl).

`FastFloat16sr` is based on [FastFloat16s.jl](https://github.com/milankl/FastFloat16s.jl).

### Usage

```julia
Expand Down Expand Up @@ -66,10 +69,10 @@ julia> B1,B2 = Float32sr.(A1),Float32sr.(A2);
```
And similarly for the other number types. Then on an Intel(R) Core(R) i5 (Ice Lake) @ 1.1GHz timings via `@btime +($A1,$A2)` etc. are

| rounding mode | Float32 | BFloat16 | Float64 | Float16 |
| --------------------- | ---------- | ---------- | --------- | --------- |
| default | 460 μs | 556 μs | 1.151ms | 16.446 ms |
| + stochastic rounding | 2.585 ms | 3.820 ms | n/a | 20.714 ms |
| rounding mode | Float32 | BFloat16 | Float64 | [FastFloat16](https://github.com/milankl/FastFloat16s.jl) | Float16 |
| --------------------- | ---------- | ---------- | --------- | ----------- | --------- |
| default | 460 μs | 556 μs | 1.151ms | 629 μs | 16.446 ms |
| + stochastic rounding | 2.585 ms | 3.820 ms | n/a | 4.096 ms | 20.714 ms |

Stochastic rounding slows Float32/BFloat16 down by a factor of about 5-7, but its overhead is almost negligible for Float16.
For Float32sr about 50% of the time is spent on the random number generation, a bit less than 50% on the addition in
Expand Down
841 changes: 841 additions & 0 deletions figs/drawing.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added figs/stochastic_rounding_schematic2.xcf
Binary file not shown.
5 changes: 4 additions & 1 deletion src/StochasticRounding.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ module StochasticRounding
Float16sr,Float16_stochastic_round,
Float16_chance_roundup,NaN16sr,Inf16sr,
Float32sr,Float32_stochastic_round,
Float32_chance_roundup,NaN32sr,Inf32sr
Float32_chance_roundup,NaN32sr,Inf32sr,
FastFloat16sr,FastFloat16_stochastic_round,
FastFloat16_chance_roundup,NaNF16sr,InfF16sr

import Base: isfinite, isnan, precision, iszero,
sign_mask, exponent_mask, significand_mask,
Expand Down Expand Up @@ -40,5 +42,6 @@ module StochasticRounding
include("bfloat16sr.jl")
include("float16sr.jl")
include("float32sr.jl")
include("fast_float16sr.jl")

end
142 changes: 142 additions & 0 deletions src/fast_float16sr.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import FastFloat16s.FastFloat16

"""The FastFloat16 + stochastic rounding type.
Declared as a 32-bit primitive type; the conversions in this file reinterpret
its bits as `Float32` and as `FastFloat16`."""
primitive type FastFloat16sr <: AbstractFloat 32 end

# Basic properties.
# The three masks are 32-bit patterns (1 sign bit, 8 exponent bits, 23 significand
# bits), while `precision` reports 11.

function sign_mask(::Type{FastFloat16sr})
    return 0x8000_0000
end

function exponent_mask(::Type{FastFloat16sr})
    return 0x7f80_0000
end

function significand_mask(::Type{FastFloat16sr})
    return 0x007f_ffff
end

function precision(::Type{FastFloat16sr})
    return 11
end

# Multiplicative and additive identities, built by reinterpreting the bit pattern
# of the Float32 one and the all-zero 32-bit pattern respectively.
one(::Type{FastFloat16sr}) = reinterpret(FastFloat16sr, one(Float32))
zero(::Type{FastFloat16sr}) = reinterpret(FastFloat16sr, 0x0000_0000)

# Instance forms forward to the type forms.
one(::FastFloat16sr) = one(FastFloat16sr)
zero(::FastFloat16sr) = zero(FastFloat16sr)

# Range limits, taken from Float16 and converted to FastFloat16sr, so that the
# result has the same type that was asked about (as `eps` does in this file).
typemin(::Type{FastFloat16sr}) = FastFloat16sr(typemin(Float16))
typemax(::Type{FastFloat16sr}) = FastFloat16sr(typemax(Float16))
floatmin(::Type{FastFloat16sr}) = FastFloat16sr(floatmin(Float16))
floatmax(::Type{FastFloat16sr}) = FastFloat16sr(floatmax(Float16))

# Instance forms of the range limits: each one forwards to the method that
# takes the type itself.
for limit in (:typemin, :typemax, :floatmin, :floatmax)
    @eval $limit(::FastFloat16sr) = $limit(FastFloat16sr)
end

# Machine epsilon of the type: the Float16 epsilon converted to FastFloat16sr.
function eps(::Type{FastFloat16sr})
    return FastFloat16sr(eps(Float16))
end

# Spacing at `x`: computed on the Float16 conversion of `x`, then converted back.
function eps(x::FastFloat16sr)
    halfprec = Float16(x)
    return FastFloat16sr(eps(halfprec))
end

# Infinity and NaN constants, reinterpreted from their Float32 counterparts.
const InfF16sr = reinterpret(FastFloat16sr, Inf32)
const NaNF16sr = reinterpret(FastFloat16sr, NaN32)

# basic operations

# Absolute value is taken on the Float32 conversion and the bits are reinterpreted back.
function abs(x::FastFloat16sr)
    return reinterpret(FastFloat16sr, abs(Float32(x)))
end

# Predicates are answered by the Float32 conversion.
for predicate in (:isnan, :isfinite)
    @eval $predicate(x::FastFloat16sr) = $predicate(Float32(x))
end

# Neighbouring values are stepped on the Float16 conversion, then converted back.
for step in (:nextfloat, :prevfloat)
    @eval $step(x::FastFloat16sr) = FastFloat16sr($step(Float16(x)))
end

# Unary minus: flip the sign bit by xor-ing the raw 32 bits with the sign mask.
-(x::FastFloat16sr) = reinterpret(FastFloat16sr, xor(reinterpret(UInt32, x), sign_mask(FastFloat16sr)))

# conversions

# Pure reinterpretations of the 32 bits between the three types.
Float32(x::FastFloat16sr) = reinterpret(Float32, x)
FastFloat16(x::FastFloat16sr) = reinterpret(FastFloat16, x)
FastFloat16sr(x::FastFloat16) = reinterpret(FastFloat16sr, x)

# A Float32 is first converted to FastFloat16, then reinterpreted.
FastFloat16sr(x::Float32) = FastFloat16sr(FastFloat16(x))

# Every other supported input type is routed through Float32.
for T in (:Float16, :Float64, :Integer)
    @eval FastFloat16sr(x::$T) = FastFloat16sr(Float32(x))
end

# Conversions out of FastFloat16sr also go through Float32.
for T in (:Float16, :Float64)
    @eval $T(x::FastFloat16sr) = $T(Float32(x))
end

function (::Type{T})(x::FastFloat16sr) where {T<:Integer}
    return T(Float32(x))
end

"""Convert to FastFloat16sr from Float32 with stochastic rounding.
Binary arithmetic version."""
function FastFloat16_stochastic_round(x::Float32)
# work on the raw bits of x as a signed 32-bit integer
ix = reinterpret(Int32,x)
# Early exit: if nothing but the lowest 12 bits (and possibly the sign bit)
# is set, return zero. Without this, adding a negative perturbation below
# could wrap the bit pattern into the NaN range.
# << 1 pushes the sign bit out on the left,
# >> 13 then drops the bits that the perturbation can reach.
((ix << 1) >> 13) == zero(Int32) && return zero(FastFloat16sr)

# r carries random bits in its lowest 13 bits. Because r is a signed Int32,
# >> 19 is an arithmetic shift: the upper 19 bits are filled with copies of
# the sign bit (all 0s or all 1s), so r lies in [-2^12, 2^12).
# NOTE(review): presumably this is [-ulp/2, ulp/2) of the 10-significand-bit
# target format, expressed in Float32 bit units - confirm.
# r is added with integer arithmetic to the bits of x; this is the
# stochastic perturbation.
# Then deterministic round to nearest to either round up or round down.
# NOTE(review): Xor128[] is defined outside this file section; it looks like
# a Ref holding the random number generator - confirm.
r = rand(Xor128[],Int32) >> 19 # = preserve 1 sign, 8 ebits, 10sbits
xr = reinterpret(Float32,ix + r)
return FastFloat16sr(xr) # round to nearest
end

# # Promotion
# promote_rule(::Type{Float16}, ::Type{FastFloat32sr}) = Float32
# promote_rule(::Type{Float64}, ::Type{Float32sr}) = Float64

# Mixing FastFloat16sr with any of the fixed-width integer types promotes
# to FastFloat16sr.
for IntT in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128)
    @eval function promote_rule(::Type{FastFloat16sr}, ::Type{$IntT})
        return FastFloat16sr
    end
end

# Rounding
# For each supported rounding mode, round the Float32 conversion and convert
# the result back to FastFloat16sr.
for mode in (:ToZero, :Down, :Up, :Nearest)
    @eval function round(x::FastFloat16sr, r::RoundingMode{$(QuoteNode(mode))})
        return FastFloat16sr(round(Float32(x), r))
    end
end

# Comparison
# Equality and ordering are all decided on the Float32 conversions of both operands.
for cmp in (:(==), :<, :<=, :isless)
    @eval ($cmp)(a::FastFloat16sr, b::FastFloat16sr) = ($cmp)(Float32(a), Float32(b))
end

# Arithmetic
# Each binary operation is evaluated on the Float32 conversions and the Float32
# result is then stochastically rounded to FastFloat16sr.
for op in (:+, :-, :*, :/, :^)
    @eval function ($op)(x::FastFloat16sr, y::FastFloat16sr)
        exact = ($op)(Float32(x), Float32(y))
        return FastFloat16_stochastic_round(exact)
    end
end

# Unary math functions: evaluate on the Float32 conversion, then round
# stochastically to FastFloat16sr.
for func in (
    :sin, :cos, :tan, :asin, :acos, :atan,
    :sinh, :cosh, :tanh, :asinh, :acosh, :atanh,
    :exp, :exp2, :exp10, :expm1,
    :log, :log2, :log10, :sqrt, :cbrt, :log1p,
)
    @eval Base.$func(a::FastFloat16sr) = FastFloat16_stochastic_round($func(Float32(a)))
end

# Two-argument math functions: evaluate on the Float32 conversions of both
# arguments, then round stochastically to FastFloat16sr.
for func in (:atan, :hypot)
    @eval function $func(a::FastFloat16sr, b::FastFloat16sr)
        exact = $func(Float32(a), Float32(b))
        return FastFloat16_stochastic_round(exact)
    end
end


# Showing
# Infinities and NaN print as their named constants; every other value prints
# as `FastFloat16sr(<Float32 representation>)`.
function show(io::IO, x::FastFloat16sr)
    isnan(x) && return print(io, "NaNF16sr")
    isinf(x) && return print(io, x < 0 ? "-InfF16sr" : "InfF16sr")

    digits = string(Float32(x))
    return print(io, "FastFloat16sr(", digits, ")")
end

# Bit pattern of `x` as a string, read from its reinterpretation as UInt32.
bitstring(x::FastFloat16sr) = bitstring(reinterpret(UInt32, x))

# With `mode == :split` the bits are separated by spaces into sign (1),
# exponent (8) and significand (the rest); any other mode returns them unsplit.
function bitstring(x::FastFloat16sr, mode::Symbol)
    bits = bitstring(x)
    mode == :split || return bits
    return string(bits[1], " ", bits[2:9], " ", bits[10:end])
end
2 changes: 1 addition & 1 deletion src/float32sr.jl
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ const Inf32sr = reinterpret(Float32sr, Inf32)
const NaN32sr = reinterpret(Float32sr, NaN32)

# basic operations
abs(x::Float32sr) = reinterpret(Float32sr, abs(reinterpret(Float32)))
abs(x::Float32sr) = reinterpret(Float32sr, abs(reinterpret(Float32,x)))
isnan(x::Float32sr) = isnan(reinterpret(Float32,x))
isfinite(x::Float32sr) = isfinite(reinterpret(Float32,x))

Expand Down
Loading

0 comments on commit 75565a8

Please sign in to comment.