Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
f28e182
fix: Correct ToFloat64 conversion
dndungu Jul 29, 2025
2b82a3e
fix: Correct ToFloat16 subnormal conversion
dndungu Jul 29, 2025
b487898
feat: Add comprehensive tests to increase coverage
dndungu Jul 29, 2025
261d097
test: Increase test coverage for math and arithmetic
dndungu Jul 29, 2025
ba94d3b
test: Increase test coverage for convert
dndungu Jul 29, 2025
72f10dd
test: Increase test coverage for types
dndungu Jul 29, 2025
2b63c02
test: Increase test coverage for arithmetic
dndungu Jul 29, 2025
3e5a9c2
test: Increase test coverage for arithmetic
dndungu Jul 29, 2025
3a4ed0a
test: Increase test coverage for convert
dndungu Jul 29, 2025
dfe7fc6
test: Increase test coverage for convert
dndungu Jul 29, 2025
3422a45
refactor(convert): use external float16 library
dndungu Jul 29, 2025
f4cfb58
wip
dndungu Jul 30, 2025
661c3cc
wip
dndungu Jul 31, 2025
0e9ea52
chore: upgrade to go 1.25
dndungu Aug 21, 2025
094ea9f
refactor: update core float16 implementation
dndungu Aug 22, 2025
d9c3417
chore: update go.mod
dndungu Aug 22, 2025
c88c40d
float16: add FromFloat32WithRounding and shouldRoundWithMode; fix dup…
dndungu Aug 25, 2025
b682b71
float16: honor rounding mode in IEEE add/mul/div via FromFloat32WithR…
dndungu Aug 25, 2025
d14807f
tests: verify rounding parameter effects for Add/Mul/Div in IEEE mode…
dndungu Aug 25, 2025
555ec5f
build: add Makefile with test, lint, lint-fix, coverage targets
dndungu Aug 25, 2025
5c6fe27
float16: restore rounding/conversion modes, error types, FloatClass; …
dndungu Aug 25, 2025
58b0d79
Makefile: run with GOWORK=off; tolerate golangci-lint workspace mismatch
dndungu Aug 25, 2025
7568607
chore: add concise PR template
dndungu Aug 25, 2025
a4c5f74
chore: remove stray file '=q' (ignored by Go toolchain)
dndungu Aug 25, 2025
2ff643d
feat(bfloat16): Implement core infrastructure for BFloat16 (Phase 1 c…
dndungu Aug 25, 2025
be5aba9
refactor(format): Apply formatting and incorporate developer changes
dndungu Aug 25, 2025
9bafbe8
chore(lint): add golangci-lint configuration
dndungu Aug 25, 2025
4e6bf12
feat(bfloat16): add arithmetic operations with mode support
dndungu Mar 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# PR Title

## Summary
- What: Briefly describe the change
- Why: Problem it solves / value
- How: Key changes (bullets)

## Testing
- [ ] Unit tests added/updated
- [ ] Manual/Integration verified
- [ ] CI passes locally (tests + lint)

## Risk/Impact
- Breaking changes: yes/no (explain)
- Migration notes (if any)

## Checklist
- [ ] Small, logical commits
- [ ] Docs/README updated (if user-facing)
- [ ] Backwards compatible (or documented)
- [ ] Security/Privacy reviewed (if applicable)
- [ ] Linked issue(s) / references

## Notes
- Screenshots/logs (if relevant)
33 changes: 33 additions & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# golangci-lint configuration (v2 schema).
version: "2"

run:
  timeout: 5m

linters:
  enable:
    - govet
    - ineffassign
    - misspell
    - goconst
    - gocyclo
    - staticcheck
  # In the v2 schema, per-linter options live under linters.settings; the v1
  # top-level `linters-settings` key is not part of the v2 configuration.
  settings:
    gocyclo:
      min-complexity: 15
    goconst:
      min-len: 3
      min-occurrences: 3
    govet:
      enable-all: true
    misspell:
      locale: US

# Formatters are configured separately from linters in the v2 schema.
formatters:
  enable:
    - gofmt
    - goimports

issues:
  # 0 disables the per-linter and per-message caps so every finding is shown.
  max-issues-per-linter: 0
  max-same-issues: 0
74 changes: 74 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
SHELL := /bin/sh

PKG := ./...
COVER_DIR := coverage
COVER_PROFILE := $(COVER_DIR)/coverage.out
COVER_HTML := $(COVER_DIR)/coverage.html

.PHONY: all test race vet fmt fmt-check lint lint-fix cover cover-html ci

all: test

# Run unit tests
test:
GOWORK=off go test $(PKG)

# Run tests with race detector
race:
GOWORK=off go test -race $(PKG)

# Static analysis
vet:
GOWORK=off go vet $(PKG)

# Apply code formatters in place.
# `fmt` is the name referenced by .PHONY and by the lint-fix prerequisite;
# `format` is kept as a second target name so existing callers keep working.
# gofmt runs with -s so the result satisfies fmt-check / lint, which both
# verify with `gofmt -s -l`.
.PHONY: fmt format
fmt format:
	@echo "🎨 Applying code formatters..."
	@echo " - Standard Go formatting..."
	@gofmt -s -w .
	@echo " - Organizing imports..."
	@goimports -w .
	@echo " - Strict formatting with gofumpt..."
	@gofumpt -w . 2>/dev/null || echo " (gofumpt not available, skipping)"
	@echo "✅ Code formatting complete"

# Check formatting without modifying files; fails if formatting needed
fmt-check:
@diff=$$(gofmt -s -l .); \
if [ -n "$$diff" ]; then \
echo "Files need formatting:"; echo "$$diff"; exit 1; \
else \
echo "Formatting OK"; \
fi

# Lint: go vet + formatting check + optional golangci-lint if installed
lint:
@echo "Running go vet"; GOWORK=off go vet $(PKG)
@echo "Checking formatting"; \
diff=$$(gofmt -s -l .); if [ -n "$$diff" ]; then echo "Files need formatting:"; echo "$$diff"; exit 1; else echo "Formatting OK"; fi
@if command -v golangci-lint >/dev/null 2>&1; then \
echo "Running golangci-lint"; GOWORK=off golangci-lint run || true; \
else \
echo "golangci-lint not installed; skipping"; \
fi

# Attempt to fix lint: gofmt + optional golangci-lint --fix if installed
# Depends on `format`, the formatter target this Makefile actually defines
# (there is no rule named `fmt`, so the previous prerequisite could not be built).
# golangci-lint runs with GOWORK=off, matching the `lint` target; a failing
# run is tolerated (|| true) because this target is best-effort.
lint-fix: format
	@if command -v golangci-lint >/dev/null 2>&1; then \
		echo "Running golangci-lint --fix"; GOWORK=off golangci-lint run --fix || true; \
	else \
		echo "golangci-lint not installed; skipping"; \
	fi

# Generate coverage profile and print total coverage
cover:
mkdir -p $(COVER_DIR)
GOWORK=off go test -covermode=atomic -coverprofile=$(COVER_PROFILE) $(PKG)
go tool cover -func=$(COVER_PROFILE) | tail -n 1

# Generate HTML coverage report
cover-html: cover
go tool cover -html=$(COVER_PROFILE) -o $(COVER_HTML)
@echo "Wrote $(COVER_HTML)"

# CI-style aggregate target
ci: fmt-check vet test cover
153 changes: 24 additions & 129 deletions arithmetic.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package float16

import (
"math/bits"
"math"
)

// Global arithmetic settings
var (
DefaultArithmeticMode = ModeIEEEArithmetic
DefaultRounding = RoundNearestEven
DefaultRounding = DefaultRoundingMode
)

// ArithmeticMode defines the precision/performance trade-off for arithmetic operations
Expand Down Expand Up @@ -88,7 +88,7 @@ func AddWithMode(a, b Float16, mode ArithmeticMode, rounding RoundingMode) (Floa
f32a := a.ToFloat32()
f32b := b.ToFloat32()
result := f32a + f32b
return ToFloat16WithMode(result, ModeIEEE, rounding)
return FromFloat32(result), nil
}

// Full IEEE 754 implementation for exact mode
Expand Down Expand Up @@ -182,7 +182,7 @@ func MulWithMode(a, b Float16, mode ArithmeticMode, rounding RoundingMode) (Floa
f32a := a.ToFloat32()
f32b := b.ToFloat32()
result := f32a * f32b
return ToFloat16WithMode(result, ModeIEEE, rounding)
return FromFloat32(result), nil
}

// Full IEEE 754 implementation
Expand Down Expand Up @@ -320,7 +320,7 @@ func DivWithMode(a, b Float16, mode ArithmeticMode, rounding RoundingMode) (Floa
f32a := a.ToFloat32()
f32b := b.ToFloat32()
result := f32a / f32b
return ToFloat16WithMode(result, ModeIEEE, rounding)
return FromFloat32(result), nil
}

// Full IEEE 754 implementation
Expand All @@ -331,121 +331,12 @@ func DivWithMode(a, b Float16, mode ArithmeticMode, rounding RoundingMode) (Floa

// addIEEE754 implements full IEEE 754 addition
func addIEEE754(a, b Float16, rounding RoundingMode) (Float16, error) {
// Extract components
signA, expA, mantA := a.extractComponents()
signB, expB, mantB := b.extractComponents()

// Ensure a has the larger magnitude for simpler logic
if expA < expB || (expA == expB && mantA < mantB) {
signA, expA, mantA, signB, expB, mantB = signB, expB, mantB, signA, expA, mantA
}

// Handle subnormal numbers by normalizing
if expA == 0 && mantA != 0 {
// Normalize a
shift := leadingZeros10(mantA)
mantA <<= (shift + 1)
mantA &= MantissaMask
expA = uint16(1 - shift)
} else if expA != 0 {
// Add implicit leading 1 for normal numbers
mantA |= (1 << MantissaLen)
expA = expA
}

if expB == 0 && mantB != 0 {
// Normalize b
shift := leadingZeros10(mantB)
mantB <<= (shift + 1)
mantB &= MantissaMask
expB = uint16(1 - shift)
} else if expB != 0 {
// Add implicit leading 1 for normal numbers
mantB |= (1 << MantissaLen)
}

// Align mantissas by shifting the smaller one
expDiff := int(expA) - int(expB)
if expDiff > 0 {
if expDiff >= 24 {
// b is too small to affect the result
return a, nil
}
mantB >>= expDiff
}

var resultSign uint16
var resultMant uint32
var resultExp int

if signA == signB {
// Same sign: add magnitudes
resultSign = signA
resultMant = uint32(mantA) + uint32(mantB)
resultExp = int(expA)
} else {
// Different signs: subtract magnitudes
if mantA >= mantB {
resultSign = signA
resultMant = uint32(mantA) - uint32(mantB)
} else {
resultSign = signB
resultMant = uint32(mantB) - uint32(mantA)
}
resultExp = int(expA)
}

// Handle zero result
if resultMant == 0 {
return PositiveZero, nil
}

// Normalize result
if resultMant >= (1 << (MantissaLen + 1)) {
// Overflow: shift right and increment exponent
resultMant >>= 1
resultExp++
} else {
// Find leading 1 and normalize
leadingZeros := 31 - bits.Len32(resultMant)
if leadingZeros > 0 {
shift := leadingZeros - (31 - MantissaLen - 1)
if shift > 0 {
resultMant <<= shift
resultExp -= shift
}
}
}

// Check for overflow
if resultExp >= ExponentInfinity {
if resultSign != 0 {
return NegativeInfinity, nil
}
return PositiveInfinity, nil
}

// Check for underflow
if resultExp <= 0 {
// Convert to subnormal or zero
shift := 1 - resultExp
if shift >= 24 {
// Underflow to zero
if resultSign != 0 {
return NegativeZero, nil
}
return PositiveZero, nil
}
resultMant >>= shift
resultExp = 0
}

// Remove implicit leading 1 for normal numbers
if resultExp > 0 {
resultMant &= MantissaMask
}

return packComponents(resultSign, uint16(resultExp), uint16(resultMant)), nil
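// Add via float32: every float16 value is exactly representable as a float32,
// and float32's 24-bit significand (>= 2*11+2) makes the second rounding to
// float16 harmless under round-to-nearest.
// NOTE(review): the float32 sum is itself rounded to nearest, so for directed
// rounding modes with widely separated operands the final result may differ
// from a single correctly rounded float16 sum — confirm.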
f32a := a.ToFloat32()
f32b := b.ToFloat32()
result := f32a + f32b
return FromFloat32WithRounding(result, rounding), nil
}

// mulIEEE754 implements full IEEE 754 multiplication
Expand All @@ -455,7 +346,7 @@ func mulIEEE754(a, b Float16, rounding RoundingMode) (Float16, error) {
f32a := a.ToFloat32()
f32b := b.ToFloat32()
result := f32a * f32b
return ToFloat16WithMode(result, ModeIEEE, rounding)
return FromFloat32WithRounding(result, rounding), nil
}

// divIEEE754 implements full IEEE 754 division
Expand All @@ -465,9 +356,7 @@ func divIEEE754(a, b Float16, rounding RoundingMode) (Float16, error) {
f32a := a.ToFloat32()
f32b := b.ToFloat32()
result := f32a / f32b

// Use the provided rounding mode for the conversion back to Float16
return ToFloat16WithMode(result, ModeExact, rounding)
return FromFloat32WithRounding(result, rounding), nil
}

// Comparison operations
Expand Down Expand Up @@ -542,7 +431,13 @@ func Min(a, b Float16) Float16 {
if b.IsNaN() {
return a
}

// Both operands are zero: return -0 if either one is -0, so the result does
// not depend on argument order.
if a.IsZero() && b.IsZero() {
if a.Signbit() {
return a // a is -0
}
return b // b is -0, or both are +0
}
if Less(a, b) {
return a
}
Expand Down Expand Up @@ -630,7 +525,7 @@ func ScaleSlice(s []Float16, scalar Float16) []Float16 {

// SumSlice returns the sum of all elements in the slice
func SumSlice(s []Float16) Float16 {
var sum Float16 = PositiveZero
sum := PositiveZero
for _, v := range s {
sum = Add(sum, v)
}
Expand All @@ -643,7 +538,7 @@ func DotProduct(a, b []Float16) Float16 {
panic("float16: slice length mismatch")
}

var sum Float16 = PositiveZero
sum := PositiveZero
for i := range a {
product := Mul(a[i], b[i])
sum = Add(sum, product)
Expand All @@ -653,10 +548,10 @@ func DotProduct(a, b []Float16) Float16 {

// Norm2 computes the L2 norm (Euclidean norm) of a Float16 slice
func Norm2(s []Float16) Float16 {
var sumSquares Float16 = PositiveZero
sumSquares := PositiveZero
for _, v := range s {
square := Mul(v, v)
sumSquares = Add(sumSquares, square)
}
return Sqrt(sumSquares)
return FromFloat64(math.Sqrt(sumSquares.ToFloat64()))
}
Loading