go/src/math/big/float.go

1299 lines
31 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file implements multi-precision floating-point numbers.
// Like in the GNU MPFR library (http://www.mpfr.org/), operands
// can be of mixed precision. Unlike MPFR, the rounding mode is
// not specified with each operation, but with each operand. The
// rounding mode of the result operand determines the rounding
// mode of an operation. This is a from-scratch implementation.
// CAUTION: WORK IN PROGRESS - USE AT YOUR OWN RISK.
package big
import (
"fmt"
"math"
)
const debugFloat = true // enable for debugging
// A Float represents a multi-precision floating point number of the form
//
// sign × mantissa × 2**exponent
//
// with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp (with the
// exception of 0 and Inf which have a 0 mantissa and special exponents).
//
// Each Float value also has a precision, rounding mode, and accuracy.
//
// The precision is the maximum number of mantissa bits available to
// represent the value. The rounding mode specifies how a result should
// be rounded to fit into the mantissa bits, and accuracy describes the
// rounding error with respect to the exact result.
//
// All operations, including setters, that specify a *Float for the result,
// usually via the receiver, round their result to the result's precision
// and according to its rounding mode, unless specified otherwise. If the
// result precision is 0 (see below), it is set to the precision of the
// argument with the largest precision value before any rounding takes
// place. The rounding mode remains unchanged, thus uninitialized Floats
// provided as result arguments will "inherit" a reasonble precision from
// the incoming arguments and their mode is the zero value for RoundingMode
// (ToNearestEven).
//
// By setting the desired precision to 24 or 53 and using ToNearestEven
// rounding, Float operations produce the same results as the corresponding
// float32 or float64 IEEE-754 arithmetic for normalized operands (no NaNs
// or denormalized numbers). Additionally, positive and negative zeros and
// infinities are fully supported.
//
// The zero (uninitialized) value for a Float is ready to use and represents
// the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven.
//
type Float struct {
mode RoundingMode
acc Accuracy
neg bool
mant nat
exp int32
prec uint // TODO(gri) make this a 32bit field
}
// Internal representation: The mantissa bits x.mant of a Float x are stored
// in a nat slice long enough to hold up to x.prec bits; the slice may (but
// doesn't have to) be shorter if the mantissa contains trailing 0 bits.
// Unless x is a zero or an infinity, x.mant is normalized such that the
// msb of x.mant == 1 (i.e., the msb is shifted all the way "to the left").
// Thus, if the mantissa has trailing 0 bits or x.prec is not a multiple
// of the the Word size _W, x.mant[0] has trailing zero bits. Zero and Inf
// values have an empty mantissa and a 0 or infExp exponent, respectively.
const (
MaxExp = math.MaxInt32 // largest supported exponent magnitude
infExp = -MaxExp - 1 // exponent for Inf values
)
// NewInf returns a new infinite Float value with value +Inf (sign >= 0),
// or -Inf (sign < 0).
func NewInf(sign int) *Float {
return &Float{neg: sign < 0, exp: infExp}
}
// Accuracy describes the rounding error produced by the most recent
// operation that generated a Float value, relative to the exact value:
//
// -1: below exact value
// 0: exact value
// +1: above exact value
//
type Accuracy int8
// Constants describing the Accuracy of a Float.
const (
Below Accuracy = -1
Exact Accuracy = 0
Above Accuracy = +1
)
func (a Accuracy) String() string {
switch {
case a < 0:
return "below"
default:
return "exact"
case a > 0:
return "above"
}
}
// RoundingMode determines how a Float value is rounded to the
// desired precision. Rounding may change the Float value; the
// rounding error is described by the Float's Accuracy.
type RoundingMode uint8
// The following rounding modes are supported.
const (
ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
ToNearestAway // == IEEE 754-2008 roundTiesToAway
ToZero // == IEEE 754-2008 roundTowardZero
AwayFromZero // no IEEE 754-2008 equivalent
ToNegativeInf // == IEEE 754-2008 roundTowardNegative
ToPositiveInf // == IEEE 754-2008 roundTowardPositive
)
func (mode RoundingMode) String() string {
switch mode {
case ToNearestEven:
return "ToNearestEven"
case ToNearestAway:
return "ToNearestAway"
case ToZero:
return "ToZero"
case AwayFromZero:
return "AwayFromZero"
case ToNegativeInf:
return "ToNegativeInf"
case ToPositiveInf:
return "ToPositiveInf"
}
panic("unreachable")
}
// SetPrec sets z's precision to prec and returns the (possibly) rounded
// value of z. Rounding occurs according to z's rounding mode if the mantissa
// cannot be represented in prec bits without loss of precision.
func (z *Float) SetPrec(prec uint) *Float {
old := z.prec
z.prec = prec
if prec < old {
z.round(0)
}
return z
}
// SetMode sets z's rounding mode to mode and returns z.
// z remains unchanged otherwise.
func (z *Float) SetMode(mode RoundingMode) *Float {
z.mode = mode
return z
}
// Prec returns the mantissa precision of x in bits.
// The result may be 0 for |x| == 0 or |x| == Inf.
func (x *Float) Prec() uint {
return uint(x.prec)
}
// Acc returns the accuracy of x produced by the most recent operation.
func (x *Float) Acc() Accuracy {
return x.acc
}
// Mode returns the rounding mode of x.
func (x *Float) Mode() RoundingMode {
return x.mode
}
// Sign returns:
//
// -1 if x < 0
// 0 if x == 0 or x == -0
// +1 if x > 0
//
func (x *Float) Sign() int {
s := 0
if len(x.mant) != 0 || x.exp == infExp {
s = 1 // non-zero x
}
if x.neg {
s = -s
}
return s
}
// MantExp breaks x into its mantissa and exponent components.
// It returns mant and exp satisfying x == mant × 2**exp, with
// the absolute value of mant satisfying 0.5 <= |mant| < 1.0.
// mant has the same precision and rounding mode as x.
//
// Special cases are:
//
// ( ±0).MantExp() = ±0, 0
// (±Inf).MantExp() = ±Inf, 0
//
// MantExp does not modify x; the result mant is a new Float.
func (x *Float) MantExp() (mant *Float, exp int) {
mant = new(Float).Copy(x)
if x.exp != infExp {
mant.exp = 0
exp = int(x.exp)
}
return
}
// SetMantExp is the inverse of MantExp. It sets z to mant × 2**exp and
// and returns z. The result z has the same precision and rounding mode
// as mant.
//
// Special cases are:
//
// z.SetMantExp( ±0, exp) = ±0
// z.SetMantExp(±Inf, exp) = ±Inf
//
// The result is ±Inf if the magnitude of exp is > MaxExp.
func (z *Float) SetMantExp(mant *Float, exp int) *Float {
z.Copy(mant)
if len(z.mant) == 0 || z.exp == infExp {
return z
}
z.setExp(int64(exp))
return z
}
// IsInt reports whether x is an integer.
// ±Inf are not considered integers.
func (x *Float) IsInt() bool {
if debugFloat {
validate(x)
}
// pick off easy cases
if x.exp <= 0 {
// |x| < 1 || |x| == Inf
return len(x.mant) == 0 && x.exp != infExp
}
// x.exp > 0
return x.prec <= uint(x.exp) || x.minPrec() <= uint(x.exp) // not enough bits for fractional mantissa
}
// IsInf reports whether x is an infinity, according to sign.
// If sign > 0, IsInf reports whether x is positive infinity.
// If sign < 0, IsInf reports whether x is negative infinity.
// If sign == 0, IsInf reports whether x is either infinity.
func (x *Float) IsInf(sign int) bool {
return x.exp == infExp && (sign == 0 || x.neg == (sign < 0))
}
// setExp sets the exponent for z.
// If the exponent's magnitude is too large, z becomes ±Inf.
func (z *Float) setExp(e int64) {
if -MaxExp <= e && e <= MaxExp {
if len(z.mant) == 0 {
e = 0
}
z.exp = int32(e)
return
}
// Inf
z.mant = z.mant[:0]
z.exp = infExp
}
// debugging support
func validate(args ...*Float) {
for i, x := range args {
const msb = 1 << (_W - 1)
m := len(x.mant)
if m == 0 {
// 0.0 or Inf
if x.exp != 0 && x.exp != infExp {
panic(fmt.Sprintf("#%d: %empty matissa with invalid exponent %d", i, x.exp))
}
continue
}
if x.mant[m-1]&msb == 0 {
panic(fmt.Sprintf("#%d: msb not set in last word %#x of %s", i, x.mant[m-1], x.Format('p', 0)))
}
if x.prec <= 0 {
panic(fmt.Sprintf("#%d: invalid precision %d", i, x.prec))
}
}
}
// round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
// sbit must be 0 or 1 and summarizes any "sticky bit" information one might
// have before calling round. z's mantissa must be normalized (with the msb set)
// or empty.
func (z *Float) round(sbit uint) {
if debugFloat {
validate(z)
}
z.acc = Exact
// handle zero and Inf
m := uint(len(z.mant)) // present mantissa length in words
if m == 0 {
if z.exp != infExp {
z.exp = 0
}
return
}
// m > 0 implies z.prec > 0 (checked by validate)
bits := m * _W // present mantissa bits
if bits <= z.prec {
// mantissa fits => nothing to do
return
}
// bits > z.prec
n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision
// Rounding is based on two bits: the rounding bit (rbit) and the
// sticky bit (sbit). The rbit is the bit immediately before the
// z.prec leading mantissa bits (the "0.5"). The sbit is set if any
// of the bits before the rbit are set (the "0.25", "0.125", etc.):
//
// rbit sbit => "fractional part"
//
// 0 0 == 0
// 0 1 > 0 , < 0.5
// 1 0 == 0.5
// 1 1 > 0.5, < 1.0
// bits > z.prec: mantissa too large => round
r := bits - z.prec - 1 // rounding bit position; r >= 0
rbit := z.mant.bit(r) // rounding bit
if sbit == 0 {
sbit = z.mant.sticky(r)
}
if debugFloat && sbit&^1 != 0 {
panic(fmt.Sprintf("invalid sbit %#x", sbit))
}
// convert ToXInf rounding modes
mode := z.mode
switch mode {
case ToNegativeInf:
mode = ToZero
if z.neg {
mode = AwayFromZero
}
case ToPositiveInf:
mode = AwayFromZero
if z.neg {
mode = ToZero
}
}
// cut off extra words
if m > n {
copy(z.mant, z.mant[m-n:]) // move n last words to front
z.mant = z.mant[:n]
}
// determine number of trailing zero bits t
t := n*_W - z.prec // 0 <= t < _W
lsb := Word(1) << t
// make rounding decision
// TODO(gri) This can be simplified (see roundBits in float_test.go).
switch mode {
case ToZero:
// nothing to do
case ToNearestEven, ToNearestAway:
if rbit == 0 {
// rounding bits == 0b0x
mode = ToZero
} else if sbit == 1 {
// rounding bits == 0b11
mode = AwayFromZero
}
case AwayFromZero:
if rbit|sbit == 0 {
mode = ToZero
}
default:
// ToXInf modes have been converted to ToZero or AwayFromZero
panic("unreachable")
}
// round and determine accuracy
switch mode {
case ToZero:
if rbit|sbit != 0 {
z.acc = Below
}
case ToNearestEven, ToNearestAway:
if debugFloat && rbit != 1 {
panic("internal error in rounding")
}
if mode == ToNearestEven && sbit == 0 && z.mant[0]&lsb == 0 {
z.acc = Below
break
}
// mode == ToNearestAway || sbit == 1 || z.mant[0]&lsb != 0
fallthrough
case AwayFromZero:
// add 1 to mantissa
if addVW(z.mant, z.mant, lsb) != 0 {
// overflow => shift mantissa right by 1 and add msb
shrVU(z.mant, z.mant, 1)
z.mant[n-1] |= 1 << (_W - 1)
// adjust exponent
z.exp++
}
z.acc = Above
}
// zero out trailing bits in least-significant word
z.mant[0] &^= lsb - 1
// update accuracy
if z.neg {
z.acc = -z.acc
}
if debugFloat {
validate(z)
}
return
}
// Round sets z to the value of x rounded according to mode to prec bits and returns z.
// TODO(gri) rethink this signature.
func (z *Float) Round(x *Float, prec uint, mode RoundingMode) *Float {
z.Copy(x)
z.prec = prec
z.mode = mode
z.round(0)
return z
}
// nlz returns the number of leading zero bits in x.
func nlz(x Word) uint {
return _W - uint(bitLen(x))
}
func nlz64(x uint64) uint {
// TODO(gri) this can be done more nicely
if _W == 32 {
if x>>32 == 0 {
return 32 + nlz(Word(x))
}
return nlz(Word(x >> 32))
}
if _W == 64 {
return nlz(Word(x))
}
panic("unreachable")
}
// SetUint64 sets z to the (possibly rounded) value of x and returns z.
// If z's precision is 0, it is changed to 64 (and rounding will have
// no effect).
func (z *Float) SetUint64(x uint64) *Float {
if z.prec == 0 {
z.prec = 64
}
z.acc = Exact
z.neg = false
if x == 0 {
z.mant = z.mant[:0]
z.exp = 0
return z
}
// x != 0
s := nlz64(x)
z.mant = z.mant.setUint64(x << s)
z.exp = int32(64 - s) // always fits
if z.prec < 64 {
z.round(0)
}
return z
}
// SetInt64 sets z to the (possibly rounded) value of x and returns z.
// If z's precision is 0, it is changed to 64 (and rounding will have
// no effect).
func (z *Float) SetInt64(x int64) *Float {
u := x
if u < 0 {
u = -u
}
z.SetUint64(uint64(u))
z.neg = x < 0
return z
}
// SetFloat64 sets z to the (possibly rounded) value of x and returns z.
// If z's precision is 0, it is changed to 53 (and rounding will have
// no effect).
// If x is denormalized or NaN, the result is unspecified.
// TODO(gri) should return nil in those cases
func (z *Float) SetFloat64(x float64) *Float {
if z.prec == 0 {
z.prec = 53
}
z.acc = Exact
z.neg = math.Signbit(x) // handle -0 correctly
if math.IsInf(x, 0) {
z.mant = z.mant[:0]
z.exp = infExp
return z
}
if x == 0 {
z.mant = z.mant[:0]
z.exp = 0
return z
}
// x != 0
fmant, exp := math.Frexp(x) // get normalized mantissa
z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11)
z.exp = int32(exp) // always fits
if z.prec < 53 {
z.round(0)
}
return z
}
// fnorm normalizes mantissa m by shifting it to the left
// such that the msb of the most-significant word (msw) is 1.
// It returns the shift amount. It assumes that len(m) != 0.
func fnorm(m nat) uint {
if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) {
panic("msw of mantissa is 0")
}
s := nlz(m[len(m)-1])
if s > 0 {
c := shlVU(m, m, s)
if debugFloat && c != 0 {
panic("nlz or shlVU incorrect")
}
}
return s
}
// SetInt sets z to the (possibly rounded) value of x and returns z.
// If z's precision is 0, it is changed to the larger of x.BitLen()
// or 64 (and rounding will have no effect).
func (z *Float) SetInt(x *Int) *Float {
// TODO(gri) can be more efficient if z.prec > 0
// but small compared to the size of x, or if there
// are many trailing 0's.
bits := uint(x.BitLen())
if z.prec == 0 {
z.prec = umax(bits, 64)
}
z.acc = Exact
z.neg = x.neg
if len(x.abs) == 0 {
z.mant = z.mant[:0]
z.exp = 0
return z
}
// x != 0
z.mant = z.mant.set(x.abs)
fnorm(z.mant)
z.setExp(int64(bits))
if z.prec < bits {
z.round(0)
}
return z
}
// SetRat sets z to the (possibly rounded) value of x and returns z.
// If z's precision is 0, it is changed to the largest of a.BitLen(),
// b.BitLen(), or 64; with x = a/b.
func (z *Float) SetRat(x *Rat) *Float {
// TODO(gri) can be more efficient if x is an integer
var a, b Float
a.SetInt(x.Num())
b.SetInt(x.Denom())
if z.prec == 0 {
z.prec = umax(a.prec, b.prec)
}
return z.Quo(&a, &b)
}
// Set sets z to the (possibly rounded) value of x and returns z.
// If z's precision is 0, it is changed to the precision of x
// before setting z (and rounding will have no effect).
// Rounding is performed according to z's precision and rounding
// mode; and z's accuracy reports the result error relative to the
// exact (not rounded) result.
func (z *Float) Set(x *Float) *Float {
if z != x {
if z.prec == 0 {
z.prec = x.prec
}
z.acc = Exact
z.neg = x.neg
z.exp = x.exp
z.mant = z.mant.set(x.mant)
if z.prec < x.prec {
z.round(0)
}
}
return z
}
// Copy sets z to x, with the same precision and rounding mode as x,
// and returns z.
func (z *Float) Copy(x *Float) *Float {
if z != x {
z.acc = Exact
z.neg = x.neg
z.exp = x.exp
z.mant = z.mant.set(x.mant)
z.prec = x.prec
z.mode = x.mode
}
return z
}
func high64(x nat) uint64 {
i := len(x)
if i == 0 {
return 0
}
// i > 0
v := uint64(x[i-1])
if _W == 32 {
v <<= 32
if i > 1 {
v |= uint64(x[i-2])
}
}
return v
}
// minPrec returns the minimum precision required to represent
// x without loss of accuracy.
// TODO(gri) this might be useful to export, perhaps under a better name
func (x *Float) minPrec() uint {
return uint(len(x.mant))*_W - x.mant.trailingZeroBits()
}
// Uint64 returns the unsigned integer resulting from truncating x
// towards zero. If 0 <= x <= math.MaxUint64, the result is Exact
// if x is an integer and Below otherwise.
// The result is (0, Above) for x < 0, and (math.MaxUint64, Below)
// for x > math.MaxUint64.
func (x *Float) Uint64() (uint64, Accuracy) {
if debugFloat {
validate(x)
}
switch x.ord() {
case -2, -1:
// x < 0
return 0, Above
case 0:
// x == 0 || x == -0
return 0, Exact
case 1:
// 0 < x < +Inf
if x.exp <= 0 {
// 0 < x < 1
return 0, Below
}
// 1 <= x < +Inf
if x.exp <= 64 {
// u = trunc(x) fits into a uint64
u := high64(x.mant) >> (64 - uint32(x.exp))
if x.minPrec() <= 64 {
return u, Exact
}
return u, Below // x truncated
}
fallthrough // x too large
case 2:
// x == +Inf
return math.MaxUint64, Below
}
panic("unreachable")
}
// Int64 returns the integer resulting from truncating x towards zero.
// If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is
// an integer, and Above (x < 0) or Below (x > 0) otherwise.
// The result is (math.MinInt64, Above) for x < math.MinInt64, and
// (math.MaxInt64, Below) for x > math.MaxInt64.
func (x *Float) Int64() (int64, Accuracy) {
if debugFloat {
validate(x)
}
switch x.ord() {
case -2:
// x == -Inf
return math.MinInt64, Above
case 0:
// x == 0 || x == -0
return 0, Exact
case -1, 1:
// 0 < |x| < +Inf
acc := Below
if x.neg {
acc = Above
}
if x.exp <= 0 {
// 0 < |x| < 1
return 0, acc
}
// 1 <= |x| < +Inf
if x.exp <= 63 {
// i = trunc(x) fits into an int64 (excluding math.MinInt64)
i := int64(high64(x.mant) >> (64 - uint32(x.exp)))
if x.neg {
i = -i
}
if x.minPrec() <= 63 {
return i, Exact
}
return i, acc // x truncated
}
if x.neg {
// check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64))
if x.exp == 64 && x.minPrec() == 1 {
acc = Exact
}
return math.MinInt64, acc
}
fallthrough
case 2:
// x == +Inf
return math.MaxInt64, Below
}
panic("unreachable")
}
// Float64 returns the closest float64 value of x
// by rounding to nearest with 53 bits precision.
// TODO(gri) implement/document error scenarios.
func (x *Float) Float64() (float64, Accuracy) {
// x == ±Inf
if x.exp == infExp {
var sign int
if x.neg {
sign = -1
}
return math.Inf(sign), Exact
}
// x == 0
if len(x.mant) == 0 {
return 0, Exact
}
// x != 0
r := new(Float).Round(x, 53, ToNearestEven)
var s uint64
if r.neg {
s = 1 << 63
}
e := uint64(1022+r.exp) & 0x7ff // TODO(gri) check for overflow
m := high64(r.mant) >> 11 & (1<<52 - 1)
return math.Float64frombits(s | e<<52 | m), r.acc
}
// Int returns the result of truncating x towards zero; or nil
// if x is an infinity. The result is Exact if x.IsInt();
// otherwise it is Below for x > 0, and Above for x < 0.
func (x *Float) Int() (res *Int, acc Accuracy) {
if debugFloat {
validate(x)
}
// accuracy for inexact results
acc = Below // truncation
if x.neg {
acc = Above
}
// pick off easy cases
if x.exp <= 0 {
// |x| < 1 || |x| == Inf
if x.exp == infExp {
return nil, acc // ±Inf
}
if len(x.mant) == 0 {
acc = Exact // ±0
}
return new(Int), acc // ±0.xxx
}
// x.exp > 0
// x.mant[len(x.mant)-1] != 0
// determine minimum required precision for x
allBits := uint(len(x.mant)) * _W
exp := uint(x.exp)
if x.minPrec() <= exp {
acc = Exact
}
// shift mantissa as needed
res = &Int{neg: x.neg}
// TODO(gri) should have a shift that takes positive and negative shift counts
switch {
case exp > allBits:
res.abs = res.abs.shl(x.mant, exp-allBits)
default:
res.abs = res.abs.set(x.mant)
case exp < allBits:
res.abs = res.abs.shr(x.mant, allBits-exp)
}
return
}
// BUG(gri) Rat is not yet implemented
func (x *Float) Rat() *Rat {
panic("unimplemented")
}
// Abs sets z to the (possibly rounded) value |x| (the absolute value of x)
// and returns z.
func (z *Float) Abs(x *Float) *Float {
z.Set(x)
z.neg = false
return z
}
// Neg sets z to the (possibly rounded) value of x with its sign negated,
// and returns z.
func (z *Float) Neg(x *Float) *Float {
z.Set(x)
z.neg = !z.neg
return z
}
// z = x + y, ignoring signs of x and y.
// x and y must not be 0 or an Inf.
func (z *Float) uadd(x, y *Float) {
// Note: This implementation requires 2 shifts most of the
// time. It is also inefficient if exponents or precisions
// differ by wide margins. The following article describes
// an efficient (but much more complicated) implementation
// compatible with the internal representation used here:
//
// Vincent Lefèvre: "The Generic Multiple-Precision Floating-
// Point Addition With Exact Rounding (as in the MPFR Library)"
// http://www.vinc17.net/research/papers/rnc6.pdf
if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
panic("uadd called with 0 argument")
}
// compute exponents ex, ey for mantissa with "binary point"
// on the right (mantissa.0) - use int64 to avoid overflow
ex := int64(x.exp) - int64(len(x.mant))*_W
ey := int64(y.exp) - int64(len(y.mant))*_W
// TODO(gri) having a combined add-and-shift primitive
// could make this code significantly faster
switch {
case ex < ey:
// cannot re-use z.mant w/o testing for aliasing
t := nat(nil).shl(y.mant, uint(ey-ex))
z.mant = z.mant.add(x.mant, t)
default:
// ex == ey, no shift needed
z.mant = z.mant.add(x.mant, y.mant)
case ex > ey:
// cannot re-use z.mant w/o testing for aliasing
t := nat(nil).shl(x.mant, uint(ex-ey))
z.mant = z.mant.add(t, y.mant)
ex = ey
}
// len(z.mant) > 0
z.setExp(ex + int64(len(z.mant))*_W - int64(fnorm(z.mant)))
z.round(0)
}
// z = x - y for x >= y, ignoring signs of x and y.
// x and y must not be 0 or an Inf.
func (z *Float) usub(x, y *Float) {
// This code is symmetric to uadd.
// We have not factored the common code out because
// eventually uadd (and usub) should be optimized
// by special-casing, and the code will diverge.
if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
panic("usub called with 0 argument")
}
ex := int64(x.exp) - int64(len(x.mant))*_W
ey := int64(y.exp) - int64(len(y.mant))*_W
switch {
case ex < ey:
// cannot re-use z.mant w/o testing for aliasing
t := nat(nil).shl(y.mant, uint(ey-ex))
z.mant = t.sub(x.mant, t)
default:
// ex == ey, no shift needed
z.mant = z.mant.sub(x.mant, y.mant)
case ex > ey:
// cannot re-use z.mant w/o testing for aliasing
t := nat(nil).shl(x.mant, uint(ex-ey))
z.mant = t.sub(t, y.mant)
ex = ey
}
// operands may have cancelled each other out
if len(z.mant) == 0 {
z.acc = Exact
z.setExp(0)
return
}
// len(z.mant) > 0
z.setExp(ex + int64(len(z.mant))*_W - int64(fnorm(z.mant)))
z.round(0)
}
// z = x * y, ignoring signs of x and y.
// x and y must not be 0 or an Inf.
func (z *Float) umul(x, y *Float) {
if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
panic("umul called with 0 argument")
}
// Note: This is doing too much work if the precision
// of z is less than the sum of the precisions of x
// and y which is often the case (e.g., if all floats
// have the same precision).
// TODO(gri) Optimize this for the common case.
e := int64(x.exp) + int64(y.exp)
z.mant = z.mant.mul(x.mant, y.mant)
// normalize mantissa
z.setExp(e - int64(fnorm(z.mant)))
z.round(0)
}
// z = x / y, ignoring signs of x and y.
// x and y must not be 0 or an Inf.
func (z *Float) uquo(x, y *Float) {
if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
panic("uquo called with 0 argument")
}
// mantissa length in words for desired result precision + 1
// (at least one extra bit so we get the rounding bit after
// the division)
n := int(z.prec/_W) + 1
// compute adjusted x.mant such that we get enough result precision
xadj := x.mant
if d := n - len(x.mant) + len(y.mant); d > 0 {
// d extra words needed => add d "0 digits" to x
xadj = make(nat, len(x.mant)+d)
copy(xadj[d:], x.mant)
}
// TODO(gri): If we have too many digits (d < 0), we should be able
// to shorten x for faster division. But we must be extra careful
// with rounding in that case.
// divide
var r nat
z.mant, r = z.mant.div(nil, xadj, y.mant)
// determine exponent
e := int64(x.exp) - int64(y.exp) - int64(len(xadj)-len(y.mant)-len(z.mant))*_W
// normalize mantissa
z.setExp(e - int64(fnorm(z.mant)))
// The result is long enough to include (at least) the rounding bit.
// If there's a non-zero remainder, the corresponding fractional part
// (if it were computed), would have a non-zero sticky bit (if it were
// zero, it couldn't have a non-zero remainder).
var sbit uint
if len(r) > 0 {
sbit = 1
}
z.round(sbit)
}
// ucmp returns -1, 0, or 1, depending on whether x < y, x == y, or x > y,
// while ignoring the signs of x and y. x and y must not be 0 or an Inf.
func (x *Float) ucmp(y *Float) int {
if debugFloat && (len(x.mant) == 0 || len(y.mant) == 0) {
panic("ucmp called with 0 argument")
}
switch {
case x.exp < y.exp:
return -1
case x.exp > y.exp:
return 1
}
// x.exp == y.exp
// compare mantissas
i := len(x.mant)
j := len(y.mant)
for i > 0 || j > 0 {
var xm, ym Word
if i > 0 {
i--
xm = x.mant[i]
}
if j > 0 {
j--
ym = y.mant[j]
}
switch {
case xm < ym:
return -1
case xm > ym:
return 1
}
}
return 0
}
// Handling of sign bit as defined by IEEE 754-2008,
// section 6.3 (note that there are no NaN Floats):
//
// When neither the inputs nor result are NaN, the sign of a product or
// quotient is the exclusive OR of the operands signs; the sign of a sum,
// or of a difference xy regarded as a sum x+(y), differs from at most
// one of the addends signs; and the sign of the result of conversions,
// the quantize operation, the roundToIntegral operations, and the
// roundToIntegralExact (see 5.3.1) is the sign of the first or only operand.
// These rules shall apply even when operands or results are zero or infinite.
//
// When the sum of two operands with opposite signs (or the difference of
// two operands with like signs) is exactly zero, the sign of that sum (or
// difference) shall be +0 in all rounding-direction attributes except
// roundTowardNegative; under that attribute, the sign of an exact zero
// sum (or difference) shall be 0. However, x+x = x(x) retains the same
// sign as x even when x is zero.
// Add sets z to the rounded sum x+y and returns z.
// If z's precision is 0, it is changed to the larger
// of x's or y's precision before the operation.
// Rounding is performed according to z's precision
// and rounding mode; and z's accuracy reports the
// result error relative to the exact (not rounded)
// result.
func (z *Float) Add(x, y *Float) *Float {
if debugFloat {
validate(x, y)
}
if z.prec == 0 {
z.prec = umax(x.prec, y.prec)
}
// TODO(gri) what about -0?
if len(y.mant) == 0 {
// TODO(gri) handle Inf
return z.Set(x)
}
if len(x.mant) == 0 {
// TODO(gri) handle Inf
return z.Set(y)
}
// x, y != 0
neg := x.neg
if x.neg == y.neg {
// x + y == x + y
// (-x) + (-y) == -(x + y)
z.uadd(x, y)
} else {
// x + (-y) == x - y == -(y - x)
// (-x) + y == y - x == -(x - y)
if x.ucmp(y) >= 0 {
z.usub(x, y)
} else {
neg = !neg
z.usub(y, x)
}
}
z.neg = neg
return z
}
// Sub sets z to the rounded difference x-y and returns z.
// Precision, rounding, and accuracy reporting are as for Add.
func (z *Float) Sub(x, y *Float) *Float {
if debugFloat {
validate(x, y)
}
if z.prec == 0 {
z.prec = umax(x.prec, y.prec)
}
// TODO(gri) what about -0?
if len(y.mant) == 0 {
// TODO(gri) handle Inf
return z.Set(x)
}
if len(x.mant) == 0 {
return z.Neg(y)
}
// x, y != 0
neg := x.neg
if x.neg != y.neg {
// x - (-y) == x + y
// (-x) - y == -(x + y)
z.uadd(x, y)
} else {
// x - y == x - y == -(y - x)
// (-x) - (-y) == y - x == -(x - y)
if x.ucmp(y) >= 0 {
z.usub(x, y)
} else {
neg = !neg
z.usub(y, x)
}
}
z.neg = neg
return z
}
// Mul sets z to the rounded product x*y and returns z.
// Precision, rounding, and accuracy reporting are as for Add.
func (z *Float) Mul(x, y *Float) *Float {
if debugFloat {
validate(x, y)
}
if z.prec == 0 {
z.prec = umax(x.prec, y.prec)
}
// TODO(gri) handle Inf
// TODO(gri) what about -0?
if len(x.mant) == 0 || len(y.mant) == 0 {
z.neg = false
z.mant = z.mant[:0]
z.exp = 0
z.acc = Exact
return z
}
// x, y != 0
z.umul(x, y)
z.neg = x.neg != y.neg
return z
}
// Quo sets z to the rounded quotient x/y and returns z.
// Precision, rounding, and accuracy reporting are as for Add.
func (z *Float) Quo(x, y *Float) *Float {
if debugFloat {
validate(x, y)
}
if z.prec == 0 {
z.prec = umax(x.prec, y.prec)
}
// TODO(gri) handle Inf
// TODO(gri) check that this is correct
z.neg = x.neg != y.neg
if len(y.mant) == 0 {
z.setExp(infExp)
return z
}
if len(x.mant) == 0 {
z.mant = z.mant[:0]
z.exp = 0
z.acc = Exact
return z
}
// x, y != 0
z.uquo(x, y)
return z
}
// Lsh sets z to the rounded x * (1<<s) and returns z.
// If z's precision is 0, it is changed to x's precision.
// Rounding is performed according to z's precision
// and rounding mode; and z's accuracy reports the
// result error relative to the exact (not rounded)
// result.
// BUG(gri) Lsh is not tested and may not work correctly.
func (z *Float) Lsh(x *Float, s uint) *Float {
if debugFloat {
validate(x)
}
if z.prec == 0 {
z.prec = x.prec
}
// TODO(gri) handle Inf
z.round(0)
z.setExp(int64(z.exp) + int64(s))
return z
}
// Rsh sets z to the rounded x / (1<<s) and returns z.
// Precision, rounding, and accuracy reporting are as for Lsh.
// BUG(gri) Rsh is not tested and may not work correctly.
func (z *Float) Rsh(x *Float, s uint) *Float {
if debugFloat {
validate(x)
}
if z.prec == 0 {
z.prec = x.prec
}
// TODO(gri) handle Inf
z.round(0)
z.setExp(int64(z.exp) - int64(s))
return z
}
// Cmp compares x and y and returns:
//
// -1 if x < y
// 0 if x == y (incl. -0 == 0)
// +1 if x > y
//
// Infinities with matching sign are equal.
func (x *Float) Cmp(y *Float) int {
if debugFloat {
validate(x, y)
}
mx := x.ord()
my := y.ord()
switch {
case mx < my:
return -1
case mx > my:
return +1
}
// mx == my
// only if |mx| == 1 we have to compare the mantissae
switch mx {
case -1:
return -x.ucmp(y)
case +1:
return +x.ucmp(y)
}
return 0
}
func umax(x, y uint) uint {
if x > y {
return x
}
return y
}
// ord classifies x and returns:
//
// -2 if -Inf == x
// -1 if -Inf < x < 0
// 0 if x == 0 (signed or unsigned)
// +1 if 0 < x < +Inf
// +2 if x == +Inf
//
// TODO(gri) export (and remove IsInf)?
func (x *Float) ord() int {
m := 1 // common case
if len(x.mant) == 0 {
m = 0
if x.exp == infExp {
m = 2
}
}
if x.neg {
m = -m
}
return m
}