Floating Point Precision

Floating point precision in Rust

Part 1 - Part 2

Rust has the f32 and f64 data types for 32-bit and 64-bit floating point numbers. If we want to go beyond that, we need to use something like the rug crate. Rug allows us to simulate larger floating point data types. This comes with a performance hit, but in many situations it is incredibly useful.

Below is the code from part one, modified to include 128-bit and 256-bit floating point numbers.

The Rust code

[package]
name = "floatlimit"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
rug = "1.8"
			 
use rug::{Assign, Float};

/// Round-trip precision demo.
///
/// Starts four variables (an `f32`, an `f64` and two `rug::Float`s) from the
/// same value, applies `x / 10 + 1` eighteen times, then applies
/// `(x - 1) * 10` seventeen times, printing one table row after every step.
fn main() {
    // How many times each phase of the experiment is repeated.
    const GROW_STEPS: usize = 18;
    const SHRINK_STEPS: usize = 17;

    // Rust has no native floating point type wider than f64, so the two widest
    // columns are rug `Float`s created with these precision arguments.
    const PREC_128: u32 = 128;
    const PREC_256: u32 = 256;

    // Single row layout shared by the header, the separator and every data row.
    macro_rules! print_row {
        ($c1:expr, $c2:expr, $c3:expr, $c4:expr) => {
            println!("{:<15} {:<20} {:<42} {:<20}", $c1, $c2, $c3, $c4)
        };
    }

    // The seed is an f32, so every variable below starts from the same
    // f32-rounded approximation of 2/3.
    let seed: f32 = 2.0 / 3.0;

    let mut single = seed; // Single precision (32-bit)
    let mut double = f64::from(seed); // Double precision (64-bit)
    let mut quad = Float::with_val(PREC_128, seed);
    let mut octo = Float::with_val(PREC_256, seed);

    println!("Performing operations with initial values of 2/3");
    print_row!("s23 (f32)", "d23 (f64)", "q23 (rug float 128)", "e23 (rug float 256)");
    print_row!("---------", "---------", "---------", "---------");

    // Phase one: repeatedly divide by ten and add one.
    for _ in 0..GROW_STEPS {
        single = single / 10.0 + 1.0;
        double = double / 10.0 + 1.0;
        quad = quad / 10.0 + 1.0;
        octo = octo / 10.0 + 1.0;
        print_row!(single, double, quad, octo);
    }

    // Phase two: undo the steps one at a time (one fewer pass than phase one).
    for _ in 0..SHRINK_STEPS {
        single = (single - 1.0) * 10.0;
        double = (double - 1.0) * 10.0;
        quad = (quad - 1.0) * 10.0;
        octo = (octo - 1.0) * 10.0;
        print_row!(single, double, quad, octo);
    }
}
                

The code output

 Performing operations with initial values of 2/3
q23 (rug float 128)                        e23 (rug float 256) 
---------                                  ---------           
1.066666668653488159179687500000000000001  1.06666666865348815917968750000000000000000000000000000000000
1.106666666865348815917968750000000000002  1.10666666686534881591796874999999999999999999999999999999999
1.110666666686534881591796875000000000003  1.11066666668653488159179687500000000000000000000000000000000
1.111066666668653488159179687500000000000  1.11106666666865348815917968749999999999999999999999999999999
1.111106666666865348815917968750000000001  1.11110666666686534881591796875000000000000000000000000000000
1.111110666666686534881591796874999999998  1.11111066666668653488159179687499999999999999999999999999999
1.111111066666668653488159179687499999998  1.11111106666666865348815917968750000000000000000000000000000
1.111111106666666865348815917968750000000  1.11111110666666686534881591796874999999999999999999999999999
1.111111110666666686534881591796875000002  1.11111111066666668653488159179687500000000000000000000000000
1.111111111066666668653488159179687500001  1.11111111106666666865348815917968750000000000000000000000000
1.111111111106666666865348815917968749998  1.11111111110666666686534881591796874999999999999999999999999
1.111111111110666666686534881591796874999  1.11111111111066666668653488159179687499999999999999999999999
1.111111111111066666668653488159179687499  1.11111111111106666666865348815917968750000000000000000000000
1.111111111111106666666865348815917968751  1.11111111111110666666686534881591796874999999999999999999999
1.111111111111110666666686534881591796872  1.11111111111111066666668653488159179687500000000000000000000
1.111111111111111066666668653488159179688  1.11111111111111106666666865348815917968749999999999999999999
1.111111111111111106666666865348815917967  1.11111111111111110666666686534881591796874999999999999999999
1.111111111111111110666666686534881591794  1.11111111111111111066666668653488159179687500000000000000000
1.111111111111111106666666865348815917938  1.11111111111111110666666686534881591796875000000000000000000
1.111111111111111066666668653488159179377  1.11111111111111106666666865348815917968750000000000000000000
1.111111111111110666666686534881591793769  1.11111111111111066666668653488159179687500000000000000000000
1.111111111111106666666865348815917937688  1.11111111111110666666686534881591796875000000000000000000000
1.111111111111066666668653488159179376881  1.11111111111106666666865348815917968750000000000000000000000
1.111111111110666666686534881591793768808  1.11111111111066666668653488159179687500000000000000000000000
1.111111111106666666865348815917937688077  1.11111111110666666686534881591796875000000000000000000000000
1.111111111066666668653488159179376880773  1.11111111106666666865348815917968750000000000000000000000000
1.111111110666666686534881591793768807732  1.11111111066666668653488159179687500000000000000000000000000
1.111111106666666865348815917937688077316  1.11111110666666686534881591796875000000000000000000000000000
1.111111066666668653488159179376880773165  1.11111106666666865348815917968750000000000000000000000000000
1.111110666666686534881591793768807731647  1.11111066666668653488159179687500000000000000000000000000000
1.111106666666865348815917937688077316467  1.11110666666686534881591796875000000000000000000000000000000
1.111066666668653488159179376880773164670  1.11106666666865348815917968750000000000000000000000000000000
1.110666666686534881591793768807731646702  1.11066666668653488159179687500000000000000000000000000000000
1.106666666865348815917937688077316467019  1.10666666686534881591796875000000000000000000000000000000000
1.066666668653488159179376880773164670190  1.06666666865348815917968750000000000000000000000000000000000
                

Mission Accomplished

As you can see, the 128 and 256 bit outputs show we now have a lot more accuracy, ending up much closer to the start number than we got with the 32 and 64 bit floats.

Part 1 - Part 2