Wednesday, September 24, 2008

Tutorial 9: Generic programming using type constraints

This Tutorial 9 shows a generic implementation of the integrator, based on type parameters.

#light

open System


let run_time_func (func: unit -> 'a) =
let watch = new System.Diagnostics.Stopwatch()
watch.Start()
let res = func ()
watch.Stop()
let diff0 = float watch.ElapsedMilliseconds / 1000.0
(diff0, res)


let map_parallel func items =
let tasks =
seq {
for i in items -> async {
return (func i)
}
}
Async.Run (Async.Parallel tasks)

type ps<'num> = struct
val pos:'num
val speed:'num
new(pos, speed) = { pos = pos; speed = speed }
end

type VecN =
One of float
| Two of float*float
| Three of float*float*float
with
static member inline plus (a: VecN) (b: VecN) =
match (a, b) with
| (One(v), One(w)) -> One(v+w)
| (Two(v0, v1), Two(w0, w1)) -> Two(v0 + w0, v1 + w1)
| (Three(v0, v1, v2), Three(w0, w1, w2)) -> Three(v0 + w0, v1 + w1, v2 + w2)
| _ -> failwith "Size mismatch"

static member inline scale (k: float) (a: VecN) =
match a with
| One(v) -> One(k * v)
| Two(v0, v1) -> Two(k * v0, k * v1)
| Three(v0, v1, v2) -> Three(k * v0, k * v1, k * v2)


type Vec1d = struct
val x: float

new (x) = { x = x }

static member inline plus (a: Vec1d) (b: Vec1d) = Vec1d(a.x + b.x)
static member inline scale (k: float) (a: Vec1d) = Vec1d(k * a.x)
end


type Vec2d = struct
val x: float
val y: float

new (x, y) = { x = x; y = y }

static member inline plus (a: Vec2d) (b: Vec2d) = Vec2d(a.x + b.x, a.y + b.y)
static member inline scale (k: float) (a: Vec2d) = Vec2d(k * a.x, k * a.y)
end


type Vec3d = struct
val x: float
val y: float
val z: float

new (x, y, z) = { x = x; y = y; z = z }

static member inline plus (a: Vec3d) (b: Vec3d) = Vec3d(a.x + b.x, a.y + b.y, a.z + b.z)
static member inline scale (k: float) (a: Vec3d) = Vec3d(k * a.x, k * a.y, k * a.z)
end


type CVec1d (x: float) = class
member v.X = x

static member inline plus (a: CVec1d) (b: CVec1d) = CVec1d(a.X + b.X)
static member inline scale (k: float) (a: CVec1d) = CVec1d(k * a.X)
end


type CVec2d (x: float, y:float) = class
member v.X = x
member v.Y = y

static member inline plus (a: CVec2d) (b: CVec2d) = CVec2d(a.X + b.X, a.Y + b.Y)
static member inline scale (k: float) (a: CVec2d) = CVec2d(k * a.X, k * a.Y)
end


type CVec3d (x: float, y: float, z: float) = class
member v.X = x
member v.Y = y
member v.Z = z

static member inline plus (a: CVec3d) (b: CVec3d) = CVec3d(a.X + b.X, a.Y + b.Y, a.Z + b.Z)
static member inline scale (k: float) (a: CVec3d) = CVec3d(k * a.X, k * a.Y, k * a.Z)
end


let gravity =
-9.81


let inline spring (pos: ^vec) =
let (*) k x = (^vec: (static member inline scale: float -> ^vec -> ^vec) k) x
let k = 100.0
-k * pos


let inline drag k (pos: ^speed) (speed: ^speed) =
let (*) k x = (^vec: (static member inline scale: float -> ^vec -> ^vec) k) x
-k * speed


let inline euler_implicit accel_func (pos: ^vec) (speed: ^vec) delta =
let (+) x y = (^vec: (static member inline plus: ^vec -> ^vec -> ^vec) x) y
let (*) k x = (^vec: (static member inline scale: float -> ^vec -> ^vec) k) x

let speed2 = speed + (delta * (accel_func pos speed))
let pos2 = pos + (delta * speed2)
ps<_>(pos2, speed2)


let inline simulate intg_func t0 t_fin delta pos0 speed0 =
let oc = OptimizedClosures.FastFunc3<_,_,_,_>.Adapt(intg_func)
let rec repeat t0 pos0 speed0 =
if t0 > t_fin then (pos0, speed0)
else
let t1 = t0 + delta
let ps: ps<_> = oc.Invoke(pos0, speed0, delta)
repeat t1 ps.pos ps.speed
repeat t0 pos0 speed0


let inline accel (up: ^vec) (pos: ^vec) (speed: ^vec) =
let (+) x y = (^vec: (static member inline plus: ^vec -> ^vec -> ^vec) x) y
let (*) k x = (^vec: (static member inline scale: float -> ^vec -> ^vec) k) x
(drag 1.0 pos speed) + (spring pos) + (gravity * up)


let run euler_func s0 =
let t0 = 0.0
let t_fin = 1000.0
let delta = 0.005

let run_timed map_func =
let n_runs = 3
let times =
[
for i in 1..n_runs ->
let (run_time, res) = run_time_func (fun () -> s0 |> map_func(fun (x, y) -> simulate euler_func t0 t_fin delta x y))
run_time
]

let max = Seq.max times
let min = Seq.min times
let avg = (Seq.fold (fun x y -> x+y) 0.0 times) / float n_runs
(min, max, avg)

let single_min, single_max, single_avg = run_timed List.map
let multi_min, multi_max, multi_avg = run_timed map_parallel

printfn "Single (min, max, avg): %f %f %f" single_min single_max single_avg
printfn "Multi (min, max, avg): %f %f %f" multi_min multi_max multi_avg


let n_bodies = 10

let s0_Vec1d = [ for s in 1..n_bodies -> (Vec1d(float s), Vec1d(0.0)) ]
let ef_Vec1d = euler_implicit (accel (Vec1d(1.0)))

let s0_Vec2d = [ for s in 1..n_bodies -> (Vec2d(float s, 1.0), Vec2d(0.0, 0.0)) ]
let ef_Vec2d = euler_implicit (accel (Vec2d(0.0, 1.0)))

let s0_Vec3d = [ for s in 1..n_bodies -> (Vec3d(float s, 1.0, -1.0), Vec3d(0.0, 0.0, 0.0)) ]
let ef_Vec3d = euler_implicit (accel (Vec3d(0.0, 1.0, 0.0)))

let s0_CVec1d = [ for s in 1..n_bodies -> (CVec1d(float s), CVec1d(0.0)) ]
let ef_CVec1d = euler_implicit (accel (CVec1d(1.0)))

let s0_CVec2d = [ for s in 1..n_bodies -> (CVec2d(float s, 1.0), CVec2d(0.0, 0.0)) ]
let ef_CVec2d = euler_implicit (accel (CVec2d(0.0, 1.0)))

let s0_CVec3d = [ for s in 1..n_bodies -> (CVec3d(float s, 1.0, -1.0), CVec3d(0.0, 0.0, 0.0)) ]
let ef_CVec3d = euler_implicit (accel (CVec3d(0.0, 1.0, 0.0)))

let s0_One = [ for s in 1..n_bodies -> (One(float s), One(0.0)) ]
let ef_One = euler_implicit (accel (One(1.0)))

let s0_Two = [ for s in 1..n_bodies -> (Two(float s, 1.0), Two(0.0, 0.0)) ]
let ef_Two = euler_implicit (accel (Two(0.0, 1.0)))

let s0_Three = [ for s in 1..n_bodies -> (Three(float s, 1.0, -1.0), Three(0.0, 0.0, 0.0)) ]
let ef_Three = euler_implicit (accel (Three(0.0, 1.0, 0.0)))

printfn "1D (struct)"
run ef_Vec1d s0_Vec1d

printfn "2D (struct)"
run ef_Vec2d s0_Vec2d

printfn "3D (struct)"
run ef_Vec3d s0_Vec3d

printfn "1D (class)"
run ef_CVec1d s0_CVec1d

printfn "2D (class)"
run ef_CVec2d s0_CVec2d

printfn "3D (class)"
run ef_CVec3d s0_CVec3d

printfn "1D (union)"
run ef_One s0_One

printfn "2D (union)"
run ef_Two s0_Two

printfn "3D (union)"
run ef_Three s0_Three

ignore (Console.ReadKey(true))


Here come the detailed explanations:
type VecN =
One of float
[...]
type CVec3d (x: float, y: float, z: float) = class
member v.X = x
member v.Y = y
member v.Z = z

static member inline plus (a: CVec3d) (b: CVec3d) = CVec3d(a.X + b.X, a.Y + b.Y, a.Z + b.Z)
static member inline scale (k: float) (a: CVec3d) = CVec3d(k * a.X, k * a.Y, k * a.Z)
end

These definitions show different ways to implement vector types. The first method, which uses a union, is a bit uncommon. It is inelegant because it does not take advantage of the type system to capture the fact that adding vectors of different dimensions is not allowed. It is also the slowest implementation of vectors of all.
Follow definitions of one, two and three dimensional vectors using structs, which we already saw in the previous tutorial. The last group is almost identical, except for the fact that classes are used. I won't go into the differences between structs and classes in this post, but it's interesting to see that structs are faster in this program. This is however not generally true, even when considering only the special case of 3d vectors.

The interesting part starts below.
let inline spring (pos: ^vec) =

This defines a generic function "spring" taking a parameter of type "^vec". Note that "vec" here is not an actual type, but a type variable. We have already used the notation " 'vec", which plays a similar role. The difference resides in who creates the concrete function. In the case of " 'vec", this is handled by the generics mechanics in the .NET runtime. In the present case, using "^vec", it is handled by the F# compiler.
(QUESTION: is that correct? I tried replacing "^vec" with "'vec" in this line, and that worked too.)
This requires the function to be declared as inline. The compiler will generate a new copy of the code for each concrete type with which it is used.
For further details about type constraints, see Statically typed duck typing in F#.

let (*) k x = (^vec: (static member scale: float -> ^vec -> ^vec) k) x

I will start with the part of the line after the "=" symbol. It constitutes a type constraint, and means that "^vec" must have a static member named "scale", accepting a float and returning a function from "^vec" to "^vec" (in other words: a function taking a float and a "^vec" and returning a "^vec").
The rest of the line defines a function "*", thus overloading the multiplication operator, which allows us to write expressions in a natural way. Note that this method could also have been used in Tutorial 8, its availability is not subject to the use of type constraints.

let k = 100.0
-k * pos

Finally something we recognize...

You may have noticed that unlike Tutorial 8, this tutorial includes the optimizations from Tutorial 7 and 7b (aggressive inlining and OptimizedClosures). They do seem to make a difference here.

Update:
This Tutorial used to include a section comparing the performance of the F# and the C++ versions of Euler using doubles and structs (for 1-, 2- and 3-dimensional vectors). I noticed the timing results depended very much on whether the programs were started from Visual Studio or directly from Windows. C++ programs using .NET are faster when started from Visual studio, whereas F# programs run faster when started outside of Visual Studio. The differences were large enough to render comparisons meaningless, and I have therefore decided to remove the benchmark results until I have cleared that up.

I have kept the most original method to write generic code for the next Tutorial. Stay tuned!

No comments: