Skip to content

Commit 95c986b

Browse files
committed
Minor slowdown? try non-mutable HitRecord
- Heavily inspired by @paulmelis' hittable.jl. - It seems 2X faster in small scenes without reflections/refractions... 951us vs 2.225ms! - ... but it's slower for a larger scene: `@btime render($_scene_random_spheres, $t_cam1, 200, 32)` repeatedly goes from 286ms to 300ms?! Committing in case someone can take a look and give me suggestions...
1 parent 0edba58 commit 95c986b

File tree

3 files changed

+119
-73
lines changed

3 files changed

+119
-73
lines changed

README.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,10 @@ Unlike the C++ implementation:
132132
- [Ray Tracing In One Weekend by Peter Shirley](https://raytracing.github.io/books/RayTracingInOneWeekend.html)
133133
- [ChrisRackauckhas's awesome MIT course of Parallel Computing and Scientific Machine Learning](https://github.com/mitmath/18337): I only watched the first few lessons on performance optimization so far, but I learnt a lot already and highly recommend it.
134134
- https://docs.julialang.org/en/v1/manual/integers-and-floating-point-numbers/#Floating-Point-Numbers
135-
- [cshenton's Julia implementation](https://github.com/cshenton/RayTracing.jl): This was useful as a starting point as I developed something roughly similar at first, then I tried to optimize it further.
135+
- Other Julia raytracers
136+
- [cshenton's Julia implementation](https://github.com/cshenton/RayTracing.jl): This was useful as a starting point as I developed something roughly similar at first, then I tried to optimize it further.
137+
- https://github.com/pxl-th/Trace.jl : more sophisticated raytracer based on this excellent book: "Physically Based Rendering: From Theory to Implementation"
138+
- https://github.com/paulmelis/riow.jl : another example based on Peter Shirley's books
136139

137140
# Special thanks to:
138141

examples/track_allocations_proto_HitRecords.jl examples/proto.mem_immutable_hitrecord.jl

+66-44
Original file line numberDiff line numberDiff line change
@@ -370,11 +370,10 @@
370370
- abstract type Material{T <: AbstractFloat} end
371371
-
372372
- "Record a hit between a ray and an object's surface"
373-
- mutable struct HitRecord{T <: AbstractFloat}
373+
- struct HitRecord{T <: AbstractFloat}
374374
- t::T # distance from the ray's origin to the intersection with a surface.
375-
-
376375
- # If t==Inf32, there was no hit, and all following values are undefined!
377-
- #
376+
-
378377
- p::Vec3{T} # point of the intersection between an object's surface and a ray
379378
- n⃗::Vec3{T} # surface's outward normal vector, points towards outside of object?
380379
-
@@ -383,7 +382,6 @@
383382
- front_face::Bool
384383
- mat::Material{T}
385384
-
386-
0 @inline HitRecord{T}() where T = new{T}(typemax(T)) # no hit!
387385
- @inline HitRecord(t::T,p,n⃗,front_face,mat) where T = new{T}(t,p,n⃗,front_face,mat)
388386
- end
389387
-
@@ -398,7 +396,7 @@
398396
- # """
399397
-
400398
- """Equivalent to `hit_record.set_face_normal()`"""
401-
- @inline @fastmath function ray_to_HitRecord(t::T, p, outward_n⃗, r_dir::Vec3{T}, mat::Material{T}) where T
399+
- @inline @fastmath function ray_to_HitRecord(t::T, p, outward_n⃗, r_dir::Vec3{T}, mat::Material{T})::Union{HitRecord,Nothing} where T
402400
- front_face = r_dir ⋅ outward_n⃗ < 0
403401
- n⃗ = front_face ? outward_n⃗ : -outward_n⃗
404402
- HitRecord(t,p,n⃗,front_face,mat)
@@ -431,7 +429,7 @@
431429
- Args:
432430
- rec: the HitRecord of the surface from which to scatter the ray.
433431
-
434-
- Return missing if it's fully absorbed. """
432+
- Return `nothing` if it's fully absorbed. """
435433
- @inline @fastmath function scatter(mat::Lambertian{T}, r::Ray{T}, rec::HitRecord{T})::Scatter{T} where T
436434
- scatter_dir = rec.n⃗ + random_vec3_on_sphere(T)
437435
- if near_zero(scatter_dir) # Catch degenerate scatter direction
@@ -444,24 +442,22 @@
444442
- return Scatter(scattered_r, attenuation)
445443
- end
446444
-
447-
- const _no_hit = HitRecord{Float64}() # claforte: HACK! favoring Float64...
448-
-
449-
- @inline @fastmath function hit(s::Sphere{T}, r::Ray{T}, tmin::T, tmax::T) where T
445+
- @inline @fastmath function hit(s::Sphere{T}, r::Ray{T}, tmin::T, tmax::T)::Union{HitRecord,Nothing} where T
450446
- oc = r.origin - s.center
451447
- #a = r.dir ⋅ r.dir # unnecessary since `r.dir` is normalized
452448
- a = 1
453449
- half_b = oc ⋅ r.dir
454450
- c = oc⋅oc - s.radius^2
455451
- discriminant = half_b^2 - a*c
456-
- if discriminant < 0 return _no_hit end
452+
- if discriminant < 0 return nothing end # no hit!
457453
- sqrtd = √discriminant
458454
-
459455
- # Find the nearest root that lies in the acceptable range
460456
- root = (-half_b - sqrtd) / a
461457
- if root < tmin || tmax < root
462458
- root = (-half_b + sqrtd) / a
463459
- if root < tmin || tmax < root
464-
- return _no_hit
460+
- return nothing # no hit!
465461
- end
466462
- end
467463
-
@@ -474,17 +470,19 @@
474470
- const HittableList = Vector{Hittable}
475471
-
476472
- #"""Find closest hit between `Ray r` and a list of Hittable objects `h`, within distance `tmin` < `tmax`"""
477-
- @inline function hit(hittables::HittableList, r::Ray{T}, tmin::T, tmax::T) where T
473+
- @inline function hit(hittables::HittableList, r::Ray{T}, tmin::T, tmax::T)::Union{HitRecord,Nothing} where T
478474
- closest = tmax # closest t so far
479-
- rec = _no_hit
480-
- for h in hittables # @simd seems to make no difference...?
481-
- temprec = hit(h, r, tmin, closest)
482-
- if temprec !== _no_hit
483-
- rec = temprec
484-
- closest = rec.t # i.e. ignore any further hit > this one's.
475+
- best_rec::Union{HitRecord,Nothing} = nothing # by default, no hit
476+
- #for h in hittables # @simd seems to make no difference...?
477+
- @inbounds for i in eachindex(hittables)
478+
- h = hittables[i]
479+
- rec = hit(h, r, tmin, closest)
480+
- if rec !== nothing
481+
- best_rec = rec
482+
- closest = best_rec.t # i.e. ignore any further hit > this one's.
485483
- end
486484
- end
487-
- rec
485+
- best_rec
488486
- end
489487
-
490488
- @inline color_vec3_in_rgb(v::Vec3{T}) where T = 0.5normalize(v) + SA{T}[0.5,0.5,0.5]
@@ -506,14 +504,14 @@
506504
-
507505
- #"Scene with 2 Lambertian spheres"
508506
- function scene_2_spheres(; elem_type::Type{T}) where T
509-
- spheres = Sphere[]
507+
0 spheres = Sphere[]
510508
-
511509
- # small center sphere
512-
- push!(spheres, Sphere((SA{T}[0,0,-1]), T(0.5), Lambertian(SA{T}[0.7,0.3,0.3])))
510+
0 push!(spheres, Sphere((SA{T}[0,0,-1]), T(0.5), Lambertian(SA{T}[0.7,0.3,0.3])))
513511
-
514512
- # ground sphere
515-
- push!(spheres, Sphere((SA{T}[0,-100.5,-1]), T(100), Lambertian(SA{T}[0.8,0.8,0.0])))
516-
- HittableList(spheres)
513+
0 push!(spheres, Sphere((SA{T}[0,-100.5,-1]), T(100), Lambertian(SA{T}[0.8,0.8,0.0])))
514+
0 HittableList(spheres)
517515
- end
518516
-
519517
- #"""Scene with 2 Lambertian, 2 Metal spheres.
@@ -595,8 +593,8 @@
595593
0 return SA{T}[0,0,0]
596594
- end
597595
-
598-
194432 rec = hit(world, r, T(1e-4), typemax(T))
599-
80 if rec !== _no_hit # claforte TODO: check if T is typemax instead?
596+
524336 rec = hit(world, r, T(1e-4), typemax(T))
597+
0 if rec !== nothing
600598
- # For debugging, represent vectors as RGB:
601599
- # claforte TODO: adapt to latest code!
602600
- # return color_vec3_in_rgb(rec.p) # show the normalized hit point
@@ -605,14 +603,14 @@
605603
- # return color_vec3_in_rgb(random_vec3_in_sphere())
606604
- #return color_vec3_in_rgb(rec.n⃗ + random_vec3_in_sphere())
607605
-
608-
960 s = scatter(rec.mat, r, rec)
606+
2480 s = scatter(rec.mat, r, rec)
609607
- if s.reflected
610-
80 return s.attenuation .* ray_color(s.r, world, depth-1)
608+
0 return s.attenuation .* ray_color(s.r, world, depth-1)
611609
- else
612610
- return SA{T}[0,0,0]
613611
- end
614612
- else
615-
0 skycolor(r)
613+
160 skycolor(r)
616614
- end
617615
- end
618616
-
@@ -630,7 +628,7 @@
630628
0 image_height = convert(Int64, floor(image_width / aspect_ratio))
631629
-
632630
- # Render
633-
3584 img = zeros(RGB{T}, image_height, image_width)
631+
3840 img = zeros(RGB{T}, image_height, image_width)
634632
0 f32_image_width = convert(Float32, image_width)
635633
0 f32_image_height = convert(Float32, image_height)
636634
-
@@ -711,7 +709,13 @@
711709
- # Above was all using max bounces=4, since this looked fine to me (except the negatively scaled sphere).
712710
- # Switching to max bounces=16 to match C++ version decreased performance by 7.2%:
713711
- # 4.465 ms (65680 allocations: 5.13 MiB)
714-
- #render(scene_2_spheres(; elem_type=ELEM_TYPE), t_default_cam, 96, 16) # 16 samples
712+
- # Lots of optimizations... ending with make HitRecord non-mutable:
713+
- # 2.225 ms (445188 allocations: 34.08 MiB)
714+
- # Using non-mutable HitRecord, Union{HitRecord,Missing}, ismissing():
715+
- # 976.365 μs (65574 allocations: 5.12 MiB)
716+
- # Using @paulmelis' style of hit(): @inbounds for i in eachindex(hittables) and Union{HitRecord, Nothing}
717+
- # 951.447 μs (65574 allocations: 5.12 MiB)
718+
- render(scene_2_spheres(; elem_type=ELEM_TYPE), t_default_cam, 96, 16) # 16 samples
715719
-
716720
- # Iterate over each column: 614.820 μs
717721
- # Iterate over each row: 500.334 μs
@@ -728,7 +732,13 @@
728732
- # Above was all using max bounces=4, since this looked fine to me (except the negatively scaled sphere).
729733
- # Switching to max bounces=16 to match C++ version decreased performance by 7.2%:
730734
- # 314.094 μs (4009 allocations: 434.97 KiB)
731-
- #render(scene_2_spheres(; elem_type=ELEM_TYPE), t_default_cam, 96, 1) # 1 sample
735+
- # Lots of optimizations... ending with make HitRecord non-mutable:
736+
- # 136.388 μs (28306 allocations: 2.28 MiB)
737+
- # Using non-mutable HitRecord, Union{HitRecord,Missing}, ismissing():
738+
- # 102.764 μs (4314 allocations: 459.41 KiB)
739+
- # Using @paulmelis' style of hit(): @inbounds for i in eachindex(hittables) and Union{HitRecord, Nothing}
740+
- # 101.161 μs (4314 allocations: 459.41 KiB)
741+
- render(scene_2_spheres(; elem_type=ELEM_TYPE), t_default_cam, 96, 1) # 1 sample
732742
-
733743
- #render(scene_4_spheres(; elem_type=ELEM_TYPE), t_default_cam, 96, 16)
734744
-
@@ -788,28 +798,28 @@
788798
-
789799
- #"From C++: Image 15: Glass sphere that sometimes refracts"
790800
- @inline function scene_diel_spheres(left_radius=0.5; elem_type::Type{T}) where T # dielectric spheres
791-
- spheres = Sphere[]
801+
0 spheres = Sphere[]
792802
-
793803
- # small center sphere
794-
- push!(spheres, Sphere((SA{T}[0,0,-1]), T(0.5), Lambertian(SA{T}[0.1,0.2,0.5])))
804+
0 push!(spheres, Sphere((SA{T}[0,0,-1]), T(0.5), Lambertian(SA{T}[0.1,0.2,0.5])))
795805
-
796806
- # ground sphere (planet?)
797-
- push!(spheres, Sphere((SA{T}[0,-100.5,-1]), T(100), Lambertian(SA{T}[0.8,0.8,0.0])))
807+
0 push!(spheres, Sphere((SA{T}[0,-100.5,-1]), T(100), Lambertian(SA{T}[0.8,0.8,0.0])))
798808
-
799809
- # # left and right spheres.
800810
- # # Use a negative radius on the left sphere to create a "thin bubble"
801-
- push!(spheres, Sphere((SA{T}[-1,0,-1]), T(left_radius), Dielectric(T(1.5))))
802-
- push!(spheres, Sphere((SA{T}[1,0,-1]), T(0.5), Metal((SA{T}[0.8,0.6,0.2]), T(0))))
803-
- HittableList(spheres)
811+
0 push!(spheres, Sphere((SA{T}[-1,0,-1]), T(left_radius), Dielectric(T(1.5))))
812+
0 push!(spheres, Sphere((SA{T}[1,0,-1]), T(0.5), Metal((SA{T}[0.8,0.6,0.2]), T(0))))
813+
0 HittableList(spheres)
804814
- end
805815
-
806-
- #scene_diel_spheres(; elem_type=ELEM_TYPE)
816+
- scene_diel_spheres(; elem_type=ELEM_TYPE)
807817
-
808818
- #render(scene_diel_spheres(; elem_type=ELEM_TYPE), t_default_cam, 96, 16)
809819
- #render(scene_diel_spheres(), default_camera(), 320, 32)
810820
-
811821
- # Hollow Glass sphere using a negative radius
812-
- #ender(scene_diel_spheres(-0.5; elem_type=ELEM_TYPE), t_default_cam, 96, 16)
822+
- #render(scene_diel_spheres(-0.5; elem_type=ELEM_TYPE), t_default_cam, 96, 16)
813823
-
814824
- #render(scene_diel_spheres(; elem_type=ELEM_TYPE), default_camera((SA{ELEM_TYPE}[-2,2,1]), (SA{ELEM_TYPE}[0,0,-1]),
815825
- # (SA{ELEM_TYPE}[0,1,0]), ELEM_TYPE(20)), 96, 16)
@@ -846,15 +856,15 @@
846856
0 if choose_mat < T(0.8)
847857
- # diffuse
848858
0 albedo = @SVector[trand(T) for i ∈ 1:3] .* @SVector[trand(T) for i ∈ 1:3]
849-
38176 push!(spheres, Sphere(center, T(0.2), Lambertian(albedo)))
859+
38800 push!(spheres, Sphere(center, T(0.2), Lambertian(albedo)))
850860
0 elseif choose_mat < T(0.95)
851861
- # metal
852862
0 albedo = @SVector[random_between(T(0.5),T(1.0)) for i ∈ 1:3]
853863
0 fuzz = random_between(T(0.0), T(5.0))
854-
7824 push!(spheres, Sphere(center, T(0.2), Metal(albedo, fuzz)))
864+
7008 push!(spheres, Sphere(center, T(0.2), Metal(albedo, fuzz)))
855865
- else
856866
- # glass
857-
1296 push!(spheres, Sphere(center, T(0.2), Dielectric(T(1.5))))
867+
1392 push!(spheres, Sphere(center, T(0.2), Dielectric(T(1.5))))
858868
- end
859869
- end
860870
-
@@ -906,6 +916,8 @@
906916
- # 2.168 ms (13791 allocations: 1.15 MiB)
907917
- # Using bunch of @inbounds, @simd in low-level functions
908918
- # 2.076 ms (13861 allocations: 1.15 MiB)
919+
- # Lots of optimizations, up to `Using non-mutable HitRecord, Union{HitRecord,Missing}, ismissing():`
920+
- # 2.042 ms (14825 allocations: 1.23 MiB)
909921
- #render(scene_random_spheres(; elem_type=ELEM_TYPE), t_cam1, 96, 1)
910922
-
911923
- # took 5020s in Pluto.jl, before optimizations!
@@ -949,8 +961,18 @@
949961
- # 286.873 ms (1811412 allocations: 138.69 MiB) (ran multiple times, seems like ~2.5% speed-up)
950962
- # Fixed, per-thread RNGs with fixed seeds
951963
- # 286.575 ms (1884433 allocations: 144.26 MiB) (i.e. maybe a tiny bit faster considering this fixed seed has more allocations?)
964+
- # Make HitRecord non-mutable:
965+
- # 29.733 s (937962909 allocations: 69.88 GiB) (WTF!)
966+
- # Lots of optimizations, up to `Using non-mutable HitRecord, Union{HitRecord,Missing}, ismissing():`
967+
- # 306.011 ms (1884433 allocations: 144.26 MiB) (Still slower... Hum)
968+
- # Using @paulmelis' style of hit(): @inbounds for i in eachindex(hittables) and Union{HitRecord, Nothing}
969+
- # 304.877 ms (1884433 allocations: 144.26 MiB)
970+
- # Extract the scene creation from the render() call:
971+
- # 300.344 ms (1883484 allocations: 144.21 MiB)
952972
- # print("render(scene_random_spheres(; elem_type=ELEM_TYPE), t_cam1, 200, 32):")
953-
- # render(scene_random_spheres(; elem_type=ELEM_TYPE), t_cam1, 200, 32)
973+
- # reseed!()
974+
- # _scene_random_spheres = scene_random_spheres(; elem_type=ELEM_TYPE)
975+
- # @btime render($_scene_random_spheres, $t_cam1, 200, 32)
954976
-
955977
- # After some optimization, took ~5.6 hours:
956978
- # 20171.646846 seconds (94.73 G allocations: 2.496 TiB, 1.06% gc time)
@@ -1030,5 +1052,5 @@
10301052
- using Profile
10311053
- render(scene_random_spheres(; elem_type=ELEM_TYPE), t_cam1, 16, 1)
10321054
- Profile.clear_malloc_data()
1033-
- render(scene_random_spheres(; elem_type=ELEM_TYPE), t_cam1, 16, 4)
1055+
- render(scene_random_spheres(; elem_type=ELEM_TYPE), t_cam1, 17, 13)
10341056
-

0 commit comments

Comments
 (0)