|
370 | 370 | - abstract type Material{T <: AbstractFloat} end
|
371 | 371 | -
|
372 | 372 | - "Record a hit between a ray and an object's surface"
|
373 |
| - - mutable struct HitRecord{T <: AbstractFloat} |
| 373 | + - struct HitRecord{T <: AbstractFloat} |
374 | 374 | - t::T # distance from the ray's origin to the intersection with a surface.
|
375 |
| - - |
376 | 375 | - # If t==Inf32, there was no hit, and all following values are undefined!
|
377 |
| - - # |
| 376 | + - |
378 | 377 | - p::Vec3{T} # point of the intersection between an object's surface and a ray
|
379 | 378 | - n⃗::Vec3{T} # surface's outward normal vector, points towards outside of object?
|
380 | 379 | -
|
|
383 | 382 | - front_face::Bool
|
384 | 383 | - mat::Material{T}
|
385 | 384 | -
|
386 |
| - 0 @inline HitRecord{T}() where T = new{T}(typemax(T)) # no hit! |
387 | 385 | - @inline HitRecord(t::T,p,n⃗,front_face,mat) where T = new{T}(t,p,n⃗,front_face,mat)
|
388 | 386 | - end
|
389 | 387 | -
|
|
398 | 396 | - # """
|
399 | 397 | -
|
400 | 398 | - """Equivalent to `hit_record.set_face_normal()`"""
|
401 |
| - - @inline @fastmath function ray_to_HitRecord(t::T, p, outward_n⃗, r_dir::Vec3{T}, mat::Material{T}) where T |
| 399 | + - @inline @fastmath function ray_to_HitRecord(t::T, p, outward_n⃗, r_dir::Vec3{T}, mat::Material{T})::Union{HitRecord,Nothing} where T |
402 | 400 | - front_face = r_dir ⋅ outward_n⃗ < 0
|
403 | 401 | - n⃗ = front_face ? outward_n⃗ : -outward_n⃗
|
404 | 402 | - HitRecord(t,p,n⃗,front_face,mat)
|
|
431 | 429 | - Args:
|
432 | 430 | - rec: the HitRecord of the surface from which to scatter the ray.
|
433 | 431 | -
|
434 |
| - - Return missing if it's fully absorbed. """ |
| 432 | + - Return `nothing` if it's fully absorbed. """ |
435 | 433 | - @inline @fastmath function scatter(mat::Lambertian{T}, r::Ray{T}, rec::HitRecord{T})::Scatter{T} where T
|
436 | 434 | - scatter_dir = rec.n⃗ + random_vec3_on_sphere(T)
|
437 | 435 | - if near_zero(scatter_dir) # Catch degenerate scatter direction
|
|
444 | 442 | - return Scatter(scattered_r, attenuation)
|
445 | 443 | - end
|
446 | 444 | -
|
447 |
| - - const _no_hit = HitRecord{Float64}() # claforte: HACK! favoring Float64... |
448 |
| - - |
449 |
| - - @inline @fastmath function hit(s::Sphere{T}, r::Ray{T}, tmin::T, tmax::T) where T |
| 445 | + - @inline @fastmath function hit(s::Sphere{T}, r::Ray{T}, tmin::T, tmax::T)::Union{HitRecord,Nothing} where T |
450 | 446 | - oc = r.origin - s.center
|
451 | 447 | - #a = r.dir ⋅ r.dir # unnecessary since `r.dir` is normalized
|
452 | 448 | - a = 1
|
453 | 449 | - half_b = oc ⋅ r.dir
|
454 | 450 | - c = oc⋅oc - s.radius^2
|
455 | 451 | - discriminant = half_b^2 - a*c
|
456 |
| - - if discriminant < 0 return _no_hit end |
| 452 | + - if discriminant < 0 return nothing end # no hit! |
457 | 453 | - sqrtd = √discriminant
|
458 | 454 | -
|
459 | 455 | - # Find the nearest root that lies in the acceptable range
|
460 | 456 | - root = (-half_b - sqrtd) / a
|
461 | 457 | - if root < tmin || tmax < root
|
462 | 458 | - root = (-half_b + sqrtd) / a
|
463 | 459 | - if root < tmin || tmax < root
|
464 |
| - - return _no_hit |
| 460 | + - return nothing # no hit! |
465 | 461 | - end
|
466 | 462 | - end
|
467 | 463 | -
|
|
474 | 470 | - const HittableList = Vector{Hittable}
|
475 | 471 | -
|
476 | 472 | - #"""Find closest hit between `Ray r` and a list of Hittable objects `h`, within distance `tmin` < `tmax`"""
|
477 |
| - - @inline function hit(hittables::HittableList, r::Ray{T}, tmin::T, tmax::T) where T |
| 473 | + - @inline function hit(hittables::HittableList, r::Ray{T}, tmin::T, tmax::T)::Union{HitRecord,Nothing} where T |
478 | 474 | - closest = tmax # closest t so far
|
479 |
| - - rec = _no_hit |
480 |
| - - for h in hittables # @simd seems to make no difference...? |
481 |
| - - temprec = hit(h, r, tmin, closest) |
482 |
| - - if temprec !== _no_hit |
483 |
| - - rec = temprec |
484 |
| - - closest = rec.t # i.e. ignore any further hit > this one's. |
| 475 | + - best_rec::Union{HitRecord,Nothing} = nothing # by default, no hit |
| 476 | + - #for h in hittables # @simd seems to make no difference...? |
| 477 | + - @inbounds for i in eachindex(hittables) |
| 478 | + - h = hittables[i] |
| 479 | + - rec = hit(h, r, tmin, closest) |
| 480 | + - if rec !== nothing |
| 481 | + - best_rec = rec |
| 482 | + - closest = best_rec.t # i.e. ignore any further hit > this one's. |
485 | 483 | - end
|
486 | 484 | - end
|
487 |
| - - rec |
| 485 | + - best_rec |
488 | 486 | - end
|
489 | 487 | -
|
490 | 488 | - @inline color_vec3_in_rgb(v::Vec3{T}) where T = 0.5normalize(v) + SA{T}[0.5,0.5,0.5]
|
|
506 | 504 | -
|
507 | 505 | - #"Scene with 2 Lambertian spheres"
|
508 | 506 | - function scene_2_spheres(; elem_type::Type{T}) where T
|
509 |
| - - spheres = Sphere[] |
| 507 | + 0 spheres = Sphere[] |
510 | 508 | -
|
511 | 509 | - # small center sphere
|
512 |
| - - push!(spheres, Sphere((SA{T}[0,0,-1]), T(0.5), Lambertian(SA{T}[0.7,0.3,0.3]))) |
| 510 | + 0 push!(spheres, Sphere((SA{T}[0,0,-1]), T(0.5), Lambertian(SA{T}[0.7,0.3,0.3]))) |
513 | 511 | -
|
514 | 512 | - # ground sphere
|
515 |
| - - push!(spheres, Sphere((SA{T}[0,-100.5,-1]), T(100), Lambertian(SA{T}[0.8,0.8,0.0]))) |
516 |
| - - HittableList(spheres) |
| 513 | + 0 push!(spheres, Sphere((SA{T}[0,-100.5,-1]), T(100), Lambertian(SA{T}[0.8,0.8,0.0]))) |
| 514 | + 0 HittableList(spheres) |
517 | 515 | - end
|
518 | 516 | -
|
519 | 517 | - #"""Scene with 2 Lambertian, 2 Metal spheres.
|
|
595 | 593 | 0 return SA{T}[0,0,0]
|
596 | 594 | - end
|
597 | 595 | -
|
598 |
| - 194432 rec = hit(world, r, T(1e-4), typemax(T)) |
599 |
| - 80 if rec !== _no_hit # claforte TODO: check if T is typemax instead? |
| 596 | + 524336 rec = hit(world, r, T(1e-4), typemax(T)) |
| 597 | + 0 if rec !== nothing |
600 | 598 | - # For debugging, represent vectors as RGB:
|
601 | 599 | - # claforte TODO: adapt to latest code!
|
602 | 600 | - # return color_vec3_in_rgb(rec.p) # show the normalized hit point
|
|
605 | 603 | - # return color_vec3_in_rgb(random_vec3_in_sphere())
|
606 | 604 | - #return color_vec3_in_rgb(rec.n⃗ + random_vec3_in_sphere())
|
607 | 605 | -
|
608 |
| - 960 s = scatter(rec.mat, r, rec) |
| 606 | + 2480 s = scatter(rec.mat, r, rec) |
609 | 607 | - if s.reflected
|
610 |
| - 80 return s.attenuation .* ray_color(s.r, world, depth-1) |
| 608 | + 0 return s.attenuation .* ray_color(s.r, world, depth-1) |
611 | 609 | - else
|
612 | 610 | - return SA{T}[0,0,0]
|
613 | 611 | - end
|
614 | 612 | - else
|
615 |
| - 0 skycolor(r) |
| 613 | + 160 skycolor(r) |
616 | 614 | - end
|
617 | 615 | - end
|
618 | 616 | -
|
|
630 | 628 | 0 image_height = convert(Int64, floor(image_width / aspect_ratio))
|
631 | 629 | -
|
632 | 630 | - # Render
|
633 |
| - 3584 img = zeros(RGB{T}, image_height, image_width) |
| 631 | + 3840 img = zeros(RGB{T}, image_height, image_width) |
634 | 632 | 0 f32_image_width = convert(Float32, image_width)
|
635 | 633 | 0 f32_image_height = convert(Float32, image_height)
|
636 | 634 | -
|
|
711 | 709 | - # Above was all using max bounces=4, since this looked fine to me (except the negatively scaled sphere).
|
712 | 710 | - # Switching to max bounces=16 to match C++ version decreased performance by 7.2%:
|
713 | 711 | - # 4.465 ms (65680 allocations: 5.13 MiB)
|
714 |
| - - #render(scene_2_spheres(; elem_type=ELEM_TYPE), t_default_cam, 96, 16) # 16 samples |
| 712 | + - # Lots of optimizations... ending with making HitRecord non-mutable: |
| 713 | + - # 2.225 ms (445188 allocations: 34.08 MiB) |
| 714 | + - # Using non-mutable HitRecord, Union{HitRecord,Missing}, ismissing(): |
| 715 | + - # 976.365 μs (65574 allocations: 5.12 MiB) |
| 716 | + - # Using @paulmelis' style of hit(): @inbounds for i in eachindex(hittables) and Union{HitRecord, Nothing} |
| 717 | + - # 951.447 μs (65574 allocations: 5.12 MiB) |
| 718 | + - render(scene_2_spheres(; elem_type=ELEM_TYPE), t_default_cam, 96, 16) # 16 samples |
715 | 719 | -
|
716 | 720 | - # Iterate over each column: 614.820 μs
|
717 | 721 | - # Iterate over each row: 500.334 μs
|
|
728 | 732 | - # Above was all using max bounces=4, since this looked fine to me (except the negatively scaled sphere).
|
729 | 733 | - # Switching to max bounces=16 to match C++ version decreased performance by 7.2%:
|
730 | 734 | - # 314.094 μs (4009 allocations: 434.97 KiB)
|
731 |
| - - #render(scene_2_spheres(; elem_type=ELEM_TYPE), t_default_cam, 96, 1) # 1 sample |
| 735 | + - # Lots of optimizations... ending with making HitRecord non-mutable: |
| 736 | + - # 136.388 μs (28306 allocations: 2.28 MiB) |
| 737 | + - # Using non-mutable HitRecord, Union{HitRecord,Missing}, ismissing(): |
| 738 | + - # 102.764 μs (4314 allocations: 459.41 KiB) |
| 739 | + - # Using @paulmelis' style of hit(): @inbounds for i in eachindex(hittables) and Union{HitRecord, Nothing} |
| 740 | + - # 101.161 μs (4314 allocations: 459.41 KiB) |
| 741 | + - render(scene_2_spheres(; elem_type=ELEM_TYPE), t_default_cam, 96, 1) # 1 sample |
732 | 742 | -
|
733 | 743 | - #render(scene_4_spheres(; elem_type=ELEM_TYPE), t_default_cam, 96, 16)
|
734 | 744 | -
|
|
788 | 798 | -
|
789 | 799 | - #"From C++: Image 15: Glass sphere that sometimes refracts"
|
790 | 800 | - @inline function scene_diel_spheres(left_radius=0.5; elem_type::Type{T}) where T # dielectric spheres
|
791 |
| - - spheres = Sphere[] |
| 801 | + 0 spheres = Sphere[] |
792 | 802 | -
|
793 | 803 | - # small center sphere
|
794 |
| - - push!(spheres, Sphere((SA{T}[0,0,-1]), T(0.5), Lambertian(SA{T}[0.1,0.2,0.5]))) |
| 804 | + 0 push!(spheres, Sphere((SA{T}[0,0,-1]), T(0.5), Lambertian(SA{T}[0.1,0.2,0.5]))) |
795 | 805 | -
|
796 | 806 | - # ground sphere (planet?)
|
797 |
| - - push!(spheres, Sphere((SA{T}[0,-100.5,-1]), T(100), Lambertian(SA{T}[0.8,0.8,0.0]))) |
| 807 | + 0 push!(spheres, Sphere((SA{T}[0,-100.5,-1]), T(100), Lambertian(SA{T}[0.8,0.8,0.0]))) |
798 | 808 | -
|
799 | 809 | - # # left and right spheres.
|
800 | 810 | - # # Use a negative radius on the left sphere to create a "thin bubble"
|
801 |
| - - push!(spheres, Sphere((SA{T}[-1,0,-1]), T(left_radius), Dielectric(T(1.5)))) |
802 |
| - - push!(spheres, Sphere((SA{T}[1,0,-1]), T(0.5), Metal((SA{T}[0.8,0.6,0.2]), T(0)))) |
803 |
| - - HittableList(spheres) |
| 811 | + 0 push!(spheres, Sphere((SA{T}[-1,0,-1]), T(left_radius), Dielectric(T(1.5)))) |
| 812 | + 0 push!(spheres, Sphere((SA{T}[1,0,-1]), T(0.5), Metal((SA{T}[0.8,0.6,0.2]), T(0)))) |
| 813 | + 0 HittableList(spheres) |
804 | 814 | - end
|
805 | 815 | -
|
806 |
| - - #scene_diel_spheres(; elem_type=ELEM_TYPE) |
| 816 | + - scene_diel_spheres(; elem_type=ELEM_TYPE) |
807 | 817 | -
|
808 | 818 | - #render(scene_diel_spheres(; elem_type=ELEM_TYPE), t_default_cam, 96, 16)
|
809 | 819 | - #render(scene_diel_spheres(), default_camera(), 320, 32)
|
810 | 820 | -
|
811 | 821 | - # Hollow Glass sphere using a negative radius
|
812 |
| - - #ender(scene_diel_spheres(-0.5; elem_type=ELEM_TYPE), t_default_cam, 96, 16) |
| 822 | + - #render(scene_diel_spheres(-0.5; elem_type=ELEM_TYPE), t_default_cam, 96, 16) |
813 | 823 | -
|
814 | 824 | - #render(scene_diel_spheres(; elem_type=ELEM_TYPE), default_camera((SA{ELEM_TYPE}[-2,2,1]), (SA{ELEM_TYPE}[0,0,-1]),
|
815 | 825 | - # (SA{ELEM_TYPE}[0,1,0]), ELEM_TYPE(20)), 96, 16)
|
|
846 | 856 | 0 if choose_mat < T(0.8)
|
847 | 857 | - # diffuse
|
848 | 858 | 0 albedo = @SVector[trand(T) for i ∈ 1:3] .* @SVector[trand(T) for i ∈ 1:3]
|
849 |
| - 38176 push!(spheres, Sphere(center, T(0.2), Lambertian(albedo))) |
| 859 | + 38800 push!(spheres, Sphere(center, T(0.2), Lambertian(albedo))) |
850 | 860 | 0 elseif choose_mat < T(0.95)
|
851 | 861 | - # metal
|
852 | 862 | 0 albedo = @SVector[random_between(T(0.5),T(1.0)) for i ∈ 1:3]
|
853 | 863 | 0 fuzz = random_between(T(0.0), T(5.0))
|
854 |
| - 7824 push!(spheres, Sphere(center, T(0.2), Metal(albedo, fuzz))) |
| 864 | + 7008 push!(spheres, Sphere(center, T(0.2), Metal(albedo, fuzz))) |
855 | 865 | - else
|
856 | 866 | - # glass
|
857 |
| - 1296 push!(spheres, Sphere(center, T(0.2), Dielectric(T(1.5)))) |
| 867 | + 1392 push!(spheres, Sphere(center, T(0.2), Dielectric(T(1.5)))) |
858 | 868 | - end
|
859 | 869 | - end
|
860 | 870 | -
|
|
906 | 916 | - # 2.168 ms (13791 allocations: 1.15 MiB)
|
907 | 917 | - # Using bunch of @inbounds, @simd in low-level functions
|
908 | 918 | - # 2.076 ms (13861 allocations: 1.15 MiB)
|
| 919 | + - # Lots of optimizations, up to `Using non-mutable HitRecord, Union{HitRecord,Missing}, ismissing():` |
| 920 | + - # 2.042 ms (14825 allocations: 1.23 MiB) |
909 | 921 | - #render(scene_random_spheres(; elem_type=ELEM_TYPE), t_cam1, 96, 1)
|
910 | 922 | -
|
911 | 923 | - # took 5020s in Pluto.jl, before optimizations!
|
|
949 | 961 | - # 286.873 ms (1811412 allocations: 138.69 MiB) (ran multiple times, seems like ~2.5% speed-up)
|
950 | 962 | - # Fixed, per-thread RNGs with fixed seeds
|
951 | 963 | - # 286.575 ms (1884433 allocations: 144.26 MiB) (i.e. maybe a tiny bit faster considering this fixed seed has more allocations?)
|
| 964 | + - # Make HitRecord non-mutable: |
| 965 | + - # 29.733 s (937962909 allocations: 69.88 GiB) (WTF!) |
| 966 | + - # Lots of optimizations, up to `Using non-mutable HitRecord, Union{HitRecord,Missing}, ismissing():` |
| 967 | + - # 306.011 ms (1884433 allocations: 144.26 MiB) (Still slower... Hum) |
| 968 | + - # Using @paulmelis' style of hit(): @inbounds for i in eachindex(hittables) and Union{HitRecord, Nothing} |
| 969 | + - # 304.877 ms (1884433 allocations: 144.26 MiB) |
| 970 | + - # Extract the scene creation from the render() call: |
| 971 | + - # 300.344 ms (1883484 allocations: 144.21 MiB) |
952 | 972 | - # print("render(scene_random_spheres(; elem_type=ELEM_TYPE), t_cam1, 200, 32):")
|
953 |
| - - # render(scene_random_spheres(; elem_type=ELEM_TYPE), t_cam1, 200, 32) |
| 973 | + - # reseed!() |
| 974 | + - # _scene_random_spheres = scene_random_spheres(; elem_type=ELEM_TYPE) |
| 975 | + - # @btime render($_scene_random_spheres, $t_cam1, 200, 32) |
954 | 976 | -
|
955 | 977 | - # After some optimization, took ~5.6 hours:
|
956 | 978 | - # 20171.646846 seconds (94.73 G allocations: 2.496 TiB, 1.06% gc time)
|
|
1030 | 1052 | - using Profile
|
1031 | 1053 | - render(scene_random_spheres(; elem_type=ELEM_TYPE), t_cam1, 16, 1)
|
1032 | 1054 | - Profile.clear_malloc_data()
|
1033 |
| - - render(scene_random_spheres(; elem_type=ELEM_TYPE), t_cam1, 16, 4) |
| 1055 | + - render(scene_random_spheres(; elem_type=ELEM_TYPE), t_cam1, 17, 13) |
1034 | 1056 | -
|
0 commit comments