From 0a304a207422cb92b8c7b0ac2bbf05898d45fc69 Mon Sep 17 00:00:00 2001 From: Daniel J Vickers Date: Mon, 16 Feb 2026 12:19:04 -0500 Subject: [PATCH 01/22] Moved all of the IB marker calculation to the GPU without copy --- src/simulation/m_ib_patches.fpp | 122 ++++++++++++++++---------------- src/simulation/m_ibm.fpp | 9 ++- 2 files changed, 65 insertions(+), 66 deletions(-) diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index d983a9f1de..0509593918 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -61,9 +61,9 @@ module m_ib_patches contains - impure subroutine s_apply_ib_patches(ib_markers_sf) + impure subroutine s_apply_ib_patches(ib_markers) - integer, dimension(:, :, :), intent(inout), optional :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers integer :: i @@ -76,16 +76,16 @@ contains do i = 1, num_ibs if (patch_ib(i)%geometry == 8) then - call s_ib_sphere(i, ib_markers_sf) + call s_ib_sphere(i, ib_markers) elseif (patch_ib(i)%geometry == 9) then - call s_ib_cuboid(i, ib_markers_sf) + call s_ib_cuboid(i, ib_markers) elseif (patch_ib(i)%geometry == 10) then - call s_ib_cylinder(i, ib_markers_sf) + call s_ib_cylinder(i, ib_markers) elseif (patch_ib(i)%geometry == 11) then - call s_ib_3D_airfoil(i, ib_markers_sf) + call s_ib_3D_airfoil(i, ib_markers) ! STL+IBM patch elseif (patch_ib(i)%geometry == 12) then - call s_ib_model(i, ib_markers_sf) + call s_ib_model(i, ib_markers) end if end do !> @} @@ -97,16 +97,16 @@ contains !> @{ do i = 1, num_ibs if (patch_ib(i)%geometry == 2) then - call s_ib_circle(i, ib_markers_sf) + call s_ib_circle(i, ib_markers) elseif (patch_ib(i)%geometry == 3) then - call s_ib_rectangle(i, ib_markers_sf) + call s_ib_rectangle(i, ib_markers) elseif (patch_ib(i)%geometry == 4) then - call s_ib_airfoil(i, ib_markers_sf) + call s_ib_airfoil(i, ib_markers) ! STL+IBM patch elseif (patch_ib(i)%geometry == 5) then - call s_ib_model(i, ib_markers_sf) + call s_ib_model(i, ib_markers) elseif (patch_ib(i)%geometry == 6) then - call s_ib_ellipse(i, ib_markers_sf) + call s_ib_ellipse(i, ib_markers) end if end do !> @} @@ -248,12 +248,12 @@ contains !! are provided. Note that the circular patch DOES allow for !! the smoothing of its boundary. !! @param patch_id is the patch identifier - !! @param ib_markers_sf Array to track patch ids + !! @param ib_markers Array to track patch ids !! @param ib True if this patch is an immersed boundary - subroutine s_ib_circle(patch_id, ib_markers_sf) + subroutine s_ib_circle(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers real(wp), dimension(1:2) :: center real(wp) :: radius @@ -272,14 +272,14 @@ contains ! that cell. If both queries check out, the primitive variables of ! the current patch are assigned to this cell. - $:GPU_PARALLEL_LOOP(private='[i,j]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j]',& & copyin='[patch_id,center,radius]', collapse=2) do j = 0, n do i = 0, m if ((x_cc(i) - center(1))**2 & + (y_cc(j) - center(2))**2 <= radius**2) & then - ib_markers_sf(i, j, 0) = patch_id + ib_markers%sf(i, j, 0) = patch_id end if end do end do @@ -288,11 +288,11 @@ contains end subroutine s_ib_circle !! @param patch_id is the patch identifier - !! @param ib_markers_sf Array to track patch ids - subroutine s_ib_airfoil(patch_id, ib_markers_sf) + !! @param ib_markers Array to track patch ids + subroutine s_ib_airfoil(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers real(wp) :: f, ca_in, pa, ma, ta real(wp) :: xa, yt, xu, yu, xl, yl, xc, yc, dycdxc, sin_c, cos_c @@ -376,7 +376,7 @@ contains end if - $:GPU_PARALLEL_LOOP(private='[i,j,xy_local,k,f]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,xy_local,k,f]', & & copyin='[patch_id,center,inverse_rotation,offset,ma,ca_in,airfoil_grid_u,airfoil_grid_l]', collapse=2) do j = 0, n do i = 0, m @@ -401,13 +401,13 @@ contains if (f_approx_equal(airfoil_grid_u(k)%x, xy_local(1))) then if (xy_local(2) <= airfoil_grid_u(k)%y) then !!IB - ib_markers_sf(i, j, 0) = patch_id + ib_markers%sf(i, j, 0) = patch_id end if else f = (airfoil_grid_u(k)%x - xy_local(1))/(airfoil_grid_u(k)%x - airfoil_grid_u(k - 1)%x) if (xy_local(2) <= ((1._wp - f)*airfoil_grid_u(k)%y + f*airfoil_grid_u(k - 1)%y)) then !!IB - ib_markers_sf(i, j, 0) = patch_id + ib_markers%sf(i, j, 0) = patch_id end if end if else @@ -418,14 +418,14 @@ contains if (f_approx_equal(airfoil_grid_l(k)%x, xy_local(1))) then if (xy_local(2) >= airfoil_grid_l(k)%y) then !!IB - ib_markers_sf(i, j, 0) = patch_id + ib_markers%sf(i, j, 0) = patch_id end if else f = (airfoil_grid_l(k)%x - xy_local(1))/(airfoil_grid_l(k)%x - airfoil_grid_l(k - 1)%x) if (xy_local(2) >= ((1._wp - f)*airfoil_grid_l(k)%y + f*airfoil_grid_l(k - 1)%y)) then !!IB - ib_markers_sf(i, j, 0) = patch_id + ib_markers%sf(i, j, 0) = patch_id end if end if end if @@ -437,12 +437,12 @@ contains end subroutine s_ib_airfoil !! @param patch_id is the patch identifier - !! @param ib_markers_sf Array to track patch ids + !! @param ib_markers Array to track patch ids !! @param ib True if this patch is an immersed boundary - subroutine s_ib_3D_airfoil(patch_id, ib_markers_sf) + subroutine s_ib_3D_airfoil(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers real(wp) :: lz, z_max, z_min, f, ca_in, pa, ma, ta, xa, yt, xu, yu, xl, yl, xc, yc, dycdxc, sin_c, cos_c integer :: i, j, k, l @@ -527,7 +527,7 @@ contains $:GPU_UPDATE(device='[airfoil_grid_l,airfoil_grid_u]') end if - $:GPU_PARALLEL_LOOP(private='[i,j,l,xyz_local,k,f]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,l,xyz_local,k,f]',& & copyin='[patch_id,center,inverse_rotation,offset,ma,ca_in,airfoil_grid_u,airfoil_grid_l,z_min,z_max]', collapse=3) do l = 0, p do j = 0, n @@ -547,13 +547,13 @@ contains if (f_approx_equal(airfoil_grid_u(k)%x, xyz_local(1))) then if (xyz_local(2) <= airfoil_grid_u(k)%y) then !IB - ib_markers_sf(i, j, l) = patch_id + ib_markers%sf(i, j, l) = patch_id end if else f = (airfoil_grid_u(k)%x - xyz_local(1))/(airfoil_grid_u(k)%x - airfoil_grid_u(k - 1)%x) if (xyz_local(2) <= ((1._wp - f)*airfoil_grid_u(k)%y + f*airfoil_grid_u(k - 1)%y)) then !!IB - ib_markers_sf(i, j, l) = patch_id + ib_markers%sf(i, j, l) = patch_id end if end if else @@ -564,14 +564,14 @@ contains if (f_approx_equal(airfoil_grid_l(k)%x, xyz_local(1))) then if (xyz_local(2) >= airfoil_grid_l(k)%y) then !!IB - ib_markers_sf(i, j, l) = patch_id + ib_markers%sf(i, j, l) = patch_id end if else f = (airfoil_grid_l(k)%x - xyz_local(1))/(airfoil_grid_l(k)%x - airfoil_grid_l(k - 1)%x) if (xyz_local(2) >= ((1._wp - f)*airfoil_grid_l(k)%y + f*airfoil_grid_l(k - 1)%y)) then !!IB - ib_markers_sf(i, j, l) = patch_id + ib_markers%sf(i, j, l) = patch_id end if end if end if @@ -593,12 +593,12 @@ contains !! rectangular patch DOES NOT allow for the smoothing of its !! boundaries. !! @param patch_id is the patch identifier - !! @param ib_markers_sf Array to track patch ids + !! @param ib_markers Array to track patch ids !! @param ib True if this patch is an immersed boundary - subroutine s_ib_rectangle(patch_id, ib_markers_sf) + subroutine s_ib_rectangle(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers integer :: i, j, k !< generic loop iterators real(wp) :: pi_inf, gamma, lit_gamma !< Equation of state parameters @@ -621,7 +621,7 @@ contains ! domain and verifying whether the current patch has the permission ! to write to that cell. If both queries check out, the primitive ! variables of the current patch are assigned to this cell. - $:GPU_PARALLEL_LOOP(private='[i,j, xy_local]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j, xy_local]',& & copyin='[patch_id,center,length,inverse_rotation,x_cc,y_cc]', collapse=2) do j = 0, n do i = 0, m @@ -635,7 +635,7 @@ contains 0.5_wp*length(2) >= xy_local(2)) then ! Updating the patch identities bookkeeping variable - ib_markers_sf(i, j, 0) = patch_id + ib_markers%sf(i, j, 0) = patch_id end if end do @@ -650,12 +650,12 @@ contains !! provided. Please note that the spherical patch DOES allow !! for the smoothing of its boundary. !! @param patch_id is the patch identifier - !! @param ib_markers_sf Array to track patch ids + !! @param ib_markers Array to track patch ids !! @param ib True if this patch is an immersed boundary - subroutine s_ib_sphere(patch_id, ib_markers_sf) + subroutine s_ib_sphere(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers ! Generic loop iterators integer :: i, j, k @@ -676,7 +676,7 @@ contains ! and verifying whether the current patch has permission to write to ! that cell. If both queries check out, the primitive variables of ! the current patch are assigned to this cell. - $:GPU_PARALLEL_LOOP(private='[i,j,k,cart_y,cart_z]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,k,cart_y,cart_z]',& & copyin='[patch_id,center,radius]', collapse=3) do k = 0, p do j = 0, n @@ -691,7 +691,7 @@ contains if (((x_cc(i) - center(1))**2 & + (cart_y - center(2))**2 & + (cart_z - center(3))**2 <= radius**2)) then - ib_markers_sf(i, j, k) = patch_id + ib_markers%sf(i, j, k) = patch_id end if end do end do @@ -709,11 +709,11 @@ contains !! the cuboidal patch DOES NOT allow for the smearing of its !! boundaries. !! @param patch_id is the patch identifier - !! @param ib_markers_sf Array to track patch ids - subroutine s_ib_cuboid(patch_id, ib_markers_sf) + !! @param ib_markers Array to track patch ids + subroutine s_ib_cuboid(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers integer :: i, j, k !< Generic loop iterators real(wp), dimension(1:3) :: xyz_local, center, length !< x and y coordinates in local IB frame @@ -732,7 +732,7 @@ contains ! and verifying whether the current patch has permission to write to ! to that cell. If both queries check out, the primitive variables ! of the current patch are assigned to this cell. - $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]',& & copyin='[patch_id,center,length,inverse_rotation]', collapse=3) do k = 0, p do j = 0, n @@ -756,7 +756,7 @@ contains 0.5*length(3) >= xyz_local(3)) then ! Updating the patch identities bookkeeping variable - ib_markers_sf(i, j, k) = patch_id + ib_markers%sf(i, j, k) = patch_id end if end do end do @@ -774,12 +774,12 @@ contains !! that the cylindrical patch DOES allow for the smoothing !! of its lateral boundary. !! @param patch_id is the patch identifier - !! @param ib_markers_sf Array to track patch ids + !! @param ib_markers Array to track patch ids !! @param ib True if this patch is an immersed boundary - subroutine s_ib_cylinder(patch_id, ib_markers_sf) + subroutine s_ib_cylinder(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers integer :: i, j, k !< Generic loop iterators real(wp) :: radius @@ -802,7 +802,7 @@ contains ! domain and verifying whether the current patch has the permission ! to write to that cell. If both queries check out, the primitive ! variables of the current patch are assigned to this cell. - $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]',& & copyin='[patch_id,center,length,radius,inverse_rotation]', collapse=3) do k = 0, p do j = 0, n @@ -836,7 +836,7 @@ contains 0.5_wp*length(3) >= xyz_local(3)))) then ! Updating the patch identities bookkeeping variable - ib_markers_sf(i, j, k) = patch_id + ib_markers%sf(i, j, k) = patch_id end if end do end do @@ -845,10 +845,10 @@ contains end subroutine s_ib_cylinder - subroutine s_ib_ellipse(patch_id, ib_markers_sf) + subroutine s_ib_ellipse(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers integer :: i, j, k !< generic loop iterators real(wp), dimension(1:3) :: xy_local !< x and y coordinates in local IB frame @@ -865,7 +865,7 @@ contains ! Checking whether the ellipse covers a particular cell in the ! domain - $:GPU_PARALLEL_LOOP(private='[i,j, xy_local]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j, xy_local]',& & copyin='[patch_id,center,ellipse_coeffs,inverse_rotation,x_cc,y_cc]', collapse=2) do j = 0, n do i = 0, m @@ -876,7 +876,7 @@ contains ! Ellipse condition (x/a)^2 + (y/b)^2 <= 1 if ((xy_local(1)/ellipse_coeffs(1))**2 + (xy_local(2)/ellipse_coeffs(2))**2 <= 1._wp) then ! Updating the patch identities bookkeeping variable - ib_markers_sf(i, j, 0) = patch_id + ib_markers%sf(i, j, 0) = patch_id end if end do end do @@ -886,13 +886,13 @@ contains !> The STL patch is a 2/3D geometry that is imported from an STL file. !! @param patch_id is the patch identifier - !! @param ib_markers_sf Array to track patch ids + !! @param ib_markers Array to track patch ids !! @param STL_levelset STL levelset !! @param STL_levelset_norm STL levelset normals - subroutine s_ib_model(patch_id, ib_markers_sf) + subroutine s_ib_model(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers integer :: i, j, k !< Generic loop iterators @@ -934,7 +934,7 @@ contains ! Reading STL boundary vertices and compute the levelset and levelset_norm if (eta > patch_ib(patch_id)%model_threshold) then - ib_markers_sf(i, j, k) = patch_id + ib_markers%sf(i, j, k) = patch_id end if end do diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index bd497231f6..965fa68950 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -106,9 +106,9 @@ contains ! recompute the new ib_patch locations and broadcast them. ib_markers%sf = 0._wp - call s_apply_ib_patches(ib_markers%sf(0:m, 0:n, 0:p)) $:GPU_UPDATE(device='[ib_markers%sf]') - call s_populate_ib_buffers() + call s_apply_ib_patches(ib_markers) + ! call s_populate_ib_buffers() $:GPU_UPDATE(host='[ib_markers%sf]') do i = 1, num_ibs if (patch_ib(i)%moving_ibm /= 0) call s_compute_centroid_offset(i) ! offsets are computed after IB markers are generated @@ -1002,9 +1002,8 @@ contains $:GPU_UPDATE(device='[patch_ib]') ! recompute the new ib_patch locations and broadcast them. - call s_apply_ib_patches(ib_markers%sf(0:m, 0:n, 0:p)) - $:GPU_UPDATE(device='[ib_markers%sf]') - call s_populate_ib_buffers() + $:GPU_UPDATE(device='[ib_markers%sf]') + call s_apply_ib_patches(ib_markers) $:GPU_UPDATE(host='[ib_markers%sf]') ! recalculate the ghost point locations and coefficients From e8c778e5034ee35929ee74fc2ead0acfec4aa03a Mon Sep 17 00:00:00 2001 From: Daniel J Vickers Date: Mon, 16 Feb 2026 13:36:11 -0500 Subject: [PATCH 02/22] Added profiling and increased maximum num IBs to 1000 --- src/common/m_constants.fpp | 2 +- src/simulation/m_ibm.fpp | 17 +++++++++++++++-- src/simulation/m_time_steppers.fpp | 4 ++++ toolchain/mfc/case_validator.py | 2 +- toolchain/mfc/params/definitions.py | 4 ++-- 5 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index d728447f5f..039e7d5d2b 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -22,7 +22,7 @@ module m_constants integer, parameter :: fourier_rings = 5 !< Fourier filter ring limit integer, parameter :: num_fluids_max = 10 !< Maximum number of fluids in the simulation integer, parameter :: num_probes_max = 10 !< Maximum number of flow probes in the simulation - integer, parameter :: num_patches_max = 10 + integer, parameter :: num_patches_max = 1000 integer, parameter :: num_bc_patches_max = 10 integer, parameter :: pathlen_max = 400 integer, parameter :: nnode = 4 !< Number of QBMM nodes diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 965fa68950..d42d58d782 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -84,6 +84,8 @@ contains integer :: i, j, k integer :: max_num_gps, max_num_inner_gps + call nvtxStartRange("SETUP-IBM-MODULE") + ! do all set up for moving immersed boundaries moving_immersed_boundary_flag = .false. do i = 1, num_ibs @@ -108,7 +110,7 @@ contains ib_markers%sf = 0._wp $:GPU_UPDATE(device='[ib_markers%sf]') call s_apply_ib_patches(ib_markers) - ! call s_populate_ib_buffers() + call s_populate_ib_buffers() $:GPU_UPDATE(host='[ib_markers%sf]') do i = 1, num_ibs if (patch_ib(i)%moving_ibm /= 0) call s_compute_centroid_offset(i) ! offsets are computed after IB markers are generated @@ -141,6 +143,8 @@ contains call s_compute_interpolation_coeffs(ghost_points) $:GPU_UPDATE(device='[ghost_points]') + call nvtxEndRange + end subroutine s_ibm_setup subroutine s_populate_ib_buffers() @@ -989,6 +993,8 @@ contains integer :: i, ierr + call nvtxStartRange("UPDATE-MIB") + ! Clears the existing immersed boundary indices ib_markers%sf = 0._wp @@ -1002,9 +1008,12 @@ contains $:GPU_UPDATE(device='[patch_ib]') ! recompute the new ib_patch locations and broadcast them. - $:GPU_UPDATE(device='[ib_markers%sf]') + call nvtxStartRange("COMPUTE-IB-MARKERS") + $:GPU_UPDATE(device='[ib_markers%sf]') call s_apply_ib_patches(ib_markers) + call s_populate_ib_buffers() $:GPU_UPDATE(host='[ib_markers%sf]') + call nvtxEndRange ! recalculate the ghost point locations and coefficients call s_find_num_ghost_points(num_gps, num_inner_gps) @@ -1013,8 +1022,10 @@ contains call s_find_ghost_points(ghost_points, inner_points) $:GPU_UPDATE(device='[ghost_points, inner_points]') + call nvtxStartRange("APPLY-LEVELSET") call s_apply_levelset(ghost_points, num_gps) $:GPU_UPDATE(device='[ghost_points]') + call nvtxEndRange call s_compute_image_points(ghost_points) $:GPU_UPDATE(device='[ghost_points]') @@ -1022,6 +1033,8 @@ contains call s_compute_interpolation_coeffs(ghost_points) $:GPU_UPDATE(device='[ghost_points]') + call nvtxEndRange + end subroutine s_update_mib ! compute the surface integrals of the IB via a volume integraion method described in diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index 94ea98bf8c..f6eb0b530d 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -802,6 +802,8 @@ contains integer :: i logical :: forces_computed + call nvtxStartRange("PROPAGATE-IMERSED-BOUNDARIES") + forces_computed = .false. do i = 1, num_ibs @@ -849,6 +851,8 @@ contains call s_update_mib(num_ibs) + call nvtxEndRange + end subroutine s_propagate_immersed_boundaries !> This subroutine saves the temporary q_prim_vf vector diff --git a/toolchain/mfc/case_validator.py b/toolchain/mfc/case_validator.py index 2c29c75e7f..9b071e9e66 100644 --- a/toolchain/mfc/case_validator.py +++ b/toolchain/mfc/case_validator.py @@ -684,7 +684,7 @@ def check_ibm(self): self.prohibit(ib and n <= 0, "Immersed Boundaries do not work in 1D (requires n > 0)") - self.prohibit(ib and (num_ibs <= 0 or num_ibs > 10), + self.prohibit(ib and (num_ibs <= 0 or num_ibs > 1000), "num_ibs must be between 1 and num_patches_max (10)") self.prohibit(not ib and num_ibs > 0, "num_ibs is set, but ib is not enabled") diff --git a/toolchain/mfc/params/definitions.py b/toolchain/mfc/params/definitions.py index d44249f664..7c456b36fc 100644 --- a/toolchain/mfc/params/definitions.py +++ b/toolchain/mfc/params/definitions.py @@ -11,7 +11,7 @@ from .registry import REGISTRY # Index limits -NP, NF, NI, NA, NPR, NB = 10, 10, 10, 4, 10, 10 # patches, fluids, ibs, acoustic, probes, bc_patches +NP, NF, NI, NA, NPR, NB = 10, 10, 1000, 4, 10, 10 # patches, fluids, ibs, acoustic, probes, bc_patches # ============================================================================= @@ -672,7 +672,7 @@ def get_value_label(param_name: str, value: int) -> str: # Counts (must be positive) "num_fluids": {"min": 1, "max": 10}, "num_patches": {"min": 0, "max": 10}, - "num_ibs": {"min": 0, "max": 10}, + "num_ibs": {"min": 0, "max": 1000}, "num_source": {"min": 1}, "num_probes": {"min": 1}, "num_integrals": {"min": 1}, From 5d885bbcfecd3db17b4acd9eb0a4ef14be6f0606 Mon Sep 17 00:00:00 2001 From: Daniel J Vickers Date: Mon, 16 Feb 2026 14:31:49 -0500 Subject: [PATCH 03/22] Performance tuning complted for marker generation --- src/simulation/m_ib_patches.fpp | 114 +++++++++++++++++++++----------- src/simulation/m_ibm.fpp | 11 ++- 2 files changed, 81 insertions(+), 44 deletions(-) diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index 0509593918..c0ab727dd7 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -85,7 +85,7 @@ contains call s_ib_3D_airfoil(i, ib_markers) ! STL+IBM patch elseif (patch_ib(i)%geometry == 12) then - call s_ib_model(i, ib_markers) + call s_ib_3d_model(i, ib_markers) end if end do !> @} @@ -274,8 +274,8 @@ contains $:GPU_PARALLEL_LOOP(private='[i,j]',& & copyin='[patch_id,center,radius]', collapse=2) - do j = 0, n - do i = 0, m + do j = -gp_layers, n+gp_layers + do i = -gp_layers, m+gp_layers if ((x_cc(i) - center(1))**2 & + (y_cc(j) - center(2))**2 <= radius**2) & then @@ -378,8 +378,8 @@ contains $:GPU_PARALLEL_LOOP(private='[i,j,xy_local,k,f]', & & copyin='[patch_id,center,inverse_rotation,offset,ma,ca_in,airfoil_grid_u,airfoil_grid_l]', collapse=2) - do j = 0, n - do i = 0, m + do j = -gp_layers, n+gp_layers + do i = -gp_layers, m+gp_layers xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] ! get coordinate frame centered on IB xy_local = matmul(inverse_rotation, xy_local) ! rotate the frame into the IB's coordinates xy_local = xy_local - offset ! airfoils are a patch that require a centroid offset @@ -529,9 +529,9 @@ contains $:GPU_PARALLEL_LOOP(private='[i,j,l,xyz_local,k,f]',& & copyin='[patch_id,center,inverse_rotation,offset,ma,ca_in,airfoil_grid_u,airfoil_grid_l,z_min,z_max]', collapse=3) - do l = 0, p - do j = 0, n - do i = 0, m + do l = -gp_layers, p+gp_layers + do j = -gp_layers, n+gp_layers + do i = -gp_layers, m+gp_layers xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), z_cc(l) - center(3)] ! get coordinate frame centered on IB xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates xyz_local = xyz_local - offset ! airfoils are a patch that require a centroid offset @@ -623,8 +623,8 @@ contains ! variables of the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i,j, xy_local]',& & copyin='[patch_id,center,length,inverse_rotation,x_cc,y_cc]', collapse=2) - do j = 0, n - do i = 0, m + do j = -gp_layers, n+gp_layers + do i = -gp_layers, m+gp_layers ! get the x and y coordinates in the local IB frame xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] xy_local = matmul(inverse_rotation, xy_local) @@ -678,9 +678,9 @@ contains ! the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i,j,k,cart_y,cart_z]',& & copyin='[patch_id,center,radius]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m + do k = -gp_layers, p+gp_layers + do j = -gp_layers, n+gp_layers + do i = -gp_layers, m+gp_layers if (grid_geometry == 3) then call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) else @@ -734,9 +734,9 @@ contains ! of the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]',& & copyin='[patch_id,center,length,inverse_rotation]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m + do k = -gp_layers, p+gp_layers + do j = -gp_layers, n+gp_layers + do i = -gp_layers, m+gp_layers if (grid_geometry == 3) then ! TODO :: This does not work and is not covered by any tests. This should be fixed @@ -804,9 +804,9 @@ contains ! variables of the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]',& & copyin='[patch_id,center,length,radius,inverse_rotation]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m + do k = -gp_layers, p+gp_layers + do j = -gp_layers, n+gp_layers + do i = -gp_layers, m+gp_layers if (grid_geometry == 3) then call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) @@ -867,8 +867,8 @@ contains ! domain $:GPU_PARALLEL_LOOP(private='[i,j, xy_local]',& & copyin='[patch_id,center,ellipse_coeffs,inverse_rotation,x_cc,y_cc]', collapse=2) - do j = 0, n - do i = 0, m + do j = -gp_layers, n+gp_layers + do i = -gp_layers, m+gp_layers ! get the x and y coordinates in the local IB frame xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] xy_local = matmul(inverse_rotation, xy_local) @@ -884,11 +884,9 @@ contains end subroutine s_ib_ellipse - !> The STL patch is a 2/3D geometry that is imported from an STL file. + !> The STL patch is a 2D geometry that is imported from an STL file. !! @param patch_id is the patch identifier !! @param ib_markers Array to track patch ids - !! @param STL_levelset STL levelset - !! @param STL_levelset_norm STL levelset normals subroutine s_ib_model(patch_id, ib_markers) integer, intent(in) :: patch_id @@ -900,25 +898,69 @@ contains real(wp) :: eta real(wp), dimension(1:3) :: point, local_point, offset - real(wp), dimension(1:3) :: center, xyz_local + real(wp), dimension(1:3) :: center, xy_local real(wp), dimension(1:3, 1:3) :: inverse_rotation model => models(patch_id)%model center = 0._wp center(1) = patch_ib(patch_id)%x_centroid center(2) = patch_ib(patch_id)%y_centroid - if (p > 0) center(3) = patch_ib(patch_id)%z_centroid inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) offset(:) = patch_ib(patch_id)%centroid_offset(:) - do i = 0, m - do j = 0, n - do k = 0, p + do i = 0-gp_layers, m+gp_layers + do j = -gp_layers, n+gp_layers - xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] - if (p > 0) then - xyz_local(3) = z_cc(k) - center(3) + xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] + xy_local = matmul(inverse_rotation, xy_local) + xy_local = xy_local - offset + + if (grid_geometry == 3) then + xy_local = f_convert_cyl_to_cart(xy_local) end if + + eta = f_model_is_inside(model, xy_local, (/dx(i), dy(j), 0._wp/), patch_ib(patch_id)%model_spc) + + ! Reading STL boundary vertices and compute the levelset and levelset_norm + if (eta > patch_ib(patch_id)%model_threshold) then + ib_markers%sf(i, j, 0) = patch_id + end if + + end do + end do + + end subroutine s_ib_model + + !> The STL patch is a 3D geometry that is imported from an STL file. + !! @param patch_id is the patch identifier + !! @param ib_markers Array to track patch ids + subroutine s_ib_3d_model(patch_id, ib_markers) + + integer, intent(in) :: patch_id + type(integer_field), intent(inout) :: ib_markers + + integer :: i, j, k !< Generic loop iterators + + type(t_model), pointer :: model + + real(wp) :: eta + real(wp), dimension(1:3) :: point, local_point, offset + real(wp), dimension(1:3) :: center, xyz_local + real(wp), dimension(1:3, 1:3) :: inverse_rotation + + model => models(patch_id)%model + center = 0._wp + center(1) = patch_ib(patch_id)%x_centroid + center(2) = patch_ib(patch_id)%y_centroid + center(3) = patch_ib(patch_id)%z_centroid + inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) + offset(:) = patch_ib(patch_id)%centroid_offset(:) + + do i = 0-gp_layers, m+gp_layers + do j = -gp_layers, n+gp_layers + do k = -gp_layers, p+gp_layers + + xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), z_cc(k) - center(3)] xyz_local = matmul(inverse_rotation, xyz_local) xyz_local = xyz_local - offset @@ -926,11 +968,7 @@ contains xyz_local = f_convert_cyl_to_cart(xyz_local) end if - if (p == 0) then - eta = f_model_is_inside(model, xyz_local, (/dx(i), dy(j), 0._wp/), patch_ib(patch_id)%model_spc) - else - eta = f_model_is_inside(model, xyz_local, (/dx(i), dy(j), dz(k)/), patch_ib(patch_id)%model_spc) - end if + eta = f_model_is_inside(model, xyz_local, (/dx(i), dy(j), dz(k)/), patch_ib(patch_id)%model_spc) ! Reading STL boundary vertices and compute the levelset and levelset_norm if (eta > patch_ib(patch_id)%model_threshold) then @@ -941,7 +979,7 @@ contains end do end do - end subroutine s_ib_model + end subroutine s_ib_3d_model !> Subroutine that computes a rotation matrix for converting to the rotating frame of the boundary subroutine s_update_ib_rotation_matrix(patch_id) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index d42d58d782..7c6c609d4a 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -110,7 +110,6 @@ contains ib_markers%sf = 0._wp $:GPU_UPDATE(device='[ib_markers%sf]') call s_apply_ib_patches(ib_markers) - call s_populate_ib_buffers() $:GPU_UPDATE(host='[ib_markers%sf]') do i = 1, num_ibs if (patch_ib(i)%moving_ibm /= 0) call s_compute_centroid_offset(i) ! offsets are computed after IB markers are generated @@ -1008,13 +1007,13 @@ contains $:GPU_UPDATE(device='[patch_ib]') ! recompute the new ib_patch locations and broadcast them. - call nvtxStartRange("COMPUTE-IB-MARKERS") + call nvtxStartRange("APPLY-IB-PATCHES") $:GPU_UPDATE(device='[ib_markers%sf]') call s_apply_ib_patches(ib_markers) - call s_populate_ib_buffers() - $:GPU_UPDATE(host='[ib_markers%sf]') call nvtxEndRange + call nvtxStartRange("COMPUTE-GHOST-POINTS") + ! recalculate the ghost point locations and coefficients call s_find_num_ghost_points(num_gps, num_inner_gps) $:GPU_UPDATE(device='[num_gps, num_inner_gps]') @@ -1022,10 +1021,10 @@ contains call s_find_ghost_points(ghost_points, inner_points) $:GPU_UPDATE(device='[ghost_points, inner_points]') - call nvtxStartRange("APPLY-LEVELSET") + call nvtxEndRange + call s_apply_levelset(ghost_points, num_gps) $:GPU_UPDATE(device='[ghost_points]') - call nvtxEndRange call s_compute_image_points(ghost_points) $:GPU_UPDATE(device='[ghost_points]') From 85889c059443a20d7eccf19ca39c05819cee2691 Mon Sep 17 00:00:00 2001 From: Daniel J Vickers Date: Mon, 16 Feb 2026 16:34:21 -0500 Subject: [PATCH 04/22] Bindary search for IB index region beginning for reduced IB marker compute --- src/simulation/m_ib_patches.fpp | 58 +++++++++++++++++++++++++++++++-- src/simulation/m_ibm.fpp | 7 +++- 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index c0ab727dd7..fd8e22f574 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -659,6 +659,7 @@ contains ! Generic loop iterators integer :: i, j, k + integer :: il, ir, jl, jr, kl, kr real(wp) :: radius real(wp), dimension(1:3) :: center @@ -672,15 +673,26 @@ contains center(3) = patch_ib(patch_id)%z_centroid radius = patch_ib(patch_id)%radius + il = -gp_layers + jl = -gp_layers + kl = -gp_layers + ir = m + gp_layers + jr = n + gp_layers + kr = p + gp_layers + call get_bounding_indices(center(1) - radius, center(1) + radius, x_cc, il, ir) + call get_bounding_indices(center(2) - radius, center(2) + radius, y_cc, jl, jr) + call get_bounding_indices(center(3) - radius, center(3) + radius, z_cc, kl, kr) + ! Checking whether the sphere covers a particular cell in the domain ! and verifying whether the current patch has permission to write to ! that cell. If both queries check out, the primitive variables of ! the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i,j,k,cart_y,cart_z]',& & copyin='[patch_id,center,radius]', collapse=3) - do k = -gp_layers, p+gp_layers - do j = -gp_layers, n+gp_layers - do i = -gp_layers, m+gp_layers + do k = kl-1, kr+1 + do j = jl-1, jr+1 + do i = il-1, ir+1 + ! do i = -gp_layers, m+gp_layers if (grid_geometry == 3) then call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) else @@ -1058,6 +1070,46 @@ contains end subroutine s_convert_cylindrical_to_spherical_coord + subroutine get_bounding_indices(left_bound, right_bound, cell_centers, left_index, right_index) + + real(wp), intent(in) :: left_bound, right_bound + integer, intent(inout) :: left_index, right_index + real(wp), dimension(-buff_size:), intent(in) :: cell_centers + + integer :: itr_left, itr_middle, itr_right + + itr_left = left_index + itr_right = right_index + + do while (itr_left + 1 < itr_right) + itr_middle = (itr_left + itr_right) / 2 + if (cell_centers(itr_middle) < left_bound) then + itr_left = itr_middle + else if (cell_centers(itr_middle) > left_bound) then + itr_right = itr_middle + else + itr_left = itr_middle + exit + end if + end do + left_index = itr_left + + itr_right = right_index + do while (itr_left + 1 < itr_right) + itr_middle = (itr_left + itr_right) / 2 + if (cell_centers(itr_middle) < right_bound) then + itr_left = itr_middle + else if (cell_centers(itr_middle) > right_bound) then + itr_right = itr_middle + else + itr_right = itr_middle + exit + end if + end do + right_index = itr_right + + end subroutine get_bounding_indices + !> Archimedes spiral function !! @param myth Angle !! @param offset Thickness diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 7c6c609d4a..71d4b12768 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -992,7 +992,7 @@ contains integer :: i, ierr - call nvtxStartRange("UPDATE-MIB") + call nvtxStartRange("UPDATE-MIBM") ! Clears the existing immersed boundary indices ib_markers%sf = 0._wp @@ -1057,6 +1057,9 @@ contains #:else real(wp), dimension(num_fluids) :: dynamic_viscosities #:endif + + call nvtxStartRange("COMPUTE-IB-FORCES") + forces = 0._wp torques = 0._wp @@ -1187,6 +1190,8 @@ contains patch_ib(i)%torque(:) = matmul(patch_ib(i)%rotation_matrix_inverse, torques(i, :)) ! torques must be converted to the local coordinates of the IB end do + call nvtxEndRange + end subroutine s_compute_ib_forces !> Subroutine to deallocate memory reserved for the IBM module From f0085c93bd8cf4dae3c7460cb7a2d338d04d86e1 Mon Sep 17 00:00:00 2001 From: Daniel J Vickers Date: Tue, 17 Feb 2026 09:15:25 -0500 Subject: [PATCH 05/22] ghost points are now computed on the GPU --- src/common/include/parallel_macros.fpp | 10 ++ src/simulation/m_compute_levelset.fpp | 4 - src/simulation/m_ib_patches.fpp | 95 +++++------ src/simulation/m_ibm.fpp | 222 +++++++++++-------------- 4 files changed, 155 insertions(+), 176 deletions(-) diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index bfe4b3beaf..c5ad5c1fb3 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -174,6 +174,16 @@ #endif #:enddef +#:def END_GPU_ATOMIC_CAPTURE() + #:set acc_end_directive = '!$acc end atomic' + #:set omp_end_directive = '!$omp end atomic' +#if defined(MFC_OpenACC) + $:acc_end_directive +#elif defined(MFC_OpenMP) + $:omp_end_directive +#endif +#:enddef + #:def GPU_UPDATE(host=None, device=None, extraAccArgs=None, extraOmpArgs=None) #:set acc_code = ACC_UPDATE(host=host, device=device, extraAccArgs=extraAccArgs) #:set omp_code = OMP_UPDATE(host=host, device=device, extraOmpArgs=extraOmpArgs) diff --git a/src/simulation/m_compute_levelset.fpp b/src/simulation/m_compute_levelset.fpp index 69bcd4e8b5..ef3333745b 100644 --- a/src/simulation/m_compute_levelset.fpp +++ b/src/simulation/m_compute_levelset.fpp @@ -33,8 +33,6 @@ contains integer :: i, patch_id, patch_geometry - $:GPU_UPDATE(device='[gps(1:num_gps)]') - ! 3D Patch Geometries if (p > 0) then @@ -92,8 +90,6 @@ contains end if end do - $:GPU_UPDATE(host='[gps(1:num_gps)]') - end subroutine s_apply_levelset subroutine s_circle_levelset(gp) diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index fd8e22f574..d662bff389 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -274,8 +274,8 @@ contains $:GPU_PARALLEL_LOOP(private='[i,j]',& & copyin='[patch_id,center,radius]', collapse=2) - do j = -gp_layers, n+gp_layers - do i = -gp_layers, m+gp_layers + do j = -gp_layers, n + gp_layers + do i = -gp_layers, m + gp_layers if ((x_cc(i) - center(1))**2 & + (y_cc(j) - center(2))**2 <= radius**2) & then @@ -378,8 +378,8 @@ contains $:GPU_PARALLEL_LOOP(private='[i,j,xy_local,k,f]', & & copyin='[patch_id,center,inverse_rotation,offset,ma,ca_in,airfoil_grid_u,airfoil_grid_l]', collapse=2) - do j = -gp_layers, n+gp_layers - do i = -gp_layers, m+gp_layers + do j = -gp_layers, n + gp_layers + do i = -gp_layers, m + gp_layers xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] ! get coordinate frame centered on IB xy_local = matmul(inverse_rotation, xy_local) ! rotate the frame into the IB's coordinates xy_local = xy_local - offset ! airfoils are a patch that require a centroid offset @@ -529,9 +529,9 @@ contains $:GPU_PARALLEL_LOOP(private='[i,j,l,xyz_local,k,f]',& & copyin='[patch_id,center,inverse_rotation,offset,ma,ca_in,airfoil_grid_u,airfoil_grid_l,z_min,z_max]', collapse=3) - do l = -gp_layers, p+gp_layers - do j = -gp_layers, n+gp_layers - do i = -gp_layers, m+gp_layers + do l = -gp_layers, p + gp_layers + do j = -gp_layers, n + gp_layers + do i = -gp_layers, m + gp_layers xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), z_cc(l) - center(3)] ! get coordinate frame centered on IB xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates xyz_local = xyz_local - offset ! airfoils are a patch that require a centroid offset @@ -623,8 +623,8 @@ contains ! variables of the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i,j, xy_local]',& & copyin='[patch_id,center,length,inverse_rotation,x_cc,y_cc]', collapse=2) - do j = -gp_layers, n+gp_layers - do i = -gp_layers, m+gp_layers + do j = -gp_layers, n + gp_layers + do i = -gp_layers, m + gp_layers ! get the x and y coordinates in the local IB frame xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] xy_local = matmul(inverse_rotation, xy_local) @@ -673,6 +673,7 @@ contains center(3) = patch_ib(patch_id)%z_centroid radius = patch_ib(patch_id)%radius + ! find the indices to the left and right of the IB in i, j, k il = -gp_layers jl = -gp_layers kl = -gp_layers @@ -689,10 +690,10 @@ contains ! the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i,j,k,cart_y,cart_z]',& & copyin='[patch_id,center,radius]', collapse=3) - do k = kl-1, kr+1 - do j = jl-1, jr+1 - do i = il-1, ir+1 - ! do i = -gp_layers, m+gp_layers + do k = kl - 1, kr + 1 + do j = jl - 1, jr + 1 + do i = il - 1, ir + 1 + ! do i = -gp_layers, m+gp_layers if (grid_geometry == 3) then call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) else @@ -746,9 +747,9 @@ contains ! of the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]',& & copyin='[patch_id,center,length,inverse_rotation]', collapse=3) - do k = -gp_layers, p+gp_layers - do j = -gp_layers, n+gp_layers - do i = -gp_layers, m+gp_layers + do k = -gp_layers, p + gp_layers + do j = -gp_layers, n + gp_layers + do i = -gp_layers, m + gp_layers if (grid_geometry == 3) then ! TODO :: This does not work and is not covered by any tests. This should be fixed @@ -816,9 +817,9 @@ contains ! variables of the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]',& & copyin='[patch_id,center,length,radius,inverse_rotation]', collapse=3) - do k = -gp_layers, p+gp_layers - do j = -gp_layers, n+gp_layers - do i = -gp_layers, m+gp_layers + do k = -gp_layers, p + gp_layers + do j = -gp_layers, n + gp_layers + do i = -gp_layers, m + gp_layers if (grid_geometry == 3) then call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) @@ -879,8 +880,8 @@ contains ! domain $:GPU_PARALLEL_LOOP(private='[i,j, xy_local]',& & copyin='[patch_id,center,ellipse_coeffs,inverse_rotation,x_cc,y_cc]', collapse=2) - do j = -gp_layers, n+gp_layers - do i = -gp_layers, m+gp_layers + do j = -gp_layers, n + gp_layers + do i = -gp_layers, m + gp_layers ! get the x and y coordinates in the local IB frame xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] xy_local = matmul(inverse_rotation, xy_local) @@ -920,23 +921,23 @@ contains inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) offset(:) = patch_ib(patch_id)%centroid_offset(:) - do i = 0-gp_layers, m+gp_layers - do j = -gp_layers, n+gp_layers + do i = 0 - gp_layers, m + gp_layers + do j = -gp_layers, n + gp_layers - xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] - xy_local = matmul(inverse_rotation, xy_local) - xy_local = xy_local - offset + xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] + xy_local = matmul(inverse_rotation, xy_local) + xy_local = xy_local - offset - if (grid_geometry == 3) then - xy_local = f_convert_cyl_to_cart(xy_local) - end if + if (grid_geometry == 3) then + xy_local = f_convert_cyl_to_cart(xy_local) + end if - eta = f_model_is_inside(model, xy_local, (/dx(i), dy(j), 0._wp/), patch_ib(patch_id)%model_spc) + eta = f_model_is_inside(model, xy_local, (/dx(i), dy(j), 0._wp/), patch_ib(patch_id)%model_spc) - ! Reading STL boundary vertices and compute the levelset and levelset_norm - if (eta > patch_ib(patch_id)%model_threshold) then - ib_markers%sf(i, j, 0) = patch_id - end if + ! Reading STL boundary vertices and compute the levelset and levelset_norm + if (eta > patch_ib(patch_id)%model_threshold) then + ib_markers%sf(i, j, 0) = patch_id + end if end do end do @@ -968,9 +969,9 @@ contains inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) offset(:) = patch_ib(patch_id)%centroid_offset(:) - do i = 0-gp_layers, m+gp_layers - do j = -gp_layers, n+gp_layers - do k = -gp_layers, p+gp_layers + do i = 0 - gp_layers, m + gp_layers + do j = -gp_layers, n + gp_layers + do k = -gp_layers, p + gp_layers xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), z_cc(k) - center(3)] xyz_local = matmul(inverse_rotation, xyz_local) @@ -1072,21 +1073,21 @@ contains subroutine get_bounding_indices(left_bound, right_bound, cell_centers, left_index, right_index) - real(wp), intent(in) :: left_bound, right_bound - integer, intent(inout) :: left_index, right_index - real(wp), dimension(-buff_size:), intent(in) :: cell_centers + real(wp), intent(in) :: left_bound, right_bound + integer, intent(inout) :: left_index, right_index + real(wp), dimension(-buff_size:), intent(in) :: cell_centers - integer :: itr_left, itr_middle, itr_right + integer :: itr_left, itr_middle, itr_right itr_left = left_index itr_right = right_index do while (itr_left + 1 < itr_right) - itr_middle = (itr_left + itr_right) / 2 + itr_middle = (itr_left + itr_right)/2 if (cell_centers(itr_middle) < left_bound) then - itr_left = itr_middle + itr_left = itr_middle else if (cell_centers(itr_middle) > left_bound) then - itr_right = itr_middle + itr_right = itr_middle else itr_left = itr_middle exit @@ -1096,11 +1097,11 @@ contains itr_right = right_index do while (itr_left + 1 < itr_right) - itr_middle = (itr_left + itr_right) / 2 + itr_middle = (itr_left + itr_right)/2 if (cell_centers(itr_middle) < right_bound) then - itr_left = itr_middle + itr_left = itr_middle else if (cell_centers(itr_middle) > right_bound) then - itr_right = itr_middle + itr_right = itr_middle else itr_right = itr_middle exit diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 71d4b12768..9267ed668a 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -129,12 +129,9 @@ contains @:ALLOCATE(inner_points(1:int((max_num_gps + max_num_inner_gps) * 2.0))) $:GPU_ENTER_DATA(copyin='[ghost_points,inner_points]') - call s_find_ghost_points(ghost_points, inner_points) - $:GPU_UPDATE(device='[ghost_points, inner_points]') - call s_apply_levelset(ghost_points, num_gps) - $:GPU_UPDATE(device='[ghost_points]') + $:GPU_UPDATE(host='[ghost_points, inner_points]') call s_compute_image_points(ghost_points) $:GPU_UPDATE(device='[ghost_points]') @@ -534,52 +531,55 @@ contains end subroutine s_compute_image_points - !> Function that finds the number of ghost points, used for allocating + !> Subroutine that finds the number of ghost points, used for allocating !! memory. subroutine s_find_num_ghost_points(num_gps_out, num_inner_gps_out) integer, intent(out) :: num_gps_out integer, intent(out) :: num_inner_gps_out - integer, dimension(2*gp_layers + 1, 2*gp_layers + 1) & - :: subsection_2D - integer, dimension(2*gp_layers + 1, 2*gp_layers + 1, 2*gp_layers + 1) & - :: subsection_3D - integer :: i, j, k!< Iterator variables + integer :: i, j, k, ii, jj, kk, gp_layers_z !< Iterator variables + integer :: num_gps_local, num_inner_gps_local !< local copies of the gp count to support GPU compute + logical :: is_gp - num_gps_out = 0 - num_inner_gps_out = 0 + num_gps_local = 0 + num_inner_gps_local = 0 + gp_layers_z = gp_layers + if (p == 0) gp_layers_z = 0 + $:GPU_PARALLEL_LOOP(private='[i,j,k,ii,jj,kk,is_gp]', copy='[num_gps_local,num_inner_gps_local]', firstprivate='[gp_layers,gp_layers_z]', collapse=3) do i = 0, m do j = 0, n - if (p == 0) then - if (ib_markers%sf(i, j, 0) /= 0) then - subsection_2D = ib_markers%sf( & - i - gp_layers:i + gp_layers, & - j - gp_layers:j + gp_layers, 0) - if (any(subsection_2D == 0)) then - num_gps_out = num_gps_out + 1 + do k = 0, p + if (ib_markers%sf(i, j, k) /= 0) then + is_gp = .false. + marker_search: do ii = i - gp_layers, i + gp_layers + do jj = j - gp_layers, j + gp_layers + do kk = k - gp_layers_z, k + gp_layers_z + if (ib_markers%sf(ii, jj, kk) == 0) then + ! if any neighbors are not in the IB, it is a ghost point + is_gp = .true. + exit marker_search + end if + end do + end do + end do marker_search + + if (is_gp) then + $:GPU_ATOMIC(atomic='update') + num_gps_local = num_gps_local + 1 else - num_inner_gps_out = num_inner_gps_out + 1 + $:GPU_ATOMIC(atomic='update') + num_inner_gps_local = num_inner_gps_local + 1 end if end if - else - do k = 0, p - if (ib_markers%sf(i, j, k) /= 0) then - subsection_3D = ib_markers%sf( & - i - gp_layers:i + gp_layers, & - j - gp_layers:j + gp_layers, & - k - gp_layers:k + gp_layers) - if (any(subsection_3D == 0)) then - num_gps_out = num_gps_out + 1 - else - num_inner_gps_out = num_inner_gps_out + 1 - end if - end if - end do - end if + end do end do end do + $:END_GPU_PARALLEL_LOOP() + + num_gps_out = num_gps_local + num_inner_gps_out = num_inner_gps_local end subroutine s_find_num_ghost_points @@ -588,116 +588,89 @@ contains type(ghost_point), dimension(num_gps), intent(INOUT) :: ghost_points_in type(ghost_point), dimension(num_inner_gps), intent(INOUT) :: inner_points_in - integer, dimension(2*gp_layers + 1, 2*gp_layers + 1) & - :: subsection_2D - integer, dimension(2*gp_layers + 1, 2*gp_layers + 1, 2*gp_layers + 1) & - :: subsection_3D - integer :: i, j, k !< Iterator variables - integer :: count, count_i + integer :: i, j, k, ii, jj, kk, gp_layers_z !< Iterator variables + integer :: count, count_i, local_idx integer :: patch_id + logical :: is_gp - count = 1 - count_i = 1 + count = 0 + count_i = 0 + gp_layers_z = gp_layers + if (p == 0) gp_layers_z = 0 + $:GPU_PARALLEL_LOOP(private='[i,j,k,ii,jj,kk,is_gp,local_idx]', copyin='[count,count_i, x_domain, y_domain, z_domain]', firstprivate='[gp_layers,gp_layers_z]', collapse=3) do i = 0, m do j = 0, n - if (p == 0) then - ! 2D - if (ib_markers%sf(i, j, 0) /= 0) then - subsection_2D = ib_markers%sf( & - i - gp_layers:i + gp_layers, & - j - gp_layers:j + gp_layers, 0) - if (any(subsection_2D == 0)) then - ghost_points_in(count)%loc = [i, j, 0] - patch_id = ib_markers%sf(i, j, 0) - ghost_points_in(count)%ib_patch_id = & - patch_id - - ghost_points_in(count)%slip = patch_ib(patch_id)%slip - ! ghost_points(count)%rank = proc_rank + do k = 0, p + if (ib_markers%sf(i, j, k) /= 0) then + is_gp = .false. + marker_search: do ii = i - gp_layers, i + gp_layers + do jj = j - gp_layers, j + gp_layers + do kk = k - gp_layers_z, k + gp_layers_z + if (ib_markers%sf(ii, jj, kk) == 0) then + ! if any neighbors are not in the IB, it is a ghost point + is_gp = .true. + exit marker_search + end if + end do + end do + end do marker_search + + if (is_gp) then + $:GPU_ATOMIC(atomic='capture') + count = count + 1 + local_idx = count + $:END_GPU_ATOMIC_CAPTURE() + + ghost_points_in(local_idx)%loc = [i, j, k] + patch_id = ib_markers%sf(i, j, k) + ghost_points_in(local_idx)%ib_patch_id = patch_id + + ghost_points_in(local_idx)%slip = patch_ib(patch_id)%slip if ((x_cc(i) - dx(i)) < x_domain%beg) then - ghost_points_in(count)%DB(1) = -1 + ghost_points_in(local_idx)%DB(1) = -1 else if ((x_cc(i) + dx(i)) > x_domain%end) then - ghost_points_in(count)%DB(1) = 1 + ghost_points_in(local_idx)%DB(1) = 1 else - ghost_points_in(count)%DB(1) = 0 + ghost_points_in(local_idx)%DB(1) = 0 end if if ((y_cc(j) - dy(j)) < y_domain%beg) then - ghost_points_in(count)%DB(2) = -1 + ghost_points_in(local_idx)%DB(2) = -1 else if ((y_cc(j) + dy(j)) > y_domain%end) then - ghost_points_in(count)%DB(2) = 1 + ghost_points_in(local_idx)%DB(2) = 1 else - ghost_points_in(count)%DB(2) = 0 + ghost_points_in(local_idx)%DB(2) = 0 end if - count = count + 1 - - else - inner_points_in(count_i)%loc = [i, j, 0] - patch_id = ib_markers%sf(i, j, 0) - inner_points_in(count_i)%ib_patch_id = & - patch_id - inner_points_in(count_i)%slip = patch_ib(patch_id)%slip - count_i = count_i + 1 - - end if - end if - else - ! 3D - do k = 0, p - if (ib_markers%sf(i, j, k) /= 0) then - subsection_3D = ib_markers%sf( & - i - gp_layers:i + gp_layers, & - j - gp_layers:j + gp_layers, & - k - gp_layers:k + gp_layers) - if (any(subsection_3D == 0)) then - ghost_points_in(count)%loc = [i, j, k] - patch_id = ib_markers%sf(i, j, k) - ghost_points_in(count)%ib_patch_id = & - ib_markers%sf(i, j, k) - ghost_points_in(count)%slip = patch_ib(patch_id)%slip - - if ((x_cc(i) - dx(i)) < x_domain%beg) then - ghost_points_in(count)%DB(1) = -1 - else if ((x_cc(i) + dx(i)) > x_domain%end) then - ghost_points_in(count)%DB(1) = 1 - else - ghost_points_in(count)%DB(1) = 0 - end if - - if ((y_cc(j) - dy(j)) < y_domain%beg) then - ghost_points_in(count)%DB(2) = -1 - else if ((y_cc(j) + dy(j)) > y_domain%end) then - ghost_points_in(count)%DB(2) = 1 - else - ghost_points_in(count)%DB(2) = 0 - end if - + if (p /= 0) then if ((z_cc(k) - dz(k)) < z_domain%beg) then - ghost_points_in(count)%DB(3) = -1 + ghost_points_in(local_idx)%DB(3) = -1 else if ((z_cc(k) + dz(k)) > z_domain%end) then - ghost_points_in(count)%DB(3) = 1 + ghost_points_in(local_idx)%DB(3) = 1 else - ghost_points_in(count)%DB(3) = 0 + ghost_points_in(local_idx)%DB(3) = 0 end if + end if - count = count + 1 - else - inner_points_in(count_i)%loc = [i, j, k] - patch_id = ib_markers%sf(i, j, k) - inner_points_in(count_i)%ib_patch_id = & - ib_markers%sf(i, j, k) - inner_points_in(count_i)%slip = patch_ib(patch_id)%slip + else + $:GPU_ATOMIC(atomic='capture') + count_i = count_i + 1 + local_idx = count_i + $:END_GPU_ATOMIC_CAPTURE() + + inner_points_in(local_idx)%loc = [i, j, k] + patch_id = ib_markers%sf(i, j, k) + inner_points_in(local_idx)%ib_patch_id = patch_id + inner_points_in(local_idx)%slip = patch_ib(patch_id)%slip - count_i = count_i + 1 - end if end if - end do - end if + end if + end do end do end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_find_ghost_points @@ -1013,24 +986,23 @@ contains call nvtxEndRange call nvtxStartRange("COMPUTE-GHOST-POINTS") - ! recalculate the ghost point locations and coefficients call s_find_num_ghost_points(num_gps, num_inner_gps) $:GPU_UPDATE(device='[num_gps, num_inner_gps]') call s_find_ghost_points(ghost_points, inner_points) - $:GPU_UPDATE(device='[ghost_points, inner_points]') - call nvtxEndRange - + + call nvtxStartRange("COMPUTE-IMAGE-POINTS") call s_apply_levelset(ghost_points, num_gps) - $:GPU_UPDATE(device='[ghost_points]') + $:GPU_UPDATE(host='[ghost_points, inner_points]') call s_compute_image_points(ghost_points) $:GPU_UPDATE(device='[ghost_points]') call s_compute_interpolation_coeffs(ghost_points) $:GPU_UPDATE(device='[ghost_points]') + call nvtxEndRange call nvtxEndRange From bfcc5935eb6953ab2e66d4ab6c3f06465ca31d71 Mon Sep 17 00:00:00 2001 From: Daniel J Vickers Date: Tue, 17 Feb 2026 10:58:11 -0500 Subject: [PATCH 06/22] image points computed on the GPU for x4 performance in that subroutine --- src/simulation/m_ibm.fpp | 209 +++++++++++++++------------------------ 1 file changed, 80 insertions(+), 129 deletions(-) diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 9267ed668a..01cdc632c6 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -131,13 +131,9 @@ contains $:GPU_ENTER_DATA(copyin='[ghost_points,inner_points]') call s_find_ghost_points(ghost_points, inner_points) call s_apply_levelset(ghost_points, num_gps) - $:GPU_UPDATE(host='[ghost_points, inner_points]') call s_compute_image_points(ghost_points) - $:GPU_UPDATE(device='[ghost_points]') - call s_compute_interpolation_coeffs(ghost_points) - $:GPU_UPDATE(device='[ghost_points]') call nvtxEndRange @@ -458,6 +454,7 @@ contains integer :: dir integer :: index + $:GPU_PARALLEL_LOOP(private='[q,gp,i,j,k,physical_loc,patch_id,dist,norm,dim,bound,dir,index,temp_loc,s_cc]') do q = 1, num_gps gp = ghost_points_in(q) i = gp%loc(1) @@ -507,6 +504,7 @@ contains do while ((temp_loc < s_cc(index) & .or. temp_loc > s_cc(index + 1))) index = index + dir +#if !defined(MFC_OpenACC) && !defined(MFC_OpenMP) if (index < -buff_size .or. index > bound) then print *, "A required image point is not located in this computational domain." print *, "Ghost Point is located at ", [x_cc(i), y_cc(j), z_cc(k)], " while moving in dimension ", dim @@ -518,6 +516,7 @@ contains print *, "A short term fix may include increasing buff_size further in m_helper_basic (currently set to a minimum of 10)" error stop "Ghost Point and Image Point on Different Processors" end if +#endif end do ghost_points_in(q)%ip_grid(dim) = index if (ghost_points_in(q)%DB(dim) == -1) then @@ -528,6 +527,7 @@ contains end if end do end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_image_points @@ -685,51 +685,74 @@ contains real(wp) :: buf real(wp), dimension(2, 2, 2) :: eta type(ghost_point) :: gp - integer :: i !< Iterator variables - integer :: i1, i2, j1, j2, k1, k2 !< Grid indexes + integer :: q, i, j, k, ii, jj, kk !< Grid indexes and iterators integer :: patch_id + logical is_cell_center - ! 2D - if (p <= 0) then - do i = 1, num_gps - gp = ghost_points_in(i) - ! Get the interpolation points - i1 = gp%ip_grid(1); i2 = i1 + 1 - j1 = gp%ip_grid(2); j2 = j1 + 1 - - dist = 0._wp - buf = 1._wp - dist(1, 1, 1) = sqrt( & - (x_cc(i1) - gp%ip_loc(1))**2 + & - (y_cc(j1) - gp%ip_loc(2))**2) - dist(2, 1, 1) = sqrt( & - (x_cc(i2) - gp%ip_loc(1))**2 + & - (y_cc(j1) - gp%ip_loc(2))**2) - dist(1, 2, 1) = sqrt( & - (x_cc(i1) - gp%ip_loc(1))**2 + & - (y_cc(j2) - gp%ip_loc(2))**2) - dist(2, 2, 1) = sqrt( & - (x_cc(i2) - gp%ip_loc(1))**2 + & - (y_cc(j2) - gp%ip_loc(2))**2) - - interp_coeffs = 0._wp - - if (dist(1, 1, 1) <= 1.e-16_wp) then - interp_coeffs(1, 1, 1) = 1._wp - else if (dist(2, 1, 1) <= 1.e-16_wp) then - interp_coeffs(2, 1, 1) = 1._wp - else if (dist(1, 2, 1) <= 1.e-16_wp) then - interp_coeffs(1, 2, 1) = 1._wp - else if (dist(2, 2, 1) <= 1.e-16_wp) then - interp_coeffs(2, 2, 1) = 1._wp - else + $:GPU_PARALLEL_LOOP(private='[q,i,j,k,ii,jj,kk,dist,buf,gp,interp_coeffs,eta,alpha,patch_id,is_cell_center]') + do q = 1, num_gps + gp = ghost_points_in(q) + ! Get the interpolation points + i = gp%ip_grid(1) + j = gp%ip_grid(2) + if (p /= 0) then + k = gp%ip_grid(3) + else + k = 0; + end if + + ! get the distance to a cell in each direction + dist = 0._wp + buf = 1._wp + do ii = 0, 1 + do jj = 0, 1 + if (p == 0) then + dist(1 + ii, 1 + jj, 1) = sqrt( & + (x_cc(i + ii) - gp%ip_loc(1))**2 + & + (y_cc(j + jj) - gp%ip_loc(2))**2) + else + do kk = 0, 1 + dist(1 + ii, 1 + jj, 1 + kk) = sqrt( & + (x_cc(i + ii) - gp%ip_loc(1))**2 + & + (y_cc(j + jj) - gp%ip_loc(2))**2 + & + (z_cc(k + kk) - gp%ip_loc(3))**2) + end do + end if + end do + end do + + ! check if we are arbitrarily close to a cell center + interp_coeffs = 0._wp + is_cell_center = .false. + check_is_cell_center: do ii = 0, 1 + do jj = 0, 1 + if (dist(ii + 1, jj + 1, 1) <= 1.e-16_wp) then + interp_coeffs(ii + 1, jj + 1, 1) = 1._wp + is_cell_center = .true. + exit check_is_cell_center + else + if (p /= 0) then + if (dist(ii + 1, jj + 1, 2) <= 1.e-16_wp) then + interp_coeffs(ii + 1, jj + 1, 2) = 1._wp + is_cell_center = .true. + exit check_is_cell_center + end if + end if + end if + end do + end do check_is_cell_center + + if (.not. is_cell_center) then + ! if we are not arbitrarily close, interpolate + alpha = 1._wp + patch_id = gp%ib_patch_id + if (ib_markers%sf(i, j, k) /= 0) alpha(1, 1, 1) = 0._wp + if (ib_markers%sf(i + 1, j, k) /= 0) alpha(2, 1, 1) = 0._wp + if (ib_markers%sf(i, j + 1, k) /= 0) alpha(1, 2, 1) = 0._wp + if (ib_markers%sf(i + 1, j + 1, k) /= 0) alpha(2, 2, 1) = 0._wp + + if (p == 0) then eta(:, :, 1) = 1._wp/dist(:, :, 1)**2 - alpha = 1._wp - patch_id = gp%ib_patch_id - if (ib_markers%sf(i1, j1, 0) /= 0) alpha(1, 1, 1) = 0._wp - if (ib_markers%sf(i2, j1, 0) /= 0) alpha(2, 1, 1) = 0._wp - if (ib_markers%sf(i1, j2, 0) /= 0) alpha(1, 2, 1) = 0._wp - if (ib_markers%sf(i2, j2, 0) /= 0) alpha(2, 2, 1) = 0._wp buf = sum(alpha(:, :, 1)*eta(:, :, 1)) if (buf > 0._wp) then interp_coeffs(:, :, 1) = alpha(:, :, 1)*eta(:, :, 1)/buf @@ -737,82 +760,15 @@ contains buf = sum(eta(:, :, 1)) interp_coeffs(:, :, 1) = eta(:, :, 1)/buf end if - end if - - ghost_points_in(i)%interp_coeffs = interp_coeffs - end do - - else - do i = 1, num_gps - gp = ghost_points_in(i) - ! Get the interpolation points - i1 = gp%ip_grid(1); i2 = i1 + 1 - j1 = gp%ip_grid(2); j2 = j1 + 1 - k1 = gp%ip_grid(3); k2 = k1 + 1 - - ! Get interpolation weights (Chaudhuri et al. 2011, JCP) - dist(1, 1, 1) = sqrt( & - (x_cc(i1) - gp%ip_loc(1))**2 + & - (y_cc(j1) - gp%ip_loc(2))**2 + & - (z_cc(k1) - gp%ip_loc(3))**2) - dist(2, 1, 1) = sqrt( & - (x_cc(i2) - gp%ip_loc(1))**2 + & - (y_cc(j1) - gp%ip_loc(2))**2 + & - (z_cc(k1) - gp%ip_loc(3))**2) - dist(1, 2, 1) = sqrt( & - (x_cc(i1) - gp%ip_loc(1))**2 + & - (y_cc(j2) - gp%ip_loc(2))**2 + & - (z_cc(k1) - gp%ip_loc(3))**2) - dist(2, 2, 1) = sqrt( & - (x_cc(i2) - gp%ip_loc(1))**2 + & - (y_cc(j2) - gp%ip_loc(2))**2 + & - (z_cc(k1) - gp%ip_loc(3))**2) - dist(1, 1, 2) = sqrt( & - (x_cc(i1) - gp%ip_loc(1))**2 + & - (y_cc(j1) - gp%ip_loc(2))**2 + & - (z_cc(k2) - gp%ip_loc(3))**2) - dist(2, 1, 2) = sqrt( & - (x_cc(i2) - gp%ip_loc(1))**2 + & - (y_cc(j1) - gp%ip_loc(2))**2 + & - (z_cc(k2) - gp%ip_loc(3))**2) - dist(1, 2, 2) = sqrt( & - (x_cc(i1) - gp%ip_loc(1))**2 + & - (y_cc(j2) - gp%ip_loc(2))**2 + & - (z_cc(k2) - gp%ip_loc(3))**2) - dist(2, 2, 2) = sqrt( & - (x_cc(i2) - gp%ip_loc(1))**2 + & - (y_cc(j2) - gp%ip_loc(2))**2 + & - (z_cc(k2) - gp%ip_loc(3))**2) - interp_coeffs = 0._wp - buf = 1._wp - if (dist(1, 1, 1) <= 1.e-16_wp) then - interp_coeffs(1, 1, 1) = 1._wp - else if (dist(2, 1, 1) <= 1.e-16_wp) then - interp_coeffs(2, 1, 1) = 1._wp - else if (dist(1, 2, 1) <= 1.e-16_wp) then - interp_coeffs(1, 2, 1) = 1._wp - else if (dist(2, 2, 1) <= 1.e-16_wp) then - interp_coeffs(2, 2, 1) = 1._wp - else if (dist(1, 1, 2) <= 1.e-16_wp) then - interp_coeffs(1, 1, 2) = 1._wp - else if (dist(2, 1, 2) <= 1.e-16_wp) then - interp_coeffs(2, 1, 2) = 1._wp - else if (dist(1, 2, 2) <= 1.e-16_wp) then - interp_coeffs(1, 2, 2) = 1._wp - else if (dist(2, 2, 2) <= 1.e-16_wp) then - interp_coeffs(2, 2, 2) = 1._wp else + + if (ib_markers%sf(i, j, k + 1) /= 0) alpha(1, 1, 2) = 0._wp + if (ib_markers%sf(i + 1, j, k + 1) /= 0) alpha(2, 1, 2) = 0._wp + if (ib_markers%sf(i, j + 1, k + 1) /= 0) alpha(1, 2, 2) = 0._wp + if (ib_markers%sf(i + 1, j + 1, k + 1) /= 0) alpha(2, 2, 2) = 0._wp eta = 1._wp/dist**2 - alpha = 1._wp - if (ib_markers%sf(i1, j1, k1) /= 0) alpha(1, 1, 1) = 0._wp - if (ib_markers%sf(i2, j1, k1) /= 0) alpha(2, 1, 1) = 0._wp - if (ib_markers%sf(i1, j2, k1) /= 0) alpha(1, 2, 1) = 0._wp - if (ib_markers%sf(i2, j2, k1) /= 0) alpha(2, 2, 1) = 0._wp - if (ib_markers%sf(i1, j1, k2) /= 0) alpha(1, 1, 2) = 0._wp - if (ib_markers%sf(i2, j1, k2) /= 0) alpha(2, 1, 2) = 0._wp - if (ib_markers%sf(i1, j2, k2) /= 0) alpha(1, 2, 2) = 0._wp - if (ib_markers%sf(i2, j2, k2) /= 0) alpha(2, 2, 2) = 0._wp buf = sum(alpha*eta) + if (buf > 0._wp) then interp_coeffs = alpha*eta/buf else @@ -821,9 +777,11 @@ contains end if end if - ghost_points_in(i)%interp_coeffs = interp_coeffs - end do - end if + end if + + ghost_points_in(q)%interp_coeffs = interp_coeffs + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_interpolation_coeffs @@ -988,20 +946,13 @@ contains call nvtxStartRange("COMPUTE-GHOST-POINTS") ! recalculate the ghost point locations and coefficients call s_find_num_ghost_points(num_gps, num_inner_gps) - $:GPU_UPDATE(device='[num_gps, num_inner_gps]') - call s_find_ghost_points(ghost_points, inner_points) call nvtxEndRange call nvtxStartRange("COMPUTE-IMAGE-POINTS") call s_apply_levelset(ghost_points, num_gps) - $:GPU_UPDATE(host='[ghost_points, inner_points]') - call s_compute_image_points(ghost_points) - $:GPU_UPDATE(device='[ghost_points]') - call s_compute_interpolation_coeffs(ghost_points) - $:GPU_UPDATE(device='[ghost_points]') call nvtxEndRange call nvtxEndRange From 5201ee7a6c3d6f023572eb6db0aa57cf76601915 Mon Sep 17 00:00:00 2001 From: Daniel J Vickers Date: Tue, 17 Feb 2026 11:20:32 -0500 Subject: [PATCH 07/22] Need WAY more parameters in the case file... We should probably do something about that... --- toolchain/mfc/params_tests/test_definitions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/toolchain/mfc/params_tests/test_definitions.py b/toolchain/mfc/params_tests/test_definitions.py index 7b9710bf43..20e0774526 100644 --- a/toolchain/mfc/params_tests/test_definitions.py +++ b/toolchain/mfc/params_tests/test_definitions.py @@ -161,10 +161,10 @@ class TestParameterCounts(unittest.TestCase): """Tests for expected parameter counts.""" def test_total_param_count(self): - """Total parameter count should be around 3400.""" + """Total parameter count should be around 40000.""" count = len(REGISTRY.all_params) - self.assertGreater(count, 3000, "Too few parameters") - self.assertLess(count, 4000, "Too many parameters") + self.assertGreater(count, 39000, f"Too few parameters. Got {count}.") + self.assertLess(count, 41000, f"Too many parameters. Got {count}.") def test_log_params_count(self): """Should have many LOG type parameters.""" From 622edb0c50a688287e6cbdae9f02c735b4ef3933 Mon Sep 17 00:00:00 2001 From: Daniel J Vickers Date: Tue, 17 Feb 2026 11:56:04 -0500 Subject: [PATCH 08/22] Extended the binary search reduction to all 3D IB geometries --- src/simulation/m_ib_patches.fpp | 55 +++++++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 9 deletions(-) diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index d662bff389..5bc294c8fb 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -445,7 +445,7 @@ contains type(integer_field), intent(inout) :: ib_markers real(wp) :: lz, z_max, z_min, f, ca_in, pa, ma, ta, xa, yt, xu, yu, xl, yl, xc, yc, dycdxc, sin_c, cos_c - integer :: i, j, k, l + integer :: i, j, k, l, il, ir, jl, jr, ll, lr integer :: Np1, Np2 real(wp), dimension(1:3) :: xyz_local, center, offset !< x, y, z coordinates in local IB frame @@ -527,11 +527,23 @@ contains $:GPU_UPDATE(device='[airfoil_grid_l,airfoil_grid_u]') end if + ! find the indices to the left and right of the IB in i, j, k + il = -gp_layers + jl = -gp_layers + ll = -gp_layers + ir = m + gp_layers + jr = n + gp_layers + lr = p + gp_layers + ! maximum distance any marker can be from the center is the legnth of the airfoil + call get_bounding_indices(center(1) - ca_in, center(1) + ca_in, x_cc, il, ir) + call get_bounding_indices(center(2) - ca_in, center(2) + ca_in, y_cc, jl, jr) + call get_bounding_indices(center(3) - ca_in, center(3) + ca_in, z_cc, ll, lr) + $:GPU_PARALLEL_LOOP(private='[i,j,l,xyz_local,k,f]',& & copyin='[patch_id,center,inverse_rotation,offset,ma,ca_in,airfoil_grid_u,airfoil_grid_l,z_min,z_max]', collapse=3) - do l = -gp_layers, p + gp_layers - do j = -gp_layers, n + gp_layers - do i = -gp_layers, m + gp_layers + do l = ll, lr + do j = jl, jr + do i = il, ir xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), z_cc(l) - center(3)] ! get coordinate frame centered on IB xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates xyz_local = xyz_local - offset ! airfoils are a patch that require a centroid offset @@ -728,9 +740,10 @@ contains integer, intent(in) :: patch_id type(integer_field), intent(inout) :: ib_markers - integer :: i, j, k !< Generic loop iterators + integer :: i, j, k, ir, il, jr, jl, kr, kl !< Generic loop iterators real(wp), dimension(1:3) :: xyz_local, center, length !< x and y coordinates in local IB frame real(wp), dimension(1:3, 1:3) :: inverse_rotation + real(wp) :: corner_distance ! Transferring the cuboid's centroid and length information center(1) = patch_ib(patch_id)%x_centroid @@ -741,15 +754,27 @@ contains length(3) = patch_ib(patch_id)%length_z inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) + ! find the indices to the left and right of the IB in i, j, k + il = -gp_layers + jl = -gp_layers + kl = -gp_layers + ir = m + gp_layers + jr = n + gp_layers + kr = p + gp_layers + corner_distance = sqrt(dot_product(length, length))/2._wp ! maximum distance any marker can be from the center + call get_bounding_indices(center(1) - corner_distance, center(1) + corner_distance, x_cc, il, ir) + call get_bounding_indices(center(2) - corner_distance, center(2) + corner_distance, y_cc, jl, jr) + call get_bounding_indices(center(3) - corner_distance, center(3) + corner_distance, z_cc, kl, kr) + ! Checking whether the cuboid covers a particular cell in the domain ! and verifying whether the current patch has permission to write to ! to that cell. If both queries check out, the primitive variables ! of the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]',& & copyin='[patch_id,center,length,inverse_rotation]', collapse=3) - do k = -gp_layers, p + gp_layers - do j = -gp_layers, n + gp_layers - do i = -gp_layers, m + gp_layers + do k = kl, kr + do j = jl, jr + do i = il, ir if (grid_geometry == 3) then ! TODO :: This does not work and is not covered by any tests. This should be fixed @@ -794,10 +819,11 @@ contains integer, intent(in) :: patch_id type(integer_field), intent(inout) :: ib_markers - integer :: i, j, k !< Generic loop iterators + integer :: i, j, k, il, ir, jl, jr, kl, kr !< Generic loop iterators real(wp) :: radius real(wp), dimension(1:3) :: xyz_local, center, length !< x and y coordinates in local IB frame real(wp), dimension(1:3, 1:3) :: inverse_rotation + real(wp) :: corner_distance ! Transferring the cylindrical patch's centroid, length, radius, ! smoothing patch identity and smoothing coefficient information @@ -811,6 +837,17 @@ contains radius = patch_ib(patch_id)%radius inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) + il = -gp_layers + jl = -gp_layers + kl = -gp_layers + ir = m + gp_layers + jr = n + gp_layers + kr = p + gp_layers + corner_distance = sqrt(radius**2 + maxval(length)**2) ! distance to rim of cylinder + call get_bounding_indices(center(1) - corner_distance, center(1) + corner_distance, x_cc, il, ir) + call get_bounding_indices(center(2) - corner_distance, center(2) + corner_distance, y_cc, jl, jr) + call get_bounding_indices(center(3) - corner_distance, center(3) + corner_distance, z_cc, kl, kr) + ! Checking whether the cylinder covers a particular cell in the ! domain and verifying whether the current patch has the permission ! to write to that cell. If both queries check out, the primitive From 0a089ce4e6c884f4a13ba48bc0ef8619a5426ba4 Mon Sep 17 00:00:00 2001 From: Daniel J Vickers Date: Tue, 17 Feb 2026 12:19:02 -0500 Subject: [PATCH 09/22] Extended area reduction to all non-model IBs --- src/simulation/m_ib_patches.fpp | 64 ++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 21 deletions(-) diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index 5bc294c8fb..9bded72b72 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -258,7 +258,7 @@ contains real(wp), dimension(1:2) :: center real(wp) :: radius - integer :: i, j, k !< Generic loop iterators + integer :: i, j, il, ir, jl, jr !< Generic loop iterators ! Transferring the circular patch's radius, centroid, smearing patch ! identity and smearing coefficient information @@ -267,6 +267,14 @@ contains center(2) = patch_ib(patch_id)%y_centroid radius = patch_ib(patch_id)%radius + ! find the indices to the left and right of the IB in i, j, k + il = -gp_layers + jl = -gp_layers + ir = m + gp_layers + jr = n + gp_layers + call get_bounding_indices(center(1) - radius, center(1) + radius, x_cc, il, ir) + call get_bounding_indices(center(2) - radius, center(2) + radius, y_cc, jl, jr) + ! Checking whether the circle covers a particular cell in the domain ! and verifying whether the current patch has permission to write to ! that cell. If both queries check out, the primitive variables of @@ -274,8 +282,8 @@ contains $:GPU_PARALLEL_LOOP(private='[i,j]',& & copyin='[patch_id,center,radius]', collapse=2) - do j = -gp_layers, n + gp_layers - do i = -gp_layers, m + gp_layers + do j = jl, jr + do i = il, ir if ((x_cc(i) - center(1))**2 & + (y_cc(j) - center(2))**2 <= radius**2) & then @@ -296,7 +304,7 @@ contains real(wp) :: f, ca_in, pa, ma, ta real(wp) :: xa, yt, xu, yu, xl, yl, xc, yc, dycdxc, sin_c, cos_c - integer :: i, j, k + integer :: i, j, k, il, ir, jl, jr integer :: Np1, Np2 real(wp), dimension(1:3) :: xy_local, offset !< x and y coordinates in local IB frame @@ -376,10 +384,19 @@ contains end if + ! find the indices to the left and right of the IB in i, j, k + il = -gp_layers + jl = -gp_layers + ir = m + gp_layers + jr = n + gp_layers + ! maximum distance any marker can be from the center is the length of the airfoil + call get_bounding_indices(center(1) - ca_in, center(1) + ca_in, x_cc, il, ir) + call get_bounding_indices(center(2) - ca_in, center(2) + ca_in, y_cc, jl, jr) + $:GPU_PARALLEL_LOOP(private='[i,j,xy_local,k,f]', & & copyin='[patch_id,center,inverse_rotation,offset,ma,ca_in,airfoil_grid_u,airfoil_grid_l]', collapse=2) - do j = -gp_layers, n + gp_layers - do i = -gp_layers, m + gp_layers + do j = jl, jr + do i = il, ir xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] ! get coordinate frame centered on IB xy_local = matmul(inverse_rotation, xy_local) ! rotate the frame into the IB's coordinates xy_local = xy_local - offset ! airfoils are a patch that require a centroid offset @@ -534,7 +551,7 @@ contains ir = m + gp_layers jr = n + gp_layers lr = p + gp_layers - ! maximum distance any marker can be from the center is the legnth of the airfoil + ! maximum distance any marker can be from the center is the length of the airfoil call get_bounding_indices(center(1) - ca_in, center(1) + ca_in, x_cc, il, ir) call get_bounding_indices(center(2) - ca_in, center(2) + ca_in, y_cc, jl, jr) call get_bounding_indices(center(3) - ca_in, center(3) + ca_in, z_cc, ll, lr) @@ -612,16 +629,12 @@ contains integer, intent(in) :: patch_id type(integer_field), intent(inout) :: ib_markers - integer :: i, j, k !< generic loop iterators - real(wp) :: pi_inf, gamma, lit_gamma !< Equation of state parameters + integer :: i, j, il, ir, jl, jr !< generic loop iterators + real(wp) :: corner_distance !< Equation of state parameters real(wp), dimension(1:3) :: xy_local !< x and y coordinates in local IB frame real(wp), dimension(1:2) :: length, center !< x and y coordinates in local IB frame real(wp), dimension(1:3, 1:3) :: inverse_rotation - pi_inf = fluid_pp(1)%pi_inf - gamma = fluid_pp(1)%gamma - lit_gamma = (1._wp + gamma)/gamma - ! Transferring the rectangle's centroid and length information center(1) = patch_ib(patch_id)%x_centroid center(2) = patch_ib(patch_id)%y_centroid @@ -629,14 +642,23 @@ contains length(2) = patch_ib(patch_id)%length_y inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) + ! find the indices to the left and right of the IB in i, j, k + il = -gp_layers + jl = -gp_layers + ir = m + gp_layers + jr = n + gp_layers + corner_distance = sqrt(dot_product(length, length))/2._wp ! maximum distance any marker can be from the center + call get_bounding_indices(center(1) - corner_distance, center(1) + corner_distance, x_cc, il, ir) + call get_bounding_indices(center(2) - corner_distance, center(2) + corner_distance, y_cc, jl, jr) + ! Checking whether the rectangle covers a particular cell in the ! domain and verifying whether the current patch has the permission ! to write to that cell. If both queries check out, the primitive ! variables of the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i,j, xy_local]',& & copyin='[patch_id,center,length,inverse_rotation,x_cc,y_cc]', collapse=2) - do j = -gp_layers, n + gp_layers - do i = -gp_layers, m + gp_layers + do j = jl, jr + do i = il, ir ! get the x and y coordinates in the local IB frame xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] xy_local = matmul(inverse_rotation, xy_local) @@ -702,9 +724,9 @@ contains ! the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i,j,k,cart_y,cart_z]',& & copyin='[patch_id,center,radius]', collapse=3) - do k = kl - 1, kr + 1 - do j = jl - 1, jr + 1 - do i = il - 1, ir + 1 + do k = kl, kr + do j = jl, jr + do i = il, ir ! do i = -gp_layers, m+gp_layers if (grid_geometry == 3) then call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) @@ -854,9 +876,9 @@ contains ! variables of the current patch are assigned to this cell. $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]',& & copyin='[patch_id,center,length,radius,inverse_rotation]', collapse=3) - do k = -gp_layers, p + gp_layers - do j = -gp_layers, n + gp_layers - do i = -gp_layers, m + gp_layers + do k = kl, kr + do j = jl, jr + do i = il, ir if (grid_geometry == 3) then call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) From 804a2865442584e99a551cea6422b3e912165687 Mon Sep 17 00:00:00 2001 From: Daniel J Vickers Date: Wed, 18 Feb 2026 11:45:46 -0500 Subject: [PATCH 10/22] Intermittent commit for GPU STLs --- src/common/m_model.fpp | 49 +++++++++++++++++++++++---------- src/simulation/m_ib_patches.fpp | 29 +++++++++++++++++-- 2 files changed, 61 insertions(+), 17 deletions(-) diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index 67c19fa454..90f72eda04 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -479,6 +479,23 @@ contains is_buffered = .true. end subroutine s_skip_ignored_lines + !> This function is used to replace the fortran random number + !! generator because the native generator is not compatible being called + !! from GPU routines/functions + function f_model_random_number(seed) result(rval) + + ! $:GPU_ROUTINE(parallelism='[seq]') + + integer, intent(inout) :: seed + real(wp) :: rval + + seed = ieor(seed, ishft(seed, 13)) + seed = ieor(seed, ishft(seed, -17)) + seed = ieor(seed, ishft(seed, 5)) + + rval = abs(real(seed, wp)) / real(huge(seed), wp) + end function f_model_random_number + !> This procedure, recursively, finds whether a point is inside an octree. !! @param model Model to search in. !! @param point Point to test. @@ -493,32 +510,33 @@ contains real(wp), dimension(1:3), intent(in) :: point real(wp), dimension(1:3), intent(in) :: spacing integer, intent(in) :: spc + real(wp) :: phi, theta + integer :: rand_seed real(wp) :: fraction type(t_ray) :: ray - integer :: i, j, nInOrOut, nHits + integer :: i, j, k, nInOrOut, nHits real(wp), dimension(1:spc, 1:3) :: ray_origins, ray_dirs - ! TODO :: The random number generation prohibits GPU compute due to the subroutine not being able to be called in kernels - ! This should be swapped out with something that allows GPU compute. I recommend the fibonacci sphere: - ! do i = 1, spc - ! phi = acos(1.0 - 2.0*(i-1.0)/(spc-1.0)) - ! theta = pi * (1.0 + sqrt(5.0)) * (i-1.0) - ! ray_dirs(i,:) = [cos(theta)*sin(phi), sin(theta)*sin(phi), cos(phi)] - ! ray_origins(i,:) = point - ! end do + rand_seed = int(point(1) * 73856093_wp) + & + int(point(2) * 19349663_wp) + & + int(point(3) * 83492791_wp) + if (rand_seed == 0) rand_seed = 1 + ! generate our random collection or rays do i = 1, spc - call random_number(ray_origins(i, :)) - ray_origins(i, :) = point + (ray_origins(i, :) - 0.5_wp)*spacing(:) - - call random_number(ray_dirs(i, :)) - ray_dirs(i, :) = ray_dirs(i, :) - 0.5_wp + do k = 1, 3 + ! random jitter in the origin helps us estimate volume fraction instead of only at the cell center + ray_origins(i, k) = point(k) + (f_model_random_number(rand_seed) - 0.5_wp) * spacing(k) + ! cast sample rays in all directions + ray_dirs(i, k) = point(k) + f_model_random_number(rand_seed) - 0.5_wp + end do ray_dirs(i, :) = ray_dirs(i, :)/sqrt(sum(ray_dirs(i, :)*ray_dirs(i, :))) end do + ! ray trace nInOrOut = 0 do i = 1, spc ray%o = ray_origins(i, :) @@ -526,11 +544,14 @@ contains nHits = 0 do j = 1, model%ntrs + ! count the number of triangles this ray intersects if (f_intersects_triangle(ray, model%trs(j))) then nHits = nHits + 1 end if end do + ! if thje ray hits an odd number of triangles on its way out, then + ! it must be on the inside of the model nInOrOut = nInOrOut + mod(nHits, 2) end do diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index 9bded72b72..8c4dfd696e 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -138,11 +138,9 @@ contains real(wp), dimension(1:4, 1:4) :: transform, transform_n - call random_seed() - do patch_id = 1, num_ibs if (patch_ib(patch_id)%geometry == 5 .or. patch_ib(patch_id)%geometry == 12) then - allocate (models(patch_id)%model) + @:ALLOCATE(models(patch_id)%model) print *, " * Reading model: "//trim(patch_ib(patch_id)%model_filepath) model = f_model_read(patch_ib(patch_id)%model_filepath) @@ -237,6 +235,30 @@ contains models(patch_id)%total_vertices = total_vertices end if + ! update allocatables + $:GPU_ENTER_DATA(copyin='[models(patch_id)]') + $:GPU_ENTER_DATA(copyin='[models(patch_id)%model]') + $:GPU_ENTER_DATA(copyin='[models(patch_id)%model%trs]') + $:GPU_ENTER_DATA(copyin='[models(patch_id)%model%ntrs]') + $:GPU_ENTER_DATA(copyin='[models(patch_id)%boundary_v]') + do i = 1, models(patch_id)%model%ntrs + $:GPU_ENTER_DATA(copyin='[models(patch_id)%model%trs(i)]') + end do + if (interpolate) then + $:GPU_ENTER_DATA(copyin='[models(patch_id)%interpolated_boundary_v]') + end if + + print *, "Entering GPU loop to read" + + ! TODO :: REMOVE ME + $:GPU_PARALLEL_LOOP(private='[i]') + do i = 1, 3 + print *, "Num Triangles", models(1)%model%ntrs + ! print *, "interpolate", models(i)%interpolate + ! print *, "Vertices", models(i)%model%trs(1)%v + end do + $:END_GPU_PARALLEL_LOOP() + end if end do @@ -995,6 +1017,7 @@ contains ! Reading STL boundary vertices and compute the levelset and levelset_norm if (eta > patch_ib(patch_id)%model_threshold) then + print *, eta, i, j ib_markers%sf(i, j, 0) = patch_id end if From 64bc3480260f9d32de0f8a78109a52503bb4ae3b Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Wed, 18 Feb 2026 15:34:31 -0500 Subject: [PATCH 11/22] Ib markers computed on GPU working --- src/common/m_derived_types.fpp | 8 ++- src/common/m_helper.fpp | 2 + src/common/m_model.fpp | 73 +++++++++++++++++++- src/simulation/m_ib_patches.fpp | 116 +++++++++++++++++++++----------- src/simulation/m_ibm.fpp | 4 +- 5 files changed, 160 insertions(+), 43 deletions(-) diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index 7aab3c96e2..e0e84b3b06 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -184,12 +184,18 @@ module m_derived_types end type t_model type :: t_model_array + ! --- Original CPU-side fields (unchanged) --- type(t_model), allocatable :: model real(wp), allocatable, dimension(:, :, :) :: boundary_v real(wp), allocatable, dimension(:, :) :: interpolated_boundary_v integer :: boundary_edge_count integer :: total_vertices - logical :: interpolate + integer :: interpolate + + ! --- GPU-friendly flattened arrays --- + integer :: ntrs ! copy of model%ntrs + real(wp), allocatable, dimension(:, :, :) :: trs_v ! (3, 3, ntrs) - triangle vertices + real(wp), allocatable, dimension(:, :) :: trs_n ! (3, ntrs) - triangle normals end type t_model_array !> Derived type adding initial condition (ic) patch parameters as attributes diff --git a/src/common/m_helper.fpp b/src/common/m_helper.fpp index d990a83345..f50b99a965 100644 --- a/src/common/m_helper.fpp +++ b/src/common/m_helper.fpp @@ -328,6 +328,8 @@ contains !! @return The cross product of the two vectors. pure function f_cross(a, b) result(c) + $:GPU_ROUTINE(parallelism='[seq]') + real(wp), dimension(3), intent(in) :: a, b real(wp), dimension(3) :: c diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index 90f72eda04..afb46ac7f4 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -22,7 +22,8 @@ module m_model ! Subroutines for STL immersed boundaries public :: f_check_boundary, f_register_edge, f_check_interpolation_2D, & f_check_interpolation_3D, f_interpolate_2D, f_interpolate_3D, & - f_interpolated_distance, f_normals, f_distance, f_distance_normals_3D, f_tri_area + f_interpolated_distance, f_normals, f_distance, f_distance_normals_3D, f_tri_area, s_pack_model_for_gpu, & + f_model_is_inside_flat contains @@ -484,7 +485,7 @@ contains !! from GPU routines/functions function f_model_random_number(seed) result(rval) - ! $:GPU_ROUTINE(parallelism='[seq]') + $:GPU_ROUTINE(parallelism='[seq]') integer, intent(inout) :: seed real(wp) :: rval @@ -559,6 +560,58 @@ contains end function f_model_is_inside + impure function f_model_is_inside_flat(ntrs, trs_v, trs_n, pid, point, spacing, spc) result(fraction) + + $:GPU_ROUTINE(parallelism='[seq]') + + integer, intent(in) :: ntrs + real(wp), dimension(:,:,:,:), intent(in) :: trs_v + real(wp), dimension(:,:,:), intent(in) :: trs_n + integer, intent(in) :: pid + real(wp), dimension(1:3), intent(in) :: point + real(wp), dimension(1:3), intent(in) :: spacing + integer, intent(in) :: spc + + real(wp) :: fraction + real(wp) :: origin(1:3), dir(1:3), dir_mag + type(t_ray) :: ray + type(t_triangle) :: tri + integer :: i, j, k, nInOrOut, nHits + integer :: rand_seed + + rand_seed = int(point(1) * 73856093_wp) + & + int(point(2) * 19349663_wp) + & + int(point(3) * 83492791_wp) + if (rand_seed == 0) rand_seed = 1 + + ! generate our random collection of rays + nInOrOut = 0 + do i = 1, spc + ! Generate one ray at a time — no arrays needed + do k = 1, 3 + origin(k) = point(k) + (f_model_random_number(rand_seed) - 0.5_wp) * spacing(k) + dir(k) = point(k) + f_model_random_number(rand_seed) - 0.5_wp + end do + dir_mag = sqrt(dir(1)*dir(1) + dir(2)*dir(2) + dir(3)*dir(3)) + dir(:) = dir(:) / dir_mag + + ray%o = origin + ray%d = dir + + nHits = 0 + do j = 1, ntrs + tri%v(:, :) = trs_v(:, :, j, pid) + tri%n(:) = trs_n(:, j, pid) + if (f_intersects_triangle(ray, tri)) then + nHits = nHits + 1 + end if + end do + nInOrOut = nInOrOut + mod(nHits, 2) + end do + + fraction = real(nInOrOut)/real(spc) + end function f_model_is_inside_flat + ! From https://www.scratchapixel.com/lessons/3e-basic-rendering/ray-tracing-rendering-a-triangle/ray-triangle-intersection-geometric-solution.html !> This procedure checks if a ray intersects a triangle. !! @param ray Ray. @@ -566,6 +619,8 @@ contains !! @return True if the ray intersects the triangle, false otherwise. elemental function f_intersects_triangle(ray, triangle) result(intersects) + $:GPU_ROUTINE(parallelism='[seq]') + type(t_ray), intent(in) :: ray type(t_triangle), intent(in) :: triangle @@ -1269,4 +1324,18 @@ contains end function f_interpolated_distance + subroutine s_pack_model_for_gpu(ma) + type(t_model_array), intent(inout) :: ma + integer :: i + + ma%ntrs = ma%model%ntrs + allocate(ma%trs_v(1:3, 1:3, 1:ma%ntrs)) + allocate(ma%trs_n(1:3, 1:ma%ntrs)) + + do i = 1, ma%ntrs + ma%trs_v(:, :, i) = ma%model%trs(i)%v(:, :) + ma%trs_n(:, i) = ma%model%trs(i)%n(:) + end do + end subroutine + end module m_model diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index 8c4dfd696e..df7641daab 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -25,7 +25,7 @@ module m_ib_patches implicit none - private; public :: s_apply_ib_patches, s_update_ib_rotation_matrix, s_instantiate_STL_models, models, f_convert_cyl_to_cart + private; public :: s_apply_ib_patches, s_update_ib_rotation_matrix, s_instantiate_STL_models, models, f_convert_cyl_to_cart, gpu_ntrs, gpu_trs_v, gpu_trs_n real(wp) :: x_centroid, y_centroid, z_centroid real(wp) :: length_x, length_y, length_z @@ -55,9 +55,12 @@ module m_ib_patches character(len=5) :: istr ! string to store int to string result for error checking - type(t_model_array), allocatable, target :: models(:) - $:GPU_DECLARE(create='[models]') !! array of STL models that can be allocated and then used in IB marker and levelset compute + type(t_model_array), allocatable, target :: models(:) + !! GPU-friendly flat arrays for STL model data + integer, allocatable :: gpu_ntrs(:) + real(wp), allocatable, dimension(:,:,:,:) :: gpu_trs_v + real(wp), allocatable, dimension(:,:,:) :: gpu_trs_n contains @@ -140,7 +143,7 @@ contains do patch_id = 1, num_ibs if (patch_ib(patch_id)%geometry == 5 .or. patch_ib(patch_id)%geometry == 12) then - @:ALLOCATE(models(patch_id)%model) + allocate(models(patch_id)%model) print *, " * Reading model: "//trim(patch_ib(patch_id)%model_filepath) model = f_model_read(patch_ib(patch_id)%model_filepath) @@ -229,38 +232,52 @@ contains models(patch_id)%model = model models(patch_id)%boundary_v = boundary_v models(patch_id)%boundary_edge_count = boundary_edge_count - models(patch_id)%interpolate = interpolate + if (interpolate) then + models(patch_id)%interpolate = 1 + else + models(patch_id)%interpolate = 0 + end if if (interpolate) then models(patch_id)%interpolated_boundary_v = interpolated_boundary_v models(patch_id)%total_vertices = total_vertices end if - ! update allocatables - $:GPU_ENTER_DATA(copyin='[models(patch_id)]') - $:GPU_ENTER_DATA(copyin='[models(patch_id)%model]') - $:GPU_ENTER_DATA(copyin='[models(patch_id)%model%trs]') - $:GPU_ENTER_DATA(copyin='[models(patch_id)%model%ntrs]') - $:GPU_ENTER_DATA(copyin='[models(patch_id)%boundary_v]') - do i = 1, models(patch_id)%model%ntrs - $:GPU_ENTER_DATA(copyin='[models(patch_id)%model%trs(i)]') - end do - if (interpolate) then - $:GPU_ENTER_DATA(copyin='[models(patch_id)%interpolated_boundary_v]') + end if + end do + + ! Pack and upload flat arrays for GPU (AFTER the loop) + block + integer :: pid, max_ntrs + + max_ntrs = 0 + do pid = 1, num_ibs + if (allocated(models(pid)%model)) then + call s_pack_model_for_gpu(models(pid)) + max_ntrs = max(max_ntrs, models(pid)%ntrs) end if + end do + + if (max_ntrs > 0) then + allocate(gpu_ntrs(1:num_ibs)) + allocate(gpu_trs_v(1:3, 1:3, 1:max_ntrs, 1:num_ibs)) + allocate(gpu_trs_n(1:3, 1:max_ntrs, 1:num_ibs)) - print *, "Entering GPU loop to read" + gpu_ntrs = 0 + gpu_trs_v = 0._wp + gpu_trs_n = 0._wp - ! TODO :: REMOVE ME - $:GPU_PARALLEL_LOOP(private='[i]') - do i = 1, 3 - print *, "Num Triangles", models(1)%model%ntrs - ! print *, "interpolate", models(i)%interpolate - ! print *, "Vertices", models(i)%model%trs(1)%v + do pid = 1, num_ibs + if (allocated(models(pid)%model)) then + gpu_ntrs(pid) = models(pid)%ntrs + gpu_trs_v(:, :, 1:models(pid)%ntrs, pid) = models(pid)%trs_v + gpu_trs_n(:, 1:models(pid)%ntrs, pid) = models(pid)%trs_n + end if end do - $:END_GPU_PARALLEL_LOOP() + + $:GPU_ENTER_DATA(copyin='[gpu_ntrs, gpu_trs_v, gpu_trs_n]') end if - end do + end block end subroutine s_instantiate_STL_models @@ -986,23 +1003,25 @@ contains integer, intent(in) :: patch_id type(integer_field), intent(inout) :: ib_markers - integer :: i, j, k !< Generic loop iterators + integer :: i, j, k !< Generic loop iterators + integer :: spc - type(t_model), pointer :: model - - real(wp) :: eta + real(wp) :: eta, threshold real(wp), dimension(1:3) :: point, local_point, offset real(wp), dimension(1:3) :: center, xy_local real(wp), dimension(1:3, 1:3) :: inverse_rotation - model => models(patch_id)%model center = 0._wp center(1) = patch_ib(patch_id)%x_centroid center(2) = patch_ib(patch_id)%y_centroid inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) offset(:) = patch_ib(patch_id)%centroid_offset(:) + spc = patch_ib(patch_id)%model_spc + threshold = patch_ib(patch_id)%model_threshold - do i = 0 - gp_layers, m + gp_layers + $:GPU_PARALLEL_LOOP(private='[i,j, xy_local, eta]',& + & copyin='[patch_id,center,inverse_rotation, offset, spc, threshold]', collapse=2) + do i = -gp_layers, m + gp_layers do j = -gp_layers, n + gp_layers xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] @@ -1013,16 +1032,30 @@ contains xy_local = f_convert_cyl_to_cart(xy_local) end if - eta = f_model_is_inside(model, xy_local, (/dx(i), dy(j), 0._wp/), patch_ib(patch_id)%model_spc) + if (i == 13 .and. j == 16) then + print *, "spc:", spc + print *, "ntrs:", gpu_ntrs(patch_id) + print *, "threshold:", threshold + print *, "dx:", dx(i), dy(j) + print *, "xy_local:", xy_local(1) + + end if + + eta = f_model_is_inside_flat(gpu_ntrs(patch_id), & + gpu_trs_v, gpu_trs_n, & + patch_id, & + xy_local, (/dx(i), dy(j), 0._wp/), & + spc) ! Reading STL boundary vertices and compute the levelset and levelset_norm - if (eta > patch_ib(patch_id)%model_threshold) then + if (eta > threshold) then print *, eta, i, j ib_markers%sf(i, j, 0) = patch_id end if end do end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_model @@ -1035,10 +1068,9 @@ contains type(integer_field), intent(inout) :: ib_markers integer :: i, j, k !< Generic loop iterators + integer :: spc - type(t_model), pointer :: model - - real(wp) :: eta + real(wp) :: eta, threshold real(wp), dimension(1:3) :: point, local_point, offset real(wp), dimension(1:3) :: center, xyz_local real(wp), dimension(1:3, 1:3) :: inverse_rotation @@ -1050,8 +1082,12 @@ contains center(3) = patch_ib(patch_id)%z_centroid inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) offset(:) = patch_ib(patch_id)%centroid_offset(:) + spc = patch_ib(patch_id)%model_spc + threshold = patch_ib(patch_id)%model_threshold - do i = 0 - gp_layers, m + gp_layers + $:GPU_PARALLEL_LOOP(private='[i,j,k, xyz_local, eta]',& + & copyin='[patch_id,center,inverse_rotation, offset, spc, threshold]', collapse=2) + do i = -gp_layers, m + gp_layers do j = -gp_layers, n + gp_layers do k = -gp_layers, p + gp_layers @@ -1063,7 +1099,11 @@ contains xyz_local = f_convert_cyl_to_cart(xyz_local) end if - eta = f_model_is_inside(model, xyz_local, (/dx(i), dy(j), dz(k)/), patch_ib(patch_id)%model_spc) + eta = f_model_is_inside_flat(gpu_ntrs(patch_id), & + gpu_trs_v, gpu_trs_n, & + patch_id, & + xyz_local, (/dx(i), dy(j), dz(k)/), & + spc) ! Reading STL boundary vertices and compute the levelset and levelset_norm if (eta > patch_ib(patch_id)%model_threshold) then diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 01cdc632c6..92400123bf 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -98,9 +98,9 @@ contains $:GPU_UPDATE(device='[patch_ib(1:num_ibs)]') ! GPU routines require updated cell centers - $:GPU_UPDATE(device='[x_cc, y_cc]') + $:GPU_UPDATE(device='[x_cc, y_cc, dx, dy]') if (p /= 0) then - $:GPU_UPDATE(device='[z_cc]') + $:GPU_UPDATE(device='[z_cc, dz]') end if ! allocate STL models From bc972cabd2437eddfe63b6c9268bad9cbec5b334 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Wed, 18 Feb 2026 15:54:17 -0500 Subject: [PATCH 12/22] Passes STL tests with GPU compute for IB markers (not added levelset yet) --- src/common/m_derived_types.fpp | 2 +- src/common/m_model.fpp | 44 +++++++++++++-------------- src/simulation/m_compute_levelset.fpp | 2 ++ src/simulation/m_ib_patches.fpp | 17 ++++------- src/simulation/m_igr.fpp | 26 ++++++++-------- 5 files changed, 44 insertions(+), 47 deletions(-) diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index e0e84b3b06..cbdb3e2aa4 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -195,7 +195,7 @@ module m_derived_types ! --- GPU-friendly flattened arrays --- integer :: ntrs ! copy of model%ntrs real(wp), allocatable, dimension(:, :, :) :: trs_v ! (3, 3, ntrs) - triangle vertices - real(wp), allocatable, dimension(:, :) :: trs_n ! (3, ntrs) - triangle normals + real(wp), allocatable, dimension(:, :) :: trs_n ! (3, ntrs) - triangle normals end type t_model_array !> Derived type adding initial condition (ic) patch parameters as attributes diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index afb46ac7f4..2c62a1444f 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -484,17 +484,17 @@ contains !! generator because the native generator is not compatible being called !! from GPU routines/functions function f_model_random_number(seed) result(rval) - + $:GPU_ROUTINE(parallelism='[seq]') - + integer, intent(inout) :: seed real(wp) :: rval - + seed = ieor(seed, ishft(seed, 13)) seed = ieor(seed, ishft(seed, -17)) seed = ieor(seed, ishft(seed, 5)) - - rval = abs(real(seed, wp)) / real(huge(seed), wp) + + rval = abs(real(seed, wp))/real(huge(seed), wp) end function f_model_random_number !> This procedure, recursively, finds whether a point is inside an octree. @@ -521,16 +521,16 @@ contains real(wp), dimension(1:spc, 1:3) :: ray_origins, ray_dirs - rand_seed = int(point(1) * 73856093_wp) + & - int(point(2) * 19349663_wp) + & - int(point(3) * 83492791_wp) + rand_seed = int(point(1)*73856093_wp) + & + int(point(2)*19349663_wp) + & + int(point(3)*83492791_wp) if (rand_seed == 0) rand_seed = 1 ! generate our random collection or rays do i = 1, spc do k = 1, 3 ! random jitter in the origin helps us estimate volume fraction instead of only at the cell center - ray_origins(i, k) = point(k) + (f_model_random_number(rand_seed) - 0.5_wp) * spacing(k) + ray_origins(i, k) = point(k) + (f_model_random_number(rand_seed) - 0.5_wp)*spacing(k) ! cast sample rays in all directions ray_dirs(i, k) = point(k) + f_model_random_number(rand_seed) - 0.5_wp end do @@ -561,12 +561,12 @@ contains end function f_model_is_inside impure function f_model_is_inside_flat(ntrs, trs_v, trs_n, pid, point, spacing, spc) result(fraction) - + $:GPU_ROUTINE(parallelism='[seq]') integer, intent(in) :: ntrs - real(wp), dimension(:,:,:,:), intent(in) :: trs_v - real(wp), dimension(:,:,:), intent(in) :: trs_n + real(wp), dimension(:, :, :, :), intent(in) :: trs_v + real(wp), dimension(:, :, :), intent(in) :: trs_n integer, intent(in) :: pid real(wp), dimension(1:3), intent(in) :: point real(wp), dimension(1:3), intent(in) :: spacing @@ -579,9 +579,9 @@ contains integer :: i, j, k, nInOrOut, nHits integer :: rand_seed - rand_seed = int(point(1) * 73856093_wp) + & - int(point(2) * 19349663_wp) + & - int(point(3) * 83492791_wp) + rand_seed = int(point(1)*73856093_wp) + & + int(point(2)*19349663_wp) + & + int(point(3)*83492791_wp) if (rand_seed == 0) rand_seed = 1 ! generate our random collection of rays @@ -589,11 +589,11 @@ contains do i = 1, spc ! Generate one ray at a time — no arrays needed do k = 1, 3 - origin(k) = point(k) + (f_model_random_number(rand_seed) - 0.5_wp) * spacing(k) + origin(k) = point(k) + (f_model_random_number(rand_seed) - 0.5_wp)*spacing(k) dir(k) = point(k) + f_model_random_number(rand_seed) - 0.5_wp end do dir_mag = sqrt(dir(1)*dir(1) + dir(2)*dir(2) + dir(3)*dir(3)) - dir(:) = dir(:) / dir_mag + dir(:) = dir(:)/dir_mag ray%o = origin ray%d = dir @@ -1327,14 +1327,14 @@ contains subroutine s_pack_model_for_gpu(ma) type(t_model_array), intent(inout) :: ma integer :: i - + ma%ntrs = ma%model%ntrs - allocate(ma%trs_v(1:3, 1:3, 1:ma%ntrs)) - allocate(ma%trs_n(1:3, 1:ma%ntrs)) - + allocate (ma%trs_v(1:3, 1:3, 1:ma%ntrs)) + allocate (ma%trs_n(1:3, 1:ma%ntrs)) + do i = 1, ma%ntrs ma%trs_v(:, :, i) = ma%model%trs(i)%v(:, :) - ma%trs_n(:, i) = ma%model%trs(i)%n(:) + ma%trs_n(:, i) = ma%model%trs(i)%n(:) end do end subroutine diff --git a/src/simulation/m_compute_levelset.fpp b/src/simulation/m_compute_levelset.fpp index ef3333745b..1f72b6a37a 100644 --- a/src/simulation/m_compute_levelset.fpp +++ b/src/simulation/m_compute_levelset.fpp @@ -729,6 +729,8 @@ contains end if + ! print gp%levelset, gp%levelset_norm(1), gp%levelset_norm(2), gp%levelset_norm(3) + end subroutine s_model_levelset end module m_compute_levelset diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index df7641daab..4617e98ff7 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -61,6 +61,8 @@ module m_ib_patches integer, allocatable :: gpu_ntrs(:) real(wp), allocatable, dimension(:,:,:,:) :: gpu_trs_v real(wp), allocatable, dimension(:,:,:) :: gpu_trs_n + real(wp), allocatable, dimension(:,:,:) :: gpu_boundary_v + real(wp), allocatable, dimension(:,:,:) :: gpu_interpolated_boundary_v contains @@ -1032,15 +1034,6 @@ contains xy_local = f_convert_cyl_to_cart(xy_local) end if - if (i == 13 .and. j == 16) then - print *, "spc:", spc - print *, "ntrs:", gpu_ntrs(patch_id) - print *, "threshold:", threshold - print *, "dx:", dx(i), dy(j) - print *, "xy_local:", xy_local(1) - - end if - eta = f_model_is_inside_flat(gpu_ntrs(patch_id), & gpu_trs_v, gpu_trs_n, & patch_id, & @@ -1049,7 +1042,6 @@ contains ! Reading STL boundary vertices and compute the levelset and levelset_norm if (eta > threshold) then - print *, eta, i, j ib_markers%sf(i, j, 0) = patch_id end if @@ -1057,6 +1049,8 @@ contains end do $:END_GPU_PARALLEL_LOOP() + $:GPU_UPDATE(host='[ib_markers%sf]') + end subroutine s_ib_model !> The STL patch is a 3D geometry that is imported from an STL file. @@ -1075,7 +1069,6 @@ contains real(wp), dimension(1:3) :: center, xyz_local real(wp), dimension(1:3, 1:3) :: inverse_rotation - model => models(patch_id)%model center = 0._wp center(1) = patch_ib(patch_id)%x_centroid center(2) = patch_ib(patch_id)%y_centroid @@ -1114,6 +1107,8 @@ contains end do end do + $:GPU_UPDATE(host='[ib_markers%sf]') + end subroutine s_ib_3d_model !> Subroutine that computes a rotation matrix for converting to the rotating frame of the boundary diff --git a/src/simulation/m_igr.fpp b/src/simulation/m_igr.fpp index fa06ef9092..3f98d0e7e4 100644 --- a/src/simulation/m_igr.fpp +++ b/src/simulation/m_igr.fpp @@ -269,7 +269,7 @@ contains subroutine s_igr_iterative_solve(q_cons_vf, bc_type, t_step) #ifdef _CRAYFTN - !DIR$ OPTIMIZE (-haggress) + !DIR$ OPTIMIZE (-haggress) #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type @@ -370,7 +370,7 @@ contains subroutine s_igr_sigma_x(q_cons_vf, rhs_vf) #ifdef _CRAYFTN - !DIR$ OPTIMIZE (-haggress) + !DIR$ OPTIMIZE (-haggress) #endif type(scalar_field), & dimension(sys_size), & @@ -455,7 +455,7 @@ contains subroutine s_igr_riemann_solver(q_cons_vf, rhs_vf, idir) #ifdef _CRAYFTN - !DIR$ OPTIMIZE (-haggress) + !DIR$ OPTIMIZE (-haggress) #endif type(scalar_field), & dimension(sys_size), & @@ -495,9 +495,9 @@ contains #:if MFC_CASE_OPTIMIZATION #:if igr_order == 5 - !DIR$ unroll 6 + !DIR$ unroll 6 #:elif igr_order == 3 - !DIR$ unroll 4 + !DIR$ unroll 4 #:endif #:endif $:GPU_LOOP(parallelism='[seq]') @@ -912,9 +912,9 @@ contains #:if MFC_CASE_OPTIMIZATION #:if igr_order == 5 - !DIR$ unroll 6 + !DIR$ unroll 6 #:elif igr_order == 3 - !DIR$ unroll 4 + !DIR$ unroll 4 #:endif #:endif $:GPU_LOOP(parallelism='[seq]') @@ -1429,9 +1429,9 @@ contains #:if MFC_CASE_OPTIMIZATION #:if igr_order == 5 - !DIR$ unroll 6 + !DIR$ unroll 6 #:elif igr_order == 3 - !DIR$ unroll 4 + !DIR$ unroll 4 #:endif #:endif $:GPU_LOOP(parallelism='[seq]') @@ -1826,9 +1826,9 @@ contains #:if MFC_CASE_OPTIMIZATION #:if igr_order == 5 - !DIR$ unroll 6 + !DIR$ unroll 6 #:elif igr_order == 3 - !DIR$ unroll 4 + !DIR$ unroll 4 #:endif #:endif $:GPU_LOOP(parallelism='[seq]') @@ -2311,9 +2311,9 @@ contains #:if MFC_CASE_OPTIMIZATION #:if igr_order == 5 - !DIR$ unroll 6 + !DIR$ unroll 6 #:elif igr_order == 3 - !DIR$ unroll 4 + !DIR$ unroll 4 #:endif #:endif $:GPU_LOOP(parallelism='[seq]') From a1769d06c5958342878eeefbb85235ff5437e056 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Thu, 19 Feb 2026 04:18:18 -0500 Subject: [PATCH 13/22] Moved mdoel-specific code to the model file for cleanliness --- src/common/m_model.fpp | 182 +++++++++++++++++++++++++++++- src/simulation/m_ib_patches.fpp | 190 ++------------------------------ src/simulation/m_ibm.fpp | 2 + src/simulation/m_igr.fpp | 26 ++--- 4 files changed, 205 insertions(+), 195 deletions(-) diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index 2c62a1444f..ff8e0d57f9 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -17,7 +17,7 @@ module m_model private - public :: f_model_read, s_model_write, s_model_free, f_model_is_inside + public :: s_instantiate_STL_models, f_model_read, s_model_write, s_model_free, f_model_is_inside, models, gpu_ntrs, gpu_trs_v, gpu_trs_n ! Subroutines for STL immersed boundaries public :: f_check_boundary, f_register_edge, f_check_interpolation_2D, & @@ -25,8 +25,188 @@ module m_model f_interpolated_distance, f_normals, f_distance, f_distance_normals_3D, f_tri_area, s_pack_model_for_gpu, & f_model_is_inside_flat + !! array of STL models that can be allocated and then used in IB marker and levelset compute + type(t_model_array), allocatable, target :: models(:) + !! GPU-friendly flat arrays for STL model data + integer, allocatable :: gpu_ntrs(:) + real(wp), allocatable, dimension(:, :, :, :) :: gpu_trs_v + real(wp), allocatable, dimension(:, :, :) :: gpu_trs_n + real(wp), allocatable, dimension(:, :, :) :: gpu_boundary_v + real(wp), allocatable, dimension(:, :, :) :: gpu_interpolated_boundary_v + contains + subroutine s_instantiate_STL_models() + + ! Variables for IBM+STL + real(wp) :: normals(1:3) !< Boundary normal buffer + integer :: boundary_vertex_count, boundary_edge_count, total_vertices !< Boundary vertex + real(wp), allocatable, dimension(:, :, :) :: boundary_v !< Boundary vertex buffer + real(wp), allocatable, dimension(:, :) :: interpolated_boundary_v !< Interpolated vertex buffer + real(wp) :: dx_local, dy_local, dz_local !< Levelset distance buffer + logical :: interpolate !< Logical variable to determine whether or not the model should be interpolated + + integer :: i, j, k !< Generic loop iterators + integer :: patch_id + + type(t_bbox) :: bbox, bbox_old + type(t_model) :: model + type(ic_model_parameters) :: params + + real(wp) :: eta + real(wp), dimension(1:3) :: point, model_center + real(wp) :: grid_mm(1:3, 1:2) + + real(wp), dimension(1:4, 1:4) :: transform, transform_n + +#ifdef MFC_PRE_PROCESS + dx_local = dx; dy_local = dy + if (p /= 0) dz_local = dz +#else + dx_local = minval(dx); dy_local = minval(dy) + if (p /= 0) dz_local = minval(dz) +#endif + + do patch_id = 1, num_ibs + if (patch_ib(patch_id)%geometry == 5 .or. patch_ib(patch_id)%geometry == 12) then + allocate (models(patch_id)%model) + print *, " * Reading model: "//trim(patch_ib(patch_id)%model_filepath) + + model = f_model_read(patch_ib(patch_id)%model_filepath) + params%scale(:) = patch_ib(patch_id)%model_scale(:) + params%translate(:) = patch_ib(patch_id)%model_translate(:) + params%rotate(:) = patch_ib(patch_id)%model_rotate(:) + params%spc = patch_ib(patch_id)%model_spc + params%threshold = patch_ib(patch_id)%model_threshold + + if (f_approx_equal(dot_product(params%scale, params%scale), 0._wp)) then + params%scale(:) = 1._wp + end if + + if (proc_rank == 0) then + print *, " * Transforming model." + end if + + ! Get the model center before transforming the model + bbox_old = f_create_bbox(model) + model_center(1:3) = (bbox_old%min(1:3) + bbox_old%max(1:3))/2._wp + + ! Compute the transform matrices for vertices and normals + transform = f_create_transform_matrix(params, model_center) + transform_n = f_create_transform_matrix(params) + + call s_transform_model(model, transform, transform_n) + + ! Recreate the bounding box after transformation + bbox = f_create_bbox(model) + + ! Show the number of vertices in the original STL model + if (proc_rank == 0) then + print *, ' * Number of input model vertices:', 3*model%ntrs + end if + + call f_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) + + ! Check if the model needs interpolation + if (p > 0) then + call f_check_interpolation_3D(model, (/dx_local, dy_local, dz_local/), interpolate) + else + call f_check_interpolation_2D(boundary_v, boundary_edge_count, (/dx_local, dy_local, 0._wp/), interpolate) + end if + + ! Show the number of edges and boundary edges in 2D STL models + if (proc_rank == 0 .and. p == 0) then + print *, ' * Number of 2D model boundary edges:', boundary_edge_count + end if + + ! Interpolate the STL model along the edges (2D) and on triangle facets (3D) + if (interpolate) then + if (proc_rank == 0) then + print *, ' * Interpolating STL vertices.' + end if + + if (p > 0) then + call f_interpolate_3D(model, (/dx, dy, dz/), interpolated_boundary_v, total_vertices) + else + call f_interpolate_2D(boundary_v, boundary_edge_count, (/dx, dy, dz/), interpolated_boundary_v, total_vertices) + end if + + if (proc_rank == 0) then + print *, ' * Total number of interpolated boundary vertices:', total_vertices + end if + end if + + if (proc_rank == 0) then + write (*, "(A, 3(2X, F20.10))") " > Model: Min:", bbox%min(1:3) + write (*, "(A, 3(2X, F20.10))") " > Cen:", (bbox%min(1:3) + bbox%max(1:3))/2._wp + write (*, "(A, 3(2X, F20.10))") " > Max:", bbox%max(1:3) + + grid_mm(1, :) = (/minval(x_cc(0:m)) - 0.e5_wp*dx_local, maxval(x_cc(0:m)) + 0.e5_wp*dx_local/) + grid_mm(2, :) = (/minval(y_cc(0:n)) - 0.e5_wp*dy_local, maxval(y_cc(0:n)) + 0.e5_wp*dy_local/) + + if (p > 0) then + grid_mm(3, :) = (/minval(z_cc(0:p)) - 0.e5_wp*dz_local, maxval(z_cc(0:p)) + 0.e5_wp*dz_local/) + else + grid_mm(3, :) = (/0._wp, 0._wp/) + end if + + write (*, "(A, 3(2X, F20.10))") " > Domain: Min:", grid_mm(:, 1) + write (*, "(A, 3(2X, F20.10))") " > Cen:", (grid_mm(:, 1) + grid_mm(:, 2))/2._wp + write (*, "(A, 3(2X, F20.10))") " > Max:", grid_mm(:, 2) + end if + + models(patch_id)%model = model + models(patch_id)%boundary_v = boundary_v + models(patch_id)%boundary_edge_count = boundary_edge_count + if (interpolate) then + models(patch_id)%interpolate = 1 + else + models(patch_id)%interpolate = 0 + end if + if (interpolate) then + models(patch_id)%interpolated_boundary_v = interpolated_boundary_v + models(patch_id)%total_vertices = total_vertices + end if + + end if + end do + + ! Pack and upload flat arrays for GPU (AFTER the loop) + block + integer :: pid, max_ntrs + + max_ntrs = 0 + do pid = 1, num_ibs + if (allocated(models(pid)%model)) then + call s_pack_model_for_gpu(models(pid)) + max_ntrs = max(max_ntrs, models(pid)%ntrs) + end if + end do + + if (max_ntrs > 0) then + allocate (gpu_ntrs(1:num_ibs)) + allocate (gpu_trs_v(1:3, 1:3, 1:max_ntrs, 1:num_ibs)) + allocate (gpu_trs_n(1:3, 1:max_ntrs, 1:num_ibs)) + + gpu_ntrs = 0 + gpu_trs_v = 0._wp + gpu_trs_n = 0._wp + + do pid = 1, num_ibs + if (allocated(models(pid)%model)) then + gpu_ntrs(pid) = models(pid)%ntrs + gpu_trs_v(:, :, 1:models(pid)%ntrs, pid) = models(pid)%trs_v + gpu_trs_n(:, 1:models(pid)%ntrs, pid) = models(pid)%trs_n + end if + end do + + $:GPU_ENTER_DATA(copyin='[gpu_ntrs, gpu_trs_v, gpu_trs_n]') + + end if + end block + + end subroutine s_instantiate_STL_models + !> This procedure reads a binary STL file. !! @param filepath Path to the STL file. !! @param model The binary of the STL file. diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index 4617e98ff7..719a9e56cc 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -25,7 +25,7 @@ module m_ib_patches implicit none - private; public :: s_apply_ib_patches, s_update_ib_rotation_matrix, s_instantiate_STL_models, models, f_convert_cyl_to_cart, gpu_ntrs, gpu_trs_v, gpu_trs_n + private; public :: s_apply_ib_patches, s_update_ib_rotation_matrix, f_convert_cyl_to_cart real(wp) :: x_centroid, y_centroid, z_centroid real(wp) :: length_x, length_y, length_z @@ -55,15 +55,6 @@ module m_ib_patches character(len=5) :: istr ! string to store int to string result for error checking - !! array of STL models that can be allocated and then used in IB marker and levelset compute - type(t_model_array), allocatable, target :: models(:) - !! GPU-friendly flat arrays for STL model data - integer, allocatable :: gpu_ntrs(:) - real(wp), allocatable, dimension(:,:,:,:) :: gpu_trs_v - real(wp), allocatable, dimension(:,:,:) :: gpu_trs_n - real(wp), allocatable, dimension(:,:,:) :: gpu_boundary_v - real(wp), allocatable, dimension(:,:,:) :: gpu_interpolated_boundary_v - contains impure subroutine s_apply_ib_patches(ib_markers) @@ -120,169 +111,6 @@ contains end subroutine s_apply_ib_patches - subroutine s_instantiate_STL_models() - - ! Variables for IBM+STL - real(wp) :: normals(1:3) !< Boundary normal buffer - integer :: boundary_vertex_count, boundary_edge_count, total_vertices !< Boundary vertex - real(wp), allocatable, dimension(:, :, :) :: boundary_v !< Boundary vertex buffer - real(wp), allocatable, dimension(:, :) :: interpolated_boundary_v !< Interpolated vertex buffer - real(wp) :: distance !< Levelset distance buffer - logical :: interpolate !< Logical variable to determine whether or not the model should be interpolated - - integer :: i, j, k !< Generic loop iterators - integer :: patch_id - - type(t_bbox) :: bbox, bbox_old - type(t_model) :: model - type(ic_model_parameters) :: params - - real(wp) :: eta - real(wp), dimension(1:3) :: point, model_center - real(wp) :: grid_mm(1:3, 1:2) - - real(wp), dimension(1:4, 1:4) :: transform, transform_n - - do patch_id = 1, num_ibs - if (patch_ib(patch_id)%geometry == 5 .or. patch_ib(patch_id)%geometry == 12) then - allocate(models(patch_id)%model) - print *, " * Reading model: "//trim(patch_ib(patch_id)%model_filepath) - - model = f_model_read(patch_ib(patch_id)%model_filepath) - params%scale(:) = patch_ib(patch_id)%model_scale(:) - params%translate(:) = patch_ib(patch_id)%model_translate(:) - params%rotate(:) = patch_ib(patch_id)%model_rotate(:) - params%spc = patch_ib(patch_id)%model_spc - params%threshold = patch_ib(patch_id)%model_threshold - - if (f_approx_equal(dot_product(params%scale, params%scale), 0._wp)) then - params%scale(:) = 1._wp - end if - - if (proc_rank == 0) then - print *, " * Transforming model." - end if - - ! Get the model center before transforming the model - bbox_old = f_create_bbox(model) - model_center(1:3) = (bbox_old%min(1:3) + bbox_old%max(1:3))/2._wp - - ! Compute the transform matrices for vertices and normals - transform = f_create_transform_matrix(params, model_center) - transform_n = f_create_transform_matrix(params) - - call s_transform_model(model, transform, transform_n) - - ! Recreate the bounding box after transformation - bbox = f_create_bbox(model) - - ! Show the number of vertices in the original STL model - if (proc_rank == 0) then - print *, ' * Number of input model vertices:', 3*model%ntrs - end if - - call f_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) - - ! Check if the model needs interpolation - if (p > 0) then - call f_check_interpolation_3D(model, (/dx, dy, dz/), interpolate) - else - call f_check_interpolation_2D(boundary_v, boundary_edge_count, (/minval(dx), minval(dy), 0._wp/), interpolate) - end if - - ! Show the number of edges and boundary edges in 2D STL models - if (proc_rank == 0 .and. p == 0) then - print *, ' * Number of 2D model boundary edges:', boundary_edge_count - end if - - ! Interpolate the STL model along the edges (2D) and on triangle facets (3D) - if (interpolate) then - if (proc_rank == 0) then - print *, ' * Interpolating STL vertices.' - end if - - if (p > 0) then - call f_interpolate_3D(model, (/dx, dy, dz/), interpolated_boundary_v, total_vertices) - else - call f_interpolate_2D(boundary_v, boundary_edge_count, (/dx, dy, dz/), interpolated_boundary_v, total_vertices) - end if - - if (proc_rank == 0) then - print *, ' * Total number of interpolated boundary vertices:', total_vertices - end if - end if - - if (proc_rank == 0) then - write (*, "(A, 3(2X, F20.10))") " > Model: Min:", bbox%min(1:3) - write (*, "(A, 3(2X, F20.10))") " > Cen:", (bbox%min(1:3) + bbox%max(1:3))/2._wp - write (*, "(A, 3(2X, F20.10))") " > Max:", bbox%max(1:3) - - grid_mm(1, :) = (/minval(x_cc(0:m)) - 0.e5_wp*dx(0), maxval(x_cc(0:m)) + 0.e5_wp*dx(m)/) - grid_mm(2, :) = (/minval(y_cc(0:n)) - 0.e5_wp*dy(0), maxval(y_cc(0:n)) + 0.e5_wp*dy(n)/) - - if (p > 0) then - grid_mm(3, :) = (/minval(z_cc(0:p)) - 0.e5_wp*dz(0), maxval(z_cc(0:p)) + 0.e5_wp*dz(p)/) - else - grid_mm(3, :) = (/0._wp, 0._wp/) - end if - - write (*, "(A, 3(2X, F20.10))") " > Domain: Min:", grid_mm(:, 1) - write (*, "(A, 3(2X, F20.10))") " > Cen:", (grid_mm(:, 1) + grid_mm(:, 2))/2._wp - write (*, "(A, 3(2X, F20.10))") " > Max:", grid_mm(:, 2) - end if - - models(patch_id)%model = model - models(patch_id)%boundary_v = boundary_v - models(patch_id)%boundary_edge_count = boundary_edge_count - if (interpolate) then - models(patch_id)%interpolate = 1 - else - models(patch_id)%interpolate = 0 - end if - if (interpolate) then - models(patch_id)%interpolated_boundary_v = interpolated_boundary_v - models(patch_id)%total_vertices = total_vertices - end if - - end if - end do - - ! Pack and upload flat arrays for GPU (AFTER the loop) - block - integer :: pid, max_ntrs - - max_ntrs = 0 - do pid = 1, num_ibs - if (allocated(models(pid)%model)) then - call s_pack_model_for_gpu(models(pid)) - max_ntrs = max(max_ntrs, models(pid)%ntrs) - end if - end do - - if (max_ntrs > 0) then - allocate(gpu_ntrs(1:num_ibs)) - allocate(gpu_trs_v(1:3, 1:3, 1:max_ntrs, 1:num_ibs)) - allocate(gpu_trs_n(1:3, 1:max_ntrs, 1:num_ibs)) - - gpu_ntrs = 0 - gpu_trs_v = 0._wp - gpu_trs_n = 0._wp - - do pid = 1, num_ibs - if (allocated(models(pid)%model)) then - gpu_ntrs(pid) = models(pid)%ntrs - gpu_trs_v(:, :, 1:models(pid)%ntrs, pid) = models(pid)%trs_v - gpu_trs_n(:, 1:models(pid)%ntrs, pid) = models(pid)%trs_n - end if - end do - - $:GPU_ENTER_DATA(copyin='[gpu_ntrs, gpu_trs_v, gpu_trs_n]') - - end if - end block - - end subroutine s_instantiate_STL_models - !> The circular patch is a 2D geometry that may be used, for !! example, in creating a bubble or a droplet. The geometry !! of the patch is well-defined when its centroid and radius @@ -1035,10 +863,10 @@ contains end if eta = f_model_is_inside_flat(gpu_ntrs(patch_id), & - gpu_trs_v, gpu_trs_n, & - patch_id, & - xy_local, (/dx(i), dy(j), 0._wp/), & - spc) + gpu_trs_v, gpu_trs_n, & + patch_id, & + xy_local, (/dx(i), dy(j), 0._wp/), & + spc) ! Reading STL boundary vertices and compute the levelset and levelset_norm if (eta > threshold) then @@ -1093,10 +921,10 @@ contains end if eta = f_model_is_inside_flat(gpu_ntrs(patch_id), & - gpu_trs_v, gpu_trs_n, & - patch_id, & - xyz_local, (/dx(i), dy(j), dz(k)/), & - spc) + gpu_trs_v, gpu_trs_n, & + patch_id, & + xyz_local, (/dx(i), dy(j), dz(k)/), & + spc) ! Reading STL boundary vertices and compute the levelset and levelset_norm if (eta > patch_ib(patch_id)%model_threshold) then diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index 92400123bf..7855a60abf 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -28,6 +28,8 @@ module m_ibm use m_viscous + use m_model + implicit none private :: s_compute_image_points, & diff --git a/src/simulation/m_igr.fpp b/src/simulation/m_igr.fpp index 3f98d0e7e4..fa06ef9092 100644 --- a/src/simulation/m_igr.fpp +++ b/src/simulation/m_igr.fpp @@ -269,7 +269,7 @@ contains subroutine s_igr_iterative_solve(q_cons_vf, bc_type, t_step) #ifdef _CRAYFTN - !DIR$ OPTIMIZE (-haggress) + !DIR$ OPTIMIZE (-haggress) #endif type(scalar_field), dimension(sys_size), intent(inout) :: q_cons_vf type(integer_field), dimension(1:num_dims, 1:2), intent(in) :: bc_type @@ -370,7 +370,7 @@ contains subroutine s_igr_sigma_x(q_cons_vf, rhs_vf) #ifdef _CRAYFTN - !DIR$ OPTIMIZE (-haggress) + !DIR$ OPTIMIZE (-haggress) #endif type(scalar_field), & dimension(sys_size), & @@ -455,7 +455,7 @@ contains subroutine s_igr_riemann_solver(q_cons_vf, rhs_vf, idir) #ifdef _CRAYFTN - !DIR$ OPTIMIZE (-haggress) + !DIR$ OPTIMIZE (-haggress) #endif type(scalar_field), & dimension(sys_size), & @@ -495,9 +495,9 @@ contains #:if MFC_CASE_OPTIMIZATION #:if igr_order == 5 - !DIR$ unroll 6 + !DIR$ unroll 6 #:elif igr_order == 3 - !DIR$ unroll 4 + !DIR$ unroll 4 #:endif #:endif $:GPU_LOOP(parallelism='[seq]') @@ -912,9 +912,9 @@ contains #:if MFC_CASE_OPTIMIZATION #:if igr_order == 5 - !DIR$ unroll 6 + !DIR$ unroll 6 #:elif igr_order == 3 - !DIR$ unroll 4 + !DIR$ unroll 4 #:endif #:endif $:GPU_LOOP(parallelism='[seq]') @@ -1429,9 +1429,9 @@ contains #:if MFC_CASE_OPTIMIZATION #:if igr_order == 5 - !DIR$ unroll 6 + !DIR$ unroll 6 #:elif igr_order == 3 - !DIR$ unroll 4 + !DIR$ unroll 4 #:endif #:endif $:GPU_LOOP(parallelism='[seq]') @@ -1826,9 +1826,9 @@ contains #:if MFC_CASE_OPTIMIZATION #:if igr_order == 5 - !DIR$ unroll 6 + !DIR$ unroll 6 #:elif igr_order == 3 - !DIR$ unroll 4 + !DIR$ unroll 4 #:endif #:endif $:GPU_LOOP(parallelism='[seq]') @@ -2311,9 +2311,9 @@ contains #:if MFC_CASE_OPTIMIZATION #:if igr_order == 5 - !DIR$ unroll 6 + !DIR$ unroll 6 #:elif igr_order == 3 - !DIR$ unroll 4 + !DIR$ unroll 4 #:endif #:endif $:GPU_LOOP(parallelism='[seq]') From 5e586554ed2ea59db34f70d9bc3b0481a0121230 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Thu, 19 Feb 2026 05:29:18 -0500 Subject: [PATCH 14/22] STLs appear to be working on the GPU with NVHPC! --- src/common/m_derived_types.fpp | 4 +- src/common/m_model.fpp | 123 ++++++++++++++++++++++++-- src/simulation/m_compute_levelset.fpp | 39 ++++---- src/simulation/m_ib_patches.fpp | 4 - 4 files changed, 134 insertions(+), 36 deletions(-) diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index cbdb3e2aa4..21ccbf3bbd 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -184,7 +184,7 @@ module m_derived_types end type t_model type :: t_model_array - ! --- Original CPU-side fields (unchanged) --- + ! Original CPU-side fields (unchanged) type(t_model), allocatable :: model real(wp), allocatable, dimension(:, :, :) :: boundary_v real(wp), allocatable, dimension(:, :) :: interpolated_boundary_v @@ -192,7 +192,7 @@ module m_derived_types integer :: total_vertices integer :: interpolate - ! --- GPU-friendly flattened arrays --- + ! GPU-friendly flattened arrays integer :: ntrs ! copy of model%ntrs real(wp), allocatable, dimension(:, :, :) :: trs_v ! (3, 3, ntrs) - triangle vertices real(wp), allocatable, dimension(:, :) :: trs_n ! (3, ntrs) - triangle normals diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index ff8e0d57f9..3e8812485d 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -17,13 +17,15 @@ module m_model private - public :: s_instantiate_STL_models, f_model_read, s_model_write, s_model_free, f_model_is_inside, models, gpu_ntrs, gpu_trs_v, gpu_trs_n + public :: s_instantiate_STL_models, f_model_read, s_model_write, s_model_free, f_model_is_inside, models, gpu_ntrs, & + gpu_trs_v, gpu_trs_n, gpu_boundary_v, gpu_interpolated_boundary_v, gpu_interpolate, gpu_boundary_edge_count, & + gpu_total_vertices ! Subroutines for STL immersed boundaries public :: f_check_boundary, f_register_edge, f_check_interpolation_2D, & f_check_interpolation_3D, f_interpolate_2D, f_interpolate_3D, & f_interpolated_distance, f_normals, f_distance, f_distance_normals_3D, f_tri_area, s_pack_model_for_gpu, & - f_model_is_inside_flat + f_model_is_inside_flat, f_distance_normals_3d_flat !! array of STL models that can be allocated and then used in IB marker and levelset compute type(t_model_array), allocatable, target :: models(:) @@ -31,8 +33,11 @@ module m_model integer, allocatable :: gpu_ntrs(:) real(wp), allocatable, dimension(:, :, :, :) :: gpu_trs_v real(wp), allocatable, dimension(:, :, :) :: gpu_trs_n - real(wp), allocatable, dimension(:, :, :) :: gpu_boundary_v + real(wp), allocatable, dimension(:, :, :, :) :: gpu_boundary_v real(wp), allocatable, dimension(:, :, :) :: gpu_interpolated_boundary_v + integer, allocatable :: gpu_interpolate(:) + integer, allocatable :: gpu_boundary_edge_count(:) + integer, allocatable :: gpu_total_vertices(:) contains @@ -174,34 +179,80 @@ contains ! Pack and upload flat arrays for GPU (AFTER the loop) block integer :: pid, max_ntrs + integer :: max_bv1, max_bv2, max_bv3, max_iv1, max_iv2 max_ntrs = 0 + max_bv1 = 0; max_bv2 = 0; max_bv3 = 0 + max_iv1 = 0; max_iv2 = 0 + do pid = 1, num_ibs if (allocated(models(pid)%model)) then call s_pack_model_for_gpu(models(pid)) max_ntrs = max(max_ntrs, models(pid)%ntrs) end if + if (allocated(models(pid)%boundary_v)) then + max_bv1 = max(max_bv1, size(models(pid)%boundary_v, 1)) + max_bv2 = max(max_bv2, size(models(pid)%boundary_v, 2)) + max_bv3 = max(max_bv3, size(models(pid)%boundary_v, 3)) + end if + if (allocated(models(pid)%interpolated_boundary_v)) then + max_iv1 = max(max_iv1, size(models(pid)%interpolated_boundary_v, 1)) + max_iv2 = max(max_iv2, size(models(pid)%interpolated_boundary_v, 2)) + end if end do if (max_ntrs > 0) then allocate (gpu_ntrs(1:num_ibs)) allocate (gpu_trs_v(1:3, 1:3, 1:max_ntrs, 1:num_ibs)) allocate (gpu_trs_n(1:3, 1:max_ntrs, 1:num_ibs)) + allocate (gpu_interpolate(1:num_ibs)) + allocate (gpu_boundary_edge_count(1:num_ibs)) + allocate (gpu_total_vertices(1:num_ibs)) gpu_ntrs = 0 gpu_trs_v = 0._wp gpu_trs_n = 0._wp + gpu_interpolate = 0 + gpu_boundary_edge_count = 0 + gpu_total_vertices = 0 + + if (max_bv1 > 0) then + allocate (gpu_boundary_v(1:max_bv1, 1:max_bv2, 1:max_bv3, 1:num_ibs)) + gpu_boundary_v = 0._wp + end if + + if (max_iv1 > 0) then + allocate (gpu_interpolated_boundary_v(1:max_iv1, 1:max_iv2, 1:num_ibs)) + gpu_interpolated_boundary_v = 0._wp + end if do pid = 1, num_ibs if (allocated(models(pid)%model)) then gpu_ntrs(pid) = models(pid)%ntrs gpu_trs_v(:, :, 1:models(pid)%ntrs, pid) = models(pid)%trs_v gpu_trs_n(:, 1:models(pid)%ntrs, pid) = models(pid)%trs_n + gpu_interpolate(pid) = models(pid)%interpolate + gpu_boundary_edge_count(pid) = models(pid)%boundary_edge_count + gpu_total_vertices(pid) = models(pid)%total_vertices + end if + if (allocated(models(pid)%boundary_v)) then + gpu_boundary_v(1:size(models(pid)%boundary_v, 1), & + 1:size(models(pid)%boundary_v, 2), & + 1:size(models(pid)%boundary_v, 3), pid) = models(pid)%boundary_v + end if + if (allocated(models(pid)%interpolated_boundary_v)) then + gpu_interpolated_boundary_v(1:size(models(pid)%interpolated_boundary_v, 1), & + 1:size(models(pid)%interpolated_boundary_v, 2), pid) = models(pid)%interpolated_boundary_v end if end do - $:GPU_ENTER_DATA(copyin='[gpu_ntrs, gpu_trs_v, gpu_trs_n]') - + $:GPU_ENTER_DATA(copyin='[gpu_ntrs, gpu_trs_v, gpu_trs_n, gpu_interpolate, gpu_boundary_edge_count, gpu_total_vertices]') + if (allocated(gpu_boundary_v)) then + $:GPU_ENTER_DATA(copyin='[gpu_boundary_v]') + end if + if (allocated(gpu_interpolated_boundary_v)) then + $:GPU_ENTER_DATA(copyin='[gpu_interpolated_boundary_v]') + end if end if end block @@ -731,7 +782,7 @@ contains end if end do - ! if thje ray hits an odd number of triangles on its way out, then + ! if the ray hits an odd number of triangles on its way out, then ! it must be on the inside of the model nInOrOut = nInOrOut + mod(nHits, 2) end do @@ -1371,6 +1422,66 @@ contains end subroutine f_distance_normals_3D + subroutine f_distance_normals_3D_flat(ntrs, trs_v, trs_n, pid, point, normals, distance) + + $:GPU_ROUTINE(parallelism='[seq]') + + integer, intent(in) :: ntrs + real(wp), dimension(:, :, :, :), intent(in) :: trs_v + real(wp), dimension(:, :, :), intent(in) :: trs_n + integer, intent(in) :: pid + real(wp), dimension(1:3), intent(in) :: point + real(wp), dimension(1:3), intent(out) :: normals + real(wp), intent(out) :: distance + + real(wp), dimension(1:3, 1:3) :: tri + real(wp) :: dist_min, dist_t_min + real(wp) :: dist_min_normal, dist_buffer_normal + real(wp), dimension(1:3) :: midp + real(wp), dimension(1:3) :: dist_buffer + integer :: i, j, tri_idx + + dist_min = 1.e12_wp + dist_min_normal = 1.e12_wp + distance = 0._wp + + tri_idx = 0 + do i = 1, ntrs + do j = 1, 3 + tri(j, 1) = trs_v(j, 1, i, pid) + tri(j, 2) = trs_v(j, 2, i, pid) + tri(j, 3) = trs_v(j, 3, i, pid) + dist_buffer(j) = sqrt((point(1) - tri(j, 1))**2 + & + (point(2) - tri(j, 2))**2 + & + (point(3) - tri(j, 3))**2) + end do + + do j = 1, 3 + midp(j) = (tri(1, j) + tri(2, j) + tri(3, j))/3 + end do + + dist_t_min = minval(dist_buffer(1:3)) + dist_buffer_normal = sqrt((point(1) - midp(1))**2 + & + (point(2) - midp(2))**2 + & + (point(3) - midp(3))**2) + + if (dist_t_min < dist_min) then + dist_min = dist_t_min + end if + + if (dist_buffer_normal < dist_min_normal) then + dist_min_normal = dist_buffer_normal + tri_idx = i + end if + end do + + normals(1) = trs_n(1, tri_idx, pid) + normals(2) = trs_n(2, tri_idx, pid) + normals(3) = trs_n(3, tri_idx, pid) + distance = dist_min + + end subroutine f_distance_normals_3D_flat + !> This procedure determines the levelset distance of 2D models without interpolation. !! @param boundary_v Group of all the boundary vertices of the 2D model without interpolation !! @param boundary_vertex_count Output the total number of boundary vertices diff --git a/src/simulation/m_compute_levelset.fpp b/src/simulation/m_compute_levelset.fpp index 1f72b6a37a..263c1bcacc 100644 --- a/src/simulation/m_compute_levelset.fpp +++ b/src/simulation/m_compute_levelset.fpp @@ -50,6 +50,8 @@ contains call s_cylinder_levelset(gps(i)) elseif (patch_geometry == 11) then call s_3d_airfoil_levelset(gps(i)) + elseif (patch_geometry == 12) then + call s_model_levelset(gps(i)) end if end do $:END_GPU_PARALLEL_LOOP() @@ -70,6 +72,8 @@ contains call s_rectangle_levelset(gps(i)) elseif (patch_geometry == 4) then call s_airfoil_levelset(gps(i)) + elseif (patch_geometry == 5) then + call s_model_levelset(gps(i)) elseif (patch_geometry == 6) then call s_ellipse_levelset(gps(i)) end if @@ -79,17 +83,6 @@ contains end if - ! STL models computed on the CPU for now - do i = 1, num_gps - patch_id = gps(i)%ib_patch_id - patch_geometry = patch_ib(patch_id)%geometry - - if (patch_geometry == 5 .or. patch_geometry == 12) then - call s_model_levelset(gps(i)) - $:GPU_UPDATE(device='[gps(i)]') - end if - end do - end subroutine s_apply_levelset subroutine s_circle_levelset(gp) @@ -651,7 +644,7 @@ contains type(ghost_point), intent(inout) :: gp integer :: i, j, k, patch_id, boundary_edge_count, total_vertices - logical :: interpolate + integer :: interpolate real(wp), dimension(1:3) :: center, xyz_local real(wp) :: normals(1:3) !< Boundary normal buffer real(wp) :: distance @@ -663,9 +656,9 @@ contains k = gp%loc(3) ! load in model values - interpolate = models(patch_id)%interpolate - boundary_edge_count = models(patch_id)%boundary_edge_count - total_vertices = models(patch_id)%total_vertices + interpolate = gpu_interpolate(patch_id) + boundary_edge_count = gpu_boundary_edge_count(patch_id) + total_vertices = gpu_total_vertices(patch_id) center = 0._wp if (.not. f_is_default(patch_ib(patch_id)%x_centroid)) center(1) = patch_ib(patch_id)%x_centroid @@ -673,6 +666,7 @@ contains if (p > 0) then if (.not. f_is_default(patch_ib(patch_id)%z_centroid)) center(3) = patch_ib(patch_id)%z_centroid end if + inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) rotation(:, :) = patch_ib(patch_id)%rotation_matrix(:, :) @@ -691,11 +685,11 @@ contains if (p > 0) then ! Get the boundary normals and shortest distance between the cell center and the model boundary - call f_distance_normals_3D(models(patch_id)%model, xyz_local, normals, distance) + call f_distance_normals_3D_flat(gpu_ntrs(patch_id), gpu_trs_v, gpu_trs_n, patch_id, xyz_local, normals, distance) ! Get the shortest distance between the cell center and the interpolated model boundary if (interpolate) then - gp%levelset = f_interpolated_distance(models(patch_id)%interpolated_boundary_v, total_vertices, xyz_local) + gp%levelset = f_interpolated_distance(gpu_interpolated_boundary_v(:, :, patch_id), total_vertices, xyz_local) else gp%levelset = distance end if @@ -707,19 +701,18 @@ contains gp%levelset_norm = matmul(rotation, normals(1:3)) else ! 2D models - if (interpolate) then + if (interpolate == 1) then ! Get the shortest distance between the cell center and the model boundary - gp%levelset = f_interpolated_distance(models(patch_id)%interpolated_boundary_v, total_vertices, xyz_local) + gp%levelset = f_interpolated_distance(gpu_interpolated_boundary_v(:, :, patch_id), total_vertices, xyz_local) ! Change 7 else - ! Get the shortest distance between the cell center and the interpolated model boundary - gp%levelset = f_distance(models(patch_id)%boundary_v, boundary_edge_count, xyz_local) + gp%levelset = f_distance(gpu_boundary_v(:, :, :, patch_id), boundary_edge_count, xyz_local) ! Change 8 end if ! Correct the sign of the levelset gp%levelset = -abs(gp%levelset) ! Get the boundary normals - call f_normals(models(patch_id)%boundary_v, & + call f_normals(gpu_boundary_v(:, :, :, patch_id), & boundary_edge_count, & xyz_local, & normals) @@ -729,8 +722,6 @@ contains end if - ! print gp%levelset, gp%levelset_norm(1), gp%levelset_norm(2), gp%levelset_norm(3) - end subroutine s_model_levelset end module m_compute_levelset diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index 719a9e56cc..cab36bcda7 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -877,8 +877,6 @@ contains end do $:END_GPU_PARALLEL_LOOP() - $:GPU_UPDATE(host='[ib_markers%sf]') - end subroutine s_ib_model !> The STL patch is a 3D geometry that is imported from an STL file. @@ -935,8 +933,6 @@ contains end do end do - $:GPU_UPDATE(host='[ib_markers%sf]') - end subroutine s_ib_3d_model !> Subroutine that computes a rotation matrix for converting to the rotating frame of the boundary From 6cc7accce8b08b447dd41d6f740fbadf25d5bd5b Mon Sep 17 00:00:00 2001 From: Daniel J Vickers Date: Thu, 19 Feb 2026 09:49:48 -0500 Subject: [PATCH 15/22] STLs ran on GPU in 3D! --- src/common/m_model.fpp | 222 +--------------------------- src/simulation/m_ib_patches.fpp | 248 +++++++++++++++++++++++++++++++- 2 files changed, 244 insertions(+), 226 deletions(-) diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index 3e8812485d..272eb2dfea 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -17,9 +17,9 @@ module m_model private - public :: s_instantiate_STL_models, f_model_read, s_model_write, s_model_free, f_model_is_inside, models, gpu_ntrs, & + public :: f_model_read, s_model_write, s_model_free, f_model_is_inside, models, gpu_ntrs, & gpu_trs_v, gpu_trs_n, gpu_boundary_v, gpu_interpolated_boundary_v, gpu_interpolate, gpu_boundary_edge_count, & - gpu_total_vertices + gpu_total_vertices, stl_bounding_boxes ! Subroutines for STL immersed boundaries public :: f_check_boundary, f_register_edge, f_check_interpolation_2D, & @@ -38,226 +38,10 @@ module m_model integer, allocatable :: gpu_interpolate(:) integer, allocatable :: gpu_boundary_edge_count(:) integer, allocatable :: gpu_total_vertices(:) + real(wp), allocatable :: stl_bounding_boxes(:, :, :) contains - subroutine s_instantiate_STL_models() - - ! Variables for IBM+STL - real(wp) :: normals(1:3) !< Boundary normal buffer - integer :: boundary_vertex_count, boundary_edge_count, total_vertices !< Boundary vertex - real(wp), allocatable, dimension(:, :, :) :: boundary_v !< Boundary vertex buffer - real(wp), allocatable, dimension(:, :) :: interpolated_boundary_v !< Interpolated vertex buffer - real(wp) :: dx_local, dy_local, dz_local !< Levelset distance buffer - logical :: interpolate !< Logical variable to determine whether or not the model should be interpolated - - integer :: i, j, k !< Generic loop iterators - integer :: patch_id - - type(t_bbox) :: bbox, bbox_old - type(t_model) :: model - type(ic_model_parameters) :: params - - real(wp) :: eta - real(wp), dimension(1:3) :: point, model_center - real(wp) :: grid_mm(1:3, 1:2) - - real(wp), dimension(1:4, 1:4) :: transform, transform_n - -#ifdef MFC_PRE_PROCESS - dx_local = dx; dy_local = dy - if (p /= 0) dz_local = dz -#else - dx_local = minval(dx); dy_local = minval(dy) - if (p /= 0) dz_local = minval(dz) -#endif - - do patch_id = 1, num_ibs - if (patch_ib(patch_id)%geometry == 5 .or. patch_ib(patch_id)%geometry == 12) then - allocate (models(patch_id)%model) - print *, " * Reading model: "//trim(patch_ib(patch_id)%model_filepath) - - model = f_model_read(patch_ib(patch_id)%model_filepath) - params%scale(:) = patch_ib(patch_id)%model_scale(:) - params%translate(:) = patch_ib(patch_id)%model_translate(:) - params%rotate(:) = patch_ib(patch_id)%model_rotate(:) - params%spc = patch_ib(patch_id)%model_spc - params%threshold = patch_ib(patch_id)%model_threshold - - if (f_approx_equal(dot_product(params%scale, params%scale), 0._wp)) then - params%scale(:) = 1._wp - end if - - if (proc_rank == 0) then - print *, " * Transforming model." - end if - - ! Get the model center before transforming the model - bbox_old = f_create_bbox(model) - model_center(1:3) = (bbox_old%min(1:3) + bbox_old%max(1:3))/2._wp - - ! Compute the transform matrices for vertices and normals - transform = f_create_transform_matrix(params, model_center) - transform_n = f_create_transform_matrix(params) - - call s_transform_model(model, transform, transform_n) - - ! Recreate the bounding box after transformation - bbox = f_create_bbox(model) - - ! Show the number of vertices in the original STL model - if (proc_rank == 0) then - print *, ' * Number of input model vertices:', 3*model%ntrs - end if - - call f_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) - - ! Check if the model needs interpolation - if (p > 0) then - call f_check_interpolation_3D(model, (/dx_local, dy_local, dz_local/), interpolate) - else - call f_check_interpolation_2D(boundary_v, boundary_edge_count, (/dx_local, dy_local, 0._wp/), interpolate) - end if - - ! Show the number of edges and boundary edges in 2D STL models - if (proc_rank == 0 .and. p == 0) then - print *, ' * Number of 2D model boundary edges:', boundary_edge_count - end if - - ! Interpolate the STL model along the edges (2D) and on triangle facets (3D) - if (interpolate) then - if (proc_rank == 0) then - print *, ' * Interpolating STL vertices.' - end if - - if (p > 0) then - call f_interpolate_3D(model, (/dx, dy, dz/), interpolated_boundary_v, total_vertices) - else - call f_interpolate_2D(boundary_v, boundary_edge_count, (/dx, dy, dz/), interpolated_boundary_v, total_vertices) - end if - - if (proc_rank == 0) then - print *, ' * Total number of interpolated boundary vertices:', total_vertices - end if - end if - - if (proc_rank == 0) then - write (*, "(A, 3(2X, F20.10))") " > Model: Min:", bbox%min(1:3) - write (*, "(A, 3(2X, F20.10))") " > Cen:", (bbox%min(1:3) + bbox%max(1:3))/2._wp - write (*, "(A, 3(2X, F20.10))") " > Max:", bbox%max(1:3) - - grid_mm(1, :) = (/minval(x_cc(0:m)) - 0.e5_wp*dx_local, maxval(x_cc(0:m)) + 0.e5_wp*dx_local/) - grid_mm(2, :) = (/minval(y_cc(0:n)) - 0.e5_wp*dy_local, maxval(y_cc(0:n)) + 0.e5_wp*dy_local/) - - if (p > 0) then - grid_mm(3, :) = (/minval(z_cc(0:p)) - 0.e5_wp*dz_local, maxval(z_cc(0:p)) + 0.e5_wp*dz_local/) - else - grid_mm(3, :) = (/0._wp, 0._wp/) - end if - - write (*, "(A, 3(2X, F20.10))") " > Domain: Min:", grid_mm(:, 1) - write (*, "(A, 3(2X, F20.10))") " > Cen:", (grid_mm(:, 1) + grid_mm(:, 2))/2._wp - write (*, "(A, 3(2X, F20.10))") " > Max:", grid_mm(:, 2) - end if - - models(patch_id)%model = model - models(patch_id)%boundary_v = boundary_v - models(patch_id)%boundary_edge_count = boundary_edge_count - if (interpolate) then - models(patch_id)%interpolate = 1 - else - models(patch_id)%interpolate = 0 - end if - if (interpolate) then - models(patch_id)%interpolated_boundary_v = interpolated_boundary_v - models(patch_id)%total_vertices = total_vertices - end if - - end if - end do - - ! Pack and upload flat arrays for GPU (AFTER the loop) - block - integer :: pid, max_ntrs - integer :: max_bv1, max_bv2, max_bv3, max_iv1, max_iv2 - - max_ntrs = 0 - max_bv1 = 0; max_bv2 = 0; max_bv3 = 0 - max_iv1 = 0; max_iv2 = 0 - - do pid = 1, num_ibs - if (allocated(models(pid)%model)) then - call s_pack_model_for_gpu(models(pid)) - max_ntrs = max(max_ntrs, models(pid)%ntrs) - end if - if (allocated(models(pid)%boundary_v)) then - max_bv1 = max(max_bv1, size(models(pid)%boundary_v, 1)) - max_bv2 = max(max_bv2, size(models(pid)%boundary_v, 2)) - max_bv3 = max(max_bv3, size(models(pid)%boundary_v, 3)) - end if - if (allocated(models(pid)%interpolated_boundary_v)) then - max_iv1 = max(max_iv1, size(models(pid)%interpolated_boundary_v, 1)) - max_iv2 = max(max_iv2, size(models(pid)%interpolated_boundary_v, 2)) - end if - end do - - if (max_ntrs > 0) then - allocate (gpu_ntrs(1:num_ibs)) - allocate (gpu_trs_v(1:3, 1:3, 1:max_ntrs, 1:num_ibs)) - allocate (gpu_trs_n(1:3, 1:max_ntrs, 1:num_ibs)) - allocate (gpu_interpolate(1:num_ibs)) - allocate (gpu_boundary_edge_count(1:num_ibs)) - allocate (gpu_total_vertices(1:num_ibs)) - - gpu_ntrs = 0 - gpu_trs_v = 0._wp - gpu_trs_n = 0._wp - gpu_interpolate = 0 - gpu_boundary_edge_count = 0 - gpu_total_vertices = 0 - - if (max_bv1 > 0) then - allocate (gpu_boundary_v(1:max_bv1, 1:max_bv2, 1:max_bv3, 1:num_ibs)) - gpu_boundary_v = 0._wp - end if - - if (max_iv1 > 0) then - allocate (gpu_interpolated_boundary_v(1:max_iv1, 1:max_iv2, 1:num_ibs)) - gpu_interpolated_boundary_v = 0._wp - end if - - do pid = 1, num_ibs - if (allocated(models(pid)%model)) then - gpu_ntrs(pid) = models(pid)%ntrs - gpu_trs_v(:, :, 1:models(pid)%ntrs, pid) = models(pid)%trs_v - gpu_trs_n(:, 1:models(pid)%ntrs, pid) = models(pid)%trs_n - gpu_interpolate(pid) = models(pid)%interpolate - gpu_boundary_edge_count(pid) = models(pid)%boundary_edge_count - gpu_total_vertices(pid) = models(pid)%total_vertices - end if - if (allocated(models(pid)%boundary_v)) then - gpu_boundary_v(1:size(models(pid)%boundary_v, 1), & - 1:size(models(pid)%boundary_v, 2), & - 1:size(models(pid)%boundary_v, 3), pid) = models(pid)%boundary_v - end if - if (allocated(models(pid)%interpolated_boundary_v)) then - gpu_interpolated_boundary_v(1:size(models(pid)%interpolated_boundary_v, 1), & - 1:size(models(pid)%interpolated_boundary_v, 2), pid) = models(pid)%interpolated_boundary_v - end if - end do - - $:GPU_ENTER_DATA(copyin='[gpu_ntrs, gpu_trs_v, gpu_trs_n, gpu_interpolate, gpu_boundary_edge_count, gpu_total_vertices]') - if (allocated(gpu_boundary_v)) then - $:GPU_ENTER_DATA(copyin='[gpu_boundary_v]') - end if - if (allocated(gpu_interpolated_boundary_v)) then - $:GPU_ENTER_DATA(copyin='[gpu_interpolated_boundary_v]') - end if - end if - end block - - end subroutine s_instantiate_STL_models - !> This procedure reads a binary STL file. !! @param filepath Path to the STL file. !! @param model The binary of the STL file. diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index cab36bcda7..82405566ec 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -25,7 +25,7 @@ module m_ib_patches implicit none - private; public :: s_apply_ib_patches, s_update_ib_rotation_matrix, f_convert_cyl_to_cart + private; public :: s_apply_ib_patches, s_update_ib_rotation_matrix, f_convert_cyl_to_cart, s_instantiate_STL_models real(wp) :: x_centroid, y_centroid, z_centroid real(wp) :: length_x, length_y, length_z @@ -887,10 +887,10 @@ contains integer, intent(in) :: patch_id type(integer_field), intent(inout) :: ib_markers - integer :: i, j, k !< Generic loop iterators + integer :: i, j, k, il, ir, jl, jr, kl, kr !< Generic loop iterators integer :: spc - real(wp) :: eta, threshold + real(wp) :: eta, threshold, corner_distance real(wp), dimension(1:3) :: point, local_point, offset real(wp), dimension(1:3) :: center, xyz_local real(wp), dimension(1:3, 1:3) :: inverse_rotation @@ -904,11 +904,26 @@ contains spc = patch_ib(patch_id)%model_spc threshold = patch_ib(patch_id)%model_threshold + il = -gp_layers + jl = -gp_layers + kl = -gp_layers + ir = m + gp_layers + jr = n + gp_layers + kr = p + gp_layers + corner_distance = 0._wp + do i = 1, 3 + corner_distance = corner_distance + maxval(abs(stl_bounding_boxes(patch_id, i, 1:3)))**2 ! distance to rim of cylinder + end do + corner_distance = sqrt(corner_distance) + call get_bounding_indices(center(1) - corner_distance, center(1) + corner_distance, x_cc, il, ir) + call get_bounding_indices(center(2) - corner_distance, center(2) + corner_distance, y_cc, jl, jr) + call get_bounding_indices(center(3) - corner_distance, center(3) + corner_distance, z_cc, kl, kr) + $:GPU_PARALLEL_LOOP(private='[i,j,k, xyz_local, eta]',& - & copyin='[patch_id,center,inverse_rotation, offset, spc, threshold]', collapse=2) - do i = -gp_layers, m + gp_layers - do j = -gp_layers, n + gp_layers - do k = -gp_layers, p + gp_layers + & copyin='[patch_id,center,inverse_rotation, offset, spc, threshold]', collapse=3) + do i = il, ir + do j = jl, jr + do k = kl, kr xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), z_cc(k) - center(3)] xyz_local = matmul(inverse_rotation, xyz_local) @@ -932,9 +947,228 @@ contains end do end do end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_ib_3d_model + subroutine s_instantiate_STL_models() + + ! Variables for IBM+STL + real(wp) :: normals(1:3) !< Boundary normal buffer + integer :: boundary_vertex_count, boundary_edge_count, total_vertices !< Boundary vertex + real(wp), allocatable, dimension(:, :, :) :: boundary_v !< Boundary vertex buffer + real(wp), allocatable, dimension(:, :) :: interpolated_boundary_v !< Interpolated vertex buffer + real(wp) :: dx_local, dy_local, dz_local !< Levelset distance buffer + logical :: interpolate !< Logical variable to determine whether or not the model should be interpolated + + integer :: i, j, k !< Generic loop iterators + integer :: patch_id + + type(t_bbox) :: bbox, bbox_old + type(t_model) :: model + type(ic_model_parameters) :: params + + real(wp) :: eta + real(wp), dimension(1:3) :: point, model_center + real(wp) :: grid_mm(1:3, 1:2) + + real(wp), dimension(1:4, 1:4) :: transform, transform_n + + dx_local = minval(dx); dy_local = minval(dy) + if (p /= 0) dz_local = minval(dz) + + do patch_id = 1, num_ibs + if (patch_ib(patch_id)%geometry == 5 .or. patch_ib(patch_id)%geometry == 12) then + allocate (models(patch_id)%model) + print *, " * Reading model: "//trim(patch_ib(patch_id)%model_filepath) + + model = f_model_read(patch_ib(patch_id)%model_filepath) + params%scale(:) = patch_ib(patch_id)%model_scale(:) + params%translate(:) = patch_ib(patch_id)%model_translate(:) + params%rotate(:) = patch_ib(patch_id)%model_rotate(:) + params%spc = patch_ib(patch_id)%model_spc + params%threshold = patch_ib(patch_id)%model_threshold + + if (f_approx_equal(dot_product(params%scale, params%scale), 0._wp)) then + params%scale(:) = 1._wp + end if + + if (proc_rank == 0) then + print *, " * Transforming model." + end if + + ! Get the model center before transforming the model + bbox_old = f_create_bbox(model) + model_center(1:3) = (bbox_old%min(1:3) + bbox_old%max(1:3))/2._wp + + ! Compute the transform matrices for vertices and normals + transform = f_create_transform_matrix(params, model_center) + transform_n = f_create_transform_matrix(params) + + call s_transform_model(model, transform, transform_n) + + ! Recreate the bounding box after transformation + bbox = f_create_bbox(model) + + ! Show the number of vertices in the original STL model + if (proc_rank == 0) then + print *, ' * Number of input model vertices:', 3*model%ntrs + end if + + call f_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) + + ! Check if the model needs interpolation + if (p > 0) then + call f_check_interpolation_3D(model, (/dx_local, dy_local, dz_local/), interpolate) + else + call f_check_interpolation_2D(boundary_v, boundary_edge_count, (/dx_local, dy_local, 0._wp/), interpolate) + end if + interpolate = .false. + + ! Show the number of edges and boundary edges in 2D STL models + if (proc_rank == 0 .and. p == 0) then + print *, ' * Number of 2D model boundary edges:', boundary_edge_count + end if + + ! Interpolate the STL model along the edges (2D) and on triangle facets (3D) + if (interpolate) then + if (proc_rank == 0) then + print *, ' * Interpolating STL vertices.' + end if + + if (p > 0) then + call f_interpolate_3D(model, (/dx, dy, dz/), interpolated_boundary_v, total_vertices) + else + call f_interpolate_2D(boundary_v, boundary_edge_count, (/dx, dy, dz/), interpolated_boundary_v, total_vertices) + end if + + if (proc_rank == 0) then + print *, ' * Total number of interpolated boundary vertices:', total_vertices + end if + end if + + if (proc_rank == 0) then + write (*, "(A, 3(2X, F20.10))") " > Model: Min:", bbox%min(1:3) + write (*, "(A, 3(2X, F20.10))") " > Cen:", (bbox%min(1:3) + bbox%max(1:3))/2._wp + write (*, "(A, 3(2X, F20.10))") " > Max:", bbox%max(1:3) + + grid_mm(1, :) = (/minval(x_cc(0:m)) - 0.5_wp*dx_local, maxval(x_cc(0:m)) + 0.5_wp*dx_local/) + grid_mm(2, :) = (/minval(y_cc(0:n)) - 0.5_wp*dy_local, maxval(y_cc(0:n)) + 0.5_wp*dy_local/) + + if (p > 0) then + grid_mm(3, :) = (/minval(z_cc(0:p)) - 0.5_wp*dz_local, maxval(z_cc(0:p)) + 0.5_wp*dz_local/) + else + grid_mm(3, :) = (/0._wp, 0._wp/) + end if + + write (*, "(A, 3(2X, F20.10))") " > Domain: Min:", grid_mm(:, 1) + write (*, "(A, 3(2X, F20.10))") " > Cen:", (grid_mm(:, 1) + grid_mm(:, 2))/2._wp + write (*, "(A, 3(2X, F20.10))") " > Max:", grid_mm(:, 2) + end if + + allocate (stl_bounding_boxes(patch_id, 1:3, 1:3)) + stl_bounding_boxes(patch_id, 1, 1:3) = [bbox%min(1), (bbox%min(1) + bbox%max(1))/2._wp, bbox%max(1)] + stl_bounding_boxes(patch_id, 2, 1:3) = [bbox%min(2), (bbox%min(2) + bbox%max(2))/2._wp, bbox%max(2)] + stl_bounding_boxes(patch_id, 3, 1:3) = [bbox%min(3), (bbox%min(3) + bbox%max(3))/2._wp, bbox%max(3)] + + models(patch_id)%model = model + models(patch_id)%boundary_v = boundary_v + models(patch_id)%boundary_edge_count = boundary_edge_count + if (interpolate) then + models(patch_id)%interpolate = 1 + else + models(patch_id)%interpolate = 0 + end if + if (interpolate) then + models(patch_id)%interpolated_boundary_v = interpolated_boundary_v + models(patch_id)%total_vertices = total_vertices + end if + + end if + end do + + ! Pack and upload flat arrays for GPU (AFTER the loop) + block + integer :: pid, max_ntrs + integer :: max_bv1, max_bv2, max_bv3, max_iv1, max_iv2 + + max_ntrs = 0 + max_bv1 = 0; max_bv2 = 0; max_bv3 = 0 + max_iv1 = 0; max_iv2 = 0 + + do pid = 1, num_ibs + if (allocated(models(pid)%model)) then + call s_pack_model_for_gpu(models(pid)) + max_ntrs = max(max_ntrs, models(pid)%ntrs) + end if + if (allocated(models(pid)%boundary_v)) then + max_bv1 = max(max_bv1, size(models(pid)%boundary_v, 1)) + max_bv2 = max(max_bv2, size(models(pid)%boundary_v, 2)) + max_bv3 = max(max_bv3, size(models(pid)%boundary_v, 3)) + end if + if (allocated(models(pid)%interpolated_boundary_v)) then + max_iv1 = max(max_iv1, size(models(pid)%interpolated_boundary_v, 1)) + max_iv2 = max(max_iv2, size(models(pid)%interpolated_boundary_v, 2)) + end if + end do + + if (max_ntrs > 0) then + allocate (gpu_ntrs(1:num_ibs)) + allocate (gpu_trs_v(1:3, 1:3, 1:max_ntrs, 1:num_ibs)) + allocate (gpu_trs_n(1:3, 1:max_ntrs, 1:num_ibs)) + allocate (gpu_interpolate(1:num_ibs)) + allocate (gpu_boundary_edge_count(1:num_ibs)) + allocate (gpu_total_vertices(1:num_ibs)) + + gpu_ntrs = 0 + gpu_trs_v = 0._wp + gpu_trs_n = 0._wp + gpu_interpolate = 0 + gpu_boundary_edge_count = 0 + gpu_total_vertices = 0 + + if (max_bv1 > 0) then + allocate (gpu_boundary_v(1:max_bv1, 1:max_bv2, 1:max_bv3, 1:num_ibs)) + gpu_boundary_v = 0._wp + end if + + if (max_iv1 > 0) then + allocate (gpu_interpolated_boundary_v(1:max_iv1, 1:max_iv2, 1:num_ibs)) + gpu_interpolated_boundary_v = 0._wp + end if + + do pid = 1, num_ibs + if (allocated(models(pid)%model)) then + gpu_ntrs(pid) = models(pid)%ntrs + gpu_trs_v(:, :, 1:models(pid)%ntrs, pid) = models(pid)%trs_v + gpu_trs_n(:, 1:models(pid)%ntrs, pid) = models(pid)%trs_n + gpu_interpolate(pid) = models(pid)%interpolate + gpu_boundary_edge_count(pid) = models(pid)%boundary_edge_count + gpu_total_vertices(pid) = models(pid)%total_vertices + end if + if (allocated(models(pid)%boundary_v)) then + gpu_boundary_v(1:size(models(pid)%boundary_v, 1), & + 1:size(models(pid)%boundary_v, 2), & + 1:size(models(pid)%boundary_v, 3), pid) = models(pid)%boundary_v + end if + if (allocated(models(pid)%interpolated_boundary_v)) then + gpu_interpolated_boundary_v(1:size(models(pid)%interpolated_boundary_v, 1), & + 1:size(models(pid)%interpolated_boundary_v, 2), pid) = models(pid)%interpolated_boundary_v + end if + end do + + $:GPU_ENTER_DATA(copyin='[gpu_ntrs, gpu_trs_v, gpu_trs_n, gpu_interpolate, gpu_boundary_edge_count, gpu_total_vertices]') + if (allocated(gpu_boundary_v)) then + $:GPU_ENTER_DATA(copyin='[gpu_boundary_v]') + end if + if (allocated(gpu_interpolated_boundary_v)) then + $:GPU_ENTER_DATA(copyin='[gpu_interpolated_boundary_v]') + end if + end if + end block + + end subroutine s_instantiate_STL_models + !> Subroutine that computes a rotation matrix for converting to the rotating frame of the boundary subroutine s_update_ib_rotation_matrix(patch_id) From 4dc9072a0421edb834ed11725b12433d541aaba3 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Fri, 20 Feb 2026 10:27:10 -0500 Subject: [PATCH 16/22] Missed on comparison that NVHPC allows, but GNU does not --- src/simulation/m_compute_levelset.fpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/simulation/m_compute_levelset.fpp b/src/simulation/m_compute_levelset.fpp index eaae58667b..1c254e0d9f 100644 --- a/src/simulation/m_compute_levelset.fpp +++ b/src/simulation/m_compute_levelset.fpp @@ -691,7 +691,7 @@ contains call f_distance_normals_3D_flat(gpu_ntrs(patch_id), gpu_trs_v, gpu_trs_n, patch_id, xyz_local, normals, distance) ! Get the shortest distance between the cell center and the interpolated model boundary - if (interpolate) then + if (interpolate == 1) then gp%levelset = f_interpolated_distance(gpu_interpolated_boundary_v(:, :, patch_id), total_vertices, xyz_local) else gp%levelset = distance From 0fd5492941bd02ebb3c9df792a32d0312104cade Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Fri, 20 Feb 2026 11:46:11 -0500 Subject: [PATCH 17/22] Resolved issues with GPU arrays allocation for 3D STLs --- src/simulation/m_ib_patches.fpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index 7d0256e414..5aa302b32b 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -1028,7 +1028,7 @@ contains else call f_check_interpolation_2D(boundary_v, boundary_edge_count, (/dx_local, dy_local, 0._wp/), interpolate) end if - interpolate = .false. + print *, interpolate ! Show the number of edges and boundary edges in 2D STL models if (proc_rank == 0 .and. p == 0) then @@ -1151,12 +1151,13 @@ contains gpu_boundary_edge_count(pid) = models(pid)%boundary_edge_count gpu_total_vertices(pid) = models(pid)%total_vertices end if - if (allocated(models(pid)%boundary_v)) then + if (allocated(models(pid)%boundary_v) .and. p == 0) then + print *, size(models(pid)%boundary_v, 1), size(models(pid)%boundary_v, 2), size(models(pid)%boundary_v, 3) gpu_boundary_v(1:size(models(pid)%boundary_v, 1), & 1:size(models(pid)%boundary_v, 2), & 1:size(models(pid)%boundary_v, 3), pid) = models(pid)%boundary_v end if - if (allocated(models(pid)%interpolated_boundary_v)) then + if (allocated(models(pid)%interpolated_boundary_v) .and. gpu_interpolate(pid) == 1) then gpu_interpolated_boundary_v(1:size(models(pid)%interpolated_boundary_v, 1), & 1:size(models(pid)%interpolated_boundary_v, 2), pid) = models(pid)%interpolated_boundary_v end if From 6edad1b9ceb8f4f5227c1913245fce7399005bc3 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Fri, 20 Feb 2026 14:37:17 -0500 Subject: [PATCH 18/22] Finished GPU implementation of all subroutines. Final check before regenerating golden file --- src/common/m_model.fpp | 177 ++++++++++++++++---------- src/simulation/m_compute_levelset.fpp | 1 - src/simulation/m_ib_patches.fpp | 7 +- 3 files changed, 115 insertions(+), 70 deletions(-) diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index 443f6f399e..54704c7551 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -734,6 +734,7 @@ contains end do ! Check all edges and count repeated edges + $:GPU_PARALLEL_LOOP(private='[i,j]', copy='[temp_boundary_v,edge_occurrence]', copyin='[threshold_edge_zero]', collapse=2) do i = 1, edge_count do j = 1, edge_count if (i /= j) then @@ -746,11 +747,13 @@ contains (abs(temp_boundary_v(i, 2, 1) - temp_boundary_v(j, 1, 1)) < threshold_edge_zero) .and. & (abs(temp_boundary_v(i, 2, 2) - temp_boundary_v(j, 1, 2)) < threshold_edge_zero))) then + $:GPU_ATOMIC(atomic='update') edge_occurrence(i) = edge_occurrence(i) + 1 end if end if end do end do + $:END_GPU_PARALLEL_LOOP() ! Count the number of boundary vertices/edges boundary_vertex_count = 0 @@ -818,7 +821,7 @@ contains end subroutine f_register_edge - !> This procedure check if interpolates is needed for 2D models. + !> This procedure check if interpolation is needed for 2D models. !! @param boundary_v Temporary edge end vertex buffer !! @param spacing Dimensions of the current levelset cell subroutine f_check_interpolation_2D(boundary_v, boundary_edge_count, spacing, interpolate) @@ -830,24 +833,29 @@ contains real(wp) :: l1, cell_width !< Length of each boundary edge and cell width integer :: j !< Boundary edge index iterator + integer :: interpolate_integer !< integer form of interpolate logical to support GPU checking cell_width = minval(spacing(1:2)) - interpolate = .false. + interpolate_integer = 0 + $:GPU_PARALLEL_LOOP(private='[j,l1]', copyin='[boundary_v,cell_width,Ifactor_2D]', copy='[interpolate_integer]') do j = 1, boundary_edge_count - l1 = sqrt((boundary_v(j, 2, 1) - boundary_v(j, 1, 1))**2 + & - (boundary_v(j, 2, 2) - boundary_v(j, 1, 2))**2) + if (interpolate_integer == 0) then + l1 = sqrt((boundary_v(j, 2, 1) - boundary_v(j, 1, 1))**2 + & + (boundary_v(j, 2, 2) - boundary_v(j, 1, 2))**2) - if ((l1 > cell_width)) then - interpolate = .true. - else - interpolate = .false. + if ((l1*Ifactor_2D > cell_width)) then + interpolate_integer = 1 + end if end if end do + $:END_GPU_PARALLEL_LOOP() + + interpolate = (interpolate_integer == 1) end subroutine f_check_interpolation_2D - !> This procedure check if interpolates is needed for 3D models. + !> This procedure check if interpolation is needed for 3D models. !! @param model Model to search in. !! @param spacing Dimensions of the current levelset cell !! @param interpolate Logical output @@ -858,37 +866,45 @@ contains real(wp), dimension(1:3), intent(in) :: spacing real(wp), dimension(1:3) :: edge_l real(wp) :: cell_width - real(wp), dimension(1:3, 1:3) :: tri_v - integer :: i, j !< Loop iterator + real(wp), dimension(1:model%ntrs, 1:3, 1:3) :: tri_v + integer :: i, j, interpolate_integer !< Loop iterator cell_width = minval(spacing) - interpolate = .false. + interpolate_integer = 0 + ! load up the array of triangles for GPU packing do i = 1, model%ntrs do j = 1, 3 - tri_v(1, j) = model%trs(i)%v(1, j) - tri_v(2, j) = model%trs(i)%v(2, j) - tri_v(3, j) = model%trs(i)%v(3, j) + tri_v(i, 1, j) = model%trs(i)%v(1, j) + tri_v(i, 2, j) = model%trs(i)%v(2, j) + tri_v(i, 3, j) = model%trs(i)%v(3, j) end do + end do - edge_l(1) = sqrt((tri_v(1, 2) - tri_v(1, 1))**2 + & - (tri_v(2, 2) - tri_v(2, 1))**2 + & - (tri_v(3, 2) - tri_v(3, 1))**2) - edge_l(2) = sqrt((tri_v(1, 3) - tri_v(1, 2))**2 + & - (tri_v(2, 3) - tri_v(2, 2))**2 + & - (tri_v(3, 3) - tri_v(3, 2))**2) - edge_l(3) = sqrt((tri_v(1, 1) - tri_v(1, 3))**2 + & - (tri_v(2, 1) - tri_v(2, 3))**2 + & - (tri_v(3, 1) - tri_v(3, 3))**2) - - if ((edge_l(1) > cell_width) .or. & - (edge_l(2) > cell_width) .or. & - (edge_l(3) > cell_width)) then - interpolate = .true. - else - interpolate = .false. + ! compare the side of all + $:GPU_PARALLEL_LOOP(private='[i,edge_l]', copyin='[cell_width,tri_v,Ifactor_3D]', copy='[interpolate_integer]') + do i = 1, model%ntrs + if (interpolate_integer == 0) then + edge_l(1) = sqrt((tri_v(i, 1, 2) - tri_v(i, 1, 1))**2 + & + (tri_v(i, 2, 2) - tri_v(i, 2, 1))**2 + & + (tri_v(i, 3, 2) - tri_v(i, 3, 1))**2) + edge_l(2) = sqrt((tri_v(i, 1, 3) - tri_v(i, 1, 2))**2 + & + (tri_v(i, 2, 3) - tri_v(i, 2, 2))**2 + & + (tri_v(i, 3, 3) - tri_v(i, 3, 2))**2) + edge_l(3) = sqrt((tri_v(i, 1, 1) - tri_v(i, 1, 3))**2 + & + (tri_v(i, 2, 1) - tri_v(i, 2, 3))**2 + & + (tri_v(i, 3, 1) - tri_v(i, 3, 3))**2) + + if ((edge_l(1) > cell_width*Ifactor_3D) .or. & + (edge_l(2) > cell_width*Ifactor_3D) .or. & + (edge_l(3) > cell_width*Ifactor_3D)) then + interpolate_integer = 1 + end if end if end do + $:END_GPU_PARALLEL_LOOP() + + interpolate = (interpolate_integer == 1) end subroutine f_check_interpolation_3D @@ -905,7 +921,7 @@ contains real(wp), allocatable, intent(inout), dimension(:, :) :: interpolated_boundary_v integer, intent(inout) :: total_vertices, boundary_edge_count - integer :: num_segments + integer :: num_segments, vertex_idx integer :: i, j real(wp) :: edge_length, cell_width @@ -917,6 +933,7 @@ contains ! First pass: Calculate the total number of vertices including interpolated ones total_vertices = 1 + $:GPU_PARALLEL_LOOP(private='[i,edge_x,edge_y,edge_length,num_segments]', copyin='[Ifactor_2D]', copy='[total_vertices]') do i = 1, boundary_edge_count ! Get the coordinates of the two ends of the current edge edge_x(1) = boundary_v(i, 1, 1) @@ -936,14 +953,17 @@ contains end if ! Each edge contributes num_segments vertices + $:GPU_ATOMIC(atomic='update') total_vertices = total_vertices + num_segments end do + $:END_GPU_PARALLEL_LOOP() ! Allocate memory for the new boundary vertices array allocate (interpolated_boundary_v(1:total_vertices, 1:3)) ! Fill the new boundary vertices array with original and interpolated vertices total_vertices = 1 + $:GPU_PARALLEL_LOOP(private='[i,edge_x,edge_y,edge_length,num_segments,edge_del,vertex_idx]', copyin='[Ifactor_2D]', copy='[total_vertices,interpolated_boundary_v]') do i = 1, boundary_edge_count ! Get the coordinates of the two ends of the current edge edge_x(1) = boundary_v(i, 1, 1) @@ -972,18 +992,25 @@ contains ! Add original and interpolated vertices to the output array do j = 1, num_segments - 1 + $:GPU_ATOMIC(atomic='capture') total_vertices = total_vertices + 1 - interpolated_boundary_v(total_vertices, 1) = edge_x(1) + j*edge_del(1) - interpolated_boundary_v(total_vertices, 2) = edge_y(1) + j*edge_del(2) + vertex_idx = total_vertices + $:END_GPU_ATOMIC_CAPTURE() + interpolated_boundary_v(vertex_idx, 1) = edge_x(1) + j*edge_del(1) + interpolated_boundary_v(vertex_idx, 2) = edge_y(1) + j*edge_del(2) end do ! Add the last vertex of the edge if (num_segments > 0) then + $:GPU_ATOMIC(atomic='capture') total_vertices = total_vertices + 1 - interpolated_boundary_v(total_vertices, 1) = edge_x(2) - interpolated_boundary_v(total_vertices, 2) = edge_y(2) + vertex_idx = total_vertices + $:END_GPU_ATOMIC_CAPTURE() + interpolated_boundary_v(vertex_idx, 1) = edge_x(2) + interpolated_boundary_v(vertex_idx, 2) = edge_y(2) end if end do + $:END_GPU_PARALLEL_LOOP() end subroutine f_interpolate_2D @@ -998,12 +1025,16 @@ contains real(wp), allocatable, intent(inout), dimension(:, :) :: interpolated_boundary_v integer, intent(out) :: total_vertices - integer :: i, j, k, num_triangles, num_segments, num_inner_vertices + integer :: i, j, k, num_triangles, num_segments, num_inner_vertices, vertex_idx real(wp), dimension(1:3, 1:3) :: tri real(wp), dimension(1:3) :: edge_del, cell_area real(wp), dimension(1:3) :: bary_coord !< Barycentric coordinates real(wp) :: edge_length, cell_width, cell_area_min, tri_area + ! GPU-friendly flat arrays packed from model + real(wp), allocatable, dimension(:, :, :) :: flat_v ! (3, 3, num_triangles) + real(wp), allocatable, dimension(:, :) :: flat_n ! (3, num_triangles) + ! Number of triangles in the model num_triangles = model%ntrs cell_width = minval(spacing) @@ -1015,18 +1046,23 @@ contains cell_area_min = minval(cell_area) num_inner_vertices = 0 + ! Pack model into flat arrays on CPU + allocate (flat_v(1:3, 1:3, 1:num_triangles)) + allocate (flat_n(1:3, 1:num_triangles)) + do i = 1, num_triangles + flat_v(:, :, i) = model%trs(i)%v(:, :) + flat_n(:, i) = model%trs(i)%n(:) + end do + ! Calculate the total number of vertices including interpolated ones total_vertices = 0 + $:GPU_PARALLEL_LOOP(private='[i,j,k,tri,edge_length,num_segments,num_inner_vertices]', copyin='[Ifactor_3D,Ifactor_bary_3D,flat_v,flat_n]', copy='[total_vertices]', collapse=1) do i = 1, num_triangles do j = 1, 3 ! Get the coordinates of the two vertices of the current edge - tri(1, 1) = model%trs(i)%v(j, 1) - tri(1, 2) = model%trs(i)%v(j, 2) - tri(1, 3) = model%trs(i)%v(j, 3) + tri(1, :) = flat_v(j, :, i) ! Next vertex in the triangle (cyclic) - tri(2, 1) = model%trs(i)%v(mod(j, 3) + 1, 1) - tri(2, 2) = model%trs(i)%v(mod(j, 3) + 1, 2) - tri(2, 3) = model%trs(i)%v(mod(j, 3) + 1, 3) + tri(2, :) = flat_v(mod(j, 3) + 1, :, i) ! Compute the length of the edge edge_length = sqrt((tri(2, 1) - tri(1, 1))**2 + & @@ -1041,39 +1077,35 @@ contains end if ! Each edge contributes num_segments vertices + $:GPU_ATOMIC(atomic='update') total_vertices = total_vertices + num_segments + 1 end do ! Add vertices inside the triangle do k = 1, 3 - tri(k, 1) = model%trs(i)%v(k, 1) - tri(k, 2) = model%trs(i)%v(k, 2) - tri(k, 3) = model%trs(i)%v(k, 3) + tri(k, :) = flat_v(k, :, i) end do call f_tri_area(tri, tri_area) if (tri_area > threshold_bary*cell_area_min) then num_inner_vertices = Ifactor_bary_3D*ceiling(tri_area/cell_area_min) + $:GPU_ATOMIC(atomic='update') total_vertices = total_vertices + num_inner_vertices end if end do + $:END_GPU_PARALLEL_LOOP() ! Allocate memory for the new boundary vertices array allocate (interpolated_boundary_v(1:total_vertices, 1:3)) ! Fill the new boundary vertices array with original and interpolated vertices total_vertices = 0 + $:GPU_PARALLEL_LOOP(private='[i,j,k,tri,edge_length,num_segments,num_inner_vertices,edge_del,bary_coord,vertex_idx]', copyin='[Ifactor_3D,Ifactor_bary_3D]', copy='[total_vertices]', collapse=1) do i = 1, num_triangles ! Loop through the 3 edges of each triangle do j = 1, 3 - ! Get the coordinates of the two vertices of the current edge - tri(1, 1) = model%trs(i)%v(j, 1) - tri(1, 2) = model%trs(i)%v(j, 2) - tri(1, 3) = model%trs(i)%v(j, 3) - ! Next vertex in the triangle (cyclic) - tri(2, 1) = model%trs(i)%v(mod(j, 3) + 1, 1) - tri(2, 2) = model%trs(i)%v(mod(j, 3) + 1, 2) - tri(2, 3) = model%trs(i)%v(mod(j, 3) + 1, 3) + tri(1, :) = flat_v(j, :, i) + tri(2, :) = flat_v(mod(j, 3) + 1, :, i) ! Compute the length of the edge edge_length = sqrt((tri(2, 1) - tri(1, 1))**2 + & @@ -1093,33 +1125,42 @@ contains ! Add original and interpolated vertices to the output array do k = 0, num_segments - 1 + $:GPU_ATOMIC(atomic='capture') total_vertices = total_vertices + 1 - interpolated_boundary_v(total_vertices, 1) = tri(1, 1) + k*edge_del(1) - interpolated_boundary_v(total_vertices, 2) = tri(1, 2) + k*edge_del(2) - interpolated_boundary_v(total_vertices, 3) = tri(1, 3) + k*edge_del(3) + vertex_idx = total_vertices + $:END_GPU_ATOMIC_CAPTURE() + interpolated_boundary_v(vertex_idx, 1) = tri(1, 1) + k*edge_del(1) + interpolated_boundary_v(vertex_idx, 2) = tri(1, 2) + k*edge_del(2) + interpolated_boundary_v(vertex_idx, 3) = tri(1, 3) + k*edge_del(3) end do ! Add the last vertex of the edge + $:GPU_ATOMIC(atomic='capture') total_vertices = total_vertices + 1 - interpolated_boundary_v(total_vertices, 1) = tri(2, 1) - interpolated_boundary_v(total_vertices, 2) = tri(2, 2) - interpolated_boundary_v(total_vertices, 3) = tri(2, 3) + vertex_idx = total_vertices + $:END_GPU_ATOMIC_CAPTURE() + interpolated_boundary_v(vertex_idx, 1) = tri(2, 1) + interpolated_boundary_v(vertex_idx, 2) = tri(2, 2) + interpolated_boundary_v(vertex_idx, 3) = tri(2, 3) end do ! Interpolate verties that are not on edges do k = 1, 3 - tri(k, 1) = model%trs(i)%v(k, 1) - tri(k, 2) = model%trs(i)%v(k, 2) - tri(k, 3) = model%trs(i)%v(k, 3) + tri(k, :) = flat_v(k, :, i) end do call f_tri_area(tri, tri_area) if (tri_area > threshold_bary*cell_area_min) then num_inner_vertices = Ifactor_bary_3D*ceiling(tri_area/cell_area_min) !Use barycentric coordinates for randomly distributed points + + ! Deterministic seed per triangle + rand_seed = i*73856093 + 19349663 + if (rand_seed == 0) rand_seed = 1 + do k = 1, num_inner_vertices - call random_number(bary_coord(1)) - call random_number(bary_coord(2)) + bary_coord(1) = f_model_random_number(rand_seed) + bary_coord(2) = f_model_random_number(rand_seed) if ((bary_coord(1) + bary_coord(2)) >= 1._wp) then bary_coord(1) = 1._wp - bary_coord(1) @@ -1127,6 +1168,7 @@ contains end if bary_coord(3) = 1._wp - bary_coord(1) - bary_coord(2) + $:GPU_ATOMIC(atomic='update') total_vertices = total_vertices + 1 interpolated_boundary_v(total_vertices, 1) = dot_product(bary_coord, tri(1:3, 1)) interpolated_boundary_v(total_vertices, 2) = dot_product(bary_coord, tri(1:3, 2)) @@ -1134,6 +1176,9 @@ contains end do end if end do + $:END_GPU_PARALLEL_LOOP() + + deallocate (flat_v, flat_n) end subroutine f_interpolate_3D @@ -1344,6 +1389,8 @@ contains real(wp), dimension(1:3) :: AB, AC, cross integer :: i !< Loop iterator + $:GPU_ROUTINE(parallelism='[seq]') + do i = 1, 3 AB(i) = tri(2, i) - tri(1, i) AC(i) = tri(3, i) - tri(1, i) diff --git a/src/simulation/m_compute_levelset.fpp b/src/simulation/m_compute_levelset.fpp index 1c254e0d9f..8b04f66b11 100644 --- a/src/simulation/m_compute_levelset.fpp +++ b/src/simulation/m_compute_levelset.fpp @@ -722,7 +722,6 @@ contains ! Assign the levelset_norm gp%levelset_norm = matmul(rotation, normals(1:3)) - end if end subroutine s_model_levelset diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index 5aa302b32b..050bfdb35b 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -1020,7 +1020,8 @@ contains print *, ' * Number of input model vertices:', 3*model%ntrs end if - call f_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) + ! Need the cells that form the boundary of the flat projection in 2D + if (p == 0) call f_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) ! Check if the model needs interpolation if (p > 0) then @@ -1028,7 +1029,6 @@ contains else call f_check_interpolation_2D(boundary_v, boundary_edge_count, (/dx_local, dy_local, 0._wp/), interpolate) end if - print *, interpolate ! Show the number of edges and boundary edges in 2D STL models if (proc_rank == 0 .and. p == 0) then @@ -1044,7 +1044,7 @@ contains if (p > 0) then call f_interpolate_3D(model, (/dx, dy, dz/), interpolated_boundary_v, total_vertices) else - call f_interpolate_2D(boundary_v, boundary_edge_count, (/dx, dy, dz/), interpolated_boundary_v, total_vertices) + call f_interpolate_2D(boundary_v, boundary_edge_count, (/dx, dy, 0._wp/), interpolated_boundary_v, total_vertices) end if if (proc_rank == 0) then @@ -1152,7 +1152,6 @@ contains gpu_total_vertices(pid) = models(pid)%total_vertices end if if (allocated(models(pid)%boundary_v) .and. p == 0) then - print *, size(models(pid)%boundary_v, 1), size(models(pid)%boundary_v, 2), size(models(pid)%boundary_v, 3) gpu_boundary_v(1:size(models(pid)%boundary_v, 1), & 1:size(models(pid)%boundary_v, 2), & 1:size(models(pid)%boundary_v, 3), pid) = models(pid)%boundary_v From 1e5326a6430cc1cc822209fa0f526c1b22e52f5b Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Sat, 21 Feb 2026 00:24:09 -0500 Subject: [PATCH 19/22] Refactored interpolation out of the code and replaced it with a projection-based distance method that is more accurate, faster, and simpler. Also removed some redundant subroutine calls from ICPP patches to better align it with IB patches --- src/common/m_model.fpp | 760 ++++++-------------------- src/pre_process/m_icpp_patches.fpp | 25 - src/simulation/m_compute_levelset.fpp | 38 +- src/simulation/m_ib_patches.fpp | 57 +- src/simulation/m_ibm.fpp | 13 - src/simulation/m_mpi_proxy.fpp | 185 ------- 6 files changed, 186 insertions(+), 892 deletions(-) diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index 54704c7551..b474fec80e 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -19,14 +19,12 @@ module m_model private public :: f_model_read, s_model_write, s_model_free, f_model_is_inside, models, gpu_ntrs, & - gpu_trs_v, gpu_trs_n, gpu_boundary_v, gpu_interpolated_boundary_v, gpu_interpolate, gpu_boundary_edge_count, & + gpu_trs_v, gpu_trs_n, gpu_boundary_v, gpu_boundary_edge_count, & gpu_total_vertices, stl_bounding_boxes ! Subroutines for STL immersed boundaries - public :: f_check_boundary, f_register_edge, f_check_interpolation_2D, & - f_check_interpolation_3D, f_interpolate_2D, f_interpolate_3D, & - f_interpolated_distance, f_normals, f_distance, f_distance_normals_3D, f_tri_area, s_pack_model_for_gpu, & - f_model_is_inside_flat, f_distance_normals_3d_flat + public :: f_check_boundary, f_register_edge, f_model_is_inside_flat, & + f_distance_normals_3D, f_distance_normals_2D, s_pack_model_for_gpu !! array of STL models that can be allocated and then used in IB marker and levelset compute type(t_model_array), allocatable, target :: models(:) @@ -35,8 +33,6 @@ module m_model real(wp), allocatable, dimension(:, :, :, :) :: gpu_trs_v real(wp), allocatable, dimension(:, :, :) :: gpu_trs_n real(wp), allocatable, dimension(:, :, :, :) :: gpu_boundary_v - real(wp), allocatable, dimension(:, :, :) :: gpu_interpolated_boundary_v - integer, allocatable :: gpu_interpolate(:) integer, allocatable :: gpu_boundary_edge_count(:) integer, allocatable :: gpu_total_vertices(:) real(wp), allocatable :: stl_bounding_boxes(:, :, :) @@ -56,7 +52,7 @@ contains character(kind=c_char, len=80) :: header integer(kind=c_int32_t) :: nTriangles - real(kind=c_float) :: normal(3), v(3, 3) + real(kind=c_float) :: normal(3), v(3, 3), v_norm integer(kind=c_int16_t) :: attribute open (newunit=iunit, file=filepath, action='READ', & @@ -86,6 +82,8 @@ contains model%trs(i)%v = v model%trs(i)%n = normal + v_norm = sqrt(normal(1)**2 + normal(2)**2 + normal(3)**2) + if (v_norm > 0._wp) model%trs(i)%n = normal/v_norm end do close (iunit) @@ -102,6 +100,7 @@ contains integer :: i, j, iunit, iostat character(80) :: line, buffered_line logical :: is_buffered + real(wp) :: normal(3), v_norm is_buffered = .false. @@ -150,7 +149,9 @@ contains end if call s_skip_ignored_lines(iunit, buffered_line, is_buffered) - read (line(13:), *) model%trs(i)%n + read (line(13:), *) normal + v_norm = sqrt(normal(1)**2 + normal(2)**2 + normal(3)**2) + if (v_norm > 0._wp) model%trs(i)%n = normal/v_norm call s_skip_ignored_lines(iunit, buffered_line, is_buffered) if (is_buffered) then @@ -821,432 +822,16 @@ contains end subroutine f_register_edge - !> This procedure check if interpolation is needed for 2D models. - !! @param boundary_v Temporary edge end vertex buffer - !! @param spacing Dimensions of the current levelset cell - subroutine f_check_interpolation_2D(boundary_v, boundary_edge_count, spacing, interpolate) - - logical, intent(inout) :: interpolate !< Logical indicator of interpolation - integer, intent(in) :: boundary_edge_count !< Number of boundary edges - real(wp), intent(in), dimension(1:boundary_edge_count, 1:3, 1:2) :: boundary_v - real(wp), dimension(1:3), intent(in) :: spacing - - real(wp) :: l1, cell_width !< Length of each boundary edge and cell width - integer :: j !< Boundary edge index iterator - integer :: interpolate_integer !< integer form of interpolate logical to support GPU checking - - cell_width = minval(spacing(1:2)) - interpolate_integer = 0 - - $:GPU_PARALLEL_LOOP(private='[j,l1]', copyin='[boundary_v,cell_width,Ifactor_2D]', copy='[interpolate_integer]') - do j = 1, boundary_edge_count - if (interpolate_integer == 0) then - l1 = sqrt((boundary_v(j, 2, 1) - boundary_v(j, 1, 1))**2 + & - (boundary_v(j, 2, 2) - boundary_v(j, 1, 2))**2) - - if ((l1*Ifactor_2D > cell_width)) then - interpolate_integer = 1 - end if - end if - end do - $:END_GPU_PARALLEL_LOOP() - - interpolate = (interpolate_integer == 1) - - end subroutine f_check_interpolation_2D - - !> This procedure check if interpolation is needed for 3D models. - !! @param model Model to search in. - !! @param spacing Dimensions of the current levelset cell - !! @param interpolate Logical output - subroutine f_check_interpolation_3D(model, spacing, interpolate) - - logical, intent(inout) :: interpolate - type(t_model), intent(in) :: model - real(wp), dimension(1:3), intent(in) :: spacing - real(wp), dimension(1:3) :: edge_l - real(wp) :: cell_width - real(wp), dimension(1:model%ntrs, 1:3, 1:3) :: tri_v - integer :: i, j, interpolate_integer !< Loop iterator - - cell_width = minval(spacing) - interpolate_integer = 0 - - ! load up the array of triangles for GPU packing - do i = 1, model%ntrs - do j = 1, 3 - tri_v(i, 1, j) = model%trs(i)%v(1, j) - tri_v(i, 2, j) = model%trs(i)%v(2, j) - tri_v(i, 3, j) = model%trs(i)%v(3, j) - end do - end do - - ! compare the side of all - $:GPU_PARALLEL_LOOP(private='[i,edge_l]', copyin='[cell_width,tri_v,Ifactor_3D]', copy='[interpolate_integer]') - do i = 1, model%ntrs - if (interpolate_integer == 0) then - edge_l(1) = sqrt((tri_v(i, 1, 2) - tri_v(i, 1, 1))**2 + & - (tri_v(i, 2, 2) - tri_v(i, 2, 1))**2 + & - (tri_v(i, 3, 2) - tri_v(i, 3, 1))**2) - edge_l(2) = sqrt((tri_v(i, 1, 3) - tri_v(i, 1, 2))**2 + & - (tri_v(i, 2, 3) - tri_v(i, 2, 2))**2 + & - (tri_v(i, 3, 3) - tri_v(i, 3, 2))**2) - edge_l(3) = sqrt((tri_v(i, 1, 1) - tri_v(i, 1, 3))**2 + & - (tri_v(i, 2, 1) - tri_v(i, 2, 3))**2 + & - (tri_v(i, 3, 1) - tri_v(i, 3, 3))**2) - - if ((edge_l(1) > cell_width*Ifactor_3D) .or. & - (edge_l(2) > cell_width*Ifactor_3D) .or. & - (edge_l(3) > cell_width*Ifactor_3D)) then - interpolate_integer = 1 - end if - end if - end do - $:END_GPU_PARALLEL_LOOP() - - interpolate = (interpolate_integer == 1) - - end subroutine f_check_interpolation_3D - - !> This procedure interpolates 2D models. - !! @param boundary_v Group of all the boundary vertices of the 2D model without interpolation - !! @param boundary_edge_count Output total number of boundary edges - !! @param spacing Dimensions of the current levelset cell - !! @param interpolated_boundary_v Output all the boundary vertices of the interpolated 2D model - !! @param total_vertices Total number of vertices after interpolation - subroutine f_interpolate_2D(boundary_v, boundary_edge_count, spacing, interpolated_boundary_v, total_vertices) - - real(wp), intent(in), dimension(:, :, :) :: boundary_v - real(wp), dimension(1:3), intent(in) :: spacing - real(wp), allocatable, intent(inout), dimension(:, :) :: interpolated_boundary_v - - integer, intent(inout) :: total_vertices, boundary_edge_count - integer :: num_segments, vertex_idx - integer :: i, j - - real(wp) :: edge_length, cell_width - real(wp), dimension(1:2) :: edge_x, edge_y, edge_del - - ! Get the number of boundary edges - cell_width = minval(spacing(1:2)) - num_segments = 0 - - ! First pass: Calculate the total number of vertices including interpolated ones - total_vertices = 1 - $:GPU_PARALLEL_LOOP(private='[i,edge_x,edge_y,edge_length,num_segments]', copyin='[Ifactor_2D]', copy='[total_vertices]') - do i = 1, boundary_edge_count - ! Get the coordinates of the two ends of the current edge - edge_x(1) = boundary_v(i, 1, 1) - edge_y(1) = boundary_v(i, 1, 2) - edge_x(2) = boundary_v(i, 2, 1) - edge_y(2) = boundary_v(i, 2, 2) - - ! Compute the length of the edge - edge_length = sqrt((edge_x(2) - edge_x(1))**2 + & - (edge_y(2) - edge_y(1))**2) - - ! Determine the number of segments - if (edge_length > cell_width) then - num_segments = Ifactor_2D*ceiling(edge_length/cell_width) - else - num_segments = 1 - end if - - ! Each edge contributes num_segments vertices - $:GPU_ATOMIC(atomic='update') - total_vertices = total_vertices + num_segments - end do - $:END_GPU_PARALLEL_LOOP() - - ! Allocate memory for the new boundary vertices array - allocate (interpolated_boundary_v(1:total_vertices, 1:3)) - - ! Fill the new boundary vertices array with original and interpolated vertices - total_vertices = 1 - $:GPU_PARALLEL_LOOP(private='[i,edge_x,edge_y,edge_length,num_segments,edge_del,vertex_idx]', copyin='[Ifactor_2D]', copy='[total_vertices,interpolated_boundary_v]') - do i = 1, boundary_edge_count - ! Get the coordinates of the two ends of the current edge - edge_x(1) = boundary_v(i, 1, 1) - edge_y(1) = boundary_v(i, 1, 2) - edge_x(2) = boundary_v(i, 2, 1) - edge_y(2) = boundary_v(i, 2, 2) - - ! Compute the length of the edge - edge_length = sqrt((edge_x(2) - edge_x(1))**2 + & - (edge_y(2) - edge_y(1))**2) - - ! Determine the number of segments and interpolation step - if (edge_length > cell_width) then - num_segments = Ifactor_2D*ceiling(edge_length/cell_width) - edge_del(1) = (edge_x(2) - edge_x(1))/num_segments - edge_del(2) = (edge_y(2) - edge_y(1))/num_segments - else - num_segments = 1 - edge_del(1) = 0._wp - edge_del(2) = 0._wp - end if - - interpolated_boundary_v(1, 1) = edge_x(1) - interpolated_boundary_v(1, 2) = edge_y(1) - interpolated_boundary_v(1, 3) = 0._wp - - ! Add original and interpolated vertices to the output array - do j = 1, num_segments - 1 - $:GPU_ATOMIC(atomic='capture') - total_vertices = total_vertices + 1 - vertex_idx = total_vertices - $:END_GPU_ATOMIC_CAPTURE() - interpolated_boundary_v(vertex_idx, 1) = edge_x(1) + j*edge_del(1) - interpolated_boundary_v(vertex_idx, 2) = edge_y(1) + j*edge_del(2) - end do - - ! Add the last vertex of the edge - if (num_segments > 0) then - $:GPU_ATOMIC(atomic='capture') - total_vertices = total_vertices + 1 - vertex_idx = total_vertices - $:END_GPU_ATOMIC_CAPTURE() - interpolated_boundary_v(vertex_idx, 1) = edge_x(2) - interpolated_boundary_v(vertex_idx, 2) = edge_y(2) - end if - end do - $:END_GPU_PARALLEL_LOOP() - - end subroutine f_interpolate_2D - - !> This procedure interpolates 3D models. - !! @param model Model to search in. - !! @param spacing Dimensions of the current levelset cell - !! @param interpolated_boundary_v Output all the boundary vertices of the interpolated 3D model - !! @param total_vertices Total number of vertices after interpolation - impure subroutine f_interpolate_3D(model, spacing, interpolated_boundary_v, total_vertices) - real(wp), dimension(1:3), intent(in) :: spacing - type(t_model), intent(in) :: model - real(wp), allocatable, intent(inout), dimension(:, :) :: interpolated_boundary_v - integer, intent(out) :: total_vertices - - integer :: i, j, k, num_triangles, num_segments, num_inner_vertices, vertex_idx - real(wp), dimension(1:3, 1:3) :: tri - real(wp), dimension(1:3) :: edge_del, cell_area - real(wp), dimension(1:3) :: bary_coord !< Barycentric coordinates - real(wp) :: edge_length, cell_width, cell_area_min, tri_area - - ! GPU-friendly flat arrays packed from model - real(wp), allocatable, dimension(:, :, :) :: flat_v ! (3, 3, num_triangles) - real(wp), allocatable, dimension(:, :) :: flat_n ! (3, num_triangles) - - ! Number of triangles in the model - num_triangles = model%ntrs - cell_width = minval(spacing) - - ! Find the minimum surface area - cell_area(1) = spacing(1)*spacing(2) - cell_area(2) = spacing(1)*spacing(3) - cell_area(3) = spacing(2)*spacing(3) - cell_area_min = minval(cell_area) - num_inner_vertices = 0 - - ! Pack model into flat arrays on CPU - allocate (flat_v(1:3, 1:3, 1:num_triangles)) - allocate (flat_n(1:3, 1:num_triangles)) - do i = 1, num_triangles - flat_v(:, :, i) = model%trs(i)%v(:, :) - flat_n(:, i) = model%trs(i)%n(:) - end do - - ! Calculate the total number of vertices including interpolated ones - total_vertices = 0 - $:GPU_PARALLEL_LOOP(private='[i,j,k,tri,edge_length,num_segments,num_inner_vertices]', copyin='[Ifactor_3D,Ifactor_bary_3D,flat_v,flat_n]', copy='[total_vertices]', collapse=1) - do i = 1, num_triangles - do j = 1, 3 - ! Get the coordinates of the two vertices of the current edge - tri(1, :) = flat_v(j, :, i) - ! Next vertex in the triangle (cyclic) - tri(2, :) = flat_v(mod(j, 3) + 1, :, i) - - ! Compute the length of the edge - edge_length = sqrt((tri(2, 1) - tri(1, 1))**2 + & - (tri(2, 2) - tri(1, 2))**2 + & - (tri(2, 3) - tri(1, 3))**2) - - ! Determine the number of segments - if (edge_length > cell_width) then - num_segments = Ifactor_3D*ceiling(edge_length/cell_width) - else - num_segments = 1 - end if - - ! Each edge contributes num_segments vertices - $:GPU_ATOMIC(atomic='update') - total_vertices = total_vertices + num_segments + 1 - end do - - ! Add vertices inside the triangle - do k = 1, 3 - tri(k, :) = flat_v(k, :, i) - end do - call f_tri_area(tri, tri_area) - - if (tri_area > threshold_bary*cell_area_min) then - num_inner_vertices = Ifactor_bary_3D*ceiling(tri_area/cell_area_min) - $:GPU_ATOMIC(atomic='update') - total_vertices = total_vertices + num_inner_vertices - end if - end do - $:END_GPU_PARALLEL_LOOP() - - ! Allocate memory for the new boundary vertices array - allocate (interpolated_boundary_v(1:total_vertices, 1:3)) - - ! Fill the new boundary vertices array with original and interpolated vertices - total_vertices = 0 - $:GPU_PARALLEL_LOOP(private='[i,j,k,tri,edge_length,num_segments,num_inner_vertices,edge_del,bary_coord,vertex_idx]', copyin='[Ifactor_3D,Ifactor_bary_3D]', copy='[total_vertices]', collapse=1) - do i = 1, num_triangles - ! Loop through the 3 edges of each triangle - do j = 1, 3 - tri(1, :) = flat_v(j, :, i) - tri(2, :) = flat_v(mod(j, 3) + 1, :, i) - - ! Compute the length of the edge - edge_length = sqrt((tri(2, 1) - tri(1, 1))**2 + & - (tri(2, 2) - tri(1, 2))**2 + & - (tri(2, 3) - tri(1, 3))**2) - - ! Determine the number of segments and interpolation step - if (edge_length > cell_width) then - num_segments = Ifactor_3D*ceiling(edge_length/cell_width) - edge_del(1) = (tri(2, 1) - tri(1, 1))/num_segments - edge_del(2) = (tri(2, 2) - tri(1, 2))/num_segments - edge_del(3) = (tri(2, 3) - tri(1, 3))/num_segments - else - num_segments = 1 - edge_del = 0._wp - end if - - ! Add original and interpolated vertices to the output array - do k = 0, num_segments - 1 - $:GPU_ATOMIC(atomic='capture') - total_vertices = total_vertices + 1 - vertex_idx = total_vertices - $:END_GPU_ATOMIC_CAPTURE() - interpolated_boundary_v(vertex_idx, 1) = tri(1, 1) + k*edge_del(1) - interpolated_boundary_v(vertex_idx, 2) = tri(1, 2) + k*edge_del(2) - interpolated_boundary_v(vertex_idx, 3) = tri(1, 3) + k*edge_del(3) - end do - - ! Add the last vertex of the edge - $:GPU_ATOMIC(atomic='capture') - total_vertices = total_vertices + 1 - vertex_idx = total_vertices - $:END_GPU_ATOMIC_CAPTURE() - interpolated_boundary_v(vertex_idx, 1) = tri(2, 1) - interpolated_boundary_v(vertex_idx, 2) = tri(2, 2) - interpolated_boundary_v(vertex_idx, 3) = tri(2, 3) - end do - - ! Interpolate verties that are not on edges - do k = 1, 3 - tri(k, :) = flat_v(k, :, i) - end do - call f_tri_area(tri, tri_area) - - if (tri_area > threshold_bary*cell_area_min) then - num_inner_vertices = Ifactor_bary_3D*ceiling(tri_area/cell_area_min) - !Use barycentric coordinates for randomly distributed points - - ! Deterministic seed per triangle - rand_seed = i*73856093 + 19349663 - if (rand_seed == 0) rand_seed = 1 - - do k = 1, num_inner_vertices - bary_coord(1) = f_model_random_number(rand_seed) - bary_coord(2) = f_model_random_number(rand_seed) - - if ((bary_coord(1) + bary_coord(2)) >= 1._wp) then - bary_coord(1) = 1._wp - bary_coord(1) - bary_coord(2) = 1._wp - bary_coord(2) - end if - bary_coord(3) = 1._wp - bary_coord(1) - bary_coord(2) - - $:GPU_ATOMIC(atomic='update') - total_vertices = total_vertices + 1 - interpolated_boundary_v(total_vertices, 1) = dot_product(bary_coord, tri(1:3, 1)) - interpolated_boundary_v(total_vertices, 2) = dot_product(bary_coord, tri(1:3, 2)) - interpolated_boundary_v(total_vertices, 3) = dot_product(bary_coord, tri(1:3, 3)) - end do - end if - end do - $:END_GPU_PARALLEL_LOOP() - - deallocate (flat_v, flat_n) - - end subroutine f_interpolate_3D - - !> This procedure determines the levelset distance and normals of the 3D models without interpolation. - !! @param model Model to search in. - !! @param point The cell centers of the current level cell - !! @param normals The output levelset normals - !! @param distance The output levelset distance - subroutine f_distance_normals_3D(model, point, normals, distance) - - $:GPU_ROUTINE(parallelism='[seq]') - - type(t_model), intent(IN) :: model - real(wp), dimension(1:3), intent(in) :: point - real(wp), dimension(1:3), intent(out) :: normals - real(wp), intent(out) :: distance - - real(wp), dimension(1:3, 1:3) :: tri - real(wp) :: dist_min, dist_t_min - real(wp) :: dist_min_normal, dist_buffer_normal - real(wp), dimension(1:3) :: midp !< Centers of the triangle facets - real(wp), dimension(1:3) :: dist_buffer !< Distance between the cell center and the vertices - integer :: i, j, tri_idx !< Iterator - - dist_min = 1.e12_wp - dist_min_normal = 1.e12_wp - distance = 0._wp - - tri_idx = 0 - do i = 1, model%ntrs - do j = 1, 3 - tri(j, 1) = model%trs(i)%v(j, 1) - tri(j, 2) = model%trs(i)%v(j, 2) - tri(j, 3) = model%trs(i)%v(j, 3) - dist_buffer(j) = sqrt((point(1) - tri(j, 1))**2 + & - (point(2) - tri(j, 2))**2 + & - (point(3) - tri(j, 3))**2) - end do - - ! Get the surface center of each triangle facet - do j = 1, 3 - midp(j) = (tri(1, j) + tri(2, j) + tri(3, j))/3 - end do - - dist_t_min = minval(dist_buffer(1:3)) - dist_buffer_normal = sqrt((point(1) - midp(1))**2 + & - (point(2) - midp(2))**2 + & - (point(3) - midp(3))**2) - - if (dist_t_min < dist_min) then - dist_min = dist_t_min - end if - - if (dist_buffer_normal < dist_min_normal) then - dist_min_normal = dist_buffer_normal - tri_idx = i - end if - end do - - normals(1) = model%trs(tri_idx)%n(1) - normals(2) = model%trs(tri_idx)%n(2) - normals(3) = model%trs(tri_idx)%n(3) - distance = dist_min - - end subroutine f_distance_normals_3D - - subroutine f_distance_normals_3D_flat(ntrs, trs_v, trs_n, pid, point, normals, distance) - + !> This procedure determines the levelset distance and normals of 3D models + !! by computing the exact closest point via projection onto triangle surfaces. + !! @param ntrs Number of triangles for this patch + !! @param trs_v Flat GPU array of triangle vertices for all patches + !! @param trs_n Flat GPU array of triangle normals for all patches + !! @param pid Patch index into the arrays + !! @param point The cell center of the current levelset cell + !! @param normals Output levelset normals + !! @param distance Output levelset distance + subroutine f_distance_normals_3D(ntrs, trs_v, trs_n, pid, point, normals, distance) $:GPU_ROUTINE(parallelism='[seq]') integer, intent(in) :: ntrs @@ -1257,186 +842,193 @@ contains real(wp), dimension(1:3), intent(out) :: normals real(wp), intent(out) :: distance - real(wp), dimension(1:3, 1:3) :: tri - real(wp) :: dist_min, dist_t_min - real(wp) :: dist_min_normal, dist_buffer_normal - real(wp), dimension(1:3) :: midp - real(wp), dimension(1:3) :: dist_buffer - integer :: i, j, tri_idx + integer :: i, j + real(wp) :: dist_min, dist_proj, dist_v, dist_e, t + real(wp) :: v1(1:3), v2(1:3), v3(1:3) + real(wp) :: e0(1:3), e1(1:3), pv(1:3) + real(wp) :: n(1:3), proj(1:3) + real(wp) :: d, ndot, denom + real(wp) :: u, v_bary, w + real(wp) :: d00, d01, d11, d20, d21 + real(wp) :: edge(1:3), pe(1:3) + real(wp) :: verts(1:3, 1:3) dist_min = 1.e12_wp - dist_min_normal = 1.e12_wp - distance = 0._wp + normals = 0._wp - tri_idx = 0 do i = 1, ntrs - do j = 1, 3 - tri(j, 1) = trs_v(j, 1, i, pid) - tri(j, 2) = trs_v(j, 2, i, pid) - tri(j, 3) = trs_v(j, 3, i, pid) - dist_buffer(j) = sqrt((point(1) - tri(j, 1))**2 + & - (point(2) - tri(j, 2))**2 + & - (point(3) - tri(j, 3))**2) - end do + ! Triangle vertices + v1(:) = trs_v(1, :, i, pid) + v2(:) = trs_v(2, :, i, pid) + v3(:) = trs_v(3, :, i, pid) + + ! Triangle normal + n(:) = trs_n(:, i, pid) + + ! Project point onto triangle plane + pv(:) = point(:) - v1(:) + d = dot_product(pv, n) + if (abs(d) >= dist_min) cycle ! minimum distance is not small enough, no need to check validity + proj(:) = point(:) - d*n(:) + + ! Check if projection is inside triangle using barycentric coordinates + e0(:) = v2(:) - v1(:) + e1(:) = v3(:) - v1(:) + pv(:) = proj(:) - v1(:) + + d00 = dot_product(e0, e0) + d01 = dot_product(e0, e1) + d11 = dot_product(e1, e1) + d20 = dot_product(pv, e0) + d21 = dot_product(pv, e1) + + denom = d00*d11 - d01*d01 + + if (abs(denom) > 0._wp) then + v_bary = (d11*d20 - d01*d21)/denom + w = (d00*d21 - d01*d20)/denom + u = 1._wp - v_bary - w + else + u = -1._wp + v_bary = -1._wp + w = -1._wp + end if - do j = 1, 3 - midp(j) = (tri(1, j) + tri(2, j) + tri(3, j))/3 - end do + ! If projection is inside triangle + if (u >= 0._wp .and. v_bary >= 0._wp .and. w >= 0._wp) then + dist_proj = sqrt((point(1) - proj(1))**2 + & + (point(2) - proj(2))**2 + & + (point(3) - proj(3))**2) - dist_t_min = minval(dist_buffer(1:3)) - dist_buffer_normal = sqrt((point(1) - midp(1))**2 + & - (point(2) - midp(2))**2 + & - (point(3) - midp(3))**2) + if (dist_proj < dist_min) then + dist_min = dist_proj + normals(:) = n(:) + end if + else + ! Projection outside triangle: check edges and vertices + verts(:, 1) = v1(:) + verts(:, 2) = v2(:) + verts(:, 3) = v3(:) + + ! Check three edges + do j = 1, 3 + edge(:) = verts(:, mod(j, 3) + 1) - verts(:, j) + pe(:) = point(:) - verts(:, j) + + t = dot_product(pe, edge)/max(dot_product(edge, edge), 1.e-30_wp) + + if (t >= 0._wp .and. t <= 1._wp) then + proj(:) = verts(:, j) + t*edge(:) + dist_e = sqrt((point(1) - proj(1))**2 + & + (point(2) - proj(2))**2 + & + (point(3) - proj(3))**2) + + if (dist_e < dist_min) then + dist_min = dist_e + normals(:) = n(:) + end if + end if + end do - if (dist_t_min < dist_min) then - dist_min = dist_t_min - end if + ! Check three vertices + do j = 1, 3 + dist_v = sqrt((point(1) - verts(1, j))**2 + & + (point(2) - verts(2, j))**2 + & + (point(3) - verts(3, j))**2) - if (dist_buffer_normal < dist_min_normal) then - dist_min_normal = dist_buffer_normal - tri_idx = i + if (dist_v < dist_min) then + dist_min = dist_v + normals(:) = n(:) + end if + end do end if end do - normals(1) = trs_n(1, tri_idx, pid) - normals(2) = trs_n(2, tri_idx, pid) - normals(3) = trs_n(3, tri_idx, pid) distance = dist_min - end subroutine f_distance_normals_3D_flat - - !> This procedure determines the levelset distance of 2D models without interpolation. - !! @param boundary_v Group of all the boundary vertices of the 2D model without interpolation - !! @param boundary_edge_count Output the total number of boundary edges - !! @param point The cell centers of the current levelset cell - !! @return Distance which the levelset distance without interpolation - function f_distance(boundary_v, boundary_edge_count, point) result(distance) - - $:GPU_ROUTINE(parallelism='[seq]') - - integer, intent(in) :: boundary_edge_count - real(wp), intent(in), dimension(:, :, :) :: boundary_v - ! real(wp), intent(in), dimension(1:boundary_edge_count, 1:3, 1:2) :: boundary_v - real(wp), dimension(1:3), intent(in) :: point - - integer :: i - real(wp) :: dist_buffer1, dist_buffer2 - real(wp), dimension(1:boundary_edge_count) :: dist_buffer - real(wp) :: distance - - distance = 0._wp - do i = 1, boundary_edge_count - dist_buffer1 = sqrt((point(1) - boundary_v(i, 1, 1))**2 + & - & (point(2) - boundary_v(i, 1, 2))**2) - - dist_buffer2 = sqrt((point(1) - boundary_v(i, 2, 1))**2 + & - & (point(2) - boundary_v(i, 2, 2))**2) - - dist_buffer(i) = minval((/dist_buffer1, dist_buffer2/)) - end do - - distance = minval(dist_buffer) - - end function f_distance - - !> This procedure determines the levelset normals of 2D models without interpolation. - !! @param boundary_v Group of all the boundary vertices of the 2D model without interpolation - !! @param boundary_edge_count Output the total number of boundary edges - !! @param point The cell centers of the current levelset cell - !! @param normals Output levelset normals without interpolation - subroutine f_normals(boundary_v, boundary_edge_count, point, normals) + end subroutine f_distance_normals_3D + !> This procedure determines the levelset distance and normals of 2D models + !! by computing the exact closest point via projection onto boundary edges. + !! @param boundary_v Flat GPU array of boundary vertices/normals for all patches + !! @param pid Patch index into the boundary_v array + !! @param boundary_edge_count Total number of boundary edges for this patch + !! @param point The cell center of the current levelset cell + !! @param normals Output levelset normals + !! @param distance Output levelset distance + subroutine f_distance_normals_2D(boundary_v, pid, boundary_edge_count, point, normals, distance) $:GPU_ROUTINE(parallelism='[seq]') + real(wp), dimension(:, :, :, :), intent(in) :: boundary_v + integer, intent(in) :: pid integer, intent(in) :: boundary_edge_count - real(wp), intent(in), dimension(:, :, :) :: boundary_v real(wp), dimension(1:3), intent(in) :: point real(wp), dimension(1:3), intent(out) :: normals + real(wp), intent(out) :: distance - integer :: i, idx_buffer - real(wp) :: dist_min, dist_buffer - real(wp) :: midp(1:3) + integer :: i + real(wp) :: dist_min, dist_proj, dist_v1, dist_v2, t + real(wp) :: v1(1:2), v2(1:2), edge(1:2), pv(1:2) + real(wp) :: edge_len_sq, proj(1:2) - dist_buffer = 0._wp - dist_min = initial_distance_buffer - idx_buffer = 0 + dist_min = 1.e12_wp + normals = 0._wp do i = 1, boundary_edge_count - midp(1) = (boundary_v(i, 2, 1) + boundary_v(i, 1, 1))/2 - midp(2) = (boundary_v(i, 2, 2) + boundary_v(i, 1, 2))/2 - midp(3) = 0._wp - - dist_buffer = sqrt((point(1) - midp(1))**2 + & - & (point(2) - midp(2))**2) - - if (dist_buffer < dist_min) then - dist_min = dist_buffer - idx_buffer = i + ! Edge endpoints + v1(1) = boundary_v(i, 1, 1, pid) + v1(2) = boundary_v(i, 1, 2, pid) + v2(1) = boundary_v(i, 2, 1, pid) + v2(2) = boundary_v(i, 2, 2, pid) + + ! Edge vector and point-to-v1 vector + edge(1) = v2(1) - v1(1) + edge(2) = v2(2) - v1(2) + pv(1) = point(1) - v1(1) + pv(2) = point(2) - v1(2) + + edge_len_sq = dot_product(edge, edge) + + ! Parameter of projection onto the edge line + if (edge_len_sq > 0._wp) then + t = dot_product(pv, edge)/edge_len_sq + else + t = 0._wp end if - end do - - normals(1) = boundary_v(idx_buffer, 3, 1) - normals(2) = boundary_v(idx_buffer, 3, 2) - normals(3) = 0._wp - - end subroutine f_normals - - !> This procedure calculates the barycentric facet area - subroutine f_tri_area(tri, tri_area) - - real(wp), dimension(1:3, 1:3), intent(in) :: tri - real(wp), intent(out) :: tri_area - real(wp), dimension(1:3) :: AB, AC, cross - integer :: i !< Loop iterator - - $:GPU_ROUTINE(parallelism='[seq]') - - do i = 1, 3 - AB(i) = tri(2, i) - tri(1, i) - AC(i) = tri(3, i) - tri(1, i) - end do - - cross(1) = AB(2)*AC(3) - AB(3)*AC(2) - cross(2) = AB(3)*AC(1) - AB(1)*AC(3) - cross(3) = AB(1)*AC(2) - AB(2)*AC(1) - tri_area = 0.5_wp*sqrt(cross(1)**2 + cross(2)**2 + cross(3)**2) - - end subroutine f_tri_area - !> This procedure determines the levelset of interpolated 2D models. - !! @param interpolated_boundary_v Group of all the boundary vertices of the interpolated 2D model - !! @param total_vertices Total number of vertices after interpolation - !! @param point The cell centers of the current levelset cell - !! @return Distance which the levelset distance without interpolation - function f_interpolated_distance(interpolated_boundary_v, total_vertices, point) result(distance) + ! Check if projection falls within the segment + if (t >= 0._wp .and. t <= 1._wp) then + proj(1) = v1(1) + t*edge(1) + proj(2) = v1(2) + t*edge(2) + dist_proj = sqrt((point(1) - proj(1))**2 + (point(2) - proj(2))**2) - $:GPU_ROUTINE(parallelism='[seq]') - - integer, intent(in) :: total_vertices - real(wp), intent(in), dimension(:, :) :: interpolated_boundary_v - real(wp), dimension(1:3), intent(in) :: point - - integer :: i !< Loop iterator - real(wp) :: dist_buffer, min_dist - real(wp) :: distance - - distance = initial_distance_buffer - dist_buffer = initial_distance_buffer - min_dist = initial_distance_buffer - - do i = 1, total_vertices - dist_buffer = sqrt((point(1) - interpolated_boundary_v(i, 1))**2 + & - (point(2) - interpolated_boundary_v(i, 2))**2 + & - (point(3) - interpolated_boundary_v(i, 3))**2) + if (dist_proj < dist_min) then + dist_min = dist_proj + normals(1) = boundary_v(i, 3, 1, pid) + normals(2) = boundary_v(i, 3, 2, pid) + end if + else + ! Check distance to first vertex + dist_v1 = sqrt(pv(1)*pv(1) + pv(2)*pv(2)) + if (dist_v1 < dist_min) then + dist_min = dist_v1 + normals(1) = boundary_v(i, 3, 1, pid) + normals(2) = boundary_v(i, 3, 2, pid) + end if - if (min_dist > dist_buffer) then - min_dist = dist_buffer + ! Check distance to second vertex + dist_v2 = sqrt((point(1) - v2(1))**2 + (point(2) - v2(2))**2) + if (dist_v2 < dist_min) then + dist_min = dist_v2 + normals(1) = boundary_v(i, 3, 1, pid) + normals(2) = boundary_v(i, 3, 2, pid) + end if end if end do - distance = min_dist + distance = dist_min - end function f_interpolated_distance + end subroutine f_distance_normals_2D subroutine s_pack_model_for_gpu(ma) type(t_model_array), intent(inout) :: ma diff --git a/src/pre_process/m_icpp_patches.fpp b/src/pre_process/m_icpp_patches.fpp index 70af860ce0..0a2a6fe85c 100644 --- a/src/pre_process/m_icpp_patches.fpp +++ b/src/pre_process/m_icpp_patches.fpp @@ -1601,7 +1601,6 @@ contains real(wp) :: normals(1:3) !< Boundary normal buffer integer :: boundary_vertex_count, boundary_edge_count, total_vertices !< Boundary vertex real(wp), allocatable, dimension(:, :, :) :: boundary_v !< Boundary vertex buffer - real(wp), allocatable, dimension(:, :) :: interpolated_boundary_v !< Interpolated vertex buffer real(wp) :: distance !< Levelset distance buffer logical :: interpolate !< Logical variable to determine whether or not the model should be interpolated @@ -1655,35 +1654,11 @@ contains call f_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) - ! Check if the model needs interpolation - if (p > 0) then - call f_check_interpolation_3D(model, (/dx, dy, dz/), interpolate) - else - call f_check_interpolation_2D(boundary_v, boundary_edge_count, (/dx, dy, dz/), interpolate) - end if - ! Show the number of edges and boundary edges in 2D STL models if (proc_rank == 0 .and. p == 0) then print *, ' * Number of 2D model boundary edges:', boundary_edge_count end if - ! Interpolate the STL model along the edges (2D) and on triangle facets (3D) - if (interpolate) then - if (proc_rank == 0) then - print *, ' * Interpolating STL vertices.' - end if - - if (p > 0) then - call f_interpolate_3D(model, (/dx, dy, dz/), interpolated_boundary_v, total_vertices) - else - call f_interpolate_2D(boundary_v, boundary_edge_count, (/dx, dy, dz/), interpolated_boundary_v, total_vertices) - end if - - if (proc_rank == 0) then - print *, ' * Total number of interpolated boundary vertices:', total_vertices - end if - end if - if (proc_rank == 0) then write (*, "(A, 3(2X, F20.10))") " > Model: Min:", bbox%min(1:3) write (*, "(A, 3(2X, F20.10))") " > Cen:", (bbox%min(1:3) + bbox%max(1:3))/2._wp diff --git a/src/simulation/m_compute_levelset.fpp b/src/simulation/m_compute_levelset.fpp index 8b04f66b11..19301f2e30 100644 --- a/src/simulation/m_compute_levelset.fpp +++ b/src/simulation/m_compute_levelset.fpp @@ -647,7 +647,6 @@ contains type(ghost_point), intent(inout) :: gp integer :: i, j, k, patch_id, boundary_edge_count, total_vertices - integer :: interpolate real(wp), dimension(1:3) :: center, xyz_local real(wp) :: normals(1:3) !< Boundary normal buffer real(wp) :: distance @@ -659,7 +658,6 @@ contains k = gp%loc(3) ! load in model values - interpolate = gpu_interpolate(patch_id) boundary_edge_count = gpu_boundary_edge_count(patch_id) total_vertices = gpu_total_vertices(patch_id) @@ -686,44 +684,26 @@ contains ! 3D models if (p > 0) then - ! Get the boundary normals and shortest distance between the cell center and the model boundary - call f_distance_normals_3D_flat(gpu_ntrs(patch_id), gpu_trs_v, gpu_trs_n, patch_id, xyz_local, normals, distance) - - ! Get the shortest distance between the cell center and the interpolated model boundary - if (interpolate == 1) then - gp%levelset = f_interpolated_distance(gpu_interpolated_boundary_v(:, :, patch_id), total_vertices, xyz_local) - else - gp%levelset = distance - end if + call f_distance_normals_3D(gpu_ntrs(patch_id), gpu_trs_v, gpu_trs_n, patch_id, xyz_local, normals, distance) - ! Correct the sign of the levelset + ! Get the shortest distance between the cell center and the model boundary + gp%levelset = distance gp%levelset = -abs(gp%levelset) ! Assign the levelset_norm gp%levelset_norm = matmul(rotation, normals(1:3)) else ! 2D models - if (interpolate == 1) then - ! Get the shortest distance between the cell center and the model boundary - gp%levelset = f_interpolated_distance(gpu_interpolated_boundary_v(:, :, patch_id), total_vertices, xyz_local) ! Change 7 - else - gp%levelset = f_distance(gpu_boundary_v(:, :, :, patch_id), boundary_edge_count, xyz_local) ! Change 8 - end if - - ! Correct the sign of the levelset - gp%levelset = -abs(gp%levelset) - - ! Get the boundary normals - call f_normals(gpu_boundary_v(:, :, :, patch_id), & - boundary_edge_count, & - xyz_local, & - normals) - - ! Assign the levelset_norm + call f_distance_normals_2D(gpu_boundary_v, patch_id, & + boundary_edge_count, & + xyz_local, normals, distance) + gp%levelset = -abs(distance) gp%levelset_norm = matmul(rotation, normals(1:3)) end if + print *, gp%levelset, gp%levelset_norm + end subroutine s_model_levelset end module m_compute_levelset diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index 050bfdb35b..686ba6fbb3 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -962,9 +962,7 @@ contains real(wp) :: normals(1:3) !< Boundary normal buffer integer :: boundary_vertex_count, boundary_edge_count, total_vertices !< Boundary vertex real(wp), allocatable, dimension(:, :, :) :: boundary_v !< Boundary vertex buffer - real(wp), allocatable, dimension(:, :) :: interpolated_boundary_v !< Interpolated vertex buffer real(wp) :: dx_local, dy_local, dz_local !< Levelset distance buffer - logical :: interpolate !< Logical variable to determine whether or not the model should be interpolated integer :: i, j, k !< Generic loop iterators integer :: patch_id @@ -1023,35 +1021,11 @@ contains ! Need the cells that form the boundary of the flat projection in 2D if (p == 0) call f_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) - ! Check if the model needs interpolation - if (p > 0) then - call f_check_interpolation_3D(model, (/dx_local, dy_local, dz_local/), interpolate) - else - call f_check_interpolation_2D(boundary_v, boundary_edge_count, (/dx_local, dy_local, 0._wp/), interpolate) - end if - ! Show the number of edges and boundary edges in 2D STL models if (proc_rank == 0 .and. p == 0) then print *, ' * Number of 2D model boundary edges:', boundary_edge_count end if - ! Interpolate the STL model along the edges (2D) and on triangle facets (3D) - if (interpolate) then - if (proc_rank == 0) then - print *, ' * Interpolating STL vertices.' - end if - - if (p > 0) then - call f_interpolate_3D(model, (/dx, dy, dz/), interpolated_boundary_v, total_vertices) - else - call f_interpolate_2D(boundary_v, boundary_edge_count, (/dx, dy, 0._wp/), interpolated_boundary_v, total_vertices) - end if - - if (proc_rank == 0) then - print *, ' * Total number of interpolated boundary vertices:', total_vertices - end if - end if - if (proc_rank == 0) then write (*, "(A, 3(2X, F20.10))") " > Model: Min:", bbox%min(1:3) write (*, "(A, 3(2X, F20.10))") " > Cen:", (bbox%min(1:3) + bbox%max(1:3))/2._wp @@ -1079,16 +1053,6 @@ contains models(patch_id)%model = model models(patch_id)%boundary_v = boundary_v models(patch_id)%boundary_edge_count = boundary_edge_count - if (interpolate) then - models(patch_id)%interpolate = 1 - else - models(patch_id)%interpolate = 0 - end if - if (interpolate) then - models(patch_id)%interpolated_boundary_v = interpolated_boundary_v - models(patch_id)%total_vertices = total_vertices - end if - end if end do @@ -1111,24 +1075,18 @@ contains max_bv2 = max(max_bv2, size(models(pid)%boundary_v, 2)) max_bv3 = max(max_bv3, size(models(pid)%boundary_v, 3)) end if - if (allocated(models(pid)%interpolated_boundary_v)) then - max_iv1 = max(max_iv1, size(models(pid)%interpolated_boundary_v, 1)) - max_iv2 = max(max_iv2, size(models(pid)%interpolated_boundary_v, 2)) - end if end do if (max_ntrs > 0) then allocate (gpu_ntrs(1:num_ibs)) allocate (gpu_trs_v(1:3, 1:3, 1:max_ntrs, 1:num_ibs)) allocate (gpu_trs_n(1:3, 1:max_ntrs, 1:num_ibs)) - allocate (gpu_interpolate(1:num_ibs)) allocate (gpu_boundary_edge_count(1:num_ibs)) allocate (gpu_total_vertices(1:num_ibs)) gpu_ntrs = 0 gpu_trs_v = 0._wp gpu_trs_n = 0._wp - gpu_interpolate = 0 gpu_boundary_edge_count = 0 gpu_total_vertices = 0 @@ -1137,17 +1095,11 @@ contains gpu_boundary_v = 0._wp end if - if (max_iv1 > 0) then - allocate (gpu_interpolated_boundary_v(1:max_iv1, 1:max_iv2, 1:num_ibs)) - gpu_interpolated_boundary_v = 0._wp - end if - do pid = 1, num_ibs if (allocated(models(pid)%model)) then gpu_ntrs(pid) = models(pid)%ntrs gpu_trs_v(:, :, 1:models(pid)%ntrs, pid) = models(pid)%trs_v gpu_trs_n(:, 1:models(pid)%ntrs, pid) = models(pid)%trs_n - gpu_interpolate(pid) = models(pid)%interpolate gpu_boundary_edge_count(pid) = models(pid)%boundary_edge_count gpu_total_vertices(pid) = models(pid)%total_vertices end if @@ -1156,19 +1108,12 @@ contains 1:size(models(pid)%boundary_v, 2), & 1:size(models(pid)%boundary_v, 3), pid) = models(pid)%boundary_v end if - if (allocated(models(pid)%interpolated_boundary_v) .and. gpu_interpolate(pid) == 1) then - gpu_interpolated_boundary_v(1:size(models(pid)%interpolated_boundary_v, 1), & - 1:size(models(pid)%interpolated_boundary_v, 2), pid) = models(pid)%interpolated_boundary_v - end if end do - $:GPU_ENTER_DATA(copyin='[gpu_ntrs, gpu_trs_v, gpu_trs_n, gpu_interpolate, gpu_boundary_edge_count, gpu_total_vertices]') + $:GPU_ENTER_DATA(copyin='[gpu_ntrs, gpu_trs_v, gpu_trs_n, gpu_boundary_edge_count, gpu_total_vertices]') if (allocated(gpu_boundary_v)) then $:GPU_ENTER_DATA(copyin='[gpu_boundary_v]') end if - if (allocated(gpu_interpolated_boundary_v)) then - $:GPU_ENTER_DATA(copyin='[gpu_interpolated_boundary_v]') - end if end if end block diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index a7f1e20aad..3a02eec5e7 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -140,19 +140,6 @@ contains end subroutine s_ibm_setup - !> @brief Exchanges immersed boundary marker data across MPI subdomain boundaries. - subroutine s_populate_ib_buffers() - - #:for DIRC, DIRI in [('x', 1), ('y', 2), ('z', 3)] - #:for LOCC, LOCI in [('beg', -1), ('end', 1)] - if (bc_${DIRC}$%${LOCC}$ >= 0) then - call s_mpi_sendrecv_ib_buffers(ib_markers, ${DIRI}$, ${LOCI}$) - end if - #:endfor - #:endfor - - end subroutine s_populate_ib_buffers - !> Subroutine that updates the conservative variables at the ghost points !! @param pb_in Internal bubble pressure !! @param mv_in Mass of vapor in bubble diff --git a/src/simulation/m_mpi_proxy.fpp b/src/simulation/m_mpi_proxy.fpp index be737782be..3da31fbac8 100644 --- a/src/simulation/m_mpi_proxy.fpp +++ b/src/simulation/m_mpi_proxy.fpp @@ -255,191 +255,6 @@ contains end subroutine s_mpi_bcast_user_inputs - !> @brief Packs, exchanges, and unpacks immersed boundary marker buffers between neighboring MPI ranks. - subroutine s_mpi_sendrecv_ib_buffers(ib_markers, mpi_dir, pbc_loc) - - type(integer_field), intent(inout) :: ib_markers - - integer, intent(in) :: mpi_dir, pbc_loc - - integer :: i, j, k, l, r, q !< Generic loop iterators - - integer :: buffer_counts(1:3), buffer_count - - type(int_bounds_info) :: boundary_conditions(1:3) - integer :: beg_end(1:2), grid_dims(1:3) - integer :: dst_proc, src_proc, recv_tag, send_tag - - logical :: beg_end_geq_0, qbmm_comm - - integer :: pack_offset, unpack_offset - -#ifdef MFC_MPI - integer :: ierr !< Generic flag used to identify and report MPI errors - - call nvtxStartRange("IB-MARKER-COMM-PACKBUF") - - buffer_counts = (/ & - buff_size*(n + 1)*(p + 1), & - buff_size*(m + 2*buff_size + 1)*(p + 1), & - buff_size*(m + 2*buff_size + 1)*(n + 2*buff_size + 1) & - /) - - buffer_count = buffer_counts(mpi_dir) - boundary_conditions = (/bc_x, bc_y, bc_z/) - beg_end = (/boundary_conditions(mpi_dir)%beg, boundary_conditions(mpi_dir)%end/) - beg_end_geq_0 = beg_end(max(pbc_loc, 0) - pbc_loc + 1) >= 0 - - ! Implements: - ! pbc_loc bc_x >= 0 -> [send/recv]_tag [dst/src]_proc - ! -1 (=0) 0 -> [1,0] [0,0] | 0 0 [1,0] [beg,beg] - ! -1 (=0) 1 -> [0,0] [1,0] | 0 1 [0,0] [end,beg] - ! +1 (=1) 0 -> [0,1] [1,1] | 1 0 [0,1] [end,end] - ! +1 (=1) 1 -> [1,1] [0,1] | 1 1 [1,1] [beg,end] - - send_tag = f_logical_to_int(.not. f_xor(beg_end_geq_0, pbc_loc == 1)) - recv_tag = f_logical_to_int(pbc_loc == 1) - - dst_proc = beg_end(1 + f_logical_to_int(f_xor(pbc_loc == 1, beg_end_geq_0))) - src_proc = beg_end(1 + f_logical_to_int(pbc_loc == 1)) - - grid_dims = (/m, n, p/) - - pack_offset = 0 - if (f_xor(pbc_loc == 1, beg_end_geq_0)) then - pack_offset = grid_dims(mpi_dir) - buff_size + 1 - end if - - unpack_offset = 0 - if (pbc_loc == 1) then - unpack_offset = grid_dims(mpi_dir) + buff_size + 1 - end if - - ! Pack Buffer to Send - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') - do l = 0, p - do k = 0, n - do j = 0, buff_size - 1 - r = (j + buff_size*(k + (n + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j + pack_offset, k, l) - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - #:elif mpi_dir == 2 - $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') - do l = 0, p - do k = 0, buff_size - 1 - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & - (k + buff_size*l)) - ib_buff_send(r) = ib_markers%sf(j, k + pack_offset, l) - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - #:else - $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') - do l = 0, buff_size - 1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)*l)) - ib_buff_send(r) = ib_markers%sf(j, k, l + pack_offset) - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - #:endif - end if - #:endfor - call nvtxEndRange ! Packbuf - - #:for rdma_mpi in [False, True] - if (rdma_mpi .eqv. ${'.true.' if rdma_mpi else '.false.'}$) then - #:if rdma_mpi - #:call GPU_HOST_DATA(use_device_addr='[ib_buff_send, ib_buff_recv]') - - call nvtxStartRange("IB-MARKER-SENDRECV-RDMA") - call MPI_SENDRECV( & - ib_buff_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, & - ib_buff_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - call nvtxEndRange - - #:endcall GPU_HOST_DATA - $:GPU_WAIT() - #:else - call nvtxStartRange("IB-MARKER-DEV2HOST") - $:GPU_UPDATE(host='[ib_buff_send]') - call nvtxEndRange - - call nvtxStartRange("IB-MARKER-SENDRECV-NO-RMDA") - call MPI_SENDRECV( & - ib_buff_send, buffer_count, MPI_INTEGER, dst_proc, send_tag, & - ib_buff_recv, buffer_count, MPI_INTEGER, src_proc, recv_tag, & - MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) - call nvtxEndRange - - call nvtxStartRange("IB-MARKER-HOST2DEV") - $:GPU_UPDATE(device='[ib_buff_recv]') - call nvtxEndRange - #:endif - end if - #:endfor - - ! Unpack Received Buffer - call nvtxStartRange("IB-MARKER-COMM-UNPACKBUF") - #:for mpi_dir in [1, 2, 3] - if (mpi_dir == ${mpi_dir}$) then - #:if mpi_dir == 1 - $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') - do l = 0, p - do k = 0, n - do j = -buff_size, -1 - r = (j + buff_size*((k + 1) + (n + 1)*l)) - ib_markers%sf(j + unpack_offset, k, l) = ib_buff_recv(r) - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - #:elif mpi_dir == 2 - $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') - do l = 0, p - do k = -buff_size, -1 - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + buff_size*l)) - ib_markers%sf(j, k + unpack_offset, l) = ib_buff_recv(r) - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - #:else - ! Unpacking buffer from bc_z%beg - $:GPU_PARALLEL_LOOP(collapse=3,private='[j,k,l,r]') - do l = -buff_size, -1 - do k = -buff_size, n + buff_size - do j = -buff_size, m + buff_size - r = ((j + buff_size) + (m + 2*buff_size + 1)* & - ((k + buff_size) + (n + 2*buff_size + 1)* & - (l + buff_size))) - ib_markers%sf(j, k, l + unpack_offset) = ib_buff_recv(r) - end do - end do - end do - $:END_GPU_PARALLEL_LOOP() - #:endif - end if - #:endfor - call nvtxEndRange -#endif - - end subroutine s_mpi_sendrecv_ib_buffers - !> @brief Broadcasts random phase numbers from rank 0 to all MPI processes. impure subroutine s_mpi_send_random_number(phi_rn, num_freq) integer, intent(in) :: num_freq From f42982e3f9cb9a6bf31e9a99ca7c9837a519e143 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Sat, 21 Feb 2026 00:36:05 -0500 Subject: [PATCH 20/22] The code really wanted me to run formatting --- src/common/m_model.fpp | 18 +++++++++--------- src/simulation/m_compute_levelset.fpp | 2 -- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index b474fec80e..c68cf5ffba 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -849,7 +849,7 @@ contains real(wp) :: n(1:3), proj(1:3) real(wp) :: d, ndot, denom real(wp) :: u, v_bary, w - real(wp) :: d00, d01, d11, d20, d21 + real(wp) :: l00, l01, l11, l20, l21 real(wp) :: edge(1:3), pe(1:3) real(wp) :: verts(1:3, 1:3) @@ -876,17 +876,17 @@ contains e1(:) = v3(:) - v1(:) pv(:) = proj(:) - v1(:) - d00 = dot_product(e0, e0) - d01 = dot_product(e0, e1) - d11 = dot_product(e1, e1) - d20 = dot_product(pv, e0) - d21 = dot_product(pv, e1) + l00 = dot_product(e0, e0) + l01 = dot_product(e0, e1) + l11 = dot_product(e1, e1) + l20 = dot_product(pv, e0) + l21 = dot_product(pv, e1) - denom = d00*d11 - d01*d01 + denom = l00*l11 - l01*l01 if (abs(denom) > 0._wp) then - v_bary = (d11*d20 - d01*d21)/denom - w = (d00*d21 - d01*d20)/denom + v_bary = (l11*l20 - l01*l21)/denom + w = (l00*l21 - l01*l20)/denom u = 1._wp - v_bary - w else u = -1._wp diff --git a/src/simulation/m_compute_levelset.fpp b/src/simulation/m_compute_levelset.fpp index 19301f2e30..0971e8fbdf 100644 --- a/src/simulation/m_compute_levelset.fpp +++ b/src/simulation/m_compute_levelset.fpp @@ -702,8 +702,6 @@ contains gp%levelset_norm = matmul(rotation, normals(1:3)) end if - print *, gp%levelset, gp%levelset_norm - end subroutine s_model_levelset end module m_compute_levelset From 0099975bce44a7ebf35b7f4b89391e4092f1a5fe Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Sat, 21 Feb 2026 01:06:49 -0500 Subject: [PATCH 21/22] Some more line deletions for the model normal vector --- src/common/m_model.fpp | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index c68cf5ffba..763f7e69a1 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -967,7 +967,7 @@ contains real(wp), intent(out) :: distance integer :: i - real(wp) :: dist_min, dist_proj, dist_v1, dist_v2, t + real(wp) :: dist_min, dist, t real(wp) :: v1(1:2), v2(1:2), edge(1:2), pv(1:2) real(wp) :: edge_len_sq, proj(1:2) @@ -982,11 +982,9 @@ contains v2(2) = boundary_v(i, 2, 2, pid) ! Edge vector and point-to-v1 vector - edge(1) = v2(1) - v1(1) - edge(2) = v2(2) - v1(2) + edge = v2 - v1 pv(1) = point(1) - v1(1) pv(2) = point(2) - v1(2) - edge_len_sq = dot_product(edge, edge) ! Parameter of projection onto the edge line @@ -998,31 +996,18 @@ contains ! Check if projection falls within the segment if (t >= 0._wp .and. t <= 1._wp) then - proj(1) = v1(1) + t*edge(1) - proj(2) = v1(2) + t*edge(2) - dist_proj = sqrt((point(1) - proj(1))**2 + (point(2) - proj(2))**2) - - if (dist_proj < dist_min) then - dist_min = dist_proj - normals(1) = boundary_v(i, 3, 1, pid) - normals(2) = boundary_v(i, 3, 2, pid) - end if - else - ! Check distance to first vertex - dist_v1 = sqrt(pv(1)*pv(1) + pv(2)*pv(2)) - if (dist_v1 < dist_min) then - dist_min = dist_v1 - normals(1) = boundary_v(i, 3, 1, pid) - normals(2) = boundary_v(i, 3, 2, pid) - end if + proj = v1 + t*edge + dist = sqrt((point(1) - proj(1))**2 + (point(2) - proj(2))**2) + else if (t < 0._wp) then ! negative t means that v1 is the closest point on the edge + dist = sqrt((point(1) - v1(1))**2 + (point(2) - v1(2))**2) + else ! t > 1 means that v2 is the closest point on the line edge + dist = sqrt((point(1) - v2(1))**2 + (point(2) - v2(2))**2) + end if - ! Check distance to second vertex - dist_v2 = sqrt((point(1) - v2(1))**2 + (point(2) - v2(2))**2) - if (dist_v2 < dist_min) then - dist_min = dist_v2 - normals(1) = boundary_v(i, 3, 1, pid) - normals(2) = boundary_v(i, 3, 2, pid) - end if + if (dist < dist_min) then + dist_min = dist + normals(1) = boundary_v(i, 3, 1, pid) + normals(2) = boundary_v(i, 3, 2, pid) end if end do From 8709609dae3f5dc319e4af095860438bcfa03f96 Mon Sep 17 00:00:00 2001 From: danieljvickers Date: Sat, 21 Feb 2026 01:10:31 -0500 Subject: [PATCH 22/22] Renamed model subroutines to use the s_ prefix for consistency with the rest of the code --- src/common/m_model.fpp | 26 +++++++++++++------------- src/pre_process/m_icpp_patches.fpp | 2 +- src/simulation/m_compute_levelset.fpp | 4 ++-- src/simulation/m_ib_patches.fpp | 2 +- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index 763f7e69a1..2bbdda5a91 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -23,8 +23,8 @@ module m_model gpu_total_vertices, stl_bounding_boxes ! Subroutines for STL immersed boundaries - public :: f_check_boundary, f_register_edge, f_model_is_inside_flat, & - f_distance_normals_3D, f_distance_normals_2D, s_pack_model_for_gpu + public :: s_check_boundary, s_register_edge, f_model_is_inside_flat, & + s_distance_normals_3D, s_distance_normals_2D, s_pack_model_for_gpu !! array of STL models that can be allocated and then used in IB marker and levelset compute type(t_model_array), allocatable, target :: models(:) @@ -695,7 +695,7 @@ contains !> This procedure checks and labels edges shared by two or more triangles facets of the 2D STL model. !! @param model Model to search in. !! @param boundary_vertex_count Output total boundary vertex count - subroutine f_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) + subroutine s_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) type(t_model), intent(in) :: model real(wp), allocatable, intent(out), dimension(:, :, :) :: boundary_v !< Output boundary vertices/normals @@ -721,17 +721,17 @@ contains ! First edge (v1, v2) edge(1, 1:2) = model%trs(i)%v(1, 1:2) edge(2, 1:2) = model%trs(i)%v(2, 1:2) - call f_register_edge(temp_boundary_v, edge, edge_index, edge_count) + call s_register_edge(temp_boundary_v, edge, edge_index, edge_count) ! Second edge (v2, v3) edge(1, 1:2) = model%trs(i)%v(2, 1:2) edge(2, 1:2) = model%trs(i)%v(3, 1:2) - call f_register_edge(temp_boundary_v, edge, edge_index, edge_count) + call s_register_edge(temp_boundary_v, edge, edge_index, edge_count) ! Third edge (v3, v1) edge(1, 1:2) = model%trs(i)%v(3, 1:2) edge(2, 1:2) = model%trs(i)%v(1, 1:2) - call f_register_edge(temp_boundary_v, edge, edge_index, edge_count) + call s_register_edge(temp_boundary_v, edge, edge_index, edge_count) end do ! Check all edges and count repeated edges @@ -805,10 +805,10 @@ contains boundary_v(i, 3, 2) = ynormal/v_norm end do - end subroutine f_check_boundary + end subroutine s_check_boundary !> This procedure appends the edge end vertices to a temporary buffer. - subroutine f_register_edge(temp_boundary_v, edge, edge_index, edge_count) + subroutine s_register_edge(temp_boundary_v, edge, edge_index, edge_count) integer, intent(inout) :: edge_index !< Edge index iterator integer, intent(inout) :: edge_count !< Total number of edges @@ -820,7 +820,7 @@ contains temp_boundary_v(edge_index, 1, 1:2) = edge(1, 1:2) temp_boundary_v(edge_index, 2, 1:2) = edge(2, 1:2) - end subroutine f_register_edge + end subroutine s_register_edge !> This procedure determines the levelset distance and normals of 3D models !! by computing the exact closest point via projection onto triangle surfaces. @@ -831,7 +831,7 @@ contains !! @param point The cell center of the current levelset cell !! @param normals Output levelset normals !! @param distance Output levelset distance - subroutine f_distance_normals_3D(ntrs, trs_v, trs_n, pid, point, normals, distance) + subroutine s_distance_normals_3D(ntrs, trs_v, trs_n, pid, point, normals, distance) $:GPU_ROUTINE(parallelism='[seq]') integer, intent(in) :: ntrs @@ -946,7 +946,7 @@ contains distance = dist_min - end subroutine f_distance_normals_3D + end subroutine s_distance_normals_3D !> This procedure determines the levelset distance and normals of 2D models !! by computing the exact closest point via projection onto boundary edges. @@ -956,7 +956,7 @@ contains !! @param point The cell center of the current levelset cell !! @param normals Output levelset normals !! @param distance Output levelset distance - subroutine f_distance_normals_2D(boundary_v, pid, boundary_edge_count, point, normals, distance) + subroutine s_distance_normals_2D(boundary_v, pid, boundary_edge_count, point, normals, distance) $:GPU_ROUTINE(parallelism='[seq]') real(wp), dimension(:, :, :, :), intent(in) :: boundary_v @@ -1013,7 +1013,7 @@ contains distance = dist_min - end subroutine f_distance_normals_2D + end subroutine s_distance_normals_2D subroutine s_pack_model_for_gpu(ma) type(t_model_array), intent(inout) :: ma diff --git a/src/pre_process/m_icpp_patches.fpp b/src/pre_process/m_icpp_patches.fpp index 0a2a6fe85c..b2d101188b 100644 --- a/src/pre_process/m_icpp_patches.fpp +++ b/src/pre_process/m_icpp_patches.fpp @@ -1652,7 +1652,7 @@ contains print *, ' * Number of input model vertices:', 3*model%ntrs end if - call f_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) + call s_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) ! Show the number of edges and boundary edges in 2D STL models if (proc_rank == 0 .and. p == 0) then diff --git a/src/simulation/m_compute_levelset.fpp b/src/simulation/m_compute_levelset.fpp index 0971e8fbdf..2fedac4ea9 100644 --- a/src/simulation/m_compute_levelset.fpp +++ b/src/simulation/m_compute_levelset.fpp @@ -685,7 +685,7 @@ contains ! 3D models if (p > 0) then ! Get the boundary normals and shortest distance between the cell center and the model boundary - call f_distance_normals_3D(gpu_ntrs(patch_id), gpu_trs_v, gpu_trs_n, patch_id, xyz_local, normals, distance) + call s_distance_normals_3D(gpu_ntrs(patch_id), gpu_trs_v, gpu_trs_n, patch_id, xyz_local, normals, distance) ! Get the shortest distance between the cell center and the model boundary gp%levelset = distance @@ -695,7 +695,7 @@ contains gp%levelset_norm = matmul(rotation, normals(1:3)) else ! 2D models - call f_distance_normals_2D(gpu_boundary_v, patch_id, & + call s_distance_normals_2D(gpu_boundary_v, patch_id, & boundary_edge_count, & xyz_local, normals, distance) gp%levelset = -abs(distance) diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index 686ba6fbb3..fd5fee175b 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -1019,7 +1019,7 @@ contains end if ! Need the cells that form the boundary of the flat projection in 2D - if (p == 0) call f_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) + if (p == 0) call s_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) ! Show the number of edges and boundary edges in 2D STL models if (proc_rank == 0 .and. p == 0) then