diff --git a/src/common/include/parallel_macros.fpp b/src/common/include/parallel_macros.fpp index bfe4b3beaf..c5ad5c1fb3 100644 --- a/src/common/include/parallel_macros.fpp +++ b/src/common/include/parallel_macros.fpp @@ -174,6 +174,16 @@ #endif #:enddef +#:def END_GPU_ATOMIC_CAPTURE() + #:set acc_end_directive = '!$acc end atomic' + #:set omp_end_directive = '!$omp end atomic' +#if defined(MFC_OpenACC) + $:acc_end_directive +#elif defined(MFC_OpenMP) + $:omp_end_directive +#endif +#:enddef + #:def GPU_UPDATE(host=None, device=None, extraAccArgs=None, extraOmpArgs=None) #:set acc_code = ACC_UPDATE(host=host, device=device, extraAccArgs=extraAccArgs) #:set omp_code = OMP_UPDATE(host=host, device=device, extraOmpArgs=extraOmpArgs) diff --git a/src/common/m_constants.fpp b/src/common/m_constants.fpp index f926824cb8..b2f92a146a 100644 --- a/src/common/m_constants.fpp +++ b/src/common/m_constants.fpp @@ -23,7 +23,7 @@ module m_constants integer, parameter :: fourier_rings = 5 !< Fourier filter ring limit integer, parameter :: num_fluids_max = 10 !< Maximum number of fluids in the simulation integer, parameter :: num_probes_max = 10 !< Maximum number of flow probes in the simulation - integer, parameter :: num_patches_max = 10 + integer, parameter :: num_patches_max = 1000 integer, parameter :: num_bc_patches_max = 10 integer, parameter :: pathlen_max = 400 integer, parameter :: nnode = 4 !< Number of QBMM nodes diff --git a/src/common/m_derived_types.fpp b/src/common/m_derived_types.fpp index e38f1dea6a..8254e4da80 100644 --- a/src/common/m_derived_types.fpp +++ b/src/common/m_derived_types.fpp @@ -183,12 +183,18 @@ module m_derived_types end type t_model type :: t_model_array + ! Original CPU-side fields (unchanged) type(t_model), allocatable :: model real(wp), allocatable, dimension(:, :, :) :: boundary_v real(wp), allocatable, dimension(:, :) :: interpolated_boundary_v integer :: boundary_edge_count integer :: total_vertices - logical :: interpolate + integer :: interpolate + + ! GPU-friendly flattened arrays + integer :: ntrs ! copy of model%ntrs + real(wp), allocatable, dimension(:, :, :) :: trs_v ! (3, 3, ntrs) - triangle vertices + real(wp), allocatable, dimension(:, :) :: trs_n ! (3, ntrs) - triangle normals end type t_model_array !> Derived type adding initial condition (ic) patch parameters as attributes diff --git a/src/common/m_helper.fpp b/src/common/m_helper.fpp index e53f5e6b2d..95ba4d78ad 100644 --- a/src/common/m_helper.fpp +++ b/src/common/m_helper.fpp @@ -333,6 +333,8 @@ contains !! @return The cross product of the two vectors. pure function f_cross(a, b) result(c) + $:GPU_ROUTINE(parallelism='[seq]') + real(wp), dimension(3), intent(in) :: a, b real(wp), dimension(3) :: c diff --git a/src/common/m_model.fpp b/src/common/m_model.fpp index cf6d23effd..443f6f399e 100644 --- a/src/common/m_model.fpp +++ b/src/common/m_model.fpp @@ -18,12 +18,28 @@ module m_model private - public :: f_model_read, s_model_write, s_model_free, f_model_is_inside + public :: f_model_read, s_model_write, s_model_free, f_model_is_inside, models, gpu_ntrs, & + gpu_trs_v, gpu_trs_n, gpu_boundary_v, gpu_interpolated_boundary_v, gpu_interpolate, gpu_boundary_edge_count, & + gpu_total_vertices, stl_bounding_boxes ! Subroutines for STL immersed boundaries public :: f_check_boundary, f_register_edge, f_check_interpolation_2D, & f_check_interpolation_3D, f_interpolate_2D, f_interpolate_3D, & - f_interpolated_distance, f_normals, f_distance, f_distance_normals_3D, f_tri_area + f_interpolated_distance, f_normals, f_distance, f_distance_normals_3D, f_tri_area, s_pack_model_for_gpu, & + f_model_is_inside_flat, f_distance_normals_3d_flat + + !! array of STL models that can be allocated and then used in IB marker and levelset compute + type(t_model_array), allocatable, target :: models(:) + !! GPU-friendly flat arrays for STL model data + integer, allocatable :: gpu_ntrs(:) + real(wp), allocatable, dimension(:, :, :, :) :: gpu_trs_v + real(wp), allocatable, dimension(:, :, :) :: gpu_trs_n + real(wp), allocatable, dimension(:, :, :, :) :: gpu_boundary_v + real(wp), allocatable, dimension(:, :, :) :: gpu_interpolated_boundary_v + integer, allocatable :: gpu_interpolate(:) + integer, allocatable :: gpu_boundary_edge_count(:) + integer, allocatable :: gpu_total_vertices(:) + real(wp), allocatable :: stl_bounding_boxes(:, :, :) contains @@ -481,6 +497,23 @@ contains is_buffered = .true. end subroutine s_skip_ignored_lines + !> This function is used to replace the fortran random number + !! generator because the native generator is not compatible being called + !! from GPU routines/functions + function f_model_random_number(seed) result(rval) + + $:GPU_ROUTINE(parallelism='[seq]') + + integer, intent(inout) :: seed + real(wp) :: rval + + seed = ieor(seed, ishft(seed, 13)) + seed = ieor(seed, ishft(seed, -17)) + seed = ieor(seed, ishft(seed, 5)) + + rval = abs(real(seed, wp))/real(huge(seed), wp) + end function f_model_random_number + !> This procedure, recursively, finds whether a point is inside an octree. !! @param model Model to search in. !! @param point Point to test. @@ -495,32 +528,33 @@ contains real(wp), dimension(1:3), intent(in) :: point real(wp), dimension(1:3), intent(in) :: spacing integer, intent(in) :: spc + real(wp) :: phi, theta + integer :: rand_seed real(wp) :: fraction type(t_ray) :: ray - integer :: i, j, nInOrOut, nHits + integer :: i, j, k, nInOrOut, nHits real(wp), dimension(1:spc, 1:3) :: ray_origins, ray_dirs - ! TODO :: The random number generation prohibits GPU compute due to the subroutine not being able to be called in kernels - ! This should be swapped out with something that allows GPU compute. I recommend the fibonacci sphere: - ! do i = 1, spc - ! phi = acos(1.0 - 2.0*(i-1.0)/(spc-1.0)) - ! theta = pi * (1.0 + sqrt(5.0)) * (i-1.0) - ! ray_dirs(i,:) = [cos(theta)*sin(phi), sin(theta)*sin(phi), cos(phi)] - ! ray_origins(i,:) = point - ! end do + rand_seed = int(point(1)*73856093_wp) + & + int(point(2)*19349663_wp) + & + int(point(3)*83492791_wp) + if (rand_seed == 0) rand_seed = 1 + ! generate our random collection or rays do i = 1, spc - call random_number(ray_origins(i, :)) - ray_origins(i, :) = point + (ray_origins(i, :) - 0.5_wp)*spacing(:) - - call random_number(ray_dirs(i, :)) - ray_dirs(i, :) = ray_dirs(i, :) - 0.5_wp + do k = 1, 3 + ! random jitter in the origin helps us estimate volume fraction instead of only at the cell center + ray_origins(i, k) = point(k) + (f_model_random_number(rand_seed) - 0.5_wp)*spacing(k) + ! cast sample rays in all directions + ray_dirs(i, k) = point(k) + f_model_random_number(rand_seed) - 0.5_wp + end do ray_dirs(i, :) = ray_dirs(i, :)/sqrt(sum(ray_dirs(i, :)*ray_dirs(i, :))) end do + ! ray trace nInOrOut = 0 do i = 1, spc ray%o = ray_origins(i, :) @@ -528,11 +562,14 @@ contains nHits = 0 do j = 1, model%ntrs + ! count the number of triangles this ray intersects if (f_intersects_triangle(ray, model%trs(j))) then nHits = nHits + 1 end if end do + ! if the ray hits an odd number of triangles on its way out, then + ! it must be on the inside of the model nInOrOut = nInOrOut + mod(nHits, 2) end do @@ -540,6 +577,58 @@ contains end function f_model_is_inside + impure function f_model_is_inside_flat(ntrs, trs_v, trs_n, pid, point, spacing, spc) result(fraction) + + $:GPU_ROUTINE(parallelism='[seq]') + + integer, intent(in) :: ntrs + real(wp), dimension(:, :, :, :), intent(in) :: trs_v + real(wp), dimension(:, :, :), intent(in) :: trs_n + integer, intent(in) :: pid + real(wp), dimension(1:3), intent(in) :: point + real(wp), dimension(1:3), intent(in) :: spacing + integer, intent(in) :: spc + + real(wp) :: fraction + real(wp) :: origin(1:3), dir(1:3), dir_mag + type(t_ray) :: ray + type(t_triangle) :: tri + integer :: i, j, k, nInOrOut, nHits + integer :: rand_seed + + rand_seed = int(point(1)*73856093_wp) + & + int(point(2)*19349663_wp) + & + int(point(3)*83492791_wp) + if (rand_seed == 0) rand_seed = 1 + + ! generate our random collection of rays + nInOrOut = 0 + do i = 1, spc + ! Generate one ray at a time — no arrays needed + do k = 1, 3 + origin(k) = point(k) + (f_model_random_number(rand_seed) - 0.5_wp)*spacing(k) + dir(k) = point(k) + f_model_random_number(rand_seed) - 0.5_wp + end do + dir_mag = sqrt(dir(1)*dir(1) + dir(2)*dir(2) + dir(3)*dir(3)) + dir(:) = dir(:)/dir_mag + + ray%o = origin + ray%d = dir + + nHits = 0 + do j = 1, ntrs + tri%v(:, :) = trs_v(:, :, j, pid) + tri%n(:) = trs_n(:, j, pid) + if (f_intersects_triangle(ray, tri)) then + nHits = nHits + 1 + end if + end do + nInOrOut = nInOrOut + mod(nHits, 2) + end do + + fraction = real(nInOrOut)/real(spc) + end function f_model_is_inside_flat + ! From https://www.scratchapixel.com/lessons/3e-basic-rendering/ray-tracing-rendering-a-triangle/ray-triangle-intersection-geometric-solution.html !> This procedure checks if a ray intersects a triangle. !! @param ray Ray. @@ -547,6 +636,8 @@ contains !! @return True if the ray intersects the triangle, false otherwise. elemental function f_intersects_triangle(ray, triangle) result(intersects) + $:GPU_ROUTINE(parallelism='[seq]') + type(t_ray), intent(in) :: ray type(t_triangle), intent(in) :: triangle @@ -1109,6 +1200,66 @@ contains end subroutine f_distance_normals_3D + subroutine f_distance_normals_3D_flat(ntrs, trs_v, trs_n, pid, point, normals, distance) + + $:GPU_ROUTINE(parallelism='[seq]') + + integer, intent(in) :: ntrs + real(wp), dimension(:, :, :, :), intent(in) :: trs_v + real(wp), dimension(:, :, :), intent(in) :: trs_n + integer, intent(in) :: pid + real(wp), dimension(1:3), intent(in) :: point + real(wp), dimension(1:3), intent(out) :: normals + real(wp), intent(out) :: distance + + real(wp), dimension(1:3, 1:3) :: tri + real(wp) :: dist_min, dist_t_min + real(wp) :: dist_min_normal, dist_buffer_normal + real(wp), dimension(1:3) :: midp + real(wp), dimension(1:3) :: dist_buffer + integer :: i, j, tri_idx + + dist_min = 1.e12_wp + dist_min_normal = 1.e12_wp + distance = 0._wp + + tri_idx = 0 + do i = 1, ntrs + do j = 1, 3 + tri(j, 1) = trs_v(j, 1, i, pid) + tri(j, 2) = trs_v(j, 2, i, pid) + tri(j, 3) = trs_v(j, 3, i, pid) + dist_buffer(j) = sqrt((point(1) - tri(j, 1))**2 + & + (point(2) - tri(j, 2))**2 + & + (point(3) - tri(j, 3))**2) + end do + + do j = 1, 3 + midp(j) = (tri(1, j) + tri(2, j) + tri(3, j))/3 + end do + + dist_t_min = minval(dist_buffer(1:3)) + dist_buffer_normal = sqrt((point(1) - midp(1))**2 + & + (point(2) - midp(2))**2 + & + (point(3) - midp(3))**2) + + if (dist_t_min < dist_min) then + dist_min = dist_t_min + end if + + if (dist_buffer_normal < dist_min_normal) then + dist_min_normal = dist_buffer_normal + tri_idx = i + end if + end do + + normals(1) = trs_n(1, tri_idx, pid) + normals(2) = trs_n(2, tri_idx, pid) + normals(3) = trs_n(3, tri_idx, pid) + distance = dist_min + + end subroutine f_distance_normals_3D_flat + !> This procedure determines the levelset distance of 2D models without interpolation. !! @param boundary_v Group of all the boundary vertices of the 2D model without interpolation !! @param boundary_edge_count Output the total number of boundary edges @@ -1240,4 +1391,18 @@ contains end function f_interpolated_distance + subroutine s_pack_model_for_gpu(ma) + type(t_model_array), intent(inout) :: ma + integer :: i + + ma%ntrs = ma%model%ntrs + allocate (ma%trs_v(1:3, 1:3, 1:ma%ntrs)) + allocate (ma%trs_n(1:3, 1:ma%ntrs)) + + do i = 1, ma%ntrs + ma%trs_v(:, :, i) = ma%model%trs(i)%v(:, :) + ma%trs_n(:, i) = ma%model%trs(i)%n(:) + end do + end subroutine + end module m_model diff --git a/src/simulation/m_compute_levelset.fpp b/src/simulation/m_compute_levelset.fpp index c9ae771599..eaae58667b 100644 --- a/src/simulation/m_compute_levelset.fpp +++ b/src/simulation/m_compute_levelset.fpp @@ -33,12 +33,6 @@ contains integer :: i, patch_id, patch_geometry - if (num_gps < 1) then - return - end if - - $:GPU_UPDATE(device='[gps(1:num_gps)]') - ! 3D Patch Geometries if (p > 0) then @@ -56,6 +50,8 @@ contains call s_cylinder_levelset(gps(i)) elseif (patch_geometry == 11) then call s_3d_airfoil_levelset(gps(i)) + elseif (patch_geometry == 12) then + call s_model_levelset(gps(i)) end if end do $:END_GPU_PARALLEL_LOOP() @@ -75,6 +71,8 @@ contains call s_rectangle_levelset(gps(i)) elseif (patch_geometry == 4) then call s_airfoil_levelset(gps(i)) + elseif (patch_geometry == 5) then + call s_model_levelset(gps(i)) elseif (patch_geometry == 6) then call s_ellipse_levelset(gps(i)) end if @@ -83,19 +81,6 @@ contains end if - ! STL models computed on the CPU for now - do i = 1, num_gps - patch_id = gps(i)%ib_patch_id - patch_geometry = patch_ib(patch_id)%geometry - - if (patch_geometry == 5 .or. patch_geometry == 12) then - call s_model_levelset(gps(i)) - $:GPU_UPDATE(device='[gps(i)]') - end if - end do - - $:GPU_UPDATE(host='[gps(1:num_gps)]') - end subroutine s_apply_levelset !> @brief Computes the signed distance and outward normal from a ghost point to a circular immersed boundary. @@ -662,7 +647,7 @@ contains type(ghost_point), intent(inout) :: gp integer :: i, j, k, patch_id, boundary_edge_count, total_vertices - logical :: interpolate + integer :: interpolate real(wp), dimension(1:3) :: center, xyz_local real(wp) :: normals(1:3) !< Boundary normal buffer real(wp) :: distance @@ -674,9 +659,9 @@ contains k = gp%loc(3) ! load in model values - interpolate = models(patch_id)%interpolate - boundary_edge_count = models(patch_id)%boundary_edge_count - total_vertices = models(patch_id)%total_vertices + interpolate = gpu_interpolate(patch_id) + boundary_edge_count = gpu_boundary_edge_count(patch_id) + total_vertices = gpu_total_vertices(patch_id) center = 0._wp if (.not. f_is_default(patch_ib(patch_id)%x_centroid)) center(1) = patch_ib(patch_id)%x_centroid @@ -684,6 +669,7 @@ contains if (p > 0) then if (.not. f_is_default(patch_ib(patch_id)%z_centroid)) center(3) = patch_ib(patch_id)%z_centroid end if + inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) rotation(:, :) = patch_ib(patch_id)%rotation_matrix(:, :) @@ -702,11 +688,11 @@ contains if (p > 0) then ! Get the boundary normals and shortest distance between the cell center and the model boundary - call f_distance_normals_3D(models(patch_id)%model, xyz_local, normals, distance) + call f_distance_normals_3D_flat(gpu_ntrs(patch_id), gpu_trs_v, gpu_trs_n, patch_id, xyz_local, normals, distance) ! Get the shortest distance between the cell center and the interpolated model boundary if (interpolate) then - gp%levelset = f_interpolated_distance(models(patch_id)%interpolated_boundary_v, total_vertices, xyz_local) + gp%levelset = f_interpolated_distance(gpu_interpolated_boundary_v(:, :, patch_id), total_vertices, xyz_local) else gp%levelset = distance end if @@ -718,19 +704,18 @@ contains gp%levelset_norm = matmul(rotation, normals(1:3)) else ! 2D models - if (interpolate) then + if (interpolate == 1) then ! Get the shortest distance between the cell center and the model boundary - gp%levelset = f_interpolated_distance(models(patch_id)%interpolated_boundary_v, total_vertices, xyz_local) + gp%levelset = f_interpolated_distance(gpu_interpolated_boundary_v(:, :, patch_id), total_vertices, xyz_local) ! Change 7 else - ! Get the shortest distance between the cell center and the interpolated model boundary - gp%levelset = f_distance(models(patch_id)%boundary_v, boundary_edge_count, xyz_local) + gp%levelset = f_distance(gpu_boundary_v(:, :, :, patch_id), boundary_edge_count, xyz_local) ! Change 8 end if ! Correct the sign of the levelset gp%levelset = -abs(gp%levelset) ! Get the boundary normals - call f_normals(models(patch_id)%boundary_v, & + call f_normals(gpu_boundary_v(:, :, :, patch_id), & boundary_edge_count, & xyz_local, & normals) diff --git a/src/simulation/m_ib_patches.fpp b/src/simulation/m_ib_patches.fpp index 9839e49f52..7d0256e414 100644 --- a/src/simulation/m_ib_patches.fpp +++ b/src/simulation/m_ib_patches.fpp @@ -26,7 +26,7 @@ module m_ib_patches implicit none - private; public :: s_apply_ib_patches, s_update_ib_rotation_matrix, s_instantiate_STL_models, models, f_convert_cyl_to_cart + private; public :: s_apply_ib_patches, s_update_ib_rotation_matrix, f_convert_cyl_to_cart, s_instantiate_STL_models real(wp) :: x_centroid, y_centroid, z_centroid real(wp) :: length_x, length_y, length_z @@ -56,16 +56,12 @@ module m_ib_patches character(len=5) :: istr ! string to store int to string result for error checking - type(t_model_array), allocatable, target :: models(:) - $:GPU_DECLARE(create='[models]') - !! array of STL models that can be allocated and then used in IB marker and levelset compute - contains !> @brief Applies all immersed boundary patch geometries to mark interior cells in the IB marker array. - impure subroutine s_apply_ib_patches(ib_markers_sf) + impure subroutine s_apply_ib_patches(ib_markers) - integer, dimension(:, :, :), intent(inout), optional :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers integer :: i @@ -78,16 +74,16 @@ contains do i = 1, num_ibs if (patch_ib(i)%geometry == 8) then - call s_ib_sphere(i, ib_markers_sf) + call s_ib_sphere(i, ib_markers) elseif (patch_ib(i)%geometry == 9) then - call s_ib_cuboid(i, ib_markers_sf) + call s_ib_cuboid(i, ib_markers) elseif (patch_ib(i)%geometry == 10) then - call s_ib_cylinder(i, ib_markers_sf) + call s_ib_cylinder(i, ib_markers) elseif (patch_ib(i)%geometry == 11) then - call s_ib_3D_airfoil(i, ib_markers_sf) + call s_ib_3D_airfoil(i, ib_markers) ! STL+IBM patch elseif (patch_ib(i)%geometry == 12) then - call s_ib_model(i, ib_markers_sf) + call s_ib_3d_model(i, ib_markers) end if end do !> @} @@ -99,16 +95,16 @@ contains !> @{ do i = 1, num_ibs if (patch_ib(i)%geometry == 2) then - call s_ib_circle(i, ib_markers_sf) + call s_ib_circle(i, ib_markers) elseif (patch_ib(i)%geometry == 3) then - call s_ib_rectangle(i, ib_markers_sf) + call s_ib_rectangle(i, ib_markers) elseif (patch_ib(i)%geometry == 4) then - call s_ib_airfoil(i, ib_markers_sf) + call s_ib_airfoil(i, ib_markers) ! STL+IBM patch elseif (patch_ib(i)%geometry == 5) then - call s_ib_model(i, ib_markers_sf) + call s_ib_model(i, ib_markers) elseif (patch_ib(i)%geometry == 6) then - call s_ib_ellipse(i, ib_markers_sf) + call s_ib_ellipse(i, ib_markers) end if end do !> @} @@ -117,149 +113,23 @@ contains end subroutine s_apply_ib_patches - subroutine s_instantiate_STL_models() - - ! Variables for IBM+STL - real(wp) :: normals(1:3) !< Boundary normal buffer - integer :: boundary_vertex_count, boundary_edge_count, total_vertices !< Boundary vertex - real(wp), allocatable, dimension(:, :, :) :: boundary_v !< Boundary vertex buffer - real(wp), allocatable, dimension(:, :) :: interpolated_boundary_v !< Interpolated vertex buffer - real(wp) :: distance !< Levelset distance buffer - logical :: interpolate !< Logical variable to determine whether or not the model should be interpolated - - integer :: i, j, k !< Generic loop iterators - integer :: patch_id - - type(t_bbox) :: bbox, bbox_old - type(t_model) :: model - type(ic_model_parameters) :: params - - real(wp) :: eta - real(wp), dimension(1:3) :: point, model_center - real(wp) :: grid_mm(1:3, 1:2) - - real(wp), dimension(1:4, 1:4) :: transform, transform_n - - call random_seed() - - do patch_id = 1, num_ibs - if (patch_ib(patch_id)%geometry == 5 .or. patch_ib(patch_id)%geometry == 12) then - allocate (models(patch_id)%model) - print *, " * Reading model: "//trim(patch_ib(patch_id)%model_filepath) - - model = f_model_read(patch_ib(patch_id)%model_filepath) - params%scale(:) = patch_ib(patch_id)%model_scale(:) - params%translate(:) = patch_ib(patch_id)%model_translate(:) - params%rotate(:) = patch_ib(patch_id)%model_rotate(:) - params%spc = patch_ib(patch_id)%model_spc - params%threshold = patch_ib(patch_id)%model_threshold - - if (f_approx_equal(dot_product(params%scale, params%scale), 0._wp)) then - params%scale(:) = 1._wp - end if - - if (proc_rank == 0) then - print *, " * Transforming model." - end if - - ! Get the model center before transforming the model - bbox_old = f_create_bbox(model) - model_center(1:3) = (bbox_old%min(1:3) + bbox_old%max(1:3))/2._wp - - ! Compute the transform matrices for vertices and normals - transform = f_create_transform_matrix(params, model_center) - transform_n = f_create_transform_matrix(params) - - call s_transform_model(model, transform, transform_n) - - ! Recreate the bounding box after transformation - bbox = f_create_bbox(model) - - ! Show the number of vertices in the original STL model - if (proc_rank == 0) then - print *, ' * Number of input model vertices:', 3*model%ntrs - end if - - call f_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) - - ! Check if the model needs interpolation - if (p > 0) then - call f_check_interpolation_3D(model, (/dx, dy, dz/), interpolate) - else - call f_check_interpolation_2D(boundary_v, boundary_edge_count, (/minval(dx), minval(dy), 0._wp/), interpolate) - end if - - ! Show the number of edges and boundary edges in 2D STL models - if (proc_rank == 0 .and. p == 0) then - print *, ' * Number of 2D model boundary edges:', boundary_edge_count - end if - - ! Interpolate the STL model along the edges (2D) and on triangle facets (3D) - if (interpolate) then - if (proc_rank == 0) then - print *, ' * Interpolating STL vertices.' - end if - - if (p > 0) then - call f_interpolate_3D(model, (/dx, dy, dz/), interpolated_boundary_v, total_vertices) - else - call f_interpolate_2D(boundary_v, boundary_edge_count, (/dx, dy, dz/), interpolated_boundary_v, total_vertices) - end if - - if (proc_rank == 0) then - print *, ' * Total number of interpolated boundary vertices:', total_vertices - end if - end if - - if (proc_rank == 0) then - write (*, "(A, 3(2X, F20.10))") " > Model: Min:", bbox%min(1:3) - write (*, "(A, 3(2X, F20.10))") " > Cen:", (bbox%min(1:3) + bbox%max(1:3))/2._wp - write (*, "(A, 3(2X, F20.10))") " > Max:", bbox%max(1:3) - - grid_mm(1, :) = (/minval(x_cc(0:m)) - 0.e5_wp*dx(0), maxval(x_cc(0:m)) + 0.e5_wp*dx(m)/) - grid_mm(2, :) = (/minval(y_cc(0:n)) - 0.e5_wp*dy(0), maxval(y_cc(0:n)) + 0.e5_wp*dy(n)/) - - if (p > 0) then - grid_mm(3, :) = (/minval(z_cc(0:p)) - 0.e5_wp*dz(0), maxval(z_cc(0:p)) + 0.e5_wp*dz(p)/) - else - grid_mm(3, :) = (/0._wp, 0._wp/) - end if - - write (*, "(A, 3(2X, F20.10))") " > Domain: Min:", grid_mm(:, 1) - write (*, "(A, 3(2X, F20.10))") " > Cen:", (grid_mm(:, 1) + grid_mm(:, 2))/2._wp - write (*, "(A, 3(2X, F20.10))") " > Max:", grid_mm(:, 2) - end if - - models(patch_id)%model = model - models(patch_id)%boundary_v = boundary_v - models(patch_id)%boundary_edge_count = boundary_edge_count - models(patch_id)%interpolate = interpolate - if (interpolate) then - models(patch_id)%interpolated_boundary_v = interpolated_boundary_v - models(patch_id)%total_vertices = total_vertices - end if - - end if - end do - - end subroutine s_instantiate_STL_models - !> The circular patch is a 2D geometry that may be used, for !! example, in creating a bubble or a droplet. The geometry !! of the patch is well-defined when its centroid and radius !! are provided. Note that the circular patch DOES allow for !! the smoothing of its boundary. !! @param patch_id is the patch identifier - !! @param ib_markers_sf Array to track patch ids - subroutine s_ib_circle(patch_id, ib_markers_sf) + !! @param ib_markers Array to track patch ids + !! @param ib True if this patch is an immersed boundary + subroutine s_ib_circle(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers real(wp), dimension(1:2) :: center real(wp) :: radius - integer :: i, j, k !< Generic loop iterators + integer :: i, j, il, ir, jl, jr !< Generic loop iterators ! Transferring the circular patch's radius, centroid, smearing patch ! identity and smearing coefficient information @@ -268,19 +138,27 @@ contains center(2) = patch_ib(patch_id)%y_centroid radius = patch_ib(patch_id)%radius + ! find the indices to the left and right of the IB in i, j, k + il = -gp_layers + jl = -gp_layers + ir = m + gp_layers + jr = n + gp_layers + call get_bounding_indices(center(1) - radius, center(1) + radius, x_cc, il, ir) + call get_bounding_indices(center(2) - radius, center(2) + radius, y_cc, jl, jr) + ! Checking whether the circle covers a particular cell in the domain ! and verifying whether the current patch has permission to write to ! that cell. If both queries check out, the primitive variables of ! the current patch are assigned to this cell. - $:GPU_PARALLEL_LOOP(private='[i,j]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j]',& & copyin='[patch_id,center,radius]', collapse=2) - do j = 0, n - do i = 0, m + do j = jl, jr + do i = il, ir if ((x_cc(i) - center(1))**2 & + (y_cc(j) - center(2))**2 <= radius**2) & then - ib_markers_sf(i, j, 0) = patch_id + ib_markers%sf(i, j, 0) = patch_id end if end do end do @@ -290,15 +168,15 @@ contains !> @brief Marks cells inside a 2D NACA 4-digit airfoil immersed boundary using upper and lower surface grids. !! @param patch_id is the patch identifier - !! @param ib_markers_sf Array to track patch ids - subroutine s_ib_airfoil(patch_id, ib_markers_sf) + !! @param ib_markers Array to track patch ids + subroutine s_ib_airfoil(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers real(wp) :: f, ca_in, pa, ma, ta real(wp) :: xa, yt, xu, yu, xl, yl, xc, yc, dycdxc, sin_c, cos_c - integer :: i, j, k + integer :: i, j, k, il, ir, jl, jr integer :: Np1, Np2 real(wp), dimension(1:3) :: xy_local, offset !< x and y coordinates in local IB frame @@ -378,10 +256,19 @@ contains end if - $:GPU_PARALLEL_LOOP(private='[i,j,xy_local,k,f]', copy='[ib_markers_sf]',& + ! find the indices to the left and right of the IB in i, j, k + il = -gp_layers + jl = -gp_layers + ir = m + gp_layers + jr = n + gp_layers + ! maximum distance any marker can be from the center is the length of the airfoil + call get_bounding_indices(center(1) - ca_in, center(1) + ca_in, x_cc, il, ir) + call get_bounding_indices(center(2) - ca_in, center(2) + ca_in, y_cc, jl, jr) + + $:GPU_PARALLEL_LOOP(private='[i,j,xy_local,k,f]', & & copyin='[patch_id,center,inverse_rotation,offset,ma,ca_in,airfoil_grid_u,airfoil_grid_l]', collapse=2) - do j = 0, n - do i = 0, m + do j = jl, jr + do i = il, ir xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] ! get coordinate frame centered on IB xy_local = matmul(inverse_rotation, xy_local) ! rotate the frame into the IB's coordinates xy_local = xy_local - offset ! airfoils are a patch that require a centroid offset @@ -403,13 +290,13 @@ contains if (f_approx_equal(airfoil_grid_u(k)%x, xy_local(1))) then if (xy_local(2) <= airfoil_grid_u(k)%y) then !!IB - ib_markers_sf(i, j, 0) = patch_id + ib_markers%sf(i, j, 0) = patch_id end if else f = (airfoil_grid_u(k)%x - xy_local(1))/(airfoil_grid_u(k)%x - airfoil_grid_u(k - 1)%x) if (xy_local(2) <= ((1._wp - f)*airfoil_grid_u(k)%y + f*airfoil_grid_u(k - 1)%y)) then !!IB - ib_markers_sf(i, j, 0) = patch_id + ib_markers%sf(i, j, 0) = patch_id end if end if else @@ -420,14 +307,14 @@ contains if (f_approx_equal(airfoil_grid_l(k)%x, xy_local(1))) then if (xy_local(2) >= airfoil_grid_l(k)%y) then !!IB - ib_markers_sf(i, j, 0) = patch_id + ib_markers%sf(i, j, 0) = patch_id end if else f = (airfoil_grid_l(k)%x - xy_local(1))/(airfoil_grid_l(k)%x - airfoil_grid_l(k - 1)%x) if (xy_local(2) >= ((1._wp - f)*airfoil_grid_l(k)%y + f*airfoil_grid_l(k - 1)%y)) then !!IB - ib_markers_sf(i, j, 0) = patch_id + ib_markers%sf(i, j, 0) = patch_id end if end if end if @@ -440,14 +327,15 @@ contains !> @brief Marks cells inside a 3D extruded NACA 4-digit airfoil immersed boundary with finite span. !! @param patch_id is the patch identifier - !! @param ib_markers_sf Array to track patch ids - subroutine s_ib_3D_airfoil(patch_id, ib_markers_sf) + !! @param ib_markers Array to track patch ids + !! @param ib True if this patch is an immersed boundary + subroutine s_ib_3D_airfoil(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers real(wp) :: lz, z_max, z_min, f, ca_in, pa, ma, ta, xa, yt, xu, yu, xl, yl, xc, yc, dycdxc, sin_c, cos_c - integer :: i, j, k, l + integer :: i, j, k, l, il, ir, jl, jr, ll, lr integer :: Np1, Np2 real(wp), dimension(1:3) :: xyz_local, center, offset !< x, y, z coordinates in local IB frame @@ -529,11 +417,23 @@ contains $:GPU_UPDATE(device='[airfoil_grid_l,airfoil_grid_u]') end if - $:GPU_PARALLEL_LOOP(private='[i,j,l,xyz_local,k,f]', copy='[ib_markers_sf]',& + ! find the indices to the left and right of the IB in i, j, k + il = -gp_layers + jl = -gp_layers + ll = -gp_layers + ir = m + gp_layers + jr = n + gp_layers + lr = p + gp_layers + ! maximum distance any marker can be from the center is the length of the airfoil + call get_bounding_indices(center(1) - ca_in, center(1) + ca_in, x_cc, il, ir) + call get_bounding_indices(center(2) - ca_in, center(2) + ca_in, y_cc, jl, jr) + call get_bounding_indices(center(3) - ca_in, center(3) + ca_in, z_cc, ll, lr) + + $:GPU_PARALLEL_LOOP(private='[i,j,l,xyz_local,k,f]',& & copyin='[patch_id,center,inverse_rotation,offset,ma,ca_in,airfoil_grid_u,airfoil_grid_l,z_min,z_max]', collapse=3) - do l = 0, p - do j = 0, n - do i = 0, m + do l = ll, lr + do j = jl, jr + do i = il, ir xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), z_cc(l) - center(3)] ! get coordinate frame centered on IB xyz_local = matmul(inverse_rotation, xyz_local) ! rotate the frame into the IB's coordinates xyz_local = xyz_local - offset ! airfoils are a patch that require a centroid offset @@ -549,13 +449,13 @@ contains if (f_approx_equal(airfoil_grid_u(k)%x, xyz_local(1))) then if (xyz_local(2) <= airfoil_grid_u(k)%y) then !IB - ib_markers_sf(i, j, l) = patch_id + ib_markers%sf(i, j, l) = patch_id end if else f = (airfoil_grid_u(k)%x - xyz_local(1))/(airfoil_grid_u(k)%x - airfoil_grid_u(k - 1)%x) if (xyz_local(2) <= ((1._wp - f)*airfoil_grid_u(k)%y + f*airfoil_grid_u(k - 1)%y)) then !!IB - ib_markers_sf(i, j, l) = patch_id + ib_markers%sf(i, j, l) = patch_id end if end if else @@ -566,14 +466,14 @@ contains if (f_approx_equal(airfoil_grid_l(k)%x, xyz_local(1))) then if (xyz_local(2) >= airfoil_grid_l(k)%y) then !!IB - ib_markers_sf(i, j, l) = patch_id + ib_markers%sf(i, j, l) = patch_id end if else f = (airfoil_grid_l(k)%x - xyz_local(1))/(airfoil_grid_l(k)%x - airfoil_grid_l(k - 1)%x) if (xyz_local(2) >= ((1._wp - f)*airfoil_grid_l(k)%y + f*airfoil_grid_l(k - 1)%y)) then !!IB - ib_markers_sf(i, j, l) = patch_id + ib_markers%sf(i, j, l) = patch_id end if end if end if @@ -595,22 +495,19 @@ contains !! rectangular patch DOES NOT allow for the smoothing of its !! boundaries. !! @param patch_id is the patch identifier - !! @param ib_markers_sf Array to track patch ids - subroutine s_ib_rectangle(patch_id, ib_markers_sf) + !! @param ib_markers Array to track patch ids + !! @param ib True if this patch is an immersed boundary + subroutine s_ib_rectangle(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers - integer :: i, j, k !< generic loop iterators - real(wp) :: pi_inf, gamma, lit_gamma !< Equation of state parameters + integer :: i, j, il, ir, jl, jr !< generic loop iterators + real(wp) :: corner_distance !< Equation of state parameters real(wp), dimension(1:3) :: xy_local !< x and y coordinates in local IB frame real(wp), dimension(1:2) :: length, center !< x and y coordinates in local IB frame real(wp), dimension(1:3, 1:3) :: inverse_rotation - pi_inf = fluid_pp(1)%pi_inf - gamma = fluid_pp(1)%gamma - lit_gamma = (1._wp + gamma)/gamma - ! Transferring the rectangle's centroid and length information center(1) = patch_ib(patch_id)%x_centroid center(2) = patch_ib(patch_id)%y_centroid @@ -618,14 +515,23 @@ contains length(2) = patch_ib(patch_id)%length_y inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) + ! find the indices to the left and right of the IB in i, j, k + il = -gp_layers + jl = -gp_layers + ir = m + gp_layers + jr = n + gp_layers + corner_distance = sqrt(dot_product(length, length))/2._wp ! maximum distance any marker can be from the center + call get_bounding_indices(center(1) - corner_distance, center(1) + corner_distance, x_cc, il, ir) + call get_bounding_indices(center(2) - corner_distance, center(2) + corner_distance, y_cc, jl, jr) + ! Checking whether the rectangle covers a particular cell in the ! domain and verifying whether the current patch has the permission ! to write to that cell. If both queries check out, the primitive ! variables of the current patch are assigned to this cell. - $:GPU_PARALLEL_LOOP(private='[i,j, xy_local]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j, xy_local]',& & copyin='[patch_id,center,length,inverse_rotation,x_cc,y_cc]', collapse=2) - do j = 0, n - do i = 0, m + do j = jl, jr + do i = il, ir ! get the x and y coordinates in the local IB frame xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] xy_local = matmul(inverse_rotation, xy_local) @@ -636,7 +542,7 @@ contains 0.5_wp*length(2) >= xy_local(2)) then ! Updating the patch identities bookkeeping variable - ib_markers_sf(i, j, 0) = patch_id + ib_markers%sf(i, j, 0) = patch_id end if end do @@ -651,14 +557,16 @@ contains !! provided. Please note that the spherical patch DOES allow !! for the smoothing of its boundary. !! @param patch_id is the patch identifier - !! @param ib_markers_sf Array to track patch ids - subroutine s_ib_sphere(patch_id, ib_markers_sf) + !! @param ib_markers Array to track patch ids + !! @param ib True if this patch is an immersed boundary + subroutine s_ib_sphere(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers ! Generic loop iterators integer :: i, j, k + integer :: il, ir, jl, jr, kl, kr real(wp) :: radius real(wp), dimension(1:3) :: center @@ -672,15 +580,27 @@ contains center(3) = patch_ib(patch_id)%z_centroid radius = patch_ib(patch_id)%radius + ! find the indices to the left and right of the IB in i, j, k + il = -gp_layers + jl = -gp_layers + kl = -gp_layers + ir = m + gp_layers + jr = n + gp_layers + kr = p + gp_layers + call get_bounding_indices(center(1) - radius, center(1) + radius, x_cc, il, ir) + call get_bounding_indices(center(2) - radius, center(2) + radius, y_cc, jl, jr) + call get_bounding_indices(center(3) - radius, center(3) + radius, z_cc, kl, kr) + ! Checking whether the sphere covers a particular cell in the domain ! and verifying whether the current patch has permission to write to ! that cell. If both queries check out, the primitive variables of ! the current patch are assigned to this cell. - $:GPU_PARALLEL_LOOP(private='[i,j,k,cart_y,cart_z]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,k,cart_y,cart_z]',& & copyin='[patch_id,center,radius]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m + do k = kl, kr + do j = jl, jr + do i = il, ir + ! do i = -gp_layers, m+gp_layers if (grid_geometry == 3) then call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) else @@ -691,7 +611,7 @@ contains if (((x_cc(i) - center(1))**2 & + (cart_y - center(2))**2 & + (cart_z - center(3))**2 <= radius**2)) then - ib_markers_sf(i, j, k) = patch_id + ib_markers%sf(i, j, k) = patch_id end if end do end do @@ -709,15 +629,16 @@ contains !! the cuboidal patch DOES NOT allow for the smearing of its !! boundaries. !! @param patch_id is the patch identifier - !! @param ib_markers_sf Array to track patch ids - subroutine s_ib_cuboid(patch_id, ib_markers_sf) + !! @param ib_markers Array to track patch ids + subroutine s_ib_cuboid(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers - integer :: i, j, k !< Generic loop iterators + integer :: i, j, k, ir, il, jr, jl, kr, kl !< Generic loop iterators real(wp), dimension(1:3) :: xyz_local, center, length !< x and y coordinates in local IB frame real(wp), dimension(1:3, 1:3) :: inverse_rotation + real(wp) :: corner_distance ! Transferring the cuboid's centroid and length information center(1) = patch_ib(patch_id)%x_centroid @@ -728,15 +649,27 @@ contains length(3) = patch_ib(patch_id)%length_z inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) + ! find the indices to the left and right of the IB in i, j, k + il = -gp_layers + jl = -gp_layers + kl = -gp_layers + ir = m + gp_layers + jr = n + gp_layers + kr = p + gp_layers + corner_distance = sqrt(dot_product(length, length))/2._wp ! maximum distance any marker can be from the center + call get_bounding_indices(center(1) - corner_distance, center(1) + corner_distance, x_cc, il, ir) + call get_bounding_indices(center(2) - corner_distance, center(2) + corner_distance, y_cc, jl, jr) + call get_bounding_indices(center(3) - corner_distance, center(3) + corner_distance, z_cc, kl, kr) + ! Checking whether the cuboid covers a particular cell in the domain ! and verifying whether the current patch has permission to write to ! to that cell. If both queries check out, the primitive variables ! of the current patch are assigned to this cell. - $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]',& & copyin='[patch_id,center,length,inverse_rotation]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m + do k = kl, kr + do j = jl, jr + do i = il, ir if (grid_geometry == 3) then ! TODO :: This does not work and is not covered by any tests. This should be fixed @@ -756,7 +689,7 @@ contains 0.5*length(3) >= xyz_local(3)) then ! Updating the patch identities bookkeeping variable - ib_markers_sf(i, j, k) = patch_id + ib_markers%sf(i, j, k) = patch_id end if end do end do @@ -774,16 +707,18 @@ contains !! that the cylindrical patch DOES allow for the smoothing !! of its lateral boundary. !! @param patch_id is the patch identifier - !! @param ib_markers_sf Array to track patch ids - subroutine s_ib_cylinder(patch_id, ib_markers_sf) + !! @param ib_markers Array to track patch ids + !! @param ib True if this patch is an immersed boundary + subroutine s_ib_cylinder(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers - integer :: i, j, k !< Generic loop iterators + integer :: i, j, k, il, ir, jl, jr, kl, kr !< Generic loop iterators real(wp) :: radius real(wp), dimension(1:3) :: xyz_local, center, length !< x and y coordinates in local IB frame real(wp), dimension(1:3, 1:3) :: inverse_rotation + real(wp) :: corner_distance ! Transferring the cylindrical patch's centroid, length, radius, ! smoothing patch identity and smoothing coefficient information @@ -797,15 +732,26 @@ contains radius = patch_ib(patch_id)%radius inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) + il = -gp_layers + jl = -gp_layers + kl = -gp_layers + ir = m + gp_layers + jr = n + gp_layers + kr = p + gp_layers + corner_distance = sqrt(radius**2 + maxval(length)**2) ! distance to rim of cylinder + call get_bounding_indices(center(1) - corner_distance, center(1) + corner_distance, x_cc, il, ir) + call get_bounding_indices(center(2) - corner_distance, center(2) + corner_distance, y_cc, jl, jr) + call get_bounding_indices(center(3) - corner_distance, center(3) + corner_distance, z_cc, kl, kr) + ! Checking whether the cylinder covers a particular cell in the ! domain and verifying whether the current patch has the permission ! to write to that cell. If both queries check out, the primitive ! variables of the current patch are assigned to this cell. - $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j,k,xyz_local,cart_y,cart_z]',& & copyin='[patch_id,center,length,radius,inverse_rotation]', collapse=3) - do k = 0, p - do j = 0, n - do i = 0, m + do k = kl, kr + do j = jl, jr + do i = il, ir if (grid_geometry == 3) then call s_convert_cylindrical_to_cartesian_coord(y_cc(j), z_cc(k)) @@ -835,7 +781,7 @@ contains 0.5_wp*length(3) >= xyz_local(3)))) then ! Updating the patch identities bookkeeping variable - ib_markers_sf(i, j, k) = patch_id + ib_markers%sf(i, j, k) = patch_id end if end do end do @@ -845,10 +791,10 @@ contains end subroutine s_ib_cylinder !> @brief Marks cells inside a 2D elliptical immersed boundary defined by semi-axis lengths and rotation. - subroutine s_ib_ellipse(patch_id, ib_markers_sf) + subroutine s_ib_ellipse(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf + type(integer_field), intent(inout) :: ib_markers integer :: i, j, k !< generic loop iterators real(wp), dimension(1:3) :: xy_local !< x and y coordinates in local IB frame @@ -865,10 +811,10 @@ contains ! Checking whether the ellipse covers a particular cell in the ! domain - $:GPU_PARALLEL_LOOP(private='[i,j, xy_local]', copy='[ib_markers_sf]',& + $:GPU_PARALLEL_LOOP(private='[i,j, xy_local]',& & copyin='[patch_id,center,ellipse_coeffs,inverse_rotation,x_cc,y_cc]', collapse=2) - do j = 0, n - do i = 0, m + do j = -gp_layers, n + gp_layers + do i = -gp_layers, m + gp_layers ! get the x and y coordinates in the local IB frame xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] xy_local = matmul(inverse_rotation, xy_local) @@ -876,7 +822,7 @@ contains ! Ellipse condition (x/a)^2 + (y/b)^2 <= 1 if ((xy_local(1)/ellipse_coeffs(1))**2 + (xy_local(2)/ellipse_coeffs(2))**2 <= 1._wp) then ! Updating the patch identities bookkeeping variable - ib_markers_sf(i, j, 0) = patch_id + ib_markers%sf(i, j, 0) = patch_id end if end do end do @@ -884,39 +830,107 @@ contains end subroutine s_ib_ellipse - !> The STL patch is a 2/3D geometry that is imported from an STL file. + !> The STL patch is a 2D geometry that is imported from an STL file. !! @param patch_id is the patch identifier - !! @param ib_markers_sf Array to track patch ids - subroutine s_ib_model(patch_id, ib_markers_sf) + !! @param ib_markers Array to track patch ids + subroutine s_ib_model(patch_id, ib_markers) integer, intent(in) :: patch_id - integer, dimension(0:m, 0:n, 0:p), intent(inout) :: ib_markers_sf - - integer :: i, j, k !< Generic loop iterators + type(integer_field), intent(inout) :: ib_markers - type(t_model), pointer :: model + integer :: i, j, k !< Generic loop iterators + integer :: spc - real(wp) :: eta + real(wp) :: eta, threshold real(wp), dimension(1:3) :: point, local_point, offset - real(wp), dimension(1:3) :: center, xyz_local + real(wp), dimension(1:3) :: center, xy_local real(wp), dimension(1:3, 1:3) :: inverse_rotation - model => models(patch_id)%model center = 0._wp center(1) = patch_ib(patch_id)%x_centroid center(2) = patch_ib(patch_id)%y_centroid - if (p > 0) center(3) = patch_ib(patch_id)%z_centroid inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) offset(:) = patch_ib(patch_id)%centroid_offset(:) + spc = patch_ib(patch_id)%model_spc + threshold = patch_ib(patch_id)%model_threshold - do i = 0, m - do j = 0, n - do k = 0, p + $:GPU_PARALLEL_LOOP(private='[i,j, xy_local, eta]',& + & copyin='[patch_id,center,inverse_rotation, offset, spc, threshold]', collapse=2) + do i = -gp_layers, m + gp_layers + do j = -gp_layers, n + gp_layers - xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] - if (p > 0) then - xyz_local(3) = z_cc(k) - center(3) - end if + xy_local = [x_cc(i) - center(1), y_cc(j) - center(2), 0._wp] + xy_local = matmul(inverse_rotation, xy_local) + xy_local = xy_local - offset + + if (grid_geometry == 3) then + xy_local = f_convert_cyl_to_cart(xy_local) + end if + + eta = f_model_is_inside_flat(gpu_ntrs(patch_id), & + gpu_trs_v, gpu_trs_n, & + patch_id, & + xy_local, (/dx(i), dy(j), 0._wp/), & + spc) + + ! Reading STL boundary vertices and compute the levelset and levelset_norm + if (eta > threshold) then + ib_markers%sf(i, j, 0) = patch_id + end if + + end do + end do + $:END_GPU_PARALLEL_LOOP() + + end subroutine s_ib_model + + !> The STL patch is a 3D geometry that is imported from an STL file. + !! @param patch_id is the patch identifier + !! @param ib_markers Array to track patch ids + subroutine s_ib_3d_model(patch_id, ib_markers) + + integer, intent(in) :: patch_id + type(integer_field), intent(inout) :: ib_markers + + integer :: i, j, k, il, ir, jl, jr, kl, kr !< Generic loop iterators + integer :: spc + + real(wp) :: eta, threshold, corner_distance + real(wp), dimension(1:3) :: point, local_point, offset + real(wp), dimension(1:3) :: center, xyz_local + real(wp), dimension(1:3, 1:3) :: inverse_rotation + + center = 0._wp + center(1) = patch_ib(patch_id)%x_centroid + center(2) = patch_ib(patch_id)%y_centroid + center(3) = patch_ib(patch_id)%z_centroid + inverse_rotation(:, :) = patch_ib(patch_id)%rotation_matrix_inverse(:, :) + offset(:) = patch_ib(patch_id)%centroid_offset(:) + spc = patch_ib(patch_id)%model_spc + threshold = patch_ib(patch_id)%model_threshold + + il = -gp_layers + jl = -gp_layers + kl = -gp_layers + ir = m + gp_layers + jr = n + gp_layers + kr = p + gp_layers + corner_distance = 0._wp + do i = 1, 3 + corner_distance = corner_distance + maxval(abs(stl_bounding_boxes(patch_id, i, 1:3)))**2 ! distance to rim of cylinder + end do + corner_distance = sqrt(corner_distance) + call get_bounding_indices(center(1) - corner_distance, center(1) + corner_distance, x_cc, il, ir) + call get_bounding_indices(center(2) - corner_distance, center(2) + corner_distance, y_cc, jl, jr) + call get_bounding_indices(center(3) - corner_distance, center(3) + corner_distance, z_cc, kl, kr) + + $:GPU_PARALLEL_LOOP(private='[i,j,k, xyz_local, eta]',& + & copyin='[patch_id,center,inverse_rotation, offset, spc, threshold]', collapse=3) + do i = il, ir + do j = jl, jr + do k = kl, kr + + xyz_local = [x_cc(i) - center(1), y_cc(j) - center(2), z_cc(k) - center(3)] xyz_local = matmul(inverse_rotation, xyz_local) xyz_local = xyz_local - offset @@ -924,22 +938,241 @@ contains xyz_local = f_convert_cyl_to_cart(xyz_local) end if - if (p == 0) then - eta = f_model_is_inside(model, xyz_local, (/dx(i), dy(j), 0._wp/), patch_ib(patch_id)%model_spc) - else - eta = f_model_is_inside(model, xyz_local, (/dx(i), dy(j), dz(k)/), patch_ib(patch_id)%model_spc) - end if + eta = f_model_is_inside_flat(gpu_ntrs(patch_id), & + gpu_trs_v, gpu_trs_n, & + patch_id, & + xyz_local, (/dx(i), dy(j), dz(k)/), & + spc) ! Reading STL boundary vertices and compute the levelset and levelset_norm if (eta > patch_ib(patch_id)%model_threshold) then - ib_markers_sf(i, j, k) = patch_id + ib_markers%sf(i, j, k) = patch_id end if end do end do end do + $:END_GPU_PARALLEL_LOOP() - end subroutine s_ib_model + end subroutine s_ib_3d_model + + subroutine s_instantiate_STL_models() + + ! Variables for IBM+STL + real(wp) :: normals(1:3) !< Boundary normal buffer + integer :: boundary_vertex_count, boundary_edge_count, total_vertices !< Boundary vertex + real(wp), allocatable, dimension(:, :, :) :: boundary_v !< Boundary vertex buffer + real(wp), allocatable, dimension(:, :) :: interpolated_boundary_v !< Interpolated vertex buffer + real(wp) :: dx_local, dy_local, dz_local !< Levelset distance buffer + logical :: interpolate !< Logical variable to determine whether or not the model should be interpolated + + integer :: i, j, k !< Generic loop iterators + integer :: patch_id + + type(t_bbox) :: bbox, bbox_old + type(t_model) :: model + type(ic_model_parameters) :: params + + real(wp) :: eta + real(wp), dimension(1:3) :: point, model_center + real(wp) :: grid_mm(1:3, 1:2) + + real(wp), dimension(1:4, 1:4) :: transform, transform_n + + dx_local = minval(dx); dy_local = minval(dy) + if (p /= 0) dz_local = minval(dz) + + do patch_id = 1, num_ibs + if (patch_ib(patch_id)%geometry == 5 .or. patch_ib(patch_id)%geometry == 12) then + allocate (models(patch_id)%model) + print *, " * Reading model: "//trim(patch_ib(patch_id)%model_filepath) + + model = f_model_read(patch_ib(patch_id)%model_filepath) + params%scale(:) = patch_ib(patch_id)%model_scale(:) + params%translate(:) = patch_ib(patch_id)%model_translate(:) + params%rotate(:) = patch_ib(patch_id)%model_rotate(:) + params%spc = patch_ib(patch_id)%model_spc + params%threshold = patch_ib(patch_id)%model_threshold + + if (f_approx_equal(dot_product(params%scale, params%scale), 0._wp)) then + params%scale(:) = 1._wp + end if + + if (proc_rank == 0) then + print *, " * Transforming model." + end if + + ! Get the model center before transforming the model + bbox_old = f_create_bbox(model) + model_center(1:3) = (bbox_old%min(1:3) + bbox_old%max(1:3))/2._wp + + ! Compute the transform matrices for vertices and normals + transform = f_create_transform_matrix(params, model_center) + transform_n = f_create_transform_matrix(params) + + call s_transform_model(model, transform, transform_n) + + ! Recreate the bounding box after transformation + bbox = f_create_bbox(model) + + ! Show the number of vertices in the original STL model + if (proc_rank == 0) then + print *, ' * Number of input model vertices:', 3*model%ntrs + end if + + call f_check_boundary(model, boundary_v, boundary_vertex_count, boundary_edge_count) + + ! Check if the model needs interpolation + if (p > 0) then + call f_check_interpolation_3D(model, (/dx_local, dy_local, dz_local/), interpolate) + else + call f_check_interpolation_2D(boundary_v, boundary_edge_count, (/dx_local, dy_local, 0._wp/), interpolate) + end if + interpolate = .false. + + ! Show the number of edges and boundary edges in 2D STL models + if (proc_rank == 0 .and. p == 0) then + print *, ' * Number of 2D model boundary edges:', boundary_edge_count + end if + + ! Interpolate the STL model along the edges (2D) and on triangle facets (3D) + if (interpolate) then + if (proc_rank == 0) then + print *, ' * Interpolating STL vertices.' + end if + + if (p > 0) then + call f_interpolate_3D(model, (/dx, dy, dz/), interpolated_boundary_v, total_vertices) + else + call f_interpolate_2D(boundary_v, boundary_edge_count, (/dx, dy, dz/), interpolated_boundary_v, total_vertices) + end if + + if (proc_rank == 0) then + print *, ' * Total number of interpolated boundary vertices:', total_vertices + end if + end if + + if (proc_rank == 0) then + write (*, "(A, 3(2X, F20.10))") " > Model: Min:", bbox%min(1:3) + write (*, "(A, 3(2X, F20.10))") " > Cen:", (bbox%min(1:3) + bbox%max(1:3))/2._wp + write (*, "(A, 3(2X, F20.10))") " > Max:", bbox%max(1:3) + + grid_mm(1, :) = (/minval(x_cc(0:m)) - 0.5_wp*dx_local, maxval(x_cc(0:m)) + 0.5_wp*dx_local/) + grid_mm(2, :) = (/minval(y_cc(0:n)) - 0.5_wp*dy_local, maxval(y_cc(0:n)) + 0.5_wp*dy_local/) + + if (p > 0) then + grid_mm(3, :) = (/minval(z_cc(0:p)) - 0.5_wp*dz_local, maxval(z_cc(0:p)) + 0.5_wp*dz_local/) + else + grid_mm(3, :) = (/0._wp, 0._wp/) + end if + + write (*, "(A, 3(2X, F20.10))") " > Domain: Min:", grid_mm(:, 1) + write (*, "(A, 3(2X, F20.10))") " > Cen:", (grid_mm(:, 1) + grid_mm(:, 2))/2._wp + write (*, "(A, 3(2X, F20.10))") " > Max:", grid_mm(:, 2) + end if + + allocate (stl_bounding_boxes(patch_id, 1:3, 1:3)) + stl_bounding_boxes(patch_id, 1, 1:3) = [bbox%min(1), (bbox%min(1) + bbox%max(1))/2._wp, bbox%max(1)] + stl_bounding_boxes(patch_id, 2, 1:3) = [bbox%min(2), (bbox%min(2) + bbox%max(2))/2._wp, bbox%max(2)] + stl_bounding_boxes(patch_id, 3, 1:3) = [bbox%min(3), (bbox%min(3) + bbox%max(3))/2._wp, bbox%max(3)] + + models(patch_id)%model = model + models(patch_id)%boundary_v = boundary_v + models(patch_id)%boundary_edge_count = boundary_edge_count + if (interpolate) then + models(patch_id)%interpolate = 1 + else + models(patch_id)%interpolate = 0 + end if + if (interpolate) then + models(patch_id)%interpolated_boundary_v = interpolated_boundary_v + models(patch_id)%total_vertices = total_vertices + end if + + end if + end do + + ! Pack and upload flat arrays for GPU (AFTER the loop) + block + integer :: pid, max_ntrs + integer :: max_bv1, max_bv2, max_bv3, max_iv1, max_iv2 + + max_ntrs = 0 + max_bv1 = 0; max_bv2 = 0; max_bv3 = 0 + max_iv1 = 0; max_iv2 = 0 + + do pid = 1, num_ibs + if (allocated(models(pid)%model)) then + call s_pack_model_for_gpu(models(pid)) + max_ntrs = max(max_ntrs, models(pid)%ntrs) + end if + if (allocated(models(pid)%boundary_v)) then + max_bv1 = max(max_bv1, size(models(pid)%boundary_v, 1)) + max_bv2 = max(max_bv2, size(models(pid)%boundary_v, 2)) + max_bv3 = max(max_bv3, size(models(pid)%boundary_v, 3)) + end if + if (allocated(models(pid)%interpolated_boundary_v)) then + max_iv1 = max(max_iv1, size(models(pid)%interpolated_boundary_v, 1)) + max_iv2 = max(max_iv2, size(models(pid)%interpolated_boundary_v, 2)) + end if + end do + + if (max_ntrs > 0) then + allocate (gpu_ntrs(1:num_ibs)) + allocate (gpu_trs_v(1:3, 1:3, 1:max_ntrs, 1:num_ibs)) + allocate (gpu_trs_n(1:3, 1:max_ntrs, 1:num_ibs)) + allocate (gpu_interpolate(1:num_ibs)) + allocate (gpu_boundary_edge_count(1:num_ibs)) + allocate (gpu_total_vertices(1:num_ibs)) + + gpu_ntrs = 0 + gpu_trs_v = 0._wp + gpu_trs_n = 0._wp + gpu_interpolate = 0 + gpu_boundary_edge_count = 0 + gpu_total_vertices = 0 + + if (max_bv1 > 0) then + allocate (gpu_boundary_v(1:max_bv1, 1:max_bv2, 1:max_bv3, 1:num_ibs)) + gpu_boundary_v = 0._wp + end if + + if (max_iv1 > 0) then + allocate (gpu_interpolated_boundary_v(1:max_iv1, 1:max_iv2, 1:num_ibs)) + gpu_interpolated_boundary_v = 0._wp + end if + + do pid = 1, num_ibs + if (allocated(models(pid)%model)) then + gpu_ntrs(pid) = models(pid)%ntrs + gpu_trs_v(:, :, 1:models(pid)%ntrs, pid) = models(pid)%trs_v + gpu_trs_n(:, 1:models(pid)%ntrs, pid) = models(pid)%trs_n + gpu_interpolate(pid) = models(pid)%interpolate + gpu_boundary_edge_count(pid) = models(pid)%boundary_edge_count + gpu_total_vertices(pid) = models(pid)%total_vertices + end if + if (allocated(models(pid)%boundary_v)) then + gpu_boundary_v(1:size(models(pid)%boundary_v, 1), & + 1:size(models(pid)%boundary_v, 2), & + 1:size(models(pid)%boundary_v, 3), pid) = models(pid)%boundary_v + end if + if (allocated(models(pid)%interpolated_boundary_v)) then + gpu_interpolated_boundary_v(1:size(models(pid)%interpolated_boundary_v, 1), & + 1:size(models(pid)%interpolated_boundary_v, 2), pid) = models(pid)%interpolated_boundary_v + end if + end do + + $:GPU_ENTER_DATA(copyin='[gpu_ntrs, gpu_trs_v, gpu_trs_n, gpu_interpolate, gpu_boundary_edge_count, gpu_total_vertices]') + if (allocated(gpu_boundary_v)) then + $:GPU_ENTER_DATA(copyin='[gpu_boundary_v]') + end if + if (allocated(gpu_interpolated_boundary_v)) then + $:GPU_ENTER_DATA(copyin='[gpu_interpolated_boundary_v]') + end if + end if + end block + + end subroutine s_instantiate_STL_models !> Subroutine that computes a rotation matrix for converting to the rotating frame of the boundary subroutine s_update_ib_rotation_matrix(patch_id) @@ -1021,6 +1254,46 @@ contains end subroutine s_convert_cylindrical_to_spherical_coord + subroutine get_bounding_indices(left_bound, right_bound, cell_centers, left_index, right_index) + + real(wp), intent(in) :: left_bound, right_bound + integer, intent(inout) :: left_index, right_index + real(wp), dimension(-buff_size:), intent(in) :: cell_centers + + integer :: itr_left, itr_middle, itr_right + + itr_left = left_index + itr_right = right_index + + do while (itr_left + 1 < itr_right) + itr_middle = (itr_left + itr_right)/2 + if (cell_centers(itr_middle) < left_bound) then + itr_left = itr_middle + else if (cell_centers(itr_middle) > left_bound) then + itr_right = itr_middle + else + itr_left = itr_middle + exit + end if + end do + left_index = itr_left + + itr_right = right_index + do while (itr_left + 1 < itr_right) + itr_middle = (itr_left + itr_right)/2 + if (cell_centers(itr_middle) < right_bound) then + itr_left = itr_middle + else if (cell_centers(itr_middle) > right_bound) then + itr_right = itr_middle + else + itr_right = itr_middle + exit + end if + end do + right_index = itr_right + + end subroutine get_bounding_indices + !> Archimedes spiral function !! @param myth Angle !! @param offset Thickness diff --git a/src/simulation/m_ibm.fpp b/src/simulation/m_ibm.fpp index bfe09ec5ac..a7f1e20aad 100644 --- a/src/simulation/m_ibm.fpp +++ b/src/simulation/m_ibm.fpp @@ -27,6 +27,8 @@ module m_ibm use m_viscous + use m_model + implicit none private :: s_compute_image_points, & @@ -83,6 +85,8 @@ contains integer :: i, j, k integer :: max_num_gps, max_num_inner_gps + call nvtxStartRange("SETUP-IBM-MODULE") + ! do all set up for moving immersed boundaries moving_immersed_boundary_flag = .false. do i = 1, num_ibs @@ -95,9 +99,9 @@ contains $:GPU_UPDATE(device='[patch_ib(1:num_ibs)]') ! GPU routines require updated cell centers - $:GPU_UPDATE(device='[x_cc, y_cc]') + $:GPU_UPDATE(device='[x_cc, y_cc, dx, dy]') if (p /= 0) then - $:GPU_UPDATE(device='[z_cc]') + $:GPU_UPDATE(device='[z_cc, dz]') end if ! allocate STL models @@ -105,9 +109,8 @@ contains ! recompute the new ib_patch locations and broadcast them. ib_markers%sf = 0._wp - call s_apply_ib_patches(ib_markers%sf(0:m, 0:n, 0:p)) $:GPU_UPDATE(device='[ib_markers%sf]') - call s_populate_ib_buffers() + call s_apply_ib_patches(ib_markers) $:GPU_UPDATE(host='[ib_markers%sf]') do i = 1, num_ibs if (patch_ib(i)%moving_ibm /= 0) call s_compute_centroid_offset(i) ! offsets are computed after IB markers are generated @@ -127,18 +130,13 @@ contains @:ALLOCATE(inner_points(1:int((max_num_gps + max_num_inner_gps) * 2.0))) $:GPU_ENTER_DATA(copyin='[ghost_points,inner_points]') - call s_find_ghost_points(ghost_points, inner_points) - $:GPU_UPDATE(device='[ghost_points, inner_points]') - call s_apply_levelset(ghost_points, num_gps) - $:GPU_UPDATE(device='[ghost_points]') call s_compute_image_points(ghost_points) - $:GPU_UPDATE(device='[ghost_points]') - call s_compute_interpolation_coeffs(ghost_points) - $:GPU_UPDATE(device='[ghost_points]') + + call nvtxEndRange end subroutine s_ibm_setup @@ -456,6 +454,7 @@ contains integer :: dir integer :: index + $:GPU_PARALLEL_LOOP(private='[q,gp,i,j,k,physical_loc,patch_id,dist,norm,dim,bound,dir,index,temp_loc,s_cc]') do q = 1, num_gps gp = ghost_points_in(q) i = gp%loc(1) @@ -505,6 +504,7 @@ contains do while ((temp_loc < s_cc(index) & .or. temp_loc > s_cc(index + 1))) index = index + dir +#if !defined(MFC_OpenACC) && !defined(MFC_OpenMP) if (index < -buff_size .or. index > bound) then print *, "A required image point is not located in this computational domain." print *, "Ghost Point is located at ", [x_cc(i), y_cc(j), z_cc(k)], " while moving in dimension ", dim @@ -516,6 +516,7 @@ contains print *, "A short term fix may include increasing buff_size further in m_helper_basic (currently set to a minimum of 10)" error stop "Ghost Point and Image Point on Different Processors" end if +#endif end do ghost_points_in(q)%ip_grid(dim) = index if (ghost_points_in(q)%DB(dim) == -1) then @@ -526,55 +527,59 @@ contains end if end do end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_image_points - !> Function that finds the number of ghost points, used for allocating + !> Subroutine that finds the number of ghost points, used for allocating !! memory. subroutine s_find_num_ghost_points(num_gps_out, num_inner_gps_out) integer, intent(out) :: num_gps_out integer, intent(out) :: num_inner_gps_out - integer, dimension(2*gp_layers + 1, 2*gp_layers + 1) & - :: subsection_2D - integer, dimension(2*gp_layers + 1, 2*gp_layers + 1, 2*gp_layers + 1) & - :: subsection_3D - integer :: i, j, k!< Iterator variables + integer :: i, j, k, ii, jj, kk, gp_layers_z !< Iterator variables + integer :: num_gps_local, num_inner_gps_local !< local copies of the gp count to support GPU compute + logical :: is_gp - num_gps_out = 0 - num_inner_gps_out = 0 + num_gps_local = 0 + num_inner_gps_local = 0 + gp_layers_z = gp_layers + if (p == 0) gp_layers_z = 0 + $:GPU_PARALLEL_LOOP(private='[i,j,k,ii,jj,kk,is_gp]', copy='[num_gps_local,num_inner_gps_local]', firstprivate='[gp_layers,gp_layers_z]', collapse=3) do i = 0, m do j = 0, n - if (p == 0) then - if (ib_markers%sf(i, j, 0) /= 0) then - subsection_2D = ib_markers%sf( & - i - gp_layers:i + gp_layers, & - j - gp_layers:j + gp_layers, 0) - if (any(subsection_2D == 0)) then - num_gps_out = num_gps_out + 1 + do k = 0, p + if (ib_markers%sf(i, j, k) /= 0) then + is_gp = .false. + marker_search: do ii = i - gp_layers, i + gp_layers + do jj = j - gp_layers, j + gp_layers + do kk = k - gp_layers_z, k + gp_layers_z + if (ib_markers%sf(ii, jj, kk) == 0) then + ! if any neighbors are not in the IB, it is a ghost point + is_gp = .true. + exit marker_search + end if + end do + end do + end do marker_search + + if (is_gp) then + $:GPU_ATOMIC(atomic='update') + num_gps_local = num_gps_local + 1 else - num_inner_gps_out = num_inner_gps_out + 1 + $:GPU_ATOMIC(atomic='update') + num_inner_gps_local = num_inner_gps_local + 1 end if end if - else - do k = 0, p - if (ib_markers%sf(i, j, k) /= 0) then - subsection_3D = ib_markers%sf( & - i - gp_layers:i + gp_layers, & - j - gp_layers:j + gp_layers, & - k - gp_layers:k + gp_layers) - if (any(subsection_3D == 0)) then - num_gps_out = num_gps_out + 1 - else - num_inner_gps_out = num_inner_gps_out + 1 - end if - end if - end do - end if + end do end do end do + $:END_GPU_PARALLEL_LOOP() + + num_gps_out = num_gps_local + num_inner_gps_out = num_inner_gps_local end subroutine s_find_num_ghost_points @@ -583,116 +588,89 @@ contains type(ghost_point), dimension(num_gps), intent(INOUT) :: ghost_points_in type(ghost_point), dimension(num_inner_gps), intent(INOUT) :: inner_points_in - integer, dimension(2*gp_layers + 1, 2*gp_layers + 1) & - :: subsection_2D - integer, dimension(2*gp_layers + 1, 2*gp_layers + 1, 2*gp_layers + 1) & - :: subsection_3D - integer :: i, j, k !< Iterator variables - integer :: count, count_i + integer :: i, j, k, ii, jj, kk, gp_layers_z !< Iterator variables + integer :: count, count_i, local_idx integer :: patch_id + logical :: is_gp - count = 1 - count_i = 1 + count = 0 + count_i = 0 + gp_layers_z = gp_layers + if (p == 0) gp_layers_z = 0 + $:GPU_PARALLEL_LOOP(private='[i,j,k,ii,jj,kk,is_gp,local_idx]', copyin='[count,count_i, x_domain, y_domain, z_domain]', firstprivate='[gp_layers,gp_layers_z]', collapse=3) do i = 0, m do j = 0, n - if (p == 0) then - ! 2D - if (ib_markers%sf(i, j, 0) /= 0) then - subsection_2D = ib_markers%sf( & - i - gp_layers:i + gp_layers, & - j - gp_layers:j + gp_layers, 0) - if (any(subsection_2D == 0)) then - ghost_points_in(count)%loc = [i, j, 0] - patch_id = ib_markers%sf(i, j, 0) - ghost_points_in(count)%ib_patch_id = & - patch_id - - ghost_points_in(count)%slip = patch_ib(patch_id)%slip - ! ghost_points(count)%rank = proc_rank + do k = 0, p + if (ib_markers%sf(i, j, k) /= 0) then + is_gp = .false. + marker_search: do ii = i - gp_layers, i + gp_layers + do jj = j - gp_layers, j + gp_layers + do kk = k - gp_layers_z, k + gp_layers_z + if (ib_markers%sf(ii, jj, kk) == 0) then + ! if any neighbors are not in the IB, it is a ghost point + is_gp = .true. + exit marker_search + end if + end do + end do + end do marker_search + + if (is_gp) then + $:GPU_ATOMIC(atomic='capture') + count = count + 1 + local_idx = count + $:END_GPU_ATOMIC_CAPTURE() + + ghost_points_in(local_idx)%loc = [i, j, k] + patch_id = ib_markers%sf(i, j, k) + ghost_points_in(local_idx)%ib_patch_id = patch_id + + ghost_points_in(local_idx)%slip = patch_ib(patch_id)%slip if ((x_cc(i) - dx(i)) < x_domain%beg) then - ghost_points_in(count)%DB(1) = -1 + ghost_points_in(local_idx)%DB(1) = -1 else if ((x_cc(i) + dx(i)) > x_domain%end) then - ghost_points_in(count)%DB(1) = 1 + ghost_points_in(local_idx)%DB(1) = 1 else - ghost_points_in(count)%DB(1) = 0 + ghost_points_in(local_idx)%DB(1) = 0 end if if ((y_cc(j) - dy(j)) < y_domain%beg) then - ghost_points_in(count)%DB(2) = -1 + ghost_points_in(local_idx)%DB(2) = -1 else if ((y_cc(j) + dy(j)) > y_domain%end) then - ghost_points_in(count)%DB(2) = 1 + ghost_points_in(local_idx)%DB(2) = 1 else - ghost_points_in(count)%DB(2) = 0 + ghost_points_in(local_idx)%DB(2) = 0 end if - count = count + 1 - - else - inner_points_in(count_i)%loc = [i, j, 0] - patch_id = ib_markers%sf(i, j, 0) - inner_points_in(count_i)%ib_patch_id = & - patch_id - inner_points_in(count_i)%slip = patch_ib(patch_id)%slip - count_i = count_i + 1 - - end if - end if - else - ! 3D - do k = 0, p - if (ib_markers%sf(i, j, k) /= 0) then - subsection_3D = ib_markers%sf( & - i - gp_layers:i + gp_layers, & - j - gp_layers:j + gp_layers, & - k - gp_layers:k + gp_layers) - if (any(subsection_3D == 0)) then - ghost_points_in(count)%loc = [i, j, k] - patch_id = ib_markers%sf(i, j, k) - ghost_points_in(count)%ib_patch_id = & - ib_markers%sf(i, j, k) - ghost_points_in(count)%slip = patch_ib(patch_id)%slip - - if ((x_cc(i) - dx(i)) < x_domain%beg) then - ghost_points_in(count)%DB(1) = -1 - else if ((x_cc(i) + dx(i)) > x_domain%end) then - ghost_points_in(count)%DB(1) = 1 - else - ghost_points_in(count)%DB(1) = 0 - end if - - if ((y_cc(j) - dy(j)) < y_domain%beg) then - ghost_points_in(count)%DB(2) = -1 - else if ((y_cc(j) + dy(j)) > y_domain%end) then - ghost_points_in(count)%DB(2) = 1 - else - ghost_points_in(count)%DB(2) = 0 - end if - + if (p /= 0) then if ((z_cc(k) - dz(k)) < z_domain%beg) then - ghost_points_in(count)%DB(3) = -1 + ghost_points_in(local_idx)%DB(3) = -1 else if ((z_cc(k) + dz(k)) > z_domain%end) then - ghost_points_in(count)%DB(3) = 1 + ghost_points_in(local_idx)%DB(3) = 1 else - ghost_points_in(count)%DB(3) = 0 + ghost_points_in(local_idx)%DB(3) = 0 end if + end if - count = count + 1 - else - inner_points_in(count_i)%loc = [i, j, k] - patch_id = ib_markers%sf(i, j, k) - inner_points_in(count_i)%ib_patch_id = & - ib_markers%sf(i, j, k) - inner_points_in(count_i)%slip = patch_ib(patch_id)%slip + else + $:GPU_ATOMIC(atomic='capture') + count_i = count_i + 1 + local_idx = count_i + $:END_GPU_ATOMIC_CAPTURE() + + inner_points_in(local_idx)%loc = [i, j, k] + patch_id = ib_markers%sf(i, j, k) + inner_points_in(local_idx)%ib_patch_id = patch_id + inner_points_in(local_idx)%slip = patch_ib(patch_id)%slip - count_i = count_i + 1 - end if end if - end do - end if + end if + end do end do end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_find_ghost_points @@ -707,51 +685,74 @@ contains real(wp) :: buf real(wp), dimension(2, 2, 2) :: eta type(ghost_point) :: gp - integer :: i !< Iterator variables - integer :: i1, i2, j1, j2, k1, k2 !< Grid indexes + integer :: q, i, j, k, ii, jj, kk !< Grid indexes and iterators integer :: patch_id + logical is_cell_center - ! 2D - if (p <= 0) then - do i = 1, num_gps - gp = ghost_points_in(i) - ! Get the interpolation points - i1 = gp%ip_grid(1); i2 = i1 + 1 - j1 = gp%ip_grid(2); j2 = j1 + 1 - - dist = 0._wp - buf = 1._wp - dist(1, 1, 1) = sqrt( & - (x_cc(i1) - gp%ip_loc(1))**2 + & - (y_cc(j1) - gp%ip_loc(2))**2) - dist(2, 1, 1) = sqrt( & - (x_cc(i2) - gp%ip_loc(1))**2 + & - (y_cc(j1) - gp%ip_loc(2))**2) - dist(1, 2, 1) = sqrt( & - (x_cc(i1) - gp%ip_loc(1))**2 + & - (y_cc(j2) - gp%ip_loc(2))**2) - dist(2, 2, 1) = sqrt( & - (x_cc(i2) - gp%ip_loc(1))**2 + & - (y_cc(j2) - gp%ip_loc(2))**2) - - interp_coeffs = 0._wp - - if (dist(1, 1, 1) <= 1.e-16_wp) then - interp_coeffs(1, 1, 1) = 1._wp - else if (dist(2, 1, 1) <= 1.e-16_wp) then - interp_coeffs(2, 1, 1) = 1._wp - else if (dist(1, 2, 1) <= 1.e-16_wp) then - interp_coeffs(1, 2, 1) = 1._wp - else if (dist(2, 2, 1) <= 1.e-16_wp) then - interp_coeffs(2, 2, 1) = 1._wp - else + $:GPU_PARALLEL_LOOP(private='[q,i,j,k,ii,jj,kk,dist,buf,gp,interp_coeffs,eta,alpha,patch_id,is_cell_center]') + do q = 1, num_gps + gp = ghost_points_in(q) + ! Get the interpolation points + i = gp%ip_grid(1) + j = gp%ip_grid(2) + if (p /= 0) then + k = gp%ip_grid(3) + else + k = 0; + end if + + ! get the distance to a cell in each direction + dist = 0._wp + buf = 1._wp + do ii = 0, 1 + do jj = 0, 1 + if (p == 0) then + dist(1 + ii, 1 + jj, 1) = sqrt( & + (x_cc(i + ii) - gp%ip_loc(1))**2 + & + (y_cc(j + jj) - gp%ip_loc(2))**2) + else + do kk = 0, 1 + dist(1 + ii, 1 + jj, 1 + kk) = sqrt( & + (x_cc(i + ii) - gp%ip_loc(1))**2 + & + (y_cc(j + jj) - gp%ip_loc(2))**2 + & + (z_cc(k + kk) - gp%ip_loc(3))**2) + end do + end if + end do + end do + + ! check if we are arbitrarily close to a cell center + interp_coeffs = 0._wp + is_cell_center = .false. + check_is_cell_center: do ii = 0, 1 + do jj = 0, 1 + if (dist(ii + 1, jj + 1, 1) <= 1.e-16_wp) then + interp_coeffs(ii + 1, jj + 1, 1) = 1._wp + is_cell_center = .true. + exit check_is_cell_center + else + if (p /= 0) then + if (dist(ii + 1, jj + 1, 2) <= 1.e-16_wp) then + interp_coeffs(ii + 1, jj + 1, 2) = 1._wp + is_cell_center = .true. + exit check_is_cell_center + end if + end if + end if + end do + end do check_is_cell_center + + if (.not. is_cell_center) then + ! if we are not arbitrarily close, interpolate + alpha = 1._wp + patch_id = gp%ib_patch_id + if (ib_markers%sf(i, j, k) /= 0) alpha(1, 1, 1) = 0._wp + if (ib_markers%sf(i + 1, j, k) /= 0) alpha(2, 1, 1) = 0._wp + if (ib_markers%sf(i, j + 1, k) /= 0) alpha(1, 2, 1) = 0._wp + if (ib_markers%sf(i + 1, j + 1, k) /= 0) alpha(2, 2, 1) = 0._wp + + if (p == 0) then eta(:, :, 1) = 1._wp/dist(:, :, 1)**2 - alpha = 1._wp - patch_id = gp%ib_patch_id - if (ib_markers%sf(i1, j1, 0) /= 0) alpha(1, 1, 1) = 0._wp - if (ib_markers%sf(i2, j1, 0) /= 0) alpha(2, 1, 1) = 0._wp - if (ib_markers%sf(i1, j2, 0) /= 0) alpha(1, 2, 1) = 0._wp - if (ib_markers%sf(i2, j2, 0) /= 0) alpha(2, 2, 1) = 0._wp buf = sum(alpha(:, :, 1)*eta(:, :, 1)) if (buf > 0._wp) then interp_coeffs(:, :, 1) = alpha(:, :, 1)*eta(:, :, 1)/buf @@ -759,82 +760,15 @@ contains buf = sum(eta(:, :, 1)) interp_coeffs(:, :, 1) = eta(:, :, 1)/buf end if - end if - - ghost_points_in(i)%interp_coeffs = interp_coeffs - end do - - else - do i = 1, num_gps - gp = ghost_points_in(i) - ! Get the interpolation points - i1 = gp%ip_grid(1); i2 = i1 + 1 - j1 = gp%ip_grid(2); j2 = j1 + 1 - k1 = gp%ip_grid(3); k2 = k1 + 1 - - ! Get interpolation weights (Chaudhuri et al. 2011, JCP) - dist(1, 1, 1) = sqrt( & - (x_cc(i1) - gp%ip_loc(1))**2 + & - (y_cc(j1) - gp%ip_loc(2))**2 + & - (z_cc(k1) - gp%ip_loc(3))**2) - dist(2, 1, 1) = sqrt( & - (x_cc(i2) - gp%ip_loc(1))**2 + & - (y_cc(j1) - gp%ip_loc(2))**2 + & - (z_cc(k1) - gp%ip_loc(3))**2) - dist(1, 2, 1) = sqrt( & - (x_cc(i1) - gp%ip_loc(1))**2 + & - (y_cc(j2) - gp%ip_loc(2))**2 + & - (z_cc(k1) - gp%ip_loc(3))**2) - dist(2, 2, 1) = sqrt( & - (x_cc(i2) - gp%ip_loc(1))**2 + & - (y_cc(j2) - gp%ip_loc(2))**2 + & - (z_cc(k1) - gp%ip_loc(3))**2) - dist(1, 1, 2) = sqrt( & - (x_cc(i1) - gp%ip_loc(1))**2 + & - (y_cc(j1) - gp%ip_loc(2))**2 + & - (z_cc(k2) - gp%ip_loc(3))**2) - dist(2, 1, 2) = sqrt( & - (x_cc(i2) - gp%ip_loc(1))**2 + & - (y_cc(j1) - gp%ip_loc(2))**2 + & - (z_cc(k2) - gp%ip_loc(3))**2) - dist(1, 2, 2) = sqrt( & - (x_cc(i1) - gp%ip_loc(1))**2 + & - (y_cc(j2) - gp%ip_loc(2))**2 + & - (z_cc(k2) - gp%ip_loc(3))**2) - dist(2, 2, 2) = sqrt( & - (x_cc(i2) - gp%ip_loc(1))**2 + & - (y_cc(j2) - gp%ip_loc(2))**2 + & - (z_cc(k2) - gp%ip_loc(3))**2) - interp_coeffs = 0._wp - buf = 1._wp - if (dist(1, 1, 1) <= 1.e-16_wp) then - interp_coeffs(1, 1, 1) = 1._wp - else if (dist(2, 1, 1) <= 1.e-16_wp) then - interp_coeffs(2, 1, 1) = 1._wp - else if (dist(1, 2, 1) <= 1.e-16_wp) then - interp_coeffs(1, 2, 1) = 1._wp - else if (dist(2, 2, 1) <= 1.e-16_wp) then - interp_coeffs(2, 2, 1) = 1._wp - else if (dist(1, 1, 2) <= 1.e-16_wp) then - interp_coeffs(1, 1, 2) = 1._wp - else if (dist(2, 1, 2) <= 1.e-16_wp) then - interp_coeffs(2, 1, 2) = 1._wp - else if (dist(1, 2, 2) <= 1.e-16_wp) then - interp_coeffs(1, 2, 2) = 1._wp - else if (dist(2, 2, 2) <= 1.e-16_wp) then - interp_coeffs(2, 2, 2) = 1._wp else + + if (ib_markers%sf(i, j, k + 1) /= 0) alpha(1, 1, 2) = 0._wp + if (ib_markers%sf(i + 1, j, k + 1) /= 0) alpha(2, 1, 2) = 0._wp + if (ib_markers%sf(i, j + 1, k + 1) /= 0) alpha(1, 2, 2) = 0._wp + if (ib_markers%sf(i + 1, j + 1, k + 1) /= 0) alpha(2, 2, 2) = 0._wp eta = 1._wp/dist**2 - alpha = 1._wp - if (ib_markers%sf(i1, j1, k1) /= 0) alpha(1, 1, 1) = 0._wp - if (ib_markers%sf(i2, j1, k1) /= 0) alpha(2, 1, 1) = 0._wp - if (ib_markers%sf(i1, j2, k1) /= 0) alpha(1, 2, 1) = 0._wp - if (ib_markers%sf(i2, j2, k1) /= 0) alpha(2, 2, 1) = 0._wp - if (ib_markers%sf(i1, j1, k2) /= 0) alpha(1, 1, 2) = 0._wp - if (ib_markers%sf(i2, j1, k2) /= 0) alpha(2, 1, 2) = 0._wp - if (ib_markers%sf(i1, j2, k2) /= 0) alpha(1, 2, 2) = 0._wp - if (ib_markers%sf(i2, j2, k2) /= 0) alpha(2, 2, 2) = 0._wp buf = sum(alpha*eta) + if (buf > 0._wp) then interp_coeffs = alpha*eta/buf else @@ -843,9 +777,11 @@ contains end if end if - ghost_points_in(i)%interp_coeffs = interp_coeffs - end do - end if + end if + + ghost_points_in(q)%interp_coeffs = interp_coeffs + end do + $:END_GPU_PARALLEL_LOOP() end subroutine s_compute_interpolation_coeffs @@ -1003,6 +939,8 @@ contains integer :: i, ierr + call nvtxStartRange("UPDATE-MIBM") + ! Clears the existing immersed boundary indices ib_markers%sf = 0._wp @@ -1016,26 +954,24 @@ contains $:GPU_UPDATE(device='[patch_ib]') ! recompute the new ib_patch locations and broadcast them. - call s_apply_ib_patches(ib_markers%sf(0:m, 0:n, 0:p)) + call nvtxStartRange("APPLY-IB-PATCHES") $:GPU_UPDATE(device='[ib_markers%sf]') - call s_populate_ib_buffers() - $:GPU_UPDATE(host='[ib_markers%sf]') + call s_apply_ib_patches(ib_markers) + call nvtxEndRange + call nvtxStartRange("COMPUTE-GHOST-POINTS") ! recalculate the ghost point locations and coefficients call s_find_num_ghost_points(num_gps, num_inner_gps) - $:GPU_UPDATE(device='[num_gps, num_inner_gps]') - call s_find_ghost_points(ghost_points, inner_points) - $:GPU_UPDATE(device='[ghost_points, inner_points]') + call nvtxEndRange + call nvtxStartRange("COMPUTE-IMAGE-POINTS") call s_apply_levelset(ghost_points, num_gps) - $:GPU_UPDATE(device='[ghost_points]') - call s_compute_image_points(ghost_points) - $:GPU_UPDATE(device='[ghost_points]') - call s_compute_interpolation_coeffs(ghost_points) - $:GPU_UPDATE(device='[ghost_points]') + call nvtxEndRange + + call nvtxEndRange end subroutine s_update_mib @@ -1058,6 +994,9 @@ contains #:else real(wp), dimension(num_fluids) :: dynamic_viscosities #:endif + + call nvtxStartRange("COMPUTE-IB-FORCES") + forces = 0._wp torques = 0._wp @@ -1188,6 +1127,8 @@ contains patch_ib(i)%torque(:) = matmul(patch_ib(i)%rotation_matrix_inverse, torques(i, :)) ! torques must be converted to the local coordinates of the IB end do + call nvtxEndRange + end subroutine s_compute_ib_forces !> Subroutine to deallocate memory reserved for the IBM module diff --git a/src/simulation/m_time_steppers.fpp b/src/simulation/m_time_steppers.fpp index cfcb8568db..2a33e90bce 100644 --- a/src/simulation/m_time_steppers.fpp +++ b/src/simulation/m_time_steppers.fpp @@ -803,6 +803,8 @@ contains integer :: i logical :: forces_computed + call nvtxStartRange("PROPAGATE-IMERSED-BOUNDARIES") + forces_computed = .false. do i = 1, num_ibs @@ -850,6 +852,8 @@ contains call s_update_mib(num_ibs) + call nvtxEndRange + end subroutine s_propagate_immersed_boundaries !> This subroutine saves the temporary q_prim_vf vector diff --git a/toolchain/mfc/case_validator.py b/toolchain/mfc/case_validator.py index 2c29c75e7f..9b071e9e66 100644 --- a/toolchain/mfc/case_validator.py +++ b/toolchain/mfc/case_validator.py @@ -684,7 +684,7 @@ def check_ibm(self): self.prohibit(ib and n <= 0, "Immersed Boundaries do not work in 1D (requires n > 0)") - self.prohibit(ib and (num_ibs <= 0 or num_ibs > 10), + self.prohibit(ib and (num_ibs <= 0 or num_ibs > 1000), "num_ibs must be between 1 and num_patches_max (10)") self.prohibit(not ib and num_ibs > 0, "num_ibs is set, but ib is not enabled") diff --git a/toolchain/mfc/params/definitions.py b/toolchain/mfc/params/definitions.py index d44249f664..7c456b36fc 100644 --- a/toolchain/mfc/params/definitions.py +++ b/toolchain/mfc/params/definitions.py @@ -11,7 +11,7 @@ from .registry import REGISTRY # Index limits -NP, NF, NI, NA, NPR, NB = 10, 10, 10, 4, 10, 10 # patches, fluids, ibs, acoustic, probes, bc_patches +NP, NF, NI, NA, NPR, NB = 10, 10, 1000, 4, 10, 10 # patches, fluids, ibs, acoustic, probes, bc_patches # ============================================================================= @@ -672,7 +672,7 @@ def get_value_label(param_name: str, value: int) -> str: # Counts (must be positive) "num_fluids": {"min": 1, "max": 10}, "num_patches": {"min": 0, "max": 10}, - "num_ibs": {"min": 0, "max": 10}, + "num_ibs": {"min": 0, "max": 1000}, "num_source": {"min": 1}, "num_probes": {"min": 1}, "num_integrals": {"min": 1}, diff --git a/toolchain/mfc/params_tests/test_definitions.py b/toolchain/mfc/params_tests/test_definitions.py index 7b9710bf43..20e0774526 100644 --- a/toolchain/mfc/params_tests/test_definitions.py +++ b/toolchain/mfc/params_tests/test_definitions.py @@ -161,10 +161,10 @@ class TestParameterCounts(unittest.TestCase): """Tests for expected parameter counts.""" def test_total_param_count(self): - """Total parameter count should be around 3400.""" + """Total parameter count should be around 40000.""" count = len(REGISTRY.all_params) - self.assertGreater(count, 3000, "Too few parameters") - self.assertLess(count, 4000, "Too many parameters") + self.assertGreater(count, 39000, f"Too few parameters. Got {count}.") + self.assertLess(count, 41000, f"Too many parameters. Got {count}.") def test_log_params_count(self): """Should have many LOG type parameters."""