Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ endmacro(add_psc_executable)

if(NOT USE_CUDA)
add_psc_executable(psc_bgk)
add_psc_executable(psc_shock)
endif()

add_psc_executable(psc_bubble_yz)
Expand All @@ -19,16 +20,15 @@ add_psc_executable(psc_whistler)
add_psc_executable(psc_harris_yz)
add_psc_executable(psc_2d_shock)
add_psc_executable(psc_radiation)
add_psc_executable(psc_shock)

if(NOT USE_CUDA)
install(
TARGETS psc_bgk
TARGETS psc_bgk psc_shock
RUNTIME DESTINATION bin
)
endif()

install(
TARGETS psc_bubble_yz psc_flatfoil_yz psc_whistler psc_shock
TARGETS psc_bubble_yz psc_flatfoil_yz psc_whistler
RUNTIME DESTINATION bin
)
178 changes: 0 additions & 178 deletions src/libpsc/cuda/cuda_base.cu

This file was deleted.

60 changes: 52 additions & 8 deletions src/libpsc/cuda/cuda_base.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,49 @@ static track_mr_type* track_mr;
static pool_mr_type* pool_mr;
#endif

namespace
{
// Plain aggregate holding the handful of device properties that the device
// summary in cuda_base_init() prints via `compat.<member>`.
//
// It is filled by get_device_props_compat() using positional brace
// initialisation, so the ORDER of the members below is part of the contract:
// do not reorder them without updating that function.
struct DevicePropsCompat
{
// Printed as "Clock rate" in GHz after multiplying by 1e-6f
// (presumably kHz, given that scale factor -- TODO confirm).
int clock_rate;
// Printed under "Concurrent copy and execution"; any nonzero value is shown
// as "Yes". NOTE(review): the member name says "concurrent kernels", but the
// label and the legacy source field (deviceProp.deviceOverlap) are about
// copy/compute overlap -- confirm which one is intended.
int concurrent_kernels;
// Printed under "Run time limit on kernels"; nonzero is shown as "Yes".
int kernel_exec_timeout;
// Printed under "Integrated"; nonzero is shown as "Yes".
int integrated;
// Printed under "Support host page-locked memory mapping"; nonzero is shown
// as "Yes".
int can_map_host_memory;
// Compared against hipComputeModeDefault / hipComputeModeExclusive /
// hipComputeModeProhibited when printing "Compute mode".
int compute_mode;
};

int cuda_device_attr(int dev, cudaDeviceAttr attr)
{
int value = 0;
cudaError_t ierr = cudaDeviceGetAttribute(&value, attr, dev);
if (ierr != cudaSuccess) {
return 0;
}
return value;
}

// Gather the device properties needed for the device summary into a
// DevicePropsCompat, independent of the CUDA runtime version.
//
// deviceProp: properties already fetched for the device; used as the data
//             source when CUDART_VERSION < 13000.
// dev:        device ordinal; used for per-attribute queries when
//             CUDART_VERSION >= 13000, where this code no longer reads the
//             corresponding struct members.
//
// The initialiser lists are positional and must follow the member order of
// DevicePropsCompat: clock_rate, concurrent_kernels, kernel_exec_timeout,
// integrated, can_map_host_memory, compute_mode.
DevicePropsCompat get_device_props_compat(const hipDeviceProp_t& deviceProp,
                                          int dev)
{
#if CUDART_VERSION >= 13000
  (void)deviceProp; // not consulted on this path

  // The second member is reported as "Concurrent copy and execution" and is
  // filled from deviceProp.deviceOverlap on the legacy path, i.e. it describes
  // copy/compute overlap, not kernel/kernel concurrency. The matching query is
  // the async (copy) engine count; collapse it to 0/1 so both paths yield the
  // same value domain.
  const int copy_compute_overlap =
    cuda_device_attr(dev, cudaDevAttrAsyncEngineCount) > 0 ? 1 : 0;

  return {cuda_device_attr(dev, cudaDevAttrClockRate),
          copy_compute_overlap,
          cuda_device_attr(dev, cudaDevAttrKernelExecTimeout),
          cuda_device_attr(dev, cudaDevAttrIntegrated),
          cuda_device_attr(dev, cudaDevAttrCanMapHostMemory),
          cuda_device_attr(dev, cudaDevAttrComputeMode)};
#else
  (void)dev; // not consulted on this path

  return {deviceProp.clockRate,
          deviceProp.deviceOverlap,
          deviceProp.kernelExecTimeoutEnabled,
          deviceProp.integrated,
          deviceProp.canMapHostMemory,
          deviceProp.computeMode};
#endif
}
} // namespace

void cuda_base_init(void)
{
static bool first_time = true;
Expand Down Expand Up @@ -79,6 +122,7 @@ void cuda_base_init(void)
for (int dev = 0; dev < deviceCount; ++dev) {
hipDeviceProp_t deviceProp;
hipGetDeviceProperties(&deviceProp, dev);
auto compat = get_device_props_compat(deviceProp, dev);

if (dev == 0) {
// This function call returns 9999 for both major & minor fields, if no
Expand Down Expand Up @@ -124,25 +168,25 @@ void cuda_base_init(void)
printf(" Texture alignment: %lu bytes\n",
deviceProp.textureAlignment);
printf(" Clock rate: %.2f GHz\n",
deviceProp.clockRate * 1e-6f);
compat.clock_rate * 1e-6f);
#if CUDART_VERSION >= 2000
printf(" Concurrent copy and execution: %s\n",
deviceProp.deviceOverlap ? "Yes" : "No");
compat.concurrent_kernels ? "Yes" : "No");
#endif
#if CUDART_VERSION >= 2020
printf(" Run time limit on kernels: %s\n",
deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No");
compat.kernel_exec_timeout ? "Yes" : "No");
printf(" Integrated: %s\n",
deviceProp.integrated ? "Yes" : "No");
compat.integrated ? "Yes" : "No");
printf(" Support host page-locked memory mapping: %s\n",
deviceProp.canMapHostMemory ? "Yes" : "No");
compat.can_map_host_memory ? "Yes" : "No");
printf(
" Compute mode: %s\n",
deviceProp.computeMode == hipComputeModeDefault
compat.compute_mode == hipComputeModeDefault
? "Default (multiple host threads can use this device simultaneously)"
: deviceProp.computeMode == hipComputeModeExclusive
: compat.compute_mode == hipComputeModeExclusive
? "Exclusive (only one host thread at a time can use this device)"
: deviceProp.computeMode == hipComputeModeProhibited
: compat.compute_mode == hipComputeModeProhibited
? "Prohibited (no host thread can use this device)"
: "Unknown");
#endif
Expand Down
4 changes: 2 additions & 2 deletions src/libpsc/cuda/cuda_mparticles.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,12 @@ public:

__host__ __device__ iterator begin()
{
return iterator({xi4.begin(), pxi4.begin()});
return iterator(thrust::make_tuple(xi4.begin(), pxi4.begin()));
}

__host__ __device__ iterator end()
{
return iterator({xi4.end(), pxi4.end()});
return iterator(thrust::make_tuple(xi4.end(), pxi4.end()));
}

__host__ void resize(size_t n)
Expand Down
1 change: 1 addition & 0 deletions src/libpsc/psc_output_fields/fields_item_moments_1st.hxx
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

#pragma once

#include <psc_fields_single.h>
#include <psc/moment.hxx>
#include "fields_item.hxx"

Expand Down
2 changes: 1 addition & 1 deletion src/libpsc/psc_push_fields/marder_impl.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ inline void correct(const Grid_t& grid, E1& efield, const Int3& efield_ib,

template <typename E1, typename E2>
inline void cuda_marder_correct_yz(E1& efield, E2& res, Float3 fac, Int3 l,
Int3 r, Int3 l, Int3 r)
Int3 r)
{
auto k_efield = efield.to_kernel();
auto k_res = res.to_kernel();
Expand Down
Loading