OpenPFC  0.1.4
Phase Field Crystal simulation framework
Loading...
Searching...
No Matches
gpu_vector.hpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: 2025 VTT Technical Research Centre of Finland Ltd
2// SPDX-License-Identifier: AGPL-3.0-or-later
3
40#ifndef PFC_GPU_VECTOR_HPP
41#define PFC_GPU_VECTOR_HPP
42
#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>
46
47// Only include CUDA headers if CUDA is enabled
48#if defined(OpenPFC_ENABLE_CUDA)
49#include <cuda_runtime.h>
50#define PFC_GPU_CUDA_AVAILABLE 1
51#else
52#define PFC_GPU_CUDA_AVAILABLE 0
53#endif
54
55namespace pfc {
56namespace gpu {
57
/**
 * @brief Simple GPU memory container (RAII).
 *
 * Owns a device allocation of `size` elements of T, freed automatically in
 * the destructor. Move-only: copying is deleted because it would require a
 * deep device-to-device copy. In builds without CUDA
 * (PFC_GPU_CUDA_AVAILABLE == 0) every operation that would touch device
 * memory throws std::runtime_error instead.
 */
template <typename T> class GPUVector {
private:
#if PFC_GPU_CUDA_AVAILABLE
  T *m_device_ptr = nullptr; ///< Device pointer from cudaMalloc (nullptr if size == 0).
#else
  void *m_device_ptr = nullptr; // Placeholder for non-CUDA builds
#endif
  size_t m_size = 0; ///< Number of elements (not bytes).

public:
  /**
   * @brief Construct a GPUVector with the specified size.
   *
   * @param size Number of elements to allocate on the device.
   * @throws std::runtime_error if the device allocation fails, or if
   *         size > 0 and CUDA support was not compiled in.
   */
  explicit GPUVector(size_t size) : m_size(size) {
#if PFC_GPU_CUDA_AVAILABLE
    if (size > 0) {
      cudaError_t err = cudaMalloc(&m_device_ptr, size * sizeof(T));
      if (err != cudaSuccess) {
        throw std::runtime_error("Failed to allocate GPU memory: " +
                                 std::string(cudaGetErrorString(err)));
      }
    }
#else
    // On non-CUDA systems, this should not be called
    // But we provide a stub to allow compilation
    if (size > 0) {
      throw std::runtime_error("GPUVector: CUDA not enabled at compile time");
    }
#endif
  }

  /**
   * @brief Destructor - automatically frees GPU memory.
   */
  ~GPUVector() {
#if PFC_GPU_CUDA_AVAILABLE
    if (m_device_ptr) {
      cudaFree(m_device_ptr);
    }
#endif
  }

  // No copy (would need deep copy)
  GPUVector(const GPUVector &) = delete;
  GPUVector &operator=(const GPUVector &) = delete;

  /**
   * @brief Move constructor.
   *
   * Steals the device allocation from `other`, leaving it empty.
   */
  GPUVector(GPUVector &&other) noexcept
      : m_device_ptr(other.m_device_ptr), m_size(other.m_size) {
    other.m_device_ptr = nullptr;
    other.m_size = 0;
  }

  /**
   * @brief Move assignment operator.
   *
   * Frees any currently held allocation, then steals `other`'s.
   */
  GPUVector &operator=(GPUVector &&other) noexcept {
    if (this != &other) {
#if PFC_GPU_CUDA_AVAILABLE
      if (m_device_ptr) cudaFree(m_device_ptr);
#endif
      m_device_ptr = other.m_device_ptr;
      m_size = other.m_size;
      other.m_device_ptr = nullptr;
      other.m_size = 0;
    }
    return *this;
  }

  /**
   * @brief Get pointer to GPU memory.
   *
   * @return Device pointer, or nullptr in non-CUDA builds (and when empty).
   */
  T *data() {
#if PFC_GPU_CUDA_AVAILABLE
    return m_device_ptr;
#else
    return nullptr;
#endif
  }

  /**
   * @brief Get const pointer to GPU memory.
   */
  const T *data() const {
#if PFC_GPU_CUDA_AVAILABLE
    return m_device_ptr;
#else
    return nullptr;
#endif
  }

  /**
   * @brief Get the number of elements.
   */
  size_t size() const { return m_size; }

  /**
   * @brief Check if the vector is empty.
   */
  bool empty() const { return m_size == 0; }

  /**
   * @brief Copy data from host (CPU) to device (GPU).
   *
   * @param host_data Source vector; must have exactly size() elements.
   * @throws std::runtime_error on size mismatch, on a failed cudaMemcpy, or
   *         always if CUDA support was not compiled in.
   */
  void copy_from_host(const std::vector<T> &host_data) {
    if (host_data.size() != m_size) {
      throw std::runtime_error("Size mismatch in copy_from_host: expected " +
                               std::to_string(m_size) + ", got " +
                               std::to_string(host_data.size()));
    }
#if PFC_GPU_CUDA_AVAILABLE
    if (m_size > 0) {
      cudaError_t err = cudaMemcpy(m_device_ptr, host_data.data(),
                                   m_size * sizeof(T), cudaMemcpyHostToDevice);
      if (err != cudaSuccess) {
        throw std::runtime_error("Failed to copy from host to device: " +
                                 std::string(cudaGetErrorString(err)));
      }
    }
#else
    throw std::runtime_error("copy_from_host: CUDA not enabled at compile time");
#endif
  }

  /**
   * @brief Copy data from device (GPU) to host (CPU).
   *
   * Resizes `host_data` to size() if needed before copying.
   *
   * @param host_data Destination vector (resized to fit).
   * @throws std::runtime_error on a failed cudaMemcpy, or always if CUDA
   *         support was not compiled in.
   */
  void copy_to_host(std::vector<T> &host_data) const {
    if (host_data.size() != m_size) {
      host_data.resize(m_size);
    }
#if PFC_GPU_CUDA_AVAILABLE
    if (m_size > 0) {
      cudaError_t err = cudaMemcpy(host_data.data(), m_device_ptr,
                                   m_size * sizeof(T), cudaMemcpyDeviceToHost);
      if (err != cudaSuccess) {
        throw std::runtime_error("Failed to copy from device to host: " +
                                 std::string(cudaGetErrorString(err)));
      }
    }
#else
    throw std::runtime_error("copy_to_host: CUDA not enabled at compile time");
#endif
  }

  /**
   * @brief Copy data from device to host and return as vector.
   *
   * @return Host vector containing a copy of the device contents.
   * @throws std::runtime_error if the copy fails or CUDA support was not
   *         compiled in (see copy_to_host).
   */
  std::vector<T> to_host() const {
    std::vector<T> result(m_size);
    // BUG FIX: the device-to-host copy was missing — the result was
    // returned value-initialized without ever reading device memory.
    copy_to_host(result);
    return result;
  }
};
249
250} // namespace gpu
251} // namespace pfc
252
253#undef PFC_GPU_CUDA_AVAILABLE
254
255#endif
Simple GPU memory container (RAII)
Definition gpu_vector.hpp:73
const T * data() const
Get const pointer to GPU memory.
Definition gpu_vector.hpp:171
size_t size() const
Get the number of elements.
Definition gpu_vector.hpp:182
void copy_from_host(const std::vector< T > &host_data)
Copy data from host (CPU) to device (GPU)
Definition gpu_vector.hpp:195
~GPUVector()
Destructor - automatically frees GPU memory.
Definition gpu_vector.hpp:115
std::vector< T > to_host() const
Copy data from device to host and return as vector.
Definition gpu_vector.hpp:243
void copy_to_host(std::vector< T > &host_data) const
Copy data from device (GPU) to host (CPU)
Definition gpu_vector.hpp:220
GPUVector(GPUVector &&other) noexcept
Move constructor.
Definition gpu_vector.hpp:130
GPUVector & operator=(GPUVector &&other) noexcept
Move assignment operator.
Definition gpu_vector.hpp:139
T * data()
Get pointer to GPU memory.
Definition gpu_vector.hpp:160
bool empty() const
Check if the vector is empty.
Definition gpu_vector.hpp:187
GPUVector(size_t size)
Construct a GPUVector with the specified size.
Definition gpu_vector.hpp:94
Represents the global simulation domain (the "world").
Definition world.hpp:91
const Int3 m_size
Grid dimensions: {nx, ny, nz}.
Definition world.hpp:94