gentoo-ebuilds/sci-libs/vtk/files/vtk-9.5.0-cuda-13-1.patch
Paul Zander 84b0ebc2a6
sci-libs/vtk: add 9.5.2
Bug: https://bugs.gentoo.org/963035
Closes: https://bugs.gentoo.org/966144
Closes: https://bugs.gentoo.org/965701
Signed-off-by: Paul Zander <negril.nx+gentoo@gmail.com>
Part-of: https://github.com/gentoo/gentoo/pull/44665
Signed-off-by: Sam James <sam@gentoo.org>
2025-11-18 14:56:04 +00:00

99 lines
4.9 KiB
Diff

From 00d84bf753a0f4d79ea3025c04862d0308cbcb6b Mon Sep 17 00:00:00 2001
From: Paul Zander <negril.nx+gentoo@gmail.com>
Date: Mon, 25 Aug 2025 21:14:03 +0200
Subject: [PATCH] cuda 13
Signed-off-by: Paul Zander <negril.nx+gentoo@gmail.com>
diff --git a/ThirdParty/viskores/vtkviskores/viskores/viskores/cont/cuda/internal/CudaAllocator.cu b/ThirdParty/viskores/vtkviskores/viskores/viskores/cont/cuda/internal/CudaAllocator.cu
index cc2da2b..ad382ee 100644
--- a/ThirdParty/viskores/vtkviskores/viskores/viskores/cont/cuda/internal/CudaAllocator.cu
+++ b/ThirdParty/viskores/vtkviskores/viskores/viskores/cont/cuda/internal/CudaAllocator.cu
@@ -284,10 +284,14 @@ void CudaAllocator::PrepareForControl(const void* ptr, std::size_t numBytes)
{
if (IsManagedPointer(ptr) && numBytes >= Threshold)
{
+ // CUDA 13 replaces cudaCpuDeviceId with a host cudaMemLocation (zero-init; id is unused for host)
+ cudaMemLocation hostLoc{};
+ hostLoc.type = cudaMemLocationTypeHost;
+
// TODO these hints need to be benchmarked and adjusted once we start
// sharing the pointers between cont/exec
- VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, cudaCpuDeviceId));
- VISKORES_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, cudaCpuDeviceId, cudaStreamPerThread));
+ VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, hostLoc));
+ VISKORES_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, hostLoc, 0, cudaStreamPerThread));
}
}
@@ -299,10 +303,15 @@ void CudaAllocator::PrepareForInput(const void* ptr, std::size_t numBytes)
viskores::cont::RuntimeDeviceInformation()
.GetRuntimeConfiguration(viskores::cont::DeviceAdapterTagCuda())
.GetDeviceInstance(dev);
+
+ cudaMemLocation deviceLoc;
+ deviceLoc.type = cudaMemLocationTypeDevice;
+ deviceLoc.id = dev;
+
// VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, dev));
// VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetReadMostly, dev));
- VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, dev));
- VISKORES_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, dev, cudaStreamPerThread));
+ VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, deviceLoc));
+ VISKORES_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, deviceLoc, 0, cudaStreamPerThread));
}
}
@@ -314,10 +323,15 @@ void CudaAllocator::PrepareForOutput(const void* ptr, std::size_t numBytes)
viskores::cont::RuntimeDeviceInformation()
.GetRuntimeConfiguration(viskores::cont::DeviceAdapterTagCuda())
.GetDeviceInstance(dev);
+
+ cudaMemLocation deviceLoc;
+ deviceLoc.type = cudaMemLocationTypeDevice;
+ deviceLoc.id = dev;
+
// VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, dev));
// VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseUnsetReadMostly, dev));
- VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, dev));
- VISKORES_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, dev, cudaStreamPerThread));
+ VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, deviceLoc));
+ VISKORES_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, deviceLoc, 0, cudaStreamPerThread));
}
}
@@ -329,10 +343,15 @@ void CudaAllocator::PrepareForInPlace(const void* ptr, std::size_t numBytes)
viskores::cont::RuntimeDeviceInformation()
.GetRuntimeConfiguration(viskores::cont::DeviceAdapterTagCuda())
.GetDeviceInstance(dev);
+
+ cudaMemLocation deviceLoc;
+ deviceLoc.type = cudaMemLocationTypeDevice;
+ deviceLoc.id = dev;
+
// VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetPreferredLocation, dev));
// VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseUnsetReadMostly, dev));
- VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, dev));
- VISKORES_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, dev, cudaStreamPerThread));
+ VISKORES_CUDA_CALL(cudaMemAdvise(ptr, numBytes, cudaMemAdviseSetAccessedBy, deviceLoc));
+ VISKORES_CUDA_CALL(cudaMemPrefetchAsync(ptr, numBytes, deviceLoc, 0, cudaStreamPerThread));
}
}
diff --git a/ThirdParty/viskores/vtkviskores/viskores/viskores/Swap.h b/ThirdParty/viskores/vtkviskores/viskores/viskores/Swap.h
index 918075e..4d380db 100644
--- a/ThirdParty/viskores/vtkviskores/viskores/viskores/Swap.h
+++ b/ThirdParty/viskores/vtkviskores/viskores/viskores/Swap.h
@@ -41,7 +41,8 @@ namespace viskores
// defined in the `viskores` namespace as an argument. If that function has an unqualified call to
// `Swap`, it results in ADL being used, causing the templated functions `cub::Swap` and
// `viskores::Swap` to conflict.
-#if defined(VISKORES_CUDA_VERSION_MAJOR) && (VISKORES_CUDA_VERSION_MAJOR >= 12)
+// `cub::Swap` was deprecated in favour of `cuda::std::swap` in CUDA 13, so only CUDA 12 imports it.
+#if defined(VISKORES_CUDA_VERSION_MAJOR) && (VISKORES_CUDA_VERSION_MAJOR == 12)
using cub::Swap;
#else
template <typename T>
--
2.51.0