1/**
2 * Copyright (c) 2017-present, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9#pragma once
10
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <memory>
#include <mutex>
#include <string>
#include <vector>
15
16#ifdef __linux__
17#include "gloo/common/linux.h"
18#endif
19#include "gloo/common/logging.h"
20#include "gloo/cuda.h"
21#include "gloo/transport/device.h"
22
23namespace gloo {
24
// Evaluates `condition` (an expression yielding a cudaError_t) exactly once
// and enforces, via GLOO_ENFORCE_EQ, that the result equals cudaSuccess.
// The failure message carries the file, the line, and the text returned by
// cudaGetErrorString for the offending status.
// Wrapped in do { ... } while (0) so the macro acts as a single statement.
#define CUDA_CHECK(condition)          \
  do {                                 \
    cudaError_t error = (condition);   \
    GLOO_ENFORCE_EQ(                   \
        error,                         \
        cudaSuccess,                   \
        "Error at: ",                  \
        __FILE__,                      \
        ":",                           \
        __LINE__,                      \
        ": ",                          \
        cudaGetErrorString(error));    \
  } while (0)
38
39inline int getCurrentGPUID() {
40 int id = 0;
41 CUDA_CHECK(cudaGetDevice(&id));
42 return id;
43}
44
45inline int getGPUIDForPointer(const void* ptr) {
46 cudaPointerAttributes attr;
47 CUDA_CHECK(cudaPointerGetAttributes(&attr, ptr));
48 return attr.device;
49}
50
51inline int getDeviceCount() {
52 int count;
53 CUDA_CHECK(cudaGetDeviceCount(&count));
54 return count;
55}
56
57const std::string& getCudaPCIBusID(int device);
58
59template<typename T>
60int findCudaDevicePointerClosestToDevice(
61 std::vector<CudaDevicePointer<T> >& ptrs,
62 std::shared_ptr<transport::Device>& dev) {
63 // Compute distance between every pointer
64 auto devBusID = dev->getPCIBusID();
65 std::vector<int> distance(ptrs.size());
66 int minDistance = INT_MAX;
67 int minDistanceCount = 0;
68 for (auto i = 0; i < ptrs.size(); i++) {
69#ifdef __linux__
70 auto cudaBusID = getCudaPCIBusID(ptrs[i].getDeviceID());
71 distance[i] = pciDistance(devBusID, cudaBusID);
72#else
73 distance[i] = 0;
74#endif
75 if (distance[i] <= minDistance) {
76 if (distance[i] < minDistance) {
77 minDistance = distance[i];
78 minDistanceCount = 0;
79 }
80 minDistanceCount++;
81 }
82 }
83 // Choose random pointer closest to device;
84 auto minOffset = rand() % minDistanceCount;
85 int minIndex = 0;
86 for (auto i = 0; i < ptrs.size(); i++) {
87 if (distance[i] == minDistance) {
88 if (minOffset == 0) {
89 minIndex = i;
90 }
91 minOffset--;
92 }
93 }
94 return minIndex;
95}
96
97class CudaDeviceGuard {
98 public:
99 CudaDeviceGuard() : previous_(getCurrentGPUID()) {
100 }
101
102 ~CudaDeviceGuard() noexcept(false) {
103 CUDA_CHECK(cudaSetDevice(previous_));
104 }
105
106 private:
107 int previous_;
108};
109
110class CudaDeviceScope {
111 public:
112 explicit CudaDeviceScope(int device) : guard_() {
113 CUDA_CHECK(cudaSetDevice(device));
114 }
115
116 private:
117 CudaDeviceGuard guard_;
118};
119
// Managed chunk of GPU memory.
// Convenience class used for tests and benchmarks.
//
// Movable but not copyable. The constructors and the destructor are only
// declared here; their definitions live outside this header section.
template <typename T>
class CudaMemory {
 public:
  explicit CudaMemory(size_t elements);
  CudaMemory(CudaMemory&&) noexcept;

  // noexcept(false): the destructor is permitted to propagate an exception.
  ~CudaMemory() noexcept(false);

  // Dereferencing the wrapper yields the raw pointer it holds.
  T* operator*() const {
    return ptr_;
  }

  // Immutable after construction. Presumably the element count and the
  // corresponding size in bytes -- confirm against the constructor.
  const size_t elements;
  const size_t bytes;

 protected:
  // Copying is disabled.
  CudaMemory(const CudaMemory&) = delete;
  CudaMemory& operator=(const CudaMemory&) = delete;

  // NOTE(review): presumably the device the memory belongs to and the
  // pointer to it; both are set by code outside this view.
  int device_;
  T* ptr_;
};
143
144// Container class for a set of per-device streams
145class CudaDeviceStreams {
146 public:
147 CudaDeviceStreams() {
148 const int numDevices = getDeviceCount();
149 streams_.reserve(numDevices);
150 for (auto i = 0; i < numDevices; i++) {
151 streams_.emplace_back(i);
152 }
153 }
154 cudaStream_t operator[](const int i) {
155 GLOO_ENFORCE_LT(i, streams_.size());
156 return *streams_[i];
157 }
158
159 protected:
160 CudaDeviceStreams(const CudaDeviceStreams&) = delete;
161 CudaDeviceStreams& operator=(const CudaDeviceStreams&) = delete;
162
163 std::vector<CudaStream> streams_;
164};
165
166} // namespace gloo
167