1#pragma once
2
3#include <c10/core/Device.h>
4
5namespace c10 {
6
7using CopyBytesFunction = void (*)(
8 size_t nbytes,
9 const void* src,
10 Device src_device,
11 void* dst,
12 Device dst_device);
13
14struct C10_API _CopyBytesFunctionRegisterer {
15 _CopyBytesFunctionRegisterer(
16 DeviceType from,
17 DeviceType to,
18 CopyBytesFunction func_sync,
19 CopyBytesFunction func_async = nullptr);
20};
21
/*
 * Defines a uniquely named, file-local _CopyBytesFunctionRegisterer object
 * constructed with (from, to, <remaining arguments>).
 *
 * Usage, at namespace scope in a source file:
 *   REGISTER_COPY_BYTES_FUNCTION(from, to, func_sync)
 *   REGISTER_COPY_BYTES_FUNCTION(from, to, func_sync, func_async)
 * The variadic tail is forwarded verbatim to the constructor, so at least
 * func_sync must be supplied (the constructor has no default for it).
 *
 * The registerer type is spelled ::c10::-qualified so the macro expands
 * correctly from any namespace, not only where c10 names are reachable by
 * unqualified lookup. The unnamed namespace already gives the variable
 * internal linkage, so no extra `static` is needed.
 */
#define REGISTER_COPY_BYTES_FUNCTION(from, to, ...)           \
  namespace {                                                 \
  ::c10::_CopyBytesFunctionRegisterer C10_ANONYMOUS_VARIABLE( \
      g_copy_function)(from, to, __VA_ARGS__);                \
  }
27
28/*
29 * WARNING: Implementations for this function are currently registered from
30 * ATen and caffe2, not yet from c10. Don't use this if not either ATen
31 * or caffe2 is present as well.
32 * We can't move them yet, because the CUDA implementations aren't unified yet
33 * between ATen and caffe2.
34 * We're planning to move the implementations into c10/backend/xxx
35 * to make c10 self contained again.
36 */
37C10_API void CopyBytes(
38 size_t nbytes,
39 const void* src,
40 Device src_device,
41 void* dst,
42 Device dst_device,
43 bool async);
44} // namespace c10
45