1 | #include "taichi/codegen/cc/cc_program.h" |
2 | |
3 | using namespace taichi::lang::cccp; |
4 | |
5 | namespace taichi::lang { |
6 | |
// Constructs the C backend program: remembers the compile config, creates the
// CCRuntime from the embedded runtime sources, compiles that runtime, and
// creates an empty CCContext.
CCProgramImpl::CCProgramImpl(CompileConfig &config) : ProgramImpl(config) {
  // Stores the address of the caller's config (not a copy); it is read later
  // for cc_compile_cmd / cc_link_cmd.
  this->config = &config;
  // The #include directives paste the contents of the runtime files straight
  // into the argument list. NOTE(review): presumably each file expands to a
  // string literal that concatenates with the "\n" following it, giving the
  // runtime header text and the runtime source text -- confirm against the
  // runtime/ files.
  runtime_ = std::make_unique<CCRuntime>(this,
#include "runtime/base.h"
                                         "\n",
#include "runtime/base.c"
                                         "\n");
  // Compile the runtime immediately so its object file is available when
  // relink() collects objects.
  runtime_->compile();
  context_ = std::make_unique<CCContext>();
}
17 | |
18 | FunctionType CCProgramImpl::compile(const CompileConfig &compile_config, |
19 | Kernel *kernel) { |
20 | CCKernelGen codegen(compile_config, kernel, this); |
21 | auto ker = codegen.compile(); |
22 | auto ker_ptr = ker.get(); |
23 | this->add_kernel(std::move(ker)); |
24 | return [ker_ptr](RuntimeContext &ctx) { return ker_ptr->launch(&ctx); }; |
25 | } |
26 | |
27 | void CCProgramImpl::materialize_runtime(MemoryPool *memory_pool, |
28 | KernelProfilerBase *, |
29 | uint64 **result_buffer_ptr) { |
30 | TI_ASSERT(*result_buffer_ptr == nullptr); |
31 | *result_buffer_ptr = (uint64 *)memory_pool->allocate( |
32 | sizeof(uint64) * taichi_result_buffer_entries, 8); |
33 | result_buffer_ = *result_buffer_ptr; |
34 | } |
35 | |
36 | void CCProgramImpl::materialize_snode_tree(SNodeTree *tree, |
37 | uint64 *result_buffer) { |
38 | auto *const root = tree->root(); |
39 | CCLayoutGen gen(this, root); |
40 | layout_ = gen.compile(); |
41 | size_t root_size = layout_->compile(); |
42 | size_t gtmp_size = taichi_global_tmp_buffer_size; |
43 | size_t args_size = taichi_result_buffer_entries * sizeof(uint64); |
44 | |
45 | TI_INFO("[cc] C backend root buffer size: {} B" , root_size); |
46 | |
47 | ActionRecorder::get_instance().record( |
48 | "allocate_buffer" , { |
49 | ActionArg("root_size" , (int32)root_size), |
50 | ActionArg("gtmp_size" , (int32)gtmp_size), |
51 | }); |
52 | |
53 | root_buf_.resize(root_size, 0); |
54 | gtmp_buf_.resize(gtmp_size, 0); |
55 | args_buf_.resize(args_size, 0); |
56 | |
57 | context_->root = root_buf_.data(); |
58 | context_->gtmp = gtmp_buf_.data(); |
59 | context_->args = (uint64 *)args_buf_.data(); |
60 | context_->earg = nullptr; |
61 | } |
62 | |
63 | void CCProgramImpl::add_kernel(std::unique_ptr<CCKernel> kernel) { |
64 | kernels_.push_back(std::move(kernel)); |
65 | need_relink_ = true; |
66 | } |
67 | |
68 | void CCKernel::compile() { |
69 | if (!kernel_->is_evaluator) |
70 | ActionRecorder::get_instance().record( |
71 | "compile_kernel" , { |
72 | ActionArg("kernel_name" , name_), |
73 | ActionArg("kernel_source" , source_), |
74 | }); |
75 | |
76 | obj_path_ = fmt::format("{}/{}.o" , runtime_tmp_dir, name_); |
77 | src_path_ = fmt::format("{}/{}.c" , runtime_tmp_dir, name_); |
78 | |
79 | std::ofstream(src_path_) << cc_program_impl_->get_runtime()->header << "\n" |
80 | << cc_program_impl_->get_layout()->source << "\n" |
81 | << source_; |
82 | TI_DEBUG("[cc] compiling [{}] -> [{}]:\n{}\n" , name_, obj_path_, source_); |
83 | execute(cc_program_impl_->config->cc_compile_cmd, obj_path_, src_path_); |
84 | } |
85 | |
86 | void CCRuntime::compile() { |
87 | ActionRecorder::get_instance().record("compile_runtime" , |
88 | { |
89 | ActionArg("runtime_header" , header), |
90 | ActionArg("runtime_source" , source), |
91 | }); |
92 | |
93 | obj_path_ = fmt::format("{}/_rti_runtime.o" , runtime_tmp_dir); |
94 | src_path_ = fmt::format("{}/_rti_runtime.c" , runtime_tmp_dir); |
95 | |
96 | std::ofstream(src_path_) << header << "\n" << source; |
97 | TI_DEBUG("[cc] compiling runtime -> [{}]:\n{}\n" , obj_path_, source); |
98 | execute(cc_program_impl_->config->cc_compile_cmd, obj_path_, src_path_); |
99 | } |
100 | |
101 | void CCKernel::launch(RuntimeContext *ctx) { |
102 | if (!kernel_->is_evaluator) |
103 | ActionRecorder::get_instance().record("launch_kernel" , |
104 | { |
105 | ActionArg("kernel_name" , name_), |
106 | }); |
107 | |
108 | cc_program_impl_->relink(); |
109 | TI_TRACE("[cc] entering kernel [{}]" , name_); |
110 | auto entry = cc_program_impl_->load_kernel(name_); |
111 | TI_ASSERT(entry); |
112 | auto *context = cc_program_impl_->update_context(ctx); |
113 | (*entry)(context); |
114 | cc_program_impl_->context_to_result_buffer(); |
115 | TI_TRACE("[cc] leaving kernel [{}]" , name_); |
116 | } |
117 | |
118 | size_t CCLayout::compile() { |
119 | ActionRecorder::get_instance().record("compile_layout" , |
120 | { |
121 | ActionArg("layout_source" , source), |
122 | }); |
123 | |
124 | obj_path_ = fmt::format("{}/_rti_root.o" , runtime_tmp_dir); |
125 | src_path_ = fmt::format("{}/_rti_root.c" , runtime_tmp_dir); |
126 | auto dll_path = fmt::format("{}/libti_roottest.so" , runtime_tmp_dir); |
127 | |
128 | std::ofstream(src_path_) << cc_program_impl_->get_runtime()->header << "\n" |
129 | << source << "\n" |
130 | << "void *Ti_get_root_size(void) { \n" |
131 | << " return (void *) sizeof(struct Ti_S0root);\n" |
132 | << "}\n" ; |
133 | |
134 | TI_DEBUG("[cc] compiling root struct -> [{}]:\n{}\n" , obj_path_, source); |
135 | execute(cc_program_impl_->config->cc_compile_cmd, obj_path_, src_path_); |
136 | |
137 | TI_DEBUG("[cc] linking root struct object [{}] -> [{}]" , obj_path_, dll_path); |
138 | execute(cc_program_impl_->config->cc_link_cmd, dll_path, obj_path_); |
139 | |
140 | TI_DEBUG("[cc] loading root struct object: {}" , dll_path); |
141 | DynamicLoader dll(dll_path); |
142 | TI_ASSERT_INFO(dll.loaded(), "[cc] could not load shared object: {}" , |
143 | dll_path); |
144 | |
145 | using FuncGetRootSizeType = size_t(); |
146 | auto get_root_size = reinterpret_cast<FuncGetRootSizeType *>( |
147 | dll.load_function("Ti_get_root_size" )); |
148 | TI_ASSERT(get_root_size); |
149 | return (*get_root_size)(); |
150 | } |
151 | |
152 | void CCProgramImpl::relink() { |
153 | if (!need_relink_) |
154 | return; |
155 | |
156 | dll_path_ = fmt::format("{}/libti_program.so" , runtime_tmp_dir); |
157 | |
158 | std::vector<std::string> objects; |
159 | objects.push_back(runtime_->get_object()); |
160 | for (auto const &ker : kernels_) { |
161 | objects.push_back(ker->get_object()); |
162 | } |
163 | |
164 | TI_DEBUG("[cc] linking shared object [{}] with [{}]" , dll_path_, |
165 | fmt::join(objects, "] [" )); |
166 | execute(this->config->cc_link_cmd, dll_path_, fmt::join(objects, "' '" )); |
167 | |
168 | dll_ = nullptr; |
169 | TI_DEBUG("[cc] loading shared object: {}" , dll_path_); |
170 | dll_ = std::make_unique<DynamicLoader>(dll_path_); |
171 | TI_ASSERT_INFO(dll_->loaded(), "[cc] could not load shared object: {}" , |
172 | dll_path_); |
173 | |
174 | need_relink_ = false; |
175 | } |
176 | |
177 | CCFuncEntryType *CCProgramImpl::load_kernel(std::string const &name) { |
178 | return reinterpret_cast<CCFuncEntryType *>(dll_->load_function("Tk_" + name)); |
179 | } |
180 | |
181 | CCContext *CCProgramImpl::update_context(RuntimeContext *ctx) { |
182 | // TODO(k-ye): Do you have other zero-copy ideas for arg buf? |
183 | std::memcpy(context_->args, ctx->args, taichi_max_num_args * sizeof(uint64)); |
184 | context_->earg = (int *)ctx->extra_args; |
185 | return context_.get(); |
186 | } |
187 | |
188 | void CCProgramImpl::context_to_result_buffer() { |
189 | TI_ASSERT(result_buffer_); |
190 | std::memcpy(result_buffer_, context_->args, |
191 | taichi_max_num_ret_value * sizeof(uint64)); |
192 | context_->earg = nullptr; |
193 | } |
194 | |
namespace cccp {

// Reports whether the C backend can be used; this build always answers yes.
bool is_c_backend_available() {
  const bool available = true;
  return available;
}

}  // namespace cccp
200 | |
201 | } // namespace taichi::lang |
202 | |