llvm.cc source code [triton/lib/driver/llvm.cc]

/* Copyright 2015-2017 Philippe Tillet
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files
 * (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge,
 * publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so,
 * subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include <algorithm>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <regex>
#include <stdexcept>
#include <string>
#include <vector>
#if __has_include(<unistd.h>)
#include <unistd.h>
#endif
#include "triton/driver/llvm.h"
#include "triton/driver/dispatch.h"
#include "triton/driver/error.h"
#include "triton/tools/sha1.hpp"
#include "triton/tools/sys/getenv.hpp"
#include "triton/tools/sys/mkdir.hpp"
#include "triton/tools/sys/exec.hpp"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Scalar.h"

// begin AMD stuff
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
// end AMD stuff
61
// No-op definitions of four terminfo entry points. Every stub ignores its
// arguments and returns 0.
// NOTE(review): presumably these exist so that linking against LLVM does not
// pull in a real terminfo/ncurses library -- confirm against the build setup.
extern "C"
{
  int set_curterm(char *nterm) { return 0; }
  int del_curterm(char *nterm) { return 0; }
  int tigetnum(char *capname) { return 0; }
  int setupterm(char *term, int fildes, int *errret) { return 0; }
}
69
70	namespace triton
71	{
72	namespace driver
73	{
74
75	void init_llvm()
76	{
77	LLVMInitializeNVPTXTargetInfo();
78	LLVMInitializeNVPTXTarget();
79	LLVMInitializeNVPTXTargetMC();
80	LLVMInitializeNVPTXAsmPrinter();
81	LLVMInitializeAMDGPUTargetInfo();
82	LLVMInitializeAMDGPUTarget();
83	LLVMInitializeAMDGPUTargetMC();
84	LLVMInitializeAMDGPUAsmPrinter();
85	}
86
/* ------------------------ */
//         CUDA             //
/* ------------------------ */
// Replaces the first span of `str` that starts at the first occurrence of
// `begin` and runs up to and including the first character of the next
// occurrence of `end` (searched from the start of that `begin` match) with
// `target`.
//
// If `begin` is present but `end` is not, everything from `begin` to the end
// of the string is replaced. This case is handled explicitly: with the naive
// length computation, a match at index 0 with no terminator yields a length
// of 0, so nothing is removed although `true` is returned, and a caller that
// loops until `false` never terminates.
//
// Returns true when a replacement was made, false when `begin` is absent
// (in which case `str` is left untouched).
static bool find_and_replace(std::string &str, const std::string &begin, const std::string &end, const std::string &target)
{
  const size_t start_replace = str.find(begin);
  if (start_replace == std::string::npos)
    return false;

  const size_t end_replace = str.find(end, start_replace);
  if (end_replace == std::string::npos)
  {
    // No terminator: drop the whole tail (replace() clamps the count).
    str.replace(start_replace, std::string::npos, target);
    return true;
  }

  str.replace(start_replace, end_replace + 1 - start_replace, target);
  return true;
}
99
100	std::string path_to_ptxas(int &version)
101	{
102	std::vector<std::string> rets;
103	std::string ret;
104	// search paths for ptxas
105	std::vector<std::string> ptxas_prefixes = {"", "/usr/local/cuda/bin/"};
106	std::string triton_ptxas = tools::getenv("TRITON_PTXAS_PATH");
107	if (!triton_ptxas.empty())
108	ptxas_prefixes.insert(ptxas_prefixes.begin(), triton_ptxas);
109	// see what path for ptxas are valid
110	std::vector<std::string> working_ptxas;
111	for (std::string prefix : ptxas_prefixes)
112	{
113	std::string ptxas = prefix + "ptxas";
114	bool works = tools::exec(ptxas + " --version 2>&1", ret) == `0`;
115	if (works)
116	{
117	working_ptxas.push_back(ptxas);
118	rets.push_back(ret);
119	}
120	}
121	// error if no working ptxas was found
122	if (working_ptxas.empty())
123	throw std::runtime_error ("`ptxas` was searched in TRITON_PTXAS_PATH, /usr/local/cuda/bin/ or PATH"
124	" but a working version could not be found.");
125	std::string ptxas = working_ptxas.front();
126	// parse version
127	std::regex version_regex("release (\\d+)\\.(\\d+)");
128	std::smatch match;
129	bool found = false;
130	// currently choosing the first ptxas. Other logics can be implemented in future
131	for (std::string ret : rets)
132	{
133	if (std::regex_search(ret, match, version_regex))
134	{
135	int major = std::stoi(match [`1`]);
136	int minor = std::stoi(match [`2`]);
137	version = major * `1000` + minor * `10`;
138	found = true;
139	break;
140	}
141	}
142	if (not found)
143	{
144	throw std::runtime_error ("Error in parsing version");
145	}
146	return ptxas;
147	}
148
// Maps an encoded CUDA version (the comparison below uses 11040, i.e.
// major * 1000 + minor * 10 for 11.4) to the PTX ISA version to emit,
// encoded as major * 10 + minor (74 == PTX 7.4).
//
// Only CUDA 11.4 and newer are accepted; mappings for older toolkits
// (11.3 -> 73, 11.2 -> 72, 11.1 -> 71, 11.0 -> 70, 10.2 -> 65, 10.1 -> 64,
// 10.0 -> 63) are intentionally disabled.
//
// @throws std::runtime_error for any version below 11040.
int vptx(int version)
{
  if (version >= 11040)
    return 74;
  throw std::runtime_error("Triton requires CUDA 11.4+");
}
162
163	std::string llir_to_ptx(llvm::Module module, int* cc, int version)
164	{
165	// LLVM version in use may not officially support target hardware
166	int max_nvvm_cc = `75`;
167	int max_nvvm_ptx = `74`;
168	// options
169	auto options = llvm::cl::getRegisteredOptions();
170	auto short_ptr = static_cast<llvm::cl::opt<bool> >(options["nvptx-short-ptr"]);
171	assert(short_ptr);
172	short_ptr->setValue(true);
173	// compute capability
174	std::string sm = "sm_" + std::to_string(cc);
175	// max PTX version
176	int ptx = vptx(version);
177	int ptx_major = ptx / `10`;
178	int ptx_minor = ptx % `10`;
179	// create
180	llvm::SmallVector<char, `0`> buffer;
181	std::string triple = "nvptx64-nvidia-cuda";
182	std::string proc = "sm_" + std::to_string(std::min(cc, max_nvvm_cc));
183	std::string layout = "";
184	std::string features = "";
185	// std::string features = "+ptx" + std::to_string(std::min(ptx, max_nvvm_ptx));
186	init_llvm();
187	// verify and store llvm
188	llvm::legacy::PassManager pm;
189	// pm.add(llvm::createPrintModulePass(llvm::outs()));
190	pm.add(llvm::createVerifierPass());
191	pm.run(*module);
192	// module->print(llvm::outs(), nullptr);
193
194	// create machine
195	module->setTargetTriple(triple);
196	std::string error;
197	llvm::TargetMachine *machine;
198	auto target = llvm::TargetRegistry::lookupTarget(module->getTargetTriple(), error);
199	llvm::TargetOptions opt;
200	opt.AllowFPOpFusion = llvm::FPOpFusion::Fast;
201	opt.UnsafeFPMath = false;
202	opt.NoInfsFPMath = false;
203	opt.NoNaNsFPMath = true;
204	machine = target->createTargetMachine(module->getTargetTriple(), proc, features, opt,
205	llvm::Reloc::PIC_, llvm::None, llvm::CodeGenOpt::Aggressive);
206	// set data layout
207	if (layout.empty())
208	module->setDataLayout(machine->createDataLayout());
209	else
210	module->setDataLayout(layout);
211	// emit machine code
212	for (llvm::Function &f : module->functions())
213	f.addFnAttr(llvm::Attribute::AlwaysInline);
214	llvm::legacy::PassManager pass;
215	llvm::raw_svector_ostream stream(buffer);
216	// emit
217	machine->addPassesToEmitFile(pass, stream, nullptr, llvm::CodeGenFileType::CGFT_AssemblyFile);
218	pass.run(*module);
219
220	// post-process
221	std::string result(buffer.begin(), buffer.end());
222	find_and_replace(result, ".version", "\n", ".version " + std::to_string(ptx_major) + "." + std::to_string(ptx_minor) + "\n");
223	find_and_replace(result, ".target", "\n", ".target " + sm + "\n");
224	while (find_and_replace(result, "\t// begin inline asm", "\n", ""))
225	;
226	while (find_and_replace(result, "\t// end inline asm", "\n", ""))
227	;
228	return result;
229	}
230
// Assembles PTX text into a cubin by invoking `ptxas` through the shell.
//
// The PTX is written to a temporary file, `ptxas -v --gpu-name=sm_<cc>` is
// run with stderr redirected to a second temporary file, and the resulting
// "<src>.o" is read back. All temporary files are removed before returning
// or throwing.
//
// Temporary files are created with mkstemp(), which creates each file
// atomically under a unique name (std::tmpnam only returns a name, leaving a
// window in which another process can create that path first).
//
// @param ptx    PTX source to assemble.
// @param ptxas  command or path of the ptxas executable.
// @param cc     compute capability as an integer (e.g. 80 -> sm_80).
// @return       raw bytes of the produced object; empty if the output file
//               could not be read.
// @throws std::runtime_error if a temporary file cannot be created or
//         written, or if the ptxas command exits with a non-zero status (the
//         captured stderr is appended to the message).
std::string ptx_to_cubin(const std::string &ptx, const std::string &ptxas, int cc)
{
  // create the temporary source and log files
  char src_template[] = "/tmp/triton_ptx_XXXXXX";
  char log_template[] = "/tmp/triton_log_XXXXXX";
  const int src_fd = mkstemp(src_template);
  if (src_fd < 0)
    throw std::runtime_error("Unable to create temporary file for PTX source");
  close(src_fd);
  const int log_fd = mkstemp(log_template);
  if (log_fd < 0)
  {
    unlink(src_template);
    throw std::runtime_error("Unable to create temporary file for ptxas log");
  }
  close(log_fd);

  const std::string fsrc = src_template;
  const std::string flog = log_template;
  const std::string fbin = fsrc + ".o";
  // unlink() on a file that was never created is harmless, so one helper
  // serves every exit path.
  const auto cleanup = [&]() {
    unlink(fsrc.c_str());
    unlink(flog.c_str());
    unlink(fbin.c_str());
  };

  // write the PTX
  std::ofstream ofs(fsrc);
  ofs << ptx << '\n';
  ofs.close();
  if (ofs.fail())
  {
    cleanup();
    throw std::runtime_error("Unable to write PTX source to " + fsrc);
  }

  // compile ptx with ptxas
  const std::string cmd = ptxas + " -v --gpu-name=sm_" + std::to_string(cc) + " " + fsrc + " -o " + fbin + " 2> " + flog;
  const int err = system(cmd.c_str());
  if (err != 0)
  {
    std::ifstream _log(flog);
    std::string log(std::istreambuf_iterator<char>(_log), {});
    _log.close();
    cleanup();
    throw std::runtime_error("Internal Triton PTX codegen error: \n" + log);
  }

  std::ifstream _cubin(fbin, std::ios::binary);
  std::string cubin(std::istreambuf_iterator<char>(_cubin), {});
  _cubin.close();
  cleanup();
  return cubin;
}
265
/* ------------------------ */
//         HIP              //
/* ------------------------ */
269
270	std::string llir_to_amdgpu(llvm::Module module, const* std::string &_proc)
271	{
272	init_llvm();
273
274	// proc = std::get<0>(GetFeatureStrFromGCNArchName(rocminfo));
275	// features = std::get<1>(GetFeatureStrFromGCNArchName(rocminfo));
276
277	// create
278	llvm::SmallVector<char, `0`> buffer;
279	std::string triple = "amdgcn-amd-amdhsa";
280	std::string layout = "";
281	std::string features;
282	std::string proc = "gfx908";
283	// verify and store llvm
284	llvm::legacy::PassManager pm;
285	pm.add(llvm::createVerifierPass());
286	pm.run(*module);
287	// create machine
288	module->setTargetTriple(triple);
289	std::string error;
290	auto target = llvm::TargetRegistry::lookupTarget(module->getTargetTriple(), error);
291	llvm::TargetOptions opt;
292	opt.AllowFPOpFusion = llvm::FPOpFusion::Fast;
293	opt.UnsafeFPMath = false;
294	opt.NoInfsFPMath = false;
295	opt.NoNaNsFPMath = true;
296	llvm::TargetMachine *machine = target->createTargetMachine(module->getTargetTriple(), proc, features, opt,
297	llvm::Reloc::PIC_, llvm::None,
298	llvm::CodeGenOpt::Aggressive);
299	// set data layout
300	if (layout.empty())
301	module->setDataLayout(machine->createDataLayout());
302	else
303	module->setDataLayout(layout);
304	// emit machine code
305	for (llvm::Function &f : module->functions())
306	f.addFnAttr(llvm::Attribute::AlwaysInline);
307	llvm::legacy::PassManager pass;
308	llvm::raw_svector_ostream stream(buffer);
309
310	// create dump files
311	std::string module_name = module->getModuleIdentifier();
312	std::error_code ec;
313
314	// Save GCN ISA binary.
315	std::string isabin_path = std::string ("/tmp/") + module_name + std::string (".o");
316	std::unique_ptr<llvm::raw_fd_ostream> isabin_fs(
317	new llvm::raw_fd_ostream(isabin_path, ec, llvm::sys::fs::OF_Text));
318	if (ec)
319	{
320	std::cout << isabin_path << " was not created. error code: " << ec << std::endl;
321	}
322
323	// emit
324	machine->addPassesToEmitFile(pass, isabin_fs, nullptr*, llvm::CGFT_ObjectFile);
325	pass.run(*module);
326	// Save GCN ISA.
327	std::string amdgcn_path = std::string ("/tmp/") + module_name + std::string (".gcn");
328	std::string result(buffer.begin(), buffer.end());
329	std::ofstream amdgcn(amdgcn_path);
330	amdgcn << result;
331	amdgcn.close();
332
333	// generate HASCO file
334	std::string hsaco_path = std::string ("/tmp/") + module_name + std::string (".hsaco");
335	std::string error_message;
336	int lld_result =
337	llvm::sys::ExecuteAndWait("/opt/rocm/llvm/bin/ld.lld",
338	{"/opt/rocm/llvm/bin/ld.lld", "-flavor", "gnu", "-shared", "-o", hsaco_path, isabin_path},
339	llvm::None, {}, `0`, `0`, &error_message);
340	if (lld_result)
341	{
342	std::cout << "ld.lld execute fail: " << std::endl;
343	std::cout << error_message << std::endl;
344	std::cout << lld_result << std::endl;
345	}
346
347	return hsaco_path;
348	}
349
350	hipModule_t amdgpu_to_hipmodule(const std::string &path)
351	{
352	// Read HSACO.
353	std::ifstream hsaco_file(path, std::ios::binary \| std::ios::ate);
354	std::ifstream::pos_type hsaco_file_size = hsaco_file.tellg();
355
356	std::vector<unsigned char> hsaco(hsaco_file_size);
357	hsaco_file.seekg(`0`, std::ios::beg);
358	hsaco_file.read(reinterpret_cast<char *>(&hsaco [`0`]), hsaco_file_size);
359	hsaco_file.close();
360	hipJitOption opt[] = {hipJitOptionErrorLogBufferSizeBytes, hipJitOptionErrorLogBuffer,
361	hipJitOptionInfoLogBufferSizeBytes, hipJitOptionInfoLogBuffer,
362	hipJitOptionLogVerbose};
363	const unsigned int errbufsize = `8192`;
364	const unsigned int logbufsize = `8192`;
365	char _err[errbufsize];
366	char _log[logbufsize];
367	void optval[] = {(void* *)(uintptr_t)errbufsize,
368	(void )_err, (void* *)(uintptr_t)logbufsize,
369	(void )_log, (void* *)`1`};
370	hipModule_t ret;
371	dispatch::hipModuleLoadDataEx(&ret, hsaco.data(), `5`, opt, optval);
372	return ret;
373	}
374
375	} // namespace driver
376	} // namespace triton
377

Browse the source code of triton/lib/driver/llvm.cc