complexity.cc source code [pytorch/third_party/benchmark/src/complexity.cc]

1	// Copyright 2016 Ismael Jimenez Martinez. All rights reserved.
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// http://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	// Source project : https://github.com/ismaelJimenez/cpp.leastsq
16	// Adapted to be used with google benchmark
17
18	#include "complexity.h"
19
20	#include <algorithm>
21	#include <cmath>
22
23	#include "benchmark/benchmark.h"
24	#include "check.h"
25
26	namespace benchmark {
27
28	// Internal function to calculate the different scalability forms
29	BigOFunc* FittingCurve(BigO complexity) {
30	static const double kLog2E = `1.44269504088896340736`;
31	switch (complexity) {
32	case oN:
33	return [](IterationCount n) -> double { return static_cast<double>(n); };
34	case oNSquared:
35	return [](IterationCount n) -> double { return std::pow(n, `2`); };
36	case oNCubed:
37	return [](IterationCount n) -> double { return std::pow(n, `3`); };
38	case oLogN:
39	/ Note: can't use log2 because Android's GNU STL lacks it /
40	return
41	[](IterationCount n) { return kLog2E * log(static_cast<double>(n)); };
42	case oNLogN:
43	/ Note: can't use log2 because Android's GNU STL lacks it /
44	return [](IterationCount n) {
45	return kLog2E * n * log(static_cast<double>(n));
46	};
47	case o1:
48	default:
49	return [](IterationCount) { return `1.0`; };
50	}
51	}
52
53	// Function to return an string for the calculated complexity
54	std::string GetBigOString(BigO complexity) {
55	switch (complexity) {
56	case oN:
57	return "N";
58	case oNSquared:
59	return "N^2";
60	case oNCubed:
61	return "N^3";
62	case oLogN:
63	return "lgN";
64	case oNLogN:
65	return "NlgN";
66	case o1:
67	return "(1)";
68	default:
69	return "f(N)";
70	}
71	}
72
73	// Find the coefficient for the high-order term in the running time, by
74	// minimizing the sum of squares of relative error, for the fitting curve
75	// given by the lambda expression.
76	// - n : Vector containing the size of the benchmark tests.
77	// - time : Vector containing the times for the benchmark tests.
78	// - fitting_curve : lambda expression (e.g. [](int64_t n) {return n; };).
79
80	// For a deeper explanation on the algorithm logic, please refer to
81	// https://en.wikipedia.org/wiki/Least_squares#Least_squares,_regression_analysis_and_statistics
82
83	LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
84	const std::vector<double>& time,
85	BigOFunc* fitting_curve) {
86	double sigma_gn_squared = `0.0`;
87	double sigma_time = `0.0`;
88	double sigma_time_gn = `0.0`;
89
90	// Calculate least square fitting parameter
91	for (size_t i = `0`; i < n.size(); ++i) {
92	double gn_i = fitting_curve(n [i]);
93	sigma_gn_squared += gn_i * gn_i;
94	sigma_time += time [i];
95	sigma_time_gn += time [i] * gn_i;
96	}
97
98	LeastSq result;
99	result.complexity = oLambda;
100
101	// Calculate complexity.
102	result.coef = sigma_time_gn / sigma_gn_squared;
103
104	// Calculate RMS
105	double rms = `0.0`;
106	for (size_t i = `0`; i < n.size(); ++i) {
107	double fit = result.coef * fitting_curve(n [i]);
108	rms += pow((time [i] - fit), `2`);
109	}
110
111	// Normalized RMS by the mean of the observed values
112	double mean = sigma_time / n.size();
113	result.rms = sqrt(rms / n.size()) / mean;
114
115	return result;
116	}
117
118	// Find the coefficient for the high-order term in the running time, by
119	// minimizing the sum of squares of relative error.
120	// - n : Vector containing the size of the benchmark tests.
121	// - time : Vector containing the times for the benchmark tests.
122	// - complexity : If different than oAuto, the fitting curve will stick to
123	// this one. If it is oAuto, it will be calculated the best
124	// fitting curve.
125	LeastSq MinimalLeastSq(const std::vector<int64_t>& n,
126	const std::vector<double>& time, const BigO complexity) {
127	BM_CHECK_EQ(n.size(), time.size());
128	BM_CHECK_GE(n.size(), `2`); // Do not compute fitting curve is less than two
129	// benchmark runs are given
130	BM_CHECK_NE(complexity, oNone);
131
132	LeastSq best_fit;
133
134	if (complexity == oAuto) {
135	std::vector<BigO> fit_curves = {oLogN, oN, oNLogN, oNSquared, oNCubed};
136
137	// Take o1 as default best fitting curve
138	best_fit = MinimalLeastSq(n, time, FittingCurve(o1));
139	best_fit.complexity = o1;
140
141	// Compute all possible fitting curves and stick to the best one
142	for (const auto& fit : fit_curves) {
143	LeastSq current_fit = MinimalLeastSq(n, time, FittingCurve(fit));
144	if (current_fit.rms < best_fit.rms) {
145	best_fit = current_fit;
146	best_fit.complexity = fit;
147	}
148	}
149	} else {
150	best_fit = MinimalLeastSq(n, time, FittingCurve(complexity));
151	best_fit.complexity = complexity;
152	}
153
154	return best_fit;
155	}
156
157	std::vector<BenchmarkReporter::Run> ComputeBigO(
158	const std::vector<BenchmarkReporter::Run>& reports) {
159	typedef BenchmarkReporter::Run Run;
160	std::vector<Run> results;
161
162	if (reports.size() < `2`) return results;
163
164	// Accumulators.
165	std::vector<int64_t> n;
166	std::vector<double> real_time;
167	std::vector<double> cpu_time;
168
169	// Populate the accumulators.
170	for (const Run& run : reports) {
171	BM_CHECK_GT(run.complexity_n, `0`)
172	<< "Did you forget to call SetComplexityN?";
173	n.push_back(run.complexity_n);
174	real_time.push_back(run.real_accumulated_time / run.iterations);
175	cpu_time.push_back(run.cpu_accumulated_time / run.iterations);
176	}
177
178	LeastSq result_cpu;
179	LeastSq result_real;
180
181	if (reports [`0`].complexity == oLambda) {
182	result_cpu = MinimalLeastSq(n, cpu_time, reports [`0`].complexity_lambda);
183	result_real = MinimalLeastSq(n, real_time, reports [`0`].complexity_lambda);
184	} else {
185	result_cpu = MinimalLeastSq(n, cpu_time, reports [`0`].complexity);
186	result_real = MinimalLeastSq(n, real_time, result_cpu.complexity);
187	}
188
189	// Drop the 'args' when reporting complexity.
190	auto run_name = reports [`0`].run_name;
191	run_name.args.clear();
192
193	// Get the data from the accumulator to BenchmarkReporter::Run's.
194	Run big_o;
195	big_o.run_name = run_name;
196	big_o.family_index = reports [`0`].family_index;
197	big_o.per_family_instance_index = reports [`0`].per_family_instance_index;
198	big_o.run_type = BenchmarkReporter::Run::RT_Aggregate;
199	big_o.repetitions = reports [`0`].repetitions;
200	big_o.repetition_index = Run::no_repetition_index;
201	big_o.threads = reports [`0`].threads;
202	big_o.aggregate_name = "BigO";
203	big_o.aggregate_unit = StatisticUnit::kTime;
204	big_o.report_label = reports [`0`].report_label;
205	big_o.iterations = `0`;
206	big_o.real_accumulated_time = result_real.coef;
207	big_o.cpu_accumulated_time = result_cpu.coef;
208	big_o.report_big_o = true;
209	big_o.complexity = result_cpu.complexity;
210
211	// All the time results are reported after being multiplied by the
212	// time unit multiplier. But since RMS is a relative quantity it
213	// should not be multiplied at all. So, here, we _divide_ it by the
214	// multiplier so that when it is multiplied later the result is the
215	// correct one.
216	double multiplier = GetTimeUnitMultiplier(reports [`0`].time_unit);
217
218	// Only add label to mean/stddev if it is same for all runs
219	Run rms;
220	rms.run_name = run_name;
221	rms.family_index = reports [`0`].family_index;
222	rms.per_family_instance_index = reports [`0`].per_family_instance_index;
223	rms.run_type = BenchmarkReporter::Run::RT_Aggregate;
224	rms.aggregate_name = "RMS";
225	rms.aggregate_unit = StatisticUnit::kPercentage;
226	rms.report_label = big_o.report_label;
227	rms.iterations = `0`;
228	rms.repetition_index = Run::no_repetition_index;
229	rms.repetitions = reports [`0`].repetitions;
230	rms.threads = reports [`0`].threads;
231	rms.real_accumulated_time = result_real.rms / multiplier;
232	rms.cpu_accumulated_time = result_cpu.rms / multiplier;
233	rms.report_rms = true;
234	rms.complexity = result_cpu.complexity;
235	// don't forget to keep the time unit, or we won't be able to
236	// recover the correct value.
237	rms.time_unit = reports [`0`].time_unit;
238
239	results.push_back(big_o);
240	results.push_back(rms);
241	return results;
242	}
243
244	} // end namespace benchmark
245

Browse the source code of pytorch/third_party/benchmark/src/complexity.cc