/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
15 | |
#include "tensorflow/core/framework/metrics.h"

#include <cstdint>
#include <string>

#include "absl/strings/str_cat.h"
#include "tensorflow/core/lib/monitoring/counter.h"
#include "tensorflow/core/lib/monitoring/gauge.h"
#include "tensorflow/core/lib/monitoring/sampler.h"
#include "tensorflow/core/protobuf/data_service.pb.h"
26 | |
27 | namespace tensorflow { |
28 | namespace metrics { |
29 | namespace { |
30 | |
31 | auto* graph_runs = monitoring::Counter<0>::New( |
32 | "/tensorflow/core/graph_runs" , |
33 | "The number of graph executions used to collect " |
34 | "/tensorflow/core/graph_run_time_usecs" ); |
35 | |
36 | auto* graph_run_time_usecs = monitoring::Counter<0>::New( |
37 | "/tensorflow/core/graph_run_time_usecs" , |
38 | "The total time spent on executing graphs in microseconds." ); |
39 | |
40 | auto* graph_run_time_usecs_histogram = monitoring::Sampler<0>::New( |
41 | {"/tensorflow/core/graph_run_time_usecs_histogram" , |
42 | "The wall-clock time spent on executing graphs in microseconds." }, |
43 | // Power of 2 with bucket count 20 (> 17 minutes) |
44 | {monitoring::Buckets::Exponential(1000, 2, 20)}); |
45 | |
46 | auto* graph_pending_queue_length_histogram = monitoring::Sampler<0>::New( |
47 | {"/tensorflow/core/graph_pending_queue_length_histogram" , |
48 | "The number of pending (ready but not running) tasks in graph executor." }, |
49 | // Power of 1.5 with bucket count 30 (> 191k) |
50 | {monitoring::Buckets::Exponential(1, 1.5, 30)}); |
51 | |
52 | auto* graph_run_input_tensor_bytes = monitoring::Sampler<0>::New( |
53 | {"/tensorflow/core/graph_run_input_tensor_bytes" , |
54 | "The size of input tensors in bytes." }, |
55 | // Power of 2 with bucket count 14 (256MB) |
56 | {monitoring::Buckets::Exponential(1, 4, 14)}); |
57 | |
58 | auto* graph_run_output_tensor_bytes = monitoring::Sampler<0>::New( |
59 | {"/tensorflow/core/graph_run_output_tensor_bytes" , |
60 | "The size of output tensors in bytes." }, |
61 | // Power of 2 with bucket count 14 (256MB) |
62 | {monitoring::Buckets::Exponential(1, 4, 14)}); |
63 | |
64 | auto* graph_unused_outputs = monitoring::Counter<1>::New( |
65 | "/tensorflow/core/graph_unused_outputs" , |
66 | "The number of unused outputs for ops of a given type." , "name" ); |
67 | |
68 | auto* tf_data_autotune_counter = monitoring::Counter<1>::New( |
69 | "/tensorflow/data/autotune" , "tf.data autotuning" , "name" ); |
70 | |
71 | auto* tf_data_bytes_consumed_counter = monitoring::Counter<1>::New( |
72 | "/tensorflow/data/bytes_consumed" , |
73 | "The number of bytes consumed by a tf.data Dataset." , "name" ); |
74 | |
75 | auto* tf_data_bytes_produced_counter = monitoring::Counter<1>::New( |
76 | "/tensorflow/data/bytes_produced" , |
77 | "The number of bytes produced by a tf.data Dataset." , "name" ); |
78 | |
79 | auto* tf_data_bytes_read_counter = monitoring::Counter<1>::New( |
80 | "/tensorflow/data/bytes_read" , |
81 | "The number of bytes read by tf.data Dataset sources." , "name" ); |
82 | |
83 | auto* tf_data_bytes_fetched_counter = monitoring::Counter<0>::New( |
84 | "/tensorflow/data/bytes_fetched" , |
85 | "The number of bytes fetched from tf.data Dataset iterator." ); |
86 | |
87 | auto* tf_data_elements_counter = monitoring::Counter<1>::New( |
88 | "/tensorflow/data/elements" , "tf.data elements" , "name" ); |
89 | |
90 | auto* tf_data_experiment_counter = monitoring::Counter<1>::New( |
91 | "/tensorflow/data/experiment" , |
92 | "The number of times tf.data experiment is applied to input pipelines." , |
93 | "name" ); |
94 | |
95 | auto* tf_data_fingerprint_counter = monitoring::Counter<1>::New( |
96 | "/tensorflow/data/fingerprint" , "tf.data fingerprint" , "name" ); |
97 | |
98 | auto* tf_data_get_next_duration_usecs_histogram = monitoring::Sampler<0>::New( |
99 | {"/tensorflow/data/getnext_duration" , |
100 | "Microseconds spent fetching an element from tf.data iterator." }, |
101 | // Power of 2 with bucket count 10 (1024 microseconds) and 1 second. |
102 | {monitoring::Buckets::Explicit( |
103 | {2., 4., 8., 16., 32., 64., 128., 256., 512., 1024., 1e6})}); |
104 | |
105 | auto* tf_data_used_vs_budget_ratio_histogram = monitoring::Sampler<0>::New( |
106 | {"/tensorflow/data/used_vs_budget_ratio" , |
107 | "Ratio of tf.data used ram over ram budget when running optimization." }, |
108 | // Uniform linear buckets with count 10 from 0 to 2 |
109 | {monitoring::Buckets::Explicit( |
110 | {0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0})}); |
111 | |
112 | auto* tf_data_buffered_vs_budget_ratio_histogram = monitoring::Sampler<0>::New( |
113 | {"/tensorflow/data/buffered_vs_budget_ratio" , |
114 | "Ratio of tf.data max buffer bytes over ram budget when running " |
115 | "optimization." }, |
116 | // Uniform linear buckets with count 10 from 0 to 2 |
117 | {monitoring::Buckets::Explicit( |
118 | {0.0, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0})}); |
119 | |
120 | auto* tf_data_iterator_busy_counter = |
121 | monitoring::Counter<0>::New("/tensorflow/data/iterator_busy" , |
122 | "The time (in microseconds) during which a " |
123 | "tf.data iterator was busy processing at " |
124 | "least one `GetNext()` request." ); |
125 | |
126 | auto* tf_data_iterator_lifetime_counter = monitoring::Counter<0>::New( |
127 | "/tensorflow/data/iterator_lifetime" , |
128 | "The time (in microseconds) between a tf.data iterator receiving the first " |
129 | "`GetNext()` request and responding to the last `GetNext()` request." ); |
130 | |
131 | auto* tf_data_iterator_gap_msec_histogram = monitoring::Sampler<0>::New( |
132 | {"/tensorflow/data/iterator_gap" , |
133 | "The time (in milliseconds) between a tf.data iterator responding to a " |
134 | "`GetNext()` request and receiving the next `GetNext()` request." }, |
135 | // Power of 1.5 with bucket count of 20 (from 1 msec to about 2.2 secs). |
136 | {monitoring::Buckets::Exponential(1, 1.5, 20)}); |
137 | |
138 | auto* tf_data_optimization_counter = monitoring::Counter<1>::New( |
139 | "/tensorflow/data/optimization" , "tf.data optimization" , "name" ); |
140 | |
141 | auto* tf_data_service_workers_created_counter = |
142 | monitoring::Counter<0>::New("/tensorflow/data/service/workers_created" , |
143 | "Number of tf.data service workers created" ); |
144 | |
145 | auto* tf_data_service_jobs_created_counter = monitoring::Counter<2>::New( |
146 | "/tensorflow/data/service/jobs_created" , "Number of tf.data service jobs." , |
147 | "processing_mode" , "coordinated_read" ); |
148 | |
149 | auto* tf_data_service_client_iterators_counter = monitoring::Counter<4>::New( |
150 | "/tensorflow/data/service/client_iterators" , |
151 | "Number of tf.data service client iterators created." , "worker_uid" , |
152 | "deployment_mode" , "processing_mode" , "is_coordinated_read" ); |
153 | |
154 | auto* tf_data_service_cross_trainer_cache_queries_counter = |
155 | monitoring::Counter<1>::New( |
156 | "/tensorflow/data/service/cross_trainer_cache_queries" , |
157 | "tf.data service cross-trainer cache queries counter. The result can " |
158 | "be hit or miss." , |
159 | "cache_hit" ); |
160 | |
161 | auto* tf_data_service_cross_trainer_cache_size_bytes = |
162 | monitoring::Gauge<int64_t, 0>::New( |
163 | "/tensorflow/data/service/cross_trainer_cache_size_bytes" , |
164 | "tf.data service cross-trainer cache memory usage in bytes." ); |
165 | |
166 | auto* tf_data_filename_counter = monitoring::Counter<2>::New( |
167 | "/tensorflow/data/filename" , "The file name read by a tf.data Dataset." , |
168 | "name" , "filename" ); |
169 | |
170 | auto* tf_data_model_gauge = |
171 | monitoring::Gauge<std::function<std::string()>, 1>::New( |
172 | "/tensorflow/data/model" , "tf.data autotuning model proto." , "id" ); |
173 | |
174 | auto* tf_data_auto_shard = monitoring::Gauge<int64, 2>::New( |
175 | "/tensorflow/data/autoshard" , "tf.data autoshard statistics." , "id" , |
176 | "name" ); |
177 | |
178 | auto* tf_data_auto_shard_rewrite_batch_size_eligible = |
179 | monitoring::Counter<1>::New( |
180 | "/tensorflow/data/autoshard_rewrite_batch_size/eligible" , |
181 | "Whether tf.data pipelines that are eligible for autoshard " |
182 | "to rewrite the batch size." , |
183 | "eligible" ); |
184 | |
185 | auto* tf_data_auto_shard_rewrite_batch_size_reason = |
186 | monitoring::Counter<1>::New( |
187 | "/tensorflow/data/autoshard_rewrite_batch_size/reason" , |
188 | "The reasons that tf.data pipelines are ineligible for autoshard " |
189 | "to rewrite the batch size." , |
190 | "reason" ); |
191 | |
192 | auto* tf_data_autotune_stopping_criteria_counter = |
193 | monitoring::Counter<1>::New("/tensorflow/data/autotune_stopping_criteria" , |
194 | "The number of times each tf.data autotune " |
195 | "algorithm stopping criterion is met." , |
196 | "name" ); |
197 | |
198 | auto* parse_dense_feature_counter = monitoring::Counter<0>::New( |
199 | "/tensorflow/data/dense_feature" , |
200 | "The number of dense features parsed by ops for parsing tf.Example." ); |
201 | |
202 | auto* parse_sparse_feature_counter = monitoring::Counter<0>::New( |
203 | "/tensorflow/data/sparse_feature" , |
204 | "The number of sparse features parsed by ops for parsing tf.Example." ); |
205 | |
206 | auto* parse_ragged_feature_counter = monitoring::Counter<0>::New( |
207 | "/tensorflow/data/ragged_feature" , |
208 | "The number of ragged features parsed by ops for parsing tf.Example." ); |
209 | |
210 | auto* build_graph_calls = monitoring::Counter<0>::New( |
211 | "/tensorflow/core/graph_build_calls" , |
212 | "The number of times TensorFlow has created a new client graph. " |
213 | "A client graph is a sub-graph of the full graph, induced by a set of " |
214 | "options, including the requested feeds and fetches. It includes time " |
215 | "spent optimizing the graph with Grappler, and time spent pruning the " |
216 | "sub-graph." ); |
217 | |
218 | auto* build_graph_time_usecs = monitoring::Counter<0>::New( |
219 | "/tensorflow/core/graph_build_time_usecs" , |
220 | "The amount of time TensorFlow has spent creating new client graphs in " |
221 | "microseconds. " |
222 | "A client graph is a sub-graph of the full graph, induced by a set of " |
223 | "options, including the requested feeds and fetches. It includes time " |
224 | "spent optimizing the graph with Grappler, and time spent pruning the " |
225 | "sub-graph." ); |
226 | |
227 | auto* function_graph_optimization_time_usecs = monitoring::Counter<0>::New( |
228 | "/tensorflow/core/function_graph_optimization_time_usecs" , |
229 | "The amount of time TensorFlow has spent optimizing function graphs, in " |
230 | "microseconds. " ); |
231 | |
232 | auto* xla_compilations = monitoring::Counter<0>::New( |
233 | "/tensorflow/core/xla_compilations" , |
234 | "The number of XLA compilations used to collect " |
235 | "/tensorflow/core/xla_compilation_time_usecs" ); |
236 | |
237 | auto* xla_compilation_time_usecs = monitoring::Counter<0>::New( |
238 | "/tensorflow/core/xla_compilation_time_usecs" , |
239 | "The total time spent on compiling XLA graphs in microseconds." ); |
240 | |
241 | auto* xla_tpu_spmd_cores_per_replica = monitoring::Counter<1>::New( |
242 | "/tensorflow/tpu/xla_spmd_cores_per_replica" , |
243 | "The number of cores used by XLA SPMD-replicated models." , "cores" ); |
244 | |
245 | auto* bfc_allocator_delay = |
246 | monitoring::Counter<0>::New("/tensorflow/core/bfc_allocator_delay" , |
247 | "The total time spent running each graph " |
248 | "optimization pass in microseconds." ); |
249 | |
250 | auto* tpu_variable_distribution_time_usecs = monitoring::Counter<0>::New( |
251 | "/tensorflow/tpu/variable_distribution_time" , |
252 | "Time spent sending variables from primary task to other worker tasks " |
253 | "at the start of a call to TPUExecute. Timer starts at RunGraph " |
254 | "invocation and ends when TPUExecute args are ready on the current task." ); |
255 | |
256 | auto* test_counters = |
257 | monitoring::Counter<2>::New("/tensorflow/core/test_counters" , |
258 | "Counters used for testing." , "name" , "label" ); |
259 | |
260 | } // namespace |
261 | |
262 | auto* tpu_op_error_counter = monitoring::Counter<2>::New( |
263 | "/tensorflow/tpu/op_error_count" , |
264 | "Count the tpu related errors by op and error_type." , "op" , "error_type" ); |
265 | |
266 | auto* eager_client_error_counter = monitoring::Counter<2>::New( |
267 | "/tensorflow/core/eager_client_error_count" , |
268 | "Count the errors in eager client as a central place." , "error_source" , |
269 | "error_type" ); |
270 | |
271 | monitoring::Counter<2>* GetGraphOptimizationCounter() { |
272 | static auto* graph_optimization_counter = |
273 | monitoring::Counter<2>::New("/tensorflow/core/graph_optimization_usecs" , |
274 | "The total time spent running each graph " |
275 | "optimization pass in microseconds." , |
276 | "kind" , "name" ); |
277 | return graph_optimization_counter; |
278 | } |
279 | |
280 | void RecordTFDataAutotune(const string& name) { |
281 | tf_data_autotune_counter->GetCell(name)->IncrementBy(1); |
282 | } |
283 | |
284 | monitoring::CounterCell* GetTFDataBytesConsumedCounter(const string& name) { |
285 | return tf_data_bytes_consumed_counter->GetCell(name); |
286 | } |
287 | |
288 | monitoring::CounterCell* GetTFDataBytesProducedCounter(const string& name) { |
289 | return tf_data_bytes_produced_counter->GetCell(name); |
290 | } |
291 | |
292 | monitoring::CounterCell* GetTFDataBytesReadCounter(const string& name) { |
293 | return tf_data_bytes_read_counter->GetCell(name); |
294 | } |
295 | |
296 | monitoring::CounterCell* GetTFDataElementsCounter(const string& name) { |
297 | return tf_data_elements_counter->GetCell(name); |
298 | } |
299 | |
300 | monitoring::GaugeCell<std::function<std::string()>>* GetTFDataModelGauge( |
301 | const string& id) { |
302 | return tf_data_model_gauge->GetCell(id); |
303 | } |
304 | |
305 | void RecordTFDataBytesFetched(int64_t num_bytes) { |
306 | tf_data_bytes_fetched_counter->GetCell()->IncrementBy(num_bytes); |
307 | } |
308 | |
309 | void RecordTFDataExperiment(const string& name) { |
310 | tf_data_experiment_counter->GetCell(name)->IncrementBy(1); |
311 | } |
312 | |
313 | void RecordTFDataFingerprint(const string& name) { |
314 | tf_data_fingerprint_counter->GetCell(name)->IncrementBy(1); |
315 | } |
316 | |
317 | void RecordTFDataGetNextDuration(uint64 duration_us) { |
318 | static auto* tf_data_get_next_duration_cell = |
319 | tf_data_get_next_duration_usecs_histogram->GetCell(); |
320 | tf_data_get_next_duration_cell->Add(duration_us); |
321 | } |
322 | |
323 | void RecordTFDataAutotuneUsedRamBudgetRatio(const double ratio) { |
324 | static auto* tf_data_used_vs_budget_ratio_histogram_cell = |
325 | tf_data_used_vs_budget_ratio_histogram->GetCell(); |
326 | tf_data_used_vs_budget_ratio_histogram_cell->Add(ratio); |
327 | } |
328 | |
329 | void RecordTFDataAutotuneMaxBufferBudgetRatio(const double ratio) { |
330 | static auto* tf_data_buffered_vs_budget_ratio_histogram_cell = |
331 | tf_data_buffered_vs_budget_ratio_histogram->GetCell(); |
332 | tf_data_buffered_vs_budget_ratio_histogram_cell->Add(ratio); |
333 | } |
334 | |
335 | void RecordTFDataIteratorBusy(uint64 duration_us) { |
336 | static auto* tf_data_iterator_busy_cell = |
337 | tf_data_iterator_busy_counter->GetCell(); |
338 | tf_data_iterator_busy_cell->IncrementBy(duration_us); |
339 | } |
340 | |
341 | void RecordTFDataIteratorLifetime(uint64 duration_us) { |
342 | static auto* tf_data_iterator_lifetime_cell = |
343 | tf_data_iterator_lifetime_counter->GetCell(); |
344 | tf_data_iterator_lifetime_cell->IncrementBy(duration_us); |
345 | } |
346 | |
347 | void RecordTFDataIteratorGap(uint64 duration_us) { |
348 | static auto* tf_data_iterator_gap_msec_histogram_cell = |
349 | tf_data_iterator_gap_msec_histogram->GetCell(); |
350 | tf_data_iterator_gap_msec_histogram_cell->Add(duration_us * 0.001); |
351 | } |
352 | |
353 | void RecordTFDataOptimization(const string& name, int64_t num_changes) { |
354 | tf_data_optimization_counter->GetCell(name)->IncrementBy(num_changes); |
355 | } |
356 | |
357 | void RecordTFDataServiceWorkerCreated() { |
358 | tf_data_service_workers_created_counter->GetCell()->IncrementBy(1); |
359 | } |
360 | |
361 | void RecordTFDataServiceJobsCreated( |
362 | const tensorflow::data::ProcessingModeDef& processing_mode, |
363 | bool is_coordinated_read) { |
364 | const std::string sharding_policy_str = |
365 | data::ProcessingModeDef::ShardingPolicy_Name( |
366 | processing_mode.sharding_policy()); |
367 | const std::string coordinated_read_str = |
368 | is_coordinated_read ? "true" : "false" ; |
369 | tf_data_service_jobs_created_counter |
370 | ->GetCell(sharding_policy_str, coordinated_read_str) |
371 | ->IncrementBy(1); |
372 | } |
373 | |
374 | void RecordTFDataServiceClientIterators( |
375 | int64_t worker_uid, tensorflow::data::DeploymentMode deployment_mode, |
376 | const tensorflow::data::ProcessingModeDef& processing_mode, |
377 | bool is_coordinated_read) { |
378 | const std::string deployment_mode_str = |
379 | tensorflow::data::DeploymentMode_Name(deployment_mode); |
380 | const std::string sharding_policy_str = |
381 | data::ProcessingModeDef::ShardingPolicy_Name( |
382 | processing_mode.sharding_policy()); |
383 | const std::string coordinated_read_str = |
384 | is_coordinated_read ? "true" : "false" ; |
385 | tf_data_service_client_iterators_counter |
386 | ->GetCell(absl::StrCat(worker_uid), deployment_mode_str, |
387 | sharding_policy_str, coordinated_read_str) |
388 | ->IncrementBy(1); |
389 | } |
390 | |
391 | void RecordTFDataServiceCrossTrainerCacheQuery(bool cache_hit) { |
392 | std::string cache_hit_str = cache_hit ? "true" : "false" ; |
393 | tf_data_service_cross_trainer_cache_queries_counter->GetCell(cache_hit_str) |
394 | ->IncrementBy(1); |
395 | } |
396 | |
397 | void RecordTFDataServiceCrossTrainerCacheSizeBytes(size_t bytes) { |
398 | tf_data_service_cross_trainer_cache_size_bytes->GetCell()->Set( |
399 | static_cast<int64_t>(bytes)); |
400 | } |
401 | |
402 | void RecordTFDataFilename(const string& name, const string& filename) { |
403 | tf_data_filename_counter->GetCell(name, filename)->IncrementBy(1); |
404 | } |
405 | |
406 | void RecordTFDataAutoShard(const string& id, data::AutoShardPolicy policy, |
407 | int64 num_workers, int64 num_replicas) { |
408 | tf_data_auto_shard->GetCell(id, "policy" )->Set(static_cast<int64_t>(policy)); |
409 | tf_data_auto_shard->GetCell(id, "num_workers" )->Set(num_workers); |
410 | tf_data_auto_shard->GetCell(id, "num_replicas" )->Set(num_replicas); |
411 | } |
412 | |
413 | void RecordTFDataAutoShardRewriteBatchSize( |
414 | bool eligible, const std::vector<string>& ineligible_reason) { |
415 | tf_data_auto_shard_rewrite_batch_size_eligible |
416 | ->GetCell(eligible ? "true" : "false" ) |
417 | ->IncrementBy(1); |
418 | for (const string& reason : ineligible_reason) { |
419 | tf_data_auto_shard_rewrite_batch_size_reason->GetCell(reason)->IncrementBy( |
420 | 1); |
421 | } |
422 | } |
423 | |
424 | void RecordTFDataAutotuneStoppingCriteria(const string& name) { |
425 | tf_data_autotune_stopping_criteria_counter->GetCell(name)->IncrementBy(1); |
426 | } |
427 | |
428 | void RecordParseDenseFeature(int64 num_features) { |
429 | static auto* parse_dense_feature_counter_cell = |
430 | parse_dense_feature_counter->GetCell(); |
431 | parse_dense_feature_counter_cell->IncrementBy(num_features); |
432 | } |
433 | |
434 | void RecordParseSparseFeature(int64_t num_features) { |
435 | static auto* parse_sparse_feature_counter_cell = |
436 | parse_sparse_feature_counter->GetCell(); |
437 | parse_sparse_feature_counter_cell->IncrementBy(num_features); |
438 | } |
439 | |
440 | void RecordParseRaggedFeature(int64_t num_features) { |
441 | static auto* parse_ragged_feature_counter_cell = |
442 | parse_ragged_feature_counter->GetCell(); |
443 | parse_ragged_feature_counter_cell->IncrementBy(num_features); |
444 | } |
445 | |
446 | void RecordGraphInputTensors(const size_t size) { |
447 | static auto* graph_run_input_tensor_bytes_cell = |
448 | graph_run_input_tensor_bytes->GetCell(); |
449 | graph_run_input_tensor_bytes_cell->Add(size); |
450 | } |
451 | |
452 | void RecordGraphOutputTensors(const size_t size) { |
453 | static auto* graph_run_output_tensor_bytes_cell = |
454 | graph_run_output_tensor_bytes->GetCell(); |
455 | graph_run_output_tensor_bytes_cell->Add(size); |
456 | } |
457 | |
458 | void RecordTPUXlaSpmdCoresPerReplica(int64_t cores_per_replica) { |
459 | xla_tpu_spmd_cores_per_replica->GetCell(absl::StrCat(cores_per_replica)) |
460 | ->IncrementBy(1); |
461 | } |
462 | |
463 | void UpdateGraphExecTime(const uint64 running_time_usecs) { |
464 | if (running_time_usecs > 0) { |
465 | static auto* graph_runs_cell = graph_runs->GetCell(); |
466 | static auto* graph_run_time_usecs_cell = graph_run_time_usecs->GetCell(); |
467 | static auto* graph_run_time_usecs_histogram_cell = |
468 | graph_run_time_usecs_histogram->GetCell(); |
469 | graph_runs_cell->IncrementBy(1); |
470 | graph_run_time_usecs_cell->IncrementBy(running_time_usecs); |
471 | graph_run_time_usecs_histogram_cell->Add(running_time_usecs); |
472 | } |
473 | } |
474 | |
475 | void UpdateGraphPendingQueueLength(uint64 len) { |
476 | static auto* graph_pending_queue_length_cell = |
477 | graph_pending_queue_length_histogram->GetCell(); |
478 | graph_pending_queue_length_cell->Add(len); |
479 | } |
480 | |
481 | void UpdateGraphBuildTime(const uint64 running_time_usecs) { |
482 | if (running_time_usecs > 0) { |
483 | static auto* build_graph_calls_cell = build_graph_calls->GetCell(); |
484 | static auto* build_graph_time_usecs_cell = |
485 | build_graph_time_usecs->GetCell(); |
486 | build_graph_calls_cell->IncrementBy(1); |
487 | build_graph_time_usecs_cell->IncrementBy(running_time_usecs); |
488 | } |
489 | } |
490 | |
491 | void UpdateFunctionGraphOptimizationTime(const uint64 running_time_usecs) { |
492 | if (running_time_usecs > 0) { |
493 | static auto* function_graph_optimization_time_usecs_cell = |
494 | function_graph_optimization_time_usecs->GetCell(); |
495 | function_graph_optimization_time_usecs_cell->IncrementBy( |
496 | running_time_usecs); |
497 | } |
498 | } |
499 | |
500 | void UpdateTpuVariableDistributionTime(const uint64 distribution_time_usecs) { |
501 | if (distribution_time_usecs > 0) { |
502 | tpu_variable_distribution_time_usecs->GetCell()->IncrementBy( |
503 | distribution_time_usecs); |
504 | } |
505 | } |
506 | |
507 | void UpdateXlaCompilationTime(const uint64 compilation_time_usecs) { |
508 | if (compilation_time_usecs > 0) { |
509 | static auto* xla_compilations_cell = xla_compilations->GetCell(); |
510 | static auto* xla_compilation_time_usecs_cell = |
511 | xla_compilation_time_usecs->GetCell(); |
512 | xla_compilations_cell->IncrementBy(1); |
513 | xla_compilation_time_usecs_cell->IncrementBy(compilation_time_usecs); |
514 | } |
515 | } |
516 | |
517 | void UpdateBfcAllocatorDelayTime(const uint64 delay_usecs) { |
518 | static auto* bfc_allocator_delay_cell = bfc_allocator_delay->GetCell(); |
519 | if (delay_usecs > 0) { |
520 | bfc_allocator_delay_cell->IncrementBy(delay_usecs); |
521 | } |
522 | } |
523 | |
524 | void RecordUnusedOutput(const string& op_name) { |
525 | graph_unused_outputs->GetCell(op_name)->IncrementBy(1); |
526 | } |
527 | |
528 | void IncrementTestCounter(const string& name, const string& label) { |
529 | test_counters->GetCell(name, label)->IncrementBy(1); |
530 | } |
531 | |
532 | const monitoring::CounterCell* TestCounter(const string& name, |
533 | const string& label) { |
534 | return test_counters->GetCell(name, label); |
535 | } |
536 | |
537 | TestDelta::TestDelta(const string& name, const string& label) |
538 | : cell_(TestCounter(name, label)) { |
539 | Reset(); |
540 | } |
541 | |
542 | void TestDelta::Reset() { last_value_ = cell_->value(); } |
543 | |
544 | int64 TestDelta::Get() { return cell_->value() - last_value_; } |
545 | |
546 | void UpdateTfMlirBridgeFirstPhaseCounter(const std::string& device_type, |
547 | const std::string& bridge_version, |
548 | bool fallback_enabled, |
549 | const std::string& result) { |
550 | static auto* metric = monitoring::Counter<4>::New( |
551 | "/tensorflow/core/tf_mlir_bridge_first_phase_count" , |
552 | "Tracks processing state in first phase of mlir bridge" , "device" , |
553 | "version" , "fallback" , "result" ); |
554 | std::string fallback_status = |
555 | fallback_enabled ? "fallback_enabled" : "fallback_disabled" ; |
556 | metric->GetCell(device_type, bridge_version, fallback_status, result) |
557 | ->IncrementBy(1); |
558 | } |
559 | |
560 | void UpdateTpuErrorCounter(const string& op, const string& error_type) { |
561 | tpu_op_error_counter->GetCell(op, error_type)->IncrementBy(1); |
562 | } |
563 | |
564 | void UpdateEagerClientErrorCounter(const string& error_source, |
565 | const string& error_type) { |
566 | eager_client_error_counter->GetCell(error_source, error_type)->IncrementBy(1); |
567 | } |
568 | |
569 | void UpdateTfMlirBridgeGraphAnalysisPerOp( |
570 | const std::string& op_name, const std::string& construction_context, |
571 | bool is_single_core_inference_mode, const std::string& num_replicas, |
572 | const std::string& num_cores_per_replica, const std::string& use_tpu, |
573 | const std::string& allow_soft_placement, |
574 | const std::string& use_spmd_for_xla_partitioning, |
575 | const std::string& unsupported_reason, bool has_unsupported_features) { |
576 | static auto* metric = monitoring::Counter<10>::New( |
577 | "/tensorflow/core/tf_mlir_bridge_graph_analysis_per_op" , |
578 | "Tracks processing state per op in first phase of mlir bridge" , "op_name" , |
579 | "construction_context" , "is_single_core_inference_mode" , "num_replicas" , |
580 | "num_cores_per_replica" , "use_tpu" , "allow_soft_placement" , |
581 | "use_spmd_for_xla_partitioning" , "unsupported_reason" , |
582 | "has_unsupported_features" ); |
583 | |
584 | metric |
585 | ->GetCell(op_name, construction_context, |
586 | is_single_core_inference_mode ? "Yes" : "No" , num_replicas, |
587 | num_cores_per_replica, use_tpu, allow_soft_placement, |
588 | use_spmd_for_xla_partitioning, unsupported_reason, |
589 | has_unsupported_features ? "Yes" : "No" ) |
590 | ->IncrementBy(1); |
591 | } |
592 | |
593 | } // namespace metrics |
594 | } // namespace tensorflow |
595 | |