1 | /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. |
2 | |
3 | Licensed under the Apache License, Version 2.0 (the "License"); |
4 | you may not use this file except in compliance with the License. |
5 | You may obtain a copy of the License at |
6 | |
7 | http://www.apache.org/licenses/LICENSE-2.0 |
8 | |
9 | Unless required by applicable law or agreed to in writing, software |
10 | distributed under the License is distributed on an "AS IS" BASIS, |
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | See the License for the specific language governing permissions and |
13 | limitations under the License. |
14 | ==============================================================================*/ |
15 | |
16 | #ifndef TENSORFLOW_CORE_KERNELS_LOGISTIC_LOSS_H_ |
17 | #define TENSORFLOW_CORE_KERNELS_LOGISTIC_LOSS_H_ |
18 | |
19 | #include <cmath> |
20 | |
21 | #include "tensorflow/core/kernels/loss.h" |
22 | #include "tensorflow/core/lib/core/errors.h" |
23 | |
24 | namespace tensorflow { |
25 | |
26 | class LogisticLossUpdater : public DualLossUpdater { |
27 | public: |
  // Reference: "Adding vs. Averaging in Distributed Primal-Dual Optimization"
  // by Chenxin Ma, Virginia Smith, Martin Jaggi, Michael I. Jordan,
  // Peter Richtarik and Martin Takac.
  // http://arxiv.org/abs/1502.03508
31 | double ComputeUpdatedDual(const int num_loss_partitions, const double label, |
32 | const double example_weight, |
33 | const double current_dual, const double wx, |
34 | const double weighted_example_norm) const final { |
    // Newton's method converges quadratically, so 10 steps are more than
    // enough to reach high precision.
37 | static const int newton_total_steps = 10; |
38 | double x = 0; |
39 | for (int i = 0; i < newton_total_steps; ++i) { |
40 | x = NewtonStep(x, num_loss_partitions, label, wx, example_weight, |
41 | weighted_example_norm, current_dual); |
42 | } |
43 | return 0.5 * (1 + tanh(x)) / label; |
44 | } |
45 | |
46 | // Dual of logistic loss function. |
47 | // https://en.wikipedia.org/wiki/Convex_conjugate |
48 | double ComputeDualLoss(const double current_dual, const double example_label, |
49 | const double example_weight) const final { |
    // The dual of the logistic loss function is
    // ay * log(ay) + (1 - ay) * log(1 - ay), where a is the dual variable.
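    // The guards below implement the convention x * log(x) -> 0 as x -> 0+,
    // so the boundary values ay == 0 and ay == 1 produce zero dual loss
    // instead of NaN (0 * -inf).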
52 | const double ay = current_dual * example_label; |
53 | const double log_ay = (ay > 0) ? log(ay) : 0; |
54 | const double one_minus_ay = 1 - ay; |
55 | const double log_one_minus_ay = (one_minus_ay > 0) ? log(one_minus_ay) : 0; |
56 | return ((ay * log_ay) + (one_minus_ay * log_one_minus_ay)) * example_weight; |
57 | } |
58 | |
59 | // Logistic loss for binary classification. |
60 | // https://en.wikipedia.org/wiki/Loss_functions_for_classification |
61 | double ComputePrimalLoss(const double wx, const double example_label, |
62 | const double example_weight) const final { |
63 | // Logistic loss: |
64 | // log(1 + e^(-ywx)) |
65 | // log(e^0 + e^(-ywx)) |
66 | // a + log(e^(0-a) + e^(-ywx - a)), where a is max(0, -ywx) |
67 | // https://hips.seas.harvard.edu/blog/2013/01/09/computing-log-sum-exp/ |
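    // Illustrative example: label = 1, wx = 2 and example_weight = 1 give
    // log1p(exp(-2)) ~= 0.1269.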
68 | const double y_wx = example_label * wx; |
69 | if (y_wx > 0) { |
70 | // 0 + log(e^(0) + e^(-ywx - 0)) |
71 | // log(1 + e^(-ywx)) |
72 | return log1p(exp(-y_wx)) * example_weight; |
73 | } |
74 | // -ywx + log(e^(ywx) + e^(-ywx + ywx)) |
75 | // log(e^(ywx) + e^(0)) - ywx |
76 | // log(1 + e^(ywx)) - ywx |
77 | return (log1p(exp(y_wx)) - y_wx) * example_weight; |
78 | } |
79 | |
  // Derivative of the logistic loss.
81 | double PrimalLossDerivative(const double wx, const double label, |
82 | const double example_weight) const final { |
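    // Both branches below compute the same quantity,
    // 1 / (1 + exp(label * wx)); the branch is chosen so that the argument of
    // exp() is never positive, which avoids overflow for large |label * wx|.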
83 | double inverse_exp_term = 0; |
84 | if (label * wx > 0) { |
85 | inverse_exp_term = exp(-label * wx) / (1 + exp(-label * wx)); |
86 | } else { |
87 | inverse_exp_term = 1 / (1 + exp(label * wx)); |
88 | } |
89 | return -inverse_exp_term * label * example_weight; |
90 | } |
91 | |
  // The smoothness constant is 4 since the derivative of the logistic loss,
  // which is exp(-x) / (1 + exp(-x)), can be shown to be 0.25-Lipschitz (its
  // derivative is bounded by 0.25).
95 | double SmoothnessConstant() const final { return 4; } |
96 | |
97 | // Converts binary example labels from 0.0 or 1.0 to -1.0 or 1.0 respectively |
98 | // as expected by logistic regression. |
99 | Status ConvertLabel(float* const example_label) const final { |
100 | if (*example_label == 0.0) { |
101 | *example_label = -1; |
102 | return OkStatus(); |
103 | } |
104 | if (*example_label == 1.0) { |
105 | return OkStatus(); |
106 | } |
    return errors::InvalidArgument(
        "Only labels of 0.0 or 1.0 are supported right now. "
        "Found example with label: ",
        *example_label);
111 | } |
112 | |
113 | private: |
  // We use Newton's method on a modified function (see readme.md).
115 | double NewtonStep(const double x, const int num_loss_partitions, |
116 | const double label, const double wx, |
117 | const double example_weight, |
118 | const double weighted_example_norm, |
119 | const double current_dual) const { |
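    // One Newton iteration, x_{k+1} = x_k - g(x_k) / g'(x_k): `numerator`
    // evaluates g(x), the derivative in x of the modified dual objective, and
    // `denominator` evaluates its derivative g'(x).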
120 | const double tanhx = tanh(x); |
121 | const double numerator = -2 * label * x - wx - |
122 | num_loss_partitions * weighted_example_norm * |
123 | example_weight * |
124 | (0.5 * (1 + tanhx) / label - current_dual); |
125 | const double denominator = |
126 | -2 * label - num_loss_partitions * weighted_example_norm * |
127 | example_weight * (1 - tanhx * tanhx) * 0.5 / label; |
128 | return x - numerator / denominator; |
129 | } |
130 | }; |
131 | |
132 | } // namespace tensorflow |
133 | |
134 | #endif // TENSORFLOW_CORE_KERNELS_LOGISTIC_LOSS_H_ |
135 | |