ANIMA  4.0
animaBoundedLevenbergMarquardtOptimizer.cxx
Go to the documentation of this file.
2 #include <limits>
3 #include <animaQRDecomposition.h>
5 
6 namespace anima
7 {
8 
10 {
11  m_CurrentPosition = this->GetInitialPosition();
12  ParametersType parameters(m_CurrentPosition);
13 
14  unsigned int nbParams = parameters.size();
15 
16  MeasureType newResidualValues;
17 
18  m_CurrentValue = this->EvaluateCostFunctionAtParameters(parameters,m_ResidualValues);
19  unsigned int numResiduals = m_ResidualValues.size();
20 
21  unsigned int numIterations = 0;
22  bool stopConditionReached = false;
23  bool rejectedStep = false;
24 
25  DerivativeType derivativeMatrix(numResiduals,nbParams);
26  DerivativeType derivativeMatrixCopy;
27  ParametersType oldParameters = parameters;
28  ParametersType dValues(nbParams);
29 
30  // Be careful here: we consider the problem of the form |f(x)|^2, J is thus the Jacobian of f
31  // If f is itself y - g(x), then J = - J_g which is what is on the wikipedia page
32  // We assume each entry of derivativeMatrix(i,j) is df_i / dx_j
33  m_CostFunction->GetDerivative(parameters,derivativeMatrix);
34  derivativeMatrixCopy = derivativeMatrix;
35 
36  bool derivativeCheck = false;
37  for (unsigned int i = 0;i < nbParams;++i)
38  {
39  for (unsigned int j = 0;j < numResiduals;++j)
40  {
41  if (std::abs(derivativeMatrix.get(i,j)) > std::sqrt(std::numeric_limits <double>::epsilon()))
42  {
43  derivativeCheck = true;
44  break;
45  }
46  }
47 
48  if (derivativeCheck)
49  break;
50  }
51 
52  if (!derivativeCheck)
53  return;
54 
55  m_DeltaParameter = 0.0;
56  double maxDValue = 0.0;
57 
58  for (unsigned int i = 0;i < nbParams;++i)
59  {
60  double normValue = 0.0;
61  for (unsigned int j = 0;j < numResiduals;++j)
62  {
63  double tmpVal = derivativeMatrix.get(j,i);
64  normValue += tmpVal * tmpVal;
65  }
66 
67  dValues[i] = std::sqrt(normValue);
68  if (dValues[i] != 0.0)
69  {
70  if ((i == 0) || (dValues[i] > maxDValue))
71  maxDValue = dValues[i];
72  }
73  }
74 
75  double basePower = std::floor(std::log(maxDValue) / std::log(2.0));
76  double epsilon = 20.0 * std::numeric_limits <double>::epsilon() * (numResiduals + nbParams) * std::pow(2.0,basePower);
77 
78  // Change the scaling d-values if they are below a threshold of matrix rank (as in QR decomposition)
79  for (unsigned int i = 0;i < nbParams;++i)
80  {
81  if (dValues[i] < epsilon)
82  dValues[i] = epsilon;
83 
84  m_DeltaParameter += dValues[i] * parameters[i] * parameters[i];
85  }
86 
87  m_DeltaParameter = std::sqrt(m_DeltaParameter);
88 
89  unsigned int rank = 0;
90  // indicates ones in pivot matrix as pivot(pivotVector(i),i) = 1
91  std::vector <unsigned int> pivotVector(nbParams);
92  // indicates ones in pivot matrix as pivot(i,inversePivotVector(i)) = 1
93  std::vector <unsigned int> inversePivotVector(nbParams);
94  std::vector <double> qrBetaValues(nbParams);
95  ParametersType qtResiduals = m_ResidualValues;
96  ParametersType lowerBoundsPermutted(nbParams);
97  ParametersType oldParametersPermutted(nbParams);
98  ParametersType upperBoundsPermutted(nbParams);
99  anima::QRPivotDecomposition(derivativeMatrix,pivotVector,qrBetaValues,rank);
100  anima::GetQtBFromQRPivotDecomposition(derivativeMatrix,qtResiduals,qrBetaValues,rank);
101  for (unsigned int i = 0;i < nbParams;++i)
102  inversePivotVector[pivotVector[i]] = i;
103 
104  m_LambdaCostFunction->SetInputWorkMatricesAndVectorsFromQRDerivative(derivativeMatrix,qtResiduals,rank);
105  m_LambdaCostFunction->SetJRank(rank);
106  m_LambdaCostFunction->SetDValues(dValues);
107  m_LambdaCostFunction->SetPivotVector(pivotVector);
108  m_LambdaCostFunction->SetInversePivotVector(inversePivotVector);
109 
110  while (!stopConditionReached)
111  {
112  ++numIterations;
113 
114  for (unsigned int i = 0;i < nbParams;++i)
115  {
116  lowerBoundsPermutted[i] = m_LowerBounds[pivotVector[i]];
117  upperBoundsPermutted[i] = m_UpperBounds[pivotVector[i]];
118  oldParametersPermutted[i] = oldParameters[pivotVector[i]];
119  }
120 
121  m_LambdaCostFunction->SetLowerBoundsPermutted(lowerBoundsPermutted);
122  m_LambdaCostFunction->SetUpperBoundsPermutted(upperBoundsPermutted);
123  m_LambdaCostFunction->SetPreviousParametersPermutted(oldParametersPermutted);
124 
125  // Updates lambda and get new addon vector at the same time
126  this->UpdateLambdaParameter(derivativeMatrix,dValues,pivotVector,qtResiduals,rank);
127 
128  parameters = oldParameters;
129  parameters += m_CurrentAddonVector;
130 
131  // Check acceptability of step, careful because EvaluateCostFunctionAtParameters returns the squared cost
132  double tentativeNewCostValue = this->EvaluateCostFunctionAtParameters(parameters,newResidualValues);
133  rejectedStep = (tentativeNewCostValue > m_CurrentValue);
134 
135  double acceptRatio = 0.0;
136 
137  if (!rejectedStep)
138  {
139  acceptRatio = 1.0 - tentativeNewCostValue / m_CurrentValue;
140 
141  // Compute || f + Jp ||^2
142  double fjpNorm = 0.0;
143  for (unsigned int i = 0;i < numResiduals;++i)
144  {
145  double fjpAddonValue = m_ResidualValues[i];
146 
147  for (unsigned int j = 0;j < nbParams;++j)
148  fjpAddonValue += derivativeMatrixCopy.get(i,j) * m_CurrentAddonVector[j];
149 
150  fjpNorm += fjpAddonValue * fjpAddonValue;
151  }
152 
153  double denomAcceptRatio = 1.0 - fjpNorm / m_CurrentValue;
154 
155  if (denomAcceptRatio > 0.0)
156  acceptRatio /= denomAcceptRatio;
157  else
158  acceptRatio = 0.0;
159  }
160 
161  if (acceptRatio >= 0.75)
162  {
163  // Increase Delta
164  m_DeltaParameter *= 2.0;
165  }
166  else if (acceptRatio <= 0.25)
167  {
168  double mu = 0.5;
169  if (tentativeNewCostValue > 100.0 * m_CurrentValue)
170  mu = 0.1;
171  else if (tentativeNewCostValue > m_CurrentValue)
172  {
173  // Gamma is p^T J^T f / |f|^2
174  double gamma = 0.0;
175  for (unsigned int i = 0;i < nbParams;++i)
176  {
177  double jtFValue = 0.0;
178  for (unsigned int j = 0;j < numResiduals;++j)
179  jtFValue += derivativeMatrixCopy.get(j,i) * m_ResidualValues[i];
180 
181  gamma += m_CurrentAddonVector[i] * jtFValue / m_CurrentValue;
182  }
183 
184  if (gamma < - 1.0)
185  gamma = - 1.0;
186  else if (gamma > 0.0)
187  gamma = 0.0;
188 
189  mu = 0.5 * gamma;
190  double denomMu = gamma + 0.5 * (1.0 - tentativeNewCostValue / m_CurrentValue);
191  mu /= denomMu;
192 
193  mu = std::min(0.5,std::max(0.1,mu));
194  }
195 
196  m_DeltaParameter *= mu;
197  }
198 
199  if (!rejectedStep)
200  {
201  m_ResidualValues = newResidualValues;
202  m_CostFunction->GetDerivative(parameters,derivativeMatrix);
203 
204  for (unsigned int i = 0;i < nbParams;++i)
205  {
206  double normValue = 0;
207  for (unsigned int j = 0;j < numResiduals;++j)
208  {
209  double tmpVal = derivativeMatrix.get(j,i);
210  normValue += tmpVal * tmpVal;
211  }
212 
213  normValue = std::sqrt(normValue);
214  dValues[i] = std::max(dValues[i], normValue);
215  }
216 
217  derivativeMatrixCopy = derivativeMatrix;
218 
219  qtResiduals = m_ResidualValues;
220  anima::QRPivotDecomposition(derivativeMatrix,pivotVector,qrBetaValues,rank);
221  anima::GetQtBFromQRPivotDecomposition(derivativeMatrix,qtResiduals,qrBetaValues,rank);
222  for (unsigned int i = 0;i < nbParams;++i)
223  inversePivotVector[pivotVector[i]] = i;
224 
225  m_LambdaCostFunction->SetInputWorkMatricesAndVectorsFromQRDerivative(derivativeMatrix,qtResiduals,rank);
226  m_LambdaCostFunction->SetJRank(rank);
227  m_LambdaCostFunction->SetDValues(dValues);
228  m_LambdaCostFunction->SetPivotVector(pivotVector);
229  m_LambdaCostFunction->SetInversePivotVector(inversePivotVector);
230  }
231 
232  if (numIterations != 1)
233  stopConditionReached = this->CheckConditions(numIterations,parameters,oldParameters,dValues,
234  tentativeNewCostValue);
235 
236  if (!rejectedStep)
237  {
238  oldParameters = parameters;
239  m_CurrentValue = tentativeNewCostValue;
240  }
241  }
242 
243  this->SetCurrentPosition(oldParameters);
244 }
245 
246 bool BoundedLevenbergMarquardtOptimizer::CheckSolutionIsInBounds(ParametersType &solutionVector, ParametersType &lowerBounds,
247  ParametersType &upperBounds, unsigned int rank)
248 {
249  for (unsigned int i = 0;i < rank;++i)
250  {
251  if (solutionVector[i] < lowerBounds[i])
252  return false;
253 
254  if (solutionVector[i] > upperBounds[i])
255  return false;
256  }
257 
258  return true;
259 }
260 
261 void BoundedLevenbergMarquardtOptimizer::UpdateLambdaParameter(DerivativeType &derivative, ParametersType &dValues,
262  std::vector <unsigned int> &pivotVector,
263  ParametersType &qtResiduals, unsigned int rank)
264 {
265  m_LambdaCostFunction->SetDeltaParameter(m_DeltaParameter);
266 
267  ParametersType p(m_LambdaCostFunction->GetNumberOfParameters());
268  p[0] = 0.0;
269 
270  double zeroCost = m_LambdaCostFunction->GetValue(p);
271  if (zeroCost <= 0.0)
272  {
273  m_LambdaParameter = 0.0;
274  m_CurrentAddonVector = m_LambdaCostFunction->GetSolutionVector();
275  return;
276  }
277 
278  double lowerBoundLambda, upperBoundLambda;
279  lowerBoundLambda = 0.0;
280  upperBoundLambda = 0.0;
281 
282  unsigned int n = derivative.cols();
283 
284  // Compute upper bound for lambda: D^-1 * pi * R^t * Q^t * residuals
285  double u0InVectorPart;
286  for (unsigned int i = 0;i < n;++i)
287  {
288  u0InVectorPart = 0.0;
289  unsigned int maxIndex = std::min(i + 1,rank);
290  for (unsigned int j = 0;j < maxIndex;++j)
291  u0InVectorPart += derivative.get(j,i) * qtResiduals[j];
292 
293  // u0InVectorPart is the one that goes into pivotVector[i] so we divide by the good d value
294  upperBoundLambda += (u0InVectorPart / dValues[pivotVector[i]]) * (u0InVectorPart / dValues[pivotVector[i]]);
295  }
296 
297  upperBoundLambda = std::sqrt(upperBoundLambda) / m_DeltaParameter;
298 
299  // More advises 0.1 * m_DeltaParameter but
300  // - results suggest that it is recommended to be more precise in the search of the zero;
301  // - a relative tolerance w.r.t. Delta might lead to solutions far from zero when Delta is large.
302  // hence we set up an absolute fTol to the machine precision.
303  double fTolAbs = 2.0 * std::sqrt(std::numeric_limits<double>::epsilon());
304  double xTolRel = 2.0 * std::sqrt(std::numeric_limits<double>::epsilon());
305  // Computing maximal number of dichotomy iterations: min spacing tolerated at the end is 10^-8
306  double logsDiff = std::log(upperBoundLambda) - 0.5 * std::log(std::numeric_limits<double>::epsilon());
307  unsigned int maxCount = static_cast<unsigned int> (1.0 + logsDiff / std::log(2.0));
308 
309  DekkerRootFindingAlgorithm algorithm;
310 
311  algorithm.SetRootRelativeTolerance(xTolRel);
312  algorithm.SetCostFunctionTolerance(fTolAbs);
313  algorithm.SetRootFindingFunction(m_LambdaCostFunction);
314  algorithm.SetMaximumNumberOfIterations(maxCount);
315  algorithm.SetLowerBound(lowerBoundLambda);
316  algorithm.SetUpperBound(upperBoundLambda);
317  algorithm.SetFunctionValueAtInitialLowerBound(zeroCost);
318 
319  m_LambdaParameter = algorithm.Optimize();
320  m_CurrentAddonVector = m_LambdaCostFunction->GetSolutionVector();
321 }
322 
324 {
325  residualValues = m_CostFunction->GetValue(parameters);
326 
327  unsigned int numResiduals = residualValues.size();
328  double costValue = 0.0;
329  for (unsigned int i = 0;i < numResiduals;++i)
330  costValue += residualValues[i] * residualValues[i];
331 
332  return costValue;
333 }
334 
335 bool BoundedLevenbergMarquardtOptimizer::CheckConditions(unsigned int numIterations, ParametersType &newParams,
336  ParametersType &oldParams, ParametersType &dValues, double newCostValue)
337 {
338  if (numIterations == m_NumberOfIterations)
339  return true;
340 
341  // Criterion as in More, equation 8.3
342  unsigned int numParams = newParams.size();
343  double dxNew = 0.0;
344  double dxDiff = 0.0;
345  for (unsigned int i = 0;i < numParams;++i)
346  {
347  double oldValue = dValues[i] * oldParams[i];
348  double newValue = dValues[i] * newParams[i];
349  dxNew += newValue * newValue;
350  dxDiff += (newValue - oldValue) * (newValue - oldValue);
351  }
352 
353  dxNew = std::sqrt(dxNew);
354 
355  if (m_DeltaParameter <= m_ValueTolerance * dxNew)
356  return true;
357 
358  // Criterion as in More, 8.4 equation
359  double fDiff = m_CurrentValue - newCostValue;
360 
361  if ((fDiff >= 0.0) && (fDiff <= m_CostTolerance * m_CurrentValue))
362  return true;
363 
364  // xTol relative tolerance check "NLOpt style"
365  dxDiff = std::sqrt(dxDiff);
366 
367  if (dxDiff <= m_ValueTolerance * dxNew)
368  return true;
369 
370  return false;
371 }
372 
373 } // end namespace anima
void QRPivotDecomposition(vnl_matrix< ScalarType > &aMatrix, std::vector< unsigned int > &pivotVector, std::vector< ScalarType > &houseBetaValues, unsigned int &rank)
void GetQtBFromQRPivotDecomposition(vnl_matrix< ScalarType > &qrMatrix, vnl_vector< ScalarType > &bVector, std::vector< ScalarType > &houseBetaValues, unsigned int rank)
void SetFunctionValueAtInitialLowerBound(const double &val)
bool CheckSolutionIsInBounds(ParametersType &solutionVector, ParametersType &lowerBounds, ParametersType &upperBounds, unsigned int rank)
void UpdateLambdaParameter(DerivativeType &derivative, ParametersType &dValues, std::vector< unsigned int > &pivotVector, ParametersType &qtResiduals, unsigned int rank)
bool CheckConditions(unsigned int numIterations, ParametersType &newParams, ParametersType &oldParams, ParametersType &dValues, double newCostValue)
double EvaluateCostFunctionAtParameters(ParametersType &parameters, MeasureType &residualValues)
void SetMaximumNumberOfIterations(const unsigned int &val)
void SetRootFindingFunction(BaseCostFunctionType *f)