[Insight-users] Regular Step Gradient Descent optimizer

Tolga Tasdizen tolga at sci.utah.edu
Wed May 3 12:50:39 EDT 2006


Hi,

If I recall correctly, we had found that relaxing the step length on 
gradient vector direction changes (the original implementation) was 
causing divergence in our application. When the function has more than 
one dimension, there is also the question of what constitutes a 
significant change in gradient direction. I believe that evaluating the 
function itself (when possible) and relaxing whenever its value 
increases is a more robust strategy.
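
For concreteness, here is a minimal sketch of the two criteria as free
functions (hypothetical names; minimization assumed):

  #include <vector>
  #include <numeric>

  // Original criterion: relax when the scalar product of the current
  // and previous gradients is negative, i.e. the gradient direction
  // flipped between iterations.
  bool ShouldRelaxByDirection(const std::vector<double> & gradient,
                              const std::vector<double> & previousGradient)
  {
    const double scalarProduct =
      std::inner_product(gradient.begin(), gradient.end(),
                         previousGradient.begin(), 0.0);
    return scalarProduct < 0.0;
  }

  // Proposed criterion: relax only when the last step actually made
  // the function value worse (an increase, when minimizing).
  bool ShouldRelaxByValue(double value, double previousValue)
  {
    return value > previousValue;
  }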

Tolga

Paul Koshevoy wrote:
> Miller, James V (GE, Research) wrote:
>   
>> Luis,
>>  
>> I think Paul Koshevoy and Tolga Tasdizen have some recommended changes
>> to one of the gradient descent optimizers.  I don't recall the
>> specifics but I believe it had something to do with checking the value
>> of the cost function before making a step.  Perhaps they can chime in
>> here.
>>  
>> Jim
>>     
> Hi Jim,
>
> Tolga had found a problem in the relaxation criterion implemented in
> itk::RegularStepGradientDescentOptimizer -- the current implementation
> depends on detecting a change in the direction of the function's
> derivative, which may step out of the basin of the minimum. The change
> we've implemented is to look at the change in the function value
> instead, and to relax when the value starts to increase again.
>
> I've attached our version of the optimizer so you can take a look. I've
> only used this for function minimization; the code will probably have to
> be fixed to support maximization.
>
>     Paul.
>
>   
> ------------------------------------------------------------------------
>
> /*=========================================================================
>
>   Program:   Insight Segmentation & Registration Toolkit
>   Module:    $RCSfile: itkRegularStepGradientDescentOptimizer2.cxx,v $
>   Language:  C++
>   Date:      $Date: 2005/03/11 23:05:33 $
>   Version:   $Revision: 1.19 $
>
>   Copyright (c) Insight Software Consortium. All rights reserved.
>   See ITKCopyright.txt or http://www.itk.org/HTML/Copyright.htm for details.
>
>      This software is distributed WITHOUT ANY WARRANTY; without even 
>      the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
>      PURPOSE.  See the above copyright notices for more information.
>
> =========================================================================*/
> #ifndef _itkRegularStepGradientDescentOptimizer2_txx
> #define _itkRegularStepGradientDescentOptimizer2_txx
>
> #include "itkRegularStepGradientDescentOptimizer2.h"
> #include "itkCommand.h"
> #include "itkEventObject.h"
> #include "vnl/vnl_math.h"
>
> namespace itk
> {
>
> /**
>  * Constructor
>  */
> RegularStepGradientDescentOptimizer2
> ::RegularStepGradientDescentOptimizer2()
> {
>
>   itkDebugMacro("Constructor");
>       
>   m_MaximumStepLength = 1.0;
>   m_MinimumStepLength = 1e-3;
>   m_GradientMagnitudeTolerance = 1e-4;
>   m_NumberOfIterations = 100;
>   m_CurrentIteration   =   0;
>   m_Value = 0;
>   m_PreviousValue = 0;
>   m_Maximize = false;
>   m_CostFunction = 0;
>   m_CurrentStepLength   =   0;
>   m_StopCondition = MaximumNumberOfIterations;
>   m_Gradient.Fill( 0.0f );
>   m_PreviousGradient.Fill( 0.0f );
>   m_RelaxationFactor = 0.5;
>
> }
>
>
>
> /**
>  * Start the optimization
>  */
> void
> RegularStepGradientDescentOptimizer2
> ::StartOptimization( void )
> {
>
>   itkDebugMacro("StartOptimization");
>
>   m_CurrentStepLength         = m_MaximumStepLength;
>   m_CurrentIteration          = 0;
>
>   const unsigned int spaceDimension = 
>     m_CostFunction->GetNumberOfParameters();
>
>   m_PreviousValue = 0;
>   m_Gradient = DerivativeType( spaceDimension );
>   m_PreviousGradient = DerivativeType( spaceDimension );
>   m_Gradient.Fill( 0.0f );
>   m_PreviousGradient.Fill( 0.0f );
>
>   this->SetCurrentPosition( GetInitialPosition() );
>   this->ResumeOptimization();
>
> }
>
>
>
>
>
> /**
>  * Resume the optimization
>  */
> void
> RegularStepGradientDescentOptimizer2
> ::ResumeOptimization( void )
> {
>   
>   itkDebugMacro("ResumeOptimization");
>
>   m_Stop = false;
>
>   this->InvokeEvent( StartEvent() );
>
>   while( !m_Stop ) 
>     {
>
>     ParametersType currentPosition = this->GetCurrentPosition();
>     
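>     // Remember the previous value and gradient; AdvanceOneStep compares
>     // the new value against m_PreviousValue to decide whether to relax.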
>     m_PreviousGradient = m_Gradient;
>     m_PreviousValue = m_Value;
>
>     if( m_Stop )
>       {
>       break;
>       }
>
>     m_CostFunction->GetValueAndDerivative( currentPosition, m_Value, m_Gradient );
>
>     if( m_Stop )
>       {
>       break;
>       }
>
>     this->AdvanceOneStep();
>
>     m_CurrentIteration++;
>
>     if( m_CurrentIteration == m_NumberOfIterations )
>       {
>       m_StopCondition = MaximumNumberOfIterations;
>       this->StopOptimization();
>       break;
>       }
>     
>     }
>     
>
> }
>
>
>
>
>
> /**
>  * Stop optimization
>  */
> void
> RegularStepGradientDescentOptimizer2
> ::StopOptimization( void )
> {
>
>   itkDebugMacro("StopOptimization");
>   
>   m_Stop = true;
>   this->InvokeEvent( EndEvent() );
> }
>
>
>
>
> /**
>  * Advance one Step following the gradient direction
>  */
> void
> RegularStepGradientDescentOptimizer2
> ::AdvanceOneStep( void )
> { 
>
>   itkDebugMacro("AdvanceOneStep");
>
>   const unsigned int  spaceDimension =
>     m_CostFunction->GetNumberOfParameters();
>
>   DerivativeType transformedGradient( spaceDimension );
>   DerivativeType previousTransformedGradient( spaceDimension );
>   ScalesType     scales = this->GetScales();
>
>   if( m_RelaxationFactor < 0.0 )
>     {
>     itkExceptionMacro(<< "Relaxation factor must be positive. Current value is " << m_RelaxationFactor );
>     return;
>     }
>
>   if( m_RelaxationFactor >= 1.0 )
>     {
>     itkExceptionMacro(<< "Relaxation factor must be less than 1.0. Current value is " << m_RelaxationFactor );
>     return;
>     }
>
>
>   // Make sure the scales have been set properly
>   if (scales.size() != spaceDimension)
>     {
>     itkExceptionMacro(<< "The size of Scales is "
>                       << scales.size()
>                       << ", but the NumberOfParameters for the CostFunction is "
>                       << spaceDimension
>                       << ".");
>     }
>
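>   // Divide each gradient component by the corresponding parameter scale.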
>   for(unsigned int i = 0;  i < spaceDimension; i++)
>     {
>     transformedGradient[i]  = m_Gradient[i] / scales[i];    
>     previousTransformedGradient[i] = 
>       m_PreviousGradient[i] / scales[i];    
>     }
>
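>   // Compute the Euclidean magnitude of the scaled gradient.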
>   double magnitudeSquare = 0;
>   for(unsigned int dim=0; dim<spaceDimension; dim++)
>     {
>     const double weighted = transformedGradient[dim];
>     magnitudeSquare += weighted * weighted;
>     }
>     
>   const double gradientMagnitude = vcl_sqrt( magnitudeSquare );
>
>   if( gradientMagnitude < m_GradientMagnitudeTolerance ) 
>     {
>     m_StopCondition = GradientMagnitudeTolerance;
>     this->StopOptimization();
>     return;
>     }
> #if 0
>   // Old relaxation criterion, now disabled: detect a gradient direction
>   // change via the scalar product of the current and previous
>   // (scaled) gradients.
>   double scalarProduct = 0;
>
>   for(unsigned int i=0; i<spaceDimension; i++)
>     {
>     const double weight1 = transformedGradient[i];
>     const double weight2 = previousTransformedGradient[i];
>     scalarProduct += weight1 * weight2;
>     }
> #endif
>   
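>   // A possible maximization-aware variant of the relaxation test below
>   // (an untested sketch; "worse" means a decrease when maximizing):
>   //
>   //   const bool worse = m_Maximize ? ( m_Value < m_PreviousValue )
>   //                                 : ( m_Value > m_PreviousValue );
>   //   if( worse && m_CurrentIteration > 0 )
>   //     {
>   //     m_CurrentStepLength *= m_RelaxationFactor;
>   //     }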
>   // New relaxation criterion: shrink the step length when the function
>   // value increased, i.e. the last step overshot the minimum.
>   // FIXME: what about maximizing?
>   if( m_Value > m_PreviousValue && m_CurrentIteration > 0 ) 
>     {
>     itkDebugMacro("relaxing: shrinking the current step length");
>     m_CurrentStepLength *= m_RelaxationFactor;
>     }
>   
>   if( m_CurrentStepLength < m_MinimumStepLength )
>     {
>     m_StopCondition = StepTooSmall;
>     this->StopOptimization();
>     return;
>     }
>
>   double direction;
>   if( this->m_Maximize ) 
>     {
>     direction = 1.0;
>     }
>   else 
>     {
>     direction = -1.0;
>     }
>
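>   // NOTE: the stock itk::RegularStepGradientDescentOptimizer divides this
>   // factor by gradientMagnitude, so that each step has length
>   // m_CurrentStepLength; that normalization is disabled here (see the
>   // FIXME below).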
>   const double factor = 
>     direction * m_CurrentStepLength; //FIXME: / gradientMagnitude;
>   itkDebugMacro(<< "gradientMagnitude: " << gradientMagnitude);
>
>   // This method StepAlongGradient() will 
>   // be overloaded in non-vector spaces
>   this->StepAlongGradient( factor, transformedGradient );
>
>   this->InvokeEvent( IterationEvent() );
>
> }
>
> /**
>  * Advance one step along the transformed gradient.
>  * This method will be overridden in non-vector spaces.
>  */
> void
> RegularStepGradientDescentOptimizer2
> ::StepAlongGradient( double factor, 
>                      const DerivativeType & transformedGradient )
> { 
>
>   itkDebugMacro(<<"factor = " << factor << "  transformedGradient= " << transformedGradient );
>
>   const unsigned int spaceDimension =
>     m_CostFunction->GetNumberOfParameters();
>
>   ParametersType newPosition( spaceDimension );
>   ParametersType currentPosition = this->GetCurrentPosition();
>
>   for(unsigned int j=0; j<spaceDimension; j++)
>     {
>     newPosition[j] = currentPosition[j] + transformedGradient[j] * factor;
>     }
>
>   itkDebugMacro(<<"new position = " << newPosition );
>
>   this->SetCurrentPosition( newPosition );
>
> }
>
> void
> RegularStepGradientDescentOptimizer2
> ::PrintSelf( std::ostream& os, Indent indent ) const
> {
>   Superclass::PrintSelf(os,indent);
>   os << indent << "MaximumStepLength: "
>      << m_MaximumStepLength << std::endl;
>   os << indent << "MinimumStepLength: "
>      << m_MinimumStepLength << std::endl;
>   os << indent << "RelaxationFactor: "
>      << m_RelaxationFactor << std::endl;
>   os << indent << "GradientMagnitudeTolerance: "
>      << m_GradientMagnitudeTolerance << std::endl;
>   os << indent << "NumberOfIterations: "
>      << m_NumberOfIterations << std::endl;
>   os << indent << "CurrentIteration: "
>      << m_CurrentIteration   << std::endl;
>   os << indent << "Value: "
>      << m_Value << std::endl;
>   os << indent << "Maximize: "
>      << m_Maximize << std::endl;
>   if (m_CostFunction)
>     {
>     os << indent << "CostFunction: "
>        << m_CostFunction.GetPointer() << std::endl;
>     }
>   else
>     {
>     os << indent << "CostFunction: "
>        << "(None)" << std::endl;
>     }
>   os << indent << "CurrentStepLength: "
>      << m_CurrentStepLength << std::endl;
>   os << indent << "StopCondition: "
>      << m_StopCondition << std::endl;
>   os << indent << "Gradient: "
>      << m_Gradient << std::endl;
> }
> } // end namespace itk
>
> #endif
>   
> ------------------------------------------------------------------------
>
> /*=========================================================================
>
>   Program:   Insight Segmentation & Registration Toolkit
>   Module:    $RCSfile: itkRegularStepGradientDescentOptimizer2.h,v $
>   Language:  C++
>   Date:      $Date: 2005/03/28 20:43:22 $
>   Version:   $Revision: 1.17 $
>
>   Copyright (c) Insight Software Consortium. All rights reserved.
>   See ITKCopyright.txt or http://www.itk.org/HTML/Copyright.htm for details.
>
>      This software is distributed WITHOUT ANY WARRANTY; without even 
>      the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
>      PURPOSE.  See the above copyright notices for more information.
>
> =========================================================================*/
> #ifndef __itkRegularStepGradientDescentOptimizer2_h
> #define __itkRegularStepGradientDescentOptimizer2_h
>
> #include "itkSingleValuedNonLinearOptimizer.h"
>
> namespace itk
> {
>   
> /** \class RegularStepGradientDescentOptimizer2
>  * \brief Implement a regular step gradient descent optimizer that
>  * relaxes the step length when the cost function value increases.
>  *
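>  * A minimal usage sketch (costFunction and initialParameters are assumed
>  * to be set up elsewhere):
>  *
>  * \code
>  * RegularStepGradientDescentOptimizer2::Pointer optimizer =
>  *   RegularStepGradientDescentOptimizer2::New();
>  * optimizer->SetCostFunction( costFunction );
>  * optimizer->SetInitialPosition( initialParameters );
>  * optimizer->SetMaximumStepLength( 1.0 );
>  * optimizer->SetMinimumStepLength( 1e-3 );
>  * optimizer->SetNumberOfIterations( 100 );
>  * optimizer->MinimizeOn();
>  * optimizer->StartOptimization();
>  * \endcode
>  *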
>  * \ingroup Numerics Optimizers
>  */
> class ITK_EXPORT RegularStepGradientDescentOptimizer2 : 
>     public SingleValuedNonLinearOptimizer
> {
> public:
>   /** Standard "Self" typedef. */
>   typedef RegularStepGradientDescentOptimizer2         Self;
>   typedef SingleValuedNonLinearOptimizer               Superclass;
>   typedef SmartPointer<Self>                           Pointer;
>   typedef SmartPointer<const Self>                     ConstPointer;
>   
>   /** Method for creation through the object factory. */
>   itkNewMacro(Self);
>   
>   /** Run-time type information (and related methods). */
>   itkTypeMacro( RegularStepGradientDescentOptimizer2, 
>                 SingleValuedNonLinearOptimizer );
>   
>
>   /** Codes of stopping conditions. */
>   typedef enum {
>     GradientMagnitudeTolerance = 1,
>     StepTooSmall = 2,
>     ImageNotAvailable = 3,
>     SamplesNotAvailable = 4,
>     MaximumNumberOfIterations = 5
>   } StopConditionType;
>
>   /** Specify whether to minimize or maximize the cost function. */
>   itkSetMacro( Maximize, bool );
>   itkGetConstReferenceMacro( Maximize, bool );
>   itkBooleanMacro( Maximize );
>   bool GetMinimize( ) const
>   { return !m_Maximize; }
>   void SetMinimize(bool v)
>   { this->SetMaximize(!v); }
>   void    MinimizeOn(void) 
>   { SetMaximize( false ); }
>   void    MinimizeOff(void) 
>   { SetMaximize( true ); }
>
>   /** Start optimization. */
>   void    StartOptimization( void );
>
>   /** Resume previously stopped optimization with current parameters.
>    * \sa StopOptimization */
>   void    ResumeOptimization( void );
>
>   /** Stop optimization.
>    * \sa ResumeOptimization */
>   void    StopOptimization( void );
>
>   /** Set/Get parameters to control the optimization process. */
>   itkSetMacro( MaximumStepLength, double );
>   itkSetMacro( MinimumStepLength, double );
>   itkSetMacro( RelaxationFactor, double );
>   itkSetMacro( NumberOfIterations, unsigned long );
>   itkSetMacro( GradientMagnitudeTolerance, double );
>   itkGetConstReferenceMacro( CurrentStepLength, double);
>   itkGetConstReferenceMacro( MaximumStepLength, double );
>   itkGetConstReferenceMacro( MinimumStepLength, double );
>   itkGetConstReferenceMacro( RelaxationFactor, double );
>   itkGetConstReferenceMacro( NumberOfIterations, unsigned long );
>   itkGetConstReferenceMacro( GradientMagnitudeTolerance, double );
>   itkGetConstMacro( CurrentIteration, unsigned int );
>   itkGetConstReferenceMacro( StopCondition, StopConditionType );
>   itkGetConstReferenceMacro( Value, MeasureType );
>   itkGetConstReferenceMacro( Gradient, DerivativeType );
>   
>   
> protected:
>   RegularStepGradientDescentOptimizer2();
>   virtual ~RegularStepGradientDescentOptimizer2() {};
>   void PrintSelf(std::ostream& os, Indent indent) const;
>
>   /** Advance one step following the gradient direction.
>    * This method checks whether the cost function value has increased
>    * and whether a reduction in step length is required. */
>   virtual void AdvanceOneStep( void );
>
>   /** Advance one step along the corrected gradient, taking into
>    * account the step length represented by factor.
>    * This method is invoked by AdvanceOneStep. It is expected
>    * to be overridden by optimization methods in non-vector spaces.
>    * \sa AdvanceOneStep */
>   virtual void StepAlongGradient( 
>     double,
>     const DerivativeType&);
>
>
> private:  
>   RegularStepGradientDescentOptimizer2(const Self&); //purposely not implemented
>   void operator=(const Self&);//purposely not implemented
>
> protected:
>   DerivativeType                m_Gradient; 
>   DerivativeType                m_PreviousGradient; 
>
>   bool                          m_Stop;
>   bool                          m_Maximize;
>   MeasureType                   m_Value;
>   MeasureType                   m_PreviousValue;
>   double                        m_GradientMagnitudeTolerance;
>   double                        m_MaximumStepLength;
>   double                        m_MinimumStepLength;
>   double                        m_CurrentStepLength;
>   double                        m_RelaxationFactor;
>   StopConditionType             m_StopCondition;
>   unsigned long                 m_NumberOfIterations;
>   unsigned long                 m_CurrentIteration;
>
>
> };
>
> } // end namespace itk
>
>
>
> #endif
>
>
>
>   


