function [nll,g,H] = tvacost(theta,theta_fix,tvamodel,tvadata,vcf,shutup)
% TVACOST
%
%  Synopsis
%  ========
%
%  [nll,g,H] = tvacost(theta,theta_fix,tvamodel,tvadata)
%  [nll,g,H] = tvacost(theta,theta_fix,tvamodel,tvadata,vcf)
%  [nll,g,H] = tvacost(theta,theta_fix,tvamodel,tvadata,vcf,shutup)
%
%  -- Author: Mads Dyrholm --
%     Center for Visual Cognition, University of Copenhagen.
%     2009 - July 2011
%
%  Purpose
%  ======= 
%
%  Compute the cost function (negative log likelihood), its gradient,
%  and its Hessian, for optimization purposes.
%
%  Inputs
%  ======
%
%  theta - Parameter vector. Note: TVACOST only returns gradient
%  and Hessian for what is in theta, hence, theta should not 
%  be stripped if one is optimizing guessing constants. 
%  (see also TVASTRIPTHETA, TVAFLEXCHAIN)
%
%  theta_fix - Fixed parameters, can be []. (see also TVAFIXER)
%
%  tvamodel - Model struct. (see also TVAINIT)
%  If it has a field 'chdetgm', that field selects the change
%  detection guessing model:
%    []        - ChDetI, pgy is the last element of theta
%    2         - ChDetII, [pgy,pgn] are the last two elements of theta
%    [pgy]     - ChDetI with a constant pgy
%    [pgy,pgn] - ChDetII with constant pgy and pgn
%
%  tvadata - Dataset, a cell array of trial structs. (see also TVALOADER)
%  The fields read here are trial.task ('CD', 'WR' or 'PR') and, for
%  'CD' trials, trial.response and trial.change.
%
%  vcf - (Optional) This must be set to 1 when TVACOST is used
%  for optimization (default). When set to 0, the gradient
%  and Hessian outputs are derived in the human domain.
%  An empty value ([]) selects the default.
%
%  shutup - (Optional) Set this to 1 to disable the default
%  behavior of writing the cost function to the screen.
%  An empty value ([]) selects the default (0).
%
%  Outputs
%  =======
%  
%  nll - Negative log likelihood.
%
%  g - Gradient vector i.e. d_nll/d_theta.
%
%  H - Hessian matrix of second order derivatives d^2_nll/d_theta^2
%
%  Errors
%  ======
%
%  An error is raised if tvamodel.chdetgm has more than two elements,
%  or if a 'CD' trial evaluated under ChDetII has a response other
%  than 0, 1 or 2 (negative responses are skipped as "no response").

% Optional arguments: a missing or empty value selects the default, so
% that e.g. tvacost(theta,theta_fix,tvamodel,tvadata,[],1) works as expected.
if nargin<6 || isempty(shutup), shutup = 0; end
if nargin<5 || isempty(vcf), vcf = 1; end

% make theta as composed by theta and theta_fix
% nanfix is used at the very end to index the entries of g and H that
% are returned (presumably the non-fixed parameters -- confirm against
% TVATHETACOMBINE).
[theta,nanfix] = tvathetacombine(theta,theta_fix);
theta=theta(:);
dimtheta = length(theta);

% is there a CHDET guessing model?
if isfield(tvamodel,'chdetgm')
  gm = tvamodel.chdetgm;
  %
  %  gm = []        , chdet1 with pgy in theta
  %  gm = 2         , chdet2 with [pgy,pgn] in theta
  %  gm = [pgy]     , chdet1
  %  gm = [pgy,pgn] , chdet2
  %
  % figure out the guessing model; chdetmode encodes the result:
  %   0 : ChDetI,  pgy constant
  %   1 : ChDetI,  pgy = logistic(theta(end))
  %   2 : ChDetII, pgy and pgn constant
  %   3 : ChDetII, [pgy,pgn] = logistic(theta(end-1:end))
  switch length(gm)
   case 2
    pgy = gm(1);
    pgn = gm(2);
    chdetmode = 2;
   case 1
    if gm==2
      % the scalar 2 is a flag, not a probability
      pgy = 1/(1+exp(-theta(end-1)));
      pgn = 1/(1+exp(-theta(end)));
      chdetmode = 3;
      % accumulators for the [pgy;pgn] part of gradient and Hessian
      gpg = 0;   % gradient wrt [pgy;pgn]
      Hpg = 0;   % Hessian block wrt [pgy;pgn]
      Hpgx = 0;  % cross block between the stripped theta and [pgy;pgn]
    else
      pgy = gm;
      chdetmode = 0;
    end
   case 0 
    pgy = 1/(1+exp(-theta(end)));
    chdetmode = 1;
    % accumulators for the pgy part of gradient and Hessian
    gpgy = 0;   % gradient wrt pgy
    Hpgy = 0;   % cross column between the stripped theta and pgy
    Hpgy2 = 0;  % second derivative wrt pgy
   otherwise
    error('Invalid gm');
  end
else
  % no guessing model: none of the chdetmode tests below will match
  % NOTE(review): a 'CD' trial then appears to add nothing to nll/g/H
  % rather than raising an error -- confirm this is intended.
  chdetmode = [];
end

% remove the guessing parameters from theta (see TVASTRIPTHETA); the
% returned tvamodel is the one used for the rest of the computation
[striptheta,tvamodel] = tvastriptheta(theta,tvamodel);

% allocate and init output
% (scalars; they expand on the first accumulation of g1 / H1)
nll = 0;
g = 0; 
H = 0; 

% loop over trials
N = length(tvadata);
% only the index outputs are used; they are passed on to TVAFLEXCHAIN
[dum,logistidx,expidx] = tvahuman(striptheta,tvamodel);
for n=1:N
  trial = tvadata{n};
  % compute the VSTM probability and derivatives
  % PE is the probability, g1 and H1 its gradient and Hessian wrt striptheta
  [PE,g1,H1] = tvaflexchain(striptheta,tvamodel,trial,logistidx,expidx,vcf);
  % contrib to negative log likelihood according to task
  switch trial.task
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   case 'CD'
    if (trial.response<0) continue; end % no response given
    % delta: indicator of change (delta_c) / no change (delta_nc);
    % any trial.change >= 2 yields NaN, which propagates into the
    % likelihood wherever the indicator is used
    if trial.change<2
      delta_c = trial.change;
      delta_nc = 1-trial.change;
    else
      delta_c = nan;
      delta_nc= nan;
    end
    switch chdetmode
     case {0,1} 
      %%%%%%%%%%%%%%%%%%%%%%%%%%%
      % ChDetI 
      % neg log lik
      % l is the probability of a "yes" response; flipped for response==0
      l = delta_c*PE + (1-PE)*pgy;
      if (trial.response==0), l = 1-l; end
      nll = nll - log(l);
      % neg gradient
      % chain rule: dlog(l)/dtheta = (dl/dPE)/l * dPE/dtheta
      dl_dp = delta_c-pgy;
      if (trial.response==0), dl_dp = -dl_dp; end
      kappa = dl_dp / l;
      tmp1 = kappa * g1;
      g = g - tmp1;
      % Hessian
      % d2log(l)/dtheta2 = kappa*H1 - kappa^2*g1*g1'
      H = H - kappa * (H1 - kappa*g1*g1');
      if chdetmode==1
        % ChDetI with pgy in theta
        % wrt pgy
        dlogl_dpgy = (-1)^trial.response*(PE-1) / l;
        gpgy = gpgy - dlogl_dpgy;
        % NOTE(review): the cross block accumulates only the product of
        % the two first derivatives -- confirm this approximation is intended.
        Hpgy = Hpgy + dlogl_dpgy*tmp1;
        Hpgy2 = Hpgy2 + dlogl_dpgy^2;
      end
     case {2,3} 
      %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
      % ChDetII 
      % neg log lik
      switch trial.response
       case 0
        % M or CR
        l = delta_nc*PE + (1-PE)*pgn;
        dl_dp = delta_nc-pgn;
        % d wrt pgn and pgy
        if chdetmode==3
          dlogl_dpgy = 0;
          dlogl_dpgn = (1-PE)/l;
        end
       case 1
        % H or FA
        l = delta_c*PE + (1-PE)*pgy;
        dl_dp = delta_c - pgy;
        % d wrt pgn and pgy
        if chdetmode==3
          dlogl_dpgy = (1-PE)/l;
          dlogl_dpgn = 0;
        end
       case 2
        % ?C|C or ?C|noC
        l = 1 - PE - (1-PE)*pgy - (1-PE)*pgn;
        dl_dp = -1 + pgy + pgn;
        % d wrt pgn and pgy
        if chdetmode==3
          dlogl_dpgy = -(1-PE)/l;
          dlogl_dpgn = -(1-PE)/l;
        end
       otherwise
        % Without this branch l, dl_dp and the dlogl_dpg* values would be
        % left over from the previous trial (or undefined on the first
        % one) and silently corrupt nll, g and H.
        error('Invalid response (%g) in CD trial %d',trial.response,n);
      end
      nll = nll - log(l);
      % neg gradient
      kappa = dl_dp / l;
      dlogl_dtheta = kappa * g1;
      g = g - dlogl_dtheta;
      % Hessian
      % same expression as in ChDetI, since dlogl_dtheta = kappa*g1
      H = H - kappa * (H1 - dlogl_dtheta*g1');
      
      %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
      % ChDetII with [pgy;pgn] in theta
      if chdetmode==3
        % Hessian wrt pgy and pgn
        tmp = [dlogl_dpgy;dlogl_dpgn];
        gpg = gpg - tmp;
        Hpg = Hpg + tmp*tmp';
        % NOTE(review): as in ChDetI, the cross block holds only the
        % product of first derivatives -- confirm this is intended.
        Hpgx = Hpgx + dlogl_dtheta*tmp';
      end
    end
    %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
   case {'WR','PR'}
    % negative log likelihood
    % the likelihood of the trial is PE itself
    l = PE;
    nll = nll - log(l);
    kappa = 1 / l;
    % gradient
    g = g - kappa * g1;
    % Hessian
    H = H - kappa * (H1 - kappa*g1*g1');
  end
end

if chdetmode==1
  % variable change pgy
  % pgy = logistic(u) with u = theta(end), so dpgy/du = pgy*(1-pgy)
  % NOTE(review): Hpgy2 is rescaled by dadu^2 only; no term with the
  % second derivative of the logistic is added -- confirm intended.
  if vcf
    dadu = pgy*(1-pgy);
    gpgy = dadu*gpgy;
    Hpgy = dadu*Hpgy;
    Hpgy2 = dadu^2*Hpgy2;
  end
  % gather
  % append the pgy entry after the stripped-theta entries
  g = [g;gpgy];
  H = [H,Hpgy;Hpgy',Hpgy2];
end

if chdetmode==3
  % variable change pgy and pgn
  % element-wise logistic derivatives for [pgy;pgn]
  if vcf
    dadu = [pgy;pgn].*(1-[pgy;pgn]);
    gpg = dadu.*gpg;
    Hpg = (dadu*dadu').*Hpg;
    % scale the two columns of the cross block; dimtheta-2 is the
    % number of rows, i.e. the parameters other than pgy and pgn
    Hpgx = Hpgx .* repmat(dadu',[dimtheta-2,1]);
  end
  % gather
  % append the [pgy;pgn] entries after the stripped-theta entries
  g = [g;gpg];
  H = [H,Hpgx;Hpgx',Hpg];
end

% get the adaptive part
% i.e. keep only the entries selected by nanfix when some parameters are fixed
if ~isempty(theta_fix)
  g = g(nanfix);
  H = H(nanfix,nanfix);
end

if ~shutup, fprintf('tvacost: %f\n',nll); end
