function ...
    [lnLR_test, sigma2_target, lnLR_s_train, lnLR_d_train] = ...
    biGaussianized_calibration ...
        (scores_test, scores_s_train, scores_d_train, methods, ...
         Cllr_to_sigma2_coefs, logreg_regularization_coefs, I_cdf_plot, I_map_plot)

% function ...
%     [lnLR_test, sigma2_target, lnLR_s_train, lnLR_d_train] = ...
%     biGaussianized_calibration ...
%         (scores_test, scores_s_train, scores_d_train, methods, ...
%          Cllr_to_sigma2_coefs, logreg_regularization_coefs, I_cdf_plot, I_map_plot)
%
% function that performs bi-Gaussianized calibration (EER, LogReg, and KDE methods) and LogReg calibration
%   one or more of these methods can be requested
%
% available from: 
%   https://forensic-data-science.net/calibration-and-validation/#biGauss
%
% see: 
%   Morrison G.S. (2023). Bi-Gaussianized calibration of likelihood ratios. Manuscript submitted for publication. 
%   Preprint at https://geoff-morrison.net/#biGauss2023
%
% Needed functions are provided in the accompanying "functions" folder
%   These include functions from, or adapted from, Niko Brümmer's FoCal Toolkit.
% 
% version 2024-02-03a
% 
% tested on Matlab R2023b
% 
% Geoffrey Stewart Morrison 
% http://geoff-morrison.net/
%
% INPUTS:
%   scores_test:                    scalar or vector    scores to be calibrated
%                                                           any -Inf scores are converted to the lowest non -Inf test or training score
%   scores_s_train:                 vector              same-source scores for training
%                                                           assumed not to contain -Inf
%   scores_d_train:                 vector              different-source scores for training 
%                                                           any -Inf scores will be converted to a point mass at the lowest non -Inf training score
%   methods:                        string array        at least one of ["biGauss_EER", "biGauss_LogReg", "biGauss_KDE", "LogReg"]
%                                                           (matching is case insensitive)
%   Cllr_to_sigma2_coefs:           vector              coefficient values [b c] for Cllr_to_sigma2 function 
%                                                           optional, default values are specified within the Cllr_to_sigma2 function
%   logreg_regularization_coefs:    vector              coefficient values [kappa df] for train_llr_fusion_regularized function 
%                                                           optional, default values are specified in the "LogReg method" section below
%   I_cdf_plot:                     scalar or vector    indices of bi-Gaussianized methods for which to plot cumulative distribution functions
%                                                           values correspond to order methods are named in "methods" input argument
%                                                           optional, no cdf plots are drawn if omitted or empty
%   I_map_plot:                     scalar or vector    indices of methods for which to plot the score to ln(LR) mapping function
%                                                           values correspond to order methods are named in "methods" input argument
%                                                           optional, no mapping plot is drawn if omitted or empty
% OUTPUTS:
%   lnLR_test:      matrix      calibrated natural-log likelihood ratios corresponding to scores_test
%                                   columns correspond to methods in order given in input argument "methods"
%                                   rows correspond to test scores
%   sigma2_target:  row vector  variance of perfectly-calibrated bi-Gaussian system, one element per method
%                                   elements correspond to methods in order given in input argument "methods",
%                                   except that the element for "LogReg" (if requested) is removed,
%                                   so elements for any methods listed after "LogReg" are shifted down by one
%   lnLR_s_train:   matrix      calibrated natural-log likelihood ratios corresponding to scores_s_train
%                                   columns correspond to methods in order given in input argument "methods"
%                                   rows correspond to same-source training scores
%                                   only calculated if more than two outputs are requested
%   lnLR_d_train:   matrix      calibrated natural-log likelihood ratios corresponding to scores_d_train
%                                   columns correspond to methods in order given in input argument "methods"
%                                   rows correspond to different-source training scores
%                                   only calculated if more than two outputs are requested
%


% normalize methods to a row string array (also accepts a char vector or a cell array of char vectors)
% this guarantees that the method indices below are row vectors (so that "for" iterates over single indices)
% and that methods(k) returns a whole method name for the legend of the mapping plot
methods = reshape(string(methods), 1, []);

% check which methods have been requested and get method indices
I_biG_EER = find(matches(methods,'biGauss_EER',IgnoreCase=true));
I_biG_LogReg = find(matches(methods,'biGauss_LogReg',IgnoreCase=true));
I_biG_KDE = find(matches(methods,'biGauss_KDE',IgnoreCase=true));
I_LogReg = find(matches(methods,'LogReg',IgnoreCase=true));

I_methods = [I_biG_EER, I_biG_LogReg, I_biG_KDE, I_LogReg];

if isempty(I_methods)
    error(['Error calling biGaussianized_calibration ', ...
        '\n4th argument must be a string array including at least one of the following: ', ...
        '\n\t["biGauss_EER", "biGauss_LogReg", "biGauss_KDE", "LogReg"] ', ...
        '\nIf specifying more than one method, use square brackets and double quotation marks.%s'], '')
end

num_methods = length(I_methods);

I_biG_methods = [I_biG_EER, I_biG_LogReg, I_biG_KDE];

% check whether output of calibrated training data is requested
output_lnLR_train = nargout > 2;

% check whether cdf plot is requested
plot_cdf = nargin > 6 && ~isempty(I_cdf_plot);

% check whether mapping plot is requested
plot_mapping = nargin > 7 && ~isempty(I_map_plot);


% make sure scores_test, scores_s_train, and scores_d_train are row vectors
scores_test = scores_test(:)';
scores_s_train = scores_s_train(:)';
scores_d_train = scores_d_train(:)';

% prepare training data

% amount of training data
num_s_train = length(scores_s_train);
num_d_train = length(scores_d_train);
num_train = num_s_train + num_d_train;

% prepare any -Inf score values in the different-source training scores
II_negInf_d_train = scores_d_train == -Inf;
point_mass = any(II_negInf_d_train);
if point_mass
    % replace any -Inf in training scores with the lowest non -Inf training score value
    % [this assumes there are no -Inf in scores_s_train]
    % the minimum is taken over both categories because the empirical cdf below places the point mass
    % at the lowest of all sorted training scores, and cdf_train assumes that the replaced scores
    % are the first elements of scores_train_sorted
    scores_d_train(II_negInf_d_train) = min([scores_d_train(~II_negInf_d_train), scores_s_train]);

    % version of different-source training scores excluding any -Inf
    num_negInf_d_train = sum(II_negInf_d_train);
    num_d_train_ex_negInf = num_d_train - num_negInf_d_train;
    scores_d_train_ex_negInf = scores_d_train(~II_negInf_d_train);

    scores_train_ex_negInf = [scores_d_train_ex_negInf, scores_s_train];
    [scores_train_ex_negInf_sorted, ID_sorted_ex_negInf] = sort(scores_train_ex_negInf);
end

% sort training scores (different-source scores first, then same-source scores, before sorting)
scores_train = [scores_d_train, scores_s_train];
[scores_train_sorted, ID_sorted] = sort(scores_train);


% prepare test data

% amount of test data
num_test = length(scores_test);

% convert any -Inf test scores to lowest value non -Inf score (test or training, whichever is lower)
II_negInf_test = scores_test == -Inf;
if any(II_negInf_test)
    min_score_test = min(scores_test(~II_negInf_test)); % empty if all test scores are -Inf
    scores_test(II_negInf_test) = min([min_score_test, scores_train_sorted(1)]);
end

% if scores_test values are above max or below min values of scores_train_sorted, cdf_test values can be above 1 or below 0
% to resolve this, reset such scores_test values to max or min values of scores_train_sorted
scores_test_limited = scores_test;
scores_test_limited(scores_test > scores_train_sorted(end)) = scores_train_sorted(end);
scores_test_limited(scores_test < scores_train_sorted(1)) = scores_train_sorted(1);    


% empirical cdfs

% empirical cdf for training data giving equal weighting to same-source and different-source categories
props_s = (ones(1,num_s_train)/(num_s_train+1))/2; % add 1 in denominator of empirical cdf so will not have to extrapolate to 1 for gmm cdf
if point_mass
    props_d = (ones(1,num_d_train_ex_negInf)/(num_d_train+1))/2;
    props = [props_d, props_s];
    props_sorted = props(ID_sorted_ex_negInf);    
    point_mass_prop = (num_negInf_d_train/(num_d_train+1))/2; % proportion for point mass corresponding to -Inf scores
    props_sorted(1) = props_sorted(1) + point_mass_prop; % add point mass at location of lowest value non -Inf score
else
    props_d = (ones(1,num_d_train)/(num_d_train+1))/2;
    props = [props_d, props_s];
    props_sorted = props(ID_sorted);
end
cdf_empirical = cumsum(props_sorted);

% interpolate cdf values for test scores
% in case there are repeated score values, convert scores to unique values, 
% and use cdf of highest value with each unique score value
if point_mass
    [scores_train_ex_negInf_sorted_unique, ID_unique] = unique(scores_train_ex_negInf_sorted,'last');
    cdf_empirical_unique = cdf_empirical(ID_unique);
    cdf_test = interp1(scores_train_ex_negInf_sorted_unique, cdf_empirical_unique, scores_test_limited, 'linear'); 
else
    [scores_train_sorted_unique, ID_unique] = unique(scores_train_sorted,'last');
    cdf_empirical_unique = cdf_empirical(ID_unique);
    cdf_test = interp1(scores_train_sorted_unique, cdf_empirical_unique, scores_test_limited, 'linear'); 
end

% cdf values for train scores, in the same order as scores_train_sorted
if output_lnLR_train || plot_cdf || plot_mapping
    if point_mass 
        % the replaced -Inf scores are at the start of scores_train_sorted and all share the cdf value of the point mass
        cdf_train = [ones(1,num_negInf_d_train)*cdf_empirical(1), cdf_empirical];
    else
        cdf_train = cdf_empirical; 
    end
end


% biGauss calibration methods

% for each method, calculate EER/Cllr then sigma2_target
% also perform LogReg calibration 

sigma2_target = NaN(1,num_methods);

% EER method
if ~isempty(I_biG_EER)
    II_s_train = [zeros(1,num_d_train), ones(1,num_s_train)];
    II_s_train_sorted = II_s_train(ID_sorted);
    II_d_train_sorted = 1-II_s_train_sorted;
    error_rate_s_train_sorted = (cumsum(II_s_train_sorted)/num_s_train); % miss rate: proportion of same-source scores at or below each sorted score
    error_rate_d_train_sorted = 1-(cumsum(II_d_train_sorted)/num_d_train); % false-alarm rate: proportion of different-source scores above each sorted score
    
    % first sorted score at which the false-alarm rate drops below the miss rate
    I_eer = find(error_rate_d_train_sorted < error_rate_s_train_sorted, 1);
    if isempty(I_eer)
        FAR = 1/num_d_train;
        MR = 1/num_s_train;
    else
        % use the rates immediately before the crossing, floored at one error per category
        FAR = max([error_rate_d_train_sorted(I_eer-1), 1/num_d_train]);
        MR = max([error_rate_s_train_sorted(I_eer-1), 1/num_s_train]);
    end
    EER_target = mean([FAR, MR]);
    
    % sigma2 of perfectly-calibrated bi-Gaussian system with the same EER
    sigma2_target(I_biG_EER) = EER_to_sigma2(EER_target);
end

% LogReg method
% the LogReg model is trained if either bi-Gaussianized LogReg or plain LogReg calibration is requested
if ~isempty(I_biG_LogReg) || ~isempty(I_LogReg)
    if nargin < 6 || isempty(logreg_regularization_coefs)
        % default regularization coefficient values
        kappa = 0.01;
        df = num_s_train;
    else
        kappa = logreg_regularization_coefs(1);
        df = logreg_regularization_coefs(2);
    end
    
    w = train_llr_fusion_regularized(scores_s_train, scores_d_train, 0.5, kappa, df);
    lnLR_d_LogReg_train = lin_fusion(w, scores_d_train);
    lnLR_s_LogReg_train = lin_fusion(w, scores_s_train);
    
    % Cllr of training data after LogReg calibration 
    Cllr_LogReg_target = cllr(lnLR_s_LogReg_train, lnLR_d_LogReg_train);
    
    % sigma2 of perfectly-calibrated bi-Gaussian system with the same Cllr
    % (no assignment is made if I_biG_LogReg is empty)
    if nargin < 5 || isempty(Cllr_to_sigma2_coefs)
        sigma2_target(I_biG_LogReg) = Cllr_to_sigma2(Cllr_LogReg_target); % use default coefficient values specified within Cllr_to_sigma2 function
    else
        sigma2_target(I_biG_LogReg) = Cllr_to_sigma2(Cllr_LogReg_target, Cllr_to_sigma2_coefs(1), Cllr_to_sigma2_coefs(2));
    end
end

% KDE method
if ~isempty(I_biG_KDE)
    KDE_d_train = fitdist(scores_d_train', 'Kernel');
    KDE_s_train = fitdist(scores_s_train', 'Kernel');
    lnLR_KDE = log(pdf(KDE_s_train, scores_train)) - log(pdf(KDE_d_train, scores_train));
    lnLR_d_KDE_train = lnLR_KDE(1:num_d_train);
    lnLR_s_KDE_train = lnLR_KDE(num_d_train+1:end);
    
    % Cllr of training data after KDE calibration 
    Cllr_KDE_target = cllr(lnLR_s_KDE_train, lnLR_d_KDE_train);
    
    % sigma2 of perfectly-calibrated bi-Gaussian system with the same Cllr
    if nargin < 5 || isempty(Cllr_to_sigma2_coefs)
        sigma2_target(I_biG_KDE) = Cllr_to_sigma2(Cllr_KDE_target); % use default coefficient values specified within Cllr_to_sigma2 function
    else
        sigma2_target(I_biG_KDE) = Cllr_to_sigma2(Cllr_KDE_target, Cllr_to_sigma2_coefs(1), Cllr_to_sigma2_coefs(2));
    end
end


% for each method, 
% calculate target cdf given sigma2 of perfectly calibrated biGauss system, 
% then map test scores to biGauss-calibrated lnLRs via cdfs

half_sigma2_target = sigma2_target/2;
sigma_target = sqrt(sigma2_target);

lnLR_test = NaN(num_test,num_methods);

if output_lnLR_train || plot_mapping
    lnLR_train_sorted = NaN(num_train,num_methods);
end

for I_method = I_biG_methods
    % GMM mixture of perfectly calibrated biGauss system
    % two components with means -sigma2/2 and +sigma2/2 and shared variance sigma2
    gmm_target = gmdistribution([-half_sigma2_target(I_method);half_sigma2_target(I_method)], sigma2_target(I_method)); 

    % cdf of perfectly calibrated biGauss system
    % assume range of lnLRs will be within mu_d-4*sigma and mu_s+4*sigma
    lnLR_max = half_sigma2_target(I_method) + 4*sigma_target(I_method);
    lnLR_step = lnLR_max / (4*(num_train+1));
    lnLR_target_grid = (-lnLR_max : lnLR_step : lnLR_max)';

    cdf_target_grid = cdf(gmm_target, lnLR_target_grid);
    
    % convert target grid to unique values to avoid problems due to numerical constraints on calculation of cdf target values
    [cdf_target_grid_unique, ID_unique] = unique(cdf_target_grid);
    lnLR_target_grid_unique = lnLR_target_grid(ID_unique);
    
    % map test scores to biGauss-calibrated lnLRs via cdfs
    lnLR_test(:,I_method) = interp1(cdf_target_grid_unique, lnLR_target_grid_unique, cdf_test, 'linear', 'extrap');

    if output_lnLR_train || plot_mapping
        lnLR_train_sorted(:,I_method) = interp1(cdf_target_grid_unique, lnLR_target_grid_unique, cdf_train, 'linear', 'extrap');
    end

    % plot cdfs
    if plot_cdf && any(I_cdf_plot(:) == I_method)
        plot_line_width = 1;
        plot_font_size = 12;
        figure;
        plot([0; 0], [0; 1], '-k')
        hold on
        h1 = plot(lnLR_target_grid, cdf_target_grid, '-b', 'LineWidth',plot_line_width);
        if point_mass
            h2 = plot(scores_train_ex_negInf_sorted, cdf_empirical, ':r', 'LineWidth',plot_line_width);
        else
            h2 = plot(scores_train_sorted, cdf_train, ':r', 'LineWidth',plot_line_width);
        end
        grid on
        xlabel('score or ln(\Lambda)', 'FontSize',plot_font_size);
        ylabel('cumulative probability', 'FontSize',plot_font_size);
        title(['\sigma target = ', num2str(sigma_target(I_method), '%0.2f')]);
        legend([h1,h2],{'target', 'empirical'}, 'Location','northwest')
    end

end

% output calibrated training scores
if output_lnLR_train
    % unsort (invert the sorting permutation) and split into same-source and different-source sets
    ID_unsort(ID_sorted) = 1:length(ID_sorted);
    lnLR_train = lnLR_train_sorted(ID_unsort,:);
    lnLR_s_train = lnLR_train(num_d_train+1:end,:);
    lnLR_d_train = lnLR_train(1:num_d_train,:);
end

% add LogReg to results, and remove extra element from sigma2_target
if ~isempty(I_LogReg)
    lnLR_LogReg_test = lin_fusion(w, scores_test)';
    lnLR_test(:,I_LogReg) = lnLR_LogReg_test;

    % LogReg has no bi-Gaussian target, so its element is deleted 
    % (elements for methods listed after "LogReg" shift down by one)
    sigma2_target(I_LogReg) = [];

    if output_lnLR_train
        lnLR_s_train(:,I_LogReg) = lnLR_s_LogReg_train';
        lnLR_d_train(:,I_LogReg) = lnLR_d_LogReg_train';
    end
end


% plot mapping functions
if plot_mapping
    plot_line_width = 1;
    plot_font_size = 12;
    figure;
    colororder({'k','r','b','g','m'});
    minmax_x = [scores_train_sorted(1); scores_train_sorted(end)];
    plot(minmax_x, [0; 0], 'LineWidth',plot_line_width/2);
    hold on
    legend_h = [];
    legend_text = {};
    for I_plot = I_map_plot(:)' % force row vector so that each iteration handles a single method index
        if any(I_plot == I_LogReg)
            % the LogReg mapping is a straight line, so evaluate it at the lowest and highest training scores
            h_line = plot(minmax_x, lin_fusion(w, minmax_x'), 'LineWidth',plot_line_width);
        else
            h_line = plot(scores_train_sorted, lnLR_train_sorted(:,I_plot), 'LineWidth',plot_line_width);
        end
        legend_h = [legend_h, h_line];
        legend_text = [legend_text, methods(I_plot)];
    end
    xlabel('Score', 'FontSize',plot_font_size);
    ylabel('ln(\Lambda)', 'FontSize',plot_font_size);
    axis tight
    grid on
    legend(legend_h, legend_text, 'Interpreter','none', 'Location','northwest')
end


