function [dataset] = initialization(features, config, mdl)
% INITIALIZATION  Build the dataset structure with empty (all-zero) labels.
%
% INPUT
%   [features] N x d feature matrix
%   [config]   configuration structure
%       - DO_ZSCORING : true or false (optional, treated as false when the
%                       field is absent)
%       - balance     : required; copied unchanged into dataset.balance
%       - METHOD      : listed as default 'cal'; not read by this function
%   [mdl] cell classifier (optional)
%
% OUTPUT
%   [dataset] a structure that contains the fields
%       - features      : N x d (z-scored per column when DO_ZSCORING is set)
%       - labels_ex     : N x 1 human / expert labels, initialised to 0
%       - labels_ml     : N x 1 cell classifier / ML labels, initialised to 0
%       - labels_ml_prob: N x 1 probabilities of being a cell associated
%                         with labels_ml, initialised to 0
%       - balance       : value of config.balance
%       - mdl           : the classifier [mdl], or NaN when not supplied
%
% Label convention: 1 = cell, 0 = not labeled, -1 = not cell.

% DO_ZSCORING is documented as optional, so only read it when it exists;
% a missing field means "do not z-score".
do_zscoring = false;
if isfield(config, 'DO_ZSCORING')
    do_zscoring = config.DO_ZSCORING;
end

if do_zscoring
    % Replace infs with 0s so they do not turn the column statistics
    % into Inf/NaN during z-scoring.
    infs = isinf(features);
    features(infs) = 0;
    features = zscore(features, 0, 1);
    % The entries that were originally inf now hold a z-scored 0;
    % reset them to exactly 0.
    features(infs) = 0;
end

N = size(features, 1);
labels = zeros(N, 1); % label = 1 if is cell, = 0 if not labeled, = -1 if not cell

dataset.features = features;
dataset.labels_ex = labels; % expert / human labels
dataset.labels_ml = labels; % cell classifier / ML labels
dataset.labels_ml_prob = zeros(N, 1); % probability associated with labels_ml of being a cell

dataset.balance = config.balance;

% Without a classifier argument, store NaN as a placeholder.
if nargin < 3
    dataset.mdl = NaN;
else
    dataset.mdl = mdl;
end
end