function [data,sim,sigma2,cidx,AMAX,folds] = preprocess(cell_lines,mode)
% PREPROCESS Collect measured and simulated responses over all CV folds.
%
%   [data,sim,sigma2,cidx,AMAX] = preprocess(cell_lines,mode)
%
%   Inputs
%     cell_lines  not used by this function (kept for interface compatibility)
%     mode        'fit' or 'val' (cell lines are loaded from 'I_all'), or
%                 'tval' (cell lines are loaded from 'I_val'); it is also the
%                 prefix of the result files ./results/<mode>_cv<k>_MS<m>.mat
%
%   Outputs
%     data    NaN-initialised array of size
%             [numel(I), numel(drugs), 8, Ncv, 5] filled with the measured
%             values x.D(k).Y taken from the result files
%     sim     array of the same size filled with the matching x.sim(k) values
%     sigma2  sum of squared residuals (data-sim) over the first three
%             dimensions divided by the number of non-NaN entries of data
%     cidx    1-by-Ncv cell; cidx{k} holds, for fold k, the position in I of
%             every entry of the loaded P (matched by the .name field)
%     AMAX    numel(I)-by-numel(drugs) matrix with (100 + T{row,12})/100 for
%             the first table row matching both cell line and drug, NaN if no
%             such row exists
%     folds   NOTE(review): never assigned here; requesting a sixth output
%             raises MATLAB's "output argument not assigned" error.
%
%   Relies on the global Ncv (number of cross-validation folds) and on the
%   helper getSelMS, which is defined outside this block.

global Ncell;
global Ndrug;
global Ntis;
global Ncv;

T = readtable('CCLE_NP24.2009_Drug_data_2015.02.24.csv');

drugs = {'Erlotinib','Lapatinib','PD-0325901','PLX4720','RAF265','AZD6244','ZD-6474'};

% Load the cell-line list I (the .mat file is expected to provide a cell
% array named I whose elements have a .name field).
if(strcmp(mode,'fit')||strcmp(mode,'val'))
    load('I_all')
elseif(strcmp(mode,'tval'))
    load('I_val')
else
    % Previously an unknown mode only failed later, when the undefined
    % variable I was first used.
    error('preprocess:unknownMode', ...
        'Unknown mode; expected ''fit'', ''val'' or ''tval''.');
end

% Extract the table columns once instead of once per (drug, cell line) pair.
tabNames = T{:,1};     % compared against the cell-line names
tabDrugs = T{:,3};     % compared against the drug names
tabAmax  = T{:,12};    % presumably Amax in percent -- TODO confirm column meaning

% One logical row mask per drug.
drugMask = cellfun(@(d) strcmp(tabDrugs,d),drugs,'UniformOutput',false);

% One logical row mask per cell line: the name without the 'TUMOR-' prefix
% must occur as a substring of the first table column.
clMask = cell(1,length(I));
for icl = 1:length(I)
    clMask{icl} = not(cellfun(@isempty,strfind(tabNames,strrep(I{icl}.name,'TUMOR-',''))));
end

AMAX = NaN(length(I),length(drugs));
for idrug = 1:length(drugs)
    for icl = 1:length(I)
        % every cell line must appear in the table at least once
        assert(sum(clMask{icl})>0)
        % Use the first matching row in every mode. The former 'fit'/'val'
        % branch assigned all matching rows to a scalar slot and therefore
        % errored whenever more than one row matched.
        irow = find(and(drugMask{idrug},clMask{icl}),1);
        if(~isempty(irow))
            AMAX(icl,idrug) = (100+tabAmax(irow))/100;
        end
    end
end

% NOTE(review): the trailing dimension is hard-coded to 5; if getSelMS ever
% returns more than 5 entries the arrays grow on assignment and the new
% slots are zero-filled rather than NaN-filled.
data = NaN(length(I),length(drugs),8,Ncv,5);
sim = NaN(length(I),length(drugs),8,Ncv,5);
cidx = cell(1,Ncv);

for icv = 1:Ncv
    sel_MS =  getSelMS(icv);
    
    for iMS = 1:length(sel_MS)
        % The result file is expected to define P -- presumably an N-by-1
        % cell of per-cell-line structs with fields name, conc, drug, D, sim.
        load(['./results/' mode '_cv' num2str(icv) '_MS' num2str(sel_MS(iMS)) '.mat']);
        % Build three cell grids of identical size (entry x drug x conc) so
        % that a single cellfun call can visit every combination.
        % Assumes P{1}.conc is a column vector whose first 8 entries are the
        % distinct concentrations -- TODO confirm.
        conc = arrayfun(@(x) x,P{1}.conc(1:8),'UniformOutput',false);
        drug = arrayfun(@(x) x,1:7,'UniformOutput',false);
        conc = permute(conc,[3,2,1]);
        conc = repmat(conc,[size(P,1),size(drug,2),1]);
        drug = repmat(drug,[size(P,1),1,size(conc,3)]);
        P = repmat(P,[1,size(drug,2),size(conc,3)]);
        % Position of every loaded entry inside I, matched by name.
        cidx{icv} = cellfun(@(x) find(strcmp(x.name,cellfun(@(y) y.name,I,'UniformOutput',false))),P(:,1,1));
        % For each (entry, drug, conc) pick the matching measurement and
        % simulation; NaN(isempty(...),1) contributes a single NaN exactly
        % when no data point matches.
        tmpdata = cell2mat(cellfun(@(x,y,z) [x.D(find(and(x.conc==z,x.drug==y))).Y,NaN(isempty(find(and(x.conc==z,x.drug==y))),1)],P,drug,conc,'UniformOutput',false));
        tmpsim = cell2mat(cellfun(@(x,y,z) [x.sim(find(and(x.conc==z,x.drug==y))),NaN(isempty(find(and(x.conc==z,x.drug==y))),1)],P,drug,conc,'UniformOutput',false));
        if(~strcmp(mode,'tval'))
            % 'fit'/'val': the loaded entries may be a subset of I, so the
            % rows are scattered to their positions in I.
            data(cidx{icv},:,:,icv,iMS) = tmpdata;
            sim(cidx{icv},:,:,icv,iMS) = tmpsim;
        else
            % 'tval': the loaded entries are written to all rows in order.
            data(:,:,:,icv,iMS) = tmpdata;
            sim(:,:,:,icv,iMS) = tmpsim;
        end
        
    end
    
end
% Mean squared residual per (fold, model-selection slot), ignoring NaNs.
sigma2 = nansum(nansum(nansum((data-sim).^2,1),2),3)./nansum(nansum(nansum(~isnan(data),1),2),3);



end

function tissue = getTissue(P)
% GETTISSUE Map every entry of P to a numeric tissue code.
%
%   tissue = getTissue(P)
%
%   P is a cell array whose elements have a .name field. The first six
%   characters of the name are dropped (presumably the 'TUMOR-' prefix --
%   TODO confirm) and the remainder is looked up in
%   'TissueOrigin_CelllinesInExperimentalTable.csv', which is read as a
%   whitespace-separated list of alternating <cell line> <tissue> tokens.
%
%   The result has the same size as P and contains
%     1 BREAST, 2 LARGE-INTESTINE, 3 LUNG, 4 PANCREAS, 5 SKIN,
%     0 for any other tissue label.

fname = 'TissueOrigin_CelllinesInExperimentalTable.csv';
fid = fopen(fname);
if(fid < 0)
    % fopen signals failure with -1; report it here instead of letting
    % textscan fail on an invalid file identifier.
    error('getTissue:cannotOpenFile','Could not open %s.',fname);
end
% Close the file on every exit path, including errors raised below.
closeFile = onCleanup(@() fclose(fid));

scan = textscan(fid,'%s');
% Row 1: cell-line names, row 2: tissue labels. The number of table entries
% is derived from the file instead of being fixed to 124.
tissues = reshape(scan{1},2,[]);

tmp = cellfun(@(x) tissues{2,find(strcmp(x.name(7:end),tissues(1,:)))},P,'UniformOutput',false);
tissue = strcmp(tmp,'BREAST')+2*strcmp(tmp,'LARGE-INTESTINE')+3*strcmp(tmp,'LUNG')+4*strcmp(tmp,'PANCREAS')+5*strcmp(tmp,'SKIN');
end