%% preprocess
% Problem dimensions, declared global so that any function declaring the
% same globals sees the values assigned below.
global Ncell;
global Ndrug;
global Ntis;
global Ncv;

% Put /data and all of its subfolders on the MATLAB path; the data files
% loaded by name further down are resolved through the path.
addpath(genpath('/data'))

% NOTE(review): Ndrug and Ntis match the lengths of the drug and tissue name
% lists defined later in this script; the meaning of Ncell and Ncv is not
% visible here (presumably cell-line and cross-validation counts) - confirm.
Ncell = 5;
Ndrug = 7;
Ntis = 5;
Ncv = 5;

% A permutation of the drug indices 1..7. It is not used in the visible part
% of this script; presumably a display order used by downstream code.
drugorder = [1,2,7,4,5,3,6];

% Load the cached preprocessing results. If loading fails for ANY reason
% (missing file, unreadable file, ...), rebuild them and rewrite the cache.
try
    load('preprocessed.mat')
catch
    % I_val.mat is expected to provide a cell array I whose elements have a
    % .name field; the names are passed to the project function preprocess
    % with the 'tval' option to obtain the test data/simulation.
    load('I_val.mat')
    cell_lines = cellfun(@(x) x.name,I,'UniformOutput',false);
    [data_test,sim_test] = preprocess(cell_lines,'tval');
    
    % I_all.mat replaces I, and cell_lines is recomputed from it, so from
    % here on (and in the saved cache) both refer to the I_all set.
    load('I_all.mat')
    cell_lines = cellfun(@(x) x.name,I,'UniformOutput',false);
    % The fifth output of preprocess is discarded for the 'fit' option.
    [data_fit,sim_fit,sigma2_fit,cidx,~,fold_fit] = preprocess(cell_lines,'fit');
    [data_val,sim_val] = preprocess(cell_lines,'val');
    % Cache everything produced above so the next run takes the load branch.
    save('preprocessed.mat','data_test','sim_test','data_fit','sim_fit','fold_fit','data_val','sim_val','sigma2_fit','cidx','cell_lines','I')
end

% xx = nanmax(data_fit(:,:,8,:,1),[],4);
% yy = AMAX;
% sigma = nanstd((xx(:)-yy(:)));
% filteridx = abs(xx-yy)<4*sigma;

% Drug names; the underscore/brace syntax looks like TeX markup, presumably
% for plot labels.
drugs = {'Erlotinib','Lapatinib','PD0325901','PLX_{4720}','CHIR_{265}','Selumetinib','Vandetanib'};

% Read the tissue-of-origin table as a flat list of whitespace-separated
% tokens. The tokens are interpreted pairwise: row 1 of the reshaped table
% is the key matched against the cell-line name, row 2 the tissue label.
tissue_file = 'TissueOrigin_CelllinesInExperimentalTable.csv';
fid = fopen(tissue_file);
if(fid == -1)
    error('preprocess:tissueFileNotFound','Could not open %s.',tissue_file);
end
scan = textscan(fid,'%s');
% Close the file before any further processing so that a malformed table
% cannot leave the handle open.
fclose(fid);
% Let reshape infer the number of entries instead of hard-coding 124.
tissue_table = reshape(scan{1},2,[]);

% Tissue label of every cell line; the 'TUMOR-' prefix is removed from the
% cell-line names before matching them against the table keys.
tmp = cellfun(@(x) tissue_table{2,strcmp(x,tissue_table(1,:))},strrep(cell_lines,'TUMOR-',''),'UniformOutput',false);

% Numeric tissue code = position of the label in this list, 0 if the label
% is not one of the five listed tissues.
tissues = {'BREAST','LARGE-INTESTINE','LUNG','PANCREAS','SKIN'};
[~,tissue] = ismember(tmp,tissues);

% Load the cached mutation matrix; on any load failure rebuild it from the
% mutation table and rewrite the cache.
try
    load('mutations.mat')
catch
    muttable = readtable('./matthias/MUTTable.txt');
    
    % Keep only the rows whose first-column entry contains 'MutAA'.
    mutidx = not(cellfun(@isempty,strfind(muttable{:,1},'MutAA')));
    mutations = [];
    mut_names = muttable{mutidx,1};
    % Append one column per cell line in I, in the order of I.
    for icl = 1:length(I)
        %if(~isempty(sim_fit.lines{icl})) % only look at those that were selected for fitting
        % The table column of a cell line is its name with '-' replaced by
        % '_' followed by the suffix '_cellline_01_01'.
        % NOTE(review): if no column matches, cellidx is empty and nothing is
        % appended, so later columns of mutations no longer line up with the
        % indices of I - confirm every cell line is present in the table.
        cellidx = find(strcmp([strrep(I{icl}.name,'-','_') '_cellline_01_01'],muttable.Properties.VariableNames));
        mutations = [mutations,muttable{mutidx,cellidx}];
        %end
    end
    save('mutations.mat','mutations','mut_names')
end

% Load the cached CCLE dose-response summary; on any load failure rebuild it
% from the raw CCLE table and rewrite the cache.
try
    load('m_sd_ccle')
catch
    T = readtable('CCLE_NP24.2009_Drug_data_2015.02.24.csv');
    % Concentration grid onto which the CCLE measurements are mapped.
    conc = [2.5e-3,8e-3,2.5e-2,8e-2,2.5e-1,8e-1,2.53e-0,8e-0];
    % Drug names as they are matched against column 3 of the CCLE table.
    drugalias = {'Erlotinib','Lapatinib','PD-0325901','PLX4720','RAF265','AZD6244','ZD-6474'};

    % Parse a comma/whitespace separated list of numbers without executing
    % the text (the original built and eval'ed MATLAB code from the file
    % contents). Tokens that are not numbers become NaN.
    parse_list = @(s) str2double(regexp(s,'[^,;\s]+','match'));

    % Preallocate with NaN so that every missing entry is NaN. With implicit
    % growth, slots created by a later assignment are zero-filled, and the
    % EC50/IC50 matrices could end up smaller than length(I) x number of
    % drugs.
    m_ccle = NaN(length(I),length(drugalias),length(conc));
    sd_ccle = NaN(length(I),length(drugalias),length(conc));
    ec50_ccle = NaN(length(I),length(drugalias));
    ic50_ccle = NaN(length(I),length(drugalias));

    for icl = 1:length(I)
        % Rows whose first column contains the cell-line name (without the
        % 'TUMOR-' prefix) as a substring. This is loop-invariant over the
        % drugs, and deliberately NOT called cidx so that the cidx returned
        % by preprocess is the same whether or not this cache exists.
        ccle_cidx = not(cellfun(@isempty,strfind(T{:,1},strrep(I{icl}.name,'TUMOR-',''))));
        for idrug = 1:length(drugalias)
            didx = strcmp(T{:,3},drugalias{idrug});
            % As before, only the first matching row is used.
            irow = find(and(ccle_cidx,didx),1,'first');
            if(~isempty(irow))
                % Column 5 lists the tested concentrations; columns 6 and 7
                % hold one value per tested concentration (stored below as
                % mean and standard deviation).
                conc_ccle = parse_list(T{irow,5}{1});
                dose_idx = ismember(conc,conc_ccle);
                m_ccle(icl,idrug,dose_idx) = parse_list(T{irow,6}{1});
                sd_ccle(icl,idrug,dose_idx) = parse_list(T{irow,7}{1});
                % Columns 10 and 11 are stored as EC50 and IC50; x502str
                % maps both numeric and text entries ('NA' -> 'NaN') to text.
                ec50 = x502str(T{irow,10});
                ic50 = x502str(T{irow,11});
                ec50_ccle(icl,idrug) = str2double(ec50);
                ic50_ccle(icl,idrug) = str2double(ic50);
            end
        end
    end

    % Values of exactly 0 or 8 are treated as missing (8 is the highest
    % concentration of the grid).
    ic50_ccle(ic50_ccle==0) = NaN;
    ic50_ccle(ic50_ccle==8) = NaN;
    ec50_ccle(ec50_ccle==0) = NaN;
    ec50_ccle(ec50_ccle==8) = NaN;

    % Rescale: m -> (100+m)/100 and sd -> sd/100.
    % NOTE(review): presumably converts percent activity change into a
    % relative viability - confirm against the CCLE column definitions.
    m_ccle =  (100+m_ccle)/100;
    sd_ccle = sd_ccle/100;
    save('m_sd_ccle','m_ccle','sd_ccle','conc','ic50_ccle','ec50_ccle')
end

% For every drug pick the index of the first concentration in conc that
% exceeds the median CCLE IC50 of that drug across cell lines.
% The drug count is taken from the drug list instead of a literal 7, matching
% the extraction loop below.
phys_conc = zeros(1,length(drugs));
for idrug = 1:length(drugs)
    ic50 = quantile(ic50_ccle(:,idrug),0.5);
    iconc = find(conc>ic50,1,'first');
    if(isempty(iconc))
        % Happens when the median is NaN or not below the highest
        % concentration; previously this surfaced as a cryptic
        % element-count mismatch on assignment.
        error('preprocess:noPhysConc', ...
            'No concentration exceeds the median CCLE IC50 (%g) of drug %d (%s).', ...
            ic50,idrug,drugs{idrug});
    end
    phys_conc(idrug) = iconc;
end

% Extract, per drug, the slice of every data/simulation array at the selected
% concentration index (third dimension) and store it at position 1 of that
% dimension.
for idrug = 1:length(drugs)
    data_phys_conc_fit(:,idrug,1,:,:) = data_fit(:,idrug,phys_conc(idrug),:,:);
    sim_phys_conc_fit(:,idrug,1,:,:) = sim_fit(:,idrug,phys_conc(idrug),:,:);
    data_phys_conc_val(:,idrug,1,:,:) = data_val(:,idrug,phys_conc(idrug),:,:);
    sim_phys_conc_val(:,idrug,1,:,:) = sim_val(:,idrug,phys_conc(idrug),:,:);
    data_phys_conc_test(:,idrug,1,:,:) = data_test(:,idrug,phys_conc(idrug),:,:);
    sim_phys_conc_test(:,idrug,1,:,:) = sim_test(:,idrug,phys_conc(idrug),:,:);
end

function str = x502str(x50)
% X502STR  Text form of an EC50/IC50 table entry.
%   str = x502str(x50) returns
%     - for a cell input: the first cell's text with every 'NA' replaced by
%       'NaN', so that it reads back as a MATLAB number;
%     - for any other input: its decimal text representation.
   if(iscell(x50))
       str = strrep(x50{1},'NA','NaN');
   else
       % Request up to 17 significant digits so that a double survives the
       % number -> text -> number round trip unchanged; the default
       % num2str precision keeps only about 4-5 significant digits.
       str = num2str(x50,17);
   end
end

