{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ " ** Figure 6: Machine-learning based detection of EwS and distinction from other sarcomas **\n", "\n", "Here, we are using a table (\"ML_input_features.xlsx\") containing all the features required as input, including features from global fragmentation, regional fragmentation & read depth, as well as features based on fragment coverage at EwS-specific DHSs. How these metrics were generated is outlined in the previous notebooks." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%%bash\n", "wget http://medical-epigenomics.org/papers/peneder2020_f17c4e3befc643ffbb31e69f43630748/data/ML_input_features.xlsx" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from scipy import stats\n", "import sys\n", "import os\n", "sys.path.insert(0, os.getcwd())\n", "import binary_classifier_considering_patients as binary_classifier\n", "import subprocess\n", "from datetime import datetime\n", "from sklearn.linear_model import LinearRegression\n", "import pickle" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# settings\n", "max_threads=10\n", "max_mem=100\n", "myseed=42\n", "metalearn=True\n", "run_baselearners=True\n", "skip_baselearn_if_present=True\n", "n_bootstrap_reps=10" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# This function is used in all ML experiments and calls the training and testing procedure for all 4 feature sets and the metalearner,\n", "# for a given set of samples and a given response\n", "# The actual training and testing is in \n", "def run_classification(name, traintestset, unclear_set, response, response_name,alternative_reference_for_unclears,alternative_response_name,\n", " n_bootstrap_reps=n_bootstrap_reps):\n", " \n", " if not os.path.exists(name):\n", " os.mkdir(name)\n", " os.chdir(name)\n", " \n", " for predictors_w_names_and_metalearnername in [(\"METALEARNER_fullx\",predictorset_fullx),\n", " (\"METALEARNER_1x\",predictorset_1x),\n", " (\"METALEARNER_0.1x\", predictorset_0point1x)]:\n", " \n", " predictors_w_names=predictors_w_names_and_metalearnername[1]\n", " metalearn_dirname=predictors_w_names_and_metalearnername[0]\n", " \n", " # saves the predictions of each classifier for metalearning\n", " p1_dict={\"traintestset\":pd.DataFrame(),\"unclearset\":pd.DataFrame()}\n", " predictornames_for_metalearning_w_mean_p1=[]\n", " # saves the prediction of each classifier - only using samples in the trainingset\n", " trainingsetbased_p1=pd.DataFrame()\n", "\n", " for predictorelem in predictors_w_names:\n", " predictors=predictorelem[1]\n", " predictor_name=predictorelem[0]\n", " print(\"Running for\",predictor_name)\n", " if not os.path.exists(predictor_name):\n", " os.mkdir(predictor_name)\n", " os.chdir(predictor_name)\n", "\n", " if run_baselearners and not (skip_baselearn_if_present and os.path.isfile(\"bestmodel_classification_out_of_sample_predictions_\"+name+\".csv\")):\n", " cols=(predictors+[\"sample\",\"patient\",response]+([alternative_reference_for_unclears] if not alternative_reference_for_unclears in predictors else []))\n", " \n", " # run the actual classification using the given set of features\n", " binary_classifier.run_classification(comparisonname=name,df=pd.concat([traintestset,unclear_set],axis=0)[cols],\n", " labelsamples=list(traintestset[\"sample\"]),\n", " unclearsamples=list(unclear_set[\"sample\"]),\n", " response=response,predictors=predictors, response_name=response_name,\n", " alternative_reference_for_unclears=alternative_reference_for_unclears,\n", " alternative_response_name=alternative_response_name,\n", " n_bootstrap_reps=n_bootstrap_reps,max_threads=max_threads,max_mem=max_mem)\n", "\n", " # save the resulting predictions of this classifier in the table such that it can be used by the meta-learner\n", " trainingsetbased_p1_thisclassifier=pd.read_csv(\"trainingset_based_predictions_for_metalearner.csv\")\n", " trainingsetbased_p1_thisclassifier=trainingsetbased_p1_thisclassifier.rename({x:predictor_name+x for x in trainingsetbased_p1_thisclassifier.columns if not x==\"sample\"},axis=1)\n", " if trainingsetbased_p1.empty:\n", " trainingsetbased_p1=trainingsetbased_p1_thisclassifier\n", " else:\n", " trainingsetbased_p1=pd.merge(trainingsetbased_p1,trainingsetbased_p1_thisclassifier,left_on=\"sample\",right_on=\"sample\",how=\"inner\")\n", " \n", " os.chdir('..')\n", "\n", " ## metalearning :\n", " if metalearn:\n", " metainput=pd.merge(pd.concat([traintestset,unclear_set]),trainingsetbased_p1,left_on=\"sample\",right_on=\"sample\",how=\"inner\")\n", " metapreds=[x for x in trainingsetbased_p1.columns if not x==\"sample\"]\n", "\n", " if not os.path.exists(metalearn_dirname):\n", " os.mkdir(metalearn_dirname)\n", " else:\n", " print(\"Metalearning already done. skipping\")\n", " continue\n", " os.chdir(metalearn_dirname)\n", " \n", " # run the actual classification using the predictions of the previously run classifiers as input features\n", " binary_classifier.run_classification(comparisonname=name,df=metainput,\n", " labelsamples=list(traintestset[\"sample\"]),\n", " unclearsamples=list(unclear_set[\"sample\"]),\n", " response=response,predictors=metapreds,\n", " response_name=response_name,\n", " alternative_reference_for_unclears=alternative_reference_for_unclears,\n", " alternative_response_name=alternative_response_name,\n", " n_bootstrap_reps=n_bootstrap_reps,max_threads=max_threads,\n", " metalearn=True)\n", " os.chdir('..')\n", " os.chdir('..')\n", " \n", " \n", "def run_w_shuffled_lables(workdir,traintestset,response,nr_outerouter_folds_start,nr_outerouter_folds_end,name):\n", " ### with shuffled labels:\n", " os.makedirs(workdir,exist_ok=True)\n", " os.chdir(workdir)\n", " os.makedirs(\"RANDOMIZED_labels_traintestset\",exist_ok=True)\n", " os.chdir(\"RANDOMIZED_labels_traintestset\")\n", "\n", " for i in range(nr_outerouter_folds_start,nr_outerouter_folds_end+1):\n", " np.random.seed(seed=myseed+1000+i)\n", " os.makedirs(str(i),exist_ok=True)\n", " os.chdir(str(i))\n", " shuffledtraintestset=traintestset.assign(**{response:np.random.permutation(traintestset[response].values)})\n", " unclear_set=df[~df[\"sample\"].isin(shuffledtraintestset[\"sample\"])]\n", " unclear_set=unclear_set.assign(**{response:np.nan})\n", " run_classification(name=name,traintestset=shuffledtraintestset,\n", " unclear_set=unclear_set,response=response,response_name=\"RANDOM\"+response,\n", " alternative_reference_for_unclears=\"is_genomic_tumor_evidence_available\",\n", " alternative_response_name=\"Genomic tumor evidence available\",n_bootstrap_reps=1)\n", " os.chdir(\"..\")\n", " os.chdir(\"../..\" if not workdir==\".\" else \"..\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# load data\n", "df=pd.read_excel(\"ML_input_features.xlsx\")\n", "\n", "# Define the feature-sets for each of the coverage levels 12x, 1x, and 0.1x\n", "# to keep things simple, lists containing the feature-sets are already pickled and just loaded here:\n", "with open(\"predictorset_fullx.pickle\", \"rb\") as fp:\n", " predictorset_fullx=pickle.load(fp)\n", "with open(\"predictorset_1x.pickle\", \"rb\") as fp:\n", " predictorset_1x=pickle.load(fp)\n", "with open(\"predictorset_0point1x.pickle\", \"rb\") as fp:\n", " predictorset_0point1x=pickle.load(fp)\n", "df_all=df.copy()\n", "df=df[df[\"Sample type\"]!=\"Non-EwS sarcoma\"]\n", "\n", "# Define control sets\n", "our_controls=df[df[\"sample\"].str.contains(\"Ctrl\")]\n", "crist_controls=df[df[\"sample\"].str.contains(\"EGAR\")]\n", "ulz_ctrls=df[df[\"sample\"].str.contains(\"NPH\")]\n", "controls=pd.concat([our_controls,crist_controls,ulz_ctrls],axis=0)\n", "non_ews_cancers=df[df[\"Sample type\"]==\"Non-EwS sarcoma\"]\n", "\n", "\n", "# Start the ML experiments\n", "if True:\n", " ##### Clinical evidence for tumor: YES vs. healthy CTRLs (seperately for each control set) #####\n", " for controlsetname, controlset in [(\"our_ctrls_only\",our_controls),(\"crist_ctrl_only\",crist_controls),(\"ulz_ctrl_only\",ulz_ctrls)]:\n", "\n", " os.makedirs(controlsetname,exist_ok=True)\n", " os.chdir(controlsetname)\n", " response=\"clinical data indicating presence of tumor (PET-SCAN, MRI, CT)\"\n", " clinical_evidence_yes=df[df[response]==\"yes\"]\n", " clinical_evidence_yes=clinical_evidence_yes.assign(**{response:1})\n", " controlset=controlset.assign(**{response:0})\n", " traintestset=pd.concat([clinical_evidence_yes,controlset],axis=0)\n", " unclear_set=df[~df[\"sample\"].isin(traintestset[\"sample\"])]\n", " unclear_set=unclear_set.assign(**{response:np.nan})\n", " name=\"Clinical_evidence_for_tumor_YES__vs__healthy_CTRLs\"\n", " run_classification(name=name,traintestset=traintestset,\n", " unclear_set=unclear_set,response=response,response_name=\"Clinical tumor evidence\",\n", " alternative_reference_for_unclears=\"is_genomic_tumor_evidence_available\",\n", " alternative_response_name=\"Genomic tumor evidence available\",\n", " n_bootstrap_reps=n_bootstrap_reps)\n", " os.chdir(\"..\")\n", "\n", "if False:\n", " ##### OUR vs CRISTIANO CTRLS #####\n", " response=\"is_crist_ctrl\"\n", " crist_controls1=crist_controls.assign(**{response:1})\n", " our_controls1=our_controls.assign(**{response:0})\n", " traintestset=pd.concat([our_controls1,crist_controls1],axis=0)\n", " unclear_set=df[~df[\"sample\"].isin(traintestset[\"sample\"])]\n", " unclear_set=unclear_set.assign(**{response:np.nan})\n", " name=\"is_crist_ctrl\"\n", " run_classification(name=name,traintestset=traintestset,\n", " unclear_set=unclear_set,response=response,response_name=\"Is cristiano et al ctrl\",\n", " alternative_reference_for_unclears=\"is_genomic_tumor_evidence_available\",\n", " alternative_response_name=\"Genomic tumor evidence available\")\n", "if False:\n", " ##### OUR vs ULZ CTRLS #####\n", " response=\"is_ulz_ctrl\"\n", " ulz_controls2=ulz_ctrls.assign(**{response:1})\n", " our_controls2=our_controls.assign(**{response:0})\n", " traintestset=pd.concat([our_controls2,ulz_controls2],axis=0)\n", " unclear_set=df[~df[\"sample\"].isin(traintestset[\"sample\"])]\n", " unclear_set=unclear_set.assign(**{response:np.nan})\n", " name=\"is_ulz_ctrl\"\n", " run_classification(name=name,traintestset=traintestset,\n", " unclear_set=unclear_set,response=response,response_name=\"Is ulz et al ctrl\",\n", " alternative_reference_for_unclears=\"is_genomic_tumor_evidence_available\",\n", " alternative_response_name=\"Genomic tumor evidence available\")\n", "\n", "if False:\n", " #### Diagnostic EwS vs Ctrls from this study\n", " response=\"is_diag_EwS\"\n", " diag=df[df[\"sample timepoint\"]==\"diagnosis\"]\n", " diag=diag.assign(**{response:1})\n", " controls_for_this_experiment=our_controls.assign(**{response:0})\n", " traintestset=pd.concat([diag,controls_for_this_experiment],axis=0)\n", " unclear_set=df[~df[\"sample\"].isin(traintestset[\"sample\"])]\n", " unclear_set=unclear_set.assign(**{response:np.nan})\n", " name=\"diagnostic_EwS_vs_healthy\"\n", " run_classification(name=name,traintestset=traintestset,\n", " unclear_set=unclear_set,response=response,response_name=\"Diagnostic EwS sample\",\n", " alternative_reference_for_unclears=\"is_genomic_tumor_evidence_available\",\n", " alternative_response_name=\"Genomic tumor evidence available\")\n", " os.chdir(\"..\")\n", " \n", "if False:\n", " ##### EwS vs non-EwS samples - both with genomic evidence for tumor ######\n", " response=\"is_ewing_w_gen_evidence_not_nonewingcancerinclEwslike_w_gen_evidence\"\n", " ews_genomic_evidence_yes=df[df[\"is_genomic_tumor_evidence_available\"]==1]\n", " ews_genomic_evidence_yes=ews_genomic_evidence_yes.assign(**{response:1})\n", " nonews_cancer=df_all[(df_all[\"sample\"].isin(non_ews_cancers))].assign(**{response:0})\n", " nonews_genomic_evidence_yes=nonews_cancer[nonews_cancer[\"is_genomic_tumor_evidence_available\"]==1]\n", " nonews_genomic_evidence_yes=nonews_genomic_evidence_yes.assign(**{response:0})\n", " traintestset=pd.concat([ews_genomic_evidence_yes,\n", " nonews_genomic_evidence_yes],axis=0)\n", " unclear_set=df[~df[\"sample\"].isin(traintestset[\"sample\"])]\n", " unclear_set=unclear_set.assign(**{response:np.nan})\n", " unclear_set=unclear_set.assign(**{\"dummy\":np.nan})\n", " name=\"ewing_w_gen_evidence_not_nonewingcancerinclEwslike_w_gen_evidence\"\n", " run_classification(name=name,traintestset=traintestset,\n", " unclear_set=unclear_set,response=response,response_name=\"Is EwS sample w. tumor ev., not other cancer w. tumor ev.\",\n", " alternative_reference_for_unclears=\"dummy\",\n", " alternative_response_name=\"No information available\")" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "## To summarize the performance of different classifiers in one plot:\n", "\n", "import sklearn\n", "from matplotlib import pyplot as plt\n", "from matplotlib.font_manager import FontProperties\n", "from sklearn.metrics import roc_curve, precision_recall_curve, auc,average_precision_score\n", "import pandas as pd\n", "from scipy import interp\n", "import numpy as np\n", "from collections import defaultdict\n", "import sys\n", "import glob\n", "from matplotlib import rc\n", "rc('font',**{'sans-serif':['Arial']})\n", "np.seterr(all='raise')\n", "\n", "\n", "def plot_ROC_curves(outname, response, csvname,n_bootstrap_its, featureset_paths_and_names,combine_controlsets=True,\n", "use_only_ulz_ctrls=False,\n", "use_only_cristiano_ctrls=False,\n", "use_only_our_ctrls=False,sort_by_AUC=True,restrict_to_these_testset_ews_samples=None):\n", " \n", " plt.gcf().set_size_inches(4,4)\n", "\n", " table_text=[]\n", " rownames=[]\n", " colors=[]\n", " tabledict={}\n", "\n", " color_base=[\"cadetblue\",\"coral\",\"mediumseagreen\",\"firebrick\",\"#9467bd\"]\n", " def get_color(name):\n", " if \"Global\" in name:\n", " return color_base[0]\n", " if \"DHS\" in name:\n", " return color_base[1]\n", " if \"depth\" in name:\n", " return color_base[2]\n", " if \"Regional\" in name:\n", " return color_base[3]\n", " if \"Meta\" in name:\n", " return color_base[4]\n", " else:\n", " return \"black\"\n", "\n", " for j in range(0,len(featureset_paths_and_names),2): # for every feature-set (folder name and label)\n", " tprs = []\n", " aucs = []\n", " mean_fpr = np.linspace(0, 1, 10000)\n", " mean_sensitivity_at_100spec=[]\n", " mean_sensitivity_at_95spec=[]\n", " name=(featureset_paths_and_names[j+1]).replace(\"__\",\"\\n\")\n", "\n", " parentname=csvname.replace(\"bestmodel_classification_out_of_sample_predictions_\",\"\").replace(\".csv\",\"\")\n", "\n", " print(featureset_paths_and_names[j])\n", " if combine_controlsets==False and use_only_our_ctrls==False:\n", " df_our=pd.DataFrame()\n", " else:\n", " df_our=pd.read_csv(\"our_ctrls_only/\"+parentname+\"/\"+featureset_paths_and_names[j]+\"/%s\"%(csvname))[[response,\"sample\"]+[\"%s_p1\"%(idx) for idx in range(n_bootstrap_its)]]\n", " if combine_controlsets==False and use_only_cristiano_ctrls==False:\n", " df_crist=pd.DataFrame()\n", " else:\n", " df_crist=pd.read_csv(\"crist_ctrl_only/\"+parentname+\"/\"+featureset_paths_and_names[j]+\"/%s\"%(csvname))[[response,\"sample\"]+[\"%s_p1\"%(idx) for idx in range(n_bootstrap_its)]]\n", " if combine_controlsets==False and use_only_ulz_ctrls==False:\n", " df_ulz=pd.DataFrame()\n", " else:\n", " df_ulz=pd.read_csv(\"ulz_ctrl_only/\"+parentname+\"/\"+featureset_paths_and_names[j]+\"/%s\"%(csvname))[[response,\"sample\"]+[\"%s_p1\"%(idx) for idx in range(n_bootstrap_its)]]\n", "\n", " if combine_controlsets==True: # here, the results from the different control sets are averaged in a meta-analysis approach\n", "\n", " # simply rename the columns of the cristiano and ulz datasets to higher iteration numbers to keep them apart from the other datasets\n", " df_crist.columns=[response,\"sample\"]+[\"%s_p1\"%(idx) for idx in range(n_bootstrap_its,n_bootstrap_its*2)]\n", " df_ulz.columns=[response,\"sample\"]+[\"%s_p1\"%(idx) for idx in range(n_bootstrap_its*2,n_bootstrap_its*3)]\n", " sampleset=set()\n", " for i in range(n_bootstrap_its*3):\n", " if i>=n_bootstrap_its*2:\n", " df=df_ulz\n", " elif i>=n_bootstrap_its:\n", " df=df_crist\n", " else:\n", " df=df_our\n", " testset=df.dropna(subset=[str(i)+\"_p1\"],axis=0) # keep only samples that were in the testset in this fold\n", " if restrict_to_these_testset_ews_samples:\n", " testset=testset[(testset[response]==0) | (testset[\"sample\"].isin(restrict_to_these_testset_ews_samples))]\n", " sampleset.update(set(testset[testset[response]==1][\"sample\"].values))\n", "\n", " # Get the roc curve and auc of this fold:\n", " #https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc_crossval.html#sphx-glr-auto-examples-model-selection-plot-roc-crossval-py\n", " fpr, tpr, thresholds = roc_curve([int(x) for x in testset[response].values], testset[str(i)+\"_p1\"].values)\n", " tprs.append(interp(mean_fpr, fpr, tpr))\n", " mean_sensitivity_at_100spec.append(interp(mean_fpr, fpr, tpr)[0])\n", " mean_sensitivity_at_95spec.append(max([tpr_val for tpr_val,fpr_val in zip(tpr,fpr) if fpr_val<0.05]))# interp(mean_fpr, fpr, tpr)[0])\n", "\n", " tprs[-1][0] = 0.0\n", " roc_auc = auc(fpr, tpr)\n", " aucs.append(roc_auc)\n", " elif combine_controlsets==False: # Here, the results from only on control set are used\n", " sampleset=set()\n", " df=df_our.append(df_crist,sort=True)\n", " df=df.append(df_ulz,sort=True)\n", " for i in range(n_bootstrap_its):\n", " testset=df.dropna(subset=[str(i)+\"_p1\"],axis=0) # keep only samples that were in the testset in this fold\n", " if restrict_to_these_testset_ews_samples:\n", " testset=testset[(testset[response]==0) | (testset[\"sample\"].isin(restrict_to_these_testset_ews_samples))]\n", " sampleset.update(set(testset[testset[response]==1][\"sample\"].values))\n", "\n", " # Get the roc curve and auc of this fold:\n", " fpr, tpr, thresholds = roc_curve([int(x) for x in testset[response].values], testset[str(i)+\"_p1\"].values)\n", " tprs.append(interp(mean_fpr, fpr, tpr))\n", " mean_sensitivity_at_100spec.append(interp(mean_fpr, fpr, tpr)[0])\n", " mean_sensitivity_at_95spec.append(max([tpr_val for tpr_val,fpr_val in zip(tpr,fpr) if fpr_val<0.05]))# interp(mean_fpr, fpr, tpr)[0])\n", " tprs[-1][0] = 0.0\n", " roc_auc = auc(fpr, tpr)\n", " aucs.append(roc_auc)\n", "\n", " # Average over all folds:\n", " mean_tpr = np.mean(tprs, axis=0)\n", " mean_tpr[-1] = 1.0\n", " mean_auc = auc(mean_fpr, mean_tpr)\n", " std_auc = np.std(aucs)\n", " CI_auc = \"%0.2f-%0.2f\"%(np.percentile(aucs,2.5),np.percentile(aucs,97.5))\n", " CI_sens_100_spec= \"%0.2f-%0.2f\"%(np.percentile(mean_sensitivity_at_100spec,2.5),np.percentile(mean_sensitivity_at_100spec,97.5))\n", "\n", " # plot the averaged ROC curve\n", " p=plt.plot(mean_fpr,mean_tpr, lw=2, color=get_color(name), alpha=.7)\n", " # and add statistics to the table\n", " this_table_text=[\"%0.2f (%s)\"%(mean_auc,CI_auc),\"%0.2f (%s)\"%(np.mean(mean_sensitivity_at_100spec), CI_sens_100_spec)]\n", " rownames.append(name)\n", " colors.append(get_color(name))\n", " tabledict[name]={\"text\":this_table_text,\"color\":get_color(name),\"rank\":1-mean_auc}\n", " print(np.mean(mean_sensitivity_at_95spec))\n", "\n", "\n", " plt.plot([0, 1], [0, 1], linestyle='--', lw=1.5, color='grey',\n", " label='Chance', alpha=.8)\n", " colwidth=0.2\n", " bbox=[0.51,0.02,0.537,0.4]\n", " if sort_by_AUC:\n", " tabledict={k:v for k,v in sorted(tabledict.items(),key=lambda item: item[1][\"rank\"])}\n", " table_text=[v[\"text\"] for v in tabledict.values()]\n", " rownames=list(tabledict.keys())\n", " colors=[v[\"color\"] for v in tabledict.values()]\n", "\n", " table=plt.table(cellText=table_text,rowLabels=[\"—\" for x in rownames],colLabels=[\"ROC\\nAUC\\n\\nmean (CI) \",\"Sens. at\\n100% spec.\\n\\nmean (CI) \"],rowColours=colors,rowLoc=\"center\",cellLoc=\"center\",bbox=bbox,colWidths=[0.25,0.25])\n", "\n", " table.auto_set_font_size(False)\n", " table.set_fontsize(7)\n", "\n", " cellDict= table.get_celld()\n", "\n", " # columns\n", " for i in [0,1]:\n", "\n", " # column labels\n", " cellDict[(0,i)].set_color(\"white\")\n", " cellDict[(0,i)].set_edgecolor(None)\n", " cellDict[(0,i)].set_linewidth(2)\n", " cellDict[(0,i)].set_alpha(1)\n", " cellDict[(0,i)].set_height(0.067)\n", " cellDict[(0,i)].set_text_props(weight=\"bold\",color=\"black\")\n", "\n", "\n", " for j in range(1,len(range(0,len(featureset_paths_and_names),2))+1): # rows\n", " # row labels:\n", " cellDict[(j,-1)].set_alpha(1)\n", " cellDict[(j,-1)].set_text_props(weight=\"bold\",color=\"white\")\n", " cellDict[(j,-1)].set_edgecolor(None)\n", " cellDict[(j,-1)].set_linewidth(0)\n", " cellDict[(j,-1)].set_color(\"white\")#(\"#f2f2f2\")\n", " cellDict[(j,-1)].set_height(0.03)\n", " cellDict[(j,-1)].set_text_props(weight=1000,color=colors[j-1],fontproperties=FontProperties(size=15))\n", "\n", " # entries\n", " cellDict[(j,i)].set_color(\"white\")#(\"#f2f2f2\")\n", " cellDict[(j,i)].set_edgecolor(None)\n", " cellDict[(j,i)].set_linewidth(0)\n", " cellDict[(j,i)].set_height(0.03)\n", " cellDict[(j,i)].set_alpha(1)#(0.9)\n", " cellDict[(j,i)].set_text_props(fontproperties=FontProperties(size=7))\n", "\n", " for cell in table._cells:\n", " table._cells[cell].set_edgecolor(None)\n", " table._cells[cell].set_linewidth(0)\n", "\n", "\n", " plt.xlim([-.01, 1])\n", " plt.ylim([-0, 1])\n", " plt.xlabel('False-positive fraction')\n", " plt.ylabel('True-positive fraction')\n", " plt.gca().spines['top'].set_visible(False)\n", " plt.gca().spines['right'].set_visible(False)\n", " plt.gca().set_aspect('equal')\n", " plt.tight_layout()\n", " plt.gcf().savefig(outname)\n", " plt.show()" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "coverage_at_EwS_DHS\n", "0.7235142118863049\n", "global_fragment_size\n", "0.8863049095607235\n", "read_depth_5mb\n", "0.9397071490094746\n", "regional_fragmentation_5mb\n", "0.9698535745047372\n", "METALEARNER_fullx\n", "0.9698535745047372\n" ] }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAASAAAAERCAYAAADbpXqIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO2deXiU1dn/P3cSkrAkEEjCGgiLgKyyhEUpoSzWpYhtta8vWkVpA6K4vlrXn7UutC9VKSgCWmpdKmJd0L5VxAVRArKjTtiCCMi+Bghkv39/zEwcQpZJMmtyf65rrszM8zxnvmcy851z7nPuc0RVMQzDCAYRwRZgGEb9xQzIMIygYQZkGEbQMAMyDCNomAEZhhE0zIAMwwgafjMgERksIkvLeX6siKwWkRUi8jt/vb5hGKGP+GMekIjcC/wGyFXVIR7PNwA2AWlALrAc+LmqHvC5CMMwQh5/tYC2A78s5/nzgWxVPaaqBcCXwHA/aTAMI8SJ8kehqvqWiKSWcygeyPF4fBJoWl4ZIpIBZAD06NFjgMPh8LFKoyJemPIcxXkNkMgiEB8WrFBQXAz4tlgjcOQ3yudMfB4PPfmQT/6FfjGgSjgBxHk8jgOOl3eiqs4D5gEMHDjQ8kUCSYnzz/BbfsL5F/T2WbEnzpzhb8sziY+NZeKwi3xWrhEYcnNzWbBgAb1LTviszECPgm0CzhOR5iISjbP7tSLAGowqUHX+uEVFN/BpuSWunxGx9k9Y0rhxY6644gpGpST4rMyAtIBEZDzQRFXnichdwGKc5jdfVfcEQoO/2OU4wrbVBwi1nN5T339PwZHDNbu4qAGgRMX41oAU55sk5j9hhcPhICoqim7dupGUlAQRvvsH+s2AVPV7YIjr/j89nn8feN9frxto9mw9Rl5uYbBlnEPuwWNoSQ0/KCUlqJym0df/gc3RvhNVDJxsgEQoHP/Gd+UafsNx9AzL9+eSGhdN121xiAicKjdqUiMCHQOqMSUlJexZvYWivIJgSzmLE3tOU3iqhD5DmtAkPjLYckrJ3rgCVOn5yCNIRPV62nM/m8XhBmdokBcHZ3zXSy/RSChqRgTFkOO7D7HhHxwnS1h+rIQODYVRcfnICd9/98LGgDa8+ilZy/cFW0aF7N2+mmjOBFtGKVGANGhA03bJ1b62sFkEFAgy4DJokeIzTSW5p+HrLGjYEC7o6bNyDd/j2JrN8rUb6NCjDaMvGkxkpH9+XMPGgE4fOw1AbKzSsFHoyD5TXEBu8WF+iCnGGVcPHYq6pfLt1sXVvi5XncNgEXEtoFn1DawiNPIkNIhBYhv5tFzD9xQ02EOHTp0ZPXq038wHwsiA3KRe0IqBN14cbBmlvLjlA7blFAH9gi2lfI7vrsFFJUQjxEb52FBdkfoIGwULWfLy8oiNjaVfv36oqjPm40fCzoBCjYLiIgAuaZdGy4bNgqzGNxzd9y7HcwtZuXs/HM71Wbmn8/MBiPDhKIrhOxwOB2vWrGHcuHE0a9bM7+YDZkC1psQ1a69zfGvaN6kb3YqFZxqzp7gI9h6ABsd8Xn50lH3sQg2Hw8Hy5ctJTU0lLi6u6gt8hH0SaklxidOAoiR0RsBqS55rTtOAtq1o3LS5T8sWhE5JST4t06gdnuYzatQov8Z8yhI2BlSsxRSVFLHn9GHyD4ROXtipIufIV0Qdml1X7DKg3q2SSUhuG1wxhl/ZuXNn0MwHwsiADp45Tl5xIdk5R8jceTrYcs4hJrL8gO3+nBPknAk9vZWR7zKgyIiw+XgYNaRdu3YMGjSI3r17B9x8IIwMqMiVRR0f3YjOLXsEWc3ZtIxtRkJMk3OeP5WXx4LVqwmxLI2qcQluEGkLZtZVsrOzadeuHbGxsVxwwQVB0xE2BuQmKaYpV3a4MNgyvOJ0QQEKxERFkdqiRbDleM+hbbTUXBpGxwRbieEH3DGfvn37Mnjw4KBqCTsDCifc2d/NGjbkst69AvvihQVw/GDNrt1yGjTPFu2pg3gGnAcOHBhsOWZA/qTEPfEuGAHqj1+GY7Vc6Tai7ozsGcEd7aoIMyA/8uPyE0EwoJNHnX+btwapQSynRRuIaeRbTUbQKCoq4ttvvw0p8wEzIL9SUhLEFpBrfhKjfwNRvl3XxwgvVJWoqCjGjh1LTExMyJgP2L5gfiVoLSDVHw3IulH1GofDwdKlS1FVGjVqFFLmA2ZAfsXdAgq8AZXgHEsXqOZaQEbdwR3zKSwspMT9gxRiWBeslqxf8SkrsrMpb381579ciNi/BbI/DqAqlxYzn3pLKAacyyP8DCjEhoa37f2B/Ep/XZS2mgdFQVjJsWWHwL+mEXTCxXwgjAxIVVFVTuUXsP3QoWDLKSXXOUGbqwcPIan9eeccFxGig/UBaBBaC6QZgSEhIYEuXbqQnp4e0uYDYWRAuQUFlGgk+3JyeG/j18GW8yOuzM3GsQ2JadQ4yGKM+syxY8dISEigTZs2tGnTJthyvCJsDMg9qS86KorUxMQgq/EgZw8tis7QLMZaG0bwcDgcZGZmcskll5CS4rt1vP1N2BiQm/iGMYy7oG+wZfzI4a8h/7QFfI2g4RnzCZeWj5uw+daEbEZ5qO1IaNQrwingXB5hY0CGYZzN4cOHw9p8IAy7YCE3Dm8YQSIxMZExY8bQvn37sDQfCMMWUMjZj7sLVoeWZDVCm6ysLA4edC610rFjx7A1HwhDAzKM+ozD4eDLL7/E4QidddFrgxmQz7AWkOFfPAPOw4cPD7Ycn2AGVGvcXbDgqjDqNuE+2lURZkC1xUbhDT+jquzdu7fOmQ+E5SiYYdQfiouLiYyMZNSoUahqnTIfCKMWkIT8hD/rgxm+xeFw8M4775CXl0dERESdMx8IIwMKXWwY3vA97phPfHw8DRrU3SV1zYBqS8i3zIxwo64GnMvDDMgwQoht27bVG/MBC0IbRkjRtm1bevfuzaBBg+q8+YC1gHyHxYCMWrB7925KSkpo1KgRQ4cOrRfmA34yIBGJEJE5IrJCRJaKSJcyx+8WkbUislpEfuFNmSEbaglZYUa44HA4+OCDD+pMekV18FcX7EogVlWHisgQ4ClgHICINANuB7oAjYENwDt+0mEYIY1nwLlHjx7BlhNw/NUFGwZ8CKCqK4GBHsdygZ04zacx7t1ryiAiGSKyRkTWHAqhRejPxd0Csi6YUT3q02hXRfjLgOKBHI/HxSLi2draDWQB64CZ5RWgqvNUdaCqDkxKSvrxQKjGWkJUlhGanDlzhlWrVtVr8wH/dcFOAHEejyNUtch1/1KgNdDR9XixiCxX1VV+0uJfSkNA5kCG9zRs2JBx48bRtGnTems+4L8W0HLgMgBXDOgbj2PHgDNAvqrmAceBZn7SYRghhcPh4NtvvwWgefPm9dp8wH8G9A6QJyKZwDPAnSJyl4hcoapfAKuBlSKyAtgKLPGTjgBgo2CGd7hjPnv37i13K+/6iF+6YKpaAkwu8/Rmj+OPAI/447WDRqjGpoyQoGzAWezzAthExNqjNgpmVI6NdlWMGZBh+BlVNfOpAMsFqzXWlzfK5/Tp0zRq1IhevXrRs2dP63aVQ/i1gELtf2g9MKMcHA4Hb7zxBkeOHAEw86mA8DOgkMU+YIYTd8ynbdu2NGtmM0wqwwzIV9gvnIEFnKuLGVCtsRiQ4WT37t1mPtXEgtC1xSaUGS7atm3L0KFD6dGjh5mPl4RPC8j2/zNClK1bt3L69GkiIiLo3bu3mU81CB8DCnUsBlQvcTgcLF26lA0bNgRbSlgSRl0wVxMoLxe++zq4UjwpzA+2AiNIuAPOHTp0YPDgwcGWE5aEjwEV5oM2gJNHYeX7wVZzLhHW7K5PeJrP6NGjrdtVQ8LHgNzB3qhoSO0ZXC1laZoETWy+R32huLiYTZs2mfn4gPAxIDcxjeDCccFWYdRT3Puz//znP6dBgwZmPrXEgtCG4SUOh4OPP/6YkpISYmNjzXx8gBmQYXiBO+ajqraYmA8xAzKMKrD0Cv9RZQxIROJwLiQf635OVV/2pyjDCBWysrLMfPyIN0HoRcBenFvpQLCTn2zCnxFAEhMTOe+88xg+fLiZjx/wxoAiVPU6vysxjBDi8OHDJCYmkpycTHJycrDl1Fm8iQF9LSKDRSRGRKJFJNrvqgwjiDgcDt5++2127NgRbCl1Hm9aQOnAWI/HCnTyj5yKsXEHIxB4Bpzbt28fbDl1nioNSFX7AohIMnBEVYv9rqpcIUF5VaMeYaNdgafKLpiIjBCR74DFwHYRGeN/WYYRWI4dO0ZmZqaZT4Dxpgv2ODBMVfeKSFvgbcJ6J1PDOJeEhAQuueQS2rRpY+YTQLwJQher6l4AVd0D5PlXUlVYX8zwHVlZWezduxeAlJQUM58A440BnRCRqSLSV0SmAkf9LcowAoHD4eDLL79k8+bNVZ9s+AVvDOg6oD3wBJAC3ORXRYYRADwDzunp6cGWU2+pMAYkIu1U9QegJfCCx6Ek4Ji/hRmGv7DRrtChsiD0Xa7bXJyBF3cOhAIj/azLMPyCqnLo0CEznxChQgNS1btcd59W1dI1UEXk135XVQli+2IYNaSoqIioqCjS09MpKSkx8wkBKuuC/Ry4CPhvERnqejoCGAcsDIC2CoQF7ZWNMMbhcPDNN99wxRVX0KhRIzOfEKGyLthGoAVwBtiM86tfAiwIgC7D8BmeMZ+YmJhgyzE8qHAUTFV3q+o/cOaC7XXdjwe+D5C28rEWkFENLOAc2ngzDP8aPy5Gdgx41X9yDMN3ZGdnm/mEON4YUGNV/TeAqv4TaORfSYbhG9q1a0ffvn3NfEIYbwyoQETGiEiciIzCGQcyjJBl586dFBcXExsby+DBg818QhhvDOi3wC3AKmAKMKmqC0QkQkTmiMgKEVkqIl3KHL9URFaKyFciMlvE+3VWLQRkVIbD4WDx4sV88803wZZieIE36wFlA1e6H4tIay/KvRKIVdWhIjIEeArn8L17kfvpwAhVPSwi9wKJwKEa6DeMUjwDzr179w62HMMLvFkP6DEROSQiOSJSCHzsRbnDgA8BVHUlMNDj2IXAN8BTIvIFcEBVqzYf24vJqAQb7QpPvOmCjQXa4RwNOx/Y48U18UCOx+NiEXG3thKBnwK/x7ndzx0i0rVsASKSISJrRGTNoUPWODIqJi8vjzVr1pj5hCHeLEi2T1XzRSROVbO9XJT+BBDn8ThCVYtc948Aq1V1P4CILAMuALZ6FqCq84B5AAMHDrTmj1EhsbGxjBs3jri4ODOfMMObFtAPInITkCsi04BmXlyzHLgMwBUD8owIrgN6iUiiq1U0BMiqnmzDcHa71q9fD0CzZs3MfMIQb1pA9+LsUr0JTADGe3HNO8AYEcnEOXB1o4jcBWSr6nsicj/ONaYBFqrqt9VWbtRrPGM+qko1BlKNEMIbA3pfVYe57s/yplBVLQEml3l6s8fxBVhOmVFDygaczXzCF28M6KiI3A5swTUJUVU/8qsqw6gAG+2qW3hjQEdwBokvcD1WIAgGZL9yBkRFRdGxY0dGjhxp5lMHqGw9oMWq+jPge1V9NICaDOMccnNzady4Md26daNr167W7aojVNYCShSRN4GfiEg3zwOq6k0g2j/YB6/e4XA4WLlyJVdccQVJSUlmPnWIygxoFNAH6IJzXWjDCDieMZ/mzZsHW47hYypbE/o4sExEBqlqYQA1GQZgAef6QJUTEc18jGCwZ88eM596gDejYIYRcNq0acOwYcPo1q2bmU8dxpts+DgReVxE5ovIL8uu7WMYvmTLli2cPHkSEaFHjx5mPnUcb3LB5gPfAecB+4G/+VWRUW9xOBx8/vnnbNy4MdhSjADhjQG1UNX5QKGqZnp5jWFUC8+A89ChQ6u+wKgTeGUmItLd9bcdUFTF6X7CtSKHTQGpc9hoV/3FmyD0bcDfcS5G9i+c60IHHFsQqG5SUlLC1q1bzXzqKd4YUGfgIleGu2H4DFUlIiKCyy+/nMjISDOfeog3XbDRwEYReUJEOvpbUFVYD6xu4HA4+OCDDygqKiI6OtrMp57izUTEqcAAYAPwnIh4syi9YVSIO+YTFRVleV31HG9HtAYBPwNaAp/4T45R17GAs+FJlTEgEckCNgIvqupv/S+pAh0qFogOczZt2mTmY5yFN0Hon6jqEb8rMeo8ycnJdOvWjWHDhpn5GEAlXTAR+Zfr7rcistd12yciewOkrQIsZhBuHDhwAFWlRYsWpKenm/kYpVRoQKp6levuIFVt47q1BkYGRppRF3A4HCxatIjt27cHW4oRglS2JGsvoC3wZxG5B2fTIwL4Ez+uD20YFeIZcO7YMegzOIwQpLIYUAJwDc6RL/cSrCXAbH+LMsIfG+0yvKGyFRG/AL4Qkf6qui6AmowwJycnh8zMTDMfo0oq64I9q6q34px8eNYIuKpe6Hdl52CD8OFC06ZNufzyy2nZsqWZj1EplXXBHnP9vSYQQozwx+Fw0KRJEzp06ECbNm2CLccIAyobBTvgutsUaAO0wrk4WXBXRLSp+yGJO+aTnZ0dbClGGOFNKsYcIB94CHgQeMSviirAOmChi2fAecSIEcGWY4QR3hhQHuAAolV1JVDsX0lGOGGjXUZt8MaAFHgZ+I+I/BqwbXqMUo4fP27mY9QYb3LB/gvnbOj/iMgILChtAIWFhTRo0IALL7ywdGExw6gu3nxqCoCfisj/AeP8rMcIAxwOBwsXLuTUqVOIiJmPUWO83ZZnF84A9PfAS37UUzEWhQ4J3DGfpKQkGjZsGGw5RpjjTResharOct3fICJXVXq2vxFzomBhAWfD13jTAmooIq0ARKQlYJ+6esj27dvNfAyf400L6GEgU0ROAHHA7/wryQhFUlJS6NevH/379zfzMXxGlQakqktEpCvOmdB7VNX6QPWIHTt20K5dO6Kjo0lLSwu2HCNAHD58mMaNG1cW56uOD1SYvlBlF0xEfglsA94DtonIGC+uiRCROSKyQkSWisg56Ruucz4QkclVlWcEB4fDwZIlS/j666+DLaVa/OEPfyApKYnu3buTkpLCsmXLuOGGG+jevTs9e/bk7bffBmDJkiX07duXlJQUHnjggSCr9p6VK1fSo0cP2rdvz5133umX10hPT+fIEf+vxOxNDOhhYLCq9gcuAp7w4porgVhVHQrcBzxVzjmP41xzyAhBPAPOF1wQfuvP3X777WzevJlf/OIXjBo1isLCQjZt2sTHH3/MrbfeyrFjx7jxxht57733yMrKIj8/nzNnzgRbtlcsXLiQUaNGsWrVKgoKCsjPz+ehhx6iV69ejB8/HlUlPj6esWPH0r59e77//nv+53/+h44dOzJo0CBOnTp1TpkPPvggnTp1ol+/fixatIitW7dy9913+70u3hjQEVU9CKUJqie8uGYY8KHrmpXAQM+DrpG0Evc5RmhRF0a7ZsyYQWJiIsuXL2f48OGMHDkSEaF169akpKSwdetWIiIi6NChA3FxcTz11FNhM63gnnvuIScnh379+nHw4EEKCgqYPn06RUVFfPbZZ2RlZXHy5En+/ve/M2jQIL766is2bNjAlVdeycSJE2nQoME5ZV588cX84Q9/4MCBA3Tq1Im2bdvy1FPltRt8izcGdFJEFovIAyLyFtBIRJ4UkScruSYeyPF4XCwiUVC61Ot44P9V9qIikiEia0RkzaFDh7yQafiC/Px81q1bF9bmA3DHHXewYMEC9u7dy4gRI1iyZAmqyu7du/n+++/p0qULBQUF7Nq1i5ycHMaMGUNubm6wZXvF/PnzueGGG9i9ezfr169nw4YNNGrUiA0bNvDoo4/SsWNHIiMjSUxMpEmTJhQWFpKRkcGQIUOYPXs2n3322Tll/va3v6Vt27a0aNGC4uLigG0Y6c0o2Lse9/d4Wa57xMxNhKoWue5fj3Ot6U+BVKBARL5X1bNaQ6o6D5gHMHDgwNKAl3i9l6JRE2JiYhg3bhyNGzcOW/NxM3r0aK6//noyMzNJSkqia9euREZGMnPmTFq0aMHf/vY3LrvsMk6dOkVGRgaNGzcOtmSvGDRoEBkZGZSUlDBw4ECGDh3K5MmT6dq1K0OHDiUjI+Oca9auXcvrr79O06ZN6dmzJ9dffz133HEH/fv3B6BTp05MnDiRiIgIDh8+TJ8+fbj99tt56623/FoX8XZQS0TuV9VpXp77K2Csqk4QkSHAI6p6aTnn/QHYr6pzKitv4MCBetNFV6PH42nWs4Rr773FK82G9zgcDk6dOsWgQYNsu2TD19R8FMyDKke/PHgHyBORTOAZ4E4RuUtErqhGGUaAcMd8cnJysFkWRiCpjgF5/bOoqiWqOllVL1TVoaq6WVWfVtX3ypz3h6paP+eIsB9nn1I24FxXEkuXLVuGiPDVV18BzqH5xx9//Kz7+fn5XHfddZx33nn069ePrKysYEoul5kzZzJ69GgA3n///dL/U3FxMXPmzKF9+/Zcd9115ObmkpaWRlpaGrm5uUyZMoXvvvsuyOqrxqtPm4jEAw+KSHh0kg2vqAujXRXx4osvMmLECObOnVvpOY0aNWLbtm38/ve/D7kv7OzZs8+KwfzlL39h4cKFxMTEsGzZMqZNm8aaNWtYt24du3btIi4ujri4OLKzs4mKiqJTp05nlVd2KH7EiBFceumlpKamsmzZMr766iv69OnDwIEDycrKYtGiRXTp0oXu3bv7bandKoPQriHzB13nLhQRVdXH/aLGCCgNGzakY8eOjBw5sk6ZT05ODosXL2bdunX06dOHnJycc+JaIoLD4WDIkCEAXHNN6C1zlZGRQY8ePUpbbvv376dVq1a0atWK/fv3c/DgQZKTk2nVqhXHjx/n008/BWDChAmkpaVx/fXX8+CDD9KtWzeA0qH47t27lw7FT5w4EVVlxowZREZGcvz4cSIjI/nnP//Jhx9+yJtvvklBQYHfPh/etIDuBIYAh3FOHvyFX5QYAePkyZOAc+Rj9OjRdcp8AF577TVOnDjBgAEDyM3N5ZVXXiEuLo7du3cDzr3qmzRpQteuXUu7aDNmzOCFF14IpuxziIo6u32QnJzMwYMHOXDgAK1atSIxMZHDhw+XPgZYt24drVu35v333+eqq67i3Xd/HMQubyjePeQuIhQXF/PnP/+Zl156iV/96lcAFBUVcfLkSfw2FUZVK70By1x/P/V8HMjbgAED9Lnb/qTPXj9bX/vfZ9WoOd9++62+8MILum/fvmBL8Rv9+vXTjz76SFVV//Of/2ivXr10z549OmDAAG3fvr2mpaXp3r179fTp03r11Vdr165ddcCAAfrdd98FWfm5fPbZZzpq1ChVVX377be1Y8eOOmrUKC0qKtJZs2Zphw4d9Nprry09/9e//rUeOXJEp02bpm3atNGlS5eWHrv33ns1JSVFe/Xqpbt27dL09HT9yU9+oh06dNDPP/9cly9frl27dtWuXbvq5s2b9d1339VOnTpp9+7ddcuWLfqb3/xG165dW5NqVPjdrnIY3jXhsCMwAOfcnVxV9f8cbQ88h+ETepUw/h4bhq8JdTnmY1SfESNG8PjjjzNs2DB/v1SFQ0feZMM/ICKXAOuATar6b18qMwKDmY9RlqVLlwZbglfZ8NcDycABoLnrsRFG7N+/38zHCEm8ScU43/VXgAuAozi36THChJYtW5Kenk6XLl3MfIyQwpsu2P3u++Icy7QuWJiwefNmWrVqRbNmzUqHYg0jlPCmCxbtvgHtcQakjRDH4XCwbNmysFtMzKhfeNMF24Jz+UUBzgDT/aqoIixFyWs8A84XXXRRsOUYRoV4tSi9qr7qdyWGT7DRLiOc8GYmtO2CESaoKt99952ZjxE2eGNAMSKyXkQWiMg/ReSffldVKZYOXx4lJSWICJdccomZTzW5//77Wb16NevXr2fQoEF06NCBG2+8keLiYlJTU9m9eze//OUvgy3Tp4RKnb0xoN8DdwDPA3NdNyOEcDgc/Pvf/6awsJAGDRqEtflMmDCBkSNH0r59ezIyMmjfvj2vv/4627dvJy0tjT59+vD555+zaNEiOnXqRGpqKuvWrWPChAlcfPHFpKSk8Oyzz3r9evv37+eDDz4gLS2NjIwMZsyYwfbt22nVqhUHDx4EnImrSUlJ/N///V9Y1zUU6lyWCg1IRN4AUNXPy978qsioFu6YT2xsbJ1Zy2fcuHHccMMNREVF8dhjj/HBBx8wZ84cfvjhB3Jzc5k/fz69evXi0UcfJSUlheXLlwPws5/9jOeff75aX5rMzEzOO+88wPleDh48mKioKKZNm0br1q1Lzzv//PP5+OOPfVtRAltXN8GusyeVfWKT/PrKRq2pqwHnFi1aEBMTQ3JyMjExMZSUlFBcXMxtt93Gv/71L2666SamTZvGDz/8QLdu3SguLgacEy7di7BXB/fSFF27dmXVqlUUFhZy5ZVXsmvXrtJzYmJi/LJmdKDr6iaYdfakMgPq7N79ouzNr4oMr9i8eXOdNJ+KmDx5Mq+//jq/+tWviIiIIDU1lZkzZ7Jy5UoOHz5c43IvvPBCNm3aBMC8efO45ZZb6Nq1K23btiUlJaX0vA0bNjB06NBa18Mb/FVXN6FU5wqz4UVkE/Cn8o6p6j/8KaosAwcO1JsuvBrNiSehtzL+f6YE8uVDkpycHL7++msuvPDCOm8+/uaOO+5gwoQJFW7AeObMGa666iref//9OtPNDXCdKxw5qsyAPlPVn9b2lX2BGdCP7Nu3j1atWtnOFUY4UaNdMdb6QYhRCxwOB++//z5bt24NthTD8AkVGpCq/k8ghXhLff3d9ww4d+nSJdhyDMMn1I0ObR2nro52GYY3uWBGEDl58iQrVqyoN+aTmZlZupXOX//6V5o1awbAM888w9atW2natCmPP/44Dz30ELm5ubRp04b77y9dMYasrCy2bNmCqvLhhx9SUlLCnDlzShd4v++++0qvu++++8jIyKBhw4b06tXrnC2NX3vtNbZs2cIf//jH0ueee+45Nm3aRHR0NE8//TSPPPIIx48fJzk5mQcffJAnnniCBx54oFoxOm/rfOutt5KXl0dmZiZvvfUWvXv3rnad77//fp599ll27NjBmTNnmD17dqmOt0OmLhMAABcvSURBVN9+26vrp0+fzsGDBzl06BAvvfRSjersxgwoxImLi2Ps2LEkJiaGjPnMvb3mc1En/TW90uPz5s1j7ty5rFq1ijfeeINJkyYBsGLFChYuXMhbb73F4sWL2b59O2+++SZPPfUUDoeDnj17AjBnzhxmzJjB+PHjWbBgAS+//DKffPIJP/vZzwDOui4zM5OoqChmzpzJHXfcwYkTJ4iPjy99vezsbEpKSs7S98UXX7BgwQL++Mc/snHjRnbv3s38+fOZPHly6dbW7777Lr/4hfebx3hb5zlz5pCdnc1rr71Waj7VrbPD4WDJkiW0b9+ePn36nKVj4cKFXl2/aNEiUlNTadq0KUCN6uzGDChEcTgcxMbG0rlzZ1q2bBlsOWfxXw+m+a3s4uJiYmNjad26dek+VwC/+c1v+O1vf0uTJk3o168fl1xyCTfffDN5eXkMHDiw9LwjR44QERFROtGudevW7Nmzp/R42evat2/P1KlT2bNnD8eOHSs1oKFDh9K6dWtefPHFs/R5lvvtt9+SnJwMOCcUHj58mLS0NB544IFqfRm9rTM4W0XufcJqWufo6GhmzZrFjTfeyPjx40vr7O31f/rTnxg2bBgTJkwgPz+/RnV2YzGgEMQd8/nuu+9Ccq/2ZsmNanyrikaNGpGfn1863cBNXl4eL774Ir169SIlJYX4+Hief/75cybPuVuJ7r9lyyl73XnnncesWbNISEjg+PHjXHPNNcybN69Cfe45Mfv27aN3794cPXoUgEOHDpGYmEhUVFS1uyLe1vnMmTOoKgkJCWddX506JyUllXbxmjZtyo4dO0rr7M31DRs25NVXXy19/tSpUzWqsxszoBDDbT4dOnRg5MiR9W6+T0ZGBpMmTWLu3Llcd911vPLKK2RlZVFQUMDEiRNZuXIlI0aMYPfu3UyaNIn8/PyztiCOi4ujuLiYq6++mt/97ncsW7aMMWPGMH36dI4ePXrOdWvWrGHSpEl069aNvn37smDBgnNiQUDp9cOHD+eWW24hJyeHPn360LFjR6ZOnUqnTp1o0qQJmZmZpV0XX9c5KyuLzp07n3N9derco0cPUlNTufXWW0lISDirzt5cP2jQIFSVqVOnkpCQQIsWLWpUZzdV7gsWCnhORGzeW/nvOjoR0dN86uKOpYEgOzubjRs3lu7sGWimT5/OPffcE9DXDIM6V38mdChRXwxo1apVHDt2zMzHqGvUfGNCw//k5+cTExNDWloaqlpn8o0Moyrskx5kHA4HCxcu5MSJE4iImY9Rr7BPexBxx3xatmzp93VXDCMUMQMKEpZeYRjhaEB1YFh6x44dZj6GQRgGocPffiAlJYWBAwfSt29fM58yhEoumDd5UeVdb7lg1SP8WkBhzPbt2ykoKCAqKor+/fuHrfn8p3fvGt+qwp0XNXHiRN54443S51esWMHzzz9PWlpaaS7YrFmziI6OxuFwlJ43Z84cxo0bx8KFC5k3bx7Dhw/nk08+KT3ueZ1nLlhWVhYnTpwoPa+m17vzoqqDt3WeM2cODz30ENdee+05uWDe1tmdC1ZQUFBuLpg31y9atIh9+/aVxi1rUmc3YdcCClfcMZ9+/fqRlua/XKpAkP7++34rO1RywWp6veWCVQ+/tIBEJEJE5ojIChFZKiJdyhy/U0S+ct0e8YeGUMIz4Ny/f/9gy6k1jVNTa3yrilDJBavJ9S1btrRcsGrirxbQlUCsqg4VkSHAU8A4ABHpBFwLDAZKgC9F5B1V/dpPWoKKjXZVD3deVGFhIXPnzuWVV15hwIABpXlRIsJNN93EjBkzmDRpEvHx8ZXmgqkqc+fOZfr06UycOLE0r8l93bx581iyZMlZuWAAbdu2rfb1sbGxfPTRRzXOBauqzuvXr/cqF6wyzZ65YElJSdWu86BBg/jb3/7G1KlTad68OS1atKhRnd34JRVDRJ4GVqnqAtfjPara1nW/AdBUVQ+7Hq8CrlPVrWXKyAAyANq3bz/g9+OmoDnxtOgD19x9s881+4PCwkLefPNNEhMTzXwCRBjkRfmcMKhzYHPBRORF4C1V/cD1eBfQSVWLPM4RYDoQp6qTKivPMxesRV+45q7QNyBVRUTIzc0lNjbWzMeoz9RoV4zacAKI83ydMuYTC7zmOqfOZZY6HA6+/PJLVJXGjRub+RhGBfjLgJYDlwG4YkDfuA+4Wj6LgI2qOklVi6tTcKjPA3LHfNwBQ8MwKsZfQeh3gDEikonTM24UkbuAbCASSAdiRORS1/n3q+oKP2kJGGUDzpZYahiV4xcDUtUSYHKZpzd73I/1x+sGExvtMozqYxMRfUR8fDydO3dmxIgRZj6G4SVmQLUkJyeHpk2bkpKSctaEOMMwqiZ8DCgE47kOh4PMzEwuu+wy2rZtG2w5dQJvEzNffvllsrKy+OGHH/j73/9Ow4YNgeolZk6dOpVbb70VgM8//5xVq1aRlJQEhGYyalRUFJ9++in/+Mc/+Mc//lF6fXWTUZ988kkOHjxIXFwcjz32WLV1PPfccyxfvpxGjRoxbdo0Xn/9dSZPnkyjRlXvelKWsDGgUPMfz5iP57T1+sDFH9xX42s/uvRPlR73dpO+jz76CFWluLi41Hygepv07dy5k5deeokvv/ySoUOHlpoPeLdJX3kbG/pzY8IhQ4bw6aefUlx89sBxdTcm7Nu3L5dffjnjx4+vkY61a9eSmJhIkyZNaNmyJWPHjmXu3LnceeedXtfZTdgYUChR3wPO84ff7beyvU3MvPXWWxk2bBj33XcfO3fupEOHDkD1EzPB+cWbP3/+WTpCMRl12rRpPPLII9x889kTcatb58svv5zHHnsMESmdMFsdHVOmTGHw4MG88MILfPjhh1x22WWsXr3a6/p6YuPE1eTgwYP12nwA2jVOqvGtKrxNzHzmmWcASEpK4siRI6XnVTcZddeuXXTq1ImoqCg2btwYssmoxcXFbNu2jXvuuYdVq1axcePGGtd5+fLlPPzww/Tv358VK1ZwzTXX8MQTT3j93q9duxYRISEhgaIi5/xit/lVF2sBVZPk5GRGjRpFampqvTQff+NtYuZnn33GzTffXLq2kpvqJqN6LuwVysmo8+bN46abbgLguuuuo2/fvjWu89y5c3njjTc4c+YMt956a2md165d69V7n5WVxZQpU1BVZs6cydatWxkwYECN/t9hsy/YjUOvhhPxJF4g/NedZacY+Z9NmzaRnJxMixYtAv7ahveEQWKmzwl2nWfMmMGUKVOIjo6u6JTw35gwmAbkjvl069aN9PT0gL62YdQBAp6M6nOClQPmGXAeNmxYkFQYRt0kbAwoGO20+j7aZRj+JmwMKNCoKrt27TLzMQw/YqNg5VBcXExkZCQXX3wxgJmPYfgJawGVweFw8N5775Gfn09kZKSZj2H4EWsBeeAZ83Hn0RiBxdt8pLfffpvMzExycnKYPXt2jXLB7r//fu6+++7SiYwvvPBC6YQ6b3LBysslmzdvns9ywf7973/z8ccfs3//fp566ilmzZpVo80Y58+ff1be3LPPPnvWxoJuKqpz2etvu+22Wue/ubFvmQsLOHvPpFdeq/G1c39zbaXHvc1HWrRoEcnJyXTq1KnGuWAOh4MtW7bQrl07UlNTz5rN600uWHm5ZL7MBfvyyy/Jzs6moKCAxMTEc7T37NnTqzqXzZtbtGgRqampNG3a9CwdFdXZ8/rTp0/7JP/NjRkQsGXLFjOfavDHcWP9Vra3+Ui7du3i1Vdf5eGHHyYrK4sePXoA1c+LevDBBxk6dCgPPPDAWeXUNJfMl7lgV1xxBU8++STz5s1j5cqVNd6MsWzeXNmNBWNiYiqts+f1p06d8kn+m5vwiQH5cRy+TZs29OzZ08zHS1rGx9f4VhXe5iN16NABEaF58+aUlJSUnledvKh27dqxfv16ABISEigsLKx2LphnLhng01ywp59+moiIiNJ8t5puxuiZN3fgwIGzNhZcuXJllXUum3fni/w3N+HXAqphRctjz549tGnThri4OC666CKflWvUHG9zwb777jtuvvlmoqOjS7siUL28qM6dO7Nv3z7uuOMOGjRoUKNcMM9cMnDGc3yVCzZq1ChuvPFGIiIieO6555g9e3aNNmPs0aNHad7c3XfffdbGgunp6aWz+yuqs+f1/fv3Z+HChbXOf3MTPqkYQ66Gk/Ek9ovgv+6odBsxr3DHfC666KKzPsBGeBPsvKj6mAsWchsT+hpPA0rqH8Gvb6+dAVnA2TACSvjngvkKMx/DCB3qlQHl5uby1VdfmfkYRogQfkHoWtC4cWPGjh1L8+bNzXwMIwSoFy0gh8PB5s3OfRGTkpLMfAwjRAhDA6reMLw75rN7927bq90wQowwNCDv8Qw4jxw5ssaTpQzD8A9hFwPy1kNstCs88TYZddasWezcuZNDhw4xd+5cmjRpAvg/MbOqZNb//d//9Vky6kMPPcQPP/zA+vXrmTZtGlu2bKlRnf/617+WbiT42GOP8fDDDwPeb8ZY1XtmyajlUFhYaObjL57475pf++DrlR72Nhm1c+fO3HnnnTzzzDNs2rSJtLQ0wP+JmVUls/oyGfXxxx/n6NGjPProo1x22WUUFRXVqM6eGwm2bdu22psxVvWe1bNk1MpdNi8vj9jYWC644IKzNl0zfMjkp/1WtLfJqDfccAPZ2dlkZWVx++23l57n78TMqpJZfZmMCvDss8+WbkR4xRVX1KjO5W0kWJ3NGKt6z+pHMqoXOBwO3njjDY4fPw5g5uMvWrSu+a0KvE1GXbVqFTNnzuTZZ58lIuLHj7G/EzMrS2YtKiryaTIqwI4dO+jevTtAjetcdiPB6m7GWNl7durUqVolo9YZA3LHfFq3bk1cXFyw5Rg1xJ2YOXfuXK677jpeeeUVsrKySpNRV65cSXp6Otdeey1Hjhxh4sSJOByO0uvLJmYuW7aMMWPGMH36dI4ePVqaWPn9998zaNAgVJWpU6eSkJBAeno6CxYsICMjo8Lr3cmo+fn5ZyWzHjx4kD59+tQqGbVsnY8cOVIa51HVGtdZVZkyZQqffvopl156KatXrz5nM8bK6lzZe9aiRYsa1dlN2OWCJQ+I4urbfnfWcQs4G27CIDHT54RBnetOMmpZA9q5cyeLFy828zGM0KVCAwqfIHQFPtmuXTsGDx5Mr169zHwMI8wI2xjQtm3byMvLIzIykr59+5r5GEZgkWrcKiQsDcjhcPDZZ5+xcePGYEsxDKMW+MWARCRCROaIyAoRWSoiXcoc/52IrBGRlSLy8+qUnVNwpjTg7Lkwt2EY4Ye/YkBXArGqOlREhgBPAeMARKQVcBswEIgFvhSRJaqaX1Wh+Y3zOJV3hn59ulvA2TDqAP7qgg0DPgRQ1ZU4zcbNIGC5quarag6QDfSpqkBFyW+ST6OoaDMfw6gj+KsFFA/keDwuFpEoVS0q59hJ4OxEHEBEMoAM18NTa9eu3QIkAodvuz+w8yx8QCJwONgiqolpDhzhqPtbVe1V20L8ZUAnAM/pyBEu8ynvWBxwvGwBqjoPmOf5nIisUdWwC/yEo27THDjCUbeIrPFFOf7qgi0HLgNwxYC+8Ti2CviJiMSKSFPgfOBbP+kwDCOE8VcL6B1gjIhk4pwHcKOI3AVkq+p7IjIT+AKnAT6oqnl+0mEYRgjjFwNS1RJgcpmnN3scfwF4oQZFz6v6lJAkHHWb5sARjrp9ojkscsEMw6ibhOVMaMMw6gZmQIZhBI2QNCB/pnL4Cy803ykiX7lujwRLZ1mq0u1xzgciUjauFxS8eK8vdX02vhKR2RICS2N6ofluEVkrIqtFpPprm/oRERksIkvLeX6sS+8KEfldOZdWjaqG3A34JfCS6/4QYJHHsVY4h/VjcE5g/AaICXHNnYA1QCTOUcHlQJ9ga65Kt8c5TwIrgcnB1uvFex2Hc1pHouvxvUBSiGtuBuwCooEEYGew9Xpou9f1HVtZ5vkGOLMYEly6VwMtq1t+SLaA8EMqRwCoTPNu4BJVLVbnf68BECpTDyrTjYhcBZS4zwkRKtN8Ic4vzFMi8gVwQFUPBV7iOVSmORfYCTR23UoCrq5ituM0z7Kcj3NazTFVLQC+BIZXt/BQXZCs1qkcQaBCzapaCBx2dQWmA+tVdWtQVJ5LhbpFpBcwHrgK+H9BUVc+lX0+EoGfAhcAp4AvRGRFCLzflWkG549UFs5W8rRAi6sIVX1LRFLLOeST72GoGlCtUzmCQGWaEZFYYD7Of9SUAGurjMp0Xw+0BT4FUoECEfleVYPdGqpM8xFgtaruBxCRZTjNKNgGVJnmS4HWQEfX48UislxVVwVSYDXxyfcwVLtg4ZjKUaFmV8tnEbBRVSepanFwJJZLhbpV9V5VHayqI4CXgKdDwHyg8s/HOqCXiCSKSBTOeEtW4CWeQ2WajwFngHx1ZgUcxxkXCmU2AeeJSHMRicbZ/VpR3UJCtQUUjqkcFWrG2axOB2JE5FLX+ferarX/YX6g0vc6uNIqpKrPx/3AYte5C1U1FH6gqtI8GlgpIiU44ylLgqi1QkRkPNBEVee59C/G+T2cr6p7Kr+6nPJcEW3DMIyAE6pdMMMw6gFmQIZhBA0zIMMwgoYZkGEYQcMMyDCMoGEGFEaISKqInHAlM7pvFc5QFpGXROSSQGos8/qtRGS26/5wEenjuv92LcpMEJF1IlLjYWoRaS8iY133Z4hI+5qWZdSOUJ0HZFRMlmtiYMjjmo3snvV9E7AA+FpVy8st8pbewA5V/VUtyhgJdAfeV9U7alGOUUvMgOoAIhIJzAVScE7pf09VH/I43hX4O1CEs9U7XlV3i8g04Cc4J0o+rapvlil3As5NJuNw5lj90ZUbNAZ4HGdC7RGc5tIAeMNVfizOJXmP4zSdW4BLgP4ikoVzNnsvnJNJe6iqisizwCc4J27OxDlZ7whwkyvpGNeM25lAGxF5FOgAtHDdxgJ/LvseiMh5wIs4M7ZP48xtuw9o5JoUeJdL637gVZw5TlHAQ6r6qYh8DXyOM+FZgXFuPYYPCHa6v92qtTRCKs4cnKUet7au53/rOicWOOy6/xLOL/4twDM4TWIkzi//pcACj2s2AM3KvN4EnDNyI4CWODO2GwA7gLauc24H/gJcDrwJNAQGABe5dK301OK6v9/19w2cU/hjAAfOL/5KnKYEMBF4ooymER66XwLu9HhvynsPFnm87hXAxa56/cn13FKcraG/ALe7nmvrqqMA3wMXup5/Dbgm2J+DunSzFlD4cU4XTETigTQR+SlOg4opc83fgN/jXA4iB3gAZ1dmgMdCUw1w5lA97nq8BNgDfK7OTQYOiMgxnOsxndAfp90vw7le0L3AeTi/8IU4W0hV8QJwg6vM99SZgX8+MNu1hlgDYFsVZWxx/T1awXvQDVeOkrpSS1wtu7Kcj9NgUNU9InICSHYdW+/6uxunuRk+woLQdYMJwHFVvRZ4Cmf3wnMVwHHAF6o6Cmcr5fc4dyn5zGVmI4GFOJNlR7huT7iuHQAgIi1xdk/2AvEi0tp1PB1npvkIYJ+qXozTfJ4so7GEcz9vnwD9cHbhXnQ9twW43qXrXuDfVdTdvXZORe/BJiDNVYdrRWRqBVo24eyOIiJtcS60dcR1zPKV/IS1gOoGnwD/FJGhQD7OVkMbj+NrgH+IyEM44z134vxVH+FatKsJ8I6qniyn7FYi8gnOtV6mqGqxa/nNt12Jk8dwfvkVWCAiN+P8XP2xTDlfAX8SkR3uJ1RVReRfwGhV3e56+mbgZVcmu+LshtXmPbgHmOuq+2ngOpyxowdFZJ3H9U8C810LsDUEMlwtMi9f3qgJloxqVIirq9JdVe8LthajbmJdMMMwgoa1gAzDCBrWAjIMI2iYARmGETTMgAzDCBpmQIZhBA0zIMMwgsb/BzPie9w376zMAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Example of ROC curve calculation (here only for 2 bootstrap iterations for quick calculation):\n", "plot_ROC_curves(\"test.pdf\",\n", " 'clinical data indicating presence of tumor (PET-SCAN, MRI, CT)',\n", " 'bestmodel_classification_out_of_sample_predictions_Clinical_evidence_for_tumor_YES__vs__healthy_CTRLs.csv',\n", " 2,\n", " [\"coverage_at_EwS_DHS\",'Coverage at__Ews-specifc DHSs',\n", " \"global_fragment_size\",\"Global fragment__size distribution\",\n", " \"read_depth_5mb\",'Read depth__in 5 Mb bins',\n", " \"regional_fragmentation_5mb\",'Regional fragmentation__patterns',\n", " \"METALEARNER_fullx\", \"Metalearner\"],\n", " combine_controlsets=False,use_only_our_ctrls=True)" ] } ], "metadata": { "kernelspec": { "display_name": "ews_cfdna", "language": "python", "name": "ews_cfdna" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }