import pandas as pd import matplotlib matplotlib.use('Agg') import scipy.optimize import numpy as np from numpy import exp import glob import scipy import os import subprocess import argparse from pathlib import Path import math matplotlib.rcParams.update({'font.size': 20}) import matplotlib.pyplot as plt # an argparse action that converts relative paths to full paths; form: https://gist.github.com/brantfaircloth/1252339 class FullPaths(argparse.Action): """Expand user- and relative-paths""" def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, os.path.abspath(os.path.expanduser(values))) parser = argparse.ArgumentParser(description="Plotting tool for LIQUORICE", formatter_class=argparse.ArgumentDefaultsHelpFormatter) required_keyword_args = parser.add_argument_group('required named arguments') required_keyword_args.add_argument('--LIQUORICE_outdir', help='Name of the output directory that has been produced by ' 'LIQUORICE and which contains input for this plotting ' 'tool', required=True, action=FullPaths) required_keyword_args.add_argument('--LIQUORICE_extendto', help='"extendto setting that had been used for LIQUORICE ' 'analysis', required=True, type=int) required_keyword_args.add_argument('--LIQUORICE_binsize', help='Bin size that had been used for LIQUORICE analysis', type=int, required=True,) required_keyword_args.add_argument('--avg_center_size', help='Length (in bp) at which the central region should be ' 'displayed',type=int, required=True,) parser.add_argument('--tissue_type', help='Sets the scales of the axis such that it fits to the typical dips of a tissue types. Choose from: "haem", "ewing", and "universal".',type=str, required=False,) args = parser.parse_args() ################## PARAMS #################### corr_cov_glob=args.LIQUORICE_outdir+"/*/corr_coverage_mean_per_bin.csv" cov_glob=args.LIQUORICE_outdir+"/*/coverage_mean_per_bin.csv" extdto=args.LIQUORICE_extendto bs=args.LIQUORICE_binsize avg_centersize=args.avg_center_size tissuetype=args.tissue_type use_fixed_scale=True add_ctrls=False modeled_values_table=pd.read_csv(glob.glob(args.LIQUORICE_outdir+"/fitted_model/fitted_gaussians_sigmas*/parameter_summary_leastsq.csv")[0]) ################ CODE ########################### corr_cov_filelist=sorted(glob.glob(corr_cov_glob)) cov_filelist=sorted(glob.glob(cov_glob)) outdir="covplots_for_fig3" subprocess.check_call("mkdir -p %s"%(outdir,),shell=True) paramlist=[] if add_ctrls: ctrls_yvals=[] nr_of_ctrls=0 for corrcovfile,covfile in zip(corr_cov_filelist,cov_filelist):#,corr_cov2_filelist): assert str(Path(corrcovfile).parent).split("/")[-1] == str(Path(covfile).parent).split("/")[-1] samplename=str(Path(covfile).parent).split("/")[-1] if samplename in ["BD09PE001","CTR2PE001","CTR3PE001","CTRPE001","DH01PE001","LDXXPE001","SM05PE001"]: nr_of_ctrls+=1 df_corr=pd.read_csv(corrcovfile) y_corr=[y-df_corr["averagecoverage"].mean() for y in df_corr["averagecoverage"].values] ctrls_yvals.append(y_corr) ctrls_yvals_mins=[] ctrls_yvals_maxs=[] for index in range(len(ctrls_yvals[0])): ctrls_yvals_mins.append(max([x[index] for x in ctrls_yvals])) ctrls_yvals_maxs.append(min([x[index] for x in ctrls_yvals])) for corrcovfile,covfile in zip(corr_cov_filelist,cov_filelist):#,corr_cov2_filelist): assert str(Path(corrcovfile).parent).split("/")[-1] == str(Path(covfile).parent).split("/")[-1] samplename=str(Path(covfile).parent).split("/")[-1] sampledict={"sample":samplename} df_uncorr=pd.read_csv(covfile) df_uncorr=df_uncorr[["bin","averagecoverage"]] df_corr=pd.read_csv(corrcovfile) df_corr=df_corr[["bin","averagecoverage"]] central_bin_x_values=[x*avg_centersize-avg_centersize/2 for x in [0.05,0.175,0.5,0.825,0.95]] y_corr=[y-df_corr["averagecoverage"].mean() for y in df_corr["averagecoverage"].values] # y_corr=[y for y in df_corr["averagecoverage"].values] # todo note # y_corr2=[y-df_corr2["averagecoverage"].mean() for y in df_corr2["averagecoverage"].values] mean_uncorr=np.mean(df_uncorr["averagecoverage"].values) y_uncorr=[y-mean_uncorr for y in df_uncorr["averagecoverage"].values] intercept=modeled_values_table[modeled_values_table["sample"]==samplename]["intercept"].values[0] y_corr=[y-intercept for y in df_corr["averagecoverage"].values] x=list(np.arange(-extdto+(bs/2)-avg_centersize/2,(bs/2)-avg_centersize/2,bs))+central_bin_x_values+list(np.arange(avg_centersize/2+(bs/2),extdto+avg_centersize/2+(bs/2),bs)) assert len(x)==len(y_corr) assert len(y_corr)==len(y_uncorr) labels=list(df_uncorr.iloc[list(range(0,len(y_corr),50)),:]["bin"].values) plt.plot(x,y_corr, alpha=0.8,color="darkblue", linewidth=4, label="Corrected coverage")#, label="Position-wise mean of normalized coverage") # plt.plot(x,y_uncorr, marker="x",alpha=0.25, color="darkred", linewidth=4, label="Uncorrected coverage") if add_ctrls: plt.gca().fill_between(x,ctrls_yvals_mins,ctrls_yvals_maxs, color="grey", alpha=0.3, label="Range of controls, n=%s"%(nr_of_ctrls,)) # plt.legend(loc='upper right',fontsize=10) # plt.title(samplename, fontsize=45) ttl = plt.gca().title ttl.set_position([.5, 1.05]) # plt.xlabel("Distance to center of RoI [bp]", fontsize=40, labelpad=15) if use_fixed_scale: # plt.ylim(ymin,ymax) # plt.yticks([round(x/100,2) for x in range(ymin*100,ymax*100+1,10)],[round(x/100,2) for x in range(-75,21,10)]) if tissuetype=="universal": plt.ylim(-0.75,0.30) plt.yticks([round(x/100,2) for x in range(-75,31,10)],[round(x/100,2) for x in range(-75,31,10)]) plt.gca().set_yticks([round(x/100,2) for x in range(-75,31,5)],minor=True) if tissuetype=="haem": ymax=0.1 ymin=-0.35 plt.ylim(-0.35,0.1) # plt.yticks([round(x/100,2) for x in range(int(ymin*100),int(ymax*100)+1,10)],[round(x/100,2) for x in range(int(ymin*100),int(ymax*100)+1,10)]) # plt.yticks([-0.3,-0.2,-0.1,0,0.1],[-0.3,-0.2,-0.1,0,0.1]) plt.yticks([-0.2,0]) #plt.gca().set_yticks([round(x/100,2) for x in range(int(ymin*100),int(ymax*100)+1,5)],minor=True) if tissuetype=="ewing": plt.ylim(-0.1,0.045) plt.yticks([-0.1,0]) # plt.yticks([round(x/100,2) for x in range(-14,6,2)],[round(x/100,2) for x in range(-14,6,2)]) # plt.gca().set_yticks([round(x/100,2) for x in range(-14,6,1)],minor=True) if tissuetype=="low_x": plt.ylim(-0.7,0.5) # plt.yticks([round(x/100,2) for x in range(-14,6,2)],[round(x/100,2) for x in range(-14,6,2)]) # plt.gca().set_yticks([round(x/100,2) for x in range(-14,6,1)],minor=True) #plt.grid(which='both') plt.xlim(-15000,15000) # plt.ylim(-0.1,0.025) # plt.yticks([-0.1,-0.075,-0.05,-0.025,0,0.025]) plt.xticks([-extdto,0,extdto],["\n-15 kb","\nCenter","\n15 kb"]) else: plt.xlabel("Distance from center of RoI [bp]") plt.ylabel("Normalized coverage") fig=plt.gcf() fig.set_size_inches(10,10) plt.tight_layout() fig.savefig(outdir+"/"+samplename+"_dip_w_ctrls.pdf", ) plt.close()