1 Star 5 Fork 3

Vincent.Gao / 甲基化生信分析pipeline

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
manhattan.py 2.90 KB
AI 代码解读
一键复制 编辑 原始数据 按行查看 历史
Vincent.Gao 提交于 2022-01-26 17:37 . 更新文件
# -*- coding:utf-8 -*-
import os
import sys
import shelve
import matplotlib.pyplot as plt
import numpy as np
in_file = sys.argv[1]
out_file = sys.argv[2]
plot_file = sys.argv[3]
help_dict = {"10%":0,"20%":0,"30%":0,"40%":0,"50%":0,"60%":0,"70%":0,"80%":0,"90%":0,"100%":0} # 按区间统计信息
with shelve.open("/home/DUAN/methylation/data_files/dic") as dbs: # 读取已经提取过的refseq信息,按每个染色体存储基因的区间
new_dic = dbs["dic"]
with open(in_file) as f,open(out_file,"w") as w:
w.write("SNP"+"\t"+"CHR"+"\t"+"BP"+"\t"+"P"+"\n")
counter = 0
dic = {"chr1":1,"chr2":2,"chr3":3,"chr4":4,"chr5":5,"chr6":6,"chr7":7,"chr8":8,"chr9":9,"chr10":10,"chr11":11,"chr12":12,"chr13":13,"chr14":14,"chr15":15,"chr16":16,"chr17":17,"chr18":18,"chr19":19,"chr20":20,"chr21":21,"chr22":22,"chrX":23,"chrY":24}
for i in f: # 开始遍历dml位点
if i.split("\t")[0].strip('"') == 'chr':
pass
else:
if i.split("\t")[0].strip('"') not in list(dic.keys()): # 如果位点不在1-24染色体上,则跳过
continue
else:
SNP = str(counter)
chr_ori = i.split("\t")[0].strip('"')
CHR = str(dic[i.split("\t")[0].strip('"')])
BP = i.split("\t")[1]
P = i.split("\t")[9]
for pos in new_dic[chr_ori]:
if int(BP) in pos:
v = (int(BP) - list(pos)[0]) / len(list(pos)) # 开始计算位点在基因的位置
if 0<=v<0.1:
help_dict["10%"] +=1
elif 0.1<=v<0.2:
help_dict["20%"] +=1
elif 0.2<=v<0.3:
help_dict["30%"] +=1
elif 0.3<=v<0.4:
help_dict["40%"] +=1
elif 0.4<=v<0.5:
help_dict["50%"] +=1
elif 0.5<=v<0.6:
help_dict["60%"] +=1
elif 0.6<=v<0.7:
help_dict["70%"] +=1
elif 0.7<=v<0.8:
help_dict["80%"] +=1
elif 0.8<=v<0.9:
help_dict["90%"] +=1
else:
help_dict["100%"] +=1
break
if float(P) < 1e-30: # 将小于1e-30的pvalue归到1e-30
P = str(1e-30)
counter+=1
w.write(SNP+"\t"+CHR+"\t"+BP+"\t"+P+"\n")
y = list(help_dict.values())[1:11]
x = list(help_dict.keys())[1:11]
plt.plot(x,y)
plt.xlabel('gene position sliced to ten equal parts')
plt.ylabel('methylation counts in certain area')
ax = plt.gca()
ax.axes.yaxis.set_ticks([])
my_y_ticks = np.arange(0, 10000, 50)
plt.yticks(my_y_ticks)
plt.show()
plt.savefig(plot_file)
Python
1
https://gitee.com/duangao/methylation.git
git@gitee.com:duangao/methylation.git
duangao
methylation
甲基化生信分析pipeline
test

搜索帮助