-
Notifications
You must be signed in to change notification settings - Fork 53
常用命令
Kun Wang edited this page Aug 15, 2021
·
20 revisions
/project/software/minimap2/minimap2 -x map-ont -I 40G -d lungfish.mmi lungfish.fa
/project/software/minimap2/minimap2 -t 60 -a /project/lungfish/10.3rd.2round/result.next/lungfish.mmi /project/split_reads/ref0.fa | /project/software/samtools/samtools sort --threads 10 -o /project/lungfish/11.3rd.3round/bam/ref0.bam -
~/software/jellyfish/jellyfish-2.2.6/bin/jellyfish count -C -U 200000 -m 21 -s 2000000000 -t 20 ./clean/*.clean -o clean_reads_200k_21.jf
~/software/jellyfish/jellyfish-2.2.9-linux-static count -C -U 200000 -m 39 -s 2000000000 -t 16 -o clean_reads_200k_39.jf <(zcat ../reads/*.gz) # 如果需要用gz格式的reads,可以在zsh下面直接运行该命令
~/software/jellyfish/jellyfish-2.2.6/bin/jellyfish histo -h 200000 -t 20 clean_reads_200k_21.jf > clean_reads_200k_21.histo
接下来使用genome scope来处理,这里需要注意的是,使用的k参数要和前面的一样。可以多设置几个kmer,比如从21 25 33到39。 上述参数中,-m指代kmer大小。
~/software/gce-1.0.0/kmerfreq/kmer_freq_hash/kmer_freq_hash -k 17 -l clean.list -g 120000000000 -t 12 2>kmerfreq.log
~/software/gce-1.0.0/gce -f output.freq.stat -g 91541345605 -m 1 -D 8 -c 30 -b 0 >species.table 2> species.log
运行参数要注意,可能需要进行调整。
bwa index stickleback.fa
bwa mem -t 12 -R '@RG ID:hadal01 SM:hadal01 LB:hadal01' stickleback.fa reads/hadal01.1.fq.gz reads/hadal01.2.fq.gz | samtools sort -O bam -T hadal01 -l 3 -o hadal01.bam -
/home/share/software/blast/ncbi-blast-2.4.0+/bin/makeblastdb -in goodProteins.fasta -out goodProteins.fasta -dbtype prot # 蛋白库
/home/share/software/blast/ncbi-blast-2.4.0+/bin/blastp -query opsin.fa -db goodProteins.fasta -out opsin2all.out -outfmt 0 -num_threads 16 # 蛋白比对蛋白
/home/share/software/blast/ncbi-blast-2.4.0+/bin/makeblastdb -in ds.fa -out ds.fa -dbtype nucl # 核酸库
/home/share/software/blast/ncbi-blast-2.4.0+/bin/tblastn -query opsin.fa -db ds.fa -out opsin2all.out -outfmt 0 -num_threads 16 # 蛋白比对核酸
genewise query.fa -nosplice_gtag -trev scaffold1145.fa -u 125973 -v 130818 -pretty -pseudo -gff -cdna -trans > query.gff # -trev是反链,-tfor是正链
~/software/fastp/fastp
-i F1-10_1.clean.fq -I F1-10_2.clean.fq
-o clean/F1-10.1.fq.gz -O clean/F1-10.2.fq.gz
-z 3
-j clean/F1-10.json -h clean/F1-10.html
-w 10 # -i -I为输入文件,-z为压缩等级,0为不压缩,9为最大, -o -O为输出文件,-j -h为统计文件,-w为使用的cpu数量
sortBed -i cds2genome.out.bed > cds2genome.out.bed.sort
mergeBed -i cds2genome.out.bed.sort > cds2genome.out.bed.merge
subtractBed -a all.bed -b subtract.bed > rest.bed
samtools faidx genome.fa #先建立索引
samtools faidx genome.fa scaffold214:1190219-1190943
samtools view -b -f 2 $bam mito_chr | java -jar /path/to/picard.jar SamToFastq INPUT=/dev/stdin FASTQ=$out.fq INCLUDE_NON_PRIMARY_ALIGNMENTS=false INTERLEAVE=true VALIDATION_STRINGENCY=LENIENT
mira -H 1 -F -i -c -r /path/to/mito_chr.fa -f $out.fq -m $out.result
lastdb -cR01 humdb humanMito.fa
lastal humdb fuguMito.fa > myalns.maf
~/software/multiz/multiz-tba.012109-build/maf_project wisent2cattle.maf cattle > wisent2cattle.swap.maf #调换maf里面的顺序,最主要是调整strand
sed -i "s/delete//" test.file
http://www.fishbrowser.org/software/L_RNA_scaffolder/
http://www.fishbrowser.org/software/PEP_scaffolder/
LDhat2
LDhelmet 适用于大群体
fastEPRR
LDJump 速度快,适用于小群体
https://github.com/PhHermann/LDJump
R code
rooted tree:
n=69;factorial(2*n-3)/(factorial(n-2)*2**(n-2))
unrooted tree:
n=69;factorial(2*n-5)/(factorial(n-3)*2**(n-3))
from: http://carrot.mcb.uconn.edu/mcb396_41/tree_number.html Strongly recommended course: http://carrot.mcb.uconn.edu/mcb396_41/
raxmlHPC-PTHREADS -T 10 -f a -s raw_12species.align.fa -n raw_12species.align.fa.phb -m GTRGAMMAI -x 271828 -N 100 -p 31415 -o AY488491.1,JQ235521.1,EF536351.1
https://cran.r-project.org/web/packages/qqman/vignettes/qqman.html
mlrho 评估杂合度,这个软件还能干很多事情,比如θ等。其输入文件可以为bam,流程如下
samtools view -b test.bam | samtools mpileup - | cut -f 2,5 | awk -f /home/share/users/yangyongzhi2012/tools/mlRho/MlRho_2.9/Scripts/bam2pro.awk | /home/share/users/yangyongzhi2012/tools/mlRho/FormatPro_0.5/formatPro
/home/share/users/yangyongzhi2012/tools/mlRho/MlRho_2.9/mlRho -M 0 -I
output:
d n theta (represent hete) epsilon (represent error) -log(L)
0 46722 1.70e-02<1.83e-02<1.97e-02 4.55e-03<4.74e-03<4.95e-03 8.25e+04
java -jar ~/software/picard/picard.jar CollectInsertSizeMetrics I=test.sort.bam O=insert_size_metrics.txt H=insert_size_histogram.pdf M=0.5
~/software/clustalo/clustalo-1.2.4-Ubuntu-x86_64 -i 01.extract.pl.fa.random.fa -o 01.extract.pl.fa.random.phy --outfmt=phy --infmt=fa # clustalo貌似不好使,必须要比对一遍
~/software/clustalw/clustalw-2.1-linux-x86_64-libcppstatic/clustalw2 -CONVERT -INFILE=01.extract.pl.fa.random.fa -OUTFILE=01.extract.pl.fa.random.phy -OUTPUT=PHYLIP
java -jar /home/share/users/wangkun2010/software/jmodeltest/jmodeltest2/dist/jModelTest.jar -d 01.extract.pl.fa.random.phy -o 01.extract.pl.fa.random.phy.jtest -BIC -AIC -AICc -s 11 -f -i
https://github.com/OpenGene/gencore https://mp.weixin.qq.com/s/KS6gu6ZwBbAzSdrAYPRvqg
mydf <- subset(mydf, select = -X ) # 其中X是一列的名字
> summary(a)
brain_dp heart_dp liver_dp intestine_dp
Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
1st Qu.:0.0400 1st Qu.:0.0500 1st Qu.:0.040 1st Qu.:0.0400
Median :0.1700 Median :0.1700 Median :0.150 Median :0.1500
Mean :0.1962 Mean :0.1991 Mean :0.181 Mean :0.1813
3rd Qu.:0.2900 3rd Qu.:0.2900 3rd Qu.:0.270 3rd Qu.:0.2600
Max. :1.5900 Max. :1.6000 Max. :1.600 Max. :1.5700
# 数据格式为一列一个数据
b=prcomp(a)
write.table(b$x,file="pca.txt")
a=read.table('01.combine.pl.out',header=T)
b=princomp(a)
write.table(b$loadings,file="pca.txt")
a=read.table('qn.out',header=T)
b=prcomp(t(a))
write.table(b$x,file="qn.pca.txt",quote=F)
c=(b$sdev^2/sum(b$sdev^2))
d=cumsum(b$sdev^2/sum(b$sdev^2))
proportion=data.frame(Proportion_of_Variance=c,Cumulative_Proportion=d)
write.table(proportion,file="qn.pca.txt.proportion",quote=F)
# 建树
library('ape')
b=1-cor(a,method="spearman")
c=bionj(b)
write.tree(c,file='out.tre')
sratools下载reads 下载地址 https://github.com/ncbi/sra-tools/wiki/Downloads /home/qiuqiang/software/sratools/sratoolkit.2.8.2-1-centos_linux64/bin/fastq-dump --gzip --split-3 SRR1344934
> a$individual=factor(a$individual,levels=c("bluefintuna", "zebrafish", "stickleback", "hadal02", "hadal03", "hadal01", "cod", "shallow", "fugu", "flatfish", "platyfish"))
> ggplot(a,aes(individual,hete))+geom_boxplot()
mmseqs2
diamond
ggplot(a,aes(V2,V3,color=V4))+geom_point()+labs(x="Correlation Co-efficient of AAs usage between MHS and Stickleback",y="Correlation Co-efficient of AAs usage between MHS and Tanaka's snailfish",color="SF vs TS")
qsub -l hostname=ca-node-0001 -cwd -pe smp 60 -q all.q -e /project/lungfish/15.bgiseq.2round/tasks.01.bwa.pl.sh/test1.err -o /project/lungfish/15.bgiseq.2round/tasks.01.bwa.pl.sh/test1.out /project/lungfish/15.bgiseq.2round/tasks.01.bwa.pl.sh/test1.shi
qsub -l hostname=ca-node-0002 -cwd -pe smp 60 -q all.q -e /project/lungfish/15.bgiseq.2round/tasks.01.bwa.pl.sh/test2.err -o /project/lungfish/15.bgiseq.2round/tasks.01.bwa.pl.sh/test2.out /project/lungfish/15.bgiseq.2round/tasks.01.bwa.pl.sh/test2.shi
orthofinder -f /path/to/fasta -S diamond -t 60 -a 60
~/software/spaln/spaln/bin/makdbs -KD hadal_nanopore.fa
~/software/spaln/spaln/bin/spaln -W hadal_nanopore.bkp -KP -Xk6 -Xb6000 -XG2000000 -Xg48 -Xs6 hadal_nanopore.fa
~/software/spaln/spaln/bin/spaln -Q7 -O0 -t5 -d hadal_nanopore query.fa > query.gff3
程序需要在database的目录运行
~/software/blat/blat db.fa query.fa -makeOoc=db.fa.ooc -t=dnax -q=prot out.psl # 建库
~/software/blat/blat db.fa query.fa -ooc=db.fa.ooc -t=dnax -q=prot out.psl # 比对
gmap_build -d ref -k 12 -D /project/ref ref.fa
gmapl -D /project/ref -d ref -k 12 -B 5 -t 60 -A query.fa > alignment.result
input=transcripts.fa
~/software/cdhit/cdhit/cd-hit -i $input -o $input.nr.fa -c 0.95 -aS 0.8 -T 32
input=transcripts.fa
~/software/transdecoder/TransDecoder-TransDecoder-v5.5.0/TransDecoder.LongOrfs -t $input; ~/software/transdecoder/TransDecoder-TransDecoder-v5.5.0/TransDecoder.Predict -t $input
https://pbil.univ-lyon1.fr/software/phyldog/
purge_haplotigs
https://bitbucket.org/mroachawri/purge_haplotigs
input="total.nr.pep";
output="total_nr_tetrapod";
~/miniconda3/bin/python ~/software/busco/busco/scripts/run_BUSCO.py -i $input -o $output -l /public/home/wangkun/software/busco/tetrapoda_odb9/ -m prot -c 64 -sp human
query=transcripts.pep
db=homolog.pep
~/software/diamond/diamond makedb --in $db -d $db --threads 32
~/software/diamond/diamond blastp -d $db -q $query -o $query.$db.out --threads 32
gencore 0.12.0
https://github.com/OpenGene/gencore
sub mean{
my @a=@_;
my ($sum,$num)=@a;
foreach my $x(@a){
$sum+=$x;
$num++;
}
my $mean=$sum/$num;
return($mean);
}
sub sd{
my @a=@_;
my $mean=&mean(@a);
my $numerator=0;
my $num=0;
foreach my $x(@a){
$numerator+=($x-$mean)**2;
$num++;
}
return("NA") if($num<2);
my $sd=$numerator/($num-1);
$sd=sqrt($sd);
return($sd);
}
bcftools mpileup -a AD -A -q 20 -Q 20 -f CaiJi.fa -r chr1A:1-100000 --ignore-RG -b bam.lst -d 400 | bcftools call -cv | gzip - > test.vcf.gz