#!/usr/bin/env bash
########################################
### recentrifuge CPB DNA-seq samples ###
########################################
# Builds Recentrifuge (rcf) HTML reports from the Centrifuge results of the
# CPB DNA-seq samples, then extracts (rextract) the reads assigned to selected
# bacterial species so that their genomes can be assembled de novo.
#
# run on bufo_Mint
#
# Manual preparation inside /DATA/markop/CPB_rcf_DNAseq (not done by this script):
#   - made a softlink "input" for
#     "/NFSd-FITO_ws/markop/beetle_metatranscriptomics/_fromBUFO2/CPB_gut_metagenome_DNA-seq/"
#   - retaxdump
#   - downloaded https://drive.google.com/uc?id=1UqDo_ggzKzD35ZHkQGUldd__jVmbHI_R&export=download
#
# Environment:
#   THREADS  number of pigz threads used for the final compression (default: 32)

# Print an error message to stderr and abort.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Print a warning to stderr without aborting.
warn() {
  printf 'WARNING: %s\n' "$*" >&2
}

# `conda activate` only works once conda's shell function is loaded, which is
# not the case in a non-interactive shell; load the hook if it is missing.
# Strict mode is switched on only afterwards because conda's activation
# scripts are commonly not safe under `set -u`.
if ! declare -F conda >/dev/null; then
  conda_hook=$(conda shell.bash hook) || die "conda is not available on PATH"
  eval "${conda_hook}"
fi
conda activate recentrifuge || die "cannot activate conda environment 'recentrifuge'"

set -euo pipefail

readonly BASE_DIR=/DATA/markop
readonly WORK_DIR_NAME=CPB_rcf_DNAseq
readonly THREADS=${THREADS:-32}

# RI0000 is used as control sample although it's not a blank control.
readonly CONTROL=RI0000
readonly -a SAMPLES=(
  RI0325 RI0326 RI0327 RI0328 RI0329 RI0330 RI0331 RI0332 RI0333
)

# Taxa excluded from every report:
#   33090   Viridiplantae
#   33208   Metazoa (multicellular animals)
#   1910954 Phix174microvirus
#   12908   unclassified sequences
#   28384   other sequences
readonly -a EXCLUDE_TAXIDS=(33090 33208 1910954 12908 28384)

# Species whose reads are extracted for de-novo assembly of their genomes:
#   615   Serratia marcescens
#   1358  Lactococcus lactis
#   1353  Enterococcus gallinarum
#   550   Enterobacter cloacae
readonly -a EXTRACT_TAXIDS=(615 1358 1353 550)

cd "${BASE_DIR}" || die "cannot cd to ${BASE_DIR}"
mkdir -p "${WORK_DIR_NAME}" # -p keeps the script re-runnable
cd "${WORK_DIR_NAME}" || die "cannot cd to ${BASE_DIR}/${WORK_DIR_NAME}"

[[ -d ./input ]] || die "missing ./input softlink (see header for the manual preparation)"
[[ -d ./taxdmp ]] || warn "./taxdmp not found; rcf/rextract are pointed at it with -n"

# Pre-build the repeated option lists once.
sample_args=()
for sample in "${SAMPLES[@]}"; do
  sample_args+=(-f "./input/${sample}_centrifuge")
done

exclude_args=()
for taxid in "${EXCLUDE_TAXIDS[@]}"; do
  exclude_args+=(-x "${taxid}")
done

#######################################
# Run rcf on all samples with the common exclusions.
# Arguments:
#   $1  - output HTML file
#   $2  - number of control samples (0 or 1); with 1 the CONTROL sample is
#         passed as the first -f input so that `-c 1` designates it
#   $3+ - optional extra rcf options (e.g. --minscore 75)
#######################################
run_rcf() {
  local out_html=$1
  local n_controls=$2
  shift 2

  local -a inputs=()
  if ((n_controls > 0)); then
    inputs+=(-f "./input/${CONTROL}_centrifuge")
  fi
  inputs+=("${sample_args[@]}")

  rcf \
    -n ./taxdmp \
    -c "${n_controls}" \
    "${inputs[@]}" \
    "${exclude_args[@]}" \
    "$@" \
    -o "${out_html}"
}

# RI0000 used as control sample although it's not a blank control
run_rcf CPB-DNAseq_RI0000asCTRL.rcf.html 1

# no control!
run_rcf CPB-DNAseq_noCTRL.rcf.html 0

###############
### RCF cut-off to retain only high-scored taxa
### (minscore 75 as in the Recentrifuge publication Fig S7)
run_rcf CPB-DNAseq_RI0000asCTRL_cut75.rcf.html 1 --minscore 75
run_rcf CPB-DNAseq_noCTRL_cut75.rcf.html 0 --minscore 75

####################
# extract the reads assigned to each species in EXTRACT_TAXIDS
####################
# Work on local copies of the FASTQ files; the control sample is not extracted.
mkdir -p ./fq
cp -- ./input/*.fq ./fq/ || die "could not copy ./input/*.fq to ./fq/"
rm -f -- ./fq/"${CONTROL}"*.fq

# Extractions are independent of each other, so a failure is recorded and the
# remaining sample/taxid combinations are still attempted.
failed=0
for taxid in "${EXTRACT_TAXIDS[@]}"; do
  for sample in "${SAMPLES[@]}"; do
    if ! rextract \
      -n ./taxdmp \
      -f "./input/${sample}_centrifuge" \
      -i "${taxid}" \
      -1 "./fq/${sample}_1.fq" \
      -2 "./fq/${sample}_2.fq"; then
      warn "rextract failed for sample ${sample}, taxid ${taxid}"
      failed=$((failed + 1))
    fi
  done
done

## delete copied *.fq files and gzip all extracted *.fastq files
rm -f -- ./fq/*.fq

shopt -s nullglob
extracted=(./fq/*.fastq)
shopt -u nullglob
if ((${#extracted[@]} > 0)); then
  pigz -p "${THREADS}" -- "${extracted[@]}"
else
  warn "no extracted *.fastq files found in ./fq"
fi

if ((failed > 0)); then
  die "${failed} rextract run(s) failed"
fi