### FieldTwo.batch
### Review before running: make.contigs(file=FieldTwo.files, processors=6)
### Chris added several summary.seqs to check outputs in the logfile
### Chris re-enabled the two silva commands shown here (they were previously
### commented out) and changed the start and end coordinates:
###   pcr.seqs(fasta=silva.bacteria.fasta, start=13862, end=23444, keepdots=F)
###   system(mv silva.bacteria.pcr.fasta silva.v4.fasta)
### They are executed exactly once, with progress logging, in the section below;
### running them a second time would only repeat the trim and overwrite silva.v4.fasta.
#
### Process the silva data.
### Trim the reference alignment to the start/end columns given;
### keepdots=F drops the leading/trailing '.' padding instead of keeping it.
system(/bin/echo "starting silva pcr.seqs `date`" >>/dev/stderr)
pcr.seqs(fasta=silva.bacteria.fasta, start=13862, end=23444, keepdots=F)
#
### Rename the trimmed reference to the name used by align.seqs later on
system(/bin/echo "starting silva mv `date`" >>/dev/stderr)
system(mv silva.bacteria.pcr.fasta silva.v4.fasta)
#
### 1 Summary of the silva data.
system(/bin/echo "starting silva summary.seqs `date`" >>/dev/stderr)
summary.seqs(fasta=silva.v4.fasta)
### Snapshot of the working directory after this step
system(/bin/ls -ltr > mothur-output-files-1.txt)
#quit()
#
### Process the field data.
### Assemble the paired forward/reverse reads listed in FieldTwo2012.files into contigs
system(/bin/echo "starting make.contigs `date`" >>/dev/stderr)
make.contigs(file=FieldTwo2012.files, processors=8)
#
### 2 Summary of the raw contigs (lengths, ambiguous bases, etc.)
system(/bin/echo "starting summary.seqs `date`" >>/dev/stderr)
summary.seqs(fasta=current)
system(/bin/ls -ltr > mothur-output-files-2.txt)
#quit()
#
### 3 Screen the contigs: maxambig=0 rejects any sequence with ambiguous bases,
###   maxlength=275 rejects sequences longer than 275
system(/bin/echo "starting screen.seqs `date`" >>/dev/stderr)
screen.seqs(fasta=FieldTwo2012.trim.contigs.fasta, group=FieldTwo2012.contigs.groups, summary=FieldTwo2012.trim.contigs.summary, maxambig=0, maxlength=275, processors=8)
system(/bin/ls -ltr > mothur-output-files-3.txt)
#quit()
#
### 4 Collapse identical sequences into unique representatives
system(/bin/echo "starting unique.seqs `date`" >>/dev/stderr)
unique.seqs(fasta=FieldTwo2012.trim.contigs.good.fasta)
system(/bin/ls -ltr > mothur-output-files-4.txt)
#quit()
#
### 5 Build the count table from the names and groups files
system(/bin/echo "starting count.seqs `date`" >>/dev/stderr)
count.seqs(name=FieldTwo2012.trim.contigs.good.names, group=FieldTwo2012.contigs.good.groups, processors=8)
#
### Summary of the data up to this point
system(/bin/echo "starting summary.seqs `date`" >>/dev/stderr)
summary.seqs(count=FieldTwo2012.trim.contigs.good.count_table, fasta=FieldTwo2012.trim.contigs.good.unique.fasta, processors=8)
system(/bin/ls -ltr > mothur-output-files-5.txt)
#quit()
#
### 6 Align the unique sequences against the trimmed silva reference
system(/bin/echo "starting align.seqs `date`" >>/dev/stderr)
align.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.fasta, reference=silva.v4.fasta, processors=8)
#
### Summary of the aligned sequences
system(/bin/echo "starting summary.seqs `date`" >>/dev/stderr)
summary.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.align, count=FieldTwo2012.trim.contigs.good.count_table, processors=8)
system(/bin/ls -ltr > mothur-output-files-6.txt)
#quit()
### 7 Drop poorly aligned sequences: keep only those matching the start=8 / end=9582
###   alignment criteria; maxhomop=8 caps the length of single-base repeats
###   (homopolymers) allowed before a sequence is treated as junk
system(/bin/echo "starting screen.seqs `date`" >>/dev/stderr)
screen.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.align, count=FieldTwo2012.trim.contigs.good.count_table, start=8, end=9582, maxhomop=8, processors=8)
#
### Summary after alignment screening
system(/bin/echo "starting summary.seqs `date`" >>/dev/stderr)
summary.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.good.align, count=FieldTwo2012.trim.contigs.good.good.count_table, processors=8)
system(/bin/ls -ltr > mothur-output-files-7.txt)
#quit()
#
### 8 Remove alignment columns: vertical=T ignores columns that are empty in every
###   sequence; trump=. ignores any column containing a '.' in any sequence
system(/bin/echo "starting filter.seqs `date`" >>/dev/stderr)
filter.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.good.align, vertical=T, trump=., processors=8)
system(/bin/ls -ltr > mothur-output-files-8.txt)
#quit()
#
### 9 Merge sequences that differ by 2 or fewer nucleotides (diffs=2);
###   the log reports how many sequences remain and how many were merged
system(/bin/echo "starting pre.cluster `date`" >>/dev/stderr)
pre.cluster(fasta=FieldTwo2012.trim.contigs.good.unique.good.filter.fasta, count=FieldTwo2012.trim.contigs.good.good.count_table, diffs=2, processors=1)
system(/bin/ls -ltr > mothur-output-files-9.txt)
#quit()
#
### 10 Collapse redundant sequences once more after pre-clustering
system(/bin/echo "starting unique.seqs `date`" >>/dev/stderr)
unique.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.fasta, count=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.count_table)
system(/bin/ls -ltr > mothur-output-files-10.txt)
#quit()
#
### 11 Detect chimeric sequences and remove them from the count table
system(/bin/echo "starting chimera.uchime `date`" >>/dev/stderr)
chimera.uchime(fasta=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.fasta, count=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.count_table, dereplicate=t, processors=8)
system(/bin/ls -ltr > mothur-output-files-11.txt)
#quit()
#
### 12 Remove the sequences listed in the uchime accnos file from the fasta
system(/bin/echo "starting remove.seqs `date`" >>/dev/stderr)
remove.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.fasta, accnos=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.uchime.accnos)
#
### Summary of the sequences that remain
system(/bin/echo "starting summary.seqs `date`" >>/dev/stderr)
summary.seqs(fasta=current, count=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.uchime.pick.count_table)
system(/bin/ls -ltr > mothur-output-files-12.txt)
#quit()
#
### 13 Assign taxonomy to each sequence using the trainset9 reference (cutoff=80)
system(/bin/echo "starting classify.seqs `date`" >>/dev/stderr)
classify.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.pick.fasta, count=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.uchime.pick.count_table, reference=trainset9_032012.pds.fasta, taxonomy=trainset9_032012.pds.tax, cutoff=80, processors=8)
#
### Write new files without the sequences assigned to the listed taxa,
### leaving the bacterial sequences
system(/bin/echo "starting remove.lineage `date`" >>/dev/stderr)
remove.lineage(fasta=current, count=current, taxonomy=current, taxon=Chloroplast-Mitochondria-unknown-Archaea-Eukaryota)
system(/bin/ls -ltr > mothur-output-files-13.txt)
#quit()
#
### Mock is meant to get error analysis. Was Mock never added, and the error never calculated?
### Remove the Mock group from the count, fasta and taxonomy files.
### The count file name now carries the FieldTwo2012 prefix like every other file
### in this run (it previously read FieldTwo.trim..., which does not match).
### NOTE(review): the commands that follow still reference the same input file
### names used here, not the output of remove.groups -- confirm this is intended.
system(/bin/echo "starting remove.groups `date`" >>/dev/stderr)
remove.groups(count=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.uchime.pick.pick.count_table, fasta=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.pick.pick.fasta, taxonomy=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.pick.pds.wang.pick.taxonomy, groups=Mock)
#
### 14 Cluster into OTUs, splitting the data by classification at taxonomy level 5
###   (splitmethod=classify, taxlevel=5)
system(/bin/echo "starting cluster.split `date`" >>/dev/stderr)
cluster.split(fasta=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.pick.pick.fasta,count=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.uchime.pick.pick.count_table, taxonomy=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.pick.pds.wang.pick.taxonomy, splitmethod=classify, taxlevel=5,large=T, cutoff=0.15, processors=2)
system(/bin/ls -ltr > mothur-output-files-14.txt)
#quit() # and send email
#
### Tells us how many sequences are in each OTU from each group (label=0.03)
system(/bin/echo "starting make.shared `date`" >>/dev/stderr)
make.shared(list=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.pick.pick.an.unique_list.list, count=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.uchime.pick.pick.count_table, label=0.03)
#
### Organizes the OTU data into a consensus taxonomy
system(/bin/echo "starting classify.otu `date`" >>/dev/stderr)
classify.otu(list=current, count=current, taxonomy=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.pick.pds.wang.pick.taxonomy, label=0.03)
#
### Organizes the data into phylotypes based on their taxonomic consensus
system(/bin/echo "starting phylotype `date`" >>/dev/stderr)
phylotype(taxonomy=current)
#
### Gives us the genus-level data (label=1)
system(/bin/echo "starting make.shared `date`" >>/dev/stderr)
make.shared(list=current, count=current, label=1)
#
### Organizes the phylotypes into OTUs
system(/bin/echo "starting classify.otu `date`" >>/dev/stderr)
classify.otu(list=current, count=current, taxonomy=current, label=1)
#
### Summarizes the current sequences, added by charliep 22 September 2013
system(/bin/echo "starting summary.seqs `date`" >>/dev/stderr)
summary.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.pick.pick.fasta, count=current)
system(/bin/ls -ltr > mothur-output-files-15.txt)
quit()