### FieldTwo.batch  
### Review before running: make.contigs(file=FieldTwo2012.files, processors=8) -- confirm the .files name and the processor count for this machine
### Chris added several summary.seqs to check outputs in the logfile
### Chris re-enabled the silva pcr.seqs / rename commands and changed the start and end.
### NOTE: these two commands used to appear twice (once here without logging and
### once in the logged section below), so the reference was trimmed and renamed
### twice per run with identical results.  They now run once, in the logged section.


### Process the silva data.
### Trim the silva.bacteria.fasta reference alignment to alignment columns
### 13862-23444.  keepdots=F drops the leading/trailing '.' padding instead of
### keeping it.  pcr.seqs writes its result to silva.bacteria.pcr.fasta.
system(/bin/echo  "starting silva pcr.seqs `date`" >>/dev/stderr)
pcr.seqs(fasta=silva.bacteria.fasta, start=13862, end=23444, keepdots=F)


### Rename the trimmed reference; align.seqs later uses silva.v4.fasta as its reference.
system(/bin/echo  "starting silva mv `date`" >>/dev/stderr)
system(mv silva.bacteria.pcr.fasta silva.v4.fasta)

### 1 Summary of the trimmed silva reference (written to the logfile).
system(/bin/echo  "starting silva summary.seqs `date`" >>/dev/stderr)
summary.seqs(fasta=silva.v4.fasta)

### Snapshot the working directory (ls -ltr: oldest first) to check which files exist after this step.
system(/bin/ls -ltr > mothur-output-files-1.txt)
### Uncomment the next line to stop the batch here.
#quit()

### Process the field data.
### FieldTwo2012.files lists the paired forward and reverse read files;
### make.contigs joins each pair into one contig.  Later steps read its output as
### FieldTwo2012.trim.contigs.fasta and FieldTwo2012.contigs.groups.
system(/bin/echo  "starting make.contigs `date`" >>/dev/stderr)
make.contigs(file=FieldTwo2012.files, processors=8) 

### 2 Summarizes raw data w/ mean/median/lengths etc.
### fasta=current uses the fasta file mothur most recently recorded
### (presumably the contigs fasta from make.contigs -- confirm in the logfile).
system(/bin/echo  "starting summary.seqs `date`" >>/dev/stderr)
summary.seqs(fasta=current)

### Snapshot the working directory (ls -ltr: oldest first) after this step.
system(/bin/ls -ltr > mothur-output-files-2.txt)
### Uncomment the next line to stop the batch here.
#quit()

### 3 Filters the contigs: maxambig=0 removes any sequence containing an ambiguous
### base, maxlength=275 removes any sequence longer than 275 bases.  The group and
### summary files are passed in so they are screened alongside the fasta.
### Later steps read the results as *.good.fasta / *.good.groups.
system(/bin/echo  "starting screen.seqs `date`" >>/dev/stderr)
screen.seqs(fasta=FieldTwo2012.trim.contigs.fasta, group=FieldTwo2012.contigs.groups, summary=FieldTwo2012.trim.contigs.summary, maxambig=0, maxlength=275, processors=8)

### Snapshot the working directory (ls -ltr: oldest first) after this step.
system(/bin/ls -ltr > mothur-output-files-3.txt)
### Uncomment the next line to stop the batch here.
#quit()

### 4 Identifies identical sequences and merges them, keeping one representative
### of each.  Later steps read the results as
### FieldTwo2012.trim.contigs.good.unique.fasta and FieldTwo2012.trim.contigs.good.names.
system(/bin/echo  "starting unique.seqs `date`" >>/dev/stderr)
unique.seqs(fasta=FieldTwo2012.trim.contigs.good.fasta)

### Snapshot the working directory (ls -ltr: oldest first) after this step.
system(/bin/ls -ltr > mothur-output-files-4.txt)
### Uncomment the next line to stop the batch here.
#quit()

### 5 Counts the number of sequences each unique sequence represents, using the
### names file from unique.seqs and the screened groups file.  The resulting
### FieldTwo2012.trim.contigs.good.count_table is used by the following steps.
system(/bin/echo  "starting count.seqs `date`" >>/dev/stderr)
count.seqs(name=FieldTwo2012.trim.contigs.good.names, group=FieldTwo2012.contigs.good.groups, processors=8)

### Re-summarizes the data up to this point; passing the count table lets the
### summary account for the duplicates merged by unique.seqs.
system(/bin/echo  "starting summary.seqs `date`" >>/dev/stderr)
summary.seqs(count=FieldTwo2012.trim.contigs.good.count_table, fasta=FieldTwo2012.trim.contigs.good.unique.fasta, processors=8)

### Snapshot the working directory (ls -ltr: oldest first) after this step.
system(/bin/ls -ltr > mothur-output-files-5.txt)
### Uncomment the next line to stop the batch here.
#quit()

### 6 Aligns the unique sequences against the template silva.v4.fasta (the trimmed
### silva reference prepared at the top of this batch).  The next steps read the
### result as FieldTwo2012.trim.contigs.good.unique.align.
system(/bin/echo  "starting align.seqs `date`" >>/dev/stderr)
align.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.fasta, reference=silva.v4.fasta, processors=8)
        
### Summary of the aligned sequences; use its start/end columns in the logfile to
### check the start= and end= values given to the next screen.seqs.
system(/bin/echo  "starting summary.seqs `date`" >>/dev/stderr)
summary.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.align, count=FieldTwo2012.trim.contigs.good.count_table, processors=8)
        
### Snapshot the working directory (ls -ltr: oldest first) after this step.
system(/bin/ls -ltr > mothur-output-files-6.txt)
### Uncomment the next line to stop the batch here.
#quit()

### 7 Identifies/removes bad alignments: keeps sequences that start at or before
### alignment column 8 and end at or after column 9582.
### maxhomop=8 removes sequences with a single-base repeat (homopolymer) longer than 8.
### NOTE(review): start/end are column numbers within the trimmed silva.v4.fasta
### alignment; re-check them if the pcr.seqs start/end values are changed.
system(/bin/echo  "starting screen.seqs `date`" >>/dev/stderr)
screen.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.align, count=FieldTwo2012.trim.contigs.good.count_table, start=8, end=9582, maxhomop=8, processors=8)
        
### Re-summarizes the screened alignment together with its screened count table.
system(/bin/echo  "starting summary.seqs `date`" >>/dev/stderr)
summary.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.good.align, count=FieldTwo2012.trim.contigs.good.good.count_table, processors=8)

### Snapshot the working directory (ls -ltr: oldest first) after this step.
system(/bin/ls -ltr > mothur-output-files-7.txt)
### Uncomment the next line to stop the batch here.
#quit()
        
### 8 Removes uninformative columns from the alignment.
### vertical=T drops columns that contain only gap characters in every sequence;
### trump=. drops any column in which at least one sequence has a '.'.
### pre.cluster reads the result as *.good.unique.good.filter.fasta.
system(/bin/echo  "starting filter.seqs `date`" >>/dev/stderr)
filter.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.good.align, vertical=T, trump=., processors=8)

### Snapshot the working directory (ls -ltr: oldest first) after this step.
system(/bin/ls -ltr > mothur-output-files-8.txt)
### Uncomment the next line to stop the batch here.
#quit()
        
### 9 Merges any sequences with 2 or fewer nucleotide differences (diffs=2).
### The logfile reports how many sequences remain and how many were merged.
### Unlike the other steps this one runs with processors=1.
### The next step reads the results as *.filter.precluster.fasta / *.filter.precluster.count_table.
system(/bin/echo  "starting pre.cluster `date`" >>/dev/stderr)
pre.cluster(fasta=FieldTwo2012.trim.contigs.good.unique.good.filter.fasta, count=FieldTwo2012.trim.contigs.good.good.count_table, diffs=2, processors=1)

### Snapshot the working directory (ls -ltr: oldest first) after this step.
system(/bin/ls -ltr > mothur-output-files-9.txt)
### Uncomment the next line to stop the batch here.
#quit()
        
### 10 Again merges redundant (identical) sequences, this time updating the count
### table as well.  chimera.uchime reads the results as
### *.precluster.unique.fasta and *.precluster.unique.count_table.
system(/bin/echo  "starting unique.seqs `date`" >>/dev/stderr)
unique.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.fasta, count=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.count_table)

### Snapshot the working directory (ls -ltr: oldest first) after this step.
system(/bin/ls -ltr > mothur-output-files-10.txt)
### Uncomment the next line to stop the batch here.
#quit()

### 11 Detects chimeric sequences (artifacts joining pieces of two different parent
### sequences) and removes them from the count table.
### With dereplicate=t a sequence flagged as chimeric in one group should only be
### removed from that group, not from every group -- see the chimera.uchime docs.
### The following steps read the results as *.uchime.accnos and *.uchime.pick.count_table.
system(/bin/echo  "starting chimera.uchime `date`" >>/dev/stderr)
chimera.uchime(fasta=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.fasta, count=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.count_table, dereplicate=t, processors=8)
        
### Snapshot the working directory (ls -ltr: oldest first) after this step.
system(/bin/ls -ltr > mothur-output-files-11.txt)
### Uncomment the next line to stop the batch here.
#quit()

### 12 Removes the sequences listed in the chimera.uchime accnos file from the fasta.
### classify.seqs reads the result as *.precluster.unique.pick.fasta.
system(/bin/echo  "starting remove.seqs `date`" >>/dev/stderr)
remove.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.fasta, accnos=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.uchime.accnos)
        
### Summarizes the chimera-free sequences.  fasta=current is presumably the
### *.pick.fasta just written by remove.seqs (confirm in the logfile); the count
### table is the one written by chimera.uchime.
system(/bin/echo  "starting summary.seqs `date`" >>/dev/stderr)
summary.seqs(fasta=current, count=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.uchime.pick.count_table)

### Snapshot the working directory (ls -ltr: oldest first) after this step.
system(/bin/ls -ltr > mothur-output-files-12.txt)
### Uncomment the next line to stop the batch here.
#quit()

### 13 Matches the sequences to taxonomy data using the trainset9_032012.pds
### reference fasta and taxonomy files.  cutoff=80 is the confidence cutoff
### for an assignment (see the classify.seqs docs).
### Later steps read the result as *.pick.pds.wang.taxonomy.
system(/bin/echo  "starting classify.seqs `date`" >>/dev/stderr)
classify.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.pick.fasta, count=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.uchime.pick.count_table, reference=trainset9_032012.pds.fasta, taxonomy=trainset9_032012.pds.tax, cutoff=80, processors=8)
        
### Makes new fasta/count/taxonomy files without any sequence classified as one of
### the listed taxa (Chloroplast, Mitochondria, unknown, Archaea, Eukaryota), which
### isolates the bacterial sequences.  fasta/count/taxonomy=current are presumably
### the files used or produced by classify.seqs just above (confirm in the logfile).
### Later steps read the results as *.pick.pick.fasta, *.uchime.pick.pick.count_table
### and *.pds.wang.pick.taxonomy.
system(/bin/echo  "starting remove.lineage `date`" >>/dev/stderr)
remove.lineage(fasta=current, count=current, taxonomy=current, taxon=Chloroplast-Mitochondria-unknown-Archaea-Eukaryota)
        
### Snapshot the working directory (ls -ltr: oldest first) after this step.
system(/bin/ls -ltr > mothur-output-files-13.txt)
### Uncomment the next line to stop the batch here.
#quit()


### Removes the Mock group (the mock community, meant for error analysis) from the
### fasta, count and taxonomy files written by remove.lineage.
### The count file name previously began with "FieldTwo." instead of "FieldTwo2012.",
### so it did not match any file this batch produces; it now uses the same prefix as
### the fasta and taxonomy arguments.
### NOTE(review): a Mock group was apparently never added and the error rate never
### calculated.  If the count table has no Mock group this command will presumably
### warn and remove nothing.  The steps below keep reading the *.pick.pick.* files
### from remove.lineage, not this command's output -- confirm that is intended.
system(/bin/echo  "starting remove.groups `date`" >>/dev/stderr)
remove.groups(count=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.uchime.pick.pick.count_table, fasta=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.pick.pick.fasta, taxonomy=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.pick.pds.wang.pick.taxonomy, groups=Mock)


### 14 Splits the data into OTUs.  splitmethod=classify first partitions the
### sequences by their classification at taxonomy level 5 (taxlevel=5), then
### clusters within each partition up to a distance cutoff of 0.15.
### NOTE(review): this comment previously said "4th taxonomy level" while the
### command passes taxlevel=5 -- confirm which level is intended.
### make.shared reads the result as *.pick.pick.an.unique_list.list.
system(/bin/echo  "starting cluster.split `date`" >>/dev/stderr)
cluster.split(fasta=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.pick.pick.fasta,count=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.uchime.pick.pick.count_table, taxonomy=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.pick.pds.wang.pick.taxonomy, splitmethod=classify, taxlevel=5,large=T, cutoff=0.15, processors=2)

### Snapshot the working directory (ls -ltr: oldest first) after this step.
system(/bin/ls -ltr > mothur-output-files-14.txt)
### Uncomment the next line to stop the batch here.
#quit()

# and send email
### NOTE(review): no command in this batch actually sends an email; the line above
### looks like a leftover reminder.
        
### Tells us how many sequences are in each OTU from each group, at the 0.03
### distance label of the cluster.split list file.
system(/bin/echo  "starting make.shared `date`" >>/dev/stderr)
make.shared(list=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.pick.pick.an.unique_list.list, count=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.uchime.pick.pick.count_table, label=0.03)
        
### Organizes the OTU data into a consensus taxonomy for each OTU at label 0.03.
### list/count=current presumably reuse the files named in make.shared just above.
system(/bin/echo  "starting classify.otu `date`" >>/dev/stderr)
classify.otu(list=current, count=current, taxonomy=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.pick.pds.wang.pick.taxonomy, label=0.03)
        
### Organizes the data into phylotypes based on their taxonomic classification.
### taxonomy=current is presumably the taxonomy file named in classify.otu just above.
system(/bin/echo  "starting phylotype `date`" >>/dev/stderr)
phylotype(taxonomy=current)
        
### Gives us the genus level data: label=1 selects the first level of the list
### file, which is presumably the one just written by phylotype (list=current).
system(/bin/echo  "starting make.shared `date`" >>/dev/stderr)
make.shared(list=current, count=current, label=1)
        
### Assigns a consensus taxonomy to each phylotype at label 1.
system(/bin/echo  "starting classify.otu `date`" >>/dev/stderr)
classify.otu(list=current, count=current, taxonomy=current, label=1)


### Summarizes the final sequence set (the fasta written by remove.lineage),
### added by charliep 22 September 2013.
### count=current uses whichever count table mothur most recently recorded.
system(/bin/echo  "starting summary.seqs `date`" >>/dev/stderr)
summary.seqs(fasta=FieldTwo2012.trim.contigs.good.unique.good.filter.precluster.unique.pick.pick.fasta, count=current)

### Final snapshot of the working directory (ls -ltr: oldest first), then end the mothur session.
system(/bin/ls -ltr > mothur-output-files-15.txt)
quit()