// www.pudn.com > DNA_Analysis.rar > DNA.java
/**
*
*/
package dna.sequences.analysis;
import java.io.*;
import java.util.*;
import jaligner.Alignment;
import jaligner.Sequence;
import jaligner.SmithWatermanGotoh;
import jaligner.formats.Pair;
import jaligner.matrix.MatrixLoader;
import jaligner.util.SequenceParser;
/**
* @author Administrator
*
*/
public class DNA {
/**
* @param args
*/
@SuppressWarnings("unchecked")
public static void main(String[] args) {
if(args.length !=3)
{
System.out.println("args error! please check...");
System.exit(1);
}
// put codontable to the hashmap
HashMap codonTable = new HashMap();
File ct = new File("codontable");
if(!ct.exists())
{
System.out.println("the file codontable not exits,please check!");
System.exit(1);
}
FileReader ctr = null;
BufferedReader cbr = null;
try {
ctr = new FileReader(ct);
cbr = new BufferedReader(ctr);
} catch (FileNotFoundException e1) {
e1.printStackTrace();
}
String strK = null;
String strV = null;
try {
while((strK = cbr.readLine()) != null &amt;&amt; (strV = cbr.readLine()) != null)
{
codonTable.put(strK.toUpperCase(),strV.toUpperCase());
}
} catch (IOException e) {
e.printStackTrace();
}
//store the total number of DNA sequence
int totalSeq = 0;
//read input file
File infile = null;
FileReader fr = null;
BufferedReader br = null;
//write output file
File outfile = null;
FileWriter fw = null;
PrintWriter out = null;
int seqNum = 0;
int baseTotal = 0;
String partOne = "PART 1: GENENATION INFORMATION \n";
String partTwo = "PART 2: SEQUENCE DETAILS \n";
String seqTwo = null;
String partThree = "PART 3: SEQUENCE ALIGNMENT \n";
if(args[0].toLowerCase().equals("simple"))
{
infile = new File(args[1]);
try {
fr = new FileReader(infile);
br = new BufferedReader(fr);
} catch (FileNotFoundException e) {
e.printStackTrace();
}
try {
while(br.readLine() != null)
{
totalSeq ++;
}
} catch (IOException e1) {
e1.printStackTrace();
}
try {
fr.close();
br.close();
} catch (IOException e1) {
e1.printStackTrace();
}
String []seqString = new String[totalSeq];
infile = new File(args[1]);
try {
fr = new FileReader(infile);
br = new BufferedReader(fr);
} catch (FileNotFoundException e) {
e.printStackTrace();
}
outfile = new File(args[2]);
try {
fw = new FileWriter(outfile,true);
out = new PrintWriter(fw);
} catch (IOException e) {
e.printStackTrace();
}
//start to read line from the input file
String line = null;
try {
while((line = br.readLine())!= null)
{
//part 1
baseTotal += line.length();
seqNum ++;
//part 2
partTwo = partTwo + "Sequence " + String.valueOf(seqNum) + "\n";
partTwo = partTwo + "Number of bases : " + String.valueOf(line.length()) +"\n";
partTwo = partTwo + "GC Percentage : " + String.valueOf(GCPercentage(line)) + "\n";
partTwo = partTwo + "Base Details : " + " A: " + String.valueOf(BaseDetails('A',line));
partTwo = partTwo + " C: "+String.valueOf(BaseDetails('C',line));
partTwo = partTwo + " G: "+String.valueOf(BaseDetails('G',line));
partTwo = partTwo + " T: "+String.valueOf(BaseDetails('T',line))+"\n";
boolean startT = false;
String subStrV = null;
String subStrK = null;
seqTwo = "";
for(int i=0,j=i+3; i<line.length() &amt;&amt; j<=line.length();)
{
subStrK = line.substring(i,j);
if(startT)
{
if(subStrK.equals("TAA") || subStrK.equals("TAG") || subStrK.equals("TGA"))
{
startT = false;
break;
}
else
{
subStrV = (String)codonTable.get(subStrK);
seqTwo += subStrV;
i+=3;
j=i+3;
continue;
}
}
if(subStrK.equals("ATG"))
{
subStrV =(String) codonTable.get(subStrK);
seqTwo += subStrV;
startT = true;
i+=3;
j=i+3;
}
else
{
i++;
j=i+3;
}
}
partTwo = partTwo + seqTwo + "\n";
partTwo += "\n";
//part 3
seqString[seqNum-1] = seqTwo;
}
//start to write part 1 result to the output file
partOne += "The file contains ";
partOne += String.valueOf(seqNum);
partOne += " sequence\n";
partOne += "There are a total of ";
partOne += String.valueOf(baseTotal);
partOne += " bases in all sequences\n";
partOne +="There are on average ";
partOne +=String.valueOf(baseTotal/seqNum);
partOne +=" bases in each sequence\n";
out.println(partOne);
out.println(partTwo);
out.println(partThree);
try{
for(int i=0; i<seqString.length-1; i++)
{
String p1 = seqString[i];
if(p1.equals("") || p1 == null)
{
continue;
}
for(int j=i+1; j<seqString.length; j++)
{
String p2 = seqString[j];
if(p2.equals("") || p2 == null)
{
continue;
}
Sequence s1 = SequenceParser.parse(p1);
Sequence s2 = SequenceParser.parse(p2);
Alignment a1;
a1 = SmithWatermanGotoh.align(s1,s2,MatrixLoader.load("BLOSUM62"),10f,0.5f);
out.println(a1.getSummary());
out.println(new Pair().format(a1));
}
}
}catch(Exception ex)
{
System.out.println("Exception in Alignment");
System.out.println(ex.getMessage());
}
} catch (IOException e) {
e.printStackTrace();
}
try {
br.close();
fr.close();
fw.close();
out.close();
} catch (IOException e) {
e.printStackTrace();
}
System.out.println("process successfully!write result to the output.txt");
}
else if(args[0].toLowerCase().equals("fasta"))
{
String strTemp = null;
infile = new File(args[1]);
try {
fr = new FileReader(infile);
br = new BufferedReader(fr);
} catch (FileNotFoundException e) {
e.printStackTrace();
}
try {
while((strTemp = br.readLine()) != null)
{
if(strTemp.equals(""))
continue;
if(strTemp.charAt(0) == '>')
totalSeq ++;
}
} catch (IOException e1) {
e1.printStackTrace();
}
try {
fr.close();
br.close();
} catch (IOException e1) {
e1.printStackTrace();
}
String []seqString = new String[totalSeq];
String []seqItem = new String[totalSeq];
String []seqAmino = new String[totalSeq];
infile = new File(args[1]);
try {
fr = new FileReader(infile);
br = new BufferedReader(fr);
} catch (FileNotFoundException e) {
e.printStackTrace();
}
outfile = new File(args[2]);
try {
fw = new FileWriter(outfile,true);
out = new PrintWriter(fw);
} catch (IOException e) {
e.printStackTrace();
}
String line = null;
String seqTemp = "";
try {
while((line = br.readLine()) != null)
{
//part 1
if(line.equals(""))
continue;
if(line.charAt(0) == '>')
{
seqItem[seqNum] = line;
if(!seqTemp.equals(""))
{
seqString[seqNum-1] = seqTemp;
seqTemp = "";
}
seqNum++;
}
else
{
baseTotal += line.length();
seqTemp += line;
}
}
seqString[seqNum-1] = seqTemp;
//start to write part 1 result to the output file
partOne += "The file contains ";
partOne += String.valueOf(seqNum);
partOne += " sequence\n";
partOne += "There are a total of ";
partOne += String.valueOf(baseTotal);
partOne += " bases in all sequences\n";
partOne +="There are on average ";
partOne +=String.valueOf(baseTotal/seqNum);
partOne +=" bases in each sequence\n";
out.println(partOne);
//part 2
for(int i=0; i<seqString.length; i++)
{
partTwo = partTwo + seqItem[i] + "\n";
partTwo = partTwo + "Number of bases : " + String.valueOf(seqString[i].length()) +"\n";
partTwo = partTwo + "GC Percentage : " + String.valueOf(GCPercentage(seqString[i])) + "\n";
partTwo = partTwo + "Base Details : " + " A: " + String.valueOf(BaseDetails('A',seqString[i]));
partTwo = partTwo + " C: "+String.valueOf(BaseDetails('C',seqString[i]));
partTwo = partTwo + " G: "+String.valueOf(BaseDetails('G',seqString[i]));
partTwo = partTwo + " T: "+String.valueOf(BaseDetails('T',seqString[i]))+"\n";
boolean startT = false;
String subStrV = null;
String subStrK = null;
seqTwo = "";
for(int j=0,k=j+3; j<seqString[i].length() &amt;&amt; k<=seqString[i].length();)
{
subStrK = seqString[i].substring(j,k);
if(startT)
{
if(subStrK.equals("TAA") || subStrK.equals("TAG") || subStrK.equals("TGA"))
{
startT = false;
break;
}
else
{
subStrV = (String)codonTable.get(subStrK);
seqTwo += subStrV;
j+=3;
k=j+3;
continue;
}
}
if(subStrK.equals("ATG"))
{
subStrV =(String) codonTable.get(subStrK);
seqTwo += subStrV;
startT = true;
j+=3;
k=j+3;
}
else
{
j++;
k=j+3;
}
}
partTwo = partTwo + seqTwo + "\n";
partTwo += "\n";
//part 3
seqAmino[i] = seqTwo;
}
out.println(partTwo);
//part 3
out.println(partThree);
try{
for(int i=0; i<seqAmino.length-1; i++)
{
String p1 = seqAmino[i];
if(p1.equals("") || p1 == null)
{
continue;
}
for(int j=i+1; j<seqAmino.length; j++)
{
String p2 = seqAmino[j];
if(p2.equals("") || p2 == null)
{
continue;
}
Sequence s1 = SequenceParser.parse(p1);
Sequence s2 = SequenceParser.parse(p2);
Alignment a1;
a1 = SmithWatermanGotoh.align(s1,s2,MatrixLoader.load("BLOSUM62"),10f,0.5f);
out.println(seqItem[i]);
out.println(seqItem[j]);
out.println(a1.getSummary());
out.println(new Pair().format(a1));
}
}
}catch(Exception ex)
{
System.out.println("Exception in Alignment");
System.out.println(ex.getMessage());
}
} catch (IOException e) {
e.printStackTrace();
}
try {
br.close();
fr.close();
fw.close();
out.close();
} catch (IOException e) {
e.printStackTrace();
}
System.out.println("process successfully!write result to the output file");
}
else
{
System.out.println("Wrong File Type,Error!");
System.exit(1);
}
}
//count GC percentage
@SuppressWarnings("unused")
private static double GCPercentage(String lineSeq)
{
double result = -1;
double GC = 0;
for(int i=0; i<lineSeq.length(); i++)
{
if(lineSeq.charAt(i) == 'G' || lineSeq.charAt(i) == 'C')
GC++;
}
result = GC/lineSeq.length();
return result;
}
private static int BaseDetails(char base,String line)
{
int result = 0;
for(int i=0; i<line.length(); i++)
{
if(line.charAt(i) == base)
result++;
}
return result;
}
}