Skip to content

Commit

Permalink
added vcf bed intersection utility
Browse files Browse the repository at this point in the history
  • Loading branch information
manojbhosale committed May 23, 2019
1 parent 9be5d9a commit 78db660
Show file tree
Hide file tree
Showing 15 changed files with 578 additions and 36 deletions.
287 changes: 287 additions & 0 deletions src/bedUtils/BedUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,287 @@
package bedUtils;
import java.beans.IntrospectionException;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

import liftoverutils.*;

/**
 * Utilities for merging and intersecting the intervals of BED files.
 *
 * <p>Each file is loaded into one {@link IntervalTree} per chromosome (see
 * {@link #getIntervalTree(File)}); the merge and intersect operations then query those trees.
 *
 * <p>Parsing rules shared by every reader in this class: lines that are blank, start with
 * {@code #}, or are {@code track}/{@code browser} header lines are skipped; every other line
 * must carry at least three tab-separated fields (chromosome, start, end). I/O failures are
 * reported on stderr and whatever was read up to that point is returned, which is the
 * best-effort behaviour callers of this class have always seen.
 */
public class BedUtils {

    /** Input used by {@link #main(String[])} when no path is passed on the command line. */
    private static final String DEFAULT_BED_PATH =
            "C:\\Users\\manojkumar_bhosale\\Desktop\\ForIntervalMerging\\DEF_662-1322126769L_AllTracks_amplicons.bed";

    /**
     * Merges the intervals of one BED file and prints the result, one interval per line.
     *
     * @param args optional; {@code args[0]} is the BED file to merge. Without arguments the
     *             historical hard-coded default path is used.
     */
    public static void main(String[] args) {
        File bedFile = new File(args.length > 0 ? args[0] : DEFAULT_BED_PATH);
        Set<BedInterval> mergedIntervals = mergeBedFiles(bedFile);
        mergedIntervals.forEach(System.out::println);
    }

    /**
     * Intersects the merged intervals of {@code one} with the intervals of {@code two}.
     *
     * <p>Each merged interval of file one is used to query file two's tree for the same
     * chromosome; when there are hits, the hits plus the query interval are handed to
     * {@code IntervalUtils.intersectIntervals} and its result is added to the output.
     * Chromosomes that do not occur in file two contribute nothing.
     *
     * @param one first BED file (merged before intersecting)
     * @param two second BED file
     * @return the intersected intervals, in {@link BedInterval} natural order
     */
    public static Set<BedInterval> intersectBedFiles(File one, File two){
        Map<String, IntervalTree<String>> treesOfTwo = getIntervalTree(two);
        Map<String, IntervalTree<String>> treesOfOne = getIntervalTree(one);
        Set<BedInterval> mergedOne = mergeSingleFile(treesOfOne, one);

        Set<BedInterval> result = new TreeSet<>();
        for (BedInterval region : mergedOne) {
            IntervalTree<String> tree = treesOfTwo.get(region.getChromosome());
            if (tree == null) {
                // Chromosome is absent from file two, so there is nothing to intersect with.
                continue;
            }
            List<Interval<String>> hits = tree.getIntervals(region.getStart(), region.getStop());
            if (hits.isEmpty()) {
                continue;
            }
            // Work on a copy: the returned list may be owned by the tree, and we append to it.
            List<Interval<String>> toIntersect = new java.util.ArrayList<>(hits);
            toIntersect.add(new Interval<String>(region.getStart(), region.getStop(), ""));
            Interval<String> common = IntervalUtils.intersectIntervals(toIntersect);
            result.add(new BedInterval(region.getChromosome(), common.getStart(), common.getEnd()));
        }
        return result;
    }

    /**
     * Merges all overlapping intervals of a single BED file.
     *
     * @param one the BED file to merge
     * @return the coalesced intervals; empty when the file holds no intervals or cannot be read
     */
    public static Set<BedInterval> mergeBedFiles(File one){
        Map<String, IntervalTree<String>> bedMap = getIntervalTree(one);
        return coalesce(mergeSingleFile(bedMap, one));
    }

    /**
     * Merges the intervals of two BED files into one set of non-overlapping intervals.
     *
     * <p>Each file is first merged on its own, the two results are pooled, and the pool is
     * coalesced with the same sweep {@link #mergeBedFiles(File)} uses, so intervals that only
     * occur in file two (or extend past their file-one overlap) keep their full extent.
     *
     * @param one first BED file
     * @param two second BED file
     * @return the coalesced union of both files' intervals
     */
    public static Set<BedInterval> mergeBedFiles(File one, File two){
        Set<BedInterval> pooled = mergeSingleFile(getIntervalTree(one), one);
        pooled.addAll(mergeSingleFile(getIntervalTree(two), two));
        return coalesce(pooled);
    }

    /**
     * Re-reads {@code one} and, for every record, merges all intervals of {@code bedMap} that
     * overlap it into a single interval.
     *
     * <p>The result can still contain intervals that overlap each other (for example chains
     * where A overlaps B and B overlaps C but A does not overlap C).
     *
     * @param bedMap per-chromosome interval trees, normally built from the same file
     * @param one    the BED file whose records drive the queries
     * @return the merged intervals; records whose chromosome is missing from {@code bedMap}
     *         or that overlap nothing in the tree are skipped
     */
    static Set<BedInterval> mergeSingleFile(Map<String, IntervalTree<String>> bedMap, File one) {
        Set<BedInterval> result = new TreeSet<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(one))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = splitBedLine(line);
                if (fields == null) {
                    continue;
                }
                String chromosome = fields[0];
                long start = Long.parseLong(fields[1]);
                long end = Long.parseLong(fields[2]);

                IntervalTree<String> tree = bedMap.get(chromosome);
                if (tree == null) {
                    // The supplied map knows nothing about this chromosome.
                    continue;
                }
                List<Interval<String>> overlaps = tree.getIntervals(start, end);
                if (!overlaps.isEmpty()) {
                    Interval<String> merged = IntervalUtils.mergeIntervals(overlaps);
                    result.add(new BedInterval(chromosome, merged.getStart(), merged.getEnd()));
                }
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException; best effort: report and return what we have.
            e.printStackTrace();
        }
        return result;
    }

    /**
     * Folds a list of intervals into one by repeatedly calling
     * {@code BedInterval.mergeOverlappingIntervals}.
     *
     * <p>A pair that cannot be merged is reported on stderr and skipped.
     *
     * @param intervals intervals to fold; must contain at least one element
     * @return the accumulated interval
     * @throws IndexOutOfBoundsException if {@code intervals} is empty
     */
    public BedInterval mergeIntervals(List<BedInterval> intervals) {
        BedInterval merged = intervals.get(0);
        // Start at the second element: merging the first interval with itself adds nothing.
        for (BedInterval next : intervals.subList(1, intervals.size())) {
            try {
                merged = BedInterval.mergeOverlappingIntervals(merged, next);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        return merged;
    }

    /**
     * Placeholder: sorting is not implemented yet and this method currently does nothing.
     *
     * @param bedFile the BED file that would be sorted
     */
    static void sortBedFile(File bedFile) {
        // TODO: not implemented.
    }

    /**
     * Adds {@code interval} to the tree of its chromosome, creating that tree on first use.
     *
     * @param bedMap   per-chromosome interval trees, updated in place
     * @param interval the interval to register
     */
    static void addBedInterval(Map<String, IntervalTree<String>> bedMap, BedInterval interval) {
        IntervalTree<String> tree =
                bedMap.computeIfAbsent(interval.getChromosome(), chrom -> new IntervalTree<String>());
        tree.addInterval(new Interval<String>(interval.getStart(), interval.getStop(), ""));
    }

    /**
     * Loads a BED file into one interval tree per chromosome.
     *
     * @param bedFile the BED file to read
     * @return chromosome name mapped to its interval tree; empty if the file cannot be opened
     */
    public static Map<String, IntervalTree<String>> getIntervalTree(File bedFile){
        Map<String, IntervalTree<String>> bedMap = new HashMap<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(bedFile))) {
            loadIntervals(reader, bedMap);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return bedMap;
    }

    /**
     * Loads two BED files into one shared set of per-chromosome interval trees.
     *
     * <p>Both files are opened before anything is read, so if either cannot be opened the
     * returned map is empty.
     *
     * @param bedFile1 first BED file
     * @param bedFile2 second BED file
     * @return chromosome name mapped to the tree holding the intervals of both files
     */
    public static Map<String, IntervalTree<String>> getIntervalTree(File bedFile1, File bedFile2){
        Map<String, IntervalTree<String>> bedMap = new HashMap<>();
        try (BufferedReader first = new BufferedReader(new FileReader(bedFile1));
                BufferedReader second = new BufferedReader(new FileReader(bedFile2))) {
            loadIntervals(first, bedMap);
            loadIntervals(second, bedMap);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return bedMap;
    }

    /** Reads every BED record from {@code reader} and registers it in {@code bedMap}. */
    private static void loadIntervals(BufferedReader reader, Map<String, IntervalTree<String>> bedMap)
            throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            String[] fields = splitBedLine(line);
            if (fields == null) {
                continue;
            }
            long start = Long.parseLong(fields[1]);
            long end = Long.parseLong(fields[2]);
            addBedInterval(bedMap, new BedInterval(fields[0], start, end));
        }
    }

    /**
     * Splits one BED line into its tab-separated fields.
     *
     * @return the fields, or {@code null} when the line carries no record (blank, {@code #}
     *         comment, {@code track} or {@code browser} header)
     * @throws IllegalArgumentException if a record line has fewer than three fields
     */
    private static String[] splitBedLine(String line) {
        if (line.trim().isEmpty() || line.startsWith("#")
                || startsWithKeyword(line, "track") || startsWithKeyword(line, "browser")) {
            return null;
        }
        String[] fields = line.split("\t");
        if (fields.length < 3) {
            throw new IllegalArgumentException(
                    "Malformed BED line (expected at least 3 tab-separated fields): " + line);
        }
        return fields;
    }

    /** True when {@code line} is exactly {@code keyword} or starts with it followed by whitespace. */
    private static boolean startsWithKeyword(String line, String keyword) {
        if (!line.startsWith(keyword)) {
            return false;
        }
        return line.length() == keyword.length()
                || Character.isWhitespace(line.charAt(keyword.length()));
    }

    /**
     * Sweeps the intervals in iteration order and fuses each run of intersecting neighbours.
     *
     * <p>NOTE(review): correctness relies on {@code BedInterval}'s natural order placing
     * intersecting intervals next to each other and on {@code intersects} rejecting intervals
     * of different chromosomes - confirm against {@code BedInterval}.
     *
     * <p>If a pair cannot be merged the failure is reported on stderr and both intervals are
     * kept, so no input interval is ever dropped.
     */
    private static Set<BedInterval> coalesce(Set<BedInterval> sorted) {
        Set<BedInterval> result = new TreeSet<>();
        BedInterval current = null;
        for (BedInterval next : sorted) {
            if (current == null) {
                current = next;
                continue;
            }
            if (current.intersects(next)) {
                try {
                    current = BedInterval.mergeOverlappingIntervals(current, next);
                    continue;
                } catch (Exception e) {
                    e.printStackTrace();
                    // Fall through: emit 'current' unmerged and carry on from 'next'.
                }
            }
            result.add(current);
            current = next;
        }
        // 'current' stays null for empty input; a TreeSet would reject a null element.
        if (current != null) {
            result.add(current);
        }
        return result;
    }
}
18 changes: 18 additions & 0 deletions src/bedUtils/Chromosome.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package bedUtils;

/**
 * A chromosome name with a natural, karyotype-style ordering.
 *
 * <p>Ordering ignores a leading {@code chr} prefix. Purely numeric names sort first, by
 * numeric value ({@code chr2} before {@code chr10}); all other names ({@code chrX},
 * {@code chrM}, ...) follow in lexicographic order. Names that are still tied after that
 * (for example {@code 1} and {@code chr1}) are ordered by their full name, so two
 * chromosomes compare as equal only when their names are identical.
 *
 * <p>Note: this class does not override {@code equals}/{@code hashCode}, so the ordering is
 * not consistent with {@code equals} for distinct instances that share a name.
 */
public class Chromosome implements Comparable<Chromosome>{

    String chromName;

    /**
     * Compares two chromosomes by name as described on the class.
     *
     * @param arg0 the chromosome to compare with
     * @return a negative value, zero or a positive value as this chromosome sorts before,
     *         equal to or after {@code arg0}
     * @throws NullPointerException if {@code arg0} or either chromosome name is {@code null}
     */
    @Override
    public int compareTo(Chromosome arg0) {
        String mine = stripChrPrefix(chromName);
        String theirs = stripChrPrefix(arg0.chromName);
        boolean mineNumeric = isNumeric(mine);
        boolean theirsNumeric = isNumeric(theirs);

        int order;
        if (mineNumeric && theirsNumeric) {
            order = Long.compare(Long.parseLong(mine), Long.parseLong(theirs));
        } else if (mineNumeric != theirsNumeric) {
            // Numbered chromosomes come before named ones such as X, Y or M.
            order = mineNumeric ? -1 : 1;
        } else {
            order = mine.compareTo(theirs);
        }
        // Tie-break on the full name to keep the ordering total (e.g. "1" vs "chr1").
        return order != 0 ? order : chromName.compareTo(arg0.chromName);
    }

    /** Removes one leading "chr" from {@code name}, if present. */
    private static String stripChrPrefix(String name) {
        return name.startsWith("chr") ? name.substring(3) : name;
    }

    /** True when {@code text} consists only of ASCII digits and is short enough to fit a long. */
    private static boolean isNumeric(String text) {
        if (text.isEmpty() || text.length() > 18) {
            return false;
        }
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }
}
27 changes: 19 additions & 8 deletions src/gatkUtils/GatkThread.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,14 +67,14 @@ public void run() {
runAndGobbleCommand(mdupCommand, r);

stepFileName = newStepFileName;
} else if (obj1.keySet().contains("BuildBamIndex")) {
} else if (obj1.keySet().contains("BuildBamIndex") && ((JSONObject)obj1.get("BuildBamIndex")).values().contains("true")) {
String bamIndexCommand = "C:\\Program Files\\Java\\jdk1.8.0_192\\bin\\java.exe "
+ GatkUtils.maxHeapSpace + " -jar " + GatkUtils.picardPath + " BuildBamIndex " + " I="
+ inputFileLocation.resolve(stepFileName);
System.out.println(bamIndexCommand);

runAndGobbleCommand(bamIndexCommand, r);
} else if (obj1.keySet().contains("RealignerTargetCreator")) {
} else if (obj1.keySet().contains("RealignerTargetCreator") && ((JSONObject)obj1.get("RealignerTargetCreator")).values().contains("true")) {
listFileName = inputFileLocation.resolve(FilenameUtils.getBaseName(stepFileName)).toString() + ".list";
String realignerCommand = "C:\\Program Files\\Java\\jdk1.8.0_192\\bin\\java.exe "
+ GatkUtils.maxHeapSpace + " -jar " + GatkUtils.gatkPath + " -T RealignerTargetCreator "
Expand All @@ -83,7 +83,7 @@ public void run() {
System.out.println(realignerCommand);
runAndGobbleCommand(realignerCommand, r);

} else if (obj1.keySet().contains("IndelRealigner")) {
} else if (obj1.keySet().contains("IndelRealigner") && ((JSONObject)obj1.get("IndelRealigner")).values().contains("true")) {
String newStepFileName = inputFileLocation.resolve(FilenameUtils.getBaseName(stepFileName)).toString()
+ ".realigned.bam";
String indelRealignerCommand = "C:\\Program Files\\Java\\jdk1.8.0_192\\bin\\java.exe "
Expand All @@ -96,7 +96,7 @@ public void run() {
stepFileName = newStepFileName;
}

else if (obj1.keySet().contains("FixMateInformation")) {
else if (obj1.keySet().contains("FixMateInformation") && ((JSONObject)obj1.get("FixMateInformation")).values().contains("true")) {
// java -Xmx1g -jar
// tools\picard-tools-1.82\picard-tools-1.82\FixMateInformation.jar
// INPUT=%sampleName%.sorted.realigned.bam
Expand All @@ -112,8 +112,15 @@ else if (obj1.keySet().contains("FixMateInformation")) {
runAndGobbleCommand(mateFixCommand, r);

stepFileName = newStepFileName;

String bamIndexCommand = "C:\\Program Files\\Java\\jdk1.8.0_192\\bin\\java.exe "
+ GatkUtils.maxHeapSpace + " -jar " + GatkUtils.picardPath + " BuildBamIndex " + " I="
+ inputFileLocation.resolve(stepFileName);
System.out.println(bamIndexCommand);
runAndGobbleCommand(bamIndexCommand, r);


} else if (obj1.keySet().contains("BaseRecalibrator")) {
} else if (obj1.keySet().contains("BaseRecalibrator") && ((JSONObject)obj1.get("BaseRecalibrator")).values().contains("true")) {
recalGrpFile = inputFileLocation.resolve(FilenameUtils.getBaseName(stepFileName)).toString()
+ ".recal.grp";
String baseRecalCommand = "C:\\Program Files\\Java\\jdk1.8.0_192\\bin\\java.exe "
Expand All @@ -122,7 +129,7 @@ else if (obj1.keySet().contains("FixMateInformation")) {
+ inputFileLocation.resolve(recalGrpFile) + " -bqsrBAQGOP 40 ";
runAndGobbleCommand(baseRecalCommand, r);

} else if (obj1.keySet().contains("PrintReads")) {
} else if (obj1.keySet().contains("PrintReads") && ((JSONObject)obj1.get("PrintReads")).values().contains("true")) {
// java -Xmx1g -jar tools\GenomeAnalysisTK.jar -T PrintReads -R %genomeRef% -I
// %sampleName%.sorted.realigned.fixed.bam -BQSR %sampleName%.recal.grp -o
// %sampleName%.sorted.realigned.fixed.bqsr.bam
Expand All @@ -136,16 +143,20 @@ else if (obj1.keySet().contains("FixMateInformation")) {
runAndGobbleCommand(printReadCommand, r);

stepFileName = newStepFile;
} else if (obj1.keySet().contains("UnifiedGenotyper")) {
} else if (obj1.keySet().contains("UnifiedGenotyper") && ((JSONObject)obj1.get("UnifiedGenotyper")).values().contains("true")) {
// java -Xmx1g -jar tools\GenomeAnalysisTK.jar -R %genomeRef% -T
// UnifiedGenotyper -I %sampleName%.sorted.realigned.fixed.bqsr.bam -mbq 13 -glm
// BOTH -indelGCP 20 -indelGOP 40 -o %sampleName%_snps.raw.vcf -L %targetBed%
String resultVcf = inputFileLocation.resolve(inputFileNameNoExt + ".raw.vcf").toString();
String UnifiedGenotyperCommand = "C:\\Program Files\\Java\\jdk1.8.0_192\\bin\\java.exe "
+ GatkUtils.maxHeapSpace + " -jar " + GatkUtils.gatkPath + " -T UnifiedGenotyper " + " -R "
+ GatkUtils.referencePath + " -I " + inputFileLocation.resolve(stepFileName) + " -o "
+ inputFileLocation.resolve(resultVcf) + " -BQSR " + inputFileLocation.resolve(recalGrpFile)
+ inputFileLocation.resolve(resultVcf)
+ " -mbq 13 -glm BOTH -indelGCP 20 -indelGOP 40 " + " -L " + GatkUtils.targetBedFile;
if(obj1.keySet().contains("BaseRecalibrator") && ((JSONObject)obj1.get("BaseRecalibrator")).values().contains("true")) {
UnifiedGenotyperCommand = UnifiedGenotyperCommand+" -BQSR " + inputFileLocation.resolve(recalGrpFile);
}

runAndGobbleCommand(UnifiedGenotyperCommand, r);

}
Expand Down
Loading

0 comments on commit 78db660

Please sign in to comment.