Skip to content

Commit

Permalink
Fix and add tests around AP delta encoding
Browse files (browse the repository at this point in the history)
  • Loading branch information
athos committed Sep 3, 2024
1 parent 6b3b4ca commit 9209c52
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 77 deletions.
Binary file modified test-resources/cram/medium.cram
Binary file not shown.
Binary file modified test-resources/cram/medium.cram.crai
Binary file not shown.
4 changes: 2 additions & 2 deletions test/cljam/algo/cram_indexer_test.clj
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,11 +12,11 @@
(doall (#'crai/read-index-entries r))))

(deftest create-index-test
(let [f (io/file common/temp-dir "medium.cram.crai")]
(let [f (io/file common/temp-dir "medium_without_index.cram.crai")]
(common/with-before-after {:before (common/prepare-cache!)
:after (common/clean-cache!)}
(is (thrown-with-msg? Exception #"Cannot create CRAM index file .*"
(indexer/create-index common/medium-cram-file f)))
(indexer/create-index common/medium-without-index-cram-file f)))
(indexer/create-index common/medium-cram-file f
:skip-sort-order-check? true)
(is (= (read-index-entries common/medium-crai-file)
Expand Down
100 changes: 50 additions & 50 deletions test/cljam/io/crai_test.clj
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,121 +17,121 @@
[{:chr "chr1"
:start 546609
:end (+ 546609 205262429)
:container-offset 324
:container-offset 327
:slice-offset 563
:size 22007}
:size 21475}
{:chr "chr1"
:start 206547069
:end (+ 206547069 42644506)
:container-offset 324
:slice-offset 22570
:size 7349}]
:container-offset 327
:slice-offset 22038
:size 7234}]

"chr1" 550000 600000
[{:chr "chr1"
:start 546609
:end (+ 546609 205262429)
:container-offset 324
:container-offset 327
:slice-offset 563
:size 22007}]
:size 21475}]

"chr1" 210000000 240000000
[{:chr "chr1"
:start 206547069
:end (+ 206547069 42644506)
:container-offset 324
:slice-offset 22570
:size 7349}]
:container-offset 327
:slice-offset 22038
:size 7234}]

"chr1" 200000000 210000000
[{:chr "chr1"
:start 546609
:end (+ 546609 205262429)
:container-offset 324
:container-offset 327
:slice-offset 563
:size 22007}
:size 21475}
{:chr "chr1"
:start 206547069
:end (+ 206547069 42644506)
:container-offset 324
:slice-offset 22570
:size 7349}]
:container-offset 327
:slice-offset 22038
:size 7234}]

"*" 0 0
[{:chr "*"
:start 0
:end 0
:container-offset 354657
:slice-offset 563
:size 23119}
:container-offset 368626
:slice-offset 541
:size 15676}
{:chr "*"
:start 0
:end 0
:container-offset 378365
:container-offset 384869
:slice-offset 171
:size 23494}
:size 23422}
{:chr "*"
:start 0
:end 0
:container-offset 378365
:slice-offset 23665
:size 23213}
:container-offset 384869
:slice-offset 23593
:size 23258}
{:chr "*"
:start 0
:end 0
:container-offset 378365
:slice-offset 46878
:size 23051}
:container-offset 384869
:slice-offset 46851
:size 23200}
{:chr "*"
:start 0
:end 0
:container-offset 378365
:slice-offset 69929
:size 23563}
:container-offset 384869
:slice-offset 70051
:size 23643}
{:chr "*"
:start 0
:end 0
:container-offset 378365
:slice-offset 93492
:size 24231}
:container-offset 384869
:slice-offset 93694
:size 24213}
{:chr "*"
:start 0
:end 0
:container-offset 378365
:slice-offset 117723
:size 24078}
:container-offset 384869
:slice-offset 117907
:size 24066}
{:chr "*"
:start 0
:end 0
:container-offset 378365
:slice-offset 141801
:size 23871}
:container-offset 384869
:slice-offset 141973
:size 23886}
{:chr "*"
:start 0
:end 0
:container-offset 378365
:slice-offset 165672
:size 24365}
:container-offset 384869
:slice-offset 165859
:size 24339}
{:chr "*"
:start 0
:end 0
:container-offset 378365
:slice-offset 190037
:size 12326}])))
:container-offset 384869
:slice-offset 190198
:size 11639}])))

(deftest write-index-entries-test
(let [entries [{:ref-seq-id 0, :start 546609, :span 205262429,
:container-offset 324, :slice-offset 563, :size 22007}
:container-offset 327, :slice-offset 563, :size 21475}
{:ref-seq-id 0, :start 206547069, :span 42644506,
:container-offset 324, :slice-offset 22570, :size 7349}
:container-offset 327, :slice-offset 22038, :size 7234}
{:ref-seq-id 1, :start 67302, :span 231638920,
:container-offset 30272, :slice-offset 563, :size 21618}
:container-offset 29628, :slice-offset 563, :size 21098}
{:ref-seq-id -1, :start 0, :span 0,
:container-offset 354657, :slice-offset 563, :size 23119}
:container-offset 368626, :slice-offset 541, :size 15676}
{:ref-seq-id -1, :start 0, :span 0,
:container-offset 378365, :slice-offset 171, :size 23494}
:container-offset 384869, :slice-offset 171, :size 23422}
{:ref-seq-id -1, :start 0, :span 0,
:container-offset 378365, :slice-offset 23665, :size 23213}]
:container-offset 384869, :slice-offset 23593, :size 23258}]
f (io/file common/temp-dir "test.cram.crai")]
(common/with-before-after {:before (common/prepare-cache!)
:after (common/clean-cache!)}
Expand Down
23 changes: 12 additions & 11 deletions test/cljam/io/cram/encode/record_test.clj
Original file line number | Diff line number | Diff line change
Expand Up @@ -71,9 +71,9 @@
4]))

(defn- preprocess-slice-records [cram-header records]
(let [container-ctx (context/make-container-context cram-header {} test-seq-resolver)]
(record/preprocess-slice-records container-ctx records)
(context/finalize-container-context container-ctx
(let [container-ctx (context/make-container-context cram-header test-seq-resolver)
stats (record/preprocess-slice-records container-ctx records)]
(context/finalize-container-context container-ctx [stats]
(constantly :raw)
(constantly (constantly (constantly {:external :raw}))))))

Expand Down Expand Up @@ -153,7 +153,8 @@

(deftest encode-slice-records-test
(testing "mapped reads"
(let [cram-header {:SQ
(let [cram-header {:HD {:SO "coordinate"}
:SQ
[{:SN "ref"}
{:SN "ref2"}]
:RG
Expand Down Expand Up @@ -183,12 +184,12 @@
{:qname "q005", :flag 73, :rname "ref", :pos 20, :end 24, :mapq 0,
:cigar "5M", :rnext "*", :pnext 0, :tlen 0, :seq "CTGTG", :qual "AEEEE"
:options []}])
slice-ctx (context/make-slice-context (preprocess-slice-records cram-header records))
stats (record/encode-slice-records slice-ctx records)
slice-ctx (context/make-slice-context (preprocess-slice-records cram-header records) 0)
_ (record/encode-slice-records slice-ctx records)
ds-res (walk/prewalk #(if (fn? %) (%) %) (:ds-encoders slice-ctx))
tag-res (walk/prewalk #(if (fn? %) (%) %) (:tag-encoders slice-ctx))]
(is (= {:ri 0, :start 1, :end 24, :nbases 25, :nrecords 5}
(into {} stats)))
(into {} (:alignment-stats slice-ctx))))

(is (= 1 (count (get ds-res :BF))))
(is (= 1 (get-in ds-res [:BF 0 :content-id])))
Expand All @@ -209,7 +210,7 @@

(is (= 1 (count (get ds-res :AP))))
(is (= 5 (get-in ds-res [:AP 0 :content-id])))
(is (= [1 5 10 15 20] (seq (get-in ds-res [:AP 0 :data]))))
(is (= [0 4 5 5 5] (seq (get-in ds-res [:AP 0 :data]))))

(is (= 1 (count (get ds-res :RG))))
(is (= 6 (get-in ds-res [:RG 0 :content-id])))
Expand Down Expand Up @@ -354,12 +355,12 @@
{:qname "q003", :flag 77, :rname "*", :pos 0, :end 0, :mapq 0,
:cigar "*", :rnext "*", :pnext 0, :tlen 0, :seq "GCACA", :qual "BCCFD"
:options []}])
slice-ctx (context/make-slice-context (preprocess-slice-records cram-header records))
stats (record/encode-slice-records slice-ctx records)
slice-ctx (context/make-slice-context (preprocess-slice-records cram-header records) 0)
_ (record/encode-slice-records slice-ctx records)
ds-res (walk/prewalk #(if (fn? %) (%) %) (:ds-encoders slice-ctx))
tag-res (walk/prewalk #(if (fn? %) (%) %) (:tag-encoders slice-ctx))]
(is (= {:ri -1, :start 0, :end 0, :nbases 25, :nrecords 5}
(into {} stats)))
(into {} (:alignment-stats slice-ctx))))

(is (= 1 (count (get ds-res :BF))))
(is (= 1 (get-in ds-res [:BF 0 :content-id])))
Expand Down
45 changes: 31 additions & 14 deletions test/cljam/io/cram_test.clj
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,6 +11,7 @@
;; Scratch CRAM output paths, all located under common/temp-dir.
;; temp-cram-file and temp-sorted-cram-file are the write targets of the
;; "unsorted" and "sorted by coordinate" cases in writer-test, respectively.
;; NOTE(review): temp-cram-file-2 / temp-cram-file-3 are presumably used by
;; other tests in this file that are not visible in this hunk -- confirm.
(def ^:private temp-cram-file (io/file common/temp-dir "test.cram"))
(def ^:private temp-cram-file-2 (io/file common/temp-dir "test2.cram"))
(def ^:private temp-cram-file-3 (io/file common/temp-dir "test3.cram"))
(def ^:private temp-sorted-cram-file (io/file common/temp-dir "test.sorted.cram"))

(defn- fixup-bam-aln [aln]
(-> (into {} aln)
Expand Down Expand Up @@ -74,20 +75,36 @@
(deftest writer-test
(with-before-after {:before (prepare-cache!)
:after (clean-cache!)}
(with-open [r (cram/reader common/test-cram-file
{:reference common/test-fa-file})
w (cram/writer temp-cram-file
{:reference common/test-fa-file})]
(cram/write-header w (cram/read-header r))
(cram/write-alignments w (cram/read-alignments r) (cram/read-header r)))
(with-open [r (cram/reader common/test-cram-file
{:reference common/test-fa-file})
r' (cram/reader temp-cram-file
{:reference common/test-fa-file})]
(is (= (cram/read-header r)
(cram/read-header r')))
(is (= (cram/read-alignments r)
(cram/read-alignments r'))))))
(testing "unsorted"
(with-open [r (cram/reader common/test-cram-file
{:reference common/test-fa-file})
w (cram/writer temp-cram-file
{:reference common/test-fa-file})]
(cram/write-header w (cram/read-header r))
(cram/write-alignments w (cram/read-alignments r) (cram/read-header r)))
(with-open [r (cram/reader common/test-cram-file
{:reference common/test-fa-file})
r' (cram/reader temp-cram-file
{:reference common/test-fa-file})]
(is (= (cram/read-header r)
(cram/read-header r')))
(is (= (cram/read-alignments r)
(cram/read-alignments r')))))
(testing "sorted by coordinate"
(with-open [r (cram/reader common/test-sorted-cram-file
{:reference common/test-fa-file})
w (cram/writer temp-sorted-cram-file
{:reference common/test-fa-file})]
(cram/write-header w (cram/read-header r))
(cram/write-alignments w (cram/read-alignments r) (cram/read-header r)))
(with-open [r (cram/reader common/test-sorted-cram-file
{:reference common/test-fa-file})
r' (cram/reader temp-sorted-cram-file
{:reference common/test-fa-file})]
(is (= (cram/read-header r)
(cram/read-header r')))
(is (= (cram/read-alignments r)
(cram/read-alignments r')))))))

(deftest-remote writer-with-multiple-containers-test
(with-before-after {:before (do (prepare-cavia!)
Expand Down

0 comments on commit 9209c52

Please sign in to comment.