diff --git a/src/asami/durable/encoder.clj b/src/asami/durable/encoder.clj
index 82fbe8f..098e0c9 100644
--- a/src/asami/durable/encoder.clj
+++ b/src/asami/durable/encoder.clj
@@ -11,7 +11,7 @@
            [java.nio ByteBuffer]
            [java.nio.charset Charset]))
 
-(def utf8 (Charset/forName ("UTF-8")))
+(def utf8 (Charset/forName "UTF-8"))
 
 (def type->code
   {Long (byte 0)
diff --git a/src/asami/durable/flat.clj b/src/asami/durable/flat.clj
index 4020638..1ea5a15 100644
--- a/src/asami/durable/flat.clj
+++ b/src/asami/durable/flat.clj
@@ -1,17 +1,19 @@
 (ns ^{:doc "Manages a memory-mapped file that holds write once data"
       :author "Paula Gearon"}
   asami.durable.flat
-  (:require [asami.durable.pages :refer [Paged]])
-  (:import [java.io.RandomAccessFile]
+  (:require [asami.durable.pages :refer [Paged refresh! read-byte read-bytes-into]])
+  (:import [java.io RandomAccessFile]
            [java.nio.channels FileChannel FileChannel$MapMode]))
 
-(def ^:const read-only FileChannel$MapMode/READ_ONLY)
+(def read-only FileChannel$MapMode/READ_ONLY)
+
+(def ^:const default-region-size "Default region of 1GB" 0x40000000)
 
 ;; These functions do update the PagedFile state, but only to expand the mapped region.
 (defrecord PagedFile [^RandomAccessFile f regions region-size]
   Paged
   (refresh! [this]
-    (letfn [(remap [f mappings]
+    (letfn [(remap [mappings]
               (let [existing (or (if-let [tail (last mappings)]
                                    (if (< (.capacity tail) region-size)
                                      (butlast mappings)))
@@ -24,7 +26,7 @@
                                (.map fchannel read-only offset (min region-size (- flength offset))))
                              (range unmapped-offset flength region-size))]
                 (into [] (concat existing new-maps))))]
-      (swap! regions remap f)))
+      (swap! regions remap)))
 
   (read-byte [this offset]
     ;; finds a byte in a region
@@ -59,23 +61,26 @@
         (when (>= region-offset region-size)
           (throw (ex-info "Accessing trailing data beyond the end of file"
                           {:region-size region-size :region-offset region-offset})))
-        (if (= region-offset (dec region-size))
+        (if (or (odd? region-offset) (= region-offset (dec region-size)))
           (short (bit-or (bit-shift-left (.get region region-offset) 8)
-                         (read-byte this (inc offset))))
-          (.getShort region (bit-shift-right region-offset 1))))))
+                         (bit-and 0xff (read-byte this (inc offset)))))  ;; mask: read-byte is signed, an unmasked low byte >= 0x80 would sign-extend over the high byte
+          (.getShort region region-offset)))))
+
+  (read-bytes [this offset len]
+    (read-bytes-into this offset (byte-array len)))
 
-  (read-bytes [this offset bytes]
+  (read-bytes-into [this offset bytes]
     ;; when the bytes occur entirely in a region, then return a slice of the region
     ;; if the bytes straddle 2 regions, create a new buffer, and copy the bytes from both regions into it
     (let [region-nr (int (/ offset region-size))
           region-offset (mod offset region-size)
-          array-size (count bytes)]
+          array-len (count bytes)]
       ;; the requested data is not currently mapped, so refresh
       (when (>= region-nr (count @regions))
         (refresh! this))
-      (when (> array-size region-size)
+      (when (> array-len region-size)
         (throw (ex-info "Data size beyond size limit"
-                        {:requested array-size :limit region-size})))
+                        {:requested array-len :limit region-size})))
       (when (>= region-nr (count @regions))
         (throw (ex-info "Accessing data beyond the end of file"
                         {:max (count @regions) :region region-nr :offset offset})))
@@ -85,14 +90,14 @@
           (throw (ex-info "Accessing trailing data beyond the end of file"
                           {:region-size region-size :region-offset region-offset})))
         ;; check if the requested data is all in the same region
-        (if (> (+ region-offset array-size) region-size)
+        (if (> (+ region-offset array-len) region-size)
           (do ;; data straddles 2 regions
             (when (>= (inc region-nr) (count @regions))
               (throw (ex-info "Accessing data beyond the end of file"
                               {:max (count @regions) :region region-nr :offset offset})))
             (let [nregion (nth @regions (inc region-nr))
                   fslice-size (- region-size region-offset)
-                  nslice-size (- array-size fslice-size)]
+                  nslice-size (- array-len fslice-size)]  ;; bytes is deliberately not rebound here: both slices must land in the caller's array, which is what gets returned
               (when (> nslice-size (.capacity nregion))
                 (throw (ex-info "Accessing data beyond the end of file"
                                 {:size nslice-size :limit (.capacity nregion)})))
@@ -102,5 +107,14 @@
               (doto (.asReadOnlyBuffer nregion)
                 (.get bytes fslice-size nslice-size))))
           (doto (.asReadOnlyBuffer region)
+            (.position region-offset)
             (.get bytes)))
         bytes))))
+
+(defn paged-file
+  "Creates a paged file reader over f using regions of region-size bytes (default-region-size when omitted); the regions are refreshed once before the reader is returned."
+  ([f] (paged-file f default-region-size))
+  ([f region-size]
+   (let [p (->PagedFile f (atom nil) region-size)]
+     (refresh! p)
+     p)))
diff --git a/src/asami/durable/pages.cljc b/src/asami/durable/pages.cljc
index 5ff6eb2..64bc578 100644
--- a/src/asami/durable/pages.cljc
+++ b/src/asami/durable/pages.cljc
@@ -6,4 +6,5 @@
   (refresh! [this] "Refreshes the buffers")
   (read-byte [this offset] "Returns a byte from underlying pages")
   (read-short [this offset] "Returns a short from underlying pages. Offset in bytes.")
-  (read-bytes [this offset bytes] "Fills a byte array with data from the paged object"))
+  (read-bytes [this offset length] "Reads length bytes and returns as an array.")
+  (read-bytes-into [this offset bytes] "Fills a byte array with data from the paged object"))
diff --git a/test/asami/durable/test_pages.clj b/test/asami/durable/test_pages.clj
new file mode 100644
index 0000000..c048b1f
--- /dev/null
+++ b/test/asami/durable/test_pages.clj
@@ -0,0 +1,66 @@
+(ns ^{:doc "Tests the paging mechanism for reading flat files"
+      :author "Paula Gearon"}
+  asami.durable.test-pages
+  (:require [asami.durable.encoder :refer [to-bytes]]
+            [asami.durable.pages :refer [refresh! read-byte read-short read-bytes]]
+            [asami.durable.flat :refer [paged-file]]
+            [clojure.test :refer [deftest is]])
+  (:import [java.io RandomAccessFile File]))
+
+(deftest test-append
+  ;; don't really need to test this with a file, but it's a useful template for other tests
+  (let [f (File. "test-append.dat")]
+    (.delete f)
+    (with-open [of (RandomAccessFile. f "rw")]
+      (let [[sh sb] (to-bytes "1234567890")
+            [kh kb] (to-bytes :keyword)
+            [lh lb] (to-bytes 1023)
+            buffer (byte-array 10)]
+        (doto of
+          (.write sh) ;; 1
+          (.write sb) ;; 10
+          (.write kh) ;; 1
+          (.write kb) ;; 7
+          (.write lh) ;; 1
+          (.write lb) ;; 8
+          (.seek 0))
+        (is (= 28 (.length of)))
+        (is (= 0xa (.read of)))
+        (is (= 10 (.read of buffer)))
+        (is (= "1234567890" (String. buffer "UTF-8")))
+        (is (= 0xc7 (.read of)))
+        (is (= 7 (.read of buffer 0 7)))
+        (is (= "keyword" (String. buffer 0 7 "UTF-8")))
+        (is (= 0xe0 (.read of)))
+        (is (= 1023 (.readLong of)))))
+    (.delete f)))
+
+(defn b-as-long [b] (bit-and 0xff b))
+
+(deftest simple-read
+  (let [f (File. "test-simple.dat")]
+    (.delete f)
+    (with-open [of (RandomAccessFile. f "rw")]
+      (let [[sh sb] (to-bytes "1234567890")
+            [kh kb] (to-bytes :keyword)
+            [lh lb] (to-bytes 1023)]
+        (doto of
+          (.write sh) ;; 1
+          (.write sb) ;; 10
+          (.write kh) ;; 1
+          (.write kb) ;; 7
+          (.write lh) ;; 1
+          (.write lb) ;; 8
+          (.seek 0))
+        (let [r (paged-file of)
+              bytes (byte-array 10)]
+          (is (= 0xe0 (b-as-long (read-byte r 19))))
+          (is (= 0xc7 (b-as-long (read-byte r 11))))
+          (is (= 0x0a (b-as-long (read-byte r 0))))
+          (is (= 0 (read-short r 20)))
+          (is (= 0 (read-short r 22)))
+          (is (= 0 (read-short r 24)))
+          (is (= 0x03ff (read-short r 26)))
+          (is (= "keyword" (String. (read-bytes r 12 7) "UTF-8")))
+          (is (= "1234567890" (String. (read-bytes r 1 10) "UTF-8"))))))
+    (.delete f)))