Skip to content

Commit

Permalink
Changing decoding to be buffer based
Browse files Browse the repository at this point in the history
  • Loading branch information
Paula Gearon committed Aug 28, 2020
1 parent 8ea920a commit 3d98539
Show file tree
Hide file tree
Showing 3 changed files with 212 additions and 112 deletions.
125 changes: 125 additions & 0 deletions src/asami/decoder.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
(ns ^{:doc "Encodes and decodes data for storage. Clojure implementation"
:author "Paula Gearon"}
asami.durable.decoder
(:require [clojure.string :as s])
(:import [clojure.lang Keyword BigInt]
[java.io RandomAccessFile]
[java.math BigInteger BigDecimal]
[java.net URI]
[java.time Instant]
[java.util Date UUID]
[java.nio ByteBuffer]
[java.nio.charset Charset]))

;; Charset used when turning stored bytes back into text.
;; `Charset/forName` takes the charset name directly: wrapping the literal in
;; parentheses would try to invoke the string as a function and throw while
;; the namespace is loading.
(def utf8 (Charset/forName "UTF-8"))

(defn decode-length
  "Reads a length prefix from the current position of `f`.

  When `ext` is truthy the length is stored as a signed 16-bit short. A
  negative short (top bit set) marks a 31-bit length: its low 15 bits are the
  high half and the following unsigned short is the low 16 bits.
  When `ext` is falsey the length is a single byte.

  Returns the length as a number and leaves `f` positioned after the prefix."
  [^Boolean ext ^RandomAccessFile f]
  (if ext
    (let [len (.readShort f)]
      (if (neg? len)
        ;; Drop the marker bit, move the remaining 15 bits into the high half,
        ;; then merge in the low 16 bits that follow.
        (bit-or (bit-shift-left (bit-and 0x7FFF len) 16)
                (.readUnsignedShort f))
        len))
    ;; NOTE(review): readByte is signed, so a short-form length above 127 would
    ;; come back negative - confirm the encoder never emits one.
    (int (.readByte f))))

;; NOTE(review): this name shadows clojure.core/read-string and produces a
;; "being replaced" warning on load; consider excluding it in the ns form.
(defn read-string
  "Reads exactly `len` bytes from the current position of `f` and returns them
  decoded as a UTF-8 string. Throws java.io.EOFException (from readFully) if
  the file ends first."
  [^RandomAccessFile f ^long len]
  (let [b (byte-array len)]
    (.readFully f b)
    ;; Decode to text: callers hand the result to URI/create and keyword,
    ;; which need a String rather than the raw byte array.
    (String. b java.nio.charset.StandardCharsets/UTF_8)))

(defn read-uri
  "Reads `len` bytes from `f` as a UTF-8 string and parses it into a java.net.URI."
  [^RandomAccessFile f ^long len]
  (URI/create (read-string f len)))

(defn read-keyword
  "Reads `len` bytes from `f` as a UTF-8 string and converts it to a keyword."
  [^RandomAccessFile f ^long len]
  (keyword (read-string f len)))

(defn long-decoder
  "Reads an 8-byte long from the current position of `f`.
  `ext` is ignored; it is accepted so every decoder can be called as (decoder ext f)."
  [^Boolean ext ^RandomAccessFile f]
  (.readLong f))

(defn double-decoder
  "Reads an 8-byte double from the current position of `f`.
  `ext` is ignored; it is accepted so every decoder can be called as (decoder ext f)."
  [^Boolean ext ^RandomAccessFile f]
  (.readDouble f))

(defn string-decoder
  "Reads a length prefix (short or extended form, selected by `ext`) from `f`,
  then reads that many bytes via read-string and returns the result."
  [^Boolean ext ^RandomAccessFile f]
  (read-string f (decode-length ext f)))

(defn uri-decoder
  "Reads a length prefix (short or extended form, selected by `ext`) from `f`,
  then reads that many bytes via read-uri and returns the resulting URI."
  [^Boolean ext ^RandomAccessFile f]
  (read-uri f (decode-length ext f)))

(defn bigint-decoder
  "Reads a length prefix from `f`, then that many bytes, and interprets them as
  a two's-complement big-endian integer (the java.math.BigInteger byte-array
  constructor). Returns a clojure.lang.BigInt."
  [^Boolean ext ^RandomAccessFile f]
  (let [len (decode-length ext f)
        b (byte-array len)]
    (.readFully f b)
    (bigint (BigInteger. b))))

(defn bigdec-decoder
  "Reads a length-prefixed string from `f` and parses it as a BigDecimal.
  Expects string-decoder to yield text; `bigdec` cannot coerce a byte array."
  [^Boolean ext ^RandomAccessFile f]
  ;; clojure.core has no `big-decimal`; `bigdec` is the coercion function.
  (bigdec (string-decoder ext f)))

(defn date-decoder
  "Reads an 8-byte long from `f` and returns a java.util.Date built from it
  (the Date constructor treats the value as milliseconds since the epoch).
  `ext` is ignored; it is accepted so every decoder can be called as (decoder ext f)."
  [^Boolean ext ^RandomAccessFile f]
  (Date. (.readLong f)))

(defn instant-decoder
  "Reads an 8-byte long followed by a 4-byte int from `f` and returns
  (Instant/ofEpochSecond long int): the long is the epoch second and the int
  is the nanosecond adjustment.
  `ext` is ignored; it is accepted so every decoder can be called as (decoder ext f)."
  [^Boolean ext ^RandomAccessFile f]
  (let [epoch-second (.readLong f)
        nanos (.readInt f)]
    (Instant/ofEpochSecond epoch-second nanos)))

(defn keyword-decoder
  "Reads a length prefix (short or extended form, selected by `ext`) from `f`,
  then reads that many bytes via read-keyword and returns the resulting keyword."
  [^Boolean ext ^RandomAccessFile f]
  (read-keyword f (decode-length ext f)))

(defn uuid-decoder
  "Reads two 8-byte longs from `f` and returns a java.util.UUID. The first long
  read is used as the least-significant half and the second as the
  most-significant half.
  `ext` is ignored; it is accepted so every decoder can be called as (decoder ext f)."
  [^Boolean ext ^RandomAccessFile f]
  (let [low (.readLong f)
        high (.readLong f)]
    ;; The UUID constructor takes (mostSigBits, leastSigBits).
    (UUID. high low)))

(defn blob-decoder
  "Reads a length prefix (short or extended form, selected by `ext`) from `f`,
  then that many bytes, and returns them as a fresh byte array."
  [^Boolean ext ^RandomAccessFile f]
  (let [b (byte-array (decode-length ext f))]
    (.readFully f b)
    b))

(defn xsd-decoder
  "Reads a length-prefixed string from `f` and splits it at the first space.
  Returns a pair [uri text]: the part before the space parsed as a URI, and the
  part after the space as a string.
  NOTE(review): a string with no space makes `sp` nil and `subs` will throw."
  [^Boolean ext ^RandomAccessFile f]
  (let [^String text (string-decoder ext f)
        sp (s/index-of text \space)]
    ;; The second element is the text after the separator, not its index.
    [(URI/create (subs text 0 sp)) (subs text (inc sp))]))

(def typecode->decoder
  "Dispatch table: numeric type code -> decoder function.
  Every decoder is called as (decoder ext f)."
  (into {}
        [[0  long-decoder]
         [1  double-decoder]
         [2  string-decoder]
         [3  uri-decoder]
         [6  bigint-decoder]
         [7  bigdec-decoder]
         [8  date-decoder]
         [9  instant-decoder]
         [10 keyword-decoder]
         [11 uuid-decoder]
         [12 blob-decoder]
         [13 xsd-decoder]]))

(defn read-object
  "Seeks `f` to `pos` and decodes the object stored there.

  The first byte is a header, tested from the top bit down:
    bit 0x80 clear -> string; the whole byte is the length
    bit 0x40 clear -> URI; the low 6 bits are the length
    bit 0x20 clear -> keyword; the low 5 bits are the length
    otherwise      -> the low 4 bits are a type code looked up in
                      typecode->decoder
  Throws ex-info when the type code has no registered decoder."
  [^RandomAccessFile f ^long pos]
  (.seek f pos)
  (let [b0 (.readByte f)]
    (cond
      (zero? (bit-and 0x80 b0)) (read-string f b0)
      (zero? (bit-and 0x40 b0)) (read-uri f (bit-and 0x3F b0))
      (zero? (bit-and 0x20 b0)) (read-keyword f (bit-and 0x1F b0))
      :else
      (let [type-code (bit-and 0x0F b0)
            decoder (typecode->decoder type-code)]
        ;; No fallback decoder is defined anywhere in this namespace, so an
        ;; unknown code is reported explicitly instead of referencing one.
        (when-not decoder
          (throw (ex-info "Unknown type code"
                          {:type-code type-code :header b0 :position pos})))
        ;; NOTE(review): the flag passed as `ext` is true when bit 0x10 is
        ;; CLEAR - confirm this polarity against the encoder.
        (decoder (zero? (bit-and 0x10 b0)) f)))))
113 changes: 1 addition & 112 deletions src/asami/durable/codec.clj → src/asami/encoder.clj
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
(ns ^{:doc "Encodes and decodes data for storage. Clojure implementation"
:author "Paula Gearon"}
asami.durable.codec
asami.durable.encoder
(:require [clojure.string :as s])
(:import [clojure.lang Keyword BigInt]
[java.io RandomAccessFile]
Expand Down Expand Up @@ -169,114 +169,3 @@
[o]
(let [b (body o)]
[(header o (count b)) b]))

(defn decode-length
  "Reads a length prefix from the current position of `f`.

  When `ext` is truthy the length is stored as a signed 16-bit short. A
  negative short (top bit set) marks a 31-bit length: its low 15 bits are the
  high half and the following unsigned short is the low 16 bits.
  When `ext` is falsey the length is a single byte.

  Returns the length as a number and leaves `f` positioned after the prefix."
  [^Boolean ext ^RandomAccessFile f]
  (if ext
    (let [len (.readShort f)]
      (if (neg? len)
        ;; Drop the marker bit, move the remaining 15 bits into the high half,
        ;; then merge in the low 16 bits that follow.
        (bit-or (bit-shift-left (bit-and 0x7FFF len) 16)
                (.readUnsignedShort f))
        len))
    ;; NOTE(review): readByte is signed, so a short-form length above 127 would
    ;; come back negative - confirm the encoder never emits one.
    (int (.readByte f))))

;; NOTE(review): this name shadows clojure.core/read-string and produces a
;; "being replaced" warning on load; consider excluding it in the ns form.
(defn read-string
  "Reads exactly `len` bytes from the current position of `f` and returns them
  decoded as a UTF-8 string. Throws java.io.EOFException (from readFully) if
  the file ends first."
  [^RandomAccessFile f ^long len]
  (let [b (byte-array len)]
    (.readFully f b)
    ;; Decode to text: callers hand the result to URI/create and keyword,
    ;; which need a String rather than the raw byte array.
    (String. b java.nio.charset.StandardCharsets/UTF_8)))

(defn read-uri
  "Reads `len` bytes from `f` as a UTF-8 string and parses it into a java.net.URI."
  [^RandomAccessFile f ^long len]
  (URI/create (read-string f len)))

(defn read-keyword
  "Reads `len` bytes from `f` as a UTF-8 string and converts it to a keyword."
  [^RandomAccessFile f ^long len]
  (keyword (read-string f len)))

(defn long-decoder
  "Reads an 8-byte long from the current position of `f`.
  `ext` is ignored; it is accepted so every decoder can be called as (decoder ext f)."
  [^Boolean ext ^RandomAccessFile f]
  (.readLong f))

(defn double-decoder
  "Reads an 8-byte double from the current position of `f`.
  `ext` is ignored; it is accepted so every decoder can be called as (decoder ext f)."
  [^Boolean ext ^RandomAccessFile f]
  (.readDouble f))

(defn string-decoder
  "Reads a length prefix (short or extended form, selected by `ext`) from `f`,
  then reads that many bytes via read-string and returns the result."
  [^Boolean ext ^RandomAccessFile f]
  (read-string f (decode-length ext f)))

(defn uri-decoder
  "Reads a length prefix (short or extended form, selected by `ext`) from `f`,
  then reads that many bytes via read-uri and returns the resulting URI."
  [^Boolean ext ^RandomAccessFile f]
  (read-uri f (decode-length ext f)))

(defn bigint-decoder
  "Reads a length prefix from `f`, then that many bytes, and interprets them as
  a two's-complement big-endian integer (the java.math.BigInteger byte-array
  constructor). Returns a clojure.lang.BigInt."
  [^Boolean ext ^RandomAccessFile f]
  (let [len (decode-length ext f)
        b (byte-array len)]
    (.readFully f b)
    (bigint (BigInteger. b))))

(defn bigdec-decoder
  "Reads a length-prefixed string from `f` and parses it as a BigDecimal.
  Expects string-decoder to yield text; `bigdec` cannot coerce a byte array."
  [^Boolean ext ^RandomAccessFile f]
  ;; clojure.core has no `big-decimal`; `bigdec` is the coercion function.
  (bigdec (string-decoder ext f)))

(defn date-decoder
  "Reads an 8-byte long from `f` and returns a java.util.Date built from it
  (the Date constructor treats the value as milliseconds since the epoch).
  `ext` is ignored; it is accepted so every decoder can be called as (decoder ext f)."
  [^Boolean ext ^RandomAccessFile f]
  (Date. (.readLong f)))

(defn instant-decoder
  "Reads an 8-byte long followed by a 4-byte int from `f` and returns
  (Instant/ofEpochSecond long int): the long is the epoch second and the int
  is the nanosecond adjustment.
  `ext` is ignored; it is accepted so every decoder can be called as (decoder ext f)."
  [^Boolean ext ^RandomAccessFile f]
  (let [epoch-second (.readLong f)
        nanos (.readInt f)]
    (Instant/ofEpochSecond epoch-second nanos)))

(defn keyword-decoder
  "Reads a length prefix (short or extended form, selected by `ext`) from `f`,
  then reads that many bytes via read-keyword and returns the resulting keyword."
  [^Boolean ext ^RandomAccessFile f]
  (read-keyword f (decode-length ext f)))

(defn uuid-decoder
  "Reads two 8-byte longs from `f` and returns a java.util.UUID. The first long
  read is used as the least-significant half and the second as the
  most-significant half.
  `ext` is ignored; it is accepted so every decoder can be called as (decoder ext f)."
  [^Boolean ext ^RandomAccessFile f]
  (let [low (.readLong f)
        high (.readLong f)]
    ;; The UUID constructor takes (mostSigBits, leastSigBits).
    (UUID. high low)))

(defn blob-decoder
  "Reads a length prefix (short or extended form, selected by `ext`) from `f`,
  then that many bytes, and returns them as a fresh byte array."
  [^Boolean ext ^RandomAccessFile f]
  (let [b (byte-array (decode-length ext f))]
    (.readFully f b)
    b))

(defn xsd-decoder
  "Reads a length-prefixed string from `f` and splits it at the first space.
  Returns a pair [uri text]: the part before the space parsed as a URI, and the
  part after the space as a string.
  NOTE(review): a string with no space makes `sp` nil and `subs` will throw."
  [^Boolean ext ^RandomAccessFile f]
  (let [^String text (string-decoder ext f)
        sp (s/index-of text \space)]
    ;; The second element is the text after the separator, not its index.
    [(URI/create (subs text 0 sp)) (subs text (inc sp))]))

(def typecode->decoder
  "Dispatch table: numeric type code -> decoder function.
  Every decoder is called as (decoder ext f)."
  (into {}
        [[0  long-decoder]
         [1  double-decoder]
         [2  string-decoder]
         [3  uri-decoder]
         [6  bigint-decoder]
         [7  bigdec-decoder]
         [8  date-decoder]
         [9  instant-decoder]
         [10 keyword-decoder]
         [11 uuid-decoder]
         [12 blob-decoder]
         [13 xsd-decoder]]))

(defn read-object
  "Seeks `f` to `pos` and decodes the object stored there.

  The first byte is a header, tested from the top bit down:
    bit 0x80 clear -> string; the whole byte is the length
    bit 0x40 clear -> URI; the low 6 bits are the length
    bit 0x20 clear -> keyword; the low 5 bits are the length
    otherwise      -> the low 4 bits are a type code looked up in
                      typecode->decoder
  Throws ex-info when the type code has no registered decoder."
  [^RandomAccessFile f ^long pos]
  (.seek f pos)
  (let [b0 (.readByte f)]
    (cond
      (zero? (bit-and 0x80 b0)) (read-string f b0)
      (zero? (bit-and 0x40 b0)) (read-uri f (bit-and 0x3F b0))
      (zero? (bit-and 0x20 b0)) (read-keyword f (bit-and 0x1F b0))
      :else
      (let [type-code (bit-and 0x0F b0)
            decoder (typecode->decoder type-code)]
        ;; No fallback decoder is defined anywhere in this namespace, so an
        ;; unknown code is reported explicitly instead of referencing one.
        (when-not decoder
          (throw (ex-info "Unknown type code"
                          {:type-code type-code :header b0 :position pos})))
        ;; NOTE(review): the flag passed as `ext` is true when bit 0x10 is
        ;; CLEAR - confirm this polarity against the encoder.
        (decoder (zero? (bit-and 0x10 b0)) f)))))
86 changes: 86 additions & 0 deletions src/asami/flat.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
(ns ^{:doc "Manages a memory-mapped file that holds write once data"
      :author "Paula Gearon"}
  asami.durable.flat
  ;; An :import vector is [package Class ...]. A vector holding only the
  ;; fully-qualified name is read as a package with no classes and imports
  ;; nothing.
  (:import [java.io RandomAccessFile]))

;; Read-only access to data that is split across a sequence of fixed-size
;; regions (pages). All offsets are absolute byte offsets into the whole data.
(defprotocol Paged
  (refresh! [this] "Refreshes the buffers")
  (read-byte [this offset] "Returns a byte from underlying pages")
  (read-short [this offset] "Returns a short from underlying pages. Offset in bytes.")
  (read-bytes [this offset bytes] "Fills a byte array with data from the paged object"))

(defn- locate-region
  "Resolves the absolute byte `offset` to the region that holds it.

  `regions` is a derefable holding an indexed collection of ByteBuffers and
  `region-size` is the nominal size of each region in bytes. If the offset
  falls past the known regions, `paged` is refreshed once before giving up.

  Returns [region region-nr region-offset], where region-offset is the int
  byte index inside the region. Throws ex-info when the offset is beyond the
  last region, or beyond the capacity of a (possibly short) region."
  [paged regions region-size offset]
  (let [region-nr (int (quot offset region-size))
        region-offset (mod offset region-size)]
    (when (>= region-nr (count @regions))
      (refresh! paged))
    (when (>= region-nr (count @regions))
      (throw (ex-info "Accessing data beyond the end of file"
                      {:max (count @regions) :region region-nr :offset offset})))
    (let [^java.nio.ByteBuffer region (nth @regions region-nr)
          capacity (.capacity region)]
      (when (>= region-offset capacity)
        (throw (ex-info "Accessing trailing data beyond the end of file"
                        {:region-size capacity :region-offset region-offset})))
      [region region-nr (int region-offset)])))

;; These functions do update the PagedFile state, but only to expand the mapped region.
;; Fields: `f` is not used by the methods below; `regions` is a derefable
;; holding the region buffers; `region-size` is the nominal byte size of a region.
(defrecord PagedFile [f regions region-size]
  Paged
  (refresh! [this]
    ;; TODO: not implemented yet. Intended behavior (from the original note):
    ;; drop any partial final regions and remap to the end of the file
    nil)

  (read-byte [this offset]
    (let [[^java.nio.ByteBuffer region _ region-offset]
          (locate-region this regions region-size offset)]
      (.get region (int region-offset))))

  (read-short [this offset]
    (let [[^java.nio.ByteBuffer region _ region-offset]
          (locate-region this regions region-size offset)
          idx (int region-offset)]
      (if (= idx (dec (.capacity region)))
        ;; The short straddles two regions: the high byte is the last byte of
        ;; this region, the low byte is the first byte of the next. The low
        ;; byte is masked so its sign extension cannot overwrite the high byte.
        (short (bit-or (bit-shift-left (.get region idx) 8)
                       (bit-and 0xFF (read-byte this (inc offset)))))
        ;; The absolute getShort takes a BYTE index, so the offset is used as-is.
        (.getShort region idx))))

  (read-bytes [this offset bytes]
    (let [^bytes dest bytes
          array-size (alength dest)]
      (when (> array-size region-size)
        (throw (ex-info "Data size beyond size limit"
                        {:requested array-size :limit region-size})))
      (let [[^java.nio.ByteBuffer region region-nr region-offset]
            (locate-region this regions region-size offset)
            start (int region-offset)
            available (- (.capacity region) start)]
        (if (> array-size available)
          ;; The request runs past this region: take the tail of this region
          ;; and the remainder from the head of the next one.
          (do
            (when (>= (inc region-nr) (count @regions))
              (throw (ex-info "Accessing data beyond the end of file"
                              {:max (count @regions) :region region-nr :offset offset})))
            (let [^java.nio.ByteBuffer nregion (nth @regions (inc region-nr))
                  nslice-size (- array-size available)]
              (when (> nslice-size (.capacity nregion))
                (throw (ex-info "Accessing data beyond the end of file"
                                {:size nslice-size :limit (.capacity nregion)})))
              ;; Work on read-only views so the position of the shared region
              ;; buffers is never modified.
              (doto (.asReadOnlyBuffer region)
                (.position start)
                (.get dest 0 (int available)))
              (doto (.asReadOnlyBuffer nregion)
                (.get dest (int available) (int nslice-size)))))
          ;; Entirely inside one region: position the view at the requested
          ;; offset before the bulk read, otherwise it reads from index 0.
          (doto (.asReadOnlyBuffer region)
            (.position start)
            (.get dest)))
        bytes))))

0 comments on commit 3d98539

Please sign in to comment.