From 3d985393fab249ab72f84b8e573fd4435e4af5d3 Mon Sep 17 00:00:00 2001
From: Paula Gearon
Date: Thu, 27 Aug 2020 21:03:26 -0400
Subject: [PATCH] Changing decoding to be buffer based

---
 src/asami/decoder.clj                        | 155 +++++++++++++++++++
 src/asami/{durable/codec.clj => encoder.clj} | 113 +-------------
 src/asami/flat.clj                           |  86 ++++++++++
 3 files changed, 242 insertions(+), 112 deletions(-)
 create mode 100644 src/asami/decoder.clj
 rename src/asami/{durable/codec.clj => encoder.clj} (64%)
 create mode 100644 src/asami/flat.clj

diff --git a/src/asami/decoder.clj b/src/asami/decoder.clj
new file mode 100644
index 0000000..b726a65
--- /dev/null
+++ b/src/asami/decoder.clj
@@ -0,0 +1,155 @@
+(ns ^{:doc "Decodes data read from storage. Clojure implementation"
+      :author "Paula Gearon"}
+  asami.durable.decoder
+  (:refer-clojure :exclude [read-string])
+  (:require [clojure.string :as s])
+  (:import [clojure.lang Keyword BigInt]
+           [java.io RandomAccessFile]
+           [java.math BigInteger BigDecimal]
+           [java.net URI]
+           [java.time Instant]
+           [java.util Date UUID]
+           [java.nio ByteBuffer]
+           [java.nio.charset Charset]))
+
+;; Character set used to decode every string payload.
+(def ^Charset utf8 (Charset/forName "UTF-8"))
+
+(defn decode-length
+  "Reads the length field of an encoded value from f.
+  When ext is truthy the length is a short; if that short is negative its low
+  15 bits are the high half of a 31 bit length and an unsigned short follows.
+  When ext is falsey the length is a single unsigned byte."
+  [ext ^RandomAccessFile f]
+  (if ext
+    (let [len (.readShort f)]
+      (if (neg? len)
+        (bit-or (bit-shift-left (int (bit-and 0x7FFF len)) 16)
+                (.readUnsignedShort f))
+        len))
+    ;; a length cannot be negative, so the byte is read unsigned
+    (.readUnsignedByte f)))
+
+(defn read-string
+  "Reads len bytes from f and decodes them as a UTF-8 string."
+  [^RandomAccessFile f ^long len]
+  (let [b (byte-array len)]
+    (.readFully f b)
+    (String. ^bytes b utf8)))
+
+(defn read-uri
+  "Reads len bytes from f as a string and parses it as a URI."
+  [^RandomAccessFile f ^long len]
+  (URI/create (read-string f len)))
+
+(defn read-keyword
+  "Reads len bytes from f as a string and converts it to a keyword."
+  [^RandomAccessFile f ^long len]
+  (keyword (read-string f len)))
+
+(defn long-decoder
+  "Reads an 8 byte long from f. ext is unused."
+  [ext ^RandomAccessFile f]
+  (.readLong f))
+
+(defn double-decoder
+  "Reads an 8 byte double from f. ext is unused."
+  [ext ^RandomAccessFile f]
+  (.readDouble f))
+
+(defn string-decoder
+  "Reads a length (see decode-length) followed by a string of that many bytes."
+  [ext ^RandomAccessFile f]
+  (read-string f (decode-length ext f)))
+
+(defn uri-decoder
+  "Reads a length (see decode-length) followed by a URI of that many bytes."
+  [ext ^RandomAccessFile f]
+  (read-uri f (decode-length ext f)))
+
+(defn bigint-decoder
+  "Reads a length, then that many bytes as a two's-complement BigInteger.
+  Returns the value as a Clojure BigInt."
+  [ext ^RandomAccessFile f]
+  (let [len (decode-length ext f)
+        b (byte-array len)]
+    (.readFully f b)
+    (bigint (BigInteger. b))))
+
+(defn bigdec-decoder
+  "Reads a string and parses it as a BigDecimal."
+  [ext ^RandomAccessFile f]
+  (bigdec (string-decoder ext f)))
+
+(defn date-decoder
+  "Reads a long and passes it to the java.util.Date constructor. ext is unused."
+  [ext ^RandomAccessFile f]
+  (Date. (.readLong f)))
+
+(defn instant-decoder
+  "Reads a long of epoch seconds and an int nanosecond adjustment. ext is unused."
+  [ext ^RandomAccessFile f]
+  (let [epoch (.readLong f)
+        nano (.readInt f)]
+    (Instant/ofEpochSecond epoch nano)))
+
+(defn keyword-decoder
+  "Reads a length (see decode-length) followed by a keyword of that many bytes."
+  [ext ^RandomAccessFile f]
+  (read-keyword f (decode-length ext f)))
+
+(defn uuid-decoder
+  "Reads two longs, low half first, and builds a UUID. ext is unused."
+  [ext ^RandomAccessFile f]
+  (let [low (.readLong f)
+        high (.readLong f)]
+    (UUID. high low)))
+
+(defn blob-decoder
+  "Reads a length (see decode-length) followed by that many raw bytes."
+  [ext ^RandomAccessFile f]
+  (let [b (byte-array (decode-length ext f))]
+    (.readFully f b)
+    b))
+
+(defn xsd-decoder
+  "Reads a string holding a datatype URI, a space, and a lexical form.
+  Returns a pair of [datatype-URI lexical-form]."
+  [ext ^RandomAccessFile f]
+  (let [s (string-decoder ext f)
+        sp (s/index-of s \space)]
+    [(URI/create (subs s 0 sp)) (subs s (inc sp))]))
+
+(def typecode->decoder
+  "Map of type codes to decoder functions"
+  {0 long-decoder
+   1 double-decoder
+   2 string-decoder
+   3 uri-decoder
+   6 bigint-decoder
+   7 bigdec-decoder
+   8 date-decoder
+   9 instant-decoder
+   10 keyword-decoder
+   11 uuid-decoder
+   12 blob-decoder
+   13 xsd-decoder})
+
+(defn read-object
+  "Seeks to pos in f and decodes the value stored there.
+  The top bits of the first byte select a string, URI or keyword whose length
+  is held in the remaining bits; otherwise the low 4 bits are a type code and
+  a clear 0x10 bit is passed to the decoder as its ext flag.
+  Throws ex-info when the type code has no decoder."
+  [^RandomAccessFile f ^long pos]
+  (.seek f pos)
+  (let [b0 (.readByte f)]
+    (cond
+      (zero? (bit-and 0x80 b0)) (read-string f b0)
+      (zero? (bit-and 0x40 b0)) (read-uri f (bit-and 0x3F b0))
+      (zero? (bit-and 0x20 b0)) (read-keyword f (bit-and 0x1F b0))
+      :else (let [typecode (bit-and 0x0F b0)
+                  decoder (typecode->decoder typecode)]
+              (when-not decoder
+                (throw (ex-info "No decoder for type code" {:typecode typecode :pos pos})))
+              (decoder (zero? (bit-and 0x10 b0)) f)))))
diff --git a/src/asami/durable/codec.clj b/src/asami/encoder.clj
similarity index 64%
rename from src/asami/durable/codec.clj
rename to src/asami/encoder.clj
index 315f5b2..82fbe8f 100644
--- a/src/asami/durable/codec.clj
+++ b/src/asami/encoder.clj
@@ -1,6 +1,6 @@
 (ns ^{:doc "Encodes and decodes data for storage.
Clojure implementation" :author "Paula Gearon"} - asami.durable.codec + asami.durable.encoder (:require [clojure.string :as s]) (:import [clojure.lang Keyword BigInt] [java.io RandomAccessFile] @@ -169,114 +169,3 @@ [o] (let [b (body o)] [(header o (count b)) b])) - -(defn decode-length - [^bool ext ^RandomAccessFile f] - (if ext - (let [len (.readShort f)] - (if (< len 0) - (let [len2 (.readUnsignedShort f)] - (bit-or - (bit-shift-left (int (bit-and 0x7FFF len)) len 16) - len2)) - len)) - (int (.readByte f)))) - -(defn read-string - [^RandomAccessFile f ^long len] - (let [b (byte-array len)] - (.readFully f b) - b)) - -(defn read-uri - [^RandomAccessFile f ^long len] - (URI/create (read-string len))) - -(defn read-keyword - [^RandomAccessFile f ^long len] - (keyword (read-string len))) - -(defn long-decoder - [^bool ext ^RandomAccessFile f] - (.readLong f)) - -(defn double-decoder - [^bool ext ^RandomAccessFile f] - (.readDouble f)) - -(defn string-decoder - [^bool ext ^RandomAccessFile f] - (read-string f (decode-length ext f))) - -(defn uri-decoder - [^bool ext ^RandomAccessFile f] - (read-uri f (decode-length ext f))) - -(defn bigint-decoder - [^bool ext ^RandomAccessFile f] - (let [len (decode-length ext f) - b (byte-array len)] - (.readFully f b) - (bigint (BigInteger. b)))) - -(defn bigdec-decoder - [^bool ext ^RandomAccessFile f] - (big-decimal (string-decoder ext f))) - -(defn date-decoder - [^bool ext ^RandomAccessFile f] - (Date. (.readLong f))) - -(defn instant-decoder - [^bool ext ^RandomAccessFile f] - (let [epoch (.readLong f) - sec (.readInt f)] - (Instant/ofEpochSecond epoch sec))) - -(defn keyword-decoder - [^bool ext ^RandomAccessFile f] - (read-keyword f (decode-length ext f))) - -(defn uuid-decoder - [^bool ext ^RandomAccessFile f] - (let [low (.readLong f) - high (.readLong f)] - (UUID. 
high low)))
-
-(defn blob-decoder
-  [^bool ext ^RandomAccessFile f]
-  (let [b (byte-array (decode-length ext f))]
-    (.readFully f b)
-    b))
-
-(defn xsd-decoder
-  [^bool ext ^RandomAccessFile f]
-  (let [s (string-decoder ext f)
-        sp (s/index-of s \space)]
-    [(URI/create (subs s 0 sp)) (inc sp)]))
-
-(def typecode->decoder
-  "Map of type codes to decoder functions"
-  {0 long-decoder
-   1 double-decoder
-   2 string-decoder
-   3 uri-decoder
-   6 bigint-decoder
-   7 bigdec-decoder
-   8 date-decoder
-   9 instant-decoder
-   10 keyword-decoder
-   11 uuid-decoder
-   12 blob-decoder
-   13 xsd-decoder})
-
-(defn read-object
-  [^RandomAccessFile f ^long pos]
-  (.seek f pos)
-  (let [b0 (.readByte f)]
-    (cond
-      (zero? (bit-and 0x80 b0)) (read-string f b0)
-      (zero? (bit-and 0x40 b0)) (read-uri f (bit-and 0x3F b0))
-      (zero? (bit-and 0x20 b0)) (read-keyword f (bit-and 0x1F b0))
-      :default ((typecode->decoder (bit-and 0x0F b0) default-decoder)
-                (zero? (bit-and 0x10 b0)) f))))
diff --git a/src/asami/flat.clj b/src/asami/flat.clj
new file mode 100644
index 0000000..bc8617f
--- /dev/null
+++ b/src/asami/flat.clj
@@ -0,0 +1,86 @@
+(ns ^{:doc "Manages a memory-mapped file that holds write once data"
+      :author "Paula Gearon"}
+  asami.durable.flat
+  (:import [java.io RandomAccessFile]
+           [java.nio ByteBuffer]))
+
+;; Read access to data that is held in a sequence of regions of region-size bytes.
+;; Offsets are byte positions from the start of the data.
+(defprotocol Paged
+  (refresh! [this] "Refreshes the buffers")
+  (read-byte [this offset] "Returns a byte from underlying pages")
+  (read-short [this offset] "Returns a short from underlying pages. Offset in bytes.")
+  (read-bytes [this offset bytes] "Fills a byte array with data from the paged object"))
+
+(defn- locate
+  "Finds the region holding the byte at offset, calling refresh! once when that
+  region is not in @regions yet. Returns [region-nr region region-offset] and
+  throws ex-info when the offset is past the end of the available data."
+  [paged regions region-size offset]
+  (let [region-nr (int (quot offset region-size))
+        region-offset (int (mod offset region-size))]
+    (when (>= region-nr (count @regions))
+      (refresh! paged))
+    (when (>= region-nr (count @regions))
+      (throw (ex-info "Accessing data beyond the end of file"
+                      {:max (count @regions) :region region-nr :offset offset})))
+    (let [^ByteBuffer region (nth @regions region-nr)
+          capacity (.capacity region)]
+      ;; a region (such as a partial final one) can be shorter than region-size
+      (when (>= region-offset capacity)
+        (throw (ex-info "Accessing trailing data beyond the end of file"
+                        {:region-size capacity :region-offset region-offset})))
+      [region-nr region region-offset])))
+
+;; These functions do update the PagedFile state, but only to expand the mapped region.
+(defrecord PagedFile [f regions region-size]
+  Paged
+  (refresh! [this]
+    ;; TODO: not implemented yet. Intended to
+    ;; drop any partial final regions and remap to the end of the file
+    )
+
+  (read-byte [this offset]
+    (let [[_ ^ByteBuffer region region-offset] (locate this regions region-size offset)]
+      (.get region (int region-offset))))
+
+  (read-short [this offset]
+    (let [[_ ^ByteBuffer region region-offset] (locate this regions region-size offset)]
+      (if (= region-offset (dec (.capacity region)))
+        ;; the short straddles two regions: high byte here, low byte in the next.
+        ;; The low byte is masked so that its sign cannot overwrite the high byte.
+        ;; NOTE(review): assumes the regions use big-endian byte order - confirm
+        (short (bit-or (bit-shift-left (.get region (int region-offset)) 8)
+                       (bit-and 0xFF (read-byte this (inc offset)))))
+        ;; getShort takes an absolute index in bytes, not in shorts
+        (.getShort region (int region-offset)))))
+
+  (read-bytes [this offset bytes]
+    (let [array-size (count bytes)]
+      ;; a request may touch at most two regions, so it is limited to region-size bytes
+      (when (> array-size region-size)
+        (throw (ex-info "Data size beyond size limit"
+                        {:requested array-size :limit region-size})))
+      (let [[region-nr ^ByteBuffer region region-offset] (locate this regions region-size offset)
+            fslice-size (- (.capacity region) region-offset)]
+        (if (> array-size fslice-size)
+          ;; the data continues into the next region
+          (do
+            (when (>= (inc region-nr) (count @regions))
+              (throw (ex-info "Accessing data beyond the end of file"
+                              {:max (count @regions) :region region-nr :offset offset})))
+            (let [^ByteBuffer nregion (nth @regions (inc region-nr))
+                  nslice-size (- array-size fslice-size)]
+              (when (> nslice-size (.capacity nregion))
+                (throw (ex-info "Accessing data beyond the end of file"
+                                {:size nslice-size :limit (.capacity nregion)})))
+              (doto (.asReadOnlyBuffer region)
+                (.position (int region-offset))
+                (.get ^bytes bytes 0 (int fslice-size)))
+              (doto (.asReadOnlyBuffer nregion)
+                (.get ^bytes bytes (int fslice-size) (int nslice-size)))))
+          ;; everything is in this region: start reading at the requested offset
+          (doto (.asReadOnlyBuffer region)
+            (.position (int region-offset))
+            (.get ^bytes bytes)))
+        bytes))))