From 9a0c3f3be7598147a5b9c53a2ceb62890d9fa1a9 Mon Sep 17 00:00:00 2001
From: Yuri Astrakhan
Date: Wed, 27 Sep 2023 15:44:46 -0400
Subject: [PATCH 1/4] Update `agg_tiles_hash` docs and fix a minor bug

The copying should set `agg_tiles_hash` in all cases because it now uses
the always-available `tiles` table/view. Also, a few minor cleanups and
renames related to that.
---
 .github/workflows/ci.yml          |   2 +-
 docs/src/tools.md                 | 103 +++++++++++++++++-------------
 martin-mbtiles/src/bin/main.rs    |   4 +-
 martin-mbtiles/src/mbtiles.rs     |   4 +-
 martin-mbtiles/src/tile_copier.rs |   5 +-
 5 files changed, 67 insertions(+), 51 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fa5bd6473..39678e3cb 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -506,7 +506,7 @@ jobs:
           elif [[ "${{ matrix.target }}" == "debian-x86_64" ]]; then
             mv debian-x86_64.deb ../${{ matrix.name }}
          else
-            tar czvf ../${{ matrix.name }} martin${{ matrix.ext }} mbtiles${{ matrix.ext }} 
+            tar czvf ../${{ matrix.name }} martin${{ matrix.ext }} mbtiles${{ matrix.ext }}
           fi
       - name: Generate SHA-256 (MacOS)
         if: matrix.sha == 'true'
diff --git a/docs/src/tools.md b/docs/src/tools.md
index 05f444035..d59d8e6cb 100644
--- a/docs/src/tools.md
+++ b/docs/src/tools.md
@@ -69,47 +69,64 @@ If the `.mbtiles` file is of `flat_with_hash` or `normalized` type, then verify
 mbtiles validate src_file.mbtiles
 ```
 
+## Content Validation
+The original [MBTiles specification](https://github.com/mapbox/mbtiles-spec#readme) does not provide any guarantees for the content of the tile data in MBTiles. This tool adds a few additional conventions to ensure that the content of the tile data is valid.
+
+A typical Normalized schema generated by tools like [tilelive-copy](https://github.com/mapbox/TileLive#bintilelive-copy) uses an MD5 hash in the `tile_id` column. Martin's `mbtiles` tool can use this hash to verify the content of each tile. We also define a new `flat-with-hash` schema that stores the hash and tile data in the same table. This schema is more efficient than the `normalized` schema when the data has no duplicate tiles (see below). Per-tile validation is not available for the `flat` schema.
+
+Per-tile validation will catch individual invalid tiles, but it will not detect overall datastore corruption (e.g. missing tiles, tiles that shouldn't exist, or tiles with incorrect z/x/y values). For that, the Martin `mbtiles` tool defines a new metadata value called `agg_tiles_hash`. The value is computed by hashing `cast(zoom_level AS text), cast(tile_column AS text), cast(tile_row AS text), tile_data` combined for all rows in the `tiles` table/view, ordered by z,x,y. If there are no rows, or all of them are NULL, the hash of an empty string is used.
+
+The `mbtiles` tool will compute the `agg_tiles_hash` value when copying or validating mbtiles files.
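+
+For illustration, the aggregate hash is computed with a query roughly like the one below (a simplified sketch of the actual implementation; `md5_concat` and `md5` are the hash functions registered from the `sqlite-hashes` crate):
+
+```sql, ignore
+-- Concatenate z/x/y and tile data for every row of the tiles table/view,
+-- in z,x,y order, and hash the result; fall back to md5('') when empty.
+SELECT hex(
+           coalesce(
+               md5_concat(
+                   cast(zoom_level AS text),
+                   cast(tile_column AS text),
+                   cast(tile_row AS text),
+                   tile_data
+               ),
+               md5('')
+           )
+       )
+FROM tiles
+ORDER BY zoom_level, tile_column, tile_row;
+```
+
+To recompute and store the value explicitly, run something like `mbtiles validate --update-agg-tiles-hash src_file.mbtiles`.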
+ ## Supported Schema -The `mbtiles` tool supports three different kinds of schema for `tiles` data in `.mbtiles` files: - -- `flat`: - ``` - CREATE TABLE tiles (zoom_level integer, tile_column integer, tile_row integer, tile_data blob); - CREATE UNIQUE INDEX tile_index on tiles (zoom_level, tile_column, tile_row); - ``` -- `flat-with-hash`: - ``` - CREATE TABLE tiles_with_hash (zoom_level integer NOT NULL, tile_column integer NOT NULL, tile_row integer NOT NULL, tile_data blob, tile_hash text); - CREATE UNIQUE INDEX tiles_with_hash_index on tiles_with_hash (zoom_level, tile_column, tile_row); - CREATE VIEW tiles AS SELECT zoom_level, tile_column, tile_row, tile_data FROM tiles_with_hash; - ``` -- `normalized`: - ``` - CREATE TABLE map (zoom_level INTEGER, tile_column INTEGER, tile_row INTEGER, tile_id TEXT); - CREATE UNIQUE INDEX map_index ON map (zoom_level, tile_column, tile_row); - CREATE TABLE images (tile_data blob, tile_id text); - CREATE UNIQUE INDEX images_id ON images (tile_id); - CREATE VIEW tiles AS - SELECT - map.zoom_level AS zoom_level, - map.tile_column AS tile_column, - map.tile_row AS tile_row, - images.tile_data AS tile_data - FROM map - JOIN images ON images.tile_id = map.tile_id; - ``` - Optionally, `.mbtiles` files with `normalized` schema can include a `tiles_with_hash` view: - ``` - CREATE VIEW tiles_with_hash AS - SELECT - map.zoom_level AS zoom_level, - map.tile_column AS tile_column, - map.tile_row AS tile_row, - images.tile_data AS tile_data, - images.tile_id AS tile_hash - FROM map - JOIN images ON images.tile_id = map.tile_id; - ``` - **__Note:__** All `normalized` files created by the `mbtiles` tool will contain this view. - -For more general spec information, see [here](https://github.com/mapbox/mbtiles-spec#readme). +The `mbtiles` tool supports three different kinds of schema for `tiles` data in `.mbtiles` files. See also the original [specification](https://github.com/mapbox/mbtiles-spec#readme). 
+
+#### flat
+```sql, ignore
+CREATE TABLE tiles (zoom_level integer, tile_column integer, tile_row integer, tile_data blob);
+CREATE UNIQUE INDEX tile_index on tiles (zoom_level, tile_column, tile_row);
+```
+
+#### flat-with-hash
+```sql, ignore
+CREATE TABLE tiles_with_hash (
+    zoom_level integer NOT NULL,
+    tile_column integer NOT NULL,
+    tile_row integer NOT NULL,
+    tile_data blob,
+    tile_hash text);
+CREATE UNIQUE INDEX tiles_with_hash_index on tiles_with_hash (zoom_level, tile_column, tile_row);
+CREATE VIEW tiles AS SELECT zoom_level, tile_column, tile_row, tile_data FROM tiles_with_hash;
+```
+
+#### normalized
+```sql, ignore
+CREATE TABLE map (zoom_level INTEGER, tile_column INTEGER, tile_row INTEGER, tile_id TEXT);
+CREATE UNIQUE INDEX map_index ON map (zoom_level, tile_column, tile_row);
+CREATE TABLE images (tile_data blob, tile_id text);
+CREATE UNIQUE INDEX images_id ON images (tile_id);
+CREATE VIEW tiles AS
+    SELECT
+        map.zoom_level AS zoom_level,
+        map.tile_column AS tile_column,
+        map.tile_row AS tile_row,
+        images.tile_data AS tile_data
+    FROM map
+    JOIN images ON images.tile_id = map.tile_id;
+```
+
+Optionally, `.mbtiles` files with `normalized` schema can include a `tiles_with_hash` view:
+
+```sql, ignore
+CREATE VIEW tiles_with_hash AS
+    SELECT
+        map.zoom_level AS zoom_level,
+        map.tile_column AS tile_column,
+        map.tile_row AS tile_row,
+        images.tile_data AS tile_data,
+        images.tile_id AS tile_hash
+    FROM map
+    JOIN images ON images.tile_id = map.tile_id;
+```
+
+**__Note:__** All `normalized` files created by the `mbtiles` tool will contain this view.
diff --git a/martin-mbtiles/src/bin/main.rs b/martin-mbtiles/src/bin/main.rs
index 7d8721a05..de3976eb9 100644
--- a/martin-mbtiles/src/bin/main.rs
+++ b/martin-mbtiles/src/bin/main.rs
@@ -67,7 +67,7 @@ enum Commands {
         /// Value to specify the extent of the SQLite integrity check performed
         #[arg(long, value_enum, default_value_t=IntegrityCheckType::default())]
         integrity_check: IntegrityCheckType,
-        /// Generate a hash of the tile data hashes and store under the 'agg_tiles_hash' key in metadata
+        /// Update the `agg_tiles_hash` metadata value instead of using it to validate the tile store.
#[arg(long)] update_agg_tiles_hash: bool, }, @@ -148,7 +148,7 @@ async fn validate_mbtiles( if update_agg_tiles_hash { mbt.update_agg_tiles_hash(&mut conn).await?; } else { - mbt.check_agg_tile_hashes(&mut conn).await?; + mbt.check_agg_tiles_hashes(&mut conn).await?; } Ok(()) } diff --git a/martin-mbtiles/src/mbtiles.rs b/martin-mbtiles/src/mbtiles.rs index 4912e4e72..fc878a095 100644 --- a/martin-mbtiles/src/mbtiles.rs +++ b/martin-mbtiles/src/mbtiles.rs @@ -491,7 +491,7 @@ impl Mbtiles { Ok(()) } - pub async fn check_agg_tile_hashes(&self, conn: &mut T) -> MbtResult<()> + pub async fn check_agg_tiles_hashes(&self, conn: &mut T) -> MbtResult<()> where for<'e> &'e mut T: SqliteExecutor<'e>, { @@ -745,7 +745,7 @@ mod tests { async fn validate_invalid_file() { let (mut conn, mbt) = open("../tests/fixtures/files/invalid/invalid_zoomed_world_cities.mbtiles").await; - let result = mbt.check_agg_tile_hashes(&mut conn).await; + let result = mbt.check_agg_tiles_hashes(&mut conn).await; assert!(matches!(result, Err(MbtError::AggHashMismatch(..)))); } } diff --git a/martin-mbtiles/src/tile_copier.rs b/martin-mbtiles/src/tile_copier.rs index e556b79ac..eae8f5248 100644 --- a/martin-mbtiles/src/tile_copier.rs +++ b/martin-mbtiles/src/tile_copier.rs @@ -46,7 +46,7 @@ pub struct TileCopierOptions { /// Compare source file with this file, and only copy non-identical tiles to destination #[cfg_attr(feature = "cli", arg(long))] diff_with_file: Option, - /// Skip generating a global hash for mbtiles validation. By default, if dst_type is flat-with-hash or normalized, generate a global hash and store in the metadata table + /// Skip generating a global hash for mbtiles validation. By default, `mbtiles` will compute `agg_tiles_hash` metadata value. #[cfg_attr(feature = "cli", arg(long))] skip_agg_tiles_hash: bool, } @@ -237,8 +237,7 @@ impl TileCopier { } }; - if !self.options.skip_agg_tiles_hash && (dst_type == FlatWithHash || dst_type == Normalized) - { + if !self.options.skip_agg_tiles_hash { self.dst_mbtiles.update_agg_tiles_hash(&mut conn).await?; } From ad8bba536d3d98292864e0d50ffe643e8ffe1c6c Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Thu, 28 Sep 2023 00:06:59 -0400 Subject: [PATCH 2/4] wip --- martin-mbtiles/src/mbtiles.rs | 108 ++++++++++++++++-------------- martin-mbtiles/src/tile_copier.rs | 67 +++++++++++------- 2 files changed, 100 insertions(+), 75 deletions(-) diff --git a/martin-mbtiles/src/mbtiles.rs b/martin-mbtiles/src/mbtiles.rs index fc878a095..8de3cc78a 100644 --- a/martin-mbtiles/src/mbtiles.rs +++ b/martin-mbtiles/src/mbtiles.rs @@ -15,11 +15,8 @@ use serde::ser::SerializeStruct; use serde::Serialize; use serde_json::{Value as JSONValue, Value}; use sqlite_hashes::register_md5_function; -use sqlite_hashes::rusqlite::{ - Connection as RusqliteConnection, Connection, OpenFlags, OptionalExtension, -}; -use sqlx::sqlite::SqliteRow; -use sqlx::{query, Row, SqliteExecutor}; +use sqlx::sqlite::{SqliteConnectOptions, SqliteRow}; +use sqlx::{query, Connection as _, Row, SqliteConnection, SqliteExecutor}; use tilejson::{tilejson, Bounds, Center, TileJSON}; use crate::errors::{MbtError, MbtResult}; @@ -96,6 +93,24 @@ impl Mbtiles { }) } + pub async fn open_with_hashes(&self, readonly: bool) -> MbtResult { + let opt = SqliteConnectOptions::new() + .filename(self.filepath()) + .read_only(readonly); + let mut conn = SqliteConnection::connect_with(&opt).await?; + self.attach_hash_fn(&mut conn).await?; + Ok(conn) + } + + async fn attach_hash_fn(&self, conn: &mut SqliteConnection) -> 
MbtResult<()> { + let handle = conn.lock_handle().await?.as_raw_handle().as_ptr(); + // Safety: we know that the handle is a SQLite connection is locked and is not used anywhere else. + // The registered functions will be dropped when SQLX drops DB connection. + let rc = unsafe { sqlite_hashes::rusqlite::Connection::from_handle(handle) }?; + register_md5_function(&rc)?; + Ok(()) + } + #[must_use] pub fn filepath(&self) -> &str { &self.filepath @@ -420,41 +435,6 @@ impl Mbtiles { Err(MbtError::NoUniquenessConstraint(self.filepath.clone())) } - /// Compute the hash of the combined tiles in the mbtiles file tiles table/view. - /// This should work on all mbtiles files perf `MBTiles` specification. - fn calc_agg_tiles_hash(&self) -> MbtResult { - Ok(self.open_with_hashes(true)?.query_row_and_then( - // The md5_concat func will return NULL if there are no rows in the tiles table. - // For our use case, we will treat it as an empty string, and hash that. - "SELECT hex( - coalesce( - md5_concat( - cast(zoom_level AS text), - cast(tile_column AS text), - cast(tile_row AS text), - tile_data - ), - md5('') - ) - ) - FROM tiles - ORDER BY zoom_level, tile_column, tile_row;", - [], - |row| row.get(0), - )?) - } - - pub(crate) fn open_with_hashes(&self, is_readonly: bool) -> MbtResult { - let flags = if is_readonly { - OpenFlags::SQLITE_OPEN_READ_ONLY - } else { - OpenFlags::default() - }; - let rusqlite_conn = RusqliteConnection::open_with_flags(self.filepath(), flags)?; - register_md5_function(&rusqlite_conn)?; - Ok(rusqlite_conn) - } - /// Perform `SQLite` internal integrity check pub async fn check_integrity( &self, @@ -499,7 +479,8 @@ impl Mbtiles { return Err(AggHashValueNotFound(self.filepath().to_string())); }; - let computed = self.calc_agg_tiles_hash()?; + // let conn = self.open_with_hashes(true)?; + let computed = calc_agg_tiles_hash(&mut *conn).await?; if stored != computed { let file = self.filepath().to_string(); return Err(AggHashMismatch(computed, stored, file)); @@ -509,12 +490,12 @@ impl Mbtiles { } /// Compute new aggregate tiles hash and save it to the metadata table (if needed) - pub async fn update_agg_tiles_hash(&self, conn: &mut T) -> MbtResult<()> + pub(crate) async fn update_agg_tiles_hash(&self, conn: &mut T) -> MbtResult<()> where for<'e> &'e mut T: SqliteExecutor<'e>, { let old_hash = self.get_agg_tiles_hash(&mut *conn).await?; - let hash = self.calc_agg_tiles_hash()?; + let hash = calc_agg_tiles_hash(&mut *conn).await?; if old_hash.as_ref() == Some(&hash) { info!( "agg_tiles_hash is already set to the correct value `{hash}` in {}", @@ -570,21 +551,50 @@ impl Mbtiles { } }; - self.open_with_hashes(true)? - .query_row_and_then(sql, [], |r| Ok((r.get(0)?, r.get(1)?))) - .optional()? - .map_or(Ok(()), |v: (String, String)| { - Err(IncorrectTileHash(self.filepath().to_string(), v.0, v.1)) + query(sql) + .fetch_optional(&mut *conn) + .await? + .map_or(Ok(()), |v| { + Err(IncorrectTileHash( + self.filepath().to_string(), + v.get(0), + v.get(1), + )) }) } } +/// Compute the hash of the combined tiles in the mbtiles file tiles table/view. +/// This should work on all mbtiles files perf `MBTiles` specification. +async fn calc_agg_tiles_hash(conn: &mut T) -> MbtResult +where + for<'e> &'e mut T: SqliteExecutor<'e>, +{ + let query = query( + // The md5_concat func will return NULL if there are no rows in the tiles table. + // For our use case, we will treat it as an empty string, and hash that. 
+ "SELECT hex( + coalesce( + md5_concat( + cast(zoom_level AS text), + cast(tile_column AS text), + cast(tile_row AS text), + tile_data + ), + md5('') + ) + ) + FROM tiles + ORDER BY zoom_level, tile_column, tile_row;", + ); + return Ok(query.fetch_one(conn).await?.get::(0)); +} + #[cfg(test)] mod tests { use std::collections::HashMap; use martin_tile_utils::Encoding; - use sqlx::{Connection, SqliteConnection}; use tilejson::VectorLayer; use super::*; diff --git a/martin-mbtiles/src/tile_copier.rs b/martin-mbtiles/src/tile_copier.rs index eae8f5248..05616c4d0 100644 --- a/martin-mbtiles/src/tile_copier.rs +++ b/martin-mbtiles/src/tile_copier.rs @@ -175,21 +175,15 @@ impl TileCopier { let dst_type = if is_empty { let dst_type = self.options.dst_type.unwrap_or(src_type); - self.create_new_mbtiles(&mut conn, dst_type, src_type) - .await?; + self.init_new_mbtiles(&mut conn, src_type, dst_type).await?; dst_type } else if self.options.diff_with_file.is_some() { return Err(MbtError::NonEmptyTargetFile(self.options.dst_file)); } else { - open_and_detect_type(&self.dst_mbtiles).await? + self.dst_mbtiles.detect_type(&mut conn).await? }; - let rusqlite_conn = self.dst_mbtiles.open_with_hashes(false)?; - rusqlite_conn.execute( - "ATTACH DATABASE ? AS sourceDb", - [self.src_mbtiles.filepath()], - )?; - + self.attach_source_db(&mut conn, &self.src_mbtiles).await?; let (on_dupl, sql_cond) = self.get_on_duplicate_sql(dst_type); let (select_from, query_args) = { @@ -197,8 +191,10 @@ impl TileCopier { let diff_with_mbtiles = Mbtiles::new(diff_file)?; let diff_type = open_and_detect_type(&diff_with_mbtiles).await?; - rusqlite_conn - .execute("ATTACH DATABASE ? AS newDb", [diff_with_mbtiles.filepath()])?; + let path = diff_with_mbtiles.filepath(); + query!("ATTACH DATABASE ? AS newDb", path) + .execute(&mut *conn) + .await?; Self::get_select_from_with_diff(dst_type, diff_type) } else { @@ -238,29 +234,31 @@ impl TileCopier { }; if !self.options.skip_agg_tiles_hash { - self.dst_mbtiles.update_agg_tiles_hash(&mut conn).await?; + self.dst_mbtiles + .update_agg_tiles_hash_conn(&mut conn, &rusqlite_conn) + .await?; } Ok(conn) } - async fn create_new_mbtiles( + async fn init_new_mbtiles( &self, conn: &mut SqliteConnection, - dst_type: MbtType, - src_type: MbtType, + src: MbtType, + dst: MbtType, ) -> MbtResult<()> { + query!("PRAGMA page_size = 512").execute(&mut *conn).await?; + query!("VACUUM").execute(&mut *conn).await?; + let path = self.src_mbtiles.filepath(); query!("ATTACH DATABASE ? AS sourceDb", path) .execute(&mut *conn) .await?; - query!("PRAGMA page_size = 512").execute(&mut *conn).await?; - query!("VACUUM").execute(&mut *conn).await?; - - if dst_type == src_type { + if src == dst { // DB objects must be created in a specific order: tables, views, triggers, indexes. - for row in query( + let sql_objects = query( "SELECT sql FROM sourceDb.sqlite_schema WHERE tbl_name IN ('metadata', 'tiles', 'map', 'images', 'tiles_with_hash') @@ -273,19 +271,20 @@ impl TileCopier { ELSE 5 END", ) .fetch_all(&mut *conn) - .await? 
- { + .await?; + + for row in sql_objects { query(row.get(0)).execute(&mut *conn).await?; } } else { - match dst_type { + match dst { Flat => self.create_flat_tables(&mut *conn).await?, FlatWithHash => self.create_flat_with_hash_tables(&mut *conn).await?, Normalized => self.create_normalized_tables(&mut *conn).await?, }; }; - if dst_type == Normalized { + if dst == Normalized { query( "CREATE VIEW tiles_with_hash AS SELECT @@ -372,7 +371,7 @@ impl TileCopier { }; format!( - "AND NOT EXISTS ( + "AND NOT EXISTS ( SELECT 1 FROM {main_table} WHERE @@ -381,7 +380,7 @@ impl TileCopier { AND {main_table}.tile_row = sourceDb.{main_table}.tile_row AND {main_table}.{tile_identifier} != sourceDb.{main_table}.{tile_identifier} )" - ) + ) }), } } @@ -456,6 +455,22 @@ impl TileCopier { } } +async fn attach_source_db(conn: &mut SqliteConnection, src: &Mbtiles) -> MbtResult<()> { + let path = src.filepath(); + query!("ATTACH DATABASE ? AS sourceDb", path) + .execute(&mut *conn) + .await?; + Ok(()) +} + +async fn attach_other_db(conn: &mut SqliteConnection, other: &Mbtiles) -> MbtResult<()> { + let path = other.filepath(); + query!("ATTACH DATABASE ? AS otherDb", path) + .execute(&mut *conn) + .await?; + Ok(()) +} + async fn open_and_detect_type(mbtiles: &Mbtiles) -> MbtResult { let opt = SqliteConnectOptions::new() .read_only(true) From d52787a683c7cb5d253dae297f3b38989e3922eb Mon Sep 17 00:00:00 2001 From: Yuri Astrakhan Date: Thu, 28 Sep 2023 12:24:44 -0400 Subject: [PATCH 3/4] wip --- ...3c46f61ff92ffbc6ec3bba4860abd60d224cb.json | 12 + ...7e779ecf324e1862945fbd18da4bf5baf565b.json | 12 + ...2ee47cfc72b56f6ed275a0b0688047405498f.json | 12 + ...1f52ce710d8978e3b35b59b724fc5bee9f55c.json | 12 + martin-mbtiles/src/bin/main.rs | 6 +- martin-mbtiles/src/lib.rs | 4 +- martin-mbtiles/src/mbtiles.rs | 34 +- martin-mbtiles/src/tile_copier.rs | 293 +++++++++--------- 8 files changed, 213 insertions(+), 172 deletions(-) create mode 100644 martin-mbtiles/.sqlx/query-45de99a3628a53940ef80b0e2603c46f61ff92ffbc6ec3bba4860abd60d224cb.json create mode 100644 martin-mbtiles/.sqlx/query-a115609880b2c6ed3beeb5aaf8c7e779ecf324e1862945fbd18da4bf5baf565b.json create mode 100644 martin-mbtiles/.sqlx/query-d1d61dfa7c34dafb4588f78e23b2ee47cfc72b56f6ed275a0b0688047405498f.json create mode 100644 martin-mbtiles/.sqlx/query-e13e2e17d5bf56287bc0fd7c55a1f52ce710d8978e3b35b59b724fc5bee9f55c.json diff --git a/martin-mbtiles/.sqlx/query-45de99a3628a53940ef80b0e2603c46f61ff92ffbc6ec3bba4860abd60d224cb.json b/martin-mbtiles/.sqlx/query-45de99a3628a53940ef80b0e2603c46f61ff92ffbc6ec3bba4860abd60d224cb.json new file mode 100644 index 000000000..7f4b7b65a --- /dev/null +++ b/martin-mbtiles/.sqlx/query-45de99a3628a53940ef80b0e2603c46f61ff92ffbc6ec3bba4860abd60d224cb.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "ATTACH DATABASE ? AS srcDb", + "describe": { + "columns": [], + "parameters": { + "Right": 1 + }, + "nullable": [] + }, + "hash": "45de99a3628a53940ef80b0e2603c46f61ff92ffbc6ec3bba4860abd60d224cb" +} diff --git a/martin-mbtiles/.sqlx/query-a115609880b2c6ed3beeb5aaf8c7e779ecf324e1862945fbd18da4bf5baf565b.json b/martin-mbtiles/.sqlx/query-a115609880b2c6ed3beeb5aaf8c7e779ecf324e1862945fbd18da4bf5baf565b.json new file mode 100644 index 000000000..fc0b3c0c2 --- /dev/null +++ b/martin-mbtiles/.sqlx/query-a115609880b2c6ed3beeb5aaf8c7e779ecf324e1862945fbd18da4bf5baf565b.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "ATTACH DATABASE ? 
AS newDb", + "describe": { + "columns": [], + "parameters": { + "Right": 1 + }, + "nullable": [] + }, + "hash": "a115609880b2c6ed3beeb5aaf8c7e779ecf324e1862945fbd18da4bf5baf565b" +} diff --git a/martin-mbtiles/.sqlx/query-d1d61dfa7c34dafb4588f78e23b2ee47cfc72b56f6ed275a0b0688047405498f.json b/martin-mbtiles/.sqlx/query-d1d61dfa7c34dafb4588f78e23b2ee47cfc72b56f6ed275a0b0688047405498f.json new file mode 100644 index 000000000..1d9e5c432 --- /dev/null +++ b/martin-mbtiles/.sqlx/query-d1d61dfa7c34dafb4588f78e23b2ee47cfc72b56f6ed275a0b0688047405498f.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "ATTACH DATABASE ? AS originalDb", + "describe": { + "columns": [], + "parameters": { + "Right": 1 + }, + "nullable": [] + }, + "hash": "d1d61dfa7c34dafb4588f78e23b2ee47cfc72b56f6ed275a0b0688047405498f" +} diff --git a/martin-mbtiles/.sqlx/query-e13e2e17d5bf56287bc0fd7c55a1f52ce710d8978e3b35b59b724fc5bee9f55c.json b/martin-mbtiles/.sqlx/query-e13e2e17d5bf56287bc0fd7c55a1f52ce710d8978e3b35b59b724fc5bee9f55c.json new file mode 100644 index 000000000..5d8f76197 --- /dev/null +++ b/martin-mbtiles/.sqlx/query-e13e2e17d5bf56287bc0fd7c55a1f52ce710d8978e3b35b59b724fc5bee9f55c.json @@ -0,0 +1,12 @@ +{ + "db_name": "SQLite", + "query": "ATTACH DATABASE ? AS diffDb", + "describe": { + "columns": [], + "parameters": { + "Right": 1 + }, + "nullable": [] + }, + "hash": "e13e2e17d5bf56287bc0fd7c55a1f52ce710d8978e3b35b59b724fc5bee9f55c" +} diff --git a/martin-mbtiles/src/bin/main.rs b/martin-mbtiles/src/bin/main.rs index de3976eb9..39579703b 100644 --- a/martin-mbtiles/src/bin/main.rs +++ b/martin-mbtiles/src/bin/main.rs @@ -2,9 +2,7 @@ use std::path::{Path, PathBuf}; use anyhow::Result; use clap::{Parser, Subcommand}; -use martin_mbtiles::{ - apply_mbtiles_diff, copy_mbtiles_file, IntegrityCheckType, Mbtiles, TileCopierOptions, -}; +use martin_mbtiles::{apply_mbtiles_diff, IntegrityCheckType, Mbtiles, TileCopierOptions}; use sqlx::sqlite::SqliteConnectOptions; use sqlx::{Connection, SqliteConnection}; @@ -88,7 +86,7 @@ async fn main() -> Result<()> { meta_set_value(file.as_path(), &key, value).await?; } Commands::Copy(opts) => { - copy_mbtiles_file(opts).await?; + opts.run().await?; } Commands::ApplyDiff { src_file, diff --git a/martin-mbtiles/src/lib.rs b/martin-mbtiles/src/lib.rs index 9c16049ef..4d90e4231 100644 --- a/martin-mbtiles/src/lib.rs +++ b/martin-mbtiles/src/lib.rs @@ -9,6 +9,4 @@ mod tile_copier; pub use errors::MbtError; pub use mbtiles::{IntegrityCheckType, Mbtiles, Metadata}; pub use mbtiles_pool::MbtilesPool; -pub use tile_copier::{ - apply_mbtiles_diff, copy_mbtiles_file, CopyDuplicateMode, TileCopierOptions, -}; +pub use tile_copier::{apply_mbtiles_diff, CopyDuplicateMode, TileCopierOptions}; diff --git a/martin-mbtiles/src/mbtiles.rs b/martin-mbtiles/src/mbtiles.rs index 8de3cc78a..ce4bb93eb 100644 --- a/martin-mbtiles/src/mbtiles.rs +++ b/martin-mbtiles/src/mbtiles.rs @@ -98,19 +98,10 @@ impl Mbtiles { .filename(self.filepath()) .read_only(readonly); let mut conn = SqliteConnection::connect_with(&opt).await?; - self.attach_hash_fn(&mut conn).await?; + attach_hash_fn(&mut conn).await?; Ok(conn) } - async fn attach_hash_fn(&self, conn: &mut SqliteConnection) -> MbtResult<()> { - let handle = conn.lock_handle().await?.as_raw_handle().as_ptr(); - // Safety: we know that the handle is a SQLite connection is locked and is not used anywhere else. - // The registered functions will be dropped when SQLX drops DB connection. 
- let rc = unsafe { sqlite_hashes::rusqlite::Connection::from_handle(handle) }?; - register_md5_function(&rc)?; - Ok(()) - } - #[must_use] pub fn filepath(&self) -> &str { &self.filepath @@ -490,7 +481,7 @@ impl Mbtiles { } /// Compute new aggregate tiles hash and save it to the metadata table (if needed) - pub(crate) async fn update_agg_tiles_hash(&self, conn: &mut T) -> MbtResult<()> + pub async fn update_agg_tiles_hash(&self, conn: &mut T) -> MbtResult<()> where for<'e> &'e mut T: SqliteExecutor<'e>, { @@ -573,7 +564,8 @@ where let query = query( // The md5_concat func will return NULL if there are no rows in the tiles table. // For our use case, we will treat it as an empty string, and hash that. - "SELECT hex( + "SELECT + hex( coalesce( md5_concat( cast(zoom_level AS text), @@ -587,7 +579,16 @@ where FROM tiles ORDER BY zoom_level, tile_column, tile_row;", ); - return Ok(query.fetch_one(conn).await?.get::(0)); + Ok(query.fetch_one(conn).await?.get::(0)) +} + +pub async fn attach_hash_fn(conn: &mut SqliteConnection) -> MbtResult<()> { + let handle = conn.lock_handle().await?.as_raw_handle().as_ptr(); + // Safety: we know that the handle is a SQLite connection is locked and is not used anywhere else. + // The registered functions will be dropped when SQLX drops DB connection. + let rc = unsafe { sqlite_hashes::rusqlite::Connection::from_handle(handle) }?; + register_md5_function(&rc)?; + Ok(()) } #[cfg(test)] @@ -601,10 +602,9 @@ mod tests { async fn open(filepath: &str) -> (SqliteConnection, Mbtiles) { let mbt = Mbtiles::new(filepath).unwrap(); - ( - SqliteConnection::connect(mbt.filepath()).await.unwrap(), - mbt, - ) + let mut conn = SqliteConnection::connect(mbt.filepath()).await.unwrap(); + attach_hash_fn(&mut conn).await.unwrap(); + (conn, mbt) } #[actix_rt::test] diff --git a/martin-mbtiles/src/tile_copier.rs b/martin-mbtiles/src/tile_copier.rs index 05616c4d0..6827f410f 100644 --- a/martin-mbtiles/src/tile_copier.rs +++ b/martin-mbtiles/src/tile_copier.rs @@ -3,13 +3,14 @@ use std::path::PathBuf; #[cfg(feature = "cli")] use clap::{builder::ValueParser, error::ErrorKind, Args, ValueEnum}; +use sqlite_hashes::rusqlite; use sqlite_hashes::rusqlite::params_from_iter; use sqlx::sqlite::SqliteConnectOptions; use sqlx::{query, Connection, Row, SqliteConnection}; use crate::errors::MbtResult; -use crate::mbtiles::MbtType; use crate::mbtiles::MbtType::{Flat, FlatWithHash, Normalized}; +use crate::mbtiles::{attach_hash_fn, MbtType}; use crate::{MbtError, Mbtiles}; #[derive(PartialEq, Eq, Default, Debug, Clone)] @@ -147,6 +148,10 @@ impl TileCopierOptions { self.skip_agg_tiles_hash = skip_global_hash; self } + + pub async fn run(self) -> MbtResult { + TileCopier::new(self)?.run().await + } } impl TileCopier { @@ -168,6 +173,8 @@ impl TileCopier { ) .await?; + attach_hash_fn(&mut conn).await?; + let is_empty = query!("SELECT 1 as has_rows FROM sqlite_schema LIMIT 1") .fetch_optional(&mut conn) .await? @@ -175,27 +182,27 @@ impl TileCopier { let dst_type = if is_empty { let dst_type = self.options.dst_type.unwrap_or(src_type); - self.init_new_mbtiles(&mut conn, src_type, dst_type).await?; + self.create_new_mbtiles(&mut conn, src_type, dst_type) + .await?; dst_type } else if self.options.diff_with_file.is_some() { return Err(MbtError::NonEmptyTargetFile(self.options.dst_file)); } else { - self.dst_mbtiles.detect_type(&mut conn).await? 
+ let dst_type = self.dst_mbtiles.detect_type(&mut conn).await?; + attach_source_db(&mut conn, self.src_mbtiles.filepath()).await?; + dst_type }; - self.attach_source_db(&mut conn, &self.src_mbtiles).await?; let (on_dupl, sql_cond) = self.get_on_duplicate_sql(dst_type); let (select_from, query_args) = { let select_from = if let Some(diff_file) = &self.options.diff_with_file { let diff_with_mbtiles = Mbtiles::new(diff_file)?; let diff_type = open_and_detect_type(&diff_with_mbtiles).await?; - let path = diff_with_mbtiles.filepath(); query!("ATTACH DATABASE ? AS newDb", path) - .execute(&mut *conn) + .execute(&mut conn) .await?; - Self::get_select_from_with_diff(dst_type, diff_type) } else { Self::get_select_from(dst_type, src_type).to_string() @@ -206,6 +213,8 @@ impl TileCopier { (format!("{select_from} {options_sql}"), query_args) }; + let handle = conn.lock_handle().await?.as_raw_handle().as_ptr(); + let rusqlite_conn = unsafe { rusqlite::Connection::from_handle(handle) }?; match dst_type { Flat => rusqlite_conn.execute( &format!("INSERT {on_dupl} INTO tiles {select_from} {sql_cond}"), @@ -234,15 +243,13 @@ impl TileCopier { }; if !self.options.skip_agg_tiles_hash { - self.dst_mbtiles - .update_agg_tiles_hash_conn(&mut conn, &rusqlite_conn) - .await?; + self.dst_mbtiles.update_agg_tiles_hash(&mut conn).await?; } Ok(conn) } - async fn init_new_mbtiles( + async fn create_new_mbtiles( &self, conn: &mut SqliteConnection, src: MbtType, @@ -251,10 +258,7 @@ impl TileCopier { query!("PRAGMA page_size = 512").execute(&mut *conn).await?; query!("VACUUM").execute(&mut *conn).await?; - let path = self.src_mbtiles.filepath(); - query!("ATTACH DATABASE ? AS sourceDb", path) - .execute(&mut *conn) - .await?; + attach_source_db(&mut *conn, self.src_mbtiles.filepath()).await?; if src == dst { // DB objects must be created in a specific order: tables, views, triggers, indexes. @@ -455,22 +459,13 @@ impl TileCopier { } } -async fn attach_source_db(conn: &mut SqliteConnection, src: &Mbtiles) -> MbtResult<()> { - let path = src.filepath(); +async fn attach_source_db(conn: &mut SqliteConnection, path: &str) -> MbtResult<()> { query!("ATTACH DATABASE ? AS sourceDb", path) .execute(&mut *conn) .await?; Ok(()) } -async fn attach_other_db(conn: &mut SqliteConnection, other: &Mbtiles) -> MbtResult<()> { - let path = other.filepath(); - query!("ATTACH DATABASE ? AS otherDb", path) - .execute(&mut *conn) - .await?; - Ok(()) -} - async fn open_and_detect_type(mbtiles: &Mbtiles) -> MbtResult { let opt = SqliteConnectOptions::new() .read_only(true) @@ -486,8 +481,11 @@ pub async fn apply_mbtiles_diff(src_file: PathBuf, diff_file: PathBuf) -> MbtRes let src_type = open_and_detect_type(&src_mbtiles).await?; let diff_type = open_and_detect_type(&diff_mbtiles).await?; - let rusqlite_conn = src_mbtiles.open_with_hashes(false)?; - rusqlite_conn.execute("ATTACH DATABASE ? AS diffDb", [diff_mbtiles.filepath()])?; + let mut conn = src_mbtiles.open_with_hashes(false).await?; + let path = diff_mbtiles.filepath(); + query!("ATTACH DATABASE ? 
AS diffDb", path) + .execute(&mut conn) + .await?; let select_from = if src_type == Flat { "SELECT zoom_level, tile_column, tile_row, tile_data FROM diffDb.tiles" @@ -513,32 +511,43 @@ pub async fn apply_mbtiles_diff(src_file: PathBuf, diff_file: PathBuf) -> MbtRes }; for statement in insert_sql { - rusqlite_conn.execute(&format!("{statement} WHERE tile_data NOTNULL"), ())?; + query(&format!("{statement} WHERE tile_data NOTNULL")) + .execute(&mut conn) + .await?; } - rusqlite_conn.execute( - &format!( - "DELETE FROM {main_table} + query(&format!( + "DELETE FROM {main_table} WHERE (zoom_level, tile_column, tile_row) IN ( SELECT zoom_level, tile_column, tile_row FROM ({select_from} WHERE tile_data ISNULL) )" - ), - (), - )?; + )) + .execute(&mut conn) + .await?; Ok(()) } -pub async fn copy_mbtiles_file(opts: TileCopierOptions) -> MbtResult { - TileCopier::new(opts)?.run().await -} - #[cfg(test)] mod tests { use sqlx::{Decode, Sqlite, SqliteConnection, Type}; use super::*; + async fn attach_other_db(conn: &mut SqliteConnection, path: &str) -> MbtResult<()> { + query!("ATTACH DATABASE ? AS otherDb", path) + .execute(&mut *conn) + .await?; + Ok(()) + } + + async fn attach_src_db(conn: &mut SqliteConnection, path: &str) -> MbtResult<()> { + query!("ATTACH DATABASE ? AS srcDb", path) + .execute(&mut *conn) + .await?; + Ok(()) + } + async fn get_one(conn: &mut SqliteConnection, sql: &str) -> T where for<'r> T: Decode<'r, Sqlite> + Type, @@ -551,37 +560,34 @@ mod tests { dst_filepath: PathBuf, dst_type: Option, expected_dst_type: MbtType, - ) { - let mut dst_conn = copy_mbtiles_file( - TileCopierOptions::new(src_filepath.clone(), dst_filepath.clone()).dst_type(dst_type), - ) - .await - .unwrap(); + ) -> MbtResult<()> { + let mut dst_conn = TileCopierOptions::new(src_filepath.clone(), dst_filepath.clone()) + .dst_type(dst_type) + .run() + .await?; - query("ATTACH DATABASE ? AS srcDb") - .bind(src_filepath.clone().to_str().unwrap()) - .execute(&mut dst_conn) - .await - .unwrap(); + attach_src_db(&mut dst_conn, src_filepath.to_str().unwrap()).await?; assert_eq!( - open_and_detect_type(&Mbtiles::new(dst_filepath).unwrap()) - .await - .unwrap(), + open_and_detect_type(&Mbtiles::new(dst_filepath)?).await?, expected_dst_type ); assert!( query("SELECT * FROM srcDb.tiles EXCEPT SELECT * FROM tiles") .fetch_optional(&mut dst_conn) - .await - .unwrap() + .await? 
.is_none() - ) + ); + + Ok(()) } - async fn verify_copy_with_zoom_filter(opts: TileCopierOptions, expected_zoom_levels: u8) { - let mut dst_conn = copy_mbtiles_file(opts).await.unwrap(); + async fn verify_copy_with_zoom_filter( + opts: TileCopierOptions, + expected_zoom_levels: u8, + ) -> MbtResult<()> { + let mut dst_conn = opts.run().await?; assert_eq!( get_one::( @@ -591,104 +597,106 @@ mod tests { .await, expected_zoom_levels ); + + Ok(()) } #[actix_rt::test] - async fn copy_flat_tables() { + async fn copy_flat_tables() -> MbtResult<()> { let src = PathBuf::from("../tests/fixtures/files/world_cities.mbtiles"); let dst = PathBuf::from("file:copy_flat_tables_mem_db?mode=memory&cache=shared"); - verify_copy_all(src, dst, None, Flat).await; + verify_copy_all(src, dst, None, Flat).await } #[actix_rt::test] - async fn copy_flat_from_flat_with_hash_tables() { + async fn copy_flat_from_flat_with_hash_tables() -> MbtResult<()> { let src = PathBuf::from("../tests/fixtures/files/zoomed_world_cities.mbtiles"); let dst = PathBuf::from( "file:copy_flat_from_flat_with_hash_tables_mem_db?mode=memory&cache=shared", ); - verify_copy_all(src, dst, Some(Flat), Flat).await; + verify_copy_all(src, dst, Some(Flat), Flat).await } #[actix_rt::test] - async fn copy_flat_from_normalized_tables() { + async fn copy_flat_from_normalized_tables() -> MbtResult<()> { let src = PathBuf::from("../tests/fixtures/files/geography-class-png.mbtiles"); let dst = PathBuf::from("file:copy_flat_from_normalized_tables_mem_db?mode=memory&cache=shared"); - verify_copy_all(src, dst, Some(Flat), Flat).await; + verify_copy_all(src, dst, Some(Flat), Flat).await } #[actix_rt::test] - async fn copy_flat_with_hash_tables() { + async fn copy_flat_with_hash_tables() -> MbtResult<()> { let src = PathBuf::from("../tests/fixtures/files/zoomed_world_cities.mbtiles"); let dst = PathBuf::from("file:copy_flat_with_hash_tables_mem_db?mode=memory&cache=shared"); - verify_copy_all(src, dst, None, FlatWithHash).await; + verify_copy_all(src, dst, None, FlatWithHash).await } #[actix_rt::test] - async fn copy_flat_with_hash_from_flat_tables() { + async fn copy_flat_with_hash_from_flat_tables() -> MbtResult<()> { let src = PathBuf::from("../tests/fixtures/files/world_cities.mbtiles"); let dst = PathBuf::from( "file:copy_flat_with_hash_from_flat_tables_mem_db?mode=memory&cache=shared", ); - verify_copy_all(src, dst, Some(FlatWithHash), FlatWithHash).await; + verify_copy_all(src, dst, Some(FlatWithHash), FlatWithHash).await } #[actix_rt::test] - async fn copy_flat_with_hash_from_normalized_tables() { + async fn copy_flat_with_hash_from_normalized_tables() -> MbtResult<()> { let src = PathBuf::from("../tests/fixtures/files/geography-class-png.mbtiles"); let dst = PathBuf::from( "file:copy_flat_with_hash_from_normalized_tables_mem_db?mode=memory&cache=shared", ); - verify_copy_all(src, dst, Some(FlatWithHash), FlatWithHash).await; + verify_copy_all(src, dst, Some(FlatWithHash), FlatWithHash).await } #[actix_rt::test] - async fn copy_normalized_tables() { + async fn copy_normalized_tables() -> MbtResult<()> { let src = PathBuf::from("../tests/fixtures/files/geography-class-png.mbtiles"); let dst = PathBuf::from("file:copy_normalized_tables_mem_db?mode=memory&cache=shared"); - verify_copy_all(src, dst, None, Normalized).await; + verify_copy_all(src, dst, None, Normalized).await } #[actix_rt::test] - async fn copy_normalized_from_flat_tables() { + async fn copy_normalized_from_flat_tables() -> MbtResult<()> { let src = 
PathBuf::from("../tests/fixtures/files/world_cities.mbtiles"); let dst = PathBuf::from("file:copy_normalized_from_flat_tables_mem_db?mode=memory&cache=shared"); - verify_copy_all(src, dst, Some(Normalized), Normalized).await; + verify_copy_all(src, dst, Some(Normalized), Normalized).await } #[actix_rt::test] - async fn copy_normalized_from_flat_with_hash_tables() { + async fn copy_normalized_from_flat_with_hash_tables() -> MbtResult<()> { let src = PathBuf::from("../tests/fixtures/files/zoomed_world_cities.mbtiles"); let dst = PathBuf::from( "file:copy_normalized_from_flat_with_hash_tables_mem_db?mode=memory&cache=shared", ); - verify_copy_all(src, dst, Some(Normalized), Normalized).await; + verify_copy_all(src, dst, Some(Normalized), Normalized).await } #[actix_rt::test] - async fn copy_with_min_max_zoom() { + async fn copy_with_min_max_zoom() -> MbtResult<()> { let src = PathBuf::from("../tests/fixtures/files/world_cities.mbtiles"); let dst = PathBuf::from("file:copy_with_min_max_zoom_mem_db?mode=memory&cache=shared"); let opt = TileCopierOptions::new(src, dst) .min_zoom(Some(2)) .max_zoom(Some(4)); - verify_copy_with_zoom_filter(opt, 3).await; + verify_copy_with_zoom_filter(opt, 3).await } #[actix_rt::test] - async fn copy_with_zoom_levels() { + async fn copy_with_zoom_levels() -> MbtResult<()> { let src = PathBuf::from("../tests/fixtures/files/world_cities.mbtiles"); let dst = PathBuf::from("file:copy_with_zoom_levels_mem_db?mode=memory&cache=shared"); let opt = TileCopierOptions::new(src, dst) .min_zoom(Some(2)) .max_zoom(Some(4)) .zoom_levels(vec![1, 6]); - verify_copy_with_zoom_filter(opt, 2).await; + verify_copy_with_zoom_filter(opt, 2).await } #[actix_rt::test] - async fn copy_with_diff_with_file() { + async fn copy_with_diff_with_file() -> MbtResult<()> { let src = PathBuf::from("../tests/fixtures/files/geography-class-jpg.mbtiles"); let dst = PathBuf::from("file:copy_with_diff_with_file_mem_db?mode=memory&cache=shared"); @@ -698,12 +706,11 @@ mod tests { let copy_opts = TileCopierOptions::new(src.clone(), dst.clone()).diff_with_file(diff_file.clone()); - let mut dst_conn = copy_mbtiles_file(copy_opts).await.unwrap(); + let mut dst_conn = copy_opts.run().await?; assert!(query("SELECT 1 FROM sqlite_schema WHERE name = 'tiles';") .fetch_optional(&mut dst_conn) - .await - .unwrap() + .await? .is_some()); assert_eq!( @@ -731,10 +738,12 @@ mod tests { ) .await .is_none()); + + Ok(()) } #[actix_rt::test] - async fn ignore_dst_type_when_copy_to_existing() { + async fn ignore_dst_type_when_copy_to_existing() -> MbtResult<()> { let src_file = PathBuf::from("../tests/fixtures/files/world_cities_modified.mbtiles"); // Copy the dst file to an in-memory DB @@ -743,11 +752,11 @@ mod tests { "file:ignore_dst_type_when_copy_to_existing_mem_db?mode=memory&cache=shared", ); - let _dst_conn = copy_mbtiles_file(TileCopierOptions::new(dst_file.clone(), dst.clone())) - .await - .unwrap(); + let _dst_conn = TileCopierOptions::new(dst_file.clone(), dst.clone()) + .run() + .await?; - verify_copy_all(src_file, dst, Some(Normalized), Flat).await; + verify_copy_all(src_file, dst, Some(Normalized), Flat).await } #[actix_rt::test] @@ -759,13 +768,13 @@ mod tests { TileCopierOptions::new(src.clone(), dst.clone()).on_duplicate(CopyDuplicateMode::Abort); assert!(matches!( - copy_mbtiles_file(copy_opts).await.unwrap_err(), + copy_opts.run().await.unwrap_err(), MbtError::RusqliteError(..) 
)); } #[actix_rt::test] - async fn copy_to_existing_override_mode() { + async fn copy_to_existing_override_mode() -> MbtResult<()> { let src_file = PathBuf::from("../tests/fixtures/files/world_cities_modified.mbtiles"); // Copy the dst file to an in-memory DB @@ -773,32 +782,28 @@ mod tests { let dst = PathBuf::from("file:copy_to_existing_override_mode_mem_db?mode=memory&cache=shared"); - let _dst_conn = copy_mbtiles_file(TileCopierOptions::new(dst_file.clone(), dst.clone())) - .await - .unwrap(); + let _dst_conn = TileCopierOptions::new(dst_file.clone(), dst.clone()) + .run() + .await?; - let mut dst_conn = copy_mbtiles_file(TileCopierOptions::new(src_file.clone(), dst.clone())) - .await - .unwrap(); + let mut dst_conn = TileCopierOptions::new(src_file.clone(), dst.clone()) + .run() + .await?; // Verify the tiles in the destination file is a superset of the tiles in the source file - query("ATTACH DATABASE ? AS otherDb") - .bind(src_file.clone().to_str().unwrap()) - .execute(&mut dst_conn) - .await - .unwrap(); - + attach_other_db(&mut dst_conn, src_file.to_str().unwrap()).await?; assert!( query("SELECT * FROM otherDb.tiles EXCEPT SELECT * FROM tiles;") .fetch_optional(&mut dst_conn) - .await - .unwrap() + .await? .is_none() ); + + Ok(()) } #[actix_rt::test] - async fn copy_to_existing_ignore_mode() { + async fn copy_to_existing_ignore_mode() -> MbtResult<()> { let src_file = PathBuf::from("../tests/fixtures/files/world_cities_modified.mbtiles"); // Copy the dst file to an in-memory DB @@ -806,28 +811,23 @@ mod tests { let dst = PathBuf::from("file:copy_to_existing_ignore_mode_mem_db?mode=memory&cache=shared"); - let _dst_conn = copy_mbtiles_file(TileCopierOptions::new(dst_file.clone(), dst.clone())) - .await - .unwrap(); + let _dst_conn = TileCopierOptions::new(dst_file.clone(), dst.clone()) + .run() + .await?; - let mut dst_conn = copy_mbtiles_file( - TileCopierOptions::new(src_file.clone(), dst.clone()) - .on_duplicate(CopyDuplicateMode::Ignore), - ) - .await - .unwrap(); + let mut dst_conn = TileCopierOptions::new(src_file.clone(), dst.clone()) + .on_duplicate(CopyDuplicateMode::Ignore) + .run() + .await?; // Verify the tiles in the destination file are the same as those in the source file except for those with duplicate (zoom_level, tile_column, tile_row) - query("ATTACH DATABASE ? AS srcDb") - .bind(src_file.clone().to_str().unwrap()) - .execute(&mut dst_conn) - .await - .unwrap(); - query("ATTACH DATABASE ? AS originalDb") - .bind(dst_file.clone().to_str().unwrap()) + attach_src_db(&mut dst_conn, src_file.to_str().unwrap()).await?; + + let path = dst_file.to_str().unwrap(); + query!("ATTACH DATABASE ? AS originalDb", path) .execute(&mut dst_conn) - .await - .unwrap(); + .await?; + // Create a temporary table with all the tiles in the original database and // all the tiles in the source database except for those that conflict with tiles in the original database query("CREATE TEMP TABLE expected_tiles AS @@ -840,7 +840,7 @@ mod tests { ON t1.zoom_level = t2.zoom_level AND t1.tile_column = t2.tile_column AND t1.tile_row = t2.tile_row") .execute(&mut dst_conn) .await - .unwrap(); + ?; // Ensure all entries in expected_tiles are in tiles and vice versa assert!(query( @@ -849,68 +849,65 @@ mod tests { SELECT * FROM tiles EXCEPT SELECT * FROM expected_tiles" ) .fetch_optional(&mut dst_conn) - .await - .unwrap() + .await? 
.is_none()); + + Ok(()) } #[actix_rt::test] - async fn apply_flat_diff_file() { + async fn apply_flat_diff_file() -> MbtResult<()> { // Copy the src file to an in-memory DB let src_file = PathBuf::from("../tests/fixtures/files/world_cities.mbtiles"); let src = PathBuf::from("file:apply_flat_diff_file_mem_db?mode=memory&cache=shared"); - let mut src_conn = copy_mbtiles_file(TileCopierOptions::new(src_file.clone(), src.clone())) - .await - .unwrap(); + let mut src_conn = TileCopierOptions::new(src_file.clone(), src.clone()) + .run() + .await?; // Apply diff to the src data in in-memory DB let diff_file = PathBuf::from("../tests/fixtures/files/world_cities_diff.mbtiles"); - apply_mbtiles_diff(src, diff_file).await.unwrap(); + apply_mbtiles_diff(src, diff_file).await?; // Verify the data is the same as the file the diff was generated from let path = "../tests/fixtures/files/world_cities_modified.mbtiles"; - query!("ATTACH DATABASE ? AS otherDb", path) - .execute(&mut src_conn) - .await - .unwrap(); + attach_other_db(&mut src_conn, path).await?; assert!( query("SELECT * FROM tiles EXCEPT SELECT * FROM otherDb.tiles;") .fetch_optional(&mut src_conn) - .await - .unwrap() + .await? .is_none() ); + + Ok(()) } #[actix_rt::test] - async fn apply_normalized_diff_file() { + async fn apply_normalized_diff_file() -> MbtResult<()> { // Copy the src file to an in-memory DB let src_file = PathBuf::from("../tests/fixtures/files/geography-class-jpg.mbtiles"); let src = PathBuf::from("file:apply_normalized_diff_file_mem_db?mode=memory&cache=shared"); - let mut src_conn = copy_mbtiles_file(TileCopierOptions::new(src_file.clone(), src.clone())) - .await - .unwrap(); + let mut src_conn = TileCopierOptions::new(src_file.clone(), src.clone()) + .run() + .await?; // Apply diff to the src data in in-memory DB let diff_file = PathBuf::from("../tests/fixtures/files/geography-class-jpg-diff.mbtiles"); - apply_mbtiles_diff(src, diff_file).await.unwrap(); + apply_mbtiles_diff(src, diff_file).await?; // Verify the data is the same as the file the diff was generated from let path = "../tests/fixtures/files/geography-class-jpg-modified.mbtiles"; - query!("ATTACH DATABASE ? AS otherDb", path) - .execute(&mut src_conn) - .await - .unwrap(); + attach_other_db(&mut src_conn, path).await?; assert!( query("SELECT * FROM tiles EXCEPT SELECT * FROM otherDb.tiles;") .fetch_optional(&mut src_conn) - .await - .unwrap() + .await? 
                .is_none()
        );
+
+        Ok(())
     }
 }

From bcff949126b3297fd7028b488b4704f036ac98aa Mon Sep 17 00:00:00 2001
From: Yuri Astrakhan
Date: Thu, 28 Sep 2023 12:58:41 -0400
Subject: [PATCH 4/4] fix tests

---
 Cargo.lock                    |  4 ++--
 docs/src/tools.md             |  2 +-
 martin-mbtiles/src/mbtiles.rs | 27 ++++++++++++++-------------
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index fbdfe457a..097c9d88e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2978,9 +2978,9 @@ dependencies = [
 
 [[package]]
 name = "sqlite-hashes"
-version = "0.3.2"
+version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f756a7c1f66e2d70c9acb5881776ba0ae25ba2aaf68e2f69ed32d96c42313fab"
+checksum = "fd203121770e67b5f689ebf9592c88d3529193743f35630413f419be8ef1e835"
 dependencies = [
  "digest",
  "md-5",
diff --git a/docs/src/tools.md b/docs/src/tools.md
index d59d8e6cb..a2e35b5e0 100644
--- a/docs/src/tools.md
+++ b/docs/src/tools.md
@@ -74,7 +74,7 @@ The original [MBTiles specification](https://github.com/mapbox/mbtiles-spec#read
 
 A typical Normalized schema generated by tools like [tilelive-copy](https://github.com/mapbox/TileLive#bintilelive-copy) uses an MD5 hash in the `tile_id` column. Martin's `mbtiles` tool can use this hash to verify the content of each tile. We also define a new `flat-with-hash` schema that stores the hash and tile data in the same table. This schema is more efficient than the `normalized` schema when the data has no duplicate tiles (see below). Per-tile validation is not available for the `flat` schema.
 
-Per-tile validation will catch individual invalid tiles, but it will not detect overall datastore corruption (e.g. missing tiles, tiles that shouldn't exist, or tiles with incorrect z/x/y values). For that, the Martin `mbtiles` tool defines a new metadata value called `agg_tiles_hash`. The value is computed by hashing `cast(zoom_level AS text), cast(tile_column AS text), cast(tile_row AS text), tile_data` combined for all rows in the `tiles` table/view, ordered by z,x,y. If there are no rows, or all of them are NULL, the hash of an empty string is used.
+Per-tile validation will catch individual invalid tiles, but it will not detect overall datastore corruption (e.g. missing tiles, tiles that shouldn't exist, or tiles with incorrect z/x/y values). For that, the Martin `mbtiles` tool defines a new metadata value called `agg_tiles_hash`. The value is computed by hashing `cast(zoom_level AS text), cast(tile_column AS text), cast(tile_row AS text), cast(tile_data as blob)` combined for all rows in the `tiles` table/view, ordered by z,x,y. If there are no rows, or all of them are NULL, the hash of an empty string is used.
 
 The `mbtiles` tool will compute the `agg_tiles_hash` value when copying or validating mbtiles files.
 
diff --git a/martin-mbtiles/src/mbtiles.rs b/martin-mbtiles/src/mbtiles.rs
index ce4bb93eb..b37cbd8df 100644
--- a/martin-mbtiles/src/mbtiles.rs
+++ b/martin-mbtiles/src/mbtiles.rs
@@ -564,20 +564,21 @@ where
     let query = query(
         // The md5_concat func will return NULL if there are no rows in the tiles table.
         // For our use case, we will treat it as an empty string, and hash that.
+ // Note that in some weird rare cases, a column with blob type may be stored as an integer value "SELECT - hex( - coalesce( - md5_concat( - cast(zoom_level AS text), - cast(tile_column AS text), - cast(tile_row AS text), - tile_data - ), - md5('') - ) - ) - FROM tiles - ORDER BY zoom_level, tile_column, tile_row;", + hex( + coalesce( + md5_concat( + cast(zoom_level AS text), + cast(tile_column AS text), + cast(tile_row AS text), + cast(tile_data as blob) + ), + md5('') + ) + ) + FROM tiles + ORDER BY zoom_level, tile_column, tile_row;", ); Ok(query.fetch_one(conn).await?.get::(0)) }