From 6edf595773b7934659ccbf2d0d6f099501af8c7b Mon Sep 17 00:00:00 2001 From: liuq19 Date: Fri, 9 Aug 2024 14:57:37 +0800 Subject: [PATCH] ci: support windows --- .github/workflows/ci.yml | 19 +- Cargo.toml | 10 +- benches/deserialize_struct.rs | 6 +- benches/deserialize_value.rs | 3 +- benches/schema/Cargo.toml | 8 + benches/schema/README.md | 4 + .../schema/licences/LICENSE-json-benchmark | 23 ++ benches/schema/src/canada.rs | 41 ++++ benches/schema/src/citm_catalog.rs | 73 ++++++ benches/schema/src/color.rs | 85 +++++++ benches/schema/src/empty.rs | 41 ++++ benches/schema/src/enumstr.rs | 52 +++++ benches/schema/src/lib.rs | 8 + benches/schema/src/prim_str.rs | 59 +++++ benches/schema/src/twitter.rs | 221 ++++++++++++++++++ benches/serialize_struct.rs | 10 +- benches/serialize_value.rs | 3 +- examples/json_number.rs | 8 +- scripts/sanitize.sh | 42 ++-- src/lazyvalue/iterator.rs | 26 +-- src/parser.rs | 2 +- src/reader.rs | 8 +- src/serde/mod.rs | 2 +- 23 files changed, 690 insertions(+), 64 deletions(-) create mode 100644 benches/schema/Cargo.toml create mode 100644 benches/schema/README.md create mode 100644 benches/schema/licences/LICENSE-json-benchmark create mode 100644 benches/schema/src/canada.rs create mode 100644 benches/schema/src/citm_catalog.rs create mode 100644 benches/schema/src/color.rs create mode 100644 benches/schema/src/empty.rs create mode 100644 benches/schema/src/enumstr.rs create mode 100644 benches/schema/src/lib.rs create mode 100644 benches/schema/src/prim_str.rs create mode 100644 benches/schema/src/twitter.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8d279b0..b3324b4 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu, macos] + os: [ubuntu, macos, windows] timeout-minutes: 45 steps: - uses: actions/checkout@v4 @@ -26,7 +26,6 @@ jobs: - run: cargo test --features arbitrary_precision - run: cargo test --features 
sort_keys - run: ./scripts/run_examples.sh - - run: ./scripts/sanitize.sh test-stable-self: name: Rust stable on self-hosted arm @@ -46,7 +45,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu, macos] + os: [ubuntu, macos, windows] timeout-minutes: 45 steps: - uses: actions/checkout@v4 @@ -84,6 +83,20 @@ jobs: cargo clippy --all-targets --all-features -- -D warnings cargo fmt -- --check + sanitize: + runs-on: [self-hosted, X64] + steps: + - uses: actions/checkout@v3 + - uses: dtolnay/rust-toolchain@stable + with: + components: rustfmt, clippy + - uses: actions-rs/clippy-check@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + - name: Sanitize + run: ./scripts/sanitize.sh + + # check-spelling: # name: Check spelling # runs-on: ubuntu-latest diff --git a/Cargo.toml b/Cargo.toml index a2e7ed3..51b5519 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,19 +29,19 @@ parking_lot = "0.12" page_size = "0.6" -[dev-dependencies] +[target.'cfg(not(target_env = "msvc"))'.dev-dependencies] jemallocator = "0.5" -serde = { version = "1.0", features = ["derive"] } + +[dev-dependencies] serde_json = { version = "1.0", features = ["float_roundtrip", "raw_value"] } simd-json = "0.13" core_affinity = "0.8" criterion = { version = "0.5", features = ["html_reports"] } gjson = "0.8" serde_derive = "1.0" -faststr = "0.2" encoding_rs = "0.8" -# This config will disable rustc-serialize crate to avoid security warnings in ci -json-benchmark = { git = "https://github.com/serde-rs/json-benchmark", default-features = false, features = ["all-files", "lib-serde"]} +# These schemas are used in benches and are copied from https://github.com/serde-rs/json-benchmark +schema = { path = "benches/schema" } paste = "1.0" serde_bytes = "0.11" bytes = {version = "1.4", features = ["serde"]} diff --git a/benches/deserialize_struct.rs b/benches/deserialize_struct.rs index 55fd0e5..7fcce16 100644 --- a/benches/deserialize_struct.rs +++ b/benches/deserialize_struct.rs @@ -4,9 +4,11 @@ extern crate criterion; 
use std::{fs::File, io::Read, str::from_utf8_unchecked}; use criterion::{criterion_group, BatchSize, Criterion, SamplingMode, Throughput}; +use schema::{canada::Canada, citm_catalog::CitmCatalog, twitter::Twitter}; +#[cfg(not(target_env = "msvc"))] #[global_allocator] -static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; +static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc; fn serde_json_parse_struct<'de, T>(data: &'de [u8]) -> serde_json::Result where @@ -136,8 +138,6 @@ macro_rules! bench_file { }; } -use json_benchmark::{canada::Canada, citm_catalog::CitmCatalog, twitter::Twitter}; - bench_file!( json: twitter, structure: Twitter diff --git a/benches/deserialize_value.rs b/benches/deserialize_value.rs index 110dfdc..cb23c26 100644 --- a/benches/deserialize_value.rs +++ b/benches/deserialize_value.rs @@ -5,8 +5,9 @@ use std::{fs::File, io::Read, str::from_utf8_unchecked}; use criterion::{criterion_group, BatchSize, Criterion, SamplingMode, Throughput}; +#[cfg(not(target_env = "msvc"))] #[global_allocator] -static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; +static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc; fn simdjson_to_borrowed_value(data: &mut [u8]) { let _ = simd_json::to_borrowed_value(data).unwrap(); diff --git a/benches/schema/Cargo.toml b/benches/schema/Cargo.toml new file mode 100644 index 0000000..15e4984 --- /dev/null +++ b/benches/schema/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "schema" +version = "0.1.0" +edition = "2021" + +[dependencies] +serde = { version = "1.0", features = ["derive"] } + diff --git a/benches/schema/README.md b/benches/schema/README.md new file mode 100644 index 0000000..5a799c7 --- /dev/null +++ b/benches/schema/README.md @@ -0,0 +1,4 @@ + +# JSON Schema + +Schema used in benches, copied from `https://github.com/serde-rs/json-benchmark`. 
diff --git a/benches/schema/licences/LICENSE-json-benchmark b/benches/schema/licences/LICENSE-json-benchmark new file mode 100644 index 0000000..31aa793 --- /dev/null +++ b/benches/schema/licences/LICENSE-json-benchmark @@ -0,0 +1,23 @@ +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
diff --git a/benches/schema/src/canada.rs b/benches/schema/src/canada.rs new file mode 100644 index 0000000..34cf8b9 --- /dev/null +++ b/benches/schema/src/canada.rs @@ -0,0 +1,41 @@ +use std::collections::BTreeMap as Map; + +use serde::{Deserialize, Serialize}; + +use crate::enum_str; + +pub type Canada = FeatureCollection; + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct FeatureCollection { + #[serde(rename = "type")] + pub obj_type: ObjType, + pub features: Vec, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Feature { + #[serde(rename = "type")] + pub obj_type: ObjType, + pub properties: Map, + pub geometry: Geometry, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Geometry { + #[serde(rename = "type")] + pub obj_type: ObjType, + pub coordinates: Vec>, +} + +pub type Latitude = f32; +pub type Longitude = f32; + +enum_str!(ObjType { + FeatureCollection("FeatureCollection"), + Feature("Feature"), + Polygon("Polygon"), +}); diff --git a/benches/schema/src/citm_catalog.rs b/benches/schema/src/citm_catalog.rs new file mode 100644 index 0000000..29eeefa --- /dev/null +++ b/benches/schema/src/citm_catalog.rs @@ -0,0 +1,73 @@ +use std::collections::BTreeMap as Map; + +use serde::{Deserialize, Serialize}; + +use crate::{empty, prim_str::PrimStr}; + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields, rename_all = "camelCase")] +pub struct CitmCatalog { + pub area_names: Map, + pub audience_sub_category_names: Map, + pub block_names: Map, + pub events: Map, + pub performances: Vec, + pub seat_category_names: Map, + pub sub_topic_names: Map, + pub subject_names: Map, + pub topic_names: Map, + pub topic_sub_topics: Map>, + pub venue_names: Map, +} + +pub type Id = u32; +pub type IdStr = PrimStr; + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields, rename_all = "camelCase")] +pub struct Event { + pub description: (), + pub id: Id, + pub logo: 
Option, + pub name: String, + pub sub_topic_ids: Vec, + pub subject_code: (), + pub subtitle: (), + pub topic_ids: Vec, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields, rename_all = "camelCase")] +pub struct Performance { + pub event_id: Id, + pub id: Id, + pub logo: Option, + pub name: (), + pub prices: Vec, + pub seat_categories: Vec, + pub seat_map_image: (), + pub start: u64, + pub venue_code: String, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields, rename_all = "camelCase")] +pub struct Price { + pub amount: u32, + pub audience_sub_category_id: Id, + pub seat_category_id: Id, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields, rename_all = "camelCase")] +pub struct SeatCategory { + pub areas: Vec, + pub seat_category_id: Id, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields, rename_all = "camelCase")] +pub struct Area { + pub area_id: Id, + pub block_ids: empty::Array, +} diff --git a/benches/schema/src/color.rs b/benches/schema/src/color.rs new file mode 100644 index 0000000..cdb8d4d --- /dev/null +++ b/benches/schema/src/color.rs @@ -0,0 +1,85 @@ +use std::{fmt, mem::MaybeUninit, ptr, slice, str}; + +use serde::{ + de::{self, Deserialize, Deserializer, Unexpected}, + ser::{Serialize, Serializer}, +}; + +#[derive(Clone, Copy)] +pub struct Color(u32); + +const HEX_LUT: &[u8] = b"\ + 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F\ + 202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F\ + 404142434445464748494A4B4C4D4E4F505152535455565758595A5B5C5D5E5F\ + 606162636465666768696A6B6C6D6E6F707172737475767778797A7B7C7D7E7F\ + 808182838485868788898A8B8C8D8E8F909192939495969798999A9B9C9D9E9F\ + A0A1A2A3A4A5A6A7A8A9AAABACADAEAFB0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF\ + C0C1C2C3C4C5C6C7C8C9CACBCCCDCECFD0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF\ + E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEFF0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF"; + +impl Color { + fn as_str(self, buf: &mut MaybeUninit<[u8; 6]>) -> 
&str { + let buf_len = 6; + let buf_ptr = buf.as_mut_ptr() as *mut u8; + let lut_ptr = HEX_LUT.as_ptr(); + + let r = ((self.0 & 0xFF0000) >> 15) as isize; + let g = ((self.0 & 0x00FF00) >> 7) as isize; + let b = ((self.0 & 0x0000FF) << 1) as isize; + + unsafe { + ptr::copy_nonoverlapping(lut_ptr.offset(r), buf_ptr, 2); + ptr::copy_nonoverlapping(lut_ptr.offset(g), buf_ptr.offset(2), 2); + ptr::copy_nonoverlapping(lut_ptr.offset(b), buf_ptr.offset(4), 2); + + str::from_utf8(slice::from_raw_parts(buf_ptr, buf_len)).unwrap() + } + } +} + +impl Serialize for Color { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut buf = MaybeUninit::uninit(); + serializer.serialize_str(self.as_str(&mut buf)) + } +} + +impl<'de> Deserialize<'de> for Color { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct Visitor; + + impl<'de> de::Visitor<'de> for Visitor { + type Value = Color; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("color string") + } + + fn visit_str(self, value: &str) -> Result + where + E: de::Error, + { + match u32::from_str_radix(value, 16) { + Ok(hex) => Ok(Color(hex)), + Err(_) => Err(E::invalid_value(Unexpected::Str(value), &self)), + } + } + } + + deserializer.deserialize_str(Visitor) + } +} + +#[test] +fn test_color() { + let mut buf = MaybeUninit::uninit(); + let string = Color(0xA0A0A0).as_str(&mut buf); + assert_eq!(string, "A0A0A0"); +} diff --git a/benches/schema/src/empty.rs b/benches/schema/src/empty.rs new file mode 100644 index 0000000..3736d4b --- /dev/null +++ b/benches/schema/src/empty.rs @@ -0,0 +1,41 @@ +use std::fmt; + +use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; + +#[derive(Clone, Copy)] +pub struct Array; + +impl Serialize for Array { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + [(); 0].serialize(serializer) + } +} + +impl<'de> Deserialize<'de> for Array { + fn 
deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + struct Visitor; + + impl<'de> de::Visitor<'de> for Visitor { + type Value = Array; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("empty array") + } + + fn visit_seq(self, _: V) -> Result + where + V: de::SeqAccess<'de>, + { + Ok(Array) + } + } + + deserializer.deserialize_tuple(0, Visitor) + } +} diff --git a/benches/schema/src/enumstr.rs b/benches/schema/src/enumstr.rs new file mode 100644 index 0000000..001fb2d --- /dev/null +++ b/benches/schema/src/enumstr.rs @@ -0,0 +1,52 @@ +#[macro_export] +macro_rules! enum_str { + ($name:ident { $($variant:ident($str:expr), )* }) => { + #[derive(Clone, Copy)] + pub enum $name { + $($variant,)* + } + + impl $name { + fn as_str(self) -> &'static str { + match self { + $( $name::$variant => $str, )* + } + } + } + + impl ::serde::Serialize for $name { + fn serialize(&self, serializer: S) -> Result + where S: ::serde::Serializer, + { + serializer.serialize_str(self.as_str()) + } + } + + impl<'de> ::serde::Deserialize<'de> for $name { + fn deserialize(deserializer: D) -> Result + where D: ::serde::Deserializer<'de>, + { + struct Visitor; + + impl<'de> ::serde::de::Visitor<'de> for Visitor { + type Value = $name; + + fn expecting(&self, formatter: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { + formatter.write_str("unit variant") + } + + fn visit_str(self, value: &str) -> Result<$name, E> + where E: ::serde::de::Error, + { + match value { + $( $str => Ok($name::$variant), )* + _ => Err(E::invalid_value(::serde::de::Unexpected::Str(value), &self)), + } + } + } + + deserializer.deserialize_str(Visitor) + } + } + } +} diff --git a/benches/schema/src/lib.rs b/benches/schema/src/lib.rs new file mode 100644 index 0000000..48af8e3 --- /dev/null +++ b/benches/schema/src/lib.rs @@ -0,0 +1,8 @@ +pub mod canada; +pub mod citm_catalog; +pub mod enumstr; +pub mod twitter; + +mod color; +mod empty; +mod prim_str; 
diff --git a/benches/schema/src/prim_str.rs b/benches/schema/src/prim_str.rs new file mode 100644 index 0000000..a1e6a24 --- /dev/null +++ b/benches/schema/src/prim_str.rs @@ -0,0 +1,59 @@ +use std::{fmt, fmt::Display, str::FromStr}; + +use serde::{ + de::{self, Deserialize, Deserializer, Unexpected}, + ser::{Serialize, Serializer}, +}; + +#[derive(Clone, Copy, Ord, PartialOrd, Eq, PartialEq)] +pub struct PrimStr(T) +where + T: Copy + Ord + Display + FromStr; + +impl Serialize for PrimStr +where + T: Copy + Ord + Display + FromStr, +{ + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.collect_str(&self.0) + } +} + +impl<'de, T> Deserialize<'de> for PrimStr +where + T: Copy + Ord + Display + FromStr, +{ + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + use std::marker::PhantomData; + struct Visitor(PhantomData); + + impl<'de, T> de::Visitor<'de> for Visitor + where + T: Copy + Ord + Display + FromStr, + { + type Value = PrimStr; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("number represented as string") + } + + fn visit_str(self, value: &str) -> Result, E> + where + E: de::Error, + { + match T::from_str(value) { + Ok(id) => Ok(PrimStr(id)), + Err(_) => Err(E::invalid_value(Unexpected::Str(value), &self)), + } + } + } + + deserializer.deserialize_str(Visitor(PhantomData)) + } +} diff --git a/benches/schema/src/twitter.rs b/benches/schema/src/twitter.rs new file mode 100644 index 0000000..0edff8f --- /dev/null +++ b/benches/schema/src/twitter.rs @@ -0,0 +1,221 @@ +use serde::{Deserialize, Serialize}; + +use crate::{color::Color, empty, enum_str, prim_str::PrimStr}; + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Twitter { + pub statuses: Vec, + pub search_metadata: SearchMetadata, +} + +pub type LongId = u64; +pub type ShortId = u32; +pub type LongIdStr = PrimStr; +pub type ShortIdStr = PrimStr; + +#[derive(Serialize, 
Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Status { + pub metadata: Metadata, + pub created_at: String, + pub id: LongId, + pub id_str: LongIdStr, + pub text: String, + pub source: String, + pub truncated: bool, + pub in_reply_to_status_id: Option, + pub in_reply_to_status_id_str: Option, + pub in_reply_to_user_id: Option, + pub in_reply_to_user_id_str: Option, + pub in_reply_to_screen_name: Option, + pub user: User, + pub geo: (), + pub coordinates: (), + pub place: (), + pub contributors: (), + pub retweeted_status: Option>, + pub retweet_count: u32, + pub favorite_count: u32, + pub entities: StatusEntities, + pub favorited: bool, + pub retweeted: bool, + pub possibly_sensitive: Option, + pub lang: LanguageCode, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Metadata { + pub result_type: ResultType, + pub iso_language_code: LanguageCode, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct User { + pub id: ShortId, + pub id_str: ShortIdStr, + pub name: String, + pub screen_name: String, + pub location: String, + pub description: String, + pub url: Option, + pub entities: UserEntities, + pub protected: bool, + pub followers_count: u32, + pub friends_count: u32, + pub listed_count: u32, + pub created_at: String, + pub favourites_count: u32, + pub utc_offset: Option, + pub time_zone: Option, + pub geo_enabled: bool, + pub verified: bool, + pub statuses_count: u32, + pub lang: LanguageCode, + pub contributors_enabled: bool, + pub is_translator: bool, + pub is_translation_enabled: bool, + pub profile_background_color: Color, + pub profile_background_image_url: String, + pub profile_background_image_url_https: String, + pub profile_background_tile: bool, + pub profile_image_url: String, + pub profile_image_url_https: String, + pub profile_banner_url: Option, + pub profile_link_color: Color, + pub profile_sidebar_border_color: Color, + pub profile_sidebar_fill_color: Color, + pub 
profile_text_color: Color, + pub profile_use_background_image: bool, + pub default_profile: bool, + pub default_profile_image: bool, + pub following: bool, + pub follow_request_sent: bool, + pub notifications: bool, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct UserEntities { + pub url: Option, + pub description: UserEntitiesDescription, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct UserUrl { + pub urls: Vec, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Url { + pub url: String, + pub expanded_url: String, + pub display_url: String, + pub indices: Indices, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct UserEntitiesDescription { + pub urls: Vec, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct StatusEntities { + pub hashtags: Vec, + pub symbols: empty::Array, + pub urls: Vec, + pub user_mentions: Vec, + pub media: Option>, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Hashtag { + pub text: String, + pub indices: Indices, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct UserMention { + pub screen_name: String, + pub name: String, + pub id: ShortId, + pub id_str: ShortIdStr, + pub indices: Indices, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Media { + pub id: LongId, + pub id_str: LongIdStr, + pub indices: Indices, + pub media_url: String, + pub media_url_https: String, + pub url: String, + pub display_url: String, + pub expanded_url: String, + #[serde(rename = "type")] + pub media_type: String, + pub sizes: Sizes, + pub source_status_id: Option, + pub source_status_id_str: Option, +} + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Sizes { + pub medium: Size, + pub small: Size, + pub thumb: Size, + pub large: Size, +} + +#[derive(Serialize, 
Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Size { + pub w: u16, + pub h: u16, + pub resize: Resize, +} + +pub type Indices = (u8, u8); + +#[derive(Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct SearchMetadata { + pub completed_in: f32, + pub max_id: LongId, + pub max_id_str: LongIdStr, + pub next_results: String, + pub query: String, + pub refresh_url: String, + pub count: u8, + pub since_id: LongId, + pub since_id_str: LongIdStr, +} + +enum_str!(Resize { + Fit("fit"), + Crop("crop"), +}); + +enum_str!(LanguageCode { + Cn("zh-cn"), + En("en"), + Es("es"), + It("it"), + Ja("ja"), + Zh("zh"), +}); + +enum_str!(ResultType { + Recent("recent"), +}); diff --git a/benches/serialize_struct.rs b/benches/serialize_struct.rs index e14eb02..1c538a8 100644 --- a/benches/serialize_struct.rs +++ b/benches/serialize_struct.rs @@ -1,12 +1,13 @@ #[macro_use] extern crate criterion; - use std::{fs::File, io::Read}; use criterion::{criterion_group, BatchSize, Criterion, SamplingMode, Throughput}; +use schema::{canada::Canada, citm_catalog::CitmCatalog, twitter::Twitter}; +#[cfg(not(target_env = "msvc"))] #[global_allocator] -static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; +static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc; fn diff_json(got: &str, expect: &str) -> bool { let value1: serde_json::Value = serde_json::from_str(got).unwrap(); @@ -100,11 +101,6 @@ macro_rules! 
bench_file { }; } -use json_benchmark::{ - canada::Canada, - copy::{citm_catalog::CitmCatalog, twitter::Twitter}, -}; - bench_file!( json: twitter, structure: Twitter diff --git a/benches/serialize_value.rs b/benches/serialize_value.rs index 1f27944..5d291ee 100644 --- a/benches/serialize_value.rs +++ b/benches/serialize_value.rs @@ -5,8 +5,9 @@ use std::{fs::File, io::Read}; use criterion::{criterion_group, BatchSize, Criterion, SamplingMode, Throughput}; +#[cfg(not(target_env = "msvc"))] #[global_allocator] -static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; +static GLOBAL: jemallocator::Jemalloc = jemallocator::Jemalloc; fn simdjson_to_string(val: &simd_json::value::borrowed::Value) { let _ = simd_json::to_string(val).unwrap(); diff --git a/examples/json_number.rs b/examples/json_number.rs index 55dd504..3b3eb86 100644 --- a/examples/json_number.rs +++ b/examples/json_number.rs @@ -1,10 +1,4 @@ -use sonic_rs::{from_str, to_string, Deserialize, JsonNumberTrait, Number, RawNumber, Serialize}; - -#[derive(Debug, Deserialize, Serialize, PartialEq)] -struct TestJsonNumber { - num: Number, - raw_num: RawNumber, -} +use sonic_rs::{from_str, to_string, JsonNumberTrait, Number, RawNumber}; fn main() { // parse RawNumber from JSON number diff --git a/scripts/sanitize.sh b/scripts/sanitize.sh index b1e045f..81b823b 100755 --- a/scripts/sanitize.sh +++ b/scripts/sanitize.sh @@ -5,34 +5,40 @@ set -ex export ASAN_OPTIONS="disable_coredump=0:unmap_shadow_on_exit=1:abort_on_error=1" testcase_lists() { - cargo test -- -Zunstable-options --list --format json | jq -c 'select(.type=="test") | .name' | awk -F'"' '{print $2}' | awk '{print ($2) ? $3 : $1}' + cargo test -- -Zunstable-options --list --format json + local result=$? + if [ ${result} -ne 0 ]; then + exit -1 + fi + cargo test -- -Zunstable-options --list --format json | jq -c 'select(.type=="test") | .name' | awk -F'"' '{print $2}' | awk '{print ($2) ? $3 : $1}' return $? 
} sanitize() { - SAN=$1 - TARGET=$2 - TESTCASE=$3 - echo "Running tests with $SAN on $TARGET" - + local san="$1" + local target="$2" + local testcase="$3" # use single thread to make error info more readable and accurate - RUSTFLAGS="-Zsanitizer=$SAN" RUSTDOCFLAGS="-Zsanitizer=$SAN" cargo test --target $TARGET $3 -- --test-threads=1 - - RUSTFLAGS="-Zsanitizer=$SAN" RUSTDOCFLAGS="-Zsanitizer=$SAN" cargo test --doc --package sonic-rs --target $TARGET $3 -- --show-output --test-threads=1 + RUSTFLAGS="-Zsanitizer=${san}" RUSTDOCFLAGS="-Zsanitizer=${san}" cargo test --target ${target} ${testcase} -- --test-threads=1 + RUSTFLAGS="-Zsanitizer=${san}" RUSTDOCFLAGS="-Zsanitizer=${san}" cargo test --doc --package sonic-rs --target ${target} ${testcase} -- --show-output --test-threads=1 } sanitize_single() { - SAN=$1 - TARGET=$2 - for CASE in $(testcase_lists); do - sanitize $SAN $TARGET $CASE + local san="$1" + local target="$2" + local lists=$(testcase_lists) + for case in ${lists}; do + sanitize ${san} ${target} ${case} + done +} + +main() { + for san in address leak; do + echo "Running tests with $san" + sanitize_single $san "x86_64-unknown-linux-gnu" done } -for san in address leak; do - echo "Running tests with $san" - # sanitize $san "x86_64-unknown-linux-gnu" - sanitize_single $san "x86_64-unknown-linux-gnu" -done +main "$@" diff --git a/src/lazyvalue/iterator.rs b/src/lazyvalue/iterator.rs index 3359774..3cb533a 100644 --- a/src/lazyvalue/iterator.rs +++ b/src/lazyvalue/iterator.rs @@ -67,7 +67,7 @@ pub struct ArrayJsonIter<'de>(ArrayInner<'de>); struct ObjectInner<'de> { json: JsonSlice<'de>, - parser: Option>>, + parser: Option>>, strbuf: Vec, first: bool, ending: bool, @@ -76,7 +76,7 @@ struct ObjectInner<'de> { struct ArrayInner<'de> { json: JsonSlice<'de>, - parser: Option>>, + parser: Option>>, first: bool, ending: bool, check: bool, @@ -104,17 +104,15 @@ impl<'de> ObjectInner<'de> { let slice = unsafe { std::slice::from_raw_parts(slice.as_ptr(), slice.len()) 
}; let parser = Parser::new(Read::new(slice, check)); // check invalid utf8 - match parser.read.check_utf8_final() { - Err(err) if check => { - self.ending = true; - return Some(Err(err)); - } - _ => {} + if let Err(err) = parser.read.check_utf8_final() { + self.ending = true; + return Some(Err(err)); } self.parser = Some(parser); } let parser = unsafe { self.parser.as_mut().unwrap_unchecked() }; + unsafe { parser.read.update_slice(self.json.as_ref().as_ptr()) }; match parser.parse_entry_lazy(&mut self.strbuf, &mut self.first, check) { Ok(ret) => { if let Some((key, val, has_escaped)) = ret { @@ -154,17 +152,15 @@ impl<'de> ArrayInner<'de> { let slice = unsafe { std::slice::from_raw_parts(slice.as_ptr(), slice.len()) }; let parser = Parser::new(Read::new(slice, check)); // check invalid utf8 - match parser.read.check_utf8_final() { - Err(err) if check => { - self.ending = true; - return Some(Err(err)); - } - _ => {} + if let Err(err) = parser.read.check_utf8_final() { + self.ending = true; + return Some(Err(err)); } self.parser = Some(parser); } - let parser = unsafe { self.parser.as_mut().unwrap_unchecked() }; + let parser = self.parser.as_mut().unwrap(); + unsafe { parser.read.update_slice(self.json.as_ref().as_ptr()) }; match parser.parse_array_elem_lazy(&mut self.first, check) { Ok(ret) => { if let Some((ret, has_escaped)) = ret { diff --git a/src/parser.rs b/src/parser.rs index d219298..70c3a2f 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -181,7 +181,7 @@ where #[cold] pub(crate) fn error(&self, mut reason: ErrorCode) -> Error { // check invalid utf8 here at first - // FIXME: maybe has invalid utf8 when deserialzing into byte, and just bytes has other + // FIXME: maybe has invalid utf8 when deserializing into byte, and just bytes has other // errors? 
if let Err(e) = self.read.check_utf8_final() { return e; diff --git a/src/reader.rs b/src/reader.rs index e5b37c1..dc008aa 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -70,13 +70,13 @@ pub trait Reader<'de>: Sealed { #[inline(always)] fn next(&mut self) -> Option { - self.peek().map(|a| { + self.peek().inspect(|_| { self.eat(1); - a }) } fn cur_ptr(&mut self) -> *mut u8; + unsafe fn update_slice(&mut self, _start: *const u8) {} /// # Safety /// cur must be a valid pointer in the slice unsafe fn set_ptr(&mut self, cur: *mut u8); @@ -197,6 +197,10 @@ impl<'a> Reader<'a> for Read<'a> { } } + unsafe fn update_slice(&mut self, start: *const u8) { + self.slice = std::slice::from_raw_parts(start, self.slice.len()); + } + #[inline(always)] fn cur_ptr(&mut self) -> *mut u8 { panic!("should only used in PaddedSliceRead"); diff --git a/src/serde/mod.rs b/src/serde/mod.rs index d7befc1..51d1e7c 100644 --- a/src/serde/mod.rs +++ b/src/serde/mod.rs @@ -275,7 +275,7 @@ mod test { #[test] fn test_struct() { - use json_benchmark::{citm_catalog::CitmCatalog, twitter::Twitter}; + use schema::{citm_catalog::CitmCatalog, twitter::Twitter}; let mut vec = Vec::new(); read_file("twitter.json", &mut vec); let _value: Twitter = from_slice(&vec).unwrap();