From f8d15001959d6b9afab2c8b7be6f98660b7d1333 Mon Sep 17 00:00:00 2001 From: Martin Haug Date: Thu, 10 Oct 2024 14:40:07 +0200 Subject: [PATCH] Integer deserialization for page ranges (#231) --- src/csl/taxonomy.rs | 11 ++++- src/types/mod.rs | 115 ++++++++++++++++++++++++++++--------------- src/types/numeric.rs | 4 +- src/types/page.rs | 105 +++++++++++++++++++++++++++++++++++---- 4 files changed, 182 insertions(+), 53 deletions(-) diff --git a/src/csl/taxonomy.rs b/src/csl/taxonomy.rs index 778e152..de98feb 100644 --- a/src/csl/taxonomy.rs +++ b/src/csl/taxonomy.rs @@ -810,8 +810,15 @@ impl EntryLike for citationberg::json::Item { ) -> Option> { match variable { PageVariable::Page => match self.0.get("page")? { - csl_json::Value::Number(n) => { - Some(MaybeTyped::Typed(PageRanges::from(*n as u64))) + &csl_json::Value::Number(n) => { + // Page ranges use i32 internally, so we check whether the + // number is in range. + Some(match i32::try_from(n) { + Ok(n) => MaybeTyped::Typed(PageRanges::from(n)), + // If the number is not in range, we degrade to a + // string, which disables some CSL features. + Err(_) => MaybeTyped::String(n.to_string()), + }) } csl_json::Value::String(s) => { let res = MaybeTyped::::infallible_from_str(s); diff --git a/src/types/mod.rs b/src/types/mod.rs index 5fbd5b5..437ced6 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -46,7 +46,41 @@ macro_rules! deserialize_from_str { D: serde::Deserializer<'de>, { let s = <&'de str>::deserialize(deserializer)?; - FromStr::from_str(s).map_err(de::Error::custom) + FromStr::from_str(s).map_err(serde::de::Error::custom) + } + } + }; +} + +macro_rules! custom_deserialize { + ($type_name:ident where $expect:literal $($additional_visitors:item)+) => { + impl<'de> Deserialize<'de> for $type_name { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + use std::fmt; + use serde::de::{Visitor}; + struct OurVisitor; + + impl<'de> Visitor<'de> for OurVisitor { + type Value = $type_name; + + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str($expect) + } + + fn visit_str(self, value: &str) -> Result + where + E: serde::de::Error, + { + Self::Value::from_str(value).map_err(|e| E::custom(e.to_string())) + } + + $($additional_visitors)* + } + + deserializer.deserialize_any(OurVisitor) } } }; @@ -75,53 +109,29 @@ macro_rules! derive_or_from_str { } - impl<'de> Deserialize<'de> for $s { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - use std::fmt; - use serde::de::{self, Visitor}; - struct OurVisitor; - - impl<'de> Visitor<'de> for OurVisitor { - type Value = $s; - - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str($expect) - } - - fn visit_str(self, value: &str) -> Result - where - E: serde::de::Error, - { - Self::Value::from_str(value).map_err(|e| E::custom(e.to_string())) - } + crate::types::custom_deserialize!( + $s where $expect + fn visit_map(self, map: A) -> Result + where A: serde::de::MapAccess<'de>, { + use serde::{de, Deserialize}; - fn visit_map(self, map: A) -> Result - where - A: serde::de::MapAccess<'de>, - { - #[derive(Deserialize)] - #[serde(rename_all = "kebab-case")] - struct Inner { - $( - $(#[serde $serde])* - $i: $t, - )* - } - - Deserialize::deserialize(de::value::MapAccessDeserializer::new(map)) - .map(|inner: Inner| $s { $($i: inner.$i),* }) - } + #[derive(Deserialize)] + #[serde(rename_all = "kebab-case")] + struct Inner { + $( + $(#[serde $serde])* + $i: $t, + )* } - deserializer.deserialize_any(OurVisitor) + Deserialize::deserialize(de::value::MapAccessDeserializer::new(map)) + .map(|inner: Inner| $s { $($i: inner.$i),* }) } - } + ); }; } +use custom_deserialize; use derive_or_from_str; use deserialize_from_str; use serialize_display; @@ -595,4 +605,27 @@ mod tests { assert!(Numeric::from_str("second").is_err()); assert!(Numeric::from_str("2nd edition").is_err()); } + + #[test] + #[cfg(feature = "biblatex")] + fn test_issue_227() { + let yaml = r#" +AAAnonymous_AventureMortevielle_1987: + type: Book + page-range: 100"#; + + let library = crate::io::from_yaml_str(yaml).unwrap(); + let entry = library.get("AAAnonymous_AventureMortevielle_1987").unwrap(); + assert_eq!( + entry + .page_range + .as_ref() + .unwrap() + .as_typed() + .unwrap() + .first() + .unwrap(), + &Numeric::new(100) + ); + } } diff --git a/src/types/numeric.rs b/src/types/numeric.rs index 9716cf3..b242d5f 100644 --- a/src/types/numeric.rs +++ b/src/types/numeric.rs @@ -46,7 +46,9 @@ impl<'de> Deserialize<'de> for Numeric { /// A default serde fallthrough handler for signed integers. fn visit_i64(self, v: i64) -> Result { - Ok(Numeric::new(v.try_into().map_err(|_| E::custom("value too large"))?)) + Ok(Numeric::new( + v.try_into().map_err(|_| E::custom("value out of bounds"))?, + )) } fn visit_i32(self, v: i32) -> Result { diff --git a/src/types/page.rs b/src/types/page.rs index ecf231e..af792eb 100644 --- a/src/types/page.rs +++ b/src/types/page.rs @@ -1,9 +1,14 @@ -use std::{cmp::Ordering, fmt::Display, num::NonZeroUsize, str::FromStr}; +use std::{ + cmp::Ordering, + fmt::Display, + num::{NonZeroUsize, TryFromIntError}, + str::FromStr, +}; use crate::{MaybeTyped, Numeric, NumericError}; -use super::{deserialize_from_str, serialize_display}; -use serde::{de, Deserialize, Serialize}; +use super::{custom_deserialize, serialize_display}; +use serde::{Deserialize, Serialize}; use thiserror::Error; impl MaybeTyped { @@ -23,6 +28,22 @@ pub struct PageRanges { pub ranges: Vec, } +custom_deserialize!( + PageRanges where "pages, page ranges, ampersands, and commas" + fn visit_i32(self, v: i32) -> Result { + Ok(PageRanges::from(v)) + } + fn visit_u32(self, v: u32) -> Result { + PageRanges::try_from(v).map_err(|_| E::custom("value too large")) + } + fn visit_i64(self, v: i64) -> Result { + PageRanges::try_from(v).map_err(|_| E::custom("value out of bounds")) + } + fn visit_u64(self, v: u64) -> Result { + PageRanges::try_from(v).map_err(|_| E::custom("value too large")) + } +); + impl PageRanges { /// Create a new `PageRanges` struct. pub fn new(ranges: Vec) -> Self { @@ -76,12 +97,36 @@ impl PageRanges { } } -impl From for PageRanges { - fn from(value: u64) -> Self { +impl From for PageRanges { + fn from(value: i32) -> Self { Self { ranges: vec![value.into()] } } } +impl TryFrom for PageRanges { + type Error = TryFromIntError; + + fn try_from(value: u32) -> Result { + Ok(Self { ranges: vec![value.try_into()?] }) + } +} + +impl TryFrom for PageRanges { + type Error = TryFromIntError; + + fn try_from(value: i64) -> Result { + Ok(Self { ranges: vec![value.try_into()?] }) + } +} + +impl TryFrom for PageRanges { + type Error = TryFromIntError; + + fn try_from(value: u64) -> Result { + Ok(Self { ranges: vec![value.try_into()?] }) + } +} + impl Display for PageRanges { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.ranges.iter().try_for_each(|r| r.fmt(f)) @@ -116,6 +161,22 @@ pub enum PageRangesPart { Range(Numeric, Numeric), } +custom_deserialize!( + PageRangesPart where "a page, a page range, or a separator" + fn visit_i32(self, v: i32) -> Result { + Ok(PageRangesPart::from(v)) + } + fn visit_u32(self, v: u32) -> Result { + PageRangesPart::try_from(v).map_err(|_| E::custom("value too large")) + } + fn visit_i64(self, v: i64) -> Result { + PageRangesPart::try_from(v).map_err(|_| E::custom("value out of bounds")) + } + fn visit_u64(self, v: u64) -> Result { + PageRangesPart::try_from(v).map_err(|_| E::custom("value too large")) + } +); + impl PageRangesPart { /// The start of a range, if any. pub fn start(&self) -> Option<&Numeric> { @@ -169,9 +230,36 @@ impl PageRangesPart { } } -impl From for PageRangesPart { - fn from(value: u64) -> Self { - Self::SinglePage((value as u32).into()) +impl From for PageRangesPart { + fn from(value: i32) -> Self { + Self::SinglePage(value.into()) + } +} + +impl TryFrom for PageRangesPart { + type Error = TryFromIntError; + + fn try_from(value: u32) -> Result { + let value: i32 = value.try_into()?; + Ok(Self::SinglePage(value.into())) + } +} + +impl TryFrom for PageRangesPart { + type Error = TryFromIntError; + + fn try_from(value: i64) -> Result { + let value: i32 = value.try_into()?; + Ok(Self::SinglePage(value.into())) + } +} + +impl TryFrom for PageRangesPart { + type Error = TryFromIntError; + + fn try_from(value: u64) -> Result { + let value: i32 = value.try_into()?; + Ok(Self::SinglePage(value.into())) } } @@ -246,7 +334,6 @@ impl FromStr for PageRangesPart { } } -deserialize_from_str!(PageRanges); serialize_display!(PageRanges); fn parse_number(s: &str) -> Result {