Skip to content

Commit

Permalink
refactor(properties_file): simplify, standarize on Windows-1252 encoding
Browse files Browse the repository at this point in the history
The Java documentation specifies that .properties files should use the
Windows-1252 (also imprecisely known as ISO-8859-1) encoding, but there
was code for supporting the Unicode BOM, which should not appear on the
first place and does not help with incompatible misinterpretations of
the source encoding as UTF-8; it'd only help if a UTF-8 with BOM file
only uses ASCII characters, but break in other cases.

In the future, we might study and implement ways of handling several
encodings, depending on the encoding expected by the mod using these
files.
  • Loading branch information
AlexTMjugador committed Sep 11, 2023
1 parent ff0981f commit d3c3e13
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 58 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ Versioning](https://semver.org/spec/v2.0.0.html).
to their decentralized and free nature, the PackSquash project will most
likely not pursue signing binaries with code signing certificates.

#### Fixed

- The UTF-8 BOM is no longer automatically stripped from properties files, as
they should not be encoded in UTF-8 to begin with, and carrying on processing
with mismatched encodings may cause mojibake.

#### User experience

- Some options file deserialization error messages have been shortened and made
Expand Down
20 changes: 4 additions & 16 deletions packages/packsquash/src/pack_file/properties_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,8 @@ use tokio::io::AsyncRead;
use tokio_util::codec::{Decoder, FramedRead};

use crate::config::PropertiesFileOptions;
use crate::pack_file::util::{starts_with_bom, BOM_UTF8};

use super::{
util::strip_utf8_bom, AsyncReadAndSizeHint, PackFile, PackFileAssetType, PackFileConstructor
};
use super::{AsyncReadAndSizeHint, PackFile, PackFileAssetType, PackFileConstructor};

#[cfg(test)]
mod tests;
Expand Down Expand Up @@ -95,7 +92,7 @@ impl Decoder for OptimizerDecoder {
minified_properties_writer.set_line_ending(LineEnding::LF);
minified_properties_writer.set_kv_separator("=").unwrap();

PropertiesIter::new(strip_utf8_bom(src)).read_into(|key, value| {
PropertiesIter::new(&**src).read_into(|key, value| {
minified_properties_writer.write(&key, &value).unwrap();
})?;

Expand All @@ -109,20 +106,11 @@ impl Decoder for OptimizerDecoder {
} else {
// Parse the properties file to check its correctness, and then just copy the
// original file
PropertiesIter::new(strip_utf8_bom(src)).read_into(|_, _| ())?;

// Strip the BOM anyway because it can be part of the first key name, OptiFine
// will not strip it when trying to get its value, and it just adds bytes for
// nothing
let split_index = if starts_with_bom(&src) {
BOM_UTF8.len()
} else {
0
};
PropertiesIter::new(&**src).read_into(|_, _| ())?;

Ok(Some((
Cow::Borrowed("Validated and copied"),
ByteBuffer::BytesMut(src.split_off(split_index))
ByteBuffer::BytesMut(src.split_off(0))
)))
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# The U+00E9 'LATIN SMALL LETTER E WITH ACUTE' Unicode escape code should be minified
# to its equivalent ISO-8859-1 codepoint, which is a single byte.
# U+2764 'HEAVY BLACK HEART' cannot be represented with ISO-8859-1 and should be
# kept escaped
nbt.Name = Th\u00e9oriciennes\u2764
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
nbt.Name=Th�oriciennes\u2764
56 changes: 14 additions & 42 deletions packages/packsquash/src/pack_file/properties_file/tests.rs
Original file line number Diff line number Diff line change
@@ -1,35 +1,25 @@
use crate::pack_file::util::BOM;
use pretty_assertions::assert_eq;
use tokio_stream::StreamExt;
use tokio_test::io::Builder;

use super::*;

static PROPERTIES_DATA: &str = include_str!("example.properties");
static MINIFIED_PROPERTIES_DATA: &str = include_str!("example_minified.properties");
static PROPERTIES_DATA: &[u8] = include_bytes!("example.properties");
static MINIFIED_PROPERTIES_DATA: &[u8] = include_bytes!("example_minified.properties");
static PROPERTIES_WITH_UNICODE_DATA: &[u8] = include_bytes!("escaped_unicode_characters.properties");
static MINIFIED_PROPERTIES_WITH_UNICODE_DATA: &[u8] =
include_bytes!("escaped_unicode_characters_minified.properties");

/// Processes the given input data as a [PropertiesFile], using the provided settings,
/// expecting a successful result that equals the expected string.
async fn successful_process_test(
input_text: &str,
add_bom: bool,
input: &[u8],
settings: PropertiesFileOptions,
expected_result: &str
expected_result: &[u8]
) {
let input_text = {
let mut input_data = Cow::Borrowed(input_text);

if add_bom {
input_data.to_mut().insert(0, BOM);
}

input_data
};
let input_data = input_text.as_bytes();

let data_stream = PropertiesFile {
read: Builder::new().read(input_data).build(),
file_length_hint: input_data.len(),
read: Builder::new().read(input).build(),
file_length_hint: input.len(),
optimization_settings: settings
}
.process();
Expand All @@ -39,20 +29,18 @@ async fn successful_process_test(
.collect()
.await;

let mut data = Vec::with_capacity(input_data.len());
let mut data = Vec::with_capacity(input.len());
for (_, partial_data) in process_result {
data.extend_from_slice(partial_data.as_ref());
}

let data = String::from_utf8(data).expect("The result should be a UTF-8 string");
assert_eq!(&data, expected_result);
}

#[tokio::test]
async fn minifying_works() {
successful_process_test(
PROPERTIES_DATA,
false,
PropertiesFileOptions {
minify: true,
..Default::default()
Expand All @@ -63,15 +51,14 @@ async fn minifying_works() {
}

#[tokio::test]
async fn minifying_with_bom_works() {
async fn minifying_with_unicode_data_works() {
successful_process_test(
PROPERTIES_DATA,
true,
PROPERTIES_WITH_UNICODE_DATA,
PropertiesFileOptions {
minify: true,
..Default::default()
},
MINIFIED_PROPERTIES_DATA
MINIFIED_PROPERTIES_WITH_UNICODE_DATA
)
.await
}
Expand All @@ -80,21 +67,6 @@ async fn minifying_with_bom_works() {
async fn passthrough_works() {
successful_process_test(
PROPERTIES_DATA,
false,
PropertiesFileOptions {
minify: false,
..Default::default()
},
PROPERTIES_DATA
)
.await
}

#[tokio::test]
async fn passthrough_with_bom_works() {
successful_process_test(
PROPERTIES_DATA,
true,
PropertiesFileOptions {
minify: false,
..Default::default()
Expand All @@ -106,5 +78,5 @@ async fn passthrough_with_bom_works() {

#[tokio::test]
async fn empty_input_is_handled_properly() {
successful_process_test("", false, PropertiesFileOptions::default(), "").await
successful_process_test(b"", PropertiesFileOptions::default(), b"").await
}

0 comments on commit d3c3e13

Please sign in to comment.