diff --git a/Cargo.lock b/Cargo.lock index 287770f..6a786e5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -658,7 +658,7 @@ checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" [[package]] name = "subset_sum" -version = "0.11.1" +version = "0.11.2" dependencies = [ "criterion", "pyo3", diff --git a/Cargo.toml b/Cargo.toml index 3ee666d..a9cc60f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "subset_sum" -version = "0.11.1" +version = "0.11.2" edition = "2018" authors = ["Tomohiro Endo "] description = "Solves subset sum problem and returns a set of decomposed integers. It also can match corresponding numbers from two vectors and be used for Account reconciliation." diff --git a/readme.md b/readme.md index 97d3f69..e1b79d2 100644 --- a/readme.md +++ b/readme.md @@ -5,10 +5,23 @@ [![Crates.io](https://img.shields.io/crates/d/subset_sum?label=crates.io%20Downloads)](https://crates.io/crates/subset_sum) [![Crates.io (recent)](https://img.shields.io/crates/dr/subset_sum?label=crates.io%20Downloads%20%28recent%29)](https://crates.io/crates/subset_sum) [![GitHub all releases](https://img.shields.io/github/downloads/europeanplaice/subset_sum/total?label=GitHub%20releases%20Downloads)](https://tooomm.github.io/github-release-stats/?username=europeanplaice&repository=subset_sum) +[![GitHub Repo stars](https://img.shields.io/github/stars/europeanplaice/subset_sum?style=social)](https://github.com/europeanplaice/subset_sum) This is a Rust implementation that calculates subset sum problem using dynamic programming. It solves subset sum problem and returns a set of decomposed integers. It also can match corresponding numbers from two vectors and be used for Account reconciliation. -Python implementation is also [available](#python). +There are three ways to use this program. +* [CLI](#CLI) +* [Rust](#rust) +* [Python](#python) + +And it has three methods. + +* `find_subset` + * It finds a subset from an array. 
+* `Sequence Matcher (One-to-Many)` + * It finds One-to-Many relationships with two arrays. +* `Sequence Matcher (Many-to-Many)` + * It finds Many-to-Many relationships with two arrays. `dpss` is short for `dynamic programming subset sum`. @@ -21,6 +34,7 @@ Python implementation is also [available](#python). |docs.rs|https://docs.rs/subset_sum/latest/dpss/| |pypi|https://pypi.org/project/dpss/| +## CLI ## Installation Binary files are provided on the [Releases](https://github.com/europeanplaice/subset_sum/releases) page. When you download one of these, please add it to your PATH manually. @@ -79,7 +93,7 @@ In this example, the output is ### Sequence Matcher (Many-to-Many) -`key.txt` +`arr1.txt` ``` 1980 2980 @@ -88,7 +102,7 @@ In this example, the output is 1050 ``` -`targets.txt` +`arr2.txt` ``` 1950 2900 @@ -101,7 +115,7 @@ In this example, the output is 20 ``` -Call `subset_sum.exe key.txt targets.txt m2m` +Call `subset_sum.exe arr1.txt arr2.txt m2m` In this example, the output is ``` @@ -144,19 +158,17 @@ print(dpss.sequence_matcher_m2m([1980, 2980, 3500, 4000, 1050], [1950, 2900, 30, >>>[[([20, 30, 1050, 2900], [4000]), ([200, 3300], [3500]), ([80, 1950, 3980], [1050, 1980, 2980])], [([20, 3980], [4000]), ([80, 2900], [2980]), ([30, 1950], [1980]), ([1050], [1050]), ([200, 3300], [3500])], [([20, 3980], [4000]), ([80, 2900], [2980]), ([1050], [1050]), ([30, 1950], [1980]), ([200, 3300], [3500])], [([20, 3980], [4000]), ([200, 3300], [3500]), ([80, 2900], [2980]), ([1050], [1050]), ([30, 1950], [1980])], [([30, 1950], [1980]), ([80, 2900], [2980]), ([20, 3980], [4000]), ([200, 3300], [3500]), ([1050], [1050])], [([30, 1950], [1980]), ([80, 2900], [2980]), ([200, 3300], [3500]), ([20, 3980], [4000]), ([1050], [1050])], [([30, 1950], [1980]), ([80, 2900], [2980]), ([1050], [1050]), ([20, 3980], [4000]), ([200, 3300], [3500])], [([80, 2900], [2980]), ([20, 3980], [4000]), ([1050], [1050]), ([200, 3300], [3500]), ([30, 1950], [1980])], [([80, 2900], [2980]), 
([1050], [1050]), ([30, 1950], [1980]), ([20, 3980], [4000]), ([200, 3300], [3500])], [([200, 3300], [3500]), ([20, 30, 1050, 2900], [4000]), ([80, 1950, 3980], [1050, 1980, 2980])], [([200, 3300], [3500]), ([20, 3980], [4000]), ([80, 2900], [2980]), ([30, 1950], [1980]), ([1050], [1050])], [([1050], [1050]), ([30, 1950], [1980]), ([20, 3980], [4000]), ([80, 2900], [2980]), ([200, 3300], [3500])]] ``` -## Use in Rust +## Use in Rust + +Please check https://crates.io/crates/subset_sum. `Cargo.toml` ``` [dependencies] subset_sum = "(version)" ``` -Example -``` -subset_sum = "0.8.0" -``` -### Subset sum +### Find subset `main.rs` ```rust use subset_sum::dp::find_subset; diff --git a/src/dp_module.rs b/src/dp_module.rs index 1eb6680..0ac2519 100644 --- a/src/dp_module.rs +++ b/src/dp_module.rs @@ -34,8 +34,8 @@ pub mod dp { /// ``` /// /// use subset_sum::dp::find_subset; - /// let a = vec![-1, -3, -2, 6, 12, 48]; - /// let result = find_subset(&a, 0); + /// let arr = vec![-1, -3, -2, 6, 12, 48]; + /// let result = find_subset(&arr, 0); /// let route1: Vec = vec![6, -2, -3, -1]; /// let answer: Vec> = vec![route1]; /// assert_eq!(result, answer); @@ -49,18 +49,18 @@ pub mod dp { /// println!("{:?}", result); /// ``` /// output: `[[1], [4, -3]]` - pub fn find_subset(a: &Vec, n: i32) -> Vec> { + pub fn find_subset(arr: &Vec, value: i32) -> Vec> { use std::cmp::max; use std::cmp::min; // https://stackoverflow.com/questions/43078142/subset-sum-with-negative-values-in-c-or-c // Find a subset even if an array contains negative values. 
- let mut b: Vec = Vec::with_capacity(a.len()); - let mut answer: Vec> = Vec::with_capacity(a.len()); - if a.iter().min().unwrap() >= &0 && n > 0 { - for i in a { + let mut b: Vec = Vec::with_capacity(arr.len()); + let mut answer: Vec> = Vec::with_capacity(arr.len()); + if arr.iter().min().unwrap() >= &0 && value > 0 { + for i in arr { b.push(*i as u32); } - let result = find_subset_fast_only_positive(&b, n as usize); + let result = find_subset_fast_only_positive(&b, value as usize); for i in result { let mut tempvec = Vec::with_capacity(i.len()); for j in i { @@ -70,17 +70,17 @@ pub mod dp { } return answer; } else { - let offset: u32 = (max(a.iter().min().unwrap().abs() + 1, min(n, 0).abs() + 1)) as u32; - for i in a { + let offset: u32 = (max(arr.iter().min().unwrap().abs() + 1, min(value, 0).abs() + 1)) as u32; + for i in arr { b.push((i + offset as i32) as u32); } // We will transform the array into a new array whose elements are all positive. // And check if the transformed sum of the result of the new array is equal to the target value. // If we find the sum is the same as the target, we will return the result. 
- for i in 1..a.len() + 1 { + for i in 1..arr.len() + 1 { let result = - find_subset_fast_only_positive(&b, (n + i as i32 * offset as i32) as usize); + find_subset_fast_only_positive(&b, (value + i as i32 * offset as i32) as usize); for res in result { let mut tempsum: i32 = 0; let mut new_res: Vec = Vec::with_capacity(res.len()); @@ -88,7 +88,7 @@ pub mod dp { tempsum += el as i32 - offset as i32; new_res.push(el as i32 - offset as i32); } - if tempsum == n as i32 { + if tempsum == value as i32 { answer.push(new_res); } } @@ -99,7 +99,7 @@ pub mod dp { fn rec( dp: &Vec>, - a: &Vec, + arr: &Vec, i: usize, j: usize, route: &mut Vec, @@ -118,23 +118,23 @@ pub mod dp { } if dp[i - 1][j] != 0 { - rec(dp, a, i - 1, j, route, answer, a_min); + rec(dp, arr, i - 1, j, route, answer, a_min); } - if j as i32 - a[i - 1] as i32 >= 0 && dp[i - 1][j - a[i - 1] as usize] != 0 { - // Choose this element as a candidate for an answer. - route.push(a[i - 1]); - rec(dp, a, i - 1, j - a[i - 1] as usize, route, answer, a_min); + if j as i32 - arr[i - 1] as i32 >= 0 && dp[i - 1][j - arr[i - 1] as usize] != 0 { + // Choose this element as a candidate for an answer. + route.push(arr[i - 1]); + rec(dp, arr, i - 1, j - arr[i - 1] as usize, route, answer, a_min); // Remove this element after we reach i == 0 regardless of whether we reach j == 0. route.pop(); } } - fn filter_j_idx(n: usize, a: &Vec) -> (Vec, u32) { + fn filter_j_idx(value: usize, arr: &Vec) -> (Vec, u32) { // a_min is the minimum number in an except for zero. 
- let mut a_min = a.iter().max().unwrap(); - let mut a_no_zero: Vec = Vec::with_capacity(a.len()); - for i in a { + let mut a_min = arr.iter().max().unwrap(); + let mut a_no_zero: Vec = Vec::with_capacity(arr.len()); + for i in arr { if i > &0 { if a_min > &i { a_min = &i @@ -142,16 +142,16 @@ pub mod dp { a_no_zero.push(*i); } } - let mut j_indexes: Vec = Vec::with_capacity(n+1); + let mut j_indexes: Vec = Vec::with_capacity(value+1); let gcd = gcd_multi(a_no_zero); // j of the range of 1 to a_min-1 must be zero. // For example, if a_min = 10, there is no way to make sum 5. // Also, if j == 8 and target = 10 and a_min=5, we can't reach 10. // If all the numbers are even, j should be even. - for j in 0..n + 1 { - if (j as u32 >= *a_min && j as u32 <= n as u32 - *a_min && j as u32 % gcd == 0) + for j in 0..value + 1 { + if (j as u32 >= *a_min && j as u32 <= value as u32 - *a_min && j as u32 % gcd == 0) || j as u32 == 0 - || j == n + || j == value { j_indexes.push(j) } @@ -201,39 +201,39 @@ pub mod dp { /// println!("{:?}", result); /// ``` /// output: `[[5, 3, 2], [5, 4, 1], [4, 3, 2, 1]]` - pub fn find_subset_fast_only_positive(a: &Vec, n: usize) -> Vec> { + pub fn find_subset_fast_only_positive(arr: &Vec, value: usize) -> Vec> { // dp is a table that stores the information of subset sum. // dp[i][j] is the number of ways to make sum j with i element. // We follow from the start of this table. - let mut dp: Vec> = vec![vec![0; n + 1]; a.len() + 1]; + let mut dp: Vec> = vec![vec![0; value + 1]; arr.len() + 1]; dp[0][0] = 1; - let (j_indexes, a_min) = filter_j_idx(n, a); - for i in 0..a.len() { + let (j_indexes, a_min) = filter_j_idx(value, arr); + for i in 0..arr.len() { for j in &j_indexes { // If we don't choose to select an element to sum, // the ways to make a sum are the same as with the previous element. dp[i + 1][*j] += dp[i][*j]; // Skip if j + the element is larger than the target value. 
- if *j as u32 + a[i] < n as u32 + 1 { + if *j as u32 + arr[i] < value as u32 + 1 { // This means we find another way to make sum j with i elements // when we choose this element as an element to sum. - dp[i + 1][j + a[i] as usize] += dp[i][*j]; + dp[i + 1][j + arr[i] as usize] += dp[i][*j]; } } } - let a_length: usize = a.len(); + let a_length: usize = arr.len(); let mut route: Vec = Vec::with_capacity(a_length); let mut answer: Vec> = Vec::with_capacity(a_length); - rec(&dp, &a, a_length, n, &mut route, &mut answer, &a_min); + rec(&dp, &arr, a_length, value, &mut route, &mut answer, &a_min); answer } - fn vec_remove(a: &mut Vec, v: i32) { - let index = a.iter().position(|x| *x == v).unwrap(); - a.remove(index); + fn vec_remove(arr: &mut Vec, v: i32) { + let index = arr.iter().position(|x| *x == v).unwrap(); + arr.remove(index); } /// Finds the integers from two vectors that sum to the same value. @@ -316,7 +316,7 @@ pub mod dp { /// Finds the integers from two vectors that sum to the same value. /// This method assumes that the two vectors have Many-to-Many relationships. - /// Each integer of the `key` vector corresponds to the multiple integers of the `value` vector. + /// Each integer of the `arr1` vector corresponds to the multiple integers of the `arr2` vector. /// With this method, we can find multiple combinations of the integers. 
/// # Example /// @@ -355,22 +355,22 @@ pub mod dp { /// ]); /// ``` pub fn sequence_matcher_m2m( - key: &mut Vec, - targets: &mut Vec, + arr1: &mut Vec, + arr2: &mut Vec, n_max: usize, ) -> Vec, Vec)>> { use rand::seq::SliceRandom; - let mut group: Vec<(Vec, Vec)> = Vec::with_capacity(targets.len()); + let mut group: Vec<(Vec, Vec)> = Vec::with_capacity(arr2.len()); let mut answer: Vec, Vec)>> = Vec::with_capacity(n_max); let mut rng: rand::rngs::StdRng = rand::SeedableRng::from_seed([13; 32]); - if key.iter().sum::() != targets.iter().sum() { - println!("The sum of the key must be equal to the sum of the targets."); + if arr1.iter().sum::() != arr2.iter().sum() { + println!("The sum of the arr1 must be equal to the sum of the arr2."); return answer; } for _i in 0..n_max { - sequence_matcher_core_m2m(key, targets, &mut group, &mut answer, 1); - key.shuffle(&mut rng); + sequence_matcher_core_m2m(arr1, arr2, &mut group, &mut answer, 1); + arr1.shuffle(&mut rng); } answer.sort(); answer.dedup(); @@ -381,37 +381,37 @@ pub mod dp { } fn sequence_matcher_core_m2m( - key: &mut Vec, - targets: &mut Vec, + arr1: &mut Vec, + arr2: &mut Vec, group: &mut Vec<(Vec, Vec)>, answer: &mut Vec, Vec)>>, n_key: usize, ) { - if key.iter().sum::() != targets.iter().sum() { + if arr1.iter().sum::() != arr2.iter().sum() { return; } - if key.len() == 0 && targets.len() == 0 { + if arr1.len() == 0 && arr2.len() == 0 { answer.push(group.clone()); return; } - if (key.len() == 0 && targets.len() > 0) || (key.len() > 0 && targets.len() == 0) { - sequence_matcher_core_m2m(key, targets, group, answer, n_key + 1); + if (arr1.len() == 0 && arr2.len() > 0) || (arr1.len() > 0 && arr2.len() == 0) { + sequence_matcher_core_m2m(arr1, arr2, group, answer, n_key + 1); } - if n_key > key.len() { + if n_key > arr1.len() { return; } let mut sum_key = 0; let mut vec_key = vec![]; for i in 0..n_key { - sum_key += key[i]; - vec_key.push(key[i].clone()) + sum_key += arr1[i]; + vec_key.push(arr1[i].clone()) } - 
let set_: Vec> = find_subset(&targets, sum_key); + let set_: Vec> = find_subset(&arr2, sum_key); if set_.len() == 0 { - sequence_matcher_core_m2m(key, targets, group, answer, n_key + 1); + sequence_matcher_core_m2m(arr1, arr2, group, answer, n_key + 1); } for set in set_ { let mut _set = Vec::from(set.clone()); @@ -420,18 +420,18 @@ pub mod dp { _vec_key.sort(); group.push((_set, _vec_key.clone())); for el in set.clone() { - vec_remove(targets, el); + vec_remove(arr2, el); } for i in vec_key.clone() { - vec_remove(key, i); + vec_remove(arr1, i); } - sequence_matcher_core_m2m(key, targets, group, answer, n_key); + sequence_matcher_core_m2m(arr1, arr2, group, answer, n_key); group.pop(); for el in set.clone() { - targets.push(el); + arr2.push(el); } for i in vec_key.clone() { - key.push(i); + arr1.push(i); } } } @@ -558,48 +558,48 @@ mod tests { let answer: Vec> = vec![route1, route2, route3]; assert_eq!(result, answer); - let a = vec![75, 467, 512, -835, 770, -69, 10]; - let result = dp::find_subset(&a, 711); + let arr = vec![75, 467, 512, -835, 770, -69, 10]; + let result = dp::find_subset(&arr, 711); let route1: Vec = vec![10, -69, 770]; let answer: Vec> = vec![route1]; assert_eq!(result, answer); - let a = vec![-3, 10, 56, -33, 65, -9, 8, 72, 63, 35]; - let result = dp::find_subset(&a, 7); + let arr = vec![-3, 10, 56, -33, 65, -9, 8, 72, 63, 35]; + let result = dp::find_subset(&arr, 7); let route1: Vec = vec![10, -3]; let route2: Vec = vec![35, 8, -33, -3]; let answer: Vec> = vec![route1, route2]; assert_eq!(result, answer); - let a = vec![ + let arr = vec![ 73209, 95597, 84735, 40496, 83553, 95595, -628, 201, 27597, 7904, 98445, 6241, 33002, -776, -711, 45552, 86746, 84248, 66278, 37475, ]; - let result = dp::find_subset(&a, 72782); + let result = dp::find_subset(&arr, 72782); let route1: Vec = vec![201, -628, 73209]; let answer: Vec> = vec![route1]; assert_eq!(result, answer); - let a = vec![-1, 2, 3]; - let result = dp::find_subset(&a, -1); + let arr = 
vec![-1, 2, 3]; + let result = dp::find_subset(&arr, -1); let route1: Vec = vec![-1]; let answer: Vec> = vec![route1]; assert_eq!(result, answer); - let a = vec![-10, 5, -2]; - let result = dp::find_subset(&a, -5); + let arr = vec![-10, 5, -2]; + let result = dp::find_subset(&arr, -5); let route1: Vec = vec![5, -10]; let answer: Vec> = vec![route1]; assert_eq!(result, answer); - let a = vec![-3, -5, -7]; - let result = dp::find_subset(&a, -15); + let arr = vec![-3, -5, -7]; + let result = dp::find_subset(&arr, -15); let route1: Vec = vec![-7, -5, -3]; let answer: Vec> = vec![route1]; assert_eq!(result, answer); - let a = vec![-100, 10, 20]; - let result = dp::find_subset(&a, -70); + let arr = vec![-100, 10, 20]; + let result = dp::find_subset(&arr, -70); let route1: Vec = vec![20, 10, -100]; let answer: Vec> = vec![route1]; assert_eq!(result, answer); diff --git a/src/py_module.rs b/src/py_module.rs index 22d329e..c163d71 100644 --- a/src/py_module.rs +++ b/src/py_module.rs @@ -2,23 +2,23 @@ use pyo3::prelude::*; /// Finds subsets sum of a target value. It can accept negative values. #[pyfunction] -#[pyo3(text_signature = "(a, n, /)")] -fn find_subset(a: Vec, n: i32) -> PyResult>> { +#[pyo3(text_signature = "(arr, value, /)")] +fn find_subset(arr: Vec, value: i32) -> PyResult>> { use crate::dp_module::*; - Ok(dp::find_subset(&a, n)) + Ok(dp::find_subset(&arr, value)) } /// Finds subsets sum of a target value. It can't accept negative values but relatively faster. #[pyfunction] -#[pyo3(text_signature = "(a, n, /)")] -fn find_subset_fast_only_positive(a: Vec, n: usize) -> PyResult>> { +#[pyo3(text_signature = "(arr, value, /)")] +fn find_subset_fast_only_positive(arr: Vec, value: usize) -> PyResult>> { use crate::dp_module::*; - Ok(dp::find_subset_fast_only_positive(&a, n)) + Ok(dp::find_subset_fast_only_positive(&arr, value)) } /// Finds the integers from two vectors that sum to the same value. 
/// This method assumes that the two vectors have One-to-Many relationships. -/// Each integer of the `key` vector corresponds to the multiple integers of the `value` vector. +/// Each integer of the `key` vector corresponds to the multiple integers of the `targets` vector. #[pyfunction] #[pyo3(text_signature = "(key, targets, /)")] fn sequence_matcher(mut key: Vec, mut targets: Vec) -> PyResult, i32)>>> { @@ -28,13 +28,13 @@ fn sequence_matcher(mut key: Vec, mut targets: Vec) -> PyResult, mut targets: Vec) -> PyResult, Vec)>>> { +#[pyo3(text_signature = "(arr1, arr2, /)")] +fn sequence_matcher_m2m(mut arr1: Vec, mut arr2: Vec) -> PyResult, Vec)>>> { use crate::dp_module::*; - Ok(dp::sequence_matcher_m2m(&mut key, &mut targets, 10)) + Ok(dp::sequence_matcher_m2m(&mut arr1, &mut arr2, 10)) } #[pymodule]