Skip to content

Commit

Permalink
Merge pull request #22 from 8onlichtman/complete_greedy.py
Browse files Browse the repository at this point in the history
Complete greedy.py
  • Loading branch information
erelsgl authored May 15, 2024
2 parents aa962f6 + 964feea commit 54dc223
Show file tree
Hide file tree
Showing 3 changed files with 186 additions and 42 deletions.
10 changes: 9 additions & 1 deletion prtpy/binners.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,13 @@ def add_item_to_bin(self, bins:BinsArray, item: Any, bin_index: int)->BinsArray:
Return the bins after the addition.
"""
return bins


def remove_item_from_bin(self, bins:BinsArray, bin_index: int, item_index: int)->BinsArray:
sums, lists = bins
value = lists[bin_index][item_index]
sums[bin_index] -= value
del lists[bin_index][item_index]
return bins

@abstractmethod
def sort_by_ascending_sum(self, bins:BinsArray):
Expand All @@ -99,6 +105,8 @@ def numbins(self, bins: BinsArray) -> int:
"""
return None



@abstractmethod
def sums(self, bins: BinsArray) -> Tuple[float]:
"""
Expand Down
45 changes: 44 additions & 1 deletion prtpy/objectives.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,6 @@ def lower_bound(self, sums:list, sum_of_remaining_items:float, are_sums_in_ascen
25.0
>>> MinimizeDifference.lower_bound([10,20,30,40,50], sum_of_remaining_items=45)
15.0
>>> MinimizeDifference.lower_bound([10,20,30,40,50], sum_of_remaining_items=200)
0.0
>>> MinimizeDifference.lower_bound([0,0,0,0,0], sum_of_remaining_items=54)
Expand All @@ -156,6 +155,50 @@ def lower_bound(self, sums:list, sum_of_remaining_items:float, are_sums_in_ascen



class MinimizeTheDistanceFromAvg(Objective):
def value_to_minimize(self, sums:list, are_sums_in_ascending_order:bool=False)->float:
avg = sum(sums) / len(sums)
diff_from_avg = 0
for s in sums:
if (s > avg):
diff_from_avg = diff_from_avg + (s - avg)
return diff_from_avg
def __str__(self) -> str:
return "minimize-the-distance-from-avg"
def lower_bound(self, sums:list, sum_of_remaining_items:float, are_sums_in_ascending_order:bool=False)->float:
"""
First we calculate the final avg including the remaining items.
We try to add values from the remaining sum to the bins that haven't reached the avg yet
(they don't contribute to our final difference because we only take the bins that are more than avg).
If at any point all bins are equal to avg then we just need to divide the remaining sum amongst all bins
and the residue will be given out to random bins (one each).
We calculate and return the difference.
>>> MinimizeDistAvg.lower_bound([10,20,30,40,50], sum_of_remaining_items=5)
28.0
>>> MinimizeDistAvg.lower_bound([10,20,30,40,50], sum_of_remaining_items=20)
22.0
>>> MinimizeDistAvg.lower_bound([10,20,30,40,50], sum_of_remaining_items=45)
12.0
>>> MinimizeDistAvg.lower_bound([10,20,30,40,50], sum_of_remaining_items=200)
0.0
>>> MinimizeDistAvg.lower_bound([0,0,0,0,0], sum_of_remaining_items=54)
0.8
"""
remaining = sum_of_remaining_items
avg = (sum(sums) + remaining) / len(sums)
diff_from_avg = 0
for s in sums:
if (s < avg and remaining > 0):
remaining = remaining - min(remaining, int(avg - s))
if(s > avg):
diff_from_avg = diff_from_avg + (s - avg)
return diff_from_avg + ((remaining % len(sums)) / len(sums))

MinimizeDistAvg = MinimizeTheDistanceFromAvg()



if __name__ == "__main__":
import doctest

Expand Down
173 changes: 133 additions & 40 deletions prtpy/partitioning/complete_greedy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,28 @@
explains how to have a set with a custom key.
Programmer: Erel Segal-Halevi
Eitan Lichtman added Minimize Distance from Avg Objective
"""

import math
from typing import List, Tuple, Callable, Iterator, Any
import numpy as np
import logging, time

from prtpy import objectives as obj, Binner, BinsArray

logger = logging.getLogger(__name__)



def anytime(
binner: Binner, numbins: int, items: List[any],
binner: Binner, numbins: int, items: List[any], relative_value: List[any] = None,
objective: obj.Objective = obj.MinimizeDifference,
use_lower_bound: bool = True, # Prune branches whose lower bound (= optimistic value) is at least as large as the current minimum.
use_fast_lower_bound: bool = True, # A faster lower bound, that does not create the branch at all. Useful for min-max and max-min objectives.
use_heuristic_3: bool = False, # An improved stopping condition, applicable for min-max only. Not very useful in experiments.
use_set_of_seen_states: bool = True,
use_lower_bound: bool = True,
# Prune branches whose lower bound (= optimistic value) is at least as large as the current minimum.
use_fast_lower_bound: bool = True,
# A faster lower bound, that does not create the branch at all. Useful for min-max max-min and min-dist-avg objectives.
use_heuristic_3: bool = False,
# An improved stopping condition, applicable for min-max only. Not very useful in experiments.
use_set_of_seen_states: bool = True,
time_limit: float = np.inf,
) -> Iterator:
"""
Expand All @@ -41,6 +45,10 @@ def anytime(
Bin #0: [6, 5, 4], sum=15.0
Bin #1: [8, 7], sum=15.0
>>> printbins(anytime(BinnerKeepingContents(), 2, [4,5,6,7,8], [0.3,0.7], objective=obj.MinimizeDistAvg))
Bin #0: [5, 4], sum=9.0
Bin #1: [8, 7, 6], sum=21.0
The following examples are based on:
Walter (2013), 'Comparing the minimum completion times of two longest-first scheduling-heuristics'.
>>> walter_numbers = [46, 39, 27, 26, 16, 13, 10]
Expand All @@ -56,6 +64,56 @@ def anytime(
Bin #0: [46, 10], sum=56.0
Bin #1: [27, 16, 13], sum=56.0
Bin #2: [39, 26], sum=65.0
>>> printbins(anytime(BinnerKeepingContents(), 3, walter_numbers, objective=obj.MinimizeDistAvg))
Bin #0: [39, 16], sum=55.0
Bin #1: [46, 13], sum=59.0
Bin #2: [27, 26, 10], sum=63.0
>>> printbins(anytime(BinnerKeepingContents(), 3, walter_numbers,[0.2,0.4,0.4], objective=obj.MinimizeDistAvg))
Bin #0: [27, 10], sum=37.0
Bin #1: [39, 16, 13], sum=68.0
Bin #2: [46, 26], sum=72.0
>>> printbins(anytime(BinnerKeepingContents(), 5, [460000000, 390000000, 270000000, 260000000, 160000000, 130000000, 100000000],[0.2,0.4,0.1,0.15,0.15], objective=obj.MinimizeDistAvg))
Bin #0: [390000000], sum=390000000.0
Bin #1: [460000000, 130000000, 100000000], sum=690000000.0
Bin #2: [160000000], sum=160000000.0
Bin #3: [260000000], sum=260000000.0
Bin #4: [270000000], sum=270000000.0
>>> printbins(anytime(BinnerKeepingContents(), 10, [115268834, 22638149, 35260669, 68111031, 13376625, 20835125, 179398684, 69888000, 94462800, 5100340, 27184906, 305371, 272847, 545681, 1680746, 763835, 763835], [0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1], objective=obj.MinimizeDistAvg))
Bin #0: [13376625, 5100340, 1680746, 763835, 763835, 545681, 305371, 272847], sum=22809280.0
Bin #1: [20835125], sum=20835125.0
Bin #2: [22638149], sum=22638149.0
Bin #3: [27184906], sum=27184906.0
Bin #4: [35260669], sum=35260669.0
Bin #5: [68111031], sum=68111031.0
Bin #6: [69888000], sum=69888000.0
Bin #7: [94462800], sum=94462800.0
Bin #8: [115268834], sum=115268834.0
Bin #9: [179398684], sum=179398684.0
>>> printbins(anytime(BinnerKeepingContents(), 3, walter_numbers,[0.1,0.9,0], objective=obj.MinimizeDistAvg))
Bin #0: [16], sum=16.0
Bin #1: [46, 39, 27, 26, 13, 10], sum=161.0
Bin #2: [], sum=0.0
>>> printbins(anytime(BinnerKeepingContents(), 5, [2,2,5,5,5,5,9], objective=obj.MinimizeDistAvg))
Bin #0: [5], sum=5.0
Bin #1: [5], sum=5.0
Bin #2: [5, 2], sum=7.0
Bin #3: [5, 2], sum=7.0
Bin #4: [9], sum=9.0
>>> printbins(anytime(BinnerKeepingContents(), 3, [1,1,1,1], objective=obj.MinimizeDistAvg))
Bin #0: [1], sum=1.0
Bin #1: [1], sum=1.0
Bin #2: [1, 1], sum=2.0
>>> printbins(anytime(BinnerKeepingContents(), 3, [1,1,1,1,1], objective=obj.MinimizeDistAvg))
Bin #0: [1], sum=1.0
Bin #1: [1, 1], sum=2.0
Bin #2: [1, 1], sum=2.0
Compare results with and without the lower bound:
>>> random_numbers = np.random.randint(1, 2**48-1, 10, dtype=np.int64)
Expand Down Expand Up @@ -95,24 +153,34 @@ def anytime(
end_time = start_time + time_limit

sorted_items = sorted(items, key=binner.valueof, reverse=True)
sums_of_remaining_items = [sum(map(binner.valueof, sorted_items[i:])) for i in range(numitems)] + [0] # For Heuristic 3
sums_of_remaining_items = [sum(map(binner.valueof, sorted_items[i:])) for i in range(numitems)] + [
0] # For Heuristic 3
from prtpy import BinnerKeepingContents, BinnerKeepingSums, printbins
best_bins, best_objective_value = None, np.inf

global_lower_bound = objective.lower_bound(np.zeros(numbins), sums_of_remaining_items[0], are_sums_in_ascending_order=True)

logger.info("\nComplete Greedy %s Partitioning of %d items into %d parts. Lower bound: %s", objective, numitems, numbins, global_lower_bound)
global_lower_bound = objective.lower_bound(np.zeros(numbins), sums_of_remaining_items[0],
are_sums_in_ascending_order=True)

logger.info("\nComplete Greedy %s Partitioning of %d items into %d parts. Lower bound: %s", objective, numitems,
numbins, global_lower_bound)

# Create a stack whose elements are a partition and the current depth.
# Initially, it contains a single tuple: an empty partition with depth 0.
first_bins = binner.new_bins(numbins)
# If the input has relative values for each bin -
# we add a sum to each bin in order to equal them out to the bin with the highest relative value
# (at the end of the algorithm we will remove these sums).
first_bins = binner.new_bins(numbins)
if (relative_value):
for i in range(numbins):
binner.add_item_to_bin(first_bins, (max(relative_value) * sum(items) - relative_value[i] * sum(items)), i)
first_vertex = (first_bins, 0)
stack: List[Tuple[BinsArray, int]] = [first_vertex]
if use_set_of_seen_states:
seen_states = set(tuple(binner.sums(first_bins)))

# For logging and profiling:
complete_partitions_checked = 0
intermediate_partitions_checked = 1
complete_partitions_checked = 0
intermediate_partitions_checked = 1

times_fast_lower_bound_activated = 0
times_lower_bound_activated = 0
Expand All @@ -134,108 +202,133 @@ def anytime(
if new_objective_value < best_objective_value:
best_bins, best_objective_value = current_bins, new_objective_value
logger.info(" Found a better solution: %s, with value %s", current_bins, best_objective_value)
if new_objective_value<=global_lower_bound:
if new_objective_value <= global_lower_bound:
logger.info(" Solution matches global lower bound - stopping")
break
continue

# Heuristic 3: "If the sum of the remaining unassigned integers plus the smallest current subset sum is <= the largest subset sum, all remaining integers are assigned to the subset with the smallest sum, terminating that branch of the tree."
# Note that this heuristic is valid only for the objective "minimize largest sum"!
if use_heuristic_3 and objective==obj.MinimizeLargestSum:
if use_heuristic_3 and objective == obj.MinimizeLargestSum:
if sums_of_remaining_items[depth] + current_sums[0] <= current_sums[-1]:
new_bins = binner.copy_bins(current_bins)
for i in range(depth,numitems):
for i in range(depth, numitems):
binner.add_item_to_bin(new_bins, sorted_items[i], 0)
binner.sort_by_ascending_sum(new_bins)
binner.sort_by_ascending_sum(new_bins)
new_depth = numitems
stack.append((new_bins, new_depth))
logger.debug(" Heuristic 3 activated")
times_heuristic_3_activated+=1
times_heuristic_3_activated += 1
continue

next_item = sorted_items[depth]
sum_of_remaining_items = sums_of_remaining_items[depth+1]
sum_of_remaining_items = sums_of_remaining_items[depth + 1]

previous_bin_sum = None

# We want to insert the next item to the bin with the *smallest* sum first.
# But, since we use a stack, we have to insert it to the bin with the *largest* sum first,
# so that it is pushed deeper into the stack.
# Therefore, we proceed in reverse, by *descending* order of sum.
for bin_index in reversed(range(numbins)):
for bin_index in reversed(range(numbins)):

# Heuristic 1: "If there are two subsets with the same sum, the current number is assigned to only one."
current_bin_sum = current_sums[bin_index]
if current_bin_sum == previous_bin_sum:
continue
continue
previous_bin_sum = current_bin_sum

# Fast-lower-bound heuristic - before creating the new vertex.
# Currently implemented only for two objectives: min-max and max-min.
# Currently implemented only for two objectives: min-max, max-min and min-dist-avg
if use_fast_lower_bound:
if objective==obj.MinimizeLargestSum:
if objective == obj.MinimizeLargestSum:
# "If an assignment to a subset creates a subset sum that equals or exceeds the largest subset sum in the best complete solution found so far, that branch is pruned from the tree."
fast_lower_bound = max(current_bin_sum + binner.valueof(next_item), current_sums[-1])
elif objective==obj.MaximizeSmallestSum:
elif objective == obj.MaximizeSmallestSum:
# An adaptation of the above heuristic to maximizing the smallest sum.
if bin_index==0:
new_smallest_sum = min(current_sums[0]+binner.valueof(next_item), current_sums[1])
if bin_index == 0:
new_smallest_sum = min(current_sums[0] + binner.valueof(next_item), current_sums[1])
else:
new_smallest_sum = current_sums[0]
fast_lower_bound = -(new_smallest_sum+sum_of_remaining_items)
fast_lower_bound = -(new_smallest_sum + sum_of_remaining_items)
elif objective == obj.MinimizeDistAvg:
if relative_value:
fast_lower_bound = 0
for i in range (numbins):
# For each bin: we take off the sum that we added in the beginning of the algorithm (max(relative_value) * sum(items) - relative_value[i] * sum(items))
# Then we check if the difference between the bin's sum and the relative AVG for bin i: (sum(items)*relative_value[i])
# is positive and contributes to our final difference or negative and we will not add anything to our difference.
fast_lower_bound = fast_lower_bound + max((current_sums[i]-(max(relative_value) * sum(items) - relative_value[i] * sum(items)))-sum(items)*relative_value[i],0)
else:
fast_lower_bound = 0
avg = sum(items) / numbins
for i in range (numbins):
fast_lower_bound = fast_lower_bound + max(current_sums[i]-avg,0)
else:
fast_lower_bound = -np.inf
if fast_lower_bound >= best_objective_value:
times_fast_lower_bound_activated += 1
continue

new_bins = binner.add_item_to_bin(binner.copy_bins(current_bins), next_item, bin_index)
binner.sort_by_ascending_sum(new_bins)
if not relative_value:
binner.sort_by_ascending_sum(new_bins)
new_sums = tuple(binner.sums(new_bins))

# Lower-bound heuristic.
if use_lower_bound:
lower_bound = objective.lower_bound(new_sums, sum_of_remaining_items, are_sums_in_ascending_order=True)
lower_bound = objective.lower_bound(new_sums, sum_of_remaining_items, are_sums_in_ascending_order=False)
if lower_bound >= best_objective_value:
logger.debug(" Lower bound %f too large", lower_bound)
times_lower_bound_activated += 1
continue
if use_set_of_seen_states:
if use_set_of_seen_states:
if new_sums in seen_states:
logger.debug(" State %s already seen", new_sums)
times_seen_state_skipped += 1
continue
seen_states.add(new_sums) # should be after if use_lower_bound
seen_states.add(new_sums) # should be after if use_lower_bound

new_vertex = (new_bins, depth + 1)
stack.append(new_vertex)
intermediate_partitions_checked += 1

logger.info("Checked %d out of %d complete partitions, and %d intermediate partitions.", complete_partitions_checked, numbins**numitems, intermediate_partitions_checked)
logger.info(" Heuristics: fast lower bound = %d, lower bound = %d, seen state = %d, heuristic 3 = %d.", times_fast_lower_bound_activated, times_lower_bound_activated, times_seen_state_skipped, times_heuristic_3_activated)
logger.info("Checked %d out of %d complete partitions, and %d intermediate partitions.",
complete_partitions_checked, numbins ** numitems, intermediate_partitions_checked)
logger.info(" Heuristics: fast lower bound = %d, lower bound = %d, seen state = %d, heuristic 3 = %d.",
times_fast_lower_bound_activated, times_lower_bound_activated, times_seen_state_skipped,
times_heuristic_3_activated)


if (relative_value):
# For each bin we remove the value that we added in the beginning of the algorithm.
for i in range(numbins):
binner.remove_item_from_bin(best_bins, i, 0)

for i in range(numbins):
binner.sums(best_bins)[i] = math.floor(binner.sums(best_bins)[i])
return best_bins


if __name__ == "__main__":
import doctest, sys

(failures, tests) = doctest.testmod(report=True, optionflags=doctest.FAIL_FAST)
print("{} failures, {} tests".format(failures, tests))
if failures>0:
if failures > 0:
sys.exit()

# DEMO
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler())

from prtpy import BinnerKeepingContents, BinnerKeepingSums
anytime(BinnerKeepingContents(), 2, [4,5,6,7,8], objective=obj.MinimizeLargestSum)

anytime(BinnerKeepingContents(), 2, [4, 5, 6, 7, 8], objective=obj.MinimizeLargestSum)

walter_numbers = [46, 39, 27, 26, 16, 13, 10]
anytime(BinnerKeepingContents(), 3, walter_numbers, objective=obj.MaximizeSmallestSum)
anytime(BinnerKeepingContents(), 3, walter_numbers, objective=obj.MinimizeLargestSum)

random_numbers = np.random.randint(1, 2**16-1, 15, dtype=np.int64)
random_numbers = np.random.randint(1, 2 ** 16 - 1, 15, dtype=np.int64)
anytime(BinnerKeepingSums(), 3, random_numbers, objective=obj.MaximizeSmallestSum)
anytime(BinnerKeepingSums(), 3, random_numbers, objective=obj.MinimizeLargestSum)
anytime(BinnerKeepingSums(), 3, random_numbers, objective=obj.MinimizeDifference)

0 comments on commit 54dc223

Please sign in to comment.