Task Fusion #113 (Open)

shivsundram wants to merge 57 commits into base: branch-24.03.

Commits (57; the diff below shows changes from 55 commits)
8ff0261  task fusion and legality constraints (shivsundram, Sep 27, 2021)
4a9248f  makeshift serializer for inline ops (shivsundram, Sep 27, 2021)
99cd51c  reductions scalars, opids, need to remove dynamic allocations (shivsundram, Sep 29, 2021)
a4e21d8  fusion metadata passed via serialization now (shivsundram, Sep 29, 2021)
9b57fc8  remove redundant store partitioning (shivsundram, Sep 30, 2021)
bf7973a  remove creation of deferred arrays (shivsundram, Oct 1, 2021)
b6121e7  optimized packing, some transform packing (shivsundram, Oct 1, 2021)
e3708bf  more stuff (shivsundram, Oct 4, 2021)
8eca06f  partial fusion (Oct 14, 2021)
dbf8ea9  merge attempt (Oct 14, 2021)
a2c3874  second merge attempt (Oct 15, 2021)
303866b  finishing merge (Oct 15, 2021)
e8b9021  fix future stuff (Oct 15, 2021)
7b0ee30  merge conflict (Oct 15, 2021)
8238083  re add serializer, fix horrible merge bug (Oct 15, 2021)
46856c7  debugging crap (Oct 22, 2021)
6b4182b  trying merge (Oct 22, 2021)
db65e43  op registry working (Oct 25, 2021)
ddd8dc7  Change the pip package name to match the conda package and update ver… (marcinz, Oct 27, 2021)
ef677e6  Fix the version of Legion to a particular commit (marcinz, Oct 28, 2021)
ba955e2  Do not find a default branch for the release (marcinz, Oct 28, 2021)
de337cf  Change the Legion checkout target (marcinz, Oct 28, 2021)
78335cc  Bumped up the version of pyarrow (marcinz, Oct 28, 2021)
369909d  gpu descriptors (Nov 1, 2021)
ab0c044  Remove back edges from partition symbols back to operations to avoid (magnatelee, Nov 1, 2021)
a9e1014  Merge branch 'branch-21.10' into cycle-fix (magnatelee, Nov 1, 2021)
54d3bb8  Make sure we don't create cycles between region fields and attachments (magnatelee, Nov 1, 2021)
dd46bdd  Merge pull request #84 from magnatelee/cycle-fix (magnatelee, Nov 2, 2021)
02103ad  Merge pull request #78 from lightsighter/interpreter-check (magnatelee, Nov 3, 2021)
d6ccdd2  Handle cases where one instance is used by multiple mappings (magnatelee, Nov 3, 2021)
94a1fba  Merge branch 'branch-21.10' into mapper-bug-fix (magnatelee, Nov 3, 2021)
12d13d1  Merge pull request #91 from magnatelee/mapper-bug-fix (magnatelee, Nov 3, 2021)
51dd00f  Fix import of legion CFFI (manopapad, Nov 5, 2021)
f348080  Merge pull request #97 from manopapad/fixinit (manopapad, Nov 5, 2021)
f388f07  Make sure we flush deferred detachments (magnatelee, Nov 5, 2021)
345f275  reduction fix (Nov 6, 2021)
ab96ebc  Merge pull request #99 from magnatelee/detachment-fix (magnatelee, Nov 6, 2021)
1f9a655  put new constraint stuff back in (Nov 13, 2021)
e2afa73  constant optimization (Nov 15, 2021)
80aa90f  better constant opt (Nov 22, 2021)
f9eb119  terminal dots (Nov 22, 2021)
ba55358  useless merge (Nov 22, 2021)
78366c8  merging new branch (Nov 22, 2021)
f07381e  reuse partitions (Dec 1, 2021)
ea12377  install.py (Dec 3, 2021)
d7a8dab  new way of applying constraints (Dec 6, 2021)
fe66dad  minor cleanup (Dec 7, 2021)
a73dec1  more cleanup (Dec 7, 2021)
437e67e  use alignment info when fusing (Dec 12, 2021)
9594cb5  new apply methods (Dec 12, 2021)
239ae35  removing serializer code (Dec 13, 2021)
b49557b  more cleanup (Dec 13, 2021)
e05e3ff  remove fusion reference from core (Dec 13, 2021)
a59e142  remove comments (Dec 13, 2021)
399d070  more cleanup (Dec 13, 2021)
5bb4df5  partitioning fix (Apr 20, 2022)
36c40cb  choose midpoint partition (Jun 14, 2022)
install.py (6 changes: 3 additions, 3 deletions)

@@ -794,8 +794,8 @@ def driver():
     )

@shivsundram (Author): all changes to this file will be reverted before merge

     parser.add_argument(
         "--cuda",
-        action=BooleanFlag,
-        default=os.environ.get("USE_CUDA", "0") == "1",
+        action= BooleanFlag,
+        default=True,#os.environ.get("USE_CUDA", "0") == "1",
         help="Build Legate with CUDA support.",
     )
     parser.add_argument(

@@ -895,7 +895,7 @@ def driver():
         "--clean",
         dest="clean_first",
         action=BooleanFlag,
-        default=True,
+        default=False,
         help="Clean before build, and pull latest Legion.",
     )
     parser.add_argument(
legate/core/corelib.py (3 changes: 2 additions, 1 deletion)

@@ -24,7 +24,7 @@
 class CoreLib(Library):
     def __init__(self):
         self._lib = None

     def get_name(self):
         return "legate.core"

@@ -38,6 +38,7 @@ def get_c_header(self):
     def initialize(self, shared_lib):
         self._lib = shared_lib
         shared_lib.legate_parse_config()
+        #self.fused_id = self._lib.LEGATE_CORE_FUSED_TASK_ID

     def get_registration_callback(self):
         return "legate_core_perform_registration"
legate/core/launcher.py (22 changes: 20 additions, 2 deletions)

@@ -534,6 +534,8 @@ def __init__(self, context, task_id, mapper_id=0, tag=0):
         self._sharding_space = None
         self._point = None
         self._output_regions = list()
+        self._is_fused = False
+        self._fusion_metadata = None

     @property
     def library_task_id(self):

@@ -577,7 +579,6 @@ def add_store(self, args, store, proj, perm, tag, flags):
         else:
             region = store.storage.region
             field_id = store.storage.field.field_id
-
         req = RegionReq(region, perm, proj, tag, flags)

         self._req_analyzer.insert(req, field_id)

@@ -643,21 +644,35 @@ def set_sharding_space(self, space):
     def set_point(self, point):
         self._point = point

+    def add_fusion_metadata(self, is_fused, fusion_metadata):
+        self._is_fused = is_fused
+        self._fusion_metadata = fusion_metadata
+
     @staticmethod
     def pack_args(argbuf, args):
         argbuf.pack_32bit_uint(len(args))
         for arg in args:
             arg.pack(argbuf)

+    @staticmethod
+    def pack_fusion_metadata(argbuf, is_fused, fusion_metadata):
+        argbuf.pack_bool(is_fused)
+        if is_fused:
+            fusion_metadata.pack(argbuf)
+
     def build_task(self, launch_domain, argbuf):
         self._req_analyzer.analyze_requirements()
         self._out_analyzer.analyze_requirements()

+        #pack fusion metadata
+        self.pack_fusion_metadata(argbuf, self._is_fused, self._fusion_metadata)
Contributor: I'm not a fan of this design. The fusion task shouldn't be any different from a normal Legate task. I believe the fusion metadata can be passed as a bunch of scalar arguments to the fusion task. If we do that, then we don't need to make every place in the core handle fusion. Making the default path aware of task fusion doesn't sound like a good design.

@shivsundram (Author), Dec 14, 2021: This is definitely worth discussing (I pondered this myself). I did think about making them scalars, but I wasn't sure whether storing all the metadata that way has other implications or downsides (e.g., is there a maximum number of scalars we can give a task?), since we'd be sending quite a few scalars to the task: multiple lists, each of which can have more than 50 scalars, and in 4 or 5 of these lists the number of scalars equals the fused-op length, even with potential deduplication of stores. If we did this, the task's scalars array would mix "metadata" scalars (which are really a bunch of lists) with the actual scalars used by the sub-tasks, which doesn't really adhere to the abstraction of having a dedicated "scalars" array in the context data structure. I therefore chose to serialize it as Legate task argument data. As you stated, the downside is that the core/default path has to be "fusion aware".

@shivsundram (Author), Dec 14, 2021: But if you don't see any potential downsides of making them all scalars, then it could totally be implemented scalar-based. We would mostly just have to move book-keeping logic from the deserializer into the fused Legate task.

Contributor: I feel what's actually missing here is the ability to pass an arbitrary chunk of data (probably a memoryview object in Python) to a task, which is in theory possible. We should probably extend the buffer builder interface to take an arbitrary memoryview object and pack it as a single contiguous chunk. The assumption here is that the task knows how to interpret those bytes. It shouldn't be too difficult for me to add it.

         self.pack_args(argbuf, self._inputs)
         self.pack_args(argbuf, self._outputs)
         self.pack_args(argbuf, self._reductions)
         self.pack_args(argbuf, self._scalars)

         task = IndexTask(
             self.legion_task_id,
             launch_domain,

@@ -683,6 +698,9 @@ def build_single_task(self, argbuf):
     def build_single_task(self, argbuf):
         self._req_analyzer.analyze_requirements()
         self._out_analyzer.analyze_requirements()

+        #pack fusion metadata
+        self.pack_fusion_metadata(argbuf, self._is_fused, self._fusion_metadata)

         self.pack_args(argbuf, self._inputs)
         self.pack_args(argbuf, self._outputs)
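A rough sketch of the buffer-builder extension proposed in the thread above: pack an arbitrary chunk of bytes (a memoryview) as one contiguous task argument. This is a minimal sketch, assuming only the fmt/args/size bookkeeping visible in the legion.py diff below; the class here is a stand-in and pack_buffer is a hypothetical name, not an existing API:

    import struct

    class BufferBuilder:
        # Minimal stand-in for legate.core.legion.BufferBuilder; only the
        # fmt/args/size bookkeeping visible in the legion.py diff is assumed.
        def __init__(self):
            self.fmt = ["="]  # assumed leading byte-order marker
            self.args = []
            self.size = 0

        def pack_32bit_uint(self, arg):
            self.fmt.append("I")
            self.size += 4
            self.args.append(arg)

        def pack_buffer(self, buf):
            # Hypothetical extension: pack an opaque, contiguous chunk of
            # bytes. The receiving task is assumed to know how to interpret
            # them.
            view = memoryview(buf).cast("B")   # flatten to raw bytes
            self.pack_32bit_uint(view.nbytes)  # length prefix for the consumer
            self.fmt.append("%ds" % view.nbytes)
            self.size += view.nbytes
            self.args.append(view.tobytes())

        def get_string(self):
            return struct.pack("".join(self.fmt), *self.args)

With something like this, the fusion metadata could be serialized once on the Python side and handed to the task as one opaque chunk, without teaching pack_args or the default task path anything about fusion.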
legate/core/legion.py (8 changes: 7 additions, 1 deletion)

@@ -4859,6 +4859,12 @@ def pack_32bit_int(self, arg):
         self.size += 4
         self.add_arg(arg, legion.LEGION_TYPE_INT32)

+    def pack_32bit_int_arr(self, arg):
+        self.fmt.append(str(len(arg))+"i")
+        size = len(arg)
+        self.size += 4*size
+        self.args += arg
+
     def pack_64bit_int(self, arg):
         self.fmt.append("q")
         self.size += 8

@@ -5043,7 +5049,7 @@ def pack_dtype(self, dtype):
     def get_string(self):
         if self.string is None or self.arglen != len(self.args):
             fmtstr = "".join(self.fmt)
-            assert len(fmtstr) == len(self.args) + 1
+            #assert len(fmtstr) == len(self.args) + 1
@shivsundram (Author): This assertion is being removed for performance reasons. The new method pack_32bit_int_arr above is a much faster way of packing a set of 32-bit ints than appending them individually, but it breaks the invariant that len(fmtstr) == len(self.args) + 1.

Contributor: I'll probably generalize this code, as I'm sure this is a utility not just for 32-bit integers.

@shivsundram (Author): Makes sense; I found it faster to batch the packing of ints this way.

             self.string = struct.pack(fmtstr, *self.args)
             self.arglen = len(self.args)
         return self.string
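A minimal sketch of the generalization suggested in the thread above, assuming the same fmt/args/size fields used by the buffer builder; pack_array and its element-size table are hypothetical, not part of this PR:

    # Batch-pack any homogeneous numeric array into a BufferBuilder-like
    # object, given the struct format code of its elements.
    _ELEM_SIZE = {"i": 4, "I": 4, "q": 8, "Q": 8, "f": 4, "d": 8}

    def pack_array(builder, arr, code):
        # One "<N><code>" entry in fmt covers N args at once; this bulk
        # append is exactly what breaks the len(fmtstr) == len(self.args) + 1
        # invariant that the commented-out assertion used to enforce.
        builder.fmt.append(str(len(arr)) + code)
        builder.size += _ELEM_SIZE[code] * len(arr)
        builder.args += list(arr)

    # The PR's pack_32bit_int_arr is then just the code == "i" special case:
    # pack_array(builder, arg, "i")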
legate/core/operation.py (36 changes: 32 additions, 4 deletions)

@@ -20,7 +20,11 @@
-from .legion import Future
 from .store import Store
 from .utils import OrderedSet
+
+from .legion import (
+    FieldSpace,
+    Future
+)


 class Operation(object):
     def __init__(self, context, mapper_id=0, op_id=0):

@@ -30,6 +34,7 @@ def __init__(self, context, mapper_id=0, op_id=0):
         self._inputs = []
         self._outputs = []
         self._reductions = []
+        self._is_fused = False
         self._input_parts = []
         self._output_parts = []
         self._reduction_parts = []

@@ -145,11 +150,18 @@ def add_broadcast(self, store):
     def add_constraint(self, constraint):
         self._constraints.append(constraint)

+    def has_constraint(self, store1, store2):
+        part1 = self._get_unique_partition(store1)
+        part2 = self._get_unique_partition(store2)
+        cons = [str(con) for con in self._constraints]
+        return (str(part1 == part2) in cons) or (str(part2 == part1) in cons)
+
     def execute(self):
         self._context.runtime.submit(self)

     def get_tag(self, strategy, part):
         if strategy.is_key_part(part):
+            return 0
@shivsundram (Author), Dec 6, 2021: This one (i.e., a hack) is interesting, and will be removed. When creating a fused operation, if one of the stores has a partition with tag=0, but another reference to the same store with the same partition has tag=1 in the same fused op, the runtime will complain that these two partitions are aliased, even though theoretically nothing is wrong. I will likely change this to ignore tags when coalescing within a fused op.

@magnatelee (Contributor), Dec 14, 2021: This sounds like an auto-partitioner issue: the auto-parallelizer marks a partition as the "key" partition used to distribute tasks across processors, and it conveys that information in the tag. With task fusion, the auto-parallelizer shouldn't pick a tag for a particular task, but for the whole window, so that the partition has the same tag throughout the window.

             return 1 # LEGATE_CORE_KEY_STORE_TAG
         else:
             return 0
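A sketch of the per-window tag selection @magnatelee describes, under the assumptions that a Strategy exposes is_key_part, a fusion window is a list of ops, and ops can enumerate their partition symbols; the accessor partition_symbols() is hypothetical:

    # Decide the key-partition tag once for the whole fusion window, so two
    # references to the same store can never disagree on the tag.
    LEGATE_CORE_KEY_STORE_TAG = 1  # value assumed from the comment above

    def compute_window_tags(strategy, window):
        tags = {}
        for op in window:
            for part in op.partition_symbols():  # hypothetical accessor
                if part not in tags:
                    tags[part] = (
                        LEGATE_CORE_KEY_STORE_TAG
                        if strategy.is_key_part(part)
                        else 0
                    )
        return tags

    # get_tag would then reduce to a lookup, tags[part], leaving no
    # per-op choice that could produce aliased-partition complaints.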
@@ -180,6 +192,7 @@ def __init__(self, context, task_id, mapper_id=0, op_id=0):
         self._task_id = task_id
         self._scalar_args = []
         self._futures = []
+        self._fusion_metadata = None

     def get_name(self):
         libname = self.context.library.get_name()

@@ -195,14 +208,29 @@ def add_dtype_arg(self, dtype):
     def add_future(self, future):
         self._futures.append(future)

+    def add_fusion_metadata(self, fusion_metadata):
+        self._is_fused = True
+        self._fusion_metadata = fusion_metadata
+
     def launch(self, strategy):
         launcher = TaskLauncher(self.context, self._task_id, self.mapper_id)

-        for input, input_part in zip(self._inputs, self._input_parts):
+        if self._is_fused:
+            launcher.add_fusion_metadata(self._is_fused, self._fusion_metadata)
+        if self._is_fused: #fused ops re-use encapsulated unfused partitions
+            input_parts = self._unfused_input_parts
+            output_parts = self._unfused_output_parts
+            reduction_parts = self._unfused_reduction_parts
+        else:
+            input_parts = self._input_parts
+            output_parts = self._output_parts
+            reduction_parts = self._reduction_parts
@shivsundram (Author), Dec 6, 2021: Currently, a hash method is used to map a store (or rather its constraints) to its partition in solver.py. For a given store (among the inputs, outputs, or reductions), the hash ingests its task's absolute id, which is problematic for fusion: the fused task has a different absolute id than the unfused tasks. To take care of this we set input_parts to self._unfused_input_parts, so we can hash into the original "unfused" partitions. Otherwise, the method for hashing into a store's partition will complain that the partition for the current store doesn't exist for the fused op.

@shivsundram (Author), Dec 6, 2021: There are two ways of generating the set of partitions for a fused op:
- One is to create the fused op, then repartition all the stores of the fused op (even though they were already partitioned in their unfused form).
- The other is to build a superstrategy of all the individual "unfused" partitions.
The second is much cheaper (partitioning stores twice has significant, tangible overhead) and is what the code currently uses.
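A minimal sketch of that second, "superstrategy" approach, assuming a Strategy wraps a mapping from partition symbols to chosen partitions (real Strategy internals may differ):

    # Union the partition assignments already computed for the unfused ops
    # into one strategy for the fused op, instead of running the partitioner
    # a second time over the fused op's stores.
    def build_superstrategy(unfused_strategies):
        merged = {}
        for strategy in unfused_strategies:
            # _assignments: assumed dict from partition symbol to partition.
            merged.update(strategy._assignments)
        return merged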

+        for input, input_part in zip(self._inputs, input_parts):
             proj = strategy.get_projection(input_part)
             tag = self.get_tag(strategy, input_part)
             launcher.add_input(input, proj, tag=tag)
-        for output, output_part in zip(self._outputs, self._output_parts):
+        for output, output_part in zip(self._outputs, output_parts):
             if output.unbound:
                 continue
             proj = strategy.get_projection(output_part)

@@ -212,7 +240,7 @@ def launch(self, strategy):
             # We update the key partition of a store only when it gets updated
             output.set_key_partition(partition)
         for ((reduction, redop), reduction_part) in zip(
-            self._reductions, self._reduction_parts
+            self._reductions, reduction_parts
         ):
             partition = strategy.get_partition(reduction_part)
             can_read_write = partition.is_disjoint_for(strategy, reduction)
legate/core/partition.py (2 changes: 0 additions, 2 deletions)

@@ -180,10 +180,8 @@ def construct(self, region, complete=False):
         transform = Transform(tile_shape.ndim, tile_shape.ndim)
         for idx, size in enumerate(tile_shape):
             transform.trans[idx, idx] = size
-
         lo = Shape((0,) * tile_shape.ndim) + self._offset
         hi = self._tile_shape - 1 + self._offset
-
         extent = Rect(hi, lo, exclusive=False)

         color_space = self._runtime.find_or_create_index_space(