Task Fusion #113
base: branch-24.03
Changes from 46 commits
TODO: all changes to this file will be reverted to what's in nv-legate.
All changes to this file will be reverted before merge.
I'm not a fan of this design. The fusion task shouldn't be any different from a normal Legate task. I believe the fusion metadata can be passed as a bunch of scalar arguments to the fusion task. If we do that, then we don't need to make every place in the core handle fusion. Making the default path aware of task fusion doesn't sound like a good design.
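As a rough illustration of this proposal, the metadata could be flattened into the task's ordinary scalar arguments. This is only a sketch: `add_scalar_arg` is a stand-in loosely modeled on Legate's launcher interface, and the list names are invented for illustration.

```python
# Sketch only: flatten the fusion metadata into plain scalar arguments
# so the fused task is launched like any other Legate task. The list
# names (op_ids, input_starts, ...) and `add_scalar_arg` are
# hypothetical stand-ins, not the actual legate.core API.
def pack_fusion_metadata_as_scalars(launcher, op_ids, input_starts,
                                    output_starts, scalar_starts):
    launcher.add_scalar_arg(len(op_ids))  # number of fused sub-tasks
    for lst in (op_ids, input_starts, output_starts, scalar_starts):
        for value in lst:
            launcher.add_scalar_arg(value)
```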
This is definitely worth discussing (I pondered this myself): I did think about making them scalars, but I wasn't sure whether there were other implications or downsides to storing all the metadata that way (e.g., is there a maximum number of scalars we can give a task?), since we'd be sending quite a few scalars to the task, or rather multiple lists, each of which can have more than 50 scalars; in four of these five lists, the number of scalars equals the length of the fused op, even with potential deduplication of stores.
If we did this, we'd be mixing "metadata" scalars (which are really a bunch of lists) into the task's `scalars` array alongside the actual scalars used by the sub-tasks, which doesn't really adhere to the abstraction of having a dedicated `scalars` array in the `context` data structure. I therefore chose to serialize the metadata as Legate task argument data. As you stated, the downside is that the core/default path has to be "fusion aware".
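For contrast, a minimal sketch of the serialized route described above, packing each metadata list as a length-prefixed run of 32-bit ints in one contiguous buffer; the layout is illustrative, not the actual wire format.

```python
import struct

def serialize_fusion_metadata(op_ids, input_starts,
                              output_starts, scalar_starts):
    # Each list is packed as: int32 length, then the int32 values.
    # The fused task's deserializer can then walk the buffer in one
    # pass without touching the regular `scalars` array.
    buf = bytearray()
    for lst in (op_ids, input_starts, output_starts, scalar_starts):
        buf += struct.pack(f"=i{len(lst)}i", len(lst), *lst)
    return bytes(buf)
```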
But if you don't see any potential downsides to making them all scalars, then yes, it could totally be implemented as scalar-based. We'd mostly just have to move the book-keeping logic from the deserializer into the fused Legate task.
I feel what's actually missing here is the ability to pass an arbitrary chunk of data (probably a `memoryview` object in Python) to a task, which is possible in theory, so we should probably extend the buffer builder interface to take an arbitrary memoryview object and pack it as a single contiguous chunk. The assumption here is that the task knows how to interpret those bytes. It shouldn't be too difficult for me to add it.
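For concreteness, a minimal sketch of what that extension could look like, assuming a toy buffer builder along the lines of (but not identical to) legate.core's `BufferBuilder`; `pack_buffer` is the hypothetical new method.

```python
import struct

class BufferBuilder:
    """Minimal stand-in for a buffer builder; pack_buffer is the
    hypothetical extension discussed above."""

    def __init__(self):
        self.fmt = ["="]   # native byte order, standard sizes
        self.args = []

    def pack_32bit_int(self, value):
        self.fmt.append("i")
        self.args.append(value)

    def pack_buffer(self, buf):
        # Pack an arbitrary memoryview as one contiguous,
        # length-prefixed chunk; the receiving task is assumed to
        # know how to interpret the bytes.
        raw = memoryview(buf).tobytes()
        self.pack_32bit_int(len(raw))
        self.fmt.append(f"{len(raw)}s")
        self.args.append(raw)

    def get_string(self):
        return struct.pack("".join(self.fmt), *self.args)
```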
This assertion is being removed for performance reasons. The new method `pack_32bit_int_arr` above is a much faster way of packing a set of 32-bit ints (rather than appending them individually), but it breaks the invariant that `len(fmtstr) == len(self.args) + 1`.
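In terms of the toy builder sketched a few comments up, the batched method amounts to something like the following (a sketch, not the exact implementation), which also shows why the invariant can no longer hold.

```python
def pack_32bit_int_arr(self, ints):
    # A single "<N>i" format token now covers the whole array, while
    # len(ints) values land in self.args, so the old invariant
    # len(fmtstr) == len(self.args) + 1 breaks.
    self.fmt.append(f"{len(ints)}i")
    self.args.extend(ints)
```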
I'll probably generalize this code, as I'm sure this utility isn't just for 32-bit integers.
Makes sense; I found it faster to batch the packing of ints this way.
This one (i.e., the hack) is interesting, and it will be removed. When creating a fused operation, if one reference to a store has a partition with tag=0, but another reference to the same store with the same partition has tag=1 within the same fused op, the runtime will complain that these two partitions are aliased, even though theoretically nothing is wrong. I will likely change this to ignore tags when coalescing within a fused op.
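A hedged sketch of that tag-agnostic coalescing, with `(store_id, part_id, tag)` triples standing in for the runtime's real requirement objects:

```python
from collections import defaultdict

def coalesce_requirements(reqs):
    # Key on (store, partition) only, so two references to the same
    # store and partition with different tags are merged instead of
    # being flagged as aliased within one fused op.
    merged = defaultdict(set)
    for store_id, part_id, tag in reqs:
        merged[(store_id, part_id)].add(tag)
    return merged
```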
This sounds like an auto-partitioner issue: the auto-parallelizer marks one partition as the "key" partition used to distribute tasks across processors, and it conveys that information in the tag. With task fusion, the auto-parallelizer shouldn't pick a tag for a particular task but for the whole window, so that the partition carries the same tag throughout the window.
Currently, a hash method is used in `solver.py` to map a store (or rather its constraints) to its partition. For a given store (among the inputs, outputs, or reductions), the hash ingests its task's absolute id, which is problematic for fusion: the fused task will have a different absolute id than the unfused tasks. To take care of this, we set `input_parts` to `self._unfused_input_parts`, so we can hash into the original "unfused" partitions. Otherwise, the method for hashing into a store's partition will complain that the partition for the current store doesn't exist for the fused op.
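A minimal sketch of the lookup problem and the workaround, assuming a partition table keyed by the op's absolute id; the class and field names are illustrative, not the actual `solver.py` code.

```python
class Strategy:
    def __init__(self, input_parts, unfused_input_parts=None):
        # input_parts is keyed on (task_abs_id, store). For a fused op
        # we swap in the per-op "unfused" table so hashes built from
        # the original absolute ids still resolve.
        self._input_parts = unfused_input_parts or input_parts

    def partition_for(self, task_abs_id, store):
        # Raises KeyError if the store was never partitioned under
        # this absolute id.
        return self._input_parts[(task_abs_id, store)]
```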
There are two ways of generating the set of partitions for a fused op:
- One is to create the fused op and then repartition all of its stores (even though they were already partitioned in their unfused form).
- The other is to build a `superstrategy` out of all the individual "unfused" partitions.
The second is much cheaper (partitioning the stores twice has significant, tangible overhead) and is what the code currently uses.
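A sketch of that cheaper route, merging the per-op strategies instead of partitioning the fused op's stores a second time; `store_partitions` is a hypothetical name for each strategy's store-to-partition map.

```python
def build_super_strategy(unfused_strategies):
    # Union the partition maps already computed for each unfused op,
    # avoiding a second partitioning pass over the same stores.
    fused_parts = {}
    for strategy in unfused_strategies:
        fused_parts.update(strategy.store_partitions)
    return fused_parts
```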