From f4ee5a243dbb31e6310e5632b1c87898b299df2c Mon Sep 17 00:00:00 2001
From: James Wu
Date: Sun, 3 Nov 2024 15:31:29 -0800
Subject: [PATCH 001/503] Add PT2 Compile Events for triton and kernel compilation + load_by_key_path (#139402)

Adds a few more dynamo_timed() calls to measure triton compilation and
load_by_key_path times.

In the case of async compilation with multiple threads, we'll generate a
single `kernel_compile` event that occurs when waiting on all the parallel
compiles to finish. In the case where async parallel compilation is disabled
(or compile threads are warming up), we'll generate a `triton_compile` event
for each kernel.

The `triton_compile` event is a bit questionable: do we need a row for each
triton compile event? It might eat into our already low retention, so I might
just remove it. Will discuss with @slarsen.

Differential Revision: [D65215707](https://our.internmc.facebook.com/intern/diff/D65215707/)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/139402
Approved by: https://github.com/oulgen
---
 torch/_inductor/async_compile.py | 64 +++++++++++++++++---------------
 torch/_inductor/codecache.py     | 17 ++++++---
 torch/_inductor/graph.py         | 16 ++++----
 3 files changed, 54 insertions(+), 43 deletions(-)

diff --git a/torch/_inductor/async_compile.py b/torch/_inductor/async_compile.py
index c803e7690f9fc5..1a714f0f40f727 100644
--- a/torch/_inductor/async_compile.py
+++ b/torch/_inductor/async_compile.py
@@ -14,6 +14,7 @@

 import torch
 from torch._dynamo.device_interface import get_registered_device_interfaces
+from torch._dynamo.utils import dynamo_timed
 from torch._inductor import config
 from torch._inductor.codecache import (
     CodeCacheFuture,
@@ -286,36 +287,39 @@ def halide(self, meta: HalideMeta, source_code: str):
         return LambdaFuture(get_result)

     def wait(self, scope: Dict[str, Any]) -> None:
-        num_kernels = len(
-            [
-                value
-                for key, value in scope.items()
-                if isinstance(value, (Future, CodeCacheFuture))
-            ]
-        )
-        pbar = tqdm(
-            total=num_kernels,
-            desc="Inductor Compilation",
-            disable=config.disable_progress,
-            delay=0,
-        )
-        if get_compile_threads() > 1:
-            for key, result in scope.items():
-                if config.verbose_progress and not isinstance(pbar, _Faketqdm):
-                    pbar.set_postfix_str(key)
-                if isinstance(result, (Future, CodeCacheFuture)):
-                    try:
-                        scope[key] = result.result()
-                    except BrokenProcessPool as e:
-                        raise RuntimeError(
-                            "A compilation subprocess exited unexpectedly. This "
-                            "is likely due to a crash. To facilitate debugging, "
-                            "you can re-run with TORCHINDUCTOR_COMPILE_THREADS=1 "
-                            "to cause compilation to occur in the main process."
-                        ) from e
-                    pbar.update(1)
-
-        _compile_end()
+        with dynamo_timed(
+            "async_compile.wait", log_pt2_compile_event=True, fwd_only=False
+        ):
+            num_kernels = len(
+                [
+                    value
+                    for key, value in scope.items()
+                    if isinstance(value, (Future, CodeCacheFuture))
+                ]
+            )
+            pbar = tqdm(
+                total=num_kernels,
+                desc="Inductor Compilation",
+                disable=config.disable_progress,
+                delay=0,
+            )
+            if get_compile_threads() > 1:
+                for key, result in scope.items():
+                    if config.verbose_progress and not isinstance(pbar, _Faketqdm):
+                        pbar.set_postfix_str(key)
+                    if isinstance(result, (Future, CodeCacheFuture)):
+                        try:
+                            scope[key] = result.result()
+                        except BrokenProcessPool as e:
+                            raise RuntimeError(
+                                "A compilation subprocess exited unexpectedly. This "
+                                "is likely due to a crash. To facilitate debugging, "
+                                "you can re-run with TORCHINDUCTOR_COMPILE_THREADS=1 "
+                                "to cause compilation to occur in the main process."
+                            ) from e
+                        pbar.update(1)
+
+            _compile_end()


 if (
diff --git a/torch/_inductor/codecache.py b/torch/_inductor/codecache.py
index 39dc59c922dc6d..ee8a671a6016bc 100644
--- a/torch/_inductor/codecache.py
+++ b/torch/_inductor/codecache.py
@@ -1153,12 +1153,17 @@ def iterate_over_candidates() -> Generator[CompiledFxGraph, None, None]:
         AutotuneCacheBundler.begin_compile(inductor_meta, code=code)

         try:
-            graph.current_callable = PyCodeCache.load_by_key_path(
-                graph.cache_key,
-                artifact_path,
-                graph.cache_linemap,
-                graph.get_constants(gm),
-            ).call
+            with dynamo_timed(
+                "PyCodeCache.load_by_key_path",
+                log_pt2_compile_event=True,
+                fwd_only=False,
+            ):
+                graph.current_callable = PyCodeCache.load_by_key_path(
+                    graph.cache_key,
+                    artifact_path,
+                    graph.cache_linemap,
+                    graph.get_constants(gm),
+                ).call
         except OSError:
             # Not expected, but in case the PyCodeCache entry is removed from
             # underneath us, treat it as a cache miss and recompile.
diff --git a/torch/_inductor/graph.py b/torch/_inductor/graph.py
index 6d7915e39689a5..67c72fc2f3042b 100644
--- a/torch/_inductor/graph.py
+++ b/torch/_inductor/graph.py
@@ -1984,13 +1984,15 @@ def _compile_to_module(self) -> ModuleType:
             lambda: {"filename": path},
             payload_fn=lambda: code,
         )
-
-        mod = PyCodeCache.load_by_key_path(
-            key,
-            path,
-            linemap=linemap,  # type: ignore[arg-type]
-            attrs={**self.constants, **self.torchbind_constants},
-        )
+        with dynamo_timed(
+            "PyCodeCache.load_by_key_path", log_pt2_compile_event=True, fwd_only=False
+        ):
+            mod = PyCodeCache.load_by_key_path(
+                key,
+                path,
+                linemap=linemap,  # type: ignore[arg-type]
+                attrs={**self.constants, **self.torchbind_constants},
+            )
         self.cache_key = key
         self.cache_path = path
         self.cache_linemap = linemap  # type: ignore[assignment]

From 3337439dc087b55337c8e4c1ed7f26ecf2fe6bee Mon Sep 17 00:00:00 2001
From: "Sun, Jiayi"
Date: Tue, 29 Oct 2024 01:11:29 -0700
Subject: [PATCH 002/503] [inductor] modify the heuristic for disabling vectorization (#136422)

Summary:
Since we have already implemented tail loop mask vectorization
(https://github.com/pytorch/pytorch/pull/126526), I re-tuned the heuristic for
disabling vectorization from a performance perspective. I changed the heuristic
to: when the total number of elements along the vec dim is less than
`tiling_factor/4` and the number of operations is less than 10, we disable
vectorization.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/136422
Approved by: https://github.com/leslie-fang-intel, https://github.com/jgong5, https://github.com/jansel
---
 torch/_inductor/codegen/cpp.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/torch/_inductor/codegen/cpp.py b/torch/_inductor/codegen/cpp.py
index 41fcaa86b303c8..a6e5475082b4fe 100644
--- a/torch/_inductor/codegen/cpp.py
+++ b/torch/_inductor/codegen/cpp.py
@@ -3374,15 +3374,14 @@ def _is_valid_indices(
                     group[tiling_indices[0]],
                 ]
             )
-            and group[tiling_indices[0]] < tiling_factor / 2
+            and group[tiling_indices[0]] < tiling_factor / 4
+            and op_num < 10
         ):
-            # For case of Multi Thread AMP Static shape of pyhpc_isoneutral_mixing,
-            # the inner loop range doesn't have enough elements to do vectorization
-            # explicitly and found that `#pragma GCC ivdep` has better performance than
-            # `#pragma omp simd simdlen(8)`. Disable vectorization for this case.
-            # Leslie: maybe we can always disable vectorization when loop range is less
-            # than tiling factor and enable `#pragma omp simd simdlen(8)` for scalar kernel
-            # when needed.
+            # We found that when the number of elements in the inner loop range is
+            # relatively small(< tiling_factor / 4) and the number of operations is
+            # not large(< 10), vectorization is not efficient.
+            # And found that `#pragma GCC ivdep` has better performance than
+            # `#pragma omp simd simdlen(8)` for these cases.
             return [], []

         if dtype in DTYPE_LOWP_FP:
@@ -3744,7 +3743,6 @@ def run(kernel):
                 tail_loop.simd_vec = True
             else:
                 tail_loop.set_kernel(scalar_kernel)
-                tail_loop.simd_omp = True
         # We chop the loop into two cubes by the nelements - main loop and tail loop.
         # Regarding the main loop, it is straightforward that it could be vectorized with
         # nelements. But for the tail loop, it still could be vectorized. For example,

From 12d225d91c507cbc9be2e42c7758febf5ecf8f3f Mon Sep 17 00:00:00 2001
From: Bob Ren
Date: Sun, 3 Nov 2024 20:08:31 -0800
Subject: [PATCH 003/503] add opaque unary sin and cos to SYMPY_INTERP (#139569)

Fixes `PYTORCH_TEST_WITH_DYNAMO=1 python test/test_nn.py TestNNDeviceTypeCPU.test_affine_3d_rotateRandom_cpu` when specialize_float = False

Pull Request resolved: https://github.com/pytorch/pytorch/pull/139569
Approved by: https://github.com/ezyang
---
 torch/fx/experimental/symbolic_shapes.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/torch/fx/experimental/symbolic_shapes.py b/torch/fx/experimental/symbolic_shapes.py
index 5a22ba40a66fa7..646ba542c8bc16 100644
--- a/torch/fx/experimental/symbolic_shapes.py
+++ b/torch/fx/experimental/symbolic_shapes.py
@@ -1982,6 +1982,16 @@ def cast_symbool_to_symint_guardless(
     "IntTrueDiv": operator.truediv,
     "FloatTrueDiv": operator.truediv,
     "ToFloat": builtins.float,
+    "OpaqueUnaryFn_cos": math.cos,
+    "OpaqueUnaryFn_cosh": math.cosh,
+    "OpaqueUnaryFn_acos": math.acos,
+    "OpaqueUnaryFn_sin": math.sin,
+    "OpaqueUnaryFn_sinh": math.sinh,
+    "OpaqueUnaryFn_asin": math.asin,
+    "OpaqueUnaryFn_tan": math.tan,
+    "OpaqueUnaryFn_tanh": math.tanh,
+    "OpaqueUnaryFn_atan": math.atan,
+    "OpaqueUnaryFn_sqrt": math.sqrt,
 }

From 2ce2e4df4e95c3a35a3c5a0d695736745c1470b6 Mon Sep 17 00:00:00 2001
From: PyTorch UpdateBot
Date: Mon, 4 Nov 2024 11:49:06 +0000
Subject: [PATCH 004/503] Update slow tests (#139051)

This PR is auto-generated weekly by [this action](https://github.com/pytorch/pytorch/blob/main/.github/workflows/weekly.yml).
Update the list of slow tests.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139051 Approved by: https://github.com/pytorchbot --- test/slow_tests.json | 580 ++++++++++++++++++++++--------------------- 1 file changed, 299 insertions(+), 281 deletions(-) diff --git a/test/slow_tests.json b/test/slow_tests.json index 5ced052f14e15d..60c98499c07eb1 100644 --- a/test/slow_tests.json +++ b/test/slow_tests.json @@ -1,283 +1,301 @@ { - "test_AllenaiLongformerBase_repro_cpu (__main__.CpuHalideTests)": 217.70533333333333, - "test__adaptive_avg_pool2d (__main__.CPUReproTests)": 183.53220000000002, - "test_adaptive_max_pool2d1_cpu (__main__.CpuHalideTests)": 113.71199999999999, - "test_after_aot_cpu_runtime_error (__main__.MinifierIsolateTests)": 65.14765517241378, - "test_alexnet_prefix_cpu (__main__.CpuHalideTests)": 192.105, - "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool1d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 90.7365, - "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 152.649, - "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool3d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 102.364, - "test_aot_autograd_symbolic_module_exhaustive_nn_TransformerDecoderLayer_cpu_float32 (__main__.TestEagerFusionModuleInfoCPU)": 107.731, - "test_aot_export_joint_simple_repro_dynamic_shapes (__main__.DynamicShapesAotAutogradFallbackTests)": 521.2014, - "test_associative_scan_dim_reverse_False_combine_mode_generic_cpu (__main__.TestControlFlow)": 71.284, - "test_associative_scan_dim_reverse_True_combine_mode_generic_cpu (__main__.TestControlFlow)": 72.0559090909091, - "test_avg_pool3d_backward2_cpu (__main__.CpuTests)": 482.14825423728814, - "test_avg_pool3d_backward2_cuda (__main__.GPUTests)": 91.70333333333333, - "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 507.2, - "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 504.7608, - "test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 63.166666666666664, - "test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 124.896, - "test_avg_pool3d_backward_cpu (__main__.CpuHalideTests)": 62.757666666666665, - "test_backward_nn_functional_multi_head_attention_forward_cpu_float32 (__main__.TestCompositeComplianceCPU)": 76.14750000000001, - "test_backward_nn_functional_multi_head_attention_forward_cuda_float32 (__main__.TestCompositeComplianceCUDA)": 60.264, - "test_basic_cpu (__main__.EfficientConvBNEvalCpuTests)": 278.2862, - "test_basic_cuda (__main__.EfficientConvBNEvalGpuTests)": 111.48954545454545, - "test_builtin_equivalent_funcs (__main__.TorchFunctionModeTests)": 108.13043478260867, - "test_captured_score_mod_aot_eager_gradcheck_score_mod_name__head_offset_mode_eager (__main__.TestFlexAttention)": 163.244, - "test_collect_callgrind (__main__.TestBenchmarkUtils)": 378.20866666666666, - "test_comprehensive_constant_pad_nd_cpu_float16 (__main__.TestInductorOpInfoCPU)": 80.16999999999999, - "test_comprehensive_constant_pad_nd_cpu_float32 (__main__.TestInductorOpInfoCPU)": 70.06291666666665, - "test_comprehensive_constant_pad_nd_cpu_float64 (__main__.TestInductorOpInfoCPU)": 69.58566666666667, - "test_comprehensive_constant_pad_nd_cpu_int32 (__main__.TestInductorOpInfoCPU)": 69.70495833333334, - "test_comprehensive_constant_pad_nd_cpu_int64 (__main__.TestInductorOpInfoCPU)": 69.52449999999999, - "test_comprehensive_diff_cpu_bool 
(__main__.TestInductorOpInfoCPU)": 118.6565, - "test_comprehensive_diff_cpu_float32 (__main__.TestInductorOpInfoCPU)": 122.9565, - "test_comprehensive_diff_cpu_float64 (__main__.TestInductorOpInfoCPU)": 114.0035, - "test_comprehensive_diff_cpu_int32 (__main__.TestInductorOpInfoCPU)": 112.271, - "test_comprehensive_diff_cpu_int64 (__main__.TestInductorOpInfoCPU)": 113.428, - "test_comprehensive_diff_cuda_complex128 (__main__.TestDecompCUDA)": 73.43875, - "test_comprehensive_diff_cuda_complex64 (__main__.TestDecompCUDA)": 75.05725, - "test_comprehensive_dist_cpu_float16 (__main__.TestInductorOpInfoCPU)": 95.8675, - "test_comprehensive_dist_cpu_float32 (__main__.TestInductorOpInfoCPU)": 91.992, - "test_comprehensive_dist_cpu_float64 (__main__.TestInductorOpInfoCPU)": 92.976, - "test_comprehensive_eye_cpu_bool (__main__.TestInductorOpInfoCPU)": 143.73000000000002, - "test_comprehensive_eye_cpu_float16 (__main__.TestInductorOpInfoCPU)": 138.624, - "test_comprehensive_eye_cpu_float32 (__main__.TestInductorOpInfoCPU)": 139.755, - "test_comprehensive_eye_cpu_float64 (__main__.TestInductorOpInfoCPU)": 147.81799999999998, - "test_comprehensive_eye_cpu_int32 (__main__.TestInductorOpInfoCPU)": 140.828, - "test_comprehensive_eye_cpu_int64 (__main__.TestInductorOpInfoCPU)": 143.93099999999998, - "test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestDecompCPU)": 341.48900000000003, - "test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 86.49000000000001, - "test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestDecompCPU)": 323.1645, - "test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 85.9655, - "test_comprehensive_grid_sampler_2d_cuda_bfloat16 (__main__.TestDecompCUDA)": 215.22800000000004, - "test_comprehensive_grid_sampler_2d_cuda_float16 (__main__.TestDecompCUDA)": 201.79633333333334, - "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestDecompCUDA)": 756.4825, - "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestDecompCUDA)": 856.9263333333333, - "test_comprehensive_linalg_vector_norm_cpu_float16 (__main__.TestInductorOpInfoCPU)": 219.004, - "test_comprehensive_linalg_vector_norm_cpu_float32 (__main__.TestInductorOpInfoCPU)": 227.63799999999998, - "test_comprehensive_linalg_vector_norm_cpu_float64 (__main__.TestInductorOpInfoCPU)": 223.42000000000002, - "test_comprehensive_logspace_cpu_float32 (__main__.TestInductorOpInfoCPU)": 474.6385, - "test_comprehensive_logspace_cpu_float64 (__main__.TestInductorOpInfoCPU)": 496.866, - "test_comprehensive_logspace_cpu_int32 (__main__.TestInductorOpInfoCPU)": 459.975, - "test_comprehensive_logspace_cpu_int64 (__main__.TestInductorOpInfoCPU)": 457.97450000000003, - "test_comprehensive_masked_amax_cpu_float16 (__main__.TestInductorOpInfoCPU)": 107.4735, - "test_comprehensive_masked_amax_cpu_float32 (__main__.TestInductorOpInfoCPU)": 106.3655, - "test_comprehensive_masked_amax_cpu_float64 (__main__.TestInductorOpInfoCPU)": 112.69399999999999, - "test_comprehensive_masked_amax_cpu_int32 (__main__.TestInductorOpInfoCPU)": 105.007, - "test_comprehensive_masked_amax_cpu_int64 (__main__.TestInductorOpInfoCPU)": 100.816, - "test_comprehensive_masked_amin_cpu_float16 (__main__.TestInductorOpInfoCPU)": 106.1785, - "test_comprehensive_masked_amin_cpu_float32 (__main__.TestInductorOpInfoCPU)": 106.233, - "test_comprehensive_masked_amin_cpu_float64 (__main__.TestInductorOpInfoCPU)": 106.112, - "test_comprehensive_masked_amin_cpu_int32 (__main__.TestInductorOpInfoCPU)": 
101.783, - "test_comprehensive_masked_amin_cpu_int64 (__main__.TestInductorOpInfoCPU)": 102.14850000000001, - "test_comprehensive_masked_mean_cpu_bool (__main__.TestInductorOpInfoCPU)": 103.827, - "test_comprehensive_masked_mean_cpu_float16 (__main__.TestInductorOpInfoCPU)": 105.78999999999999, - "test_comprehensive_masked_mean_cpu_float32 (__main__.TestInductorOpInfoCPU)": 104.21549999999999, - "test_comprehensive_masked_mean_cpu_float64 (__main__.TestInductorOpInfoCPU)": 110.54400000000001, - "test_comprehensive_masked_mean_cpu_int32 (__main__.TestInductorOpInfoCPU)": 107.743, - "test_comprehensive_masked_mean_cpu_int64 (__main__.TestInductorOpInfoCPU)": 102.5795, - "test_comprehensive_masked_norm_cpu_float16 (__main__.TestInductorOpInfoCPU)": 520.4745, - "test_comprehensive_masked_norm_cpu_float32 (__main__.TestInductorOpInfoCPU)": 526.9034999999999, - "test_comprehensive_masked_norm_cpu_float64 (__main__.TestInductorOpInfoCPU)": 526.642, - "test_comprehensive_masked_norm_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 120.993, - "test_comprehensive_masked_norm_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 117.78525, - "test_comprehensive_masked_norm_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 106.073, - "test_comprehensive_masked_prod_cpu_bool (__main__.TestInductorOpInfoCPU)": 99.75399999999999, - "test_comprehensive_masked_prod_cpu_float16 (__main__.TestInductorOpInfoCPU)": 107.64099999999999, - "test_comprehensive_masked_prod_cpu_float32 (__main__.TestInductorOpInfoCPU)": 106.8455, - "test_comprehensive_masked_prod_cpu_float64 (__main__.TestInductorOpInfoCPU)": 107.0445, - "test_comprehensive_masked_prod_cpu_int32 (__main__.TestInductorOpInfoCPU)": 106.7095, - "test_comprehensive_masked_prod_cpu_int64 (__main__.TestInductorOpInfoCPU)": 98.7585, - "test_comprehensive_masked_sum_cpu_bool (__main__.TestInductorOpInfoCPU)": 103.51249999999999, - "test_comprehensive_masked_sum_cpu_float16 (__main__.TestInductorOpInfoCPU)": 103.95750000000001, - "test_comprehensive_masked_sum_cpu_float32 (__main__.TestInductorOpInfoCPU)": 102.953, - "test_comprehensive_masked_sum_cpu_float64 (__main__.TestInductorOpInfoCPU)": 106.582, - "test_comprehensive_masked_sum_cpu_int32 (__main__.TestInductorOpInfoCPU)": 104.2175, - "test_comprehensive_masked_sum_cpu_int64 (__main__.TestInductorOpInfoCPU)": 96.6305, - "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestDecompCUDA)": 84.78766666666667, - "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float64 (__main__.TestDecompCUDA)": 89.57775, - "test_comprehensive_nn_functional_glu_cpu_float16 (__main__.TestInductorOpInfoCPU)": 79.2515, - "test_comprehensive_nn_functional_glu_cpu_float32 (__main__.TestInductorOpInfoCPU)": 79.8335, - "test_comprehensive_nn_functional_glu_cpu_float64 (__main__.TestInductorOpInfoCPU)": 80.49, - "test_comprehensive_nn_functional_grid_sample_cpu_float32 (__main__.TestDecompCPU)": 88.69, - "test_comprehensive_nn_functional_grid_sample_cpu_float64 (__main__.TestDecompCPU)": 80.43, - "test_comprehensive_nn_functional_grid_sample_cuda_float32 (__main__.TestDecompCUDA)": 183.62233333333333, - "test_comprehensive_nn_functional_grid_sample_cuda_float64 (__main__.TestDecompCUDA)": 228.25766666666667, - "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float32 (__main__.TestDecompCUDA)": 63.317666666666675, - "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float64 (__main__.TestDecompCUDA)": 64.50699999999999, - "test_comprehensive_nn_functional_max_pool1d_cpu_float16 
(__main__.TestInductorOpInfoCPU)": 174.4395, - "test_comprehensive_nn_functional_max_pool1d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 164.878, - "test_comprehensive_nn_functional_max_pool1d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 165.157, - "test_comprehensive_nn_functional_max_pool2d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 939.275, - "test_comprehensive_nn_functional_max_pool2d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 873.8385, - "test_comprehensive_nn_functional_max_pool2d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 869.2495, - "test_comprehensive_nn_functional_max_pool2d_cpu_int32 (__main__.TestInductorOpInfoCPU)": 798.8875, - "test_comprehensive_nn_functional_max_pool2d_cpu_int64 (__main__.TestInductorOpInfoCPU)": 835.7080000000001, - "test_comprehensive_nn_functional_max_pool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 719.5625, - "test_comprehensive_nn_functional_max_pool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 722.98475, - "test_comprehensive_nn_functional_max_pool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 710.978, - "test_comprehensive_nn_functional_max_unpool2d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 194.88083333333336, - "test_comprehensive_nn_functional_max_unpool2d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 196.07629166666663, - "test_comprehensive_nn_functional_max_unpool2d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 194.2515, - "test_comprehensive_nn_functional_max_unpool3d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 128.8409583333333, - "test_comprehensive_nn_functional_max_unpool3d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 126.98766666666666, - "test_comprehensive_nn_functional_max_unpool3d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 127.97137500000001, - "test_comprehensive_nn_functional_pad_constant_cpu_float16 (__main__.TestInductorOpInfoCPU)": 78.40700000000001, - "test_comprehensive_nn_functional_pad_constant_cpu_float32 (__main__.TestInductorOpInfoCPU)": 77.36099999999999, - "test_comprehensive_nn_functional_pad_constant_cpu_float64 (__main__.TestInductorOpInfoCPU)": 80.1695, - "test_comprehensive_nn_functional_pad_constant_cpu_int32 (__main__.TestInductorOpInfoCPU)": 69.87650000000001, - "test_comprehensive_nn_functional_pad_constant_cpu_int64 (__main__.TestInductorOpInfoCPU)": 70.05720833333334, - "test_comprehensive_nn_functional_poisson_nll_loss_cpu_float16 (__main__.TestInductorOpInfoCPU)": 127.6425, - "test_comprehensive_nn_functional_poisson_nll_loss_cpu_float32 (__main__.TestInductorOpInfoCPU)": 129.609, - "test_comprehensive_nn_functional_poisson_nll_loss_cpu_float64 (__main__.TestInductorOpInfoCPU)": 136.7715, - "test_comprehensive_nn_functional_poisson_nll_loss_cpu_int32 (__main__.TestInductorOpInfoCPU)": 137.901, - "test_comprehensive_nn_functional_poisson_nll_loss_cpu_int64 (__main__.TestInductorOpInfoCPU)": 126.132, - "test_comprehensive_nn_functional_unfold_cpu_bool (__main__.TestInductorOpInfoCPU)": 123.48891666666667, - "test_comprehensive_nn_functional_unfold_cpu_float16 (__main__.TestInductorOpInfoCPU)": 261.71, - "test_comprehensive_nn_functional_unfold_cpu_float32 (__main__.TestInductorOpInfoCPU)": 273.28200000000004, - "test_comprehensive_nn_functional_unfold_cpu_float64 (__main__.TestInductorOpInfoCPU)": 266.879, - "test_comprehensive_ormqr_cuda_complex128 (__main__.TestDecompCUDA)": 101.85466666666666, - "test_comprehensive_ormqr_cuda_complex64 (__main__.TestDecompCUDA)": 86.66175000000001, - "test_comprehensive_ormqr_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 
83.36566666666666, - "test_comprehensive_ormqr_cuda_float64 (__main__.TestDecompCUDA)": 60.800333333333334, - "test_comprehensive_svd_cuda_complex64 (__main__.TestDecompCUDA)": 68.02033333333334, - "test_cond_autograd_nested (__main__.TestControlFlow)": 144.61216666666667, - "test_constructor_autograd_SparseBSC_cuda (__main__.TestSparseAnyCUDA)": 104.74549999999999, - "test_constructor_autograd_SparseBSR_cuda (__main__.TestSparseAnyCUDA)": 120.5435, - "test_constructor_autograd_SparseCSC_cuda (__main__.TestSparseAnyCUDA)": 68.446, - "test_constructor_autograd_SparseCSR_cuda (__main__.TestSparseAnyCUDA)": 81.96675, - "test_conv1d_basic (__main__.TestXNNPACKConv1dTransformPass)": 142.8464, - "test_conv1d_with_relu_fc (__main__.TestXNNPACKConv1dTransformPass)": 584.9645, - "test_conv2d_unary_cpu_cpp_wrapper (__main__.TestCppWrapper)": 275.89, - "test_conv_bn_fuse_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 60.4926, - "test_correctness_NAdam_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 63.9085, - "test_count_nonzero_all (__main__.TestBool)": 585.3546, - "test_cusparse_multiple_threads_same_device (__main__.TestCuda)": 108.34033333333333, - "test_custom_module_lstm (__main__.TestQuantizedOps)": 311.147, - "test_dispatch_symbolic_meta_outplace_all_strides_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestMetaCUDA)": 83.78333333333335, - "test_dtensor_op_db_nn_functional_gaussian_nll_loss_cpu_float32 (__main__.TestDTensorOpsCPU)": 88.53099999999999, - "test_eig_check_magma_cuda_float32 (__main__.TestLinalgCUDA)": 253.42925, - "test_fail_creation_ops.py (__main__.TestTyping)": 60.42466666666667, - "test_fn_fwgrad_bwgrad_cumprod_cuda_complex128 (__main__.TestFwdGradientsCUDA)": 73.053, - "test_fn_fwgrad_bwgrad_nn_functional_scaled_dot_product_attention_cuda_float64 (__main__.TestFwdGradientsCUDA)": 64.3565294117647, - "test_fn_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 135.923, - "test_fn_gradgrad_map_nested_cpu_float64 (__main__.TestBwdGradientsCPU)": 91.0015, - "test_fn_gradgrad_map_triple_nested_cpu_float64 (__main__.TestBwdGradientsCPU)": 520.559, - "test_fn_gradgrad_map_triple_nested_cuda_float64 (__main__.TestBwdGradientsCUDA)": 355.68533333333335, - "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 79.11880000000001, - "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 77.26866666666666, - "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 72.283, - "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 63.989, - "test_grad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 97.49375, - "test_gradgrad_nn_LSTM_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 92.881, - "test_gradgrad_nn_LSTM_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 89.92000000000002, - "test_gradgrad_nn_TransformerDecoderLayer_cuda_float64 (__main__.TestModuleCUDA)": 215.37775, - "test_gradgrad_nn_TransformerEncoder_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 141.02475, - "test_gradgrad_nn_TransformerEncoder_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 144.87225, - "test_grid_sampler_2d_cpu (__main__.CpuHalideTests)": 187.98233333333334, - "test_hessian_argnums_dynamic_shapes (__main__.DynamicShapesFuncTorchHigherOrderOpTests)": 243.68739024390243, - "test_indexing (__main__.TestAutogradWithCompiledAutograd)": 66.53580487804876, - "test_indirect_device_assert (__main__.TritonCodeGenTests)": 178.977625, - 
"test_inplace_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 96.526, - "test_jacrev_two_tensors_argnums_dynamic_shapes (__main__.DynamicShapesFuncTorchHigherOrderOpTests)": 66.33568292682926, - "test_jit_cuda_archflags (__main__.TestCppExtensionJIT)": 117.10499999999999, - "test_linalg_solve_triangular_large_cuda_complex128 (__main__.TestLinalgCUDA)": 697.57475, - "test_linalg_solve_triangular_large_cuda_complex64 (__main__.TestLinalgCUDA)": 83.33125000000001, - "test_linalg_solve_triangular_large_cuda_float64 (__main__.TestLinalgCUDA)": 73.11175, - "test_linear (__main__.TestStaticQuantizedModule)": 83.46508771929825, - "test_linear_binary_cpp_wrapper (__main__.TestCppWrapper)": 167.69299999999998, - "test_linear_binary_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 174.84699999999998, - "test_linear_packed_cpp_wrapper (__main__.TestCppWrapper)": 78.657, - "test_linear_packed_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 80.5985, - "test_linear_relu (__main__.TestStaticQuantizedModule)": 67.61533333333334, - "test_lstm_cpu (__main__.TestMkldnnCPU)": 62.057500000000005, - "test_matmul_small_brute_force_tunableop_cuda_float16 (__main__.TestLinalgCUDA)": 84.33282352941175, - "test_max_autotune_cutlass_backend_addmm_dynamic_False_max_autotune_gemm_backends_ATen,Triton,CUTLASS (__main__.TestCutlassBackend)": 84.89699999999999, - "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 62.57302439024391, - "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 62.35275609756097, - "test_memory_format_operators_cpu (__main__.TestTorchDeviceTypeCPU)": 76.08051111111111, - "test_mixed_mm_exhaustive_dtypes (__main__.TestPatternMatcher)": 103.41881818181818, - "test_pipeline_order_flex_and_zero_bubble_ScheduleClass1 (__main__.TestSchedulePlan)": 71.24585714285715, - "test_proper_exit (__main__.TestDataLoader)": 188.92475, - "test_proper_exit (__main__.TestDataLoaderPersistentWorkers)": 172.64125, - "test_python_ref_executor__refs_special_zeta_executor_aten_cuda_float64 (__main__.TestCommonCUDA)": 69.83999999999999, - "test_qat_conv2d_unary (__main__.TestQuantizePT2EX86Inductor)": 146.72099999999998, - "test_qat_conv_bn_fusion_no_conv_bias (__main__.TestQuantizePT2EQAT_ConvBn1d)": 63.105317460317494, - "test_qat_conv_bn_fusion_no_conv_bias (__main__.TestQuantizePT2EQAT_ConvBn2d)": 62.03507936507937, - "test_qat_mobilenet_v2 (__main__.TestQuantizePT2EQATModels)": 72.86783333333334, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 110.89726666666668, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 111.5624, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 111.90106666666665, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 113.40520000000001, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 111.72566666666668, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 112.32593333333335, - 
"test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 112.38706666666667, - "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 114.79066666666668, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 115.27933333333333, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 113.48193333333333, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 112.9768, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 113.761, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 112.22186666666668, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 112.23886666666668, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 112.1882, - "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 115.28106666666667, - "test_quick_core_backward__unsafe_masked_index_cpu_float64 (__main__.TestDecompCPU)": 356.806, - "test_quick_core_backward__unsafe_masked_index_cuda_float64 (__main__.TestDecompCUDA)": 613.9933333333333, - "test_quick_core_backward__unsafe_masked_index_put_accumulate_cpu_float64 (__main__.TestDecompCPU)": 571.8885, - "test_quick_core_backward__unsafe_masked_index_put_accumulate_cuda_float64 (__main__.TestDecompCUDA)": 952.0746666666668, - "test_quick_core_backward_nn_functional_max_unpool3d_grad_cpu_float64 (__main__.TestDecompCPU)": 66.04319047619047, - "test_quick_core_backward_nn_functional_max_unpool3d_grad_cuda_float64 (__main__.TestDecompCUDA)": 251.12686666666664, - "test_quick_core_backward_roll_cpu_float64 (__main__.TestDecompCPU)": 89.99199999999999, - "test_quick_core_backward_roll_cuda_float64 (__main__.TestDecompCUDA)": 152.62966666666668, - "test_quick_core_backward_select_scatter_cuda_float64 (__main__.TestDecompCUDA)": 99.9515, - "test_quick_core_backward_split_with_sizes_copy_cpu_float64 (__main__.TestDecompCPU)": 73.9705, - "test_quick_core_backward_split_with_sizes_copy_cuda_float64 (__main__.TestDecompCUDA)": 126.93, - "test_quick_core_backward_std_cuda_float64 (__main__.TestDecompCUDA)": 79.00166666666667, - "test_replicatepad_64bit_indexing_cuda_float16 (__main__.TestNNDeviceTypeCUDA)": 320.14799999999997, - "test_retracibility_dict_container_inp_out_dynamic_shapes (__main__.DynamicShapesExportTests)": 1307.0030000000002, - "test_retracibility_nested_list_out_dynamic_shapes (__main__.DynamicShapesExportTests)": 1304.0381999999997, - "test_reveal_module_list.py (__main__.TestTyping)": 69.52092857142858, - "test_rosenbrock_sparse_with_lrsched_True_SGD_cuda_float64 (__main__.TestOptimRenewedCUDA)": 73.97947058823529, - "test_save_load_large_string_attribute (__main__.TestSaveLoad)": 116.828, - "test_sdpa_kernel_ctx_manager2_dynamic_shapes (__main__.DynamicShapesCtxManagerTests)": 384.3718536585367, - "test_shuffler_iterdatapipe (__main__.IntegrationTestDataLoaderDataPipe)": 
124.52640000000001, - "test_slow_tasks (__main__.TestFunctionalAutogradBenchmark)": 143.84016666666668, - "test_sort_stable_cpu (__main__.CpuTritonTests)": 76.68900000000001, - "test_split_cumsum_cpu (__main__.CpuTritonTests)": 105.111, - "test_svd_lowrank_cuda_complex128 (__main__.TestLinalgCUDA)": 250.93199999999996, - "test_terminate_handler_on_crash (__main__.TestTorch)": 70.20433333333334, - "test_terminate_signal (__main__.ForkTest)": 94.89233333333334, - "test_terminate_signal (__main__.ParallelForkServerShouldWorkTest)": 138.68493220338985, - "test_terminate_signal (__main__.SpawnTest)": 97.77383333333334, - "test_transformer_backend_inductor_fullgraph_True (__main__.TestFullyShardCompile)": 113.80291666666669, - "test_transpose_copy (__main__.CPUReproTests)": 67.3468, - "test_triton_bsr_scatter_mm_blocksize_64_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 95.54525, - "test_triton_bsr_scatter_mm_blocksize_64_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 63.90972727272728, - "test_triton_bsr_scatter_mm_blocksize_64_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 108.88275, - "test_triton_bsr_softmax_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 126.33936363636363, - "test_triton_bsr_softmax_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 121.914, - "test_triton_bsr_softmax_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 97.888, - "test_triton_scaled_dot_product_attention_block_size_16_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 74.247, - "test_triton_scaled_dot_product_attention_block_size_16_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 73.68725, - "test_triton_scaled_dot_product_attention_block_size_32_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 72.97200000000001, - "test_triton_scaled_dot_product_attention_block_size_64_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 73.30687499999999, - "test_unary_ops (__main__.TestTEFuserDynamic)": 195.76619999999997, - "test_unary_ops (__main__.TestTEFuserStatic)": 161.1238, - "test_upsample_bicubic2d_cpu (__main__.CpuHalideTests)": 95.887, - "test_variant_consistency_jit_nn_functional_max_pool2d_cpu_float32 (__main__.TestJitCPU)": 94.2415, - "test_variant_consistency_jit_nn_functional_max_pool2d_cuda_float32 (__main__.TestJitCUDA)": 79.27025, - "test_vec_bitwise (__main__.CPUReproTests)": 67.08171428571428, - "test_vmapjvpvjp_linalg_lu_solve_cuda_float32 (__main__.TestOperatorsCUDA)": 65.91033333333333, - "test_vmapjvpvjp_max_pool2d_with_indices_backward_cpu_float32 (__main__.TestOperatorsCPU)": 64.7045, - "test_vmapjvpvjp_max_pool2d_with_indices_backward_cuda_float32 (__main__.TestOperatorsCUDA)": 67.02433333333333, - "test_vmapjvpvjp_nn_functional_conv2d_cpu_float32 (__main__.TestOperatorsCPU)": 60.5145, - "test_vmapjvpvjp_nn_functional_max_pool2d_cpu_float32 (__main__.TestOperatorsCPU)": 69.64450000000001, - "test_vmapjvpvjp_nn_functional_max_pool2d_cuda_float32 (__main__.TestOperatorsCUDA)": 70.06475, - "test_vmapjvpvjp_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 63.21600000000001, - "test_vmapjvpvjp_unbind_cpu_float32 (__main__.TestOperatorsCPU)": 66.5395, - "test_vmapjvpvjp_unbind_cuda_float32 (__main__.TestOperatorsCUDA)": 78.33349999999999, - "test_vmapvjpvjp_meshgrid_list_of_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 68.875, - "test_vmapvjpvjp_meshgrid_variadic_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 92.238, - 
"test_vmapvjpvjp_nn_functional_bilinear_cuda_float32 (__main__.TestOperatorsCUDA)": 125.17075 + "test_AllenaiLongformerBase_repro_cpu (__main__.CpuHalideTests)": 217.4143320719401, + "test__adaptive_avg_pool2d (__main__.CPUReproTests)": 166.39100392659506, + "test_adaptive_max_pool2d1_cpu (__main__.CpuHalideTests)": 114.1923344930013, + "test_after_aot_cpu_runtime_error (__main__.MinifierIsolateTests)": 63.9750010172526, + "test_alexnet_prefix_cpu (__main__.CpuHalideTests)": 192.23033142089844, + "test_aot_autograd_symbolic_exhaustive_linalg_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 61.99166671435038, + "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool1d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 69.81999969482422, + "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool2d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 124.89299774169922, + "test_aot_autograd_symbolic_exhaustive_nn_functional_max_pool3d_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 76.3479995727539, + "test_aot_autograd_symbolic_exhaustive_svd_cpu_float32 (__main__.TestEagerFusionOpInfoCPU)": 63.36962493260702, + "test_aot_autograd_symbolic_module_exhaustive_nn_TransformerDecoderLayer_cpu_float32 (__main__.TestEagerFusionModuleInfoCPU)": 81.5479965209961, + "test_associative_scan_dim_reverse_False_combine_mode_generic_cpu (__main__.TestControlFlow)": 67.8025016784668, + "test_associative_scan_dim_reverse_True_combine_mode_generic_cpu (__main__.TestControlFlow)": 66.13800048828125, + "test_avg_pool3d_backward2_cpu (__main__.CpuTests)": 478.62633260091144, + "test_avg_pool3d_backward2_cuda (__main__.GPUTests)": 93.62950134277344, + "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 506.30767822265625, + "test_avg_pool3d_backward2_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 491.98000081380206, + "test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 64.00250053405762, + "test_avg_pool3d_backward2_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 121.60200500488281, + "test_avg_pool3d_backward_cpu (__main__.CpuHalideTests)": 61.75266647338867, + "test_backward_nn_functional_multi_head_attention_forward_cpu_float32 (__main__.TestCompositeComplianceCPU)": 78.88500213623047, + "test_backward_nn_functional_multi_head_attention_forward_cuda_float32 (__main__.TestCompositeComplianceCUDA)": 86.18000030517578, + "test_basic_cpu (__main__.EfficientConvBNEvalCpuTests)": 258.5509999593099, + "test_basic_cuda (__main__.EfficientConvBNEvalGpuTests)": 185.53849411010742, + "test_builtin_equivalent_funcs (__main__.TorchFunctionModeTests)": 106.2084831730012, + "test_captured_score_mod_aot_eager_gradcheck_score_mod_name__head_offset_mode_eager (__main__.TestFlexAttention)": 168.5279998779297, + "test_checkpoint_cast (__main__.TestFxToOnnx)": 367.0326639811198, + "test_collect_callgrind (__main__.TestBenchmarkUtils)": 445.89332071940106, + "test_comprehensive_constant_pad_nd_cpu_float16 (__main__.TestInductorOpInfoCPU)": 77.9749984741211, + "test_comprehensive_constant_pad_nd_cpu_float32 (__main__.TestInductorOpInfoCPU)": 79.8239974975586, + "test_comprehensive_constant_pad_nd_cpu_float64 (__main__.TestInductorOpInfoCPU)": 85.11900329589844, + "test_comprehensive_constant_pad_nd_cpu_int32 (__main__.TestInductorOpInfoCPU)": 76.80500030517578, + "test_comprehensive_constant_pad_nd_cpu_int64 (__main__.TestInductorOpInfoCPU)": 85.15299987792969, + "test_comprehensive_diff_cpu_bool (__main__.TestInductorOpInfoCPU)": 
120.29299926757812, + "test_comprehensive_diff_cpu_float32 (__main__.TestInductorOpInfoCPU)": 124.5790023803711, + "test_comprehensive_diff_cpu_float64 (__main__.TestInductorOpInfoCPU)": 117.91300201416016, + "test_comprehensive_diff_cpu_int32 (__main__.TestInductorOpInfoCPU)": 114.56999969482422, + "test_comprehensive_diff_cpu_int64 (__main__.TestInductorOpInfoCPU)": 112.50800323486328, + "test_comprehensive_diff_cuda_complex128 (__main__.TestDecompCUDA)": 84.26350212097168, + "test_comprehensive_diff_cuda_complex64 (__main__.TestDecompCUDA)": 78.46549987792969, + "test_comprehensive_diff_cuda_float32 (__main__.TestDecompCUDA)": 67.1016674041748, + "test_comprehensive_diff_cuda_float64 (__main__.TestDecompCUDA)": 65.60141595204671, + "test_comprehensive_dist_cpu_float16 (__main__.TestInductorOpInfoCPU)": 92.52999877929688, + "test_comprehensive_dist_cpu_float32 (__main__.TestInductorOpInfoCPU)": 97.3219985961914, + "test_comprehensive_dist_cpu_float64 (__main__.TestInductorOpInfoCPU)": 95.83000183105469, + "test_comprehensive_eye_cpu_bool (__main__.TestInductorOpInfoCPU)": 144.24400329589844, + "test_comprehensive_eye_cpu_float16 (__main__.TestInductorOpInfoCPU)": 144.09800720214844, + "test_comprehensive_eye_cpu_float32 (__main__.TestInductorOpInfoCPU)": 140.6179962158203, + "test_comprehensive_eye_cpu_float64 (__main__.TestInductorOpInfoCPU)": 147.72799682617188, + "test_comprehensive_eye_cpu_int32 (__main__.TestInductorOpInfoCPU)": 148.1300048828125, + "test_comprehensive_eye_cpu_int64 (__main__.TestInductorOpInfoCPU)": 140.44900512695312, + "test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestDecompCPU)": 336.2829895019531, + "test_comprehensive_grid_sampler_2d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 79.47200012207031, + "test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestDecompCPU)": 332.3320007324219, + "test_comprehensive_grid_sampler_2d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 87.55799865722656, + "test_comprehensive_grid_sampler_2d_cuda_bfloat16 (__main__.TestDecompCUDA)": 198.34749603271484, + "test_comprehensive_grid_sampler_2d_cuda_float16 (__main__.TestDecompCUDA)": 179.73450469970703, + "test_comprehensive_grid_sampler_2d_cuda_float32 (__main__.TestDecompCUDA)": 967.4775085449219, + "test_comprehensive_grid_sampler_2d_cuda_float64 (__main__.TestDecompCUDA)": 694.5654907226562, + "test_comprehensive_linalg_svd_cuda_complex128 (__main__.TestDecompCUDA)": 66.58891677856445, + "test_comprehensive_linalg_svd_cuda_complex64 (__main__.TestDecompCUDA)": 65.6439167658488, + "test_comprehensive_linalg_vector_norm_cpu_float16 (__main__.TestInductorOpInfoCPU)": 227.33700561523438, + "test_comprehensive_linalg_vector_norm_cpu_float32 (__main__.TestInductorOpInfoCPU)": 216.9149932861328, + "test_comprehensive_linalg_vector_norm_cpu_float64 (__main__.TestInductorOpInfoCPU)": 232.6009979248047, + "test_comprehensive_logspace_cpu_float32 (__main__.TestInductorOpInfoCPU)": 481.40899658203125, + "test_comprehensive_logspace_cpu_float64 (__main__.TestInductorOpInfoCPU)": 471.8290100097656, + "test_comprehensive_logspace_cpu_int32 (__main__.TestInductorOpInfoCPU)": 486.5690002441406, + "test_comprehensive_logspace_cpu_int64 (__main__.TestInductorOpInfoCPU)": 463.9100036621094, + "test_comprehensive_masked_amax_cpu_float16 (__main__.TestInductorOpInfoCPU)": 110.54100036621094, + "test_comprehensive_masked_amax_cpu_float32 (__main__.TestInductorOpInfoCPU)": 108.97200012207031, + "test_comprehensive_masked_amax_cpu_float64 (__main__.TestInductorOpInfoCPU)": 
102.99299621582031, + "test_comprehensive_masked_amax_cpu_int32 (__main__.TestInductorOpInfoCPU)": 101.9540023803711, + "test_comprehensive_masked_amax_cpu_int64 (__main__.TestInductorOpInfoCPU)": 113.29900360107422, + "test_comprehensive_masked_amin_cpu_float16 (__main__.TestInductorOpInfoCPU)": 109.6259994506836, + "test_comprehensive_masked_amin_cpu_float32 (__main__.TestInductorOpInfoCPU)": 108.30999755859375, + "test_comprehensive_masked_amin_cpu_float64 (__main__.TestInductorOpInfoCPU)": 116.44100189208984, + "test_comprehensive_masked_amin_cpu_int32 (__main__.TestInductorOpInfoCPU)": 109.46900177001953, + "test_comprehensive_masked_amin_cpu_int64 (__main__.TestInductorOpInfoCPU)": 104.93399810791016, + "test_comprehensive_masked_mean_cpu_bool (__main__.TestInductorOpInfoCPU)": 114.0250015258789, + "test_comprehensive_masked_mean_cpu_float16 (__main__.TestInductorOpInfoCPU)": 101.22699737548828, + "test_comprehensive_masked_mean_cpu_float32 (__main__.TestInductorOpInfoCPU)": 107.9739990234375, + "test_comprehensive_masked_mean_cpu_float64 (__main__.TestInductorOpInfoCPU)": 99.99700164794922, + "test_comprehensive_masked_mean_cpu_int32 (__main__.TestInductorOpInfoCPU)": 101.21099853515625, + "test_comprehensive_masked_mean_cpu_int64 (__main__.TestInductorOpInfoCPU)": 101.0739974975586, + "test_comprehensive_masked_norm_cpu_float16 (__main__.TestInductorOpInfoCPU)": 520.8800048828125, + "test_comprehensive_masked_norm_cpu_float32 (__main__.TestInductorOpInfoCPU)": 519.7890014648438, + "test_comprehensive_masked_norm_cpu_float64 (__main__.TestInductorOpInfoCPU)": 537.2009887695312, + "test_comprehensive_masked_norm_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 133.04099655151367, + "test_comprehensive_masked_norm_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 152.52900314331055, + "test_comprehensive_masked_norm_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 152.47850036621094, + "test_comprehensive_masked_prod_cpu_bool (__main__.TestInductorOpInfoCPU)": 106.46099853515625, + "test_comprehensive_masked_prod_cpu_float16 (__main__.TestInductorOpInfoCPU)": 109.83699798583984, + "test_comprehensive_masked_prod_cpu_float32 (__main__.TestInductorOpInfoCPU)": 103.9739990234375, + "test_comprehensive_masked_prod_cpu_float64 (__main__.TestInductorOpInfoCPU)": 105.8479995727539, + "test_comprehensive_masked_prod_cpu_int32 (__main__.TestInductorOpInfoCPU)": 102.44999694824219, + "test_comprehensive_masked_prod_cpu_int64 (__main__.TestInductorOpInfoCPU)": 105.66799926757812, + "test_comprehensive_masked_sum_cpu_bool (__main__.TestInductorOpInfoCPU)": 106.78600311279297, + "test_comprehensive_masked_sum_cpu_float16 (__main__.TestInductorOpInfoCPU)": 105.28500366210938, + "test_comprehensive_masked_sum_cpu_float32 (__main__.TestInductorOpInfoCPU)": 103.47899627685547, + "test_comprehensive_masked_sum_cpu_float64 (__main__.TestInductorOpInfoCPU)": 103.16100311279297, + "test_comprehensive_masked_sum_cpu_int32 (__main__.TestInductorOpInfoCPU)": 109.84500122070312, + "test_comprehensive_masked_sum_cpu_int64 (__main__.TestInductorOpInfoCPU)": 101.93699645996094, + "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestDecompCUDA)": 81.2234992980957, + "test_comprehensive_nn_functional_gaussian_nll_loss_cuda_float64 (__main__.TestDecompCUDA)": 74.54800033569336, + "test_comprehensive_nn_functional_glu_cpu_float16 (__main__.TestInductorOpInfoCPU)": 81.26899719238281, + "test_comprehensive_nn_functional_glu_cpu_float32 (__main__.TestInductorOpInfoCPU)": 79.93000030517578, + 
"test_comprehensive_nn_functional_glu_cpu_float64 (__main__.TestInductorOpInfoCPU)": 81.04100036621094, + "test_comprehensive_nn_functional_grid_sample_cpu_float32 (__main__.TestDecompCPU)": 92.6050033569336, + "test_comprehensive_nn_functional_grid_sample_cpu_float64 (__main__.TestDecompCPU)": 104.4520034790039, + "test_comprehensive_nn_functional_grid_sample_cuda_bfloat16 (__main__.TestDecompCUDA)": 60.700416564941406, + "test_comprehensive_nn_functional_grid_sample_cuda_float32 (__main__.TestDecompCUDA)": 178.74700164794922, + "test_comprehensive_nn_functional_grid_sample_cuda_float64 (__main__.TestDecompCUDA)": 274.27099609375, + "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float32 (__main__.TestDecompCUDA)": 61.93400192260742, + "test_comprehensive_nn_functional_interpolate_trilinear_cuda_float64 (__main__.TestDecompCUDA)": 61.11149978637695, + "test_comprehensive_nn_functional_max_pool1d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 175.85699462890625, + "test_comprehensive_nn_functional_max_pool1d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 168.17300415039062, + "test_comprehensive_nn_functional_max_pool1d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 165.5489959716797, + "test_comprehensive_nn_functional_max_pool2d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 923.7789916992188, + "test_comprehensive_nn_functional_max_pool2d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 884.5230102539062, + "test_comprehensive_nn_functional_max_pool2d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 881.906982421875, + "test_comprehensive_nn_functional_max_pool2d_cpu_int32 (__main__.TestInductorOpInfoCPU)": 842.1710205078125, + "test_comprehensive_nn_functional_max_pool2d_cpu_int64 (__main__.TestInductorOpInfoCPU)": 848.0770263671875, + "test_comprehensive_nn_functional_max_pool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 752.0610046386719, + "test_comprehensive_nn_functional_max_pool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 753.4309997558594, + "test_comprehensive_nn_functional_max_pool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 838.5154724121094, + "test_comprehensive_nn_functional_max_unpool2d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 221.04100036621094, + "test_comprehensive_nn_functional_max_unpool2d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 234.07400512695312, + "test_comprehensive_nn_functional_max_unpool2d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 231.1929931640625, + "test_comprehensive_nn_functional_max_unpool2d_cuda_float16 (__main__.TestInductorOpInfoCUDA)": 78.63642120361328, + "test_comprehensive_nn_functional_max_unpool2d_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 76.75510486803557, + "test_comprehensive_nn_functional_max_unpool2d_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 77.19105228624846, + "test_comprehensive_nn_functional_max_unpool3d_cpu_float16 (__main__.TestInductorOpInfoCPU)": 153.2100067138672, + "test_comprehensive_nn_functional_max_unpool3d_cpu_float32 (__main__.TestInductorOpInfoCPU)": 149.39599609375, + "test_comprehensive_nn_functional_max_unpool3d_cpu_float64 (__main__.TestInductorOpInfoCPU)": 146.4810028076172, + "test_comprehensive_nn_functional_pad_constant_cpu_float16 (__main__.TestInductorOpInfoCPU)": 75.3740005493164, + "test_comprehensive_nn_functional_pad_constant_cpu_float32 (__main__.TestInductorOpInfoCPU)": 88.25800323486328, + "test_comprehensive_nn_functional_pad_constant_cpu_float64 (__main__.TestInductorOpInfoCPU)": 77.86799621582031, + 
"test_comprehensive_nn_functional_pad_constant_cpu_int32 (__main__.TestInductorOpInfoCPU)": 85.75399780273438, + "test_comprehensive_nn_functional_pad_constant_cpu_int64 (__main__.TestInductorOpInfoCPU)": 75.73500061035156, + "test_comprehensive_nn_functional_poisson_nll_loss_cpu_float16 (__main__.TestInductorOpInfoCPU)": 134.7989959716797, + "test_comprehensive_nn_functional_poisson_nll_loss_cpu_float32 (__main__.TestInductorOpInfoCPU)": 125.13800048828125, + "test_comprehensive_nn_functional_poisson_nll_loss_cpu_float64 (__main__.TestInductorOpInfoCPU)": 138.28500366210938, + "test_comprehensive_nn_functional_poisson_nll_loss_cpu_int32 (__main__.TestInductorOpInfoCPU)": 127.2229995727539, + "test_comprehensive_nn_functional_poisson_nll_loss_cpu_int64 (__main__.TestInductorOpInfoCPU)": 124.50499725341797, + "test_comprehensive_nn_functional_unfold_cpu_bool (__main__.TestInductorOpInfoCPU)": 141.44400024414062, + "test_comprehensive_nn_functional_unfold_cpu_float16 (__main__.TestInductorOpInfoCPU)": 255.00599670410156, + "test_comprehensive_nn_functional_unfold_cpu_float32 (__main__.TestInductorOpInfoCPU)": 257.2449951171875, + "test_comprehensive_nn_functional_unfold_cpu_float64 (__main__.TestInductorOpInfoCPU)": 252.31399536132812, + "test_comprehensive_ormqr_cuda_complex128 (__main__.TestDecompCUDA)": 113.76800155639648, + "test_comprehensive_ormqr_cuda_complex64 (__main__.TestDecompCUDA)": 91.2755012512207, + "test_comprehensive_ormqr_cuda_float32 (__main__.TestDecompCUDA)": 72.64516703287761, + "test_comprehensive_ormqr_cuda_float32 (__main__.TestInductorOpInfoCUDA)": 119.41699981689453, + "test_comprehensive_ormqr_cuda_float64 (__main__.TestInductorOpInfoCUDA)": 69.64147366975483, + "test_comprehensive_svd_cuda_complex128 (__main__.TestDecompCUDA)": 73.99099953969319, + "test_comprehensive_svd_cuda_complex64 (__main__.TestDecompCUDA)": 65.03750038146973, + "test_comprehensive_svd_lowrank_cuda_complex128 (__main__.TestDecompCUDA)": 64.99233373006184, + "test_comprehensive_svd_lowrank_cuda_complex64 (__main__.TestDecompCUDA)": 64.825332959493, + "test_cond_autograd_nested (__main__.TestControlFlow)": 78.28133392333984, + "test_constructor_autograd_SparseBSC_cuda (__main__.TestSparseAnyCUDA)": 85.54650115966797, + "test_constructor_autograd_SparseBSR_cuda (__main__.TestSparseAnyCUDA)": 77.20849990844727, + "test_constructor_autograd_SparseCSC_cuda (__main__.TestSparseAnyCUDA)": 66.02849769592285, + "test_constructor_autograd_SparseCSR_cuda (__main__.TestSparseAnyCUDA)": 90.93049621582031, + "test_conv1d_basic (__main__.TestXNNPACKConv1dTransformPass)": 217.32833099365234, + "test_conv1d_with_relu_fc (__main__.TestXNNPACKConv1dTransformPass)": 434.2860107421875, + "test_conv2d_unary_cpu_cpp_wrapper (__main__.TestCppWrapper)": 279.1969909667969, + "test_conv_freezing_cpu_with_stack_allocation (__main__.AOTInductorTestABICompatibleCpuWithStackAllocation)": 63.11627990722656, + "test_correctness_NAdam_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 77.31900215148926, + "test_correctness_RAdam_use_closure_True_cuda_float32 (__main__.CompiledOptimizerParityTestsCUDA)": 62.60299987792969, + "test_count_nonzero_all (__main__.TestBool)": 663.2940063476562, + "test_custom_module_lstm (__main__.TestQuantizedOps)": 205.06800333658853, + "test_ddp_uneven_inputs (__main__.TestDistBackendWithSpawn)": 185.50533405939737, + "test_deconv_freezing_cpu_with_stack_allocation (__main__.AOTInductorTestABICompatibleCpuWithStackAllocation)": 60.336119651794434, + 
"test_dispatch_symbolic_meta_outplace_all_strides_nn_functional_gaussian_nll_loss_cuda_float32 (__main__.TestMetaCUDA)": 80.5359992980957, + "test_eig_check_magma_cuda_float32 (__main__.TestLinalgCUDA)": 62.61600172519684, + "test_fail_creation_ops.py (__main__.TestTyping)": 64.10633341471355, + "test_fail_random.py (__main__.TestTyping)": 73.96077489852905, + "test_fn_fwgrad_bwgrad_cumprod_cuda_complex128 (__main__.TestFwdGradientsCUDA)": 63.15999794006348, + "test_fn_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 69.70149993896484, + "test_fn_gradgrad_map_nested_cpu_float64 (__main__.TestBwdGradientsCPU)": 88.06099700927734, + "test_fn_gradgrad_map_nested_cuda_float64 (__main__.TestBwdGradientsCUDA)": 66.5104997808283, + "test_fn_gradgrad_map_triple_nested_cpu_float64 (__main__.TestBwdGradientsCPU)": 490.46099853515625, + "test_fn_gradgrad_map_triple_nested_cuda_float64 (__main__.TestBwdGradientsCUDA)": 309.78050231933594, + "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 78.52733357747395, + "test_fuse_large_params_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 77.23800150553386, + "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesCodegenGPUTests)": 72.49049758911133, + "test_fuse_large_params_dynamic_shapes_cuda (__main__.DynamicShapesGPUTests)": 61.91749954223633, + "test_grad_nn_Transformer_cuda_float64 (__main__.TestModuleCUDA)": 85.50300216674805, + "test_gradgrad_nn_LSTM_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 105.5469970703125, + "test_gradgrad_nn_LSTM_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 84.302001953125, + "test_gradgrad_nn_TransformerDecoderLayer_cuda_float64 (__main__.TestModuleCUDA)": 178.19400024414062, + "test_gradgrad_nn_TransformerEncoder_eval_mode_cuda_float64 (__main__.TestModuleCUDA)": 120.38199996948242, + "test_gradgrad_nn_TransformerEncoder_train_mode_cuda_float64 (__main__.TestModuleCUDA)": 130.34649658203125, + "test_grid_sampler_2d_cpu (__main__.CpuHalideTests)": 188.41600036621094, + "test_index_select_cuda_float8_e4m3fnuz (__main__.TestTorchDeviceTypeCUDA)": 67.98859901059419, + "test_index_select_cuda_float8_e5m2fnuz (__main__.TestTorchDeviceTypeCUDA)": 68.28252009976656, + "test_indexing (__main__.TestAutogradWithCompiledAutograd)": 66.04966608683269, + "test_indirect_device_assert (__main__.TritonCodeGenTests)": 186.83200073242188, + "test_inductor_no_recursionerror_on_for_loops_dynamic_shapes (__main__.DynamicShapesReproTests)": 62.57789257594517, + "test_inplace_gradgrad_cumprod_cuda_complex128 (__main__.TestBwdGradientsCUDA)": 73.21549987792969, + "test_jit_cuda_archflags (__main__.TestCppExtensionJIT)": 144.78700256347656, + "test_linalg_solve_triangular_large_cuda_complex128 (__main__.TestLinalgCUDA)": 590.2300109863281, + "test_linalg_solve_triangular_large_cuda_complex64 (__main__.TestLinalgCUDA)": 67.87849998474121, + "test_linalg_solve_triangular_large_cuda_float64 (__main__.TestLinalgCUDA)": 69.79699993133545, + "test_linear (__main__.TestStaticQuantizedModule)": 165.18300247192383, + "test_linear_binary_cpp_wrapper (__main__.TestCppWrapper)": 175.40899658203125, + "test_linear_binary_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 170.81900024414062, + "test_linear_packed_cpp_wrapper (__main__.TestCppWrapper)": 80.76100158691406, + "test_linear_packed_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 76.26699829101562, + "test_linear_relu (__main__.TestStaticQuantizedModule)": 60.553001403808594, 
+ "test_load_from_view_buffer (__main__.TestFlexAttention)": 92.79700034005302, + "test_lobpcg_ortho_cuda_float64 (__main__.TestLinalgCUDA)": 62.85261689699613, + "test_max_autotune (__main__.TestFlexAttention)": 76.35428619384766, + "test_max_autotune_cutlass_backend_addmm_dynamic_False_max_autotune_gemm_backends_ATen,Triton,CUTLASS (__main__.TestCutlassBackend)": 87.5459976196289, + "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCodegenCpuTests)": 64.43099848429362, + "test_max_pool2d_with_indices_backward4_dynamic_shapes_cpu (__main__.DynamicShapesCpuTests)": 62.10766728719076, + "test_mixed_mm_exhaustive_dtypes (__main__.TestPatternMatcher)": 90.12800025939941, + "test_output_match_max_pool2d_with_indices_backward_cpu_bfloat16 (__main__.TestConsistencyCPU)": 60.4490000406901, + "test_proper_exit (__main__.TestDataLoader)": 214.47549438476562, + "test_proper_exit (__main__.TestDataLoaderPersistentWorkers)": 212.3499984741211, + "test_qconv2d_add_cpu_cpp_wrapper (__main__.TestCppWrapper)": 60.95766671498617, + "test_qconv2d_add_cpu_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 62.85099951426188, + "test_qconv2d_add_relu_cpu_cpp_wrapper (__main__.TestCppWrapper)": 61.30074977874756, + "test_qconv2d_add_relu_cpu_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 64.02558294932048, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 60.128166834513344, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 113.4209976196289, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 113.31400299072266, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 141.7570037841797, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 112.85099792480469, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 108.26799774169922, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 112.7490005493164, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 112.31099700927734, + "test_qlinear_add_int8_mixed_bf16_use_relu_False_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 111.48500061035156, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False (__main__.TestPatternMatcher)": 62.4573335647583, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 111.9219970703125, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 112.73999786376953, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True (__main__.TestPatternMatcher)": 60.87241554260254, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 114.99500274658203, + 
"test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_False_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 111.69499969482422, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_cpp_wrapper (__main__.TestCppWrapper)": 109.68599700927734, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_False_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 109.4990005493164, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True (__main__.TestPatternMatcher)": 60.729000091552734, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_cpp_wrapper (__main__.TestCppWrapper)": 115.64600372314453, + "test_qlinear_add_int8_mixed_bf16_use_relu_True_is_qat_True_is_dynamic_True_dynamic_shapes_cpp_wrapper (__main__.DynamicShapesCppWrapperCpuTests)": 112.4219970703125, + "test_qrnncell (__main__.TestDynamicQuantizedOps)": 63.147268639753264, + "test_quick_core_backward__unsafe_masked_index_cpu_float64 (__main__.TestDecompCPU)": 413.0690002441406, + "test_quick_core_backward__unsafe_masked_index_cuda_float64 (__main__.TestDecompCUDA)": 630.9710083007812, + "test_quick_core_backward__unsafe_masked_index_put_accumulate_cpu_float64 (__main__.TestDecompCPU)": 571.0189819335938, + "test_quick_core_backward__unsafe_masked_index_put_accumulate_cuda_float64 (__main__.TestDecompCUDA)": 851.6174926757812, + "test_quick_core_backward_expand_copy_cuda_float64 (__main__.TestDecompCUDA)": 74.7632490793864, + "test_quick_core_backward_nn_functional_max_unpool3d_grad_cpu_float64 (__main__.TestDecompCPU)": 78.52200317382812, + "test_quick_core_backward_nn_functional_max_unpool3d_grad_cuda_float64 (__main__.TestDecompCUDA)": 200.74200439453125, + "test_quick_core_backward_roll_cpu_float64 (__main__.TestDecompCPU)": 107.08300018310547, + "test_quick_core_backward_roll_cuda_float64 (__main__.TestDecompCUDA)": 150.26499938964844, + "test_quick_core_backward_select_scatter_cuda_float64 (__main__.TestDecompCUDA)": 98.96500015258789, + "test_quick_core_backward_split_with_sizes_copy_cpu_float64 (__main__.TestDecompCPU)": 74.80500030517578, + "test_quick_core_backward_split_with_sizes_copy_cuda_float64 (__main__.TestDecompCUDA)": 118.8329963684082, + "test_quick_core_backward_std_cuda_float64 (__main__.TestDecompCUDA)": 82.09700012207031, + "test_replicatepad_64bit_indexing_cuda_float16 (__main__.TestNNDeviceTypeCUDA)": 324.70098876953125, + "test_rosenbrock_sparse_with_lrsched_False_SGD_cuda_float64 (__main__.TestOptimRenewedCUDA)": 86.34838581085205, + "test_rosenbrock_sparse_with_lrsched_True_SGD_cuda_float64 (__main__.TestOptimRenewedCUDA)": 76.88300371170044, + "test_save_load_large_string_attribute (__main__.TestSaveLoad)": 93.64399719238281, + "test_shuffler_iterdatapipe (__main__.IntegrationTestDataLoaderDataPipe)": 151.77233378092447, + "test_slow_tasks (__main__.TestFunctionalAutogradBenchmark)": 129.88999684651694, + "test_sort_bool_cpu (__main__.CpuTritonTests)": 340.2829996744792, + "test_sum_all_cpu_float64 (__main__.TestReductionsCPU)": 164.725030376971, + "test_svd_lowrank_cuda_complex128 (__main__.TestLinalgCUDA)": 141.5560052394867, + "test_terminate_handler_on_crash (__main__.TestTorch)": 71.76799805959065, + "test_terminate_signal (__main__.ForkTest)": 105.02499709029992, + "test_terminate_signal (__main__.ParallelForkServerShouldWorkTest)": 105.14500128229459, + "test_terminate_signal (__main__.SpawnTest)": 107.84633318583171, + "test_train_parity_multi_group 
(__main__.TestFullyShard1DTrainingCore)": 124.47291040041654, + "test_transpose_copy (__main__.CPUReproTests)": 63.25933329264323, + "test_triton_bsr_scatter_mm_blocksize_64_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 81.10850143432617, + "test_triton_bsr_scatter_mm_blocksize_64_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 69.84850120544434, + "test_triton_bsr_scatter_mm_blocksize_64_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 74.66350173950195, + "test_triton_bsr_softmax_cuda_bfloat16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 139.77949905395508, + "test_triton_bsr_softmax_cuda_float16 (__main__.TestSparseCompressedTritonKernelsCUDA)": 133.375, + "test_triton_bsr_softmax_cuda_float32 (__main__.TestSparseCompressedTritonKernelsCUDA)": 121.22699737548828, + "test_unary_ops (__main__.TestTEFuserDynamic)": 228.66966756184897, + "test_unary_ops (__main__.TestTEFuserStatic)": 204.28700065612793, + "test_upsample_bicubic2d_cpu (__main__.CpuHalideTests)": 95.69666544596355, + "test_variant_consistency_jit_nn_functional_max_pool2d_cpu_float32 (__main__.TestJitCPU)": 85.29199981689453, + "test_variant_consistency_jit_nn_functional_max_pool2d_cuda_float32 (__main__.TestJitCUDA)": 68.83450126647949, + "test_vmapjvpvjp_diff_cuda_float32 (__main__.TestOperatorsCUDA)": 60.5719234759991, + "test_vmapjvpvjp_linalg_lu_solve_cpu_float32 (__main__.TestOperatorsCPU)": 67.02274974187215, + "test_vmapjvpvjp_linalg_lu_solve_cuda_float32 (__main__.TestOperatorsCUDA)": 78.4694995880127, + "test_vmapjvpvjp_linalg_multi_dot_cuda_float32 (__main__.TestOperatorsCUDA)": 72.39323043823242, + "test_vmapjvpvjp_linalg_solve_triangular_cuda_float32 (__main__.TestOperatorsCUDA)": 67.01423028799204, + "test_vmapjvpvjp_linalg_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 74.47176947960487, + "test_vmapjvpvjp_max_pool2d_with_indices_backward_cpu_float32 (__main__.TestOperatorsCPU)": 64.39900207519531, + "test_vmapjvpvjp_max_pool2d_with_indices_backward_cuda_float32 (__main__.TestOperatorsCUDA)": 63.3494987487793, + "test_vmapjvpvjp_nn_functional_max_pool2d_cpu_float32 (__main__.TestOperatorsCPU)": 71.36399841308594, + "test_vmapjvpvjp_nn_functional_max_pool2d_cuda_float32 (__main__.TestOperatorsCUDA)": 71.33700180053711, + "test_vmapjvpvjp_svd_cuda_float32 (__main__.TestOperatorsCUDA)": 60.809499740600586, + "test_vmapjvpvjp_unbind_cpu_float32 (__main__.TestOperatorsCPU)": 71.86699676513672, + "test_vmapjvpvjp_unbind_cuda_float32 (__main__.TestOperatorsCUDA)": 77.49649810791016, + "test_vmapvjpvjp_meshgrid_list_of_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 66.41450119018555, + "test_vmapvjpvjp_meshgrid_variadic_tensors_cuda_float32 (__main__.TestOperatorsCUDA)": 67.54150009155273, + "test_vmapvjpvjp_nn_functional_bilinear_cuda_float32 (__main__.TestOperatorsCUDA)": 127.17300033569336 }
\ No newline at end of file

From 419a7e197d2579e699c2e730902d197a27df8deb Mon Sep 17 00:00:00 2001
From: cyy
Date: Mon, 4 Nov 2024 13:43:16 +0000
Subject: [PATCH 005/503] [6/N] Fix Wextra-semi warning (#139605)

Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/139605
Approved by: https://github.com/ezyang
---
 aten/src/ATen/core/dispatch/OperatorEntry.cpp | 2 +- .../src/ATen/cpu/vec/vec256/vec256_bfloat16.h | 10 +- aten/src/ATen/functorch/Interpreter.cpp | 4 +- aten/src/ATen/mps/MPSAllocator.mm | 2 +- aten/src/ATen/native/BlasKernel.cpp | 2 +- aten/src/ATen/native/EmbeddingBag.cpp | 2 +- aten/src/ATen/native/MaxUnpooling.cpp | 2 +-
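With -Wextra-semi enabled, a semicolon that only forms an empty declaration is reported, which is why this patch strips the trailing ';' after registration/definition macros whose expansion already ends in a semicolon, and after function bodies at namespace scope. A minimal sketch of the pattern, assuming a hypothetical EXAMPLE_REGISTER macro rather than the real DECLARE_DISPATCH/REGISTER_*_DISPATCH machinery:

    // The expansion already ends with ';', so a call-site semicolon is redundant.
    #define EXAMPLE_REGISTER(name) int name##_registered = 0;

    EXAMPLE_REGISTER(foo);  // flagged: the extra ';' is an empty declaration
    EXAMPLE_REGISTER(bar)   // fixed form used throughout this patch

    int f() {
      return 0;
    };                      // flagged: redundant ';' after the function body

    int g() {
      return 0;
    }                       // fine

The hunks below apply exactly this transformation, so neither the macro expansions nor runtime behavior change.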
.../native/NaiveConvolutionTranspose2d.cpp | 2 +- .../ATen/native/NaiveDilatedConvolution.cpp | 4 +- aten/src/ATen/native/Pool.h | 2 +- aten/src/ATen/native/RNN.cpp | 34 +++--- aten/src/ATen/native/UnaryOps.cpp | 4 +- aten/src/ATen/native/cpu/BinaryOpsKernel.cpp | 8 +- .../src/ATen/native/cpu/MultinomialKernel.cpp | 2 +- ...educedPrecisionFloatGemvFastPathKernel.cpp | 6 +- .../ReducedPrecisionFloatGemvFastPathKernel.h | 4 +- aten/src/ATen/native/cpu/SoftMaxKernel.cpp | 6 +- aten/src/ATen/native/cpu/UnaryOpsKernel.cpp | 46 ++++---- aten/src/ATen/native/cuda/DepthwiseConv2d.cu | 2 +- aten/src/ATen/native/cuda/DepthwiseConv3d.cu | 2 +- .../ATen/native/cuda/FlattenIndicesKernel.cu | 2 +- aten/src/ATen/native/cuda/IndexKernel.cu | 2 +- aten/src/ATen/native/cuda/Indexing.cu | 6 +- .../ATen/native/cuda/LinearAlgebraStubs.cpp | 24 ++-- .../cuda/NaiveConvolutionTranspose2d.cu | 2 +- .../cuda/NaiveConvolutionTranspose3d.cu | 2 +- .../native/cuda/NaiveDilatedConvolution.cu | 4 +- aten/src/ATen/native/cuda/ReduceOps.cpp | 16 +-- aten/src/ATen/native/cuda/ScanKernels.cpp | 4 +- aten/src/ATen/native/cuda/Sort.cpp | 2 +- .../cuda/SparseBinaryOpIntersectionKernel.cu | 6 +- aten/src/ATen/native/cuda/TensorCompare.cpp | 2 +- .../src/ATen/native/cuda/TensorModeKernel.cpp | 2 +- .../native/cuda/linalg/BatchLinearAlgebra.cpp | 20 ++-- aten/src/ATen/native/cudnn/RNN.cpp | 110 +++++++++--------- aten/src/ATen/native/miopen/Conv_miopen.cpp | 6 +- aten/src/ATen/native/miopen/RNN_miopen.cpp | 8 +- aten/src/ATen/native/mkl/SpectralOps.cpp | 2 +- aten/src/ATen/native/mkldnn/Conv.cpp | 12 +- aten/src/ATen/native/mkldnn/RNN.cpp | 2 +- aten/src/ATen/native/mkldnn/Utils.cpp | 6 +- .../ATen/native/mps/operations/BitwiseOps.mm | 4 +- .../native/nested/NestedTensorBinaryOps.cpp | 2 +- .../nested/cuda/NestedTensorBinaryOps.cu | 2 +- .../cpu/kernels/QuantizedOpKernels.cpp | 42 +++---- .../native/sparse/FlattenIndicesKernel.cpp | 12 +- .../SparseBinaryOpIntersectionKernel.cpp | 40 +++---- .../ATen/native/sparse/SparseCsrTensor.cpp | 8 +- .../native/sparse/SparseCsrTensorMath.cpp | 72 ++++++------ .../ATen/native/sparse/SparseTensorMath.cpp | 2 +- .../ATen/native/transformers/attention.cpp | 12 +- .../native/transformers/cuda/attention.cu | 2 +- caffe2/perfkernels/embedding_lookup_idx.cc | 32 ++--- caffe2/utils/threadpool/ThreadPool.cc | 10 +- .../api/include/torch/nn/modules/dropout.h | 2 +- .../api/include/torch/nn/modules/pooling.h | 2 +- .../csrc/api/include/torch/optim/optimizer.h | 2 +- torch/csrc/autograd/record_function_ops.cpp | 2 +- torch/csrc/distributed/c10d/Backend.hpp | 2 +- .../distributed/c10d/GlooDeviceFactory.cpp | 6 +- torch/csrc/distributed/c10d/ProcessGroup.hpp | 8 +- torch/csrc/distributed/rpc/utils.cpp | 2 +- torch/csrc/jit/frontend/sugared_value.cpp | 2 +- torch/csrc/jit/mobile/nnc/registry.cpp | 2 +- .../jit/passes/frozen_linear_transpose.cpp | 2 +- .../csrc/jit/passes/frozen_ops_to_mkldnn.cpp | 2 +- .../jit/passes/integer_value_refinement.cpp | 2 +- .../csrc/jit/passes/peephole_list_idioms.cpp | 2 +- torch/csrc/jit/passes/quantization/helper.cpp | 2 +- torch/csrc/jit/passes/utils/op_registry.cpp | 4 +- .../csrc/jit/passes/value_refinement_utils.h | 2 +- torch/csrc/jit/runtime/static/native_ops.cpp | 2 +- torch/csrc/jit/runtime/static/ops.h | 4 +- torch/csrc/jit/serialization/pickle.cpp | 2 +- torch/csrc/jit/tensorexpr/codegen.cpp | 2 +- torch/csrc/jit/tensorexpr/codegen.h | 4 +- torch/csrc/jit/tensorexpr/cpp_codegen.cpp | 2 +- torch/csrc/jit/tensorexpr/eval.cpp | 18 +-- torch/csrc/jit/tensorexpr/eval.h 
| 14 +-- torch/csrc/jit/tensorexpr/expr.cpp | 2 +- torch/csrc/jit/tensorexpr/expr.h | 4 +- .../csrc/jit/tensorexpr/external_functions.h | 2 +- torch/csrc/jit/tensorexpr/fwd_decls.h | 2 +- torch/csrc/jit/tensorexpr/hash_provider.h | 2 +- torch/csrc/jit/tensorexpr/ir.cpp | 4 +- torch/csrc/jit/tensorexpr/ir.h | 8 +- torch/csrc/jit/tensorexpr/ir_cloner.cpp | 2 +- torch/csrc/jit/tensorexpr/ir_cloner.h | 2 +- torch/csrc/jit/tensorexpr/ir_mutator.cpp | 2 +- torch/csrc/jit/tensorexpr/ir_printer.cpp | 2 +- torch/csrc/jit/tensorexpr/ir_printer.h | 2 +- torch/csrc/jit/tensorexpr/ir_simplifier.cpp | 2 +- torch/csrc/jit/tensorexpr/ir_simplifier.h | 2 +- torch/csrc/jit/tensorexpr/ir_visitor.cpp | 2 +- torch/csrc/jit/tensorexpr/llvm_codegen.cpp | 2 +- torch/csrc/jit/tensorexpr/loopnest.cpp | 2 +- torch/csrc/jit/tensorexpr/operators/misc.cpp | 2 +- torch/csrc/jit/tensorexpr/registerizer.cpp | 2 +- torch/csrc/jit/tensorexpr/registerizer.h | 6 +- torch/csrc/jit/tensorexpr/types.cpp | 4 +- torch/csrc/jit/tensorexpr/types.h | 8 +- torch/csrc/lazy/backend/lowering_context.h | 2 +- torch/csrc/lazy/core/tensor.h | 2 +- torch/csrc/profiler/perf.cpp | 6 +- torchgen/static_runtime/generator.py | 2 +- 105 files changed, 399 insertions(+), 399 deletions(-) diff --git a/aten/src/ATen/core/dispatch/OperatorEntry.cpp b/aten/src/ATen/core/dispatch/OperatorEntry.cpp index aa99e9d2fdf946..112e88c4c594f8 100644 --- a/aten/src/ATen/core/dispatch/OperatorEntry.cpp +++ b/aten/src/ATen/core/dispatch/OperatorEntry.cpp @@ -510,7 +510,7 @@ void OperatorEntry::reportSignatureError(const CppSignature& call_signature, con "This likely happened in a call to OperatorHandle::typed(). ", "Please make sure that the function signature matches the signature in the operator registration call." ); -}; +} #ifndef STRIP_ERROR_MESSAGES static std::string post_process_dispatch_key_str(std::string dispatch_key) { diff --git a/aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h b/aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h index 60d9965d983290..319761cef3a72a 100644 --- a/aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h +++ b/aten/src/ATen/cpu/vec/vec256/vec256_bfloat16.h @@ -132,7 +132,7 @@ template , inline void cvt_to_fp32(const __m128i& a, __m256& o); template <> inline void cvt_to_fp32(const __m128i& a, __m256& o) { cvtbf16_fp32(a, o); -}; +} template <> inline void cvt_to_fp32(const __m128i& a, __m256& o) { cvtfp16_fp32(a, o); } @@ -1071,8 +1071,8 @@ inline std::tuple, Vectorized> convert_##name##_float(c inline Vectorized convert_float_##name(const Vectorized& a, const Vectorized& b) { \ return cvt_from_fp32(__m256(a), __m256(b)); \ } -CONVERT_VECTORIZED_INIT(BFloat16, bfloat16); -CONVERT_VECTORIZED_INIT(Half, half); +CONVERT_VECTORIZED_INIT(BFloat16, bfloat16) +CONVERT_VECTORIZED_INIT(Half, half) #else // defined(CPU_CAPABILITY_AVX2) @@ -1096,9 +1096,9 @@ inline Vectorized convert_float_##name(const Vectorized& a, const V convert(arr, arr2, K); \ return Vectorized::loadu(arr2); \ } -CONVERT_NON_VECTORIZED_INIT(BFloat16, bfloat16); +CONVERT_NON_VECTORIZED_INIT(BFloat16, bfloat16) #if !(defined(__aarch64__) && !defined(C10_MOBILE) && !defined(__CUDACC__) && !defined(CPU_CAPABILITY_SVE256)) -CONVERT_NON_VECTORIZED_INIT(Half, half); +CONVERT_NON_VECTORIZED_INIT(Half, half) #endif #endif // defined(CPU_CAPABILITY_AVX2) diff --git a/aten/src/ATen/functorch/Interpreter.cpp b/aten/src/ATen/functorch/Interpreter.cpp index 609cda8562953d..15dba2e27af59f 100644 --- a/aten/src/ATen/functorch/Interpreter.cpp +++ b/aten/src/ATen/functorch/Interpreter.cpp @@ 
-120,11 +120,11 @@ void sanityCheckStack(const c10::OperatorHandle& op, torch::jit::Stack* stack) { } void Interpreter::process(const c10::OperatorHandle& op, torch::jit::Stack* stack) { - INTERPRETER_DISPATCH(key_, SINGLE_ARG(processImpl(op, stack))); + INTERPRETER_DISPATCH(key_, SINGLE_ARG(processImpl(op, stack))) } void Interpreter::sendToNextInterpreter(const c10::OperatorHandle& op, torch::jit::Stack* stack, bool grad_special_case) { - INTERPRETER_DISPATCH(key_, SINGLE_ARG(sendToNextInterpreterImpl(op, stack, grad_special_case))); + INTERPRETER_DISPATCH(key_, SINGLE_ARG(sendToNextInterpreterImpl(op, stack, grad_special_case))) } } // namespace at::functorch diff --git a/aten/src/ATen/mps/MPSAllocator.mm b/aten/src/ATen/mps/MPSAllocator.mm index f546d986354cd8..cf0ebc869bb42f 100644 --- a/aten/src/ATen/mps/MPSAllocator.mm +++ b/aten/src/ATen/mps/MPSAllocator.mm @@ -10,7 +10,7 @@ namespace at::mps { -C10_DEFINE_REGISTRY(MPSAllocatorCallbacksRegistry, IMpsAllocatorCallback); +C10_DEFINE_REGISTRY(MPSAllocatorCallbacksRegistry, IMpsAllocatorCallback) namespace HeapAllocator { diff --git a/aten/src/ATen/native/BlasKernel.cpp b/aten/src/ATen/native/BlasKernel.cpp index 017b88f94eda10..b397527bdf1c26 100644 --- a/aten/src/ATen/native/BlasKernel.cpp +++ b/aten/src/ATen/native/BlasKernel.cpp @@ -330,7 +330,7 @@ void gemv_fast_path( y, *incy); } -INSTANTIATE(c10::BFloat16); +INSTANTIATE(c10::BFloat16) #else template <> bool scal_use_fast_path( diff --git a/aten/src/ATen/native/EmbeddingBag.cpp b/aten/src/ATen/native/EmbeddingBag.cpp index ea97ac8a5ad38b..068612b582ff8a 100644 --- a/aten/src/ATen/native/EmbeddingBag.cpp +++ b/aten/src/ATen/native/EmbeddingBag.cpp @@ -1251,7 +1251,7 @@ embedding_bag(const Tensor &weight, const Tensor &indices, mode, sparse, per_sample_weights, include_last_offset, padding_idx); } return out; -}; +} std::tuple embedding_bag(const Tensor &weight, const Tensor &indices, diff --git a/aten/src/ATen/native/MaxUnpooling.cpp b/aten/src/ATen/native/MaxUnpooling.cpp index 0e9294770e32ab..a71db5e8ef8d16 100644 --- a/aten/src/ATen/native/MaxUnpooling.cpp +++ b/aten/src/ATen/native/MaxUnpooling.cpp @@ -64,7 +64,7 @@ Tensor& max_unpooling2d_forward_out_cpu( } return output; -}; +} Tensor max_unpooling2d_forward_cpu( const Tensor& self, diff --git a/aten/src/ATen/native/NaiveConvolutionTranspose2d.cpp b/aten/src/ATen/native/NaiveConvolutionTranspose2d.cpp index 1fe298d9e1f1ba..799b5ffa2cdbf4 100644 --- a/aten/src/ATen/native/NaiveConvolutionTranspose2d.cpp +++ b/aten/src/ATen/native/NaiveConvolutionTranspose2d.cpp @@ -871,7 +871,7 @@ static std::tuple slow_conv_transpose2d_backward_cpu( return std::tuple(grad_input, grad_weight, grad_bias); } -REGISTER_ALL_CPU_DISPATCH(slow_conv_transpose2d_backward_stub, &slow_conv_transpose2d_backward_cpu); +REGISTER_ALL_CPU_DISPATCH(slow_conv_transpose2d_backward_stub, &slow_conv_transpose2d_backward_cpu) } // namespace native } // namespace at diff --git a/aten/src/ATen/native/NaiveDilatedConvolution.cpp b/aten/src/ATen/native/NaiveDilatedConvolution.cpp index acf040259b1355..bd8ada650a96bd 100644 --- a/aten/src/ATen/native/NaiveDilatedConvolution.cpp +++ b/aten/src/ATen/native/NaiveDilatedConvolution.cpp @@ -741,7 +741,7 @@ static std::tuple slow_conv_dilated3d_backward_cpu( return std::tie(grad_input, grad_weight, grad_bias); } -REGISTER_ALL_CPU_DISPATCH(slow_conv_dilated2d_backward_stub, &slow_conv_dilated2d_backward_cpu); -REGISTER_ALL_CPU_DISPATCH(slow_conv_dilated3d_backward_stub, &slow_conv_dilated3d_backward_cpu); 
+REGISTER_ALL_CPU_DISPATCH(slow_conv_dilated2d_backward_stub, &slow_conv_dilated2d_backward_cpu) +REGISTER_ALL_CPU_DISPATCH(slow_conv_dilated3d_backward_stub, &slow_conv_dilated3d_backward_cpu) } // namespace at::native diff --git a/aten/src/ATen/native/Pool.h b/aten/src/ATen/native/Pool.h index 7e7346489fa291..893e34dd479452 100644 --- a/aten/src/ATen/native/Pool.h +++ b/aten/src/ATen/native/Pool.h @@ -14,7 +14,7 @@ using max_pool2d_fn = void(*)(const Tensor& output, const Tensor& indices, const int kW, int kH, int dW, int dH, int padW, int padH, int dilationW, int dilationH); using max_pool2d_backward_fn = void(*)(const Tensor& grad_input, const Tensor& grad_output, const Tensor& indices); -DECLARE_DISPATCH(max_pool2d_fn, max_pool2d_kernel); +DECLARE_DISPATCH(max_pool2d_fn, max_pool2d_kernel) DECLARE_DISPATCH(max_pool2d_backward_fn, max_pool2d_backward_kernel) // averge pooling has same signature for forward and backward diff --git a/aten/src/ATen/native/RNN.cpp b/aten/src/ATen/native/RNN.cpp index 00e37395398357..4efefd71485e39 100644 --- a/aten/src/ATen/native/RNN.cpp +++ b/aten/src/ATen/native/RNN.cpp @@ -1187,10 +1187,10 @@ std::tuple _thnn_fused_lstm_cell_backwar DEFINE_DISPATCH(NAME##_miopen_stub); \ DEFINE_DISPATCH(NAME##_packed_cudnn_stub); \ DEFINE_DISPATCH(NAME##_packed_miopen_stub); \ - REGISTER_NO_CPU_DISPATCH(NAME##_cudnn_stub); \ - REGISTER_NO_CPU_DISPATCH(NAME##_miopen_stub); \ - REGISTER_NO_CPU_DISPATCH(NAME##_packed_cudnn_stub); \ - REGISTER_NO_CPU_DISPATCH(NAME##_packed_miopen_stub); \ + REGISTER_NO_CPU_DISPATCH(NAME##_cudnn_stub) \ + REGISTER_NO_CPU_DISPATCH(NAME##_miopen_stub) \ + REGISTER_NO_CPU_DISPATCH(NAME##_packed_cudnn_stub) \ + REGISTER_NO_CPU_DISPATCH(NAME##_packed_miopen_stub) \ \ std::tuple NAME( \ const Tensor& _input, \ @@ -1415,17 +1415,17 @@ static std::tuple quantized_gru_data_legacy( using tanf_cell_type = SimpleCell; ONE_HIDDEN_RNN(rnn_tanh, tanf_cell_type) using relu_cell_type = SimpleCell; -ONE_HIDDEN_RNN(rnn_relu, relu_cell_type); +ONE_HIDDEN_RNN(rnn_relu, relu_cell_type) DEFINE_DISPATCH(lstm_cudnn_stub); DEFINE_DISPATCH(lstm_packed_cudnn_stub); DEFINE_DISPATCH(lstm_miopen_stub); DEFINE_DISPATCH(lstm_packed_miopen_stub); DEFINE_DISPATCH(lstm_mkldnn_stub); -REGISTER_NO_CPU_DISPATCH(lstm_cudnn_stub); -REGISTER_NO_CPU_DISPATCH(lstm_packed_cudnn_stub); -REGISTER_NO_CPU_DISPATCH(lstm_miopen_stub); -REGISTER_NO_CPU_DISPATCH(lstm_packed_miopen_stub); +REGISTER_NO_CPU_DISPATCH(lstm_cudnn_stub) +REGISTER_NO_CPU_DISPATCH(lstm_packed_cudnn_stub) +REGISTER_NO_CPU_DISPATCH(lstm_miopen_stub) +REGISTER_NO_CPU_DISPATCH(lstm_packed_miopen_stub) std::tuple lstm( const Tensor& _input, TensorList hx, @@ -1857,9 +1857,9 @@ static std::tuple prepare_quantized_lstm_hx(TensorList hx) { // Quantized LSTM cell using quantized_lstm_cell_dynamic_type = LSTMCell; -DEFINE_QUANTIZED_RNN_CELL(quantized_lstm_cell, TensorList, quantized_lstm_cell_type, quantized_lstm_return_type, prepare_quantized_lstm_hx); +DEFINE_QUANTIZED_RNN_CELL(quantized_lstm_cell, TensorList, quantized_lstm_cell_type, quantized_lstm_return_type, prepare_quantized_lstm_hx) -static DEFINE_QUANTIZED_RNN_CELL_DYNAMIC(quantized_lstm_cell_dynamic, TensorList, quantized_lstm_cell_dynamic_type, quantized_lstm_return_type, prepare_quantized_lstm_hx); +static DEFINE_QUANTIZED_RNN_CELL_DYNAMIC(quantized_lstm_cell_dynamic, TensorList, quantized_lstm_cell_dynamic_type, quantized_lstm_return_type, prepare_quantized_lstm_hx) // Helpers for simpler cells using simple_hx_type = const Tensor&; @@ -1871,21 +1871,21 @@ static 
simple_hx_type prepare_quantized_hx(simple_hx_type hx) { using quantized_gru_cell_type = GRUCell; using quantized_gru_cell_dynamic_type = GRUCell; -DEFINE_QUANTIZED_RNN_CELL(quantized_gru_cell, simple_hx_type, quantized_gru_cell_type, Tensor, prepare_quantized_hx); +DEFINE_QUANTIZED_RNN_CELL(quantized_gru_cell, simple_hx_type, quantized_gru_cell_type, Tensor, prepare_quantized_hx) -static DEFINE_QUANTIZED_RNN_CELL_DYNAMIC(quantized_gru_cell_dynamic, simple_hx_type, quantized_gru_cell_dynamic_type, Tensor, prepare_quantized_hx); +static DEFINE_QUANTIZED_RNN_CELL_DYNAMIC(quantized_gru_cell_dynamic, simple_hx_type, quantized_gru_cell_dynamic_type, Tensor, prepare_quantized_hx) // Quantized RNN w/ ReLU cell using quantized_rnn_relu_cell_type = SimpleCell; -DEFINE_QUANTIZED_RNN_CELL(quantized_rnn_relu_cell, simple_hx_type, quantized_rnn_relu_cell_type, Tensor, prepare_quantized_hx); +DEFINE_QUANTIZED_RNN_CELL(quantized_rnn_relu_cell, simple_hx_type, quantized_rnn_relu_cell_type, Tensor, prepare_quantized_hx) using quantized_rnn_relu_cell_dynamic_type = SimpleCell; -static DEFINE_QUANTIZED_RNN_CELL_DYNAMIC(quantized_rnn_relu_cell_dynamic, simple_hx_type, quantized_rnn_relu_cell_dynamic_type, Tensor, prepare_quantized_hx); +static DEFINE_QUANTIZED_RNN_CELL_DYNAMIC(quantized_rnn_relu_cell_dynamic, simple_hx_type, quantized_rnn_relu_cell_dynamic_type, Tensor, prepare_quantized_hx) // Quantized RNN w/ tanh cell using quantized_rnn_tanh_cell_type = SimpleCell; -DEFINE_QUANTIZED_RNN_CELL(quantized_rnn_tanh_cell, simple_hx_type, quantized_rnn_tanh_cell_type, Tensor, prepare_quantized_hx); +DEFINE_QUANTIZED_RNN_CELL(quantized_rnn_tanh_cell, simple_hx_type, quantized_rnn_tanh_cell_type, Tensor, prepare_quantized_hx) using quantized_rnn_tanh_cell_dynamic_type = SimpleCell; -static DEFINE_QUANTIZED_RNN_CELL_DYNAMIC(quantized_rnn_tanh_cell_dynamic, simple_hx_type, quantized_rnn_tanh_cell_dynamic_type, Tensor, prepare_quantized_hx); +static DEFINE_QUANTIZED_RNN_CELL_DYNAMIC(quantized_rnn_tanh_cell_dynamic, simple_hx_type, quantized_rnn_tanh_cell_dynamic_type, Tensor, prepare_quantized_hx) namespace { diff --git a/aten/src/ATen/native/UnaryOps.cpp b/aten/src/ATen/native/UnaryOps.cpp index 3520620280fee3..3485de512276a5 100644 --- a/aten/src/ATen/native/UnaryOps.cpp +++ b/aten/src/ATen/native/UnaryOps.cpp @@ -932,11 +932,11 @@ Tensor& mvlgamma_out(const Tensor& self, int64_t p, Tensor& result) { Tensor special_multigammaln(const Tensor& self, int64_t p) { return self.mvlgamma(p); -}; +} Tensor& special_multigammaln_out(const Tensor& self, int64_t p, Tensor& result) { return at::mvlgamma_out(result, self, p); -}; +} std::tuple frexp(const Tensor& self) { Tensor mantissa = at::empty_like(self); diff --git a/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp b/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp index 9891d6034ff2ff..42f9b9029c2232 100644 --- a/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp +++ b/aten/src/ATen/native/cpu/BinaryOpsKernel.cpp @@ -1415,16 +1415,16 @@ REGISTER_DISPATCH(laguerre_polynomial_l_stub, &laguerre_polynomial_l_kernel) REGISTER_DISPATCH(legendre_polynomial_p_stub, &legendre_polynomial_p_kernel) REGISTER_DISPATCH( shifted_chebyshev_polynomial_t_stub, - &shifted_chebyshev_polynomial_t_kernel); + &shifted_chebyshev_polynomial_t_kernel) REGISTER_DISPATCH( shifted_chebyshev_polynomial_u_stub, - &shifted_chebyshev_polynomial_u_kernel); + &shifted_chebyshev_polynomial_u_kernel) REGISTER_DISPATCH( shifted_chebyshev_polynomial_v_stub, - &shifted_chebyshev_polynomial_v_kernel); + 
&shifted_chebyshev_polynomial_v_kernel) REGISTER_DISPATCH( shifted_chebyshev_polynomial_w_stub, - &shifted_chebyshev_polynomial_w_kernel); + &shifted_chebyshev_polynomial_w_kernel) // Might enable AVX512 dispatch after enabling explicit vectorization for them. REGISTER_DISPATCH(chebyshev_polynomial_u_stub, &chebyshev_polynomial_u_kernel) REGISTER_DISPATCH(hermite_polynomial_h_stub, &hermite_polynomial_h_kernel) diff --git a/aten/src/ATen/native/cpu/MultinomialKernel.cpp b/aten/src/ATen/native/cpu/MultinomialKernel.cpp index f15292bd21fdb1..48896c014d3e34 100644 --- a/aten/src/ATen/native/cpu/MultinomialKernel.cpp +++ b/aten/src/ATen/native/cpu/MultinomialKernel.cpp @@ -241,5 +241,5 @@ static void multinomial_with_replacement_kernel_impl( REGISTER_DISPATCH( multinomial_with_replacement_stub, - &multinomial_with_replacement_kernel_impl); + &multinomial_with_replacement_kernel_impl) } // namespace at::native diff --git a/aten/src/ATen/native/cpu/ReducedPrecisionFloatGemvFastPathKernel.cpp b/aten/src/ATen/native/cpu/ReducedPrecisionFloatGemvFastPathKernel.cpp index d88c87d39dec07..dbace18d4ae346 100644 --- a/aten/src/ATen/native/cpu/ReducedPrecisionFloatGemvFastPathKernel.cpp +++ b/aten/src/ATen/native/cpu/ReducedPrecisionFloatGemvFastPathKernel.cpp @@ -22,7 +22,7 @@ inline namespace CPU_CAPABILITY { constexpr auto kF32RegisterPairsPerIteration = 4; constexpr auto kF32RegistersPerIteration = kF32RegisterPairsPerIteration * 2; constexpr auto kF32ElementsPerRegister = vec::Vectorized::size(); -constexpr auto kF32ElementsPerIteration = kF32RegistersPerIteration * kF32ElementsPerRegister;; +constexpr auto kF32ElementsPerIteration = kF32RegistersPerIteration * kF32ElementsPerRegister; namespace { template @@ -328,8 +328,8 @@ void fp16_gemv_trans( #if !defined(C10_MOBILE) // NOTE: we don't *need* to go through dispatch for the ARM-only // implementation right now, but we will need it when we cover x86. 
-REGISTER_DISPATCH(fp16_dot_with_fp32_arith_stub, &fp16_dot_with_fp32_arith); -REGISTER_DISPATCH(fp16_gemv_trans_stub, &fp16_gemv_trans); +REGISTER_DISPATCH(fp16_dot_with_fp32_arith_stub, &fp16_dot_with_fp32_arith) +REGISTER_DISPATCH(fp16_gemv_trans_stub, &fp16_gemv_trans) #else #endif // defined(__aarch64__) && !defined(C10_MOBILE) diff --git a/aten/src/ATen/native/cpu/ReducedPrecisionFloatGemvFastPathKernel.h b/aten/src/ATen/native/cpu/ReducedPrecisionFloatGemvFastPathKernel.h index b37e65dff7acbb..1d40d20eb12f85 100644 --- a/aten/src/ATen/native/cpu/ReducedPrecisionFloatGemvFastPathKernel.h +++ b/aten/src/ATen/native/cpu/ReducedPrecisionFloatGemvFastPathKernel.h @@ -8,7 +8,7 @@ namespace at::native { #if !defined(C10_MOBILE) using fp16_dot_fn = float(*)(const Half*, const Half*, int64_t); using fp16_gemv_fn = void(*)(int, int, float, const Half*, int, const Half*, int, float, Half*, int); -DECLARE_DISPATCH(fp16_dot_fn, fp16_dot_with_fp32_arith_stub); -DECLARE_DISPATCH(fp16_gemv_fn, fp16_gemv_trans_stub); +DECLARE_DISPATCH(fp16_dot_fn, fp16_dot_with_fp32_arith_stub) +DECLARE_DISPATCH(fp16_gemv_fn, fp16_gemv_trans_stub) #endif // !defined(C10_MOBILE) } // namespace at::native diff --git a/aten/src/ATen/native/cpu/SoftMaxKernel.cpp b/aten/src/ATen/native/cpu/SoftMaxKernel.cpp index c289152db04d8b..4f82783eac03db 100644 --- a/aten/src/ATen/native/cpu/SoftMaxKernel.cpp +++ b/aten/src/ATen/native/cpu/SoftMaxKernel.cpp @@ -1295,15 +1295,15 @@ ALSO_REGISTER_AVX512_DISPATCH(softmax_lastdim_kernel, &softmax_lastdim_kernel_im ALSO_REGISTER_AVX512_DISPATCH(log_softmax_lastdim_kernel, &log_softmax_lastdim_kernel_impl) ALSO_REGISTER_AVX512_DISPATCH( softmax_backward_lastdim_kernel, - &softmax_backward_lastdim_kernel_impl); + &softmax_backward_lastdim_kernel_impl) ALSO_REGISTER_AVX512_DISPATCH( log_softmax_backward_lastdim_kernel, - &log_softmax_backward_lastdim_kernel_impl); + &log_softmax_backward_lastdim_kernel_impl) ALSO_REGISTER_AVX512_DISPATCH(softmax_kernel, &softmax_kernel_impl) ALSO_REGISTER_AVX512_DISPATCH(log_softmax_kernel, &log_softmax_kernel_impl) ALSO_REGISTER_AVX512_DISPATCH(softmax_backward_kernel, &softmax_backward_kernel_impl) ALSO_REGISTER_AVX512_DISPATCH( log_softmax_backward_kernel, - &log_softmax_backward_kernel_impl); + &log_softmax_backward_kernel_impl) } // namespace at::native diff --git a/aten/src/ATen/native/cpu/UnaryOpsKernel.cpp b/aten/src/ATen/native/cpu/UnaryOpsKernel.cpp index fb647349e94ccc..a90406836cf499 100644 --- a/aten/src/ATen/native/cpu/UnaryOpsKernel.cpp +++ b/aten/src/ATen/native/cpu/UnaryOpsKernel.cpp @@ -830,15 +830,15 @@ REGISTER_DISPATCH(special_i0e_stub, &CPU_CAPABILITY::i0e_kernel) REGISTER_DISPATCH(special_ndtri_stub, &CPU_CAPABILITY::ndtri_kernel) REGISTER_DISPATCH(special_modified_bessel_k0_stub, &CPU_CAPABILITY::modified_bessel_k0_kernel) REGISTER_DISPATCH(special_modified_bessel_k1_stub, &CPU_CAPABILITY::modified_bessel_k1_kernel) -IMPLEMENT_FLOAT_KERNEL_WITHOUT_AVX512(ceil); -IMPLEMENT_FLOAT_KERNEL_WITHOUT_AVX512(floor); -IMPLEMENT_FLOAT_KERNEL_WITHOUT_AVX512(round); -IMPLEMENT_COMPLEX_KERNEL_WITHOUT_AVX512(sqrt); -IMPLEMENT_FLOAT_KERNEL_WITHOUT_AVX512(trunc); -IMPLEMENT_FLOAT_KERNEL_WITHOUT_AVX512(i0); -STATIC_IMPLEMENT_COMPLEX_KERNEL_WITHOUT_AVX512(sin); -STATIC_IMPLEMENT_COMPLEX_KERNEL_WITHOUT_AVX512(cos); -STATIC_IMPLEMENT_COMPLEX_KERNEL_WITHOUT_AVX512(tan); +IMPLEMENT_FLOAT_KERNEL_WITHOUT_AVX512(ceil) +IMPLEMENT_FLOAT_KERNEL_WITHOUT_AVX512(floor) +IMPLEMENT_FLOAT_KERNEL_WITHOUT_AVX512(round) +IMPLEMENT_COMPLEX_KERNEL_WITHOUT_AVX512(sqrt) 
+IMPLEMENT_FLOAT_KERNEL_WITHOUT_AVX512(trunc) +IMPLEMENT_FLOAT_KERNEL_WITHOUT_AVX512(i0) +STATIC_IMPLEMENT_COMPLEX_KERNEL_WITHOUT_AVX512(sin) +STATIC_IMPLEMENT_COMPLEX_KERNEL_WITHOUT_AVX512(cos) +STATIC_IMPLEMENT_COMPLEX_KERNEL_WITHOUT_AVX512(tan) // The following kernels are compute-intensive & are compiled with both AVX512 // & AVX2 @@ -871,19 +871,19 @@ REGISTER_DISPATCH(special_bessel_y1_stub, &CPU_CAPABILITY::bessel_y1_kernel) REGISTER_DISPATCH(special_modified_bessel_i0_stub, &CPU_CAPABILITY::modified_bessel_i0_kernel) REGISTER_DISPATCH(special_modified_bessel_i1_stub, &CPU_CAPABILITY::modified_bessel_i1_kernel) -STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(acos); -STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(asin); -STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(atan); -IMPLEMENT_FLOAT_KERNEL_WITH_AVX512(erf); -IMPLEMENT_FLOAT_KERNEL_WITH_AVX512(erfc); -IMPLEMENT_FLOAT_KERNEL_WITH_AVX512(erfinv); -STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(exp); -STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(expm1); -STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(log); -STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(log10); -STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(log1p); -STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(log2); -STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(tanh); -IMPLEMENT_FLOAT_KERNEL_WITH_AVX512(lgamma); +STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(acos) +STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(asin) +STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(atan) +IMPLEMENT_FLOAT_KERNEL_WITH_AVX512(erf) +IMPLEMENT_FLOAT_KERNEL_WITH_AVX512(erfc) +IMPLEMENT_FLOAT_KERNEL_WITH_AVX512(erfinv) +STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(exp) +STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(expm1) +STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(log) +STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(log10) +STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(log1p) +STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(log2) +STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(tanh) +IMPLEMENT_FLOAT_KERNEL_WITH_AVX512(lgamma) } // namespace at::native diff --git a/aten/src/ATen/native/cuda/DepthwiseConv2d.cu b/aten/src/ATen/native/cuda/DepthwiseConv2d.cu index 4f9e1f36213ab7..4999c04915ae20 100644 --- a/aten/src/ATen/native/cuda/DepthwiseConv2d.cu +++ b/aten/src/ATen/native/cuda/DepthwiseConv2d.cu @@ -760,6 +760,6 @@ std::tuple conv_depthwise2d_backward_cuda( grad_weight); } -REGISTER_CUDA_DISPATCH(conv_depthwise2d_backward_stub, &conv_depthwise2d_backward_cuda); +REGISTER_CUDA_DISPATCH(conv_depthwise2d_backward_stub, &conv_depthwise2d_backward_cuda) } // namespace at::native diff --git a/aten/src/ATen/native/cuda/DepthwiseConv3d.cu b/aten/src/ATen/native/cuda/DepthwiseConv3d.cu index 62c36d66ee40eb..985d5c49b615d6 100644 --- a/aten/src/ATen/native/cuda/DepthwiseConv3d.cu +++ b/aten/src/ATen/native/cuda/DepthwiseConv3d.cu @@ -695,7 +695,7 @@ std::tuple conv_depthwise3d_backward_cuda( } -REGISTER_CUDA_DISPATCH(conv_depthwise3d_backward_stub, &conv_depthwise3d_backward_cuda); +REGISTER_CUDA_DISPATCH(conv_depthwise3d_backward_stub, &conv_depthwise3d_backward_cuda) #undef DWCONV3D_BACKWARD_INPUT_DISPATCH_SPECIALIZATION #undef DWCONV3D_BACKWARD_INPUT_DISPATCH_OTHERS diff --git a/aten/src/ATen/native/cuda/FlattenIndicesKernel.cu b/aten/src/ATen/native/cuda/FlattenIndicesKernel.cu index a127e0a52ded40..fb0553a5c8dea1 100644 --- a/aten/src/ATen/native/cuda/FlattenIndicesKernel.cu +++ b/aten/src/ATen/native/cuda/FlattenIndicesKernel.cu @@ -23,6 +23,6 @@ Tensor flatten_indices_cuda_kernel(const Tensor& indices, IntArrayRef size) { } 
-REGISTER_CUDA_DISPATCH(flatten_indices_stub, &flatten_indices_cuda_kernel); +REGISTER_CUDA_DISPATCH(flatten_indices_stub, &flatten_indices_cuda_kernel) } // namespace at::native diff --git a/aten/src/ATen/native/cuda/IndexKernel.cu b/aten/src/ATen/native/cuda/IndexKernel.cu index bc2b406b43a742..37500414575db6 100644 --- a/aten/src/ATen/native/cuda/IndexKernel.cu +++ b/aten/src/ATen/native/cuda/IndexKernel.cu @@ -483,6 +483,6 @@ REGISTER_DISPATCH(put_stub, &put_kernel) REGISTER_DISPATCH(take_stub, &take_kernel) REGISTER_DISPATCH(flip_stub, &flip_kernel) -REGISTER_CUDA_DISPATCH(index_put_kernel_quantized_stub, &index_put_kernel_quantized_cuda); +REGISTER_CUDA_DISPATCH(index_put_kernel_quantized_stub, &index_put_kernel_quantized_cuda) } // namespace at::native diff --git a/aten/src/ATen/native/cuda/Indexing.cu b/aten/src/ATen/native/cuda/Indexing.cu index 822525556bc3ff..09fdf9802784b3 100644 --- a/aten/src/ATen/native/cuda/Indexing.cu +++ b/aten/src/ATen/native/cuda/Indexing.cu @@ -684,7 +684,7 @@ void index_put_with_sort_kernel(Tensor & self, const c10::List>& indices, const Tensor & value, double scale, int zero_point, bool unsafe) { if (indices.size() > (size_t)self.dim()) { @@ -784,7 +784,7 @@ void index_put_with_sort_quantized(Tensor & self, const c10::List slow_conv_transpose2d_backward_cuda( return std::tuple(grad_input, grad_weight, grad_bias); } -REGISTER_CUDA_DISPATCH(slow_conv_transpose2d_backward_stub, &slow_conv_transpose2d_backward_cuda); +REGISTER_CUDA_DISPATCH(slow_conv_transpose2d_backward_stub, &slow_conv_transpose2d_backward_cuda) } // namespace at::native diff --git a/aten/src/ATen/native/cuda/NaiveConvolutionTranspose3d.cu b/aten/src/ATen/native/cuda/NaiveConvolutionTranspose3d.cu index 20f10ed3b264f1..ee3ea6b274523c 100644 --- a/aten/src/ATen/native/cuda/NaiveConvolutionTranspose3d.cu +++ b/aten/src/ATen/native/cuda/NaiveConvolutionTranspose3d.cu @@ -1011,6 +1011,6 @@ std::tuple slow_conv_transpose3d_backward_cuda( return std::tuple(grad_input, grad_weight, grad_bias); } -REGISTER_CUDA_DISPATCH(slow_conv_transpose3d_backward_stub, &slow_conv_transpose3d_backward_cuda); +REGISTER_CUDA_DISPATCH(slow_conv_transpose3d_backward_stub, &slow_conv_transpose3d_backward_cuda) } // namespace at::native diff --git a/aten/src/ATen/native/cuda/NaiveDilatedConvolution.cu b/aten/src/ATen/native/cuda/NaiveDilatedConvolution.cu index cd969fa9405bb4..24e6aaa519d252 100644 --- a/aten/src/ATen/native/cuda/NaiveDilatedConvolution.cu +++ b/aten/src/ATen/native/cuda/NaiveDilatedConvolution.cu @@ -608,7 +608,7 @@ std::tuple slow_conv_dilated3d_backward_cuda( return std::tie(grad_input, grad_weight, grad_bias); } -REGISTER_CUDA_DISPATCH(slow_conv_dilated2d_backward_stub, &slow_conv_dilated2d_backward_cuda); -REGISTER_CUDA_DISPATCH(slow_conv_dilated3d_backward_stub, &slow_conv_dilated3d_backward_cuda); +REGISTER_CUDA_DISPATCH(slow_conv_dilated2d_backward_stub, &slow_conv_dilated2d_backward_cuda) +REGISTER_CUDA_DISPATCH(slow_conv_dilated3d_backward_stub, &slow_conv_dilated3d_backward_cuda) } // namespace at::native diff --git a/aten/src/ATen/native/cuda/ReduceOps.cpp b/aten/src/ATen/native/cuda/ReduceOps.cpp index 0c998ba8d47b7b..29adecf3008d15 100644 --- a/aten/src/ATen/native/cuda/ReduceOps.cpp +++ b/aten/src/ATen/native/cuda/ReduceOps.cpp @@ -90,13 +90,13 @@ void aminmax_allreduce_kernel_impl(const Tensor& input, Tensor& min_result, Tens } // namespace (anonymous) -REGISTER_CUDA_DISPATCH(min_stub, &min_kernel_impl); -REGISTER_CUDA_DISPATCH(max_stub, &max_kernel_impl); 
-REGISTER_CUDA_DISPATCH(min_all_stub, &min_all_kernel_impl); -REGISTER_CUDA_DISPATCH(max_all_stub, &max_all_kernel_impl); -REGISTER_CUDA_DISPATCH(aminmax_allreduce_stub, &aminmax_allreduce_kernel_impl); -REGISTER_CUDA_DISPATCH(aminmax_stub, &aminmax_kernel_impl); - -REGISTER_CUDA_DISPATCH(norm_stub, &norm_kernel_cuda); +REGISTER_CUDA_DISPATCH(min_stub, &min_kernel_impl) +REGISTER_CUDA_DISPATCH(max_stub, &max_kernel_impl) +REGISTER_CUDA_DISPATCH(min_all_stub, &min_all_kernel_impl) +REGISTER_CUDA_DISPATCH(max_all_stub, &max_all_kernel_impl) +REGISTER_CUDA_DISPATCH(aminmax_allreduce_stub, &aminmax_allreduce_kernel_impl) +REGISTER_CUDA_DISPATCH(aminmax_stub, &aminmax_kernel_impl) + +REGISTER_CUDA_DISPATCH(norm_stub, &norm_kernel_cuda) } // namespace at::native diff --git a/aten/src/ATen/native/cuda/ScanKernels.cpp b/aten/src/ATen/native/cuda/ScanKernels.cpp index 463ceb23bade55..3f89c022e3c12e 100644 --- a/aten/src/ATen/native/cuda/ScanKernels.cpp +++ b/aten/src/ATen/native/cuda/ScanKernels.cpp @@ -109,7 +109,7 @@ void cumprod_cuda_kernel(const Tensor& result, const Tensor& self, int64_t dim) } } -REGISTER_CUDA_DISPATCH(cumsum_stub, &cumsum_cuda_kernel); -REGISTER_CUDA_DISPATCH(cumprod_stub, &cumprod_cuda_kernel); +REGISTER_CUDA_DISPATCH(cumsum_stub, &cumsum_cuda_kernel) +REGISTER_CUDA_DISPATCH(cumprod_stub, &cumprod_cuda_kernel) } // namespace at::native diff --git a/aten/src/ATen/native/cuda/Sort.cpp b/aten/src/ATen/native/cuda/Sort.cpp index 5f97b94bf2d29d..4605be8cdf187e 100644 --- a/aten/src/ATen/native/cuda/Sort.cpp +++ b/aten/src/ATen/native/cuda/Sort.cpp @@ -122,6 +122,6 @@ void sort_cuda_kernel( // TODO: we should handle this accordingly when we start using REGISTER_HIP_DISPATCH, // since REGISTER_DISPATCH won't work in this cpp file. // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) -REGISTER_CUDA_DISPATCH(sort_stub, &sort_cuda_kernel); +REGISTER_CUDA_DISPATCH(sort_stub, &sort_cuda_kernel) } // namespace at::native diff --git a/aten/src/ATen/native/cuda/SparseBinaryOpIntersectionKernel.cu b/aten/src/ATen/native/cuda/SparseBinaryOpIntersectionKernel.cu index 80a7422e317391..bf9738a3943a7b 100644 --- a/aten/src/ATen/native/cuda/SparseBinaryOpIntersectionKernel.cu +++ b/aten/src/ATen/native/cuda/SparseBinaryOpIntersectionKernel.cu @@ -204,8 +204,8 @@ void sparse_mask_projection_out_cuda_kernel( } -REGISTER_CUDA_DISPATCH(mul_sparse_sparse_out_stub, &mul_sparse_sparse_out_cuda_kernel); -REGISTER_CUDA_DISPATCH(sparse_mask_intersection_out_stub, &sparse_mask_intersection_out_cuda_kernel); -REGISTER_CUDA_DISPATCH(sparse_mask_projection_out_stub, &sparse_mask_projection_out_cuda_kernel); +REGISTER_CUDA_DISPATCH(mul_sparse_sparse_out_stub, &mul_sparse_sparse_out_cuda_kernel) +REGISTER_CUDA_DISPATCH(sparse_mask_intersection_out_stub, &sparse_mask_intersection_out_cuda_kernel) +REGISTER_CUDA_DISPATCH(sparse_mask_projection_out_stub, &sparse_mask_projection_out_cuda_kernel) } // namespace at::native diff --git a/aten/src/ATen/native/cuda/TensorCompare.cpp b/aten/src/ATen/native/cuda/TensorCompare.cpp index 1b4d7490b03dab..e2efb21a50585f 100644 --- a/aten/src/ATen/native/cuda/TensorCompare.cpp +++ b/aten/src/ATen/native/cuda/TensorCompare.cpp @@ -18,6 +18,6 @@ void isin_default_kernel_gpu( } // anonymous namespace -REGISTER_CUDA_DISPATCH(isin_default_stub, &isin_default_kernel_gpu); +REGISTER_CUDA_DISPATCH(isin_default_stub, &isin_default_kernel_gpu) } // namespace at::native diff --git a/aten/src/ATen/native/cuda/TensorModeKernel.cpp 
b/aten/src/ATen/native/cuda/TensorModeKernel.cpp index c5774f69d94330..b5615c18639e12 100644 --- a/aten/src/ATen/native/cuda/TensorModeKernel.cpp +++ b/aten/src/ATen/native/cuda/TensorModeKernel.cpp @@ -98,5 +98,5 @@ void mode_kernel_impl( } } -REGISTER_CUDA_DISPATCH(mode_stub, &mode_kernel_impl); +REGISTER_CUDA_DISPATCH(mode_stub, &mode_kernel_impl) } // namespace at::native diff --git a/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp b/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp index 8d67b6dc080cdb..a9e007d41b3350 100644 --- a/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp +++ b/aten/src/ATen/native/cuda/linalg/BatchLinearAlgebra.cpp @@ -1454,7 +1454,7 @@ Tensor& cholesky_inverse_kernel_impl(Tensor &result, Tensor& infos, bool upper) } -REGISTER_CUDA_DISPATCH(cholesky_inverse_stub, &cholesky_inverse_kernel_impl); +REGISTER_CUDA_DISPATCH(cholesky_inverse_stub, &cholesky_inverse_kernel_impl) // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lu ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1670,7 +1670,7 @@ static void lu_factor(const Tensor& input, const Tensor& pivots, const Tensor& i } } -REGISTER_CUDA_DISPATCH(lu_factor_stub, &lu_factor); +REGISTER_CUDA_DISPATCH(lu_factor_stub, &lu_factor) // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ triangular_solve ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1764,7 +1764,7 @@ void triangular_solve_kernel(const Tensor& A, const Tensor& B, bool left, bool u } } -REGISTER_CUDA_DISPATCH(triangular_solve_stub, &triangular_solve_kernel); +REGISTER_CUDA_DISPATCH(triangular_solve_stub, &triangular_solve_kernel) // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ orgqr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1782,7 +1782,7 @@ Tensor& orgqr_kernel_impl(Tensor& result, const Tensor& tau) { #endif } -REGISTER_CUDA_DISPATCH(orgqr_stub, &orgqr_kernel_impl); +REGISTER_CUDA_DISPATCH(orgqr_stub, &orgqr_kernel_impl) void ormqr_kernel(const Tensor& input, const Tensor& tau, const Tensor& other, bool left, bool transpose) { #ifdef USE_LINALG_SOLVER @@ -1794,7 +1794,7 @@ void ormqr_kernel(const Tensor& input, const Tensor& tau, const Tensor& other, b #endif } -REGISTER_CUDA_DISPATCH(ormqr_stub, &ormqr_kernel); +REGISTER_CUDA_DISPATCH(ormqr_stub, &ormqr_kernel) // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ qr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1878,7 +1878,7 @@ void geqrf_kernel(const Tensor& input, const Tensor& tau) { #endif } -REGISTER_CUDA_DISPATCH(geqrf_stub, &geqrf_kernel); +REGISTER_CUDA_DISPATCH(geqrf_stub, &geqrf_kernel) template static void apply_magma_eigh(const Tensor& values, const Tensor& vectors, const Tensor& infos, bool upper, bool compute_eigenvectors) { @@ -2007,7 +2007,7 @@ void linalg_eigh_kernel(const Tensor& eigenvalues, const Tensor& eigenvectors, c #endif } -REGISTER_CUDA_DISPATCH(linalg_eigh_stub, &linalg_eigh_kernel); +REGISTER_CUDA_DISPATCH(linalg_eigh_stub, &linalg_eigh_kernel) // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ linalg_eig ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2093,7 +2093,7 @@ void linalg_eig_kernel(Tensor& eigenvalues, Tensor& eigenvectors, Tensor& infos, }); } -REGISTER_CUDA_DISPATCH(linalg_eig_stub, &linalg_eig_kernel); +REGISTER_CUDA_DISPATCH(linalg_eig_stub, &linalg_eig_kernel) // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ svd ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2579,7 +2579,7 @@ if (n <= 8) { #endif // ifdef USE_LINALG_SOLVER } -REGISTER_CUDA_DISPATCH(lu_solve_stub, &lu_solve_kernel); +REGISTER_CUDA_DISPATCH(lu_solve_stub, &lu_solve_kernel) // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ lstsq ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -2761,7 +2761,7 
@@ void lstsq_kernel(const Tensor& a, Tensor& b, Tensor& /*rank*/, Tensor& /*singul } } -REGISTER_CUDA_DISPATCH(lstsq_stub, &lstsq_kernel); +REGISTER_CUDA_DISPATCH(lstsq_stub, &lstsq_kernel) #if defined(BUILD_LAZY_CUDA_LINALG) diff --git a/aten/src/ATen/native/cudnn/RNN.cpp b/aten/src/ATen/native/cudnn/RNN.cpp index f6526acaa61f64..f1219d2f5eeda4 100644 --- a/aten/src/ATen/native/cudnn/RNN.cpp +++ b/aten/src/ATen/native/cudnn/RNN.cpp @@ -2627,59 +2627,59 @@ std::pair _cudnn_impl( std::get<1>(cudnn_output), std::get<2>(cudnn_output))}; } -#define ONE_HIDDEN_RNN(NAME, MODE) \ - void NAME##_cudnn( \ - Tensor& output, \ - Tensor& hy, \ - const Tensor& input, \ - const Tensor& hx, \ - TensorList params, \ - bool has_biases, \ - int64_t num_layers, \ - double dropout_p, \ - bool train, \ - bool bidirectional, \ - bool batch_first) { \ - std::tie(output, hy) = _cudnn_impl( \ - input, \ - hx, \ - params, \ - has_biases, \ - MODE, \ - num_layers, \ - dropout_p, \ - train, \ - bidirectional, \ - batch_first); \ - } \ - \ - void NAME##_packed_cudnn( \ - Tensor& output, \ - Tensor& hy, \ - const Tensor& data, \ - const Tensor& batch_sizes, \ - const Tensor& hx, \ - TensorList params, \ - bool has_biases, \ - int64_t num_layers, \ - double dropout_p, \ - bool train, \ - bool bidirectional) { \ - std::tie(output, hy) = _cudnn_impl( \ - data, \ - batch_sizes, \ - hx, \ - params, \ - has_biases, \ - MODE, \ - num_layers, \ - dropout_p, \ - train, \ - bidirectional); \ - } \ - \ - REGISTER_CUDA_DISPATCH(NAME##_cudnn_stub, &NAME##_cudnn); \ - REGISTER_CUDA_DISPATCH(NAME##_packed_cudnn_stub, &NAME##_packed_cudnn); +#define ONE_HIDDEN_RNN(NAME, MODE) \ + void NAME##_cudnn( \ + Tensor& output, \ + Tensor& hy, \ + const Tensor& input, \ + const Tensor& hx, \ + TensorList params, \ + bool has_biases, \ + int64_t num_layers, \ + double dropout_p, \ + bool train, \ + bool bidirectional, \ + bool batch_first) { \ + std::tie(output, hy) = _cudnn_impl( \ + input, \ + hx, \ + params, \ + has_biases, \ + MODE, \ + num_layers, \ + dropout_p, \ + train, \ + bidirectional, \ + batch_first); \ + } \ + \ + void NAME##_packed_cudnn( \ + Tensor& output, \ + Tensor& hy, \ + const Tensor& data, \ + const Tensor& batch_sizes, \ + const Tensor& hx, \ + TensorList params, \ + bool has_biases, \ + int64_t num_layers, \ + double dropout_p, \ + bool train, \ + bool bidirectional) { \ + std::tie(output, hy) = _cudnn_impl( \ + data, \ + batch_sizes, \ + hx, \ + params, \ + has_biases, \ + MODE, \ + num_layers, \ + dropout_p, \ + train, \ + bidirectional); \ + } \ + \ + REGISTER_CUDA_DISPATCH(NAME##_cudnn_stub, &NAME##_cudnn) \ + REGISTER_CUDA_DISPATCH(NAME##_packed_cudnn_stub, &NAME##_packed_cudnn) ONE_HIDDEN_RNN(gru, CUDNN_GRU) ONE_HIDDEN_RNN(rnn_tanh, CUDNN_RNN_TANH) @@ -2743,8 +2743,8 @@ void lstm_packed_cudnn( cy = std::get<1>(result.second); } -REGISTER_CUDA_DISPATCH(lstm_cudnn_stub, &lstm_cudnn); -REGISTER_CUDA_DISPATCH(lstm_packed_cudnn_stub, &lstm_packed_cudnn); +REGISTER_CUDA_DISPATCH(lstm_cudnn_stub, &lstm_cudnn) +REGISTER_CUDA_DISPATCH(lstm_packed_cudnn_stub, &lstm_packed_cudnn) } // namespace diff --git a/aten/src/ATen/native/miopen/Conv_miopen.cpp b/aten/src/ATen/native/miopen/Conv_miopen.cpp index 35a725687574e9..45f8b3f64e8496 100644 --- a/aten/src/ATen/native/miopen/Conv_miopen.cpp +++ b/aten/src/ATen/native/miopen/Conv_miopen.cpp @@ -1696,9 +1696,9 @@ Tensor miopen_convolution_relu( } } -REGISTER_CUDA_DISPATCH(miopen_convolution_backward_stub, &miopen_convolution_backward); 
-REGISTER_CUDA_DISPATCH(miopen_convolution_transpose_backward_stub, &miopen_convolution_transpose_backward); -REGISTER_CUDA_DISPATCH(miopen_depthwise_convolution_backward_stub, &miopen_depthwise_convolution_backward); +REGISTER_CUDA_DISPATCH(miopen_convolution_backward_stub, &miopen_convolution_backward) +REGISTER_CUDA_DISPATCH(miopen_convolution_transpose_backward_stub, &miopen_convolution_transpose_backward) +REGISTER_CUDA_DISPATCH(miopen_depthwise_convolution_backward_stub, &miopen_depthwise_convolution_backward) }} // namespace diff --git a/aten/src/ATen/native/miopen/RNN_miopen.cpp b/aten/src/ATen/native/miopen/RNN_miopen.cpp index 8ac986b1af6478..a21cc3f4d4db61 100644 --- a/aten/src/ATen/native/miopen/RNN_miopen.cpp +++ b/aten/src/ATen/native/miopen/RNN_miopen.cpp @@ -876,8 +876,8 @@ void NAME##_packed_miopen(Tensor& output, Tensor& hy, \ has_biases, MODE, num_layers, dropout_p, train, bidirectional); \ } \ \ -REGISTER_CUDA_DISPATCH(NAME##_miopen_stub, &NAME##_miopen); \ -REGISTER_CUDA_DISPATCH(NAME##_packed_miopen_stub, &NAME##_packed_miopen); +REGISTER_CUDA_DISPATCH(NAME##_miopen_stub, &NAME##_miopen) \ +REGISTER_CUDA_DISPATCH(NAME##_packed_miopen_stub, &NAME##_packed_miopen) ONE_HIDDEN_RNN(gru, miopenGRU) ONE_HIDDEN_RNN(rnn_tanh, miopenRNNTANH) @@ -905,8 +905,8 @@ void lstm_packed_miopen(Tensor& output, Tensor& hy, Tensor& cy, cy = std::get<1>(result.second); } -REGISTER_CUDA_DISPATCH(lstm_miopen_stub, &lstm_miopen); -REGISTER_CUDA_DISPATCH(lstm_packed_miopen_stub, &lstm_packed_miopen); +REGISTER_CUDA_DISPATCH(lstm_miopen_stub, &lstm_miopen) +REGISTER_CUDA_DISPATCH(lstm_packed_miopen_stub, &lstm_packed_miopen) } // anonymous namespace }} //namespace native. diff --git a/aten/src/ATen/native/mkl/SpectralOps.cpp b/aten/src/ATen/native/mkl/SpectralOps.cpp index 7fa9234e0fe8d3..8d82895a9867fc 100644 --- a/aten/src/ATen/native/mkl/SpectralOps.cpp +++ b/aten/src/ATen/native/mkl/SpectralOps.cpp @@ -575,7 +575,7 @@ Tensor _fft_c2c_mkl(const Tensor& self, IntArrayRef dim, int64_t normalization, #else namespace at { namespace native { -REGISTER_NO_CPU_DISPATCH(fft_fill_with_conjugate_symmetry_stub); +REGISTER_NO_CPU_DISPATCH(fft_fill_with_conjugate_symmetry_stub) Tensor _fft_c2r_mkl(const Tensor& self, IntArrayRef dim, int64_t normalization, int64_t last_dim_size) { TORCH_CHECK(false, "fft: ATen not compiled with FFT support"); diff --git a/aten/src/ATen/native/mkldnn/Conv.cpp b/aten/src/ATen/native/mkldnn/Conv.cpp index d8e62063cbd991..9bc382701cc49c 100644 --- a/aten/src/ATen/native/mkldnn/Conv.cpp +++ b/aten/src/ATen/native/mkldnn/Conv.cpp @@ -28,9 +28,9 @@ Tensor mkldnn_convolution( TORCH_CHECK(false, "mkldnn_convolution_forward: ATen not compiled with MKLDNN support"); } -REGISTER_NO_CPU_DISPATCH(mkldnn_convolution_backward_stub); -REGISTER_NO_CPU_DISPATCH(mkldnn_convolution_transpose_stub); -REGISTER_NO_CPU_DISPATCH(mkldnn_convolution_transpose_backward_stub); +REGISTER_NO_CPU_DISPATCH(mkldnn_convolution_backward_stub) +REGISTER_NO_CPU_DISPATCH(mkldnn_convolution_transpose_stub) +REGISTER_NO_CPU_DISPATCH(mkldnn_convolution_transpose_backward_stub) }} @@ -891,7 +891,7 @@ Tensor mkldnn_convolution_transpose_pointwise( ); } -REGISTER_ALL_CPU_DISPATCH(mkldnn_convolution_backward_stub, &mkldnn_convolution_backward); +REGISTER_ALL_CPU_DISPATCH(mkldnn_convolution_backward_stub, &mkldnn_convolution_backward) namespace{ Tensor mkldnn_convolution_transpose( @@ -1044,8 +1044,8 @@ std::tuple mkldnn_convolution_transpose_backward( } } -REGISTER_ALL_CPU_DISPATCH(mkldnn_convolution_transpose_stub, 
&mkldnn_convolution_transpose); -REGISTER_ALL_CPU_DISPATCH(mkldnn_convolution_transpose_backward_stub, &mkldnn_convolution_transpose_backward); +REGISTER_ALL_CPU_DISPATCH(mkldnn_convolution_transpose_stub, &mkldnn_convolution_transpose) +REGISTER_ALL_CPU_DISPATCH(mkldnn_convolution_transpose_backward_stub, &mkldnn_convolution_transpose_backward) TORCH_LIBRARY_IMPL(mkldnn, CPU, m) { m.impl( diff --git a/aten/src/ATen/native/mkldnn/RNN.cpp b/aten/src/ATen/native/mkldnn/RNN.cpp index 3931799e89bb03..883ea6e37f954b 100644 --- a/aten/src/ATen/native/mkldnn/RNN.cpp +++ b/aten/src/ATen/native/mkldnn/RNN.cpp @@ -71,7 +71,7 @@ std::tuple mkldnn_rnn_la TORCH_CHECK(false, "mkldnn_rnn_layer_backward: ATen not compiled with MKLDNN support"); } -REGISTER_NO_CPU_DISPATCH(lstm_mkldnn_stub); +REGISTER_NO_CPU_DISPATCH(lstm_mkldnn_stub) } // namespace at::native diff --git a/aten/src/ATen/native/mkldnn/Utils.cpp b/aten/src/ATen/native/mkldnn/Utils.cpp index 6578b23ff9c928..0d040323cde746 100644 --- a/aten/src/ATen/native/mkldnn/Utils.cpp +++ b/aten/src/ATen/native/mkldnn/Utils.cpp @@ -154,14 +154,14 @@ const std::map& fusion_unary_attr_map() { {"gelu", attr_func_gelu}, }; return fusion_attr_map; -}; +} const std::map& fusion_unary_alg_map() { static const std::map fusion_attr_map{ {"relu", {ideep::algorithm::eltwise_relu}}, }; return fusion_attr_map; -}; +} const std::map& fusion_binary_alg_map() { static const std::map fusion_attr_map{ @@ -171,7 +171,7 @@ const std::map& fusion_binary_alg_map() { {"div", {ideep::algorithm::binary_div}}, }; return fusion_attr_map; -}; +} #endif // AT_MKLDNN_ENABLED() }} diff --git a/aten/src/ATen/native/mps/operations/BitwiseOps.mm b/aten/src/ATen/native/mps/operations/BitwiseOps.mm index ab37e785d176cf..63b21dd6891160 100644 --- a/aten/src/ATen/native/mps/operations/BitwiseOps.mm +++ b/aten/src/ATen/native/mps/operations/BitwiseOps.mm @@ -356,7 +356,7 @@ void rshift_kernel_mps(TensorIteratorBase& iter) { mps::_bitwise_not_out_mps(self, output); } -REGISTER_MPS_DISPATCH(lshift_stub, &lshift_kernel_mps); -REGISTER_MPS_DISPATCH(rshift_stub, &rshift_kernel_mps); +REGISTER_MPS_DISPATCH(lshift_stub, &lshift_kernel_mps) +REGISTER_MPS_DISPATCH(rshift_stub, &rshift_kernel_mps) } // namespace at::native diff --git a/aten/src/ATen/native/nested/NestedTensorBinaryOps.cpp b/aten/src/ATen/native/nested/NestedTensorBinaryOps.cpp index 49048325e3e771..9c99185848b32b 100644 --- a/aten/src/ATen/native/nested/NestedTensorBinaryOps.cpp +++ b/aten/src/ATen/native/nested/NestedTensorBinaryOps.cpp @@ -17,7 +17,7 @@ namespace at::native { DEFINE_DISPATCH(nested_dense_elementwise_stub); -REGISTER_NO_CPU_DISPATCH(nested_dense_elementwise_stub); +REGISTER_NO_CPU_DISPATCH(nested_dense_elementwise_stub) std::pair static get_elementwise_nested_tensor_impl( diff --git a/aten/src/ATen/native/nested/cuda/NestedTensorBinaryOps.cu b/aten/src/ATen/native/nested/cuda/NestedTensorBinaryOps.cu index 350c3a27e77b04..e6242956424225 100644 --- a/aten/src/ATen/native/nested/cuda/NestedTensorBinaryOps.cu +++ b/aten/src/ATen/native/nested/cuda/NestedTensorBinaryOps.cu @@ -114,7 +114,7 @@ void _nested_op_dense_esuhm_cuda(Tensor& result, const Tensor& self, const Tenso }); } -REGISTER_CUDA_DISPATCH(nested_dense_elementwise_stub, &_nested_op_dense_esuhm_cuda); +REGISTER_CUDA_DISPATCH(nested_dense_elementwise_stub, &_nested_op_dense_esuhm_cuda) } // namespace native } // namespace at diff --git a/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp 
b/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp index e115fb5c06f67c..fc95e990a68fce 100644 --- a/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp +++ b/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp @@ -4264,21 +4264,21 @@ void index_put_kernel_quantized_cpu(TensorIterator& iter, IntArrayRef index_size // AVX2 kernels would be used instead. Ref: GH 56992. #if defined(_WIN32) REGISTER_DISPATCH(dequantize_tensor_per_channel_affine_stub, - &dequantize_tensor_per_channel_affine_cpu); + &dequantize_tensor_per_channel_affine_cpu) REGISTER_DISPATCH(dequantize_tensor_per_channel_float_qparams_stub, - &dequantize_tensor_per_channel_float_qparams_cpu); + &dequantize_tensor_per_channel_float_qparams_cpu) REGISTER_DISPATCH(fake_quant_per_channel_cachemask_stub, - &fake_quant_per_channel_cachemask_cpu); + &fake_quant_per_channel_cachemask_cpu) REGISTER_DISPATCH(qavg_pool2d_nhwc_stub, &qavg_pool2d_nhwc_kernel) REGISTER_DISPATCH(qavg_pool3d_nhwc_stub, &qavg_pool3d_nhwc_kernel) #else // These kernels are dispatched to AVX512 ALSO_REGISTER_AVX512_DISPATCH(dequantize_tensor_per_channel_affine_stub, - &dequantize_tensor_per_channel_affine_cpu); + &dequantize_tensor_per_channel_affine_cpu) ALSO_REGISTER_AVX512_DISPATCH(dequantize_tensor_per_channel_float_qparams_stub, - &dequantize_tensor_per_channel_float_qparams_cpu); + &dequantize_tensor_per_channel_float_qparams_cpu) ALSO_REGISTER_AVX512_DISPATCH(fake_quant_per_channel_cachemask_stub, - &fake_quant_per_channel_cachemask_cpu); + &fake_quant_per_channel_cachemask_cpu) ALSO_REGISTER_AVX512_DISPATCH(qavg_pool2d_nhwc_stub, &qavg_pool2d_nhwc_kernel) ALSO_REGISTER_AVX512_DISPATCH(qavg_pool3d_nhwc_stub, &qavg_pool3d_nhwc_kernel) #endif // CPU_CAPABILITY_AVX512 && _WIN32 @@ -4286,17 +4286,17 @@ ALSO_REGISTER_AVX512_DISPATCH(qavg_pool3d_nhwc_stub, &qavg_pool3d_nhwc_kernel) // The kernels below are dispatched to AVX2 because they don't perform as well // with AVX512. We might revisit this decision in the near future. 
REGISTER_DISPATCH(dequantize_tensor_per_tensor_affine_stub, - &dequantize_tensor_per_tensor_affine_cpu); + &dequantize_tensor_per_tensor_affine_cpu) REGISTER_DISPATCH(fake_quant_grad_learnable_tensor_stub, - &fake_quantize_learnable_tensor_grad_kernel_cpu); + &fake_quantize_learnable_tensor_grad_kernel_cpu) REGISTER_DISPATCH(fake_quant_tensor_cachemask_stub, - &fake_quantize_tensor_cachemask_kernel); + &fake_quantize_tensor_cachemask_kernel) REGISTER_DISPATCH(fake_quant_tensor_cachemask_tensor_qparams_stub, - &fake_quantize_tensor_cachemask_tensor_qparams_kernel); + &fake_quantize_tensor_cachemask_tensor_qparams_kernel) REGISTER_DISPATCH(qadaptive_avg_pool2d_nhwc_stub, - &qadaptive_avg_pool2d_nhwc_kernel); + &qadaptive_avg_pool2d_nhwc_kernel) REGISTER_DISPATCH(qadaptive_avg_pool3d_ndhwc_stub, - &qadaptive_avg_pool3d_ndhwc_kernel); + &qadaptive_avg_pool3d_ndhwc_kernel) REGISTER_DISPATCH(qadd_relu_stub, &qadd_kernel) REGISTER_DISPATCH(qadd_scalar_relu_stub, &qadd_scalar_kernel) REGISTER_DISPATCH(qadd_scalar_stub, &qadd_scalar_kernel) @@ -4325,32 +4325,32 @@ REGISTER_DISPATCH(qtanh_stub, &qtanh_kernel) REGISTER_DISPATCH(qthreshold_stub, &qthreshold_kernel) REGISTER_DISPATCH(qtopk_stub, &qtopk_kernel) REGISTER_DISPATCH(fake_quant_grad_learnable_channel_stub, - &fake_quantize_learnable_channel_grad_kernel_cpu); + &fake_quantize_learnable_channel_grad_kernel_cpu) REGISTER_DISPATCH( quantize_tensor_per_tensor_affine_stub, - &quantize_tensor_per_tensor_affine_cpu); + &quantize_tensor_per_tensor_affine_cpu) REGISTER_DISPATCH( quantize_tensor_per_channel_affine_stub, - &quantize_tensor_per_channel_affine_cpu); + &quantize_tensor_per_channel_affine_cpu) REGISTER_DISPATCH( quantize_tensor_per_channel_float_qparams_stub, - &quantize_tensor_per_channel_float_qparams_cpu); + &quantize_tensor_per_channel_float_qparams_cpu) REGISTER_DISPATCH(quantized_normalize_stub, &quantized_normalize_kernel) REGISTER_DISPATCH(quantized_groupnorm_nhwc_stub, &quantized_groupnorm_nhwc_kernel) REGISTER_DISPATCH(qupsample_bilinear2d_nhwc_stub, - &qupsample_bilinear2d_nhwc_kernel); + &qupsample_bilinear2d_nhwc_kernel) REGISTER_DISPATCH( quantize_tensor_per_tensor_affine_sub_byte_stub, - &quantize_tensor_per_tensor_affine_sub_byte_cpu); + &quantize_tensor_per_tensor_affine_sub_byte_cpu) REGISTER_DISPATCH( dequantize_tensor_per_tensor_affine_sub_byte_stub, - &dequantize_tensor_per_tensor_affine_sub_byte_cpu); + &dequantize_tensor_per_tensor_affine_sub_byte_cpu) REGISTER_DISPATCH( masked_fill_kernel_quantized_stub, - &masked_fill_kernel_quantized_cpu); + &masked_fill_kernel_quantized_cpu) REGISTER_DISPATCH( index_put_kernel_quantized_stub, - &index_put_kernel_quantized_cpu); + &index_put_kernel_quantized_cpu) REGISTER_DISPATCH(qmean_inner_dim_stub, &qmean_inner_dim_kernel) REGISTER_DISPATCH(qstd_inner_dim_stub, &qstd_inner_dim_kernel) } // namespace at::native diff --git a/aten/src/ATen/native/sparse/FlattenIndicesKernel.cpp b/aten/src/ATen/native/sparse/FlattenIndicesKernel.cpp index 90d3e9cce6734f..35e3ebaa9f8b64 100644 --- a/aten/src/ATen/native/sparse/FlattenIndicesKernel.cpp +++ b/aten/src/ATen/native/sparse/FlattenIndicesKernel.cpp @@ -22,11 +22,11 @@ Tensor flatten_indices_cpu_kernel(const Tensor& indices, IntArrayRef size) { } -REGISTER_ARCH_DISPATCH(flatten_indices_stub, DEFAULT, &flatten_indices_cpu_kernel); -REGISTER_AVX512_DISPATCH(flatten_indices_stub, &flatten_indices_cpu_kernel); -REGISTER_AVX2_DISPATCH(flatten_indices_stub, &flatten_indices_cpu_kernel); -REGISTER_VSX_DISPATCH(flatten_indices_stub, 
&flatten_indices_cpu_kernel); -REGISTER_ZVECTOR_DISPATCH(flatten_indices_stub, &flatten_indices_cpu_kernel); -REGISTER_SVE256_DISPATCH(flatten_indices_stub, &flatten_indices_cpu_kernel); +REGISTER_ARCH_DISPATCH(flatten_indices_stub, DEFAULT, &flatten_indices_cpu_kernel) +REGISTER_AVX512_DISPATCH(flatten_indices_stub, &flatten_indices_cpu_kernel) +REGISTER_AVX2_DISPATCH(flatten_indices_stub, &flatten_indices_cpu_kernel) +REGISTER_VSX_DISPATCH(flatten_indices_stub, &flatten_indices_cpu_kernel) +REGISTER_ZVECTOR_DISPATCH(flatten_indices_stub, &flatten_indices_cpu_kernel) +REGISTER_SVE256_DISPATCH(flatten_indices_stub, &flatten_indices_cpu_kernel) } // namespace at::native diff --git a/aten/src/ATen/native/sparse/SparseBinaryOpIntersectionKernel.cpp b/aten/src/ATen/native/sparse/SparseBinaryOpIntersectionKernel.cpp index e86d5c46a795fa..20a44c87093994 100644 --- a/aten/src/ATen/native/sparse/SparseBinaryOpIntersectionKernel.cpp +++ b/aten/src/ATen/native/sparse/SparseBinaryOpIntersectionKernel.cpp @@ -156,24 +156,24 @@ void sparse_mask_projection_out_cpu_kernel( } -REGISTER_ARCH_DISPATCH(mul_sparse_sparse_out_stub, DEFAULT, &mul_sparse_sparse_out_cpu_kernel); -REGISTER_AVX512_DISPATCH(mul_sparse_sparse_out_stub, &mul_sparse_sparse_out_cpu_kernel); -REGISTER_AVX2_DISPATCH(mul_sparse_sparse_out_stub, &mul_sparse_sparse_out_cpu_kernel); -REGISTER_VSX_DISPATCH(mul_sparse_sparse_out_stub, &mul_sparse_sparse_out_cpu_kernel); -REGISTER_ZVECTOR_DISPATCH(mul_sparse_sparse_out_stub, &mul_sparse_sparse_out_cpu_kernel); -REGISTER_SVE256_DISPATCH(mul_sparse_sparse_out_stub, &mul_sparse_sparse_out_cpu_kernel); - -REGISTER_ARCH_DISPATCH(sparse_mask_intersection_out_stub, DEFAULT, &sparse_mask_intersection_out_cpu_kernel); -REGISTER_AVX512_DISPATCH(sparse_mask_intersection_out_stub, &sparse_mask_intersection_out_cpu_kernel); -REGISTER_AVX2_DISPATCH(sparse_mask_intersection_out_stub, &sparse_mask_intersection_out_cpu_kernel); -REGISTER_VSX_DISPATCH(sparse_mask_intersection_out_stub, &sparse_mask_intersection_out_cpu_kernel); -REGISTER_ZVECTOR_DISPATCH(sparse_mask_intersection_out_stub, &sparse_mask_intersection_out_cpu_kernel); -REGISTER_SVE256_DISPATCH(sparse_mask_intersection_out_stub, &sparse_mask_intersection_out_cpu_kernel); - -REGISTER_ARCH_DISPATCH(sparse_mask_projection_out_stub, DEFAULT, &sparse_mask_projection_out_cpu_kernel); -REGISTER_AVX512_DISPATCH(sparse_mask_projection_out_stub, &sparse_mask_projection_out_cpu_kernel); -REGISTER_AVX2_DISPATCH(sparse_mask_projection_out_stub, &sparse_mask_projection_out_cpu_kernel); -REGISTER_VSX_DISPATCH(sparse_mask_projection_out_stub, &sparse_mask_projection_out_cpu_kernel); -REGISTER_ZVECTOR_DISPATCH(sparse_mask_projection_out_stub, &sparse_mask_projection_out_cpu_kernel); -REGISTER_SVE256_DISPATCH(sparse_mask_projection_out_stub, &sparse_mask_projection_out_cpu_kernel); +REGISTER_ARCH_DISPATCH(mul_sparse_sparse_out_stub, DEFAULT, &mul_sparse_sparse_out_cpu_kernel) +REGISTER_AVX512_DISPATCH(mul_sparse_sparse_out_stub, &mul_sparse_sparse_out_cpu_kernel) +REGISTER_AVX2_DISPATCH(mul_sparse_sparse_out_stub, &mul_sparse_sparse_out_cpu_kernel) +REGISTER_VSX_DISPATCH(mul_sparse_sparse_out_stub, &mul_sparse_sparse_out_cpu_kernel) +REGISTER_ZVECTOR_DISPATCH(mul_sparse_sparse_out_stub, &mul_sparse_sparse_out_cpu_kernel) +REGISTER_SVE256_DISPATCH(mul_sparse_sparse_out_stub, &mul_sparse_sparse_out_cpu_kernel) + +REGISTER_ARCH_DISPATCH(sparse_mask_intersection_out_stub, DEFAULT, &sparse_mask_intersection_out_cpu_kernel) 
+REGISTER_AVX512_DISPATCH(sparse_mask_intersection_out_stub, &sparse_mask_intersection_out_cpu_kernel) +REGISTER_AVX2_DISPATCH(sparse_mask_intersection_out_stub, &sparse_mask_intersection_out_cpu_kernel) +REGISTER_VSX_DISPATCH(sparse_mask_intersection_out_stub, &sparse_mask_intersection_out_cpu_kernel) +REGISTER_ZVECTOR_DISPATCH(sparse_mask_intersection_out_stub, &sparse_mask_intersection_out_cpu_kernel) +REGISTER_SVE256_DISPATCH(sparse_mask_intersection_out_stub, &sparse_mask_intersection_out_cpu_kernel) + +REGISTER_ARCH_DISPATCH(sparse_mask_projection_out_stub, DEFAULT, &sparse_mask_projection_out_cpu_kernel) +REGISTER_AVX512_DISPATCH(sparse_mask_projection_out_stub, &sparse_mask_projection_out_cpu_kernel) +REGISTER_AVX2_DISPATCH(sparse_mask_projection_out_stub, &sparse_mask_projection_out_cpu_kernel) +REGISTER_VSX_DISPATCH(sparse_mask_projection_out_stub, &sparse_mask_projection_out_cpu_kernel) +REGISTER_ZVECTOR_DISPATCH(sparse_mask_projection_out_stub, &sparse_mask_projection_out_cpu_kernel) +REGISTER_SVE256_DISPATCH(sparse_mask_projection_out_stub, &sparse_mask_projection_out_cpu_kernel) } diff --git a/aten/src/ATen/native/sparse/SparseCsrTensor.cpp b/aten/src/ATen/native/sparse/SparseCsrTensor.cpp index e9552802082d8c..ca5447c6a80892 100644 --- a/aten/src/ATen/native/sparse/SparseCsrTensor.cpp +++ b/aten/src/ATen/native/sparse/SparseCsrTensor.cpp @@ -512,10 +512,10 @@ Tensor _sparse_compressed_tensor_unsafe_template(const Tensor& compressed_indice return _sparse_compressed_tensor_unsafe_template(compressed_indices, plain_indices, values, size, dtype, layout, device, pin_memory); \ } -SPARSE_COMPRESSED_TENSOR_UNSAFE(csr, kSparseCsr); -SPARSE_COMPRESSED_TENSOR_UNSAFE(csc, kSparseCsc); -SPARSE_COMPRESSED_TENSOR_UNSAFE(bsr, kSparseBsr); -SPARSE_COMPRESSED_TENSOR_UNSAFE(bsc, kSparseBsc); +SPARSE_COMPRESSED_TENSOR_UNSAFE(csr, kSparseCsr) +SPARSE_COMPRESSED_TENSOR_UNSAFE(csc, kSparseCsc) +SPARSE_COMPRESSED_TENSOR_UNSAFE(bsr, kSparseBsr) +SPARSE_COMPRESSED_TENSOR_UNSAFE(bsc, kSparseBsc) static DimVector _estimate_sparse_compressed_tensor_size( const Tensor& compressed_indices, diff --git a/aten/src/ATen/native/sparse/SparseCsrTensorMath.cpp b/aten/src/ATen/native/sparse/SparseCsrTensorMath.cpp index 8ccf8788fc621d..e11e536b64b047 100644 --- a/aten/src/ATen/native/sparse/SparseCsrTensorMath.cpp +++ b/aten/src/ATen/native/sparse/SparseCsrTensorMath.cpp @@ -433,42 +433,42 @@ Tensor& zero_sparse_csr_(Tensor& self) { } #define CREATE_UNARY_UFUNC(op_name) \ - CREATE_UNARY_UFUNC_OUT(op_name); \ - CREATE_UNARY_UFUNC_FUNCTIONAL(op_name); \ - CREATE_UNARY_UFUNC_INPLACE(op_name); + CREATE_UNARY_UFUNC_OUT(op_name) \ + CREATE_UNARY_UFUNC_FUNCTIONAL(op_name) \ + CREATE_UNARY_UFUNC_INPLACE(op_name) #define CREATE_UNARY_UFUNC_NO_INPLACE(op_name) \ - CREATE_UNARY_UFUNC_OUT(op_name); \ - CREATE_UNARY_UFUNC_FUNCTIONAL(op_name); + CREATE_UNARY_UFUNC_OUT(op_name) \ + CREATE_UNARY_UFUNC_FUNCTIONAL(op_name) // Exhaustive list of the unary ufuncs supported by sparse compressed -CREATE_UNARY_UFUNC(abs); -CREATE_UNARY_UFUNC(asin); -CREATE_UNARY_UFUNC(asinh); -CREATE_UNARY_UFUNC(atan); -CREATE_UNARY_UFUNC(atanh); -CREATE_UNARY_UFUNC(ceil); -CREATE_UNARY_UFUNC(deg2rad); -CREATE_UNARY_UFUNC(erf); -CREATE_UNARY_UFUNC(erfinv); -CREATE_UNARY_UFUNC(expm1); -CREATE_UNARY_UFUNC(floor); -CREATE_UNARY_UFUNC(frac); -CREATE_UNARY_UFUNC(log1p); -CREATE_UNARY_UFUNC(neg); -CREATE_UNARY_UFUNC(rad2deg); -CREATE_UNARY_UFUNC(sign); -CREATE_UNARY_UFUNC(sin); -CREATE_UNARY_UFUNC(sinh); -CREATE_UNARY_UFUNC(sgn); -CREATE_UNARY_UFUNC(sqrt); 
-CREATE_UNARY_UFUNC(tan); -CREATE_UNARY_UFUNC(tanh); -CREATE_UNARY_UFUNC(trunc); -CREATE_UNARY_UFUNC(conj_physical); +CREATE_UNARY_UFUNC(abs) +CREATE_UNARY_UFUNC(asin) +CREATE_UNARY_UFUNC(asinh) +CREATE_UNARY_UFUNC(atan) +CREATE_UNARY_UFUNC(atanh) +CREATE_UNARY_UFUNC(ceil) +CREATE_UNARY_UFUNC(deg2rad) +CREATE_UNARY_UFUNC(erf) +CREATE_UNARY_UFUNC(erfinv) +CREATE_UNARY_UFUNC(expm1) +CREATE_UNARY_UFUNC(floor) +CREATE_UNARY_UFUNC(frac) +CREATE_UNARY_UFUNC(log1p) +CREATE_UNARY_UFUNC(neg) +CREATE_UNARY_UFUNC(rad2deg) +CREATE_UNARY_UFUNC(sign) +CREATE_UNARY_UFUNC(sin) +CREATE_UNARY_UFUNC(sinh) +CREATE_UNARY_UFUNC(sgn) +CREATE_UNARY_UFUNC(sqrt) +CREATE_UNARY_UFUNC(tan) +CREATE_UNARY_UFUNC(tanh) +CREATE_UNARY_UFUNC(trunc) +CREATE_UNARY_UFUNC(conj_physical) C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-function") -static CREATE_UNARY_UFUNC(relu); +static CREATE_UNARY_UFUNC(relu) C10_DIAGNOSTIC_POP() // With addition of `round.decimals` overload, using CREATE_UNARY_UFUNC leads @@ -512,14 +512,14 @@ Tensor& threshold_backward_sparse_compressed_out( } // angle, isneginf, isposinf and signbit currently don't have an inplace variant -CREATE_UNARY_UFUNC_NO_INPLACE(angle); -CREATE_UNARY_UFUNC_NO_INPLACE(isneginf); -CREATE_UNARY_UFUNC_NO_INPLACE(isposinf); -CREATE_UNARY_UFUNC_NO_INPLACE(signbit); +CREATE_UNARY_UFUNC_NO_INPLACE(angle) +CREATE_UNARY_UFUNC_NO_INPLACE(isneginf) +CREATE_UNARY_UFUNC_NO_INPLACE(isposinf) +CREATE_UNARY_UFUNC_NO_INPLACE(signbit) // isnan and isinf don't have an out variant -CREATE_UNARY_UFUNC_FUNCTIONAL(isnan); -CREATE_UNARY_UFUNC_FUNCTIONAL(isinf); +CREATE_UNARY_UFUNC_FUNCTIONAL(isnan) +CREATE_UNARY_UFUNC_FUNCTIONAL(isinf) template void addmm_out_sparse_csr_native_cpu( diff --git a/aten/src/ATen/native/sparse/SparseTensorMath.cpp b/aten/src/ATen/native/sparse/SparseTensorMath.cpp index 45ff374c4736fc..d1990924f93a7b 100644 --- a/aten/src/ATen/native/sparse/SparseTensorMath.cpp +++ b/aten/src/ATen/native/sparse/SparseTensorMath.cpp @@ -1230,7 +1230,7 @@ void s_addmm_out_sparse_dense_worker(int64_t nnz, int64_t dim_i, int64_t dim_j, } } } -}; +} static Tensor& s_addmm_out_sparse_dense_cpu( Tensor& r, diff --git a/aten/src/ATen/native/transformers/attention.cpp b/aten/src/ATen/native/transformers/attention.cpp index 91770447fab764..5becd2a782f7df 100644 --- a/aten/src/ATen/native/transformers/attention.cpp +++ b/aten/src/ATen/native/transformers/attention.cpp @@ -444,12 +444,12 @@ int64_t _fused_sdp_choice_cpp(const Tensor& query_, const Tensor& key, const Ten return static_cast(backend); } -REGISTER_ARCH_DISPATCH(_fused_sdp_choice_stub, DEFAULT, &_fused_sdp_choice_cpp); -REGISTER_AVX2_DISPATCH(_fused_sdp_choice_stub, &_fused_sdp_choice_cpp); -REGISTER_AVX512_DISPATCH(_fused_sdp_choice_stub, &_fused_sdp_choice_cpp); -REGISTER_VSX_DISPATCH(_fused_sdp_choice_stub, &_fused_sdp_choice_cpp); -REGISTER_ZVECTOR_DISPATCH(_fused_sdp_choice_stub, &_fused_sdp_choice_cpp); -REGISTER_SVE256_DISPATCH(_fused_sdp_choice_stub, &_fused_sdp_choice_cpp); +REGISTER_ARCH_DISPATCH(_fused_sdp_choice_stub, DEFAULT, &_fused_sdp_choice_cpp) +REGISTER_AVX2_DISPATCH(_fused_sdp_choice_stub, &_fused_sdp_choice_cpp) +REGISTER_AVX512_DISPATCH(_fused_sdp_choice_stub, &_fused_sdp_choice_cpp) +REGISTER_VSX_DISPATCH(_fused_sdp_choice_stub, &_fused_sdp_choice_cpp) +REGISTER_ZVECTOR_DISPATCH(_fused_sdp_choice_stub, &_fused_sdp_choice_cpp) +REGISTER_SVE256_DISPATCH(_fused_sdp_choice_stub, &_fused_sdp_choice_cpp) int64_t _fused_sdp_choice_meta( const Tensor& query_, diff --git 
a/aten/src/ATen/native/transformers/cuda/attention.cu b/aten/src/ATen/native/transformers/cuda/attention.cu index 2a0974bcfad591..5a8e7c6ce57789 100644 --- a/aten/src/ATen/native/transformers/cuda/attention.cu +++ b/aten/src/ATen/native/transformers/cuda/attention.cu @@ -1387,7 +1387,7 @@ Tensor triton_scaled_dot_attention(const Tensor& q, const Tensor& k, const Tenso return at::Tensor(); } -REGISTER_CUDA_DISPATCH(_fused_sdp_choice_stub, &_fused_sdp_choice_cuda); +REGISTER_CUDA_DISPATCH(_fused_sdp_choice_stub, &_fused_sdp_choice_cuda) #if defined(USE_MEM_EFF_ATTENTION) and !defined(USE_ROCM) namespace { diff --git a/caffe2/perfkernels/embedding_lookup_idx.cc b/caffe2/perfkernels/embedding_lookup_idx.cc index 76be1201d589ea..db0f4468399023 100644 --- a/caffe2/perfkernels/embedding_lookup_idx.cc +++ b/caffe2/perfkernels/embedding_lookup_idx.cc @@ -227,23 +227,23 @@ static bool EmbeddingLookupGenericSlowIdx( } // clang-format on -EMBEDDING_IDX_SPECIALIZATION(int32_t, float, float, float, false); -EMBEDDING_IDX_SPECIALIZATION(int64_t, float, float, float, false); -EMBEDDING_IDX_SPECIALIZATION(int32_t, half, at::Half, float, false); -EMBEDDING_IDX_SPECIALIZATION(int64_t, half, at::Half, float, false); -EMBEDDING_IDX_SPECIALIZATION(int32_t, bfloat16, at::BFloat16, float, false); -EMBEDDING_IDX_SPECIALIZATION(int64_t, bfloat16, at::BFloat16, float, false); -EMBEDDING_IDX_SPECIALIZATION(int32_t, uint8_t, uint8_t, float, false); -EMBEDDING_IDX_SPECIALIZATION(int64_t, uint8_t, uint8_t, float, false); +EMBEDDING_IDX_SPECIALIZATION(int32_t, float, float, float, false) +EMBEDDING_IDX_SPECIALIZATION(int64_t, float, float, float, false) +EMBEDDING_IDX_SPECIALIZATION(int32_t, half, at::Half, float, false) +EMBEDDING_IDX_SPECIALIZATION(int64_t, half, at::Half, float, false) +EMBEDDING_IDX_SPECIALIZATION(int32_t, bfloat16, at::BFloat16, float, false) +EMBEDDING_IDX_SPECIALIZATION(int64_t, bfloat16, at::BFloat16, float, false) +EMBEDDING_IDX_SPECIALIZATION(int32_t, uint8_t, uint8_t, float, false) +EMBEDDING_IDX_SPECIALIZATION(int64_t, uint8_t, uint8_t, float, false) -EMBEDDING_IDX_SPECIALIZATION(int32_t, float, float, float, true); -EMBEDDING_IDX_SPECIALIZATION(int64_t, float, float, float, true); -EMBEDDING_IDX_SPECIALIZATION(int32_t, half, at::Half, float, true); -EMBEDDING_IDX_SPECIALIZATION(int64_t, half, at::Half, float, true); -EMBEDDING_IDX_SPECIALIZATION(int32_t, bfloat16, at::BFloat16, float, true); -EMBEDDING_IDX_SPECIALIZATION(int64_t, bfloat16, at::BFloat16, float, true); -EMBEDDING_IDX_SPECIALIZATION(int32_t, uint8_t, uint8_t, float, true); -EMBEDDING_IDX_SPECIALIZATION(int64_t, uint8_t, uint8_t, float, true); +EMBEDDING_IDX_SPECIALIZATION(int32_t, float, float, float, true) +EMBEDDING_IDX_SPECIALIZATION(int64_t, float, float, float, true) +EMBEDDING_IDX_SPECIALIZATION(int32_t, half, at::Half, float, true) +EMBEDDING_IDX_SPECIALIZATION(int64_t, half, at::Half, float, true) +EMBEDDING_IDX_SPECIALIZATION(int32_t, bfloat16, at::BFloat16, float, true) +EMBEDDING_IDX_SPECIALIZATION(int64_t, bfloat16, at::BFloat16, float, true) +EMBEDDING_IDX_SPECIALIZATION(int32_t, uint8_t, uint8_t, float, true) +EMBEDDING_IDX_SPECIALIZATION(int64_t, uint8_t, uint8_t, float, true) #undef EMBEDDING_IDX_SPECIALIZATION diff --git a/caffe2/utils/threadpool/ThreadPool.cc b/caffe2/utils/threadpool/ThreadPool.cc index 6e716a77d7fd65..0e12d5d253ae00 100644 --- a/caffe2/utils/threadpool/ThreadPool.cc +++ b/caffe2/utils/threadpool/ThreadPool.cc @@ -10,16 +10,16 @@ C10_DEFINE_bool( caffe2_threadpool_force_inline, false, - 
"Force to always run jobs on the calling thread"); + "Force to always run jobs on the calling thread") // Whether or not threadpool caps apply to Android -C10_DEFINE_int(caffe2_threadpool_android_cap, true, ""); +C10_DEFINE_int(caffe2_threadpool_android_cap, true, "") // Whether or not threadpool caps apply to iOS and MacOS -C10_DEFINE_int(caffe2_threadpool_ios_cap, true, ""); -C10_DEFINE_int(caffe2_threadpool_macos_cap, true, ""); +C10_DEFINE_int(caffe2_threadpool_ios_cap, true, "") +C10_DEFINE_int(caffe2_threadpool_macos_cap, true, "") -C10_DEFINE_int(pthreadpool_size, 0, "Override the default thread pool size."); +C10_DEFINE_int(pthreadpool_size, 0, "Override the default thread pool size.") namespace caffe2 { diff --git a/torch/csrc/api/include/torch/nn/modules/dropout.h b/torch/csrc/api/include/torch/nn/modules/dropout.h index c23f0501dc3b53..c63b1e6a7eeae5 100644 --- a/torch/csrc/api/include/torch/nn/modules/dropout.h +++ b/torch/csrc/api/include/torch/nn/modules/dropout.h @@ -14,7 +14,7 @@ namespace detail { template class _DropoutNd : public torch::nn::Cloneable { public: - _DropoutNd(double p) : _DropoutNd(DropoutOptions().p(p)){}; + _DropoutNd(double p) : _DropoutNd(DropoutOptions().p(p)) {} explicit _DropoutNd(const DropoutOptions& options_ = {}) : options(options_) { // NOLINTNEXTLINE(clang-analyzer-optin.cplusplus.VirtualCall) diff --git a/torch/csrc/api/include/torch/nn/modules/pooling.h b/torch/csrc/api/include/torch/nn/modules/pooling.h index c9482adb702bbe..17ed12f4cc037e 100644 --- a/torch/csrc/api/include/torch/nn/modules/pooling.h +++ b/torch/csrc/api/include/torch/nn/modules/pooling.h @@ -227,7 +227,7 @@ class TORCH_API AdaptiveMaxPoolImpl : public torch::nn::Cloneable { const AdaptiveMaxPoolOptions& options_) : options(options_) {} - void reset() override{}; + void reset() override {} /// Pretty prints the `AdaptiveMaxPool{1,2,3}d` module into the given /// `stream`. diff --git a/torch/csrc/api/include/torch/optim/optimizer.h b/torch/csrc/api/include/torch/optim/optimizer.h index e6115cb2f6d783..fd81153db1c67b 100644 --- a/torch/csrc/api/include/torch/optim/optimizer.h +++ b/torch/csrc/api/include/torch/optim/optimizer.h @@ -128,7 +128,7 @@ class TORCH_API Optimizer { std::unique_ptr defaults) : Optimizer( {OptimizerParamGroup(std::move(parameters))}, - std::move(defaults)){}; + std::move(defaults)) {} /// Adds the given param_group to the optimizer's param_group list. 
void add_param_group(const OptimizerParamGroup& param_group); diff --git a/torch/csrc/autograd/record_function_ops.cpp b/torch/csrc/autograd/record_function_ops.cpp index ef29465e19176b..d005951341ba65 100644 --- a/torch/csrc/autograd/record_function_ops.cpp +++ b/torch/csrc/autograd/record_function_ops.cpp @@ -9,7 +9,7 @@ namespace caffe2 { // Required for cpp_custom_type_hack to work // NOLINTNEXTLINE(bugprone-exception-escape) -CAFFE_KNOWN_TYPE(at::RecordFunction); +CAFFE_KNOWN_TYPE(at::RecordFunction) } // namespace caffe2 namespace torch::autograd::profiler { diff --git a/torch/csrc/distributed/c10d/Backend.hpp b/torch/csrc/distributed/c10d/Backend.hpp index b75e457b8cd01c..06efbcac297124 100644 --- a/torch/csrc/distributed/c10d/Backend.hpp +++ b/torch/csrc/distributed/c10d/Backend.hpp @@ -77,7 +77,7 @@ class TORCH_API Backend : public torch::CustomClassHolder { // Subclasses must override this method to return the backend name virtual const std::string getBackendName() const { TORCH_INTERNAL_ASSERT(false, "getBackendName is not implemented."); - }; + } virtual c10::intrusive_ptr broadcast( std::vector& /* tensors */, diff --git a/torch/csrc/distributed/c10d/GlooDeviceFactory.cpp b/torch/csrc/distributed/c10d/GlooDeviceFactory.cpp index 8ef195ca5ecbd8..8784e351796986 100644 --- a/torch/csrc/distributed/c10d/GlooDeviceFactory.cpp +++ b/torch/csrc/distributed/c10d/GlooDeviceFactory.cpp @@ -39,7 +39,7 @@ C10_DEFINE_SHARED_REGISTRY_WITHOUT_WARNING( GlooDeviceRegistry, ::gloo::transport::Device, const std::string& /* interface */, - const std::string& /* hostname */); + const std::string& /* hostname */) #if GLOO_HAVE_TRANSPORT_TCP static std::shared_ptr<::gloo::transport::Device> makeTCPDevice( @@ -62,8 +62,8 @@ static std::shared_ptr<::gloo::transport::Device> makeTCPDevice( // Registry priority is per key identifier. We register TCP to `LINUX` for // the flexibility of other application to override by priority. Register // TCP to `TCP` for env "GLOO_DEVICE_TRANSPORT" override. 
-C10_REGISTER_CREATOR(GlooDeviceRegistry, LINUX, makeTCPDevice); -C10_REGISTER_CREATOR(GlooDeviceRegistry, TCP, makeTCPDevice); +C10_REGISTER_CREATOR(GlooDeviceRegistry, LINUX, makeTCPDevice) +C10_REGISTER_CREATOR(GlooDeviceRegistry, TCP, makeTCPDevice) #endif #if GLOO_HAVE_TRANSPORT_TCP_TLS diff --git a/torch/csrc/distributed/c10d/ProcessGroup.hpp b/torch/csrc/distributed/c10d/ProcessGroup.hpp index 1f28f5442d03cc..5cba3a39629d4e 100644 --- a/torch/csrc/distributed/c10d/ProcessGroup.hpp +++ b/torch/csrc/distributed/c10d/ProcessGroup.hpp @@ -97,7 +97,7 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder { default: TORCH_CHECK(false, "THis should never happen!"); } - }; + } static BackendType strToBackendType(const std::string& backend) { if (backend == "undefined") { @@ -113,7 +113,7 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder { } else { return BackendType::CUSTOM; } - }; + } // Not used, set for backwards compatibility and only used for TypeDef in // Ops.cpp @@ -146,11 +146,11 @@ class TORCH_API ProcessGroup : public torch::CustomClassHolder { virtual const std::string getBackendName() const { return backendTypeToString(backendType_); - }; + } BackendType getBackendType() const { return backendType_; - }; + } virtual void startCoalescing(c10::DeviceType deviceType) { // only nccl has implemented startCoalescing so only execute for nccl diff --git a/torch/csrc/distributed/rpc/utils.cpp b/torch/csrc/distributed/rpc/utils.cpp index e3c7228d95aa86..aa3fccbd2fc70e 100644 --- a/torch/csrc/distributed/rpc/utils.cpp +++ b/torch/csrc/distributed/rpc/utils.cpp @@ -316,7 +316,7 @@ parseWireSections(const void* data, size_t data_size) { static const char* kMeta = "meta"; static const char* kPayload = "payload"; -}; // namespace +} // namespace c10::List cloneSparseTensors( const std::vector& tensors) { diff --git a/torch/csrc/jit/frontend/sugared_value.cpp b/torch/csrc/jit/frontend/sugared_value.cpp index 54be15a8702837..5f1a3e798bf93d 100644 --- a/torch/csrc/jit/frontend/sugared_value.cpp +++ b/torch/csrc/jit/frontend/sugared_value.cpp @@ -578,7 +578,7 @@ RangeValue::RangeValue( SugaredValuePtr RangeValue::iter(const SourceRange& loc, GraphFunction& m) { return shared_from_this(); -}; +} Value* RangeValue::len(const SourceRange& loc, GraphFunction& m) { if (static_len_) { diff --git a/torch/csrc/jit/mobile/nnc/registry.cpp b/torch/csrc/jit/mobile/nnc/registry.cpp index 18a15eccd23d58..83f4a9c2348a8d 100644 --- a/torch/csrc/jit/mobile/nnc/registry.cpp +++ b/torch/csrc/jit/mobile/nnc/registry.cpp @@ -2,6 +2,6 @@ namespace torch::jit::mobile::nnc { -C10_DEFINE_REGISTRY(NNCKernelRegistry, NNCKernel); +C10_DEFINE_REGISTRY(NNCKernelRegistry, NNCKernel) } // namespace torch::jit::mobile::nnc diff --git a/torch/csrc/jit/passes/frozen_linear_transpose.cpp b/torch/csrc/jit/passes/frozen_linear_transpose.cpp index 5d819e86fd6c60..9595227d2587dc 100644 --- a/torch/csrc/jit/passes/frozen_linear_transpose.cpp +++ b/torch/csrc/jit/passes/frozen_linear_transpose.cpp @@ -78,7 +78,7 @@ class TransposeFrozenLinear { node->replaceAllUsesWith(bias_result); } node->destroy(); - }; + } void handleBlockAndSubblocks(Block* block) {} diff --git a/torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp b/torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp index c620d216cf33f4..2e2daaa11a0c3f 100644 --- a/torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp +++ b/torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp @@ -303,7 +303,7 @@ void MKLDNNLayerNormOp(Stack& stack, bool inplace) { 
at::native::mkldnn_layer_norm_last_index_weight_bias_f32( input, shape, weight, bias, eps, inplace); push(stack, dst); -}; +} Operation BroadOp(const Node* node) { return [](Stack& stack) { diff --git a/torch/csrc/jit/passes/integer_value_refinement.cpp b/torch/csrc/jit/passes/integer_value_refinement.cpp index 22a3bb42790ece..7405608bb4ca0f 100644 --- a/torch/csrc/jit/passes/integer_value_refinement.cpp +++ b/torch/csrc/jit/passes/integer_value_refinement.cpp @@ -201,7 +201,7 @@ struct IntegerValueRefiner { active_refinements_.pop_back(); return block_refinements; - }; + } std::optional tryFindRefinement(Value* v) { for (const auto& ref : active_refinements_) { diff --git a/torch/csrc/jit/passes/peephole_list_idioms.cpp b/torch/csrc/jit/passes/peephole_list_idioms.cpp index 1c9a7a050d915a..e07496dee2e524 100644 --- a/torch/csrc/jit/passes/peephole_list_idioms.cpp +++ b/torch/csrc/jit/passes/peephole_list_idioms.cpp @@ -126,7 +126,7 @@ struct ListLenRefiner { } active_refinements_.pop_back(); return block_refinements; - }; + } std::optional tryFindRefinement(Value* v) { for (const auto& ref : active_refinements_) { diff --git a/torch/csrc/jit/passes/quantization/helper.cpp b/torch/csrc/jit/passes/quantization/helper.cpp index 4e103b32701d90..1d623c82d32266 100644 --- a/torch/csrc/jit/passes/quantization/helper.cpp +++ b/torch/csrc/jit/passes/quantization/helper.cpp @@ -705,7 +705,7 @@ static bool is_module( return module_name.value() == module_qualified_name; } return false; -}; +} bool aten_add_alpha_is_one( const Match& match, diff --git a/torch/csrc/jit/passes/utils/op_registry.cpp b/torch/csrc/jit/passes/utils/op_registry.cpp index 2538c90b4575c7..46eb552f99a66f 100644 --- a/torch/csrc/jit/passes/utils/op_registry.cpp +++ b/torch/csrc/jit/passes/utils/op_registry.cpp @@ -57,7 +57,7 @@ std::shared_ptr nn_ops_first_input_preserving() { "aten::hardswish_(Tensor self) -> Tensor", }); return ops; -}; +} // Requirements: // dims : Changed from first argument @@ -70,5 +70,5 @@ std::shared_ptr ops_one_tensor_in_shape_transform() { "aten::flatten(Tensor self, int start_dim, int end_dim) -> Tensor", }); return ops; -}; +} } // namespace torch::jit diff --git a/torch/csrc/jit/passes/value_refinement_utils.h b/torch/csrc/jit/passes/value_refinement_utils.h index cd2e3d1b82bcbd..387a0af360f322 100644 --- a/torch/csrc/jit/passes/value_refinement_utils.h +++ b/torch/csrc/jit/passes/value_refinement_utils.h @@ -29,7 +29,7 @@ struct BooleanRefinementMapping { ListRefinement true_refine, ListRefinement false_refine) : true_refine_(std::move(true_refine)), - false_refine_(std::move(false_refine)){}; + false_refine_(std::move(false_refine)) {} BooleanRefinementMapping() = default; // empty static BooleanRefinementMapping FalseRefinements( diff --git a/torch/csrc/jit/runtime/static/native_ops.cpp b/torch/csrc/jit/runtime/static/native_ops.cpp index 73b8cfeb87b004..5bbd52c5a8ea17 100644 --- a/torch/csrc/jit/runtime/static/native_ops.cpp +++ b/torch/csrc/jit/runtime/static/native_ops.cpp @@ -36,7 +36,7 @@ std::vector boxInputs(const ProcessedNode& pnode) { } // namespace -C10_DEFINE_REGISTRY(SRNativeOperatorRegistry, SROperatorFunctor); +C10_DEFINE_REGISTRY(SRNativeOperatorRegistry, SROperatorFunctor) bool nativeOpIsRegistered(const c10::Symbol& op_name) { const std::string name(op_name.toQualString()); diff --git a/torch/csrc/jit/runtime/static/ops.h b/torch/csrc/jit/runtime/static/ops.h index 623340daec068b..eb3dafeb59e2f9 100644 --- a/torch/csrc/jit/runtime/static/ops.h +++ 
b/torch/csrc/jit/runtime/static/ops.h @@ -38,7 +38,7 @@ TORCH_DECLARE_REGISTRY(SROperatorRegistry, SROperatorFunctor); return fn(n); \ } \ }; \ - C10_REGISTER_CLASS(SROperatorRegistry, name, SROperatorFunctor_##id); + C10_REGISTER_CLASS(SROperatorRegistry, name, SROperatorFunctor_##id) TORCH_DECLARE_REGISTRY(SRNativeOperatorRegistry, SROperatorFunctor); #define REGISTER_NATIVE_OPERATOR_FUNCTOR(name, id, ...) \ @@ -49,7 +49,7 @@ TORCH_DECLARE_REGISTRY(SRNativeOperatorRegistry, SROperatorFunctor); } \ }; \ C10_REGISTER_CLASS( \ - SRNativeOperatorRegistry, name, SRNativeOperatorFunctor_##id); + SRNativeOperatorRegistry, name, SRNativeOperatorFunctor_##id) inline at::Tensor create_empty_from(const at::Tensor& t) { return at::detail::empty_cpu( diff --git a/torch/csrc/jit/serialization/pickle.cpp b/torch/csrc/jit/serialization/pickle.cpp index 4bf6189a5bf59b..6de5c9f4f70185 100644 --- a/torch/csrc/jit/serialization/pickle.cpp +++ b/torch/csrc/jit/serialization/pickle.cpp @@ -142,7 +142,7 @@ IValue pickle_load(const std::vector& data) { "pickle_load not supported on mobile " "(see https://github.com/pytorch/pytorch/pull/30108)"); #endif -}; +} // A specialized version of pickle_load that can load custom objects. c10::IValue pickle_load_obj(std::string_view data) { diff --git a/torch/csrc/jit/tensorexpr/codegen.cpp b/torch/csrc/jit/tensorexpr/codegen.cpp index b5149a8a624abc..41e54869850c8c 100644 --- a/torch/csrc/jit/tensorexpr/codegen.cpp +++ b/torch/csrc/jit/tensorexpr/codegen.cpp @@ -87,7 +87,7 @@ void* CodeGen::argToPtr(const BufferArg& bufferArg, const CallArg& callArg) { case ScalarType::Name: \ return callArg.Name##Ptr(); - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE) #undef TYPE_CASE default: diff --git a/torch/csrc/jit/tensorexpr/codegen.h b/torch/csrc/jit/tensorexpr/codegen.h index e1a42cb1d45935..cad930b58bd930 100644 --- a/torch/csrc/jit/tensorexpr/codegen.h +++ b/torch/csrc/jit/tensorexpr/codegen.h @@ -165,7 +165,7 @@ class CodeGen::CallArg { memcpy(buffer_, &v, sizeof(Type)); \ data_ = (void*)buffer_; \ } - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, ARG_TYPE_CTOR); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, ARG_TYPE_CTOR) #undef ARG_TYPE_CTOR void* data() const { @@ -199,7 +199,7 @@ class CodeGen::CallArg { TORCH_INTERNAL_ASSERT(data_ == (void*)buffer_); \ return (Type*)data_; \ } - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, ARG_PTR_DEFINE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, ARG_PTR_DEFINE) #undef ARG_PTR_DEFINE private: diff --git a/torch/csrc/jit/tensorexpr/cpp_codegen.cpp b/torch/csrc/jit/tensorexpr/cpp_codegen.cpp index 453daae9dc72ab..b9cc921c303af2 100644 --- a/torch/csrc/jit/tensorexpr/cpp_codegen.cpp +++ b/torch/csrc/jit/tensorexpr/cpp_codegen.cpp @@ -148,7 +148,7 @@ void dispatch_binary_op(std::ostream& os, const BinaryOpNode* v) { case ScalarType::Name: \ visit_binary_op(os, v->lhs(), v->rhs(), v->expr_type()); \ break; - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE) #undef TYPE_CASE default: throw unsupported_dtype(); diff --git a/torch/csrc/jit/tensorexpr/eval.cpp b/torch/csrc/jit/tensorexpr/eval.cpp index a3d2274a1eccf0..12982d98b01883 100644 --- a/torch/csrc/jit/tensorexpr/eval.cpp +++ b/torch/csrc/jit/tensorexpr/eval.cpp @@ -375,7 +375,7 @@ class SimpleIREvaluatorImpl : public IRVisitor { case ScalarType::Name: \ value = compare_select_op(lhs, rhs, retval1, retval2, 
cmp_op); \ break; - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE) #undef TYPE_CASE default: throw unsupported_dtype(); @@ -407,7 +407,7 @@ class SimpleIREvaluatorImpl : public IRVisitor { value_ = compare_select_op_helper( \ lhs_v, rhs_v, ret_val1_v, ret_val2_v, cmp_op); \ break; - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE) #undef TYPE_CASE default: throw unsupported_dtype(); @@ -418,7 +418,7 @@ class SimpleIREvaluatorImpl : public IRVisitor { TORCH_API void visit(const Name##ImmPtr& v) override { \ value_ = InterpValue(v->value()); \ } - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_VISIT); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_VISIT) #undef IMM_VISIT TORCH_API void visit(const BlockPtr& v) override { @@ -472,7 +472,7 @@ class SimpleIREvaluatorImpl : public IRVisitor { case ScalarType::Name: \ this->value_ = InterpValue(castValues(src_dtype, v)); \ break; - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, DST_TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, DST_TYPE_CASE) #undef DST_TYPE_CASE #define DST_TYPE_CASE_QUANT(Type, Name, CppType) \ case ScalarType::Name: { \ @@ -507,7 +507,7 @@ class SimpleIREvaluatorImpl : public IRVisitor { case ScalarType::Name: \ doCastFromSrc(src_dtype, dst_dtype, value_); \ break; - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, SRC_TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, SRC_TYPE_CASE) SRC_TYPE_CASE(c10::quint8, QUInt8); SRC_TYPE_CASE(c10::qint8, QInt8); #undef SRC_TYPE_CASE @@ -615,7 +615,7 @@ class SimpleIREvaluatorImpl : public IRVisitor { std::vector v(lanes, value.as()); \ value_ = InterpValue(v); \ } break; - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE) #undef TYPE_CASE default: throw unsupported_dtype(); @@ -758,7 +758,7 @@ class SimpleIREvaluatorImpl : public IRVisitor { } \ value_ = InterpValue(val); \ } break; - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE) TYPE_CASE(c10::quint8, QUInt8); TYPE_CASE(c10::qint8, QInt8); #undef TYPE_CASE @@ -805,7 +805,7 @@ class SimpleIREvaluatorImpl : public IRVisitor { ptr##Name[index[i]] = value[i]; \ } \ } break; - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE) TYPE_CASE(c10::quint8, QUInt8); TYPE_CASE(c10::qint8, QInt8); #undef TYPE_CASE @@ -1268,7 +1268,7 @@ void SimpleIREvaluator::bindArg(const BufferArg& bufArg, void* data) { impl_->bindVar(bufArg.var(), typed_data); \ break; \ } - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE) #undef TYPE_CASE default: throw unsupported_dtype(); diff --git a/torch/csrc/jit/tensorexpr/eval.h b/torch/csrc/jit/tensorexpr/eval.h index cc6a79ef5fd48e..8cbc1689e0c9b9 100644 --- a/torch/csrc/jit/tensorexpr/eval.h +++ b/torch/csrc/jit/tensorexpr/eval.h @@ -30,7 +30,7 @@ class InterpValue { Name##values.push_back(v); \ return; \ } - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE) #undef TYPE_CASE throw unsupported_dtype(); } @@ -89,9 +89,9 @@ class InterpValue { } \ return Name##values[0]; \ } -AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, VALUE_AS_DISPATCH); 
-VALUE_AS_DISPATCH(c10::quint8, QUInt8); -VALUE_AS_DISPATCH(c10::qint8, QInt8); +AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, VALUE_AS_DISPATCH) +VALUE_AS_DISPATCH(c10::quint8, QUInt8) +VALUE_AS_DISPATCH(c10::qint8, QInt8) #undef VALUE_AS_DISPATCH #define VALUE_AS_VEC_DISPATCH(Type, Name) \ @@ -102,9 +102,9 @@ VALUE_AS_DISPATCH(c10::qint8, QInt8); } \ return Name##values; \ } -AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, VALUE_AS_VEC_DISPATCH); -VALUE_AS_VEC_DISPATCH(c10::quint8, QUInt8); -VALUE_AS_VEC_DISPATCH(c10::qint8, QInt8); +AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, VALUE_AS_VEC_DISPATCH) +VALUE_AS_VEC_DISPATCH(c10::quint8, QUInt8) +VALUE_AS_VEC_DISPATCH(c10::qint8, QInt8) #undef VALUE_AS_VEC_DISPATCH template diff --git a/torch/csrc/jit/tensorexpr/expr.cpp b/torch/csrc/jit/tensorexpr/expr.cpp index 35c6ac03ce8dda..ece08a2f08b7b7 100644 --- a/torch/csrc/jit/tensorexpr/expr.cpp +++ b/torch/csrc/jit/tensorexpr/expr.cpp @@ -87,7 +87,7 @@ ExprHandle ExprHandle::operator>>(const ExprHandle& other) const { #define IMM_EXPR_DECLARE(Type, Name) \ ExprHandle::ExprHandle(Type v) : ExprHandle(Name##Imm::make(v)) {} -AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_EXPR_DECLARE); +AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_EXPR_DECLARE) #undef IMM_EXPR_DECLARE ExprHandle sin(const ExprHandle& v) { diff --git a/torch/csrc/jit/tensorexpr/expr.h b/torch/csrc/jit/tensorexpr/expr.h index c5c41cd0a045ce..30d3ecdccda9df 100644 --- a/torch/csrc/jit/tensorexpr/expr.h +++ b/torch/csrc/jit/tensorexpr/expr.h @@ -112,7 +112,7 @@ class TORCH_API ExprHandle { } #define IMM_EXPR_DECLARE(Type, Name) ExprHandle(Type v); - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_EXPR_DECLARE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_EXPR_DECLARE) #undef IMM_EXPR_DECLARE template @@ -274,7 +274,7 @@ class TORCH_API Buf : public ExprNode { ExprPtr initializer() const { return initializer_; - }; + } ExprPtr qzero() const { return qzero_; diff --git a/torch/csrc/jit/tensorexpr/external_functions.h b/torch/csrc/jit/tensorexpr/external_functions.h index 9710793583af40..a8d08166fcfb86 100644 --- a/torch/csrc/jit/tensorexpr/external_functions.h +++ b/torch/csrc/jit/tensorexpr/external_functions.h @@ -97,7 +97,7 @@ void DispatchParallel( FOR_ALL_EXTERNAL_FUNCTIONS(DECLARE_EXTERNAL_FUNCTION) #if AT_MKLDNN_ENABLED() -DECLARE_EXTERNAL_FUNCTION(nnc_mkldnn_prepacked_conv_run); +DECLARE_EXTERNAL_FUNCTION(nnc_mkldnn_prepacked_conv_run) #endif TORCH_API void nnc_aten_free(size_t bufs_num, void** ptrs) noexcept; diff --git a/torch/csrc/jit/tensorexpr/fwd_decls.h b/torch/csrc/jit/tensorexpr/fwd_decls.h index d0a4acbc3169ce..0849c8cdb21073 100644 --- a/torch/csrc/jit/tensorexpr/fwd_decls.h +++ b/torch/csrc/jit/tensorexpr/fwd_decls.h @@ -119,7 +119,7 @@ using SyncThreadsPtr = NodePtr; #define IMM_DECLARE(Type, Name) \ class Name##Imm; \ using Name##ImmPtr = NodePtr; -AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_DECLARE); +AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_DECLARE) #undef IMM_DECLARE } // namespace torch::jit::tensorexpr diff --git a/torch/csrc/jit/tensorexpr/hash_provider.h b/torch/csrc/jit/tensorexpr/hash_provider.h index b50b4bfeabfbdb..57a64c569aa95a 100644 --- a/torch/csrc/jit/tensorexpr/hash_provider.h +++ b/torch/csrc/jit/tensorexpr/hash_provider.h @@ -86,7 +86,7 @@ class TORCH_API HashProvider : public IRVisitor { CACHE_GUARD(); \ putHash(v, hash_combine(#Name, v->value())); \ } - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_VISIT); + 
AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_VISIT) #undef IMM_VISIT void visit(const CastPtr& v) override; diff --git a/torch/csrc/jit/tensorexpr/ir.cpp b/torch/csrc/jit/tensorexpr/ir.cpp index ae2b2f82a009e5..fbe1b5ca3ade01 100644 --- a/torch/csrc/jit/tensorexpr/ir.cpp +++ b/torch/csrc/jit/tensorexpr/ir.cpp @@ -276,7 +276,7 @@ bool immediateIsPositive(const ExprPtr& e) { if (Name##ImmPtr imm = to(e)) { \ return imm->value() > 0; \ } - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE) #undef TYPE_CASE return false; } @@ -286,7 +286,7 @@ bool immediateIsZero(const ExprPtr& e) { if (Name##ImmPtr imm = to(e)) { \ return imm->value() == 0; \ } - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE) #undef TYPE_CASE return false; } diff --git a/torch/csrc/jit/tensorexpr/ir.h b/torch/csrc/jit/tensorexpr/ir.h index 31372b41334b23..a8ceabe701e7d7 100644 --- a/torch/csrc/jit/tensorexpr/ir.h +++ b/torch/csrc/jit/tensorexpr/ir.h @@ -322,7 +322,7 @@ class Min : public BinaryOpNode { private: \ Type value_; \ }; -AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_DECLARE); +AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_DECLARE) #undef IMM_DECLARE // Get immediate by ScalarType. @@ -332,7 +332,7 @@ ExprPtr getImmediateByType(ScalarType immType, T initialVal) { #define TYPE_CASE(Type, Name) \ case ScalarType::Name: \ return alloc(Type(initialVal)); - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE) #undef TYPE_CASE default: throw unsupported_dtype(); @@ -375,7 +375,7 @@ T immediateAs(const ExprPtr& e) { if (Name##ImmPtr imm = to(e)) { \ return imm->value(); \ } - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE) #undef TYPE_CASE throw unsupported_dtype(); return 0; @@ -392,7 +392,7 @@ bool immediateEquals(const ExprPtr& e, T val) { if (Name##ImmPtr imm = to(e)) { \ return imm->value() == val; \ } - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE) #undef TYPE_CASE throw unsupported_dtype(); return false; diff --git a/torch/csrc/jit/tensorexpr/ir_cloner.cpp b/torch/csrc/jit/tensorexpr/ir_cloner.cpp index 6d83dcf4a320b8..78421bb0f0a415 100644 --- a/torch/csrc/jit/tensorexpr/ir_cloner.cpp +++ b/torch/csrc/jit/tensorexpr/ir_cloner.cpp @@ -116,7 +116,7 @@ ExprPtr IRCloner::mutate(const CompareSelectPtr& v) { ExprPtr IRCloner::mutate(const Name##ImmPtr& v) { \ return v; \ } -AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_MUTATE_DEFINE); +AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_MUTATE_DEFINE) #undef IMM_MUTATE_DEFINE ExprPtr IRCloner::mutate(const CastPtr& v) { diff --git a/torch/csrc/jit/tensorexpr/ir_cloner.h b/torch/csrc/jit/tensorexpr/ir_cloner.h index dd626eeb4c9d9b..11a407dc715ce4 100644 --- a/torch/csrc/jit/tensorexpr/ir_cloner.h +++ b/torch/csrc/jit/tensorexpr/ir_cloner.h @@ -25,7 +25,7 @@ class TORCH_API IRCloner : public IRMutator { ExprPtr mutate(const CompareSelectPtr& v) override; #define IMM_MUTATE_DECLARE(Type, Name) \ ExprPtr mutate(const Name##ImmPtr& v) override; - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_MUTATE_DECLARE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_MUTATE_DECLARE) #undef IMM_MUTATE_DECLARE ExprPtr mutate(const CastPtr& v) override; ExprPtr 
mutate(const BitCastPtr& v) override; diff --git a/torch/csrc/jit/tensorexpr/ir_mutator.cpp b/torch/csrc/jit/tensorexpr/ir_mutator.cpp index 38a2f4c7c00275..52b7d5367dcdff 100644 --- a/torch/csrc/jit/tensorexpr/ir_mutator.cpp +++ b/torch/csrc/jit/tensorexpr/ir_mutator.cpp @@ -113,7 +113,7 @@ ExprPtr IRMutator::mutate(const CompareSelectPtr& v) { ExprPtr IRMutator::mutate(const Name##ImmPtr& v) { \ return v; \ } -AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_MUTATE_DEFINE); +AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_MUTATE_DEFINE) #undef IMM_MUTATE_DEFINE ExprPtr IRMutator::mutate(const CastPtr& v) { diff --git a/torch/csrc/jit/tensorexpr/ir_printer.cpp b/torch/csrc/jit/tensorexpr/ir_printer.cpp index 4fa2a6b6bde094..5e7aa884e9b6c4 100644 --- a/torch/csrc/jit/tensorexpr/ir_printer.cpp +++ b/torch/csrc/jit/tensorexpr/ir_printer.cpp @@ -231,7 +231,7 @@ static void formatImm(std::ostream& os, T v) { void IRPrinter::visit(const Name##ImmPtr& v) { \ formatImm(os(), v->value()); \ } -AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_PRINT_VISIT); +AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_PRINT_VISIT) #undef IMM_PRINT_VISIT void IRPrinter::visit(const CastPtr& v) { diff --git a/torch/csrc/jit/tensorexpr/ir_printer.h b/torch/csrc/jit/tensorexpr/ir_printer.h index 9c4d4d0b9dadfb..1909a40283c714 100644 --- a/torch/csrc/jit/tensorexpr/ir_printer.h +++ b/torch/csrc/jit/tensorexpr/ir_printer.h @@ -32,7 +32,7 @@ class TORCH_API IRPrinter : public IRVisitor { void visit(const RshiftPtr& v) override; void visit(const CompareSelectPtr& v) override; #define IMM_PRINT_VISIT(Type, Name) void visit(const Name##ImmPtr& v) override; - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_PRINT_VISIT); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_PRINT_VISIT) #undef IMM_PRINT_VISIT void visit(const CastPtr& v) override; void visit(const BitCastPtr& v) override; diff --git a/torch/csrc/jit/tensorexpr/ir_simplifier.cpp b/torch/csrc/jit/tensorexpr/ir_simplifier.cpp index cdd2c0e66bf7ab..f04ea5a7043505 100644 --- a/torch/csrc/jit/tensorexpr/ir_simplifier.cpp +++ b/torch/csrc/jit/tensorexpr/ir_simplifier.cpp @@ -1293,7 +1293,7 @@ bool isOperandInMinMaxTerm( return true; } return false; -}; +} // Simplifies the nested min-max pattern like: // * Max(Min(x, y), Min(x, z)) => Min(x, Max(y, z)) diff --git a/torch/csrc/jit/tensorexpr/ir_simplifier.h b/torch/csrc/jit/tensorexpr/ir_simplifier.h index d1e57a1a5a1b92..d9fd2b61c97b18 100644 --- a/torch/csrc/jit/tensorexpr/ir_simplifier.h +++ b/torch/csrc/jit/tensorexpr/ir_simplifier.h @@ -98,7 +98,7 @@ inline ExprPtr evaluateOp(const ExprPtr& v) { Type val = eval.value(); \ return getImmediateByType(v->dtype().scalar_type(), val); \ } - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE) #undef TYPE_CASE default: LOG(FATAL) << "Unsupported datatype: " << v->dtype(); diff --git a/torch/csrc/jit/tensorexpr/ir_visitor.cpp b/torch/csrc/jit/tensorexpr/ir_visitor.cpp index 00232fecd88211..d923e30ece3dff 100644 --- a/torch/csrc/jit/tensorexpr/ir_visitor.cpp +++ b/torch/csrc/jit/tensorexpr/ir_visitor.cpp @@ -76,7 +76,7 @@ void IRVisitor::visit(const CompareSelectPtr& v) { #define IMM_VISIT(Type, Name) \ void IRVisitor::visit(const Name##ImmPtr& v) {} -AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_VISIT); +AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_VISIT) #undef IMM_VISIT void IRVisitor::visit(const CastPtr& v) { diff --git 
a/torch/csrc/jit/tensorexpr/llvm_codegen.cpp b/torch/csrc/jit/tensorexpr/llvm_codegen.cpp index 90a0513719c92f..fd42b6f596c420 100644 --- a/torch/csrc/jit/tensorexpr/llvm_codegen.cpp +++ b/torch/csrc/jit/tensorexpr/llvm_codegen.cpp @@ -329,7 +329,7 @@ class LLVMCodeGenImpl : public IRVisitor { void visit(const CompareSelectPtr& v) override; #define IMM_VISIT_DECLARE(_1, Name) void visit(const Name##ImmPtr& v) override; - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_VISIT_DECLARE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_VISIT_DECLARE) #undef IMM_VISIT_DECLARE void visit(const CastPtr& v) override; diff --git a/torch/csrc/jit/tensorexpr/loopnest.cpp b/torch/csrc/jit/tensorexpr/loopnest.cpp index 5f0b2efe472df6..4e09bf51ba96d9 100644 --- a/torch/csrc/jit/tensorexpr/loopnest.cpp +++ b/torch/csrc/jit/tensorexpr/loopnest.cpp @@ -1753,7 +1753,7 @@ std::vector LoopNest::distributeLoopAndParentsOverInnerLoops( static bool areEqual(const ExprPtr& expr1, const ExprPtr& expr2) { auto diff = IRSimplifier::simplify(alloc(expr1, expr2)); return diff->isConstant() && (immediateAs(diff) == 0); -}; +} static bool doesExprContainAnyVar( const ExprPtr& expr, diff --git a/torch/csrc/jit/tensorexpr/operators/misc.cpp b/torch/csrc/jit/tensorexpr/operators/misc.cpp index fab35357c83b1a..f6339237235350 100644 --- a/torch/csrc/jit/tensorexpr/operators/misc.cpp +++ b/torch/csrc/jit/tensorexpr/operators/misc.cpp @@ -32,7 +32,7 @@ ExprHandle promoteToDtype(ExprHandle e, ScalarType dt) { case ScalarType::Name: \ e = cast(e); \ break; - AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE); + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE) #undef TYPE_CASE case ScalarType::QUInt8: e = cast(e); diff --git a/torch/csrc/jit/tensorexpr/registerizer.cpp b/torch/csrc/jit/tensorexpr/registerizer.cpp index 12856d59883e42..736f00a126d0b8 100644 --- a/torch/csrc/jit/tensorexpr/registerizer.cpp +++ b/torch/csrc/jit/tensorexpr/registerizer.cpp @@ -272,7 +272,7 @@ void RegisterizerAnalysis::visit(const ForPtr& v) { // having hoisted, now we can merge normally. 
mergeCurrentScopeIntoParent(); -}; +} void RegisterizerAnalysis::visit(const CondPtr& v) { ExprPtr condition = v->condition(); diff --git a/torch/csrc/jit/tensorexpr/registerizer.h b/torch/csrc/jit/tensorexpr/registerizer.h index 15d4bce415cb4e..752537bb089953 100644 --- a/torch/csrc/jit/tensorexpr/registerizer.h +++ b/torch/csrc/jit/tensorexpr/registerizer.h @@ -342,9 +342,9 @@ class TORCH_API RegisterizerAnalysis : public IRVisitor { stmtStack_.pop_front(); \ } - STMT_ON_STACK(AtomicAdd); - STMT_ON_STACK(Allocate); - STMT_ON_STACK(Free); + STMT_ON_STACK(AtomicAdd) + STMT_ON_STACK(Allocate) + STMT_ON_STACK(Free) #undef STMT_ON_STACK diff --git a/torch/csrc/jit/tensorexpr/types.cpp b/torch/csrc/jit/tensorexpr/types.cpp index 538b53be25f12a..a335a762031c08 100644 --- a/torch/csrc/jit/tensorexpr/types.cpp +++ b/torch/csrc/jit/tensorexpr/types.cpp @@ -22,8 +22,8 @@ AT_FORALL_SCALAR_TYPES_AND7( Float8_e4m3fn, Float8_e4m3fnuz, DTYPE_DEFINE) -DTYPE_DEFINE(c10::quint8, QUInt8); -DTYPE_DEFINE(c10::qint8, QInt8); +DTYPE_DEFINE(c10::quint8, QUInt8) +DTYPE_DEFINE(c10::qint8, QInt8) #undef DTYPE_DEFINE diff --git a/torch/csrc/jit/tensorexpr/types.h b/torch/csrc/jit/tensorexpr/types.h index 1b1c6066a5c116..cd23fdce4ae98e 100644 --- a/torch/csrc/jit/tensorexpr/types.h +++ b/torch/csrc/jit/tensorexpr/types.h @@ -86,8 +86,8 @@ extern TORCH_API Dtype kHandle; #define NNC_DTYPE_DECLARATION(ctype, name) extern TORCH_API Dtype k##name; AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, NNC_DTYPE_DECLARATION) -NNC_DTYPE_DECLARATION(c10::quint8, QUInt8); -NNC_DTYPE_DECLARATION(c10::qint8, QInt8); +NNC_DTYPE_DECLARATION(c10::quint8, QUInt8) +NNC_DTYPE_DECLARATION(c10::qint8, QInt8) #undef NNC_DTYPE_DECLARATION template @@ -99,8 +99,8 @@ TORCH_API Dtype ToDtype(); return k##name; \ } AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, NNC_TODTYPE_DECLARATION) -NNC_TODTYPE_DECLARATION(c10::quint8, QUInt8); -NNC_TODTYPE_DECLARATION(c10::qint8, QInt8); +NNC_TODTYPE_DECLARATION(c10::quint8, QUInt8) +NNC_TODTYPE_DECLARATION(c10::qint8, QInt8) #undef NNC_TODTYPE_DECLARATION TORCH_API Dtype ToDtype(ScalarType type); diff --git a/torch/csrc/lazy/backend/lowering_context.h b/torch/csrc/lazy/backend/lowering_context.h index 06d257bedcfa5c..3a40c7c3dd0801 100644 --- a/torch/csrc/lazy/backend/lowering_context.h +++ b/torch/csrc/lazy/backend/lowering_context.h @@ -56,7 +56,7 @@ class TORCH_API LoweringContext { const BackendDevice& device() const { return device_; - }; + } // Retrieves the vector holding all the tensors associated with the parameter // instructions which have been created. 
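The hunks above and below all apply the same mechanical change: these registration and declaration macros already expand to a complete statement or definition, so a trailing semicolon at the call site leaves behind an empty declaration, the kind of thing -Wextra-semi-style warnings flag. A minimal sketch of the pattern follows; RegisterHelper, REGISTER_EXAMPLE, example_stub, and example_kernel are illustrative names, not PyTorch's actual macros.

// Illustrative sketch only -- not PyTorch code.
struct RegisterHelper {
  explicit RegisterHelper(void (*fn)()) { (void)fn; }
};

void example_kernel() {}

// The macro body already ends with ';', so one invocation expands to a
// complete declaration on its own.
#define REGISTER_EXAMPLE(stub, fn) \
  static RegisterHelper stub##_registration{fn};

// Before: REGISTER_EXAMPLE(example_stub, &example_kernel);  // stray ';' -> empty declaration
// After: no trailing semicolon is needed.
REGISTER_EXAMPLE(example_stub, &example_kernel)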
diff --git a/torch/csrc/lazy/core/tensor.h b/torch/csrc/lazy/core/tensor.h index 51697268dcf44a..dfa317d3b81f90 100644 --- a/torch/csrc/lazy/core/tensor.h +++ b/torch/csrc/lazy/core/tensor.h @@ -11,7 +11,7 @@ namespace torch::lazy { class TORCH_API SymNodeImpl : public c10::SymNodeImpl { public: - SymNodeImpl(NodePtr ptr) : node_(std::move(ptr)){}; + SymNodeImpl(NodePtr ptr) : node_(std::move(ptr)) {} NodePtr node_; }; diff --git a/torch/csrc/profiler/perf.cpp b/torch/csrc/profiler/perf.cpp index 90a30cb3729bab..7302f7e28d8e54 100644 --- a/torch/csrc/profiler/perf.cpp +++ b/torch/csrc/profiler/perf.cpp @@ -119,13 +119,13 @@ uint64_t PerfEvent::ReadCounter() const { * value */ -PerfEvent::~PerfEvent(){}; +PerfEvent::~PerfEvent() {} -void PerfEvent::Init(){}; +void PerfEvent::Init() {} uint64_t PerfEvent::ReadCounter() const { return 0; -}; +} #endif /* __ANDROID__ || __linux__ */ diff --git a/torchgen/static_runtime/generator.py b/torchgen/static_runtime/generator.py index 7bbb7f64d86442..02fcbcf0376d92 100644 --- a/torchgen/static_runtime/generator.py +++ b/torchgen/static_runtime/generator.py @@ -623,7 +623,7 @@ def out_variant( {body} LogAndDumpSchema(n); return nullptr; - }}); + }}) """ return generated From 6b8e3022f29af389b75ca33e6b03fc730aa21fc2 Mon Sep 17 00:00:00 2001 From: Richard Barnes Date: Mon, 4 Nov 2024 15:35:23 +0000 Subject: [PATCH 006/503] Remove c10::optional usages in PyTorch (#139525) Test Plan: Sandcastle Reviewed By: swolchok Pull Request resolved: https://github.com/pytorch/pytorch/pull/139525 Approved by: https://github.com/malfet, https://github.com/Skylion007 --- aten/src/ATen/functorch/BatchRulesLinearAlgebra.cpp | 8 ++++---- torch/csrc/api/include/torch/data/dataloader/base.h | 2 +- torch/csrc/api/include/torch/data/dataloader/stateless.h | 2 +- torch/csrc/api/include/torch/data/datasets/chunk.h | 2 +- torch/csrc/api/include/torch/data/datasets/map.h | 2 +- torch/csrc/api/include/torch/data/detail/data_shuttle.h | 2 +- torch/csrc/api/include/torch/data/detail/sequencers.h | 2 +- torch/csrc/api/src/data/samplers/distributed.cpp | 4 ++-- torch/csrc/api/src/data/samplers/random.cpp | 4 ++-- torch/csrc/api/src/data/samplers/sequential.cpp | 2 +- torch/csrc/api/src/data/samplers/stream.cpp | 2 +- torch/csrc/functorch/init.cpp | 2 +- torch/csrc/jit/mobile/train/random.cpp | 2 +- torch/csrc/jit/mobile/train/sequential.cpp | 4 ++-- 14 files changed, 20 insertions(+), 20 deletions(-) diff --git a/aten/src/ATen/functorch/BatchRulesLinearAlgebra.cpp b/aten/src/ATen/functorch/BatchRulesLinearAlgebra.cpp index 46691707382140..b3120aa1e7ddf7 100644 --- a/aten/src/ATen/functorch/BatchRulesLinearAlgebra.cpp +++ b/aten/src/ATen/functorch/BatchRulesLinearAlgebra.cpp @@ -536,10 +536,10 @@ _scaled_dot_product_flash_attention_batch_rule( } fourOutputs _scaled_dot_product_efficient_attention_batch_rule( - const Tensor& query, optional query_bdim, - const Tensor& key, optional key_bdim, - const Tensor& value, optional value_bdim, - const std::optional& attn_bias, optional attn_bias_bdim, + const Tensor& query, std::optional query_bdim, + const Tensor& key, std::optional key_bdim, + const Tensor& value, std::optional value_bdim, + const std::optional& attn_bias, std::optional attn_bias_bdim, bool compute_log_sumexp, double dropout_p, bool is_causal, diff --git a/torch/csrc/api/include/torch/data/dataloader/base.h b/torch/csrc/api/include/torch/data/dataloader/base.h index aff15f34fec0cc..17c97793b94f5c 100644 --- a/torch/csrc/api/include/torch/data/dataloader/base.h +++ 
b/torch/csrc/api/include/torch/data/dataloader/base.h @@ -173,7 +173,7 @@ class DataLoaderBase { } else if (auto batch_request = get_batch_request()) { return this->main_thread_dataset_->get_batch(std::move(*batch_request)); } - return nullopt; + return std::nullopt; } /// The function that worker threads run. diff --git a/torch/csrc/api/include/torch/data/dataloader/stateless.h b/torch/csrc/api/include/torch/data/dataloader/stateless.h index d8f94d471ce033..cdd4c2cc069c82 100644 --- a/torch/csrc/api/include/torch/data/dataloader/stateless.h +++ b/torch/csrc/api/include/torch/data/dataloader/stateless.h @@ -68,7 +68,7 @@ class StatelessDataLoader : public DataLoaderBase< if (!indices || (indices->size() < this->options_.batch_size && this->options_.drop_last)) { - return nullopt; + return std::nullopt; } AT_ASSERT(indices->size() > 0); return indices; diff --git a/torch/csrc/api/include/torch/data/datasets/chunk.h b/torch/csrc/api/include/torch/data/datasets/chunk.h index 755d62c0306bcd..a32a7b21b569e8 100644 --- a/torch/csrc/api/include/torch/data/datasets/chunk.h +++ b/torch/csrc/api/include/torch/data/datasets/chunk.h @@ -74,7 +74,7 @@ class BatchDataBuffer { if (batch_queue_.empty()) { AT_ASSERT(stop_); // All batches have been retrieved. Return an empty batch. - return nullopt; + return std::nullopt; } UnwrappedBatchData batch = std::move(batch_queue_.front()); diff --git a/torch/csrc/api/include/torch/data/datasets/map.h b/torch/csrc/api/include/torch/data/datasets/map.h index f0a09fb4532619..6c4afd95501e92 100644 --- a/torch/csrc/api/include/torch/data/datasets/map.h +++ b/torch/csrc/api/include/torch/data/datasets/map.h @@ -85,7 +85,7 @@ class MapDataset : public BatchDataset< if (auto batch = dataset_.get_batch(std::move(indices))) { return transform_.apply_batch(std::move(*batch)); } - return nullopt; + return std::nullopt; } /// The underlying dataset being transformed. diff --git a/torch/csrc/api/include/torch/data/detail/data_shuttle.h b/torch/csrc/api/include/torch/data/detail/data_shuttle.h index b1e7ac87686882..6538c2b449c8ef 100644 --- a/torch/csrc/api/include/torch/data/detail/data_shuttle.h +++ b/torch/csrc/api/include/torch/data/detail/data_shuttle.h @@ -49,7 +49,7 @@ class DataShuttle { --in_flight_jobs_; return result; } - return nullopt; + return std::nullopt; } /// Discards any jobs that are not yet in flight, and waits for all in-flight diff --git a/torch/csrc/api/include/torch/data/detail/sequencers.h b/torch/csrc/api/include/torch/data/detail/sequencers.h index 779e21f3a4b687..69004d55fefe5f 100644 --- a/torch/csrc/api/include/torch/data/detail/sequencers.h +++ b/torch/csrc/api/include/torch/data/detail/sequencers.h @@ -90,7 +90,7 @@ struct OrderedSequencer : public Sequencer { buffer(result->sequence_number) = std::move(result); } // The result was an empty optional, so we are done with this epoch. - return nullopt; + return std::nullopt; } /// Accesses the buffer at the `index` modulo the buffer size. 
diff --git a/torch/csrc/api/src/data/samplers/distributed.cpp b/torch/csrc/api/src/data/samplers/distributed.cpp index 9f240570f75edf..8b59d691d6c8ef 100644 --- a/torch/csrc/api/src/data/samplers/distributed.cpp +++ b/torch/csrc/api/src/data/samplers/distributed.cpp @@ -26,7 +26,7 @@ DistributedRandomSampler::DistributedRandomSampler( std::optional> DistributedRandomSampler::next( size_t batch_size) { if (sample_index_ == end_index_) { - return nullopt; + return std::nullopt; } size_t end = sample_index_ + batch_size; @@ -109,7 +109,7 @@ DistributedSequentialSampler::DistributedSequentialSampler( std::optional> DistributedSequentialSampler::next( size_t batch_size) { if (sample_index_ == end_index_) { - return nullopt; + return std::nullopt; } size_t end = sample_index_ + batch_size; diff --git a/torch/csrc/api/src/data/samplers/random.cpp b/torch/csrc/api/src/data/samplers/random.cpp index dba9af5c49ec41..4c56acce6a07b1 100644 --- a/torch/csrc/api/src/data/samplers/random.cpp +++ b/torch/csrc/api/src/data/samplers/random.cpp @@ -20,11 +20,11 @@ void RandomSampler::reset(std::optional new_size) { index_ = 0; } -optional> RandomSampler::next(size_t batch_size) { +std::optional> RandomSampler::next(size_t batch_size) { AT_ASSERT(index_ <= indices_.numel()); const size_t remaining_indices = indices_.numel() - index_; if (remaining_indices == 0) { - return nullopt; + return std::nullopt; } std::vector index_batch(std::min(batch_size, remaining_indices)); auto slice = indices_.slice(/*dim=*/0, index_, index_ + index_batch.size()); diff --git a/torch/csrc/api/src/data/samplers/sequential.cpp b/torch/csrc/api/src/data/samplers/sequential.cpp index cd906e9c866bc1..1c5ed4baa2d758 100644 --- a/torch/csrc/api/src/data/samplers/sequential.cpp +++ b/torch/csrc/api/src/data/samplers/sequential.cpp @@ -19,7 +19,7 @@ void SequentialSampler::reset(std::optional new_size) { std::optional> SequentialSampler::next(size_t batch_size) { const auto remaining_indices = size_ - index_; if (remaining_indices == 0) { - return nullopt; + return std::nullopt; } std::vector index_batch(std::min(batch_size, remaining_indices)); for (auto& i : index_batch) { diff --git a/torch/csrc/api/src/data/samplers/stream.cpp b/torch/csrc/api/src/data/samplers/stream.cpp index 2281e8b9329d88..3a5a9e5142f9db 100644 --- a/torch/csrc/api/src/data/samplers/stream.cpp +++ b/torch/csrc/api/src/data/samplers/stream.cpp @@ -28,7 +28,7 @@ void StreamSampler::reset(std::optional new_size) { std::optional StreamSampler::next(size_t batch_size) { AT_ASSERT(examples_retrieved_so_far_ <= epoch_size_); if (examples_retrieved_so_far_ == epoch_size_) { - return nullopt; + return std::nullopt; } if (examples_retrieved_so_far_ + batch_size > epoch_size_) { batch_size = epoch_size_ - examples_retrieved_so_far_; diff --git a/torch/csrc/functorch/init.cpp b/torch/csrc/functorch/init.cpp index 5b78b5c27a4989..482fa5b5d79dc3 100644 --- a/torch/csrc/functorch/init.cpp +++ b/torch/csrc/functorch/init.cpp @@ -379,7 +379,7 @@ static std::optional maybe_current_level() { int64_t current_level = maybe_layer->layerId(); return current_level; } - return nullopt; + return std::nullopt; } static void tls_set_vmap_excluded(bool excluded) { diff --git a/torch/csrc/jit/mobile/train/random.cpp b/torch/csrc/jit/mobile/train/random.cpp index e608ade5f18a84..4d5edd2a14a642 100644 --- a/torch/csrc/jit/mobile/train/random.cpp +++ b/torch/csrc/jit/mobile/train/random.cpp @@ -24,7 +24,7 @@ std::optional> RandomSampler::next(size_t batch_size) { AT_ASSERT(index_ <= 
indices_.numel()); const size_t remaining_indices = indices_.numel() - index_; if (remaining_indices == 0) { - return nullopt; + return std::nullopt; } std::vector index_batch(std::min(batch_size, remaining_indices)); auto slice = indices_.slice(/*dim=*/0, index_, index_ + index_batch.size()); diff --git a/torch/csrc/jit/mobile/train/sequential.cpp b/torch/csrc/jit/mobile/train/sequential.cpp index 293424c4601139..3b76db5e8d0cb5 100644 --- a/torch/csrc/jit/mobile/train/sequential.cpp +++ b/torch/csrc/jit/mobile/train/sequential.cpp @@ -15,10 +15,10 @@ void SequentialSampler::reset(std::optional new_size) { index_ = 0; } -optional> SequentialSampler::next(size_t batch_size) { +std::optional> SequentialSampler::next(size_t batch_size) { const auto remaining_indices = size_ - index_; if (remaining_indices == 0) { - return nullopt; + return std::nullopt; } std::vector index_batch(std::min(batch_size, remaining_indices)); for (auto& i : index_batch) { From 87404b6ca6478cbc8dac65fbc486bde9997ab540 Mon Sep 17 00:00:00 2001 From: Bob Ren Date: Sun, 3 Nov 2024 20:08:32 -0800 Subject: [PATCH 007/503] support symfloats in translation validation (#139457) fixes `python test/dynamo/test_dynamic_shapes.py DynamicShapesHigherOrderOpTests.test_cond_pytree_operands_with_non_tensor_leaves_dynamic_shapes` when `specialize_float=False` Pull Request resolved: https://github.com/pytorch/pytorch/pull/139457 Approved by: https://github.com/ezyang ghstack dependencies: #139569 --- torch/fx/experimental/validator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/torch/fx/experimental/validator.py b/torch/fx/experimental/validator.py index ba1d66bc289a16..1dd6e27b9b3d8b 100644 --- a/torch/fx/experimental/validator.py +++ b/torch/fx/experimental/validator.py @@ -726,6 +726,8 @@ def new_with_shape_env(shape_env: ShapeEnv, fake) -> Any: return fake if isinstance(fake, torch.SymInt): return torch.SymInt(fake.node.with_shape_env(shape_env)) + if isinstance(fake, torch.SymFloat): + return torch.SymFloat(fake.node.with_shape_env(shape_env)) assert isinstance(fake, FakeTensorMeta) return FakeTensorMeta( tuple(new_with_shape_env(shape_env, s) for s in fake.size()), From 3ca794783f3fd7b3fec97135282ffa5426ddc73f Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Mon, 4 Nov 2024 16:34:34 +0000 Subject: [PATCH 008/503] Revert "[SymmetricMemory] introduce a binding for cuMemset32Async (#138755)" This reverts commit 924e726c3a2566125f55cdbff4dff054d3db3232. Reverted https://github.com/pytorch/pytorch/pull/138755 on behalf of https://github.com/ZainRizvi due to Sorry but this breaks internally. Can you please fix this PR so it works internally and re-merge it? 
See D65401876 for more details ([comment](https://github.com/pytorch/pytorch/pull/138755#issuecomment-2455173596)) --- .lintrunner.toml | 2 + c10/cuda/driver_api.h | 1 - caffe2/CMakeLists.txt | 1 - test/distributed/test_symmetric_memory.py | 29 --------- .../c10d/CUDASymmetricMemoryOps.cu | 65 ++----------------- torch/csrc/distributed/c10d/init.cpp | 18 +---- 6 files changed, 7 insertions(+), 109 deletions(-) diff --git a/.lintrunner.toml b/.lintrunner.toml index 0cd41c06037a01..ea5fc7a4f468ac 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -70,6 +70,8 @@ include_patterns = [ 'aten/src/ATen/native/cudnn/*.cpp', 'c10/**/*.h', 'c10/**/*.cpp', + 'distributed/c10d/*DMAConnectivity.*', + 'distributed/c10d/*SymmetricMemory.*', 'torch/csrc/**/*.h', 'torch/csrc/**/*.hpp', 'torch/csrc/**/*.cpp', diff --git a/c10/cuda/driver_api.h b/c10/cuda/driver_api.h index 65cbdfe878dc0a..d698beada411f8 100644 --- a/c10/cuda/driver_api.h +++ b/c10/cuda/driver_api.h @@ -30,7 +30,6 @@ _(cuMemGetAllocationGranularity) \ _(cuMemExportToShareableHandle) \ _(cuMemImportFromShareableHandle) \ - _(cuMemsetD32Async) \ _(cuStreamWriteValue32) \ _(cuGetErrorString) diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 018d9b23fde0f7..5e7e657ea6a61b 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -562,7 +562,6 @@ if(USE_CUDA) ${TORCH_SRC_DIR}/csrc/distributed/c10d/intra_node_comm.cpp ${TORCH_SRC_DIR}/csrc/distributed/c10d/CudaDMAConnectivity.cpp ${TORCH_SRC_DIR}/csrc/distributed/c10d/CUDASymmetricMemory.cu - ${TORCH_SRC_DIR}/csrc/distributed/c10d/CUDASymmetricMemoryOps.cu ${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupNCCL.cpp PROPERTIES COMPILE_FLAGS "-DPYTORCH_C10_DRIVER_API_SUPPORTED=1" ) diff --git a/test/distributed/test_symmetric_memory.py b/test/distributed/test_symmetric_memory.py index 09520143602a76..e9bfcdcc9bcfc4 100644 --- a/test/distributed/test_symmetric_memory.py +++ b/test/distributed/test_symmetric_memory.py @@ -24,11 +24,9 @@ from torch.testing._internal.common_utils import ( instantiate_parametrized_tests, parametrize, - requires_cuda, run_tests, skip_but_pass_in_sandcastle_if, skipIfRocm, - TestCase, ) @@ -851,32 +849,5 @@ def func_3(x): self.assertNotIn("return (buf0", code_3) -class SymmMemUtilTest(TestCase): - @skipIfRocm - @requires_cuda - def test_memset32(self): - t = _SymmetricMemory.empty_strided_p2p( - (64,), - (1,), - dtype=torch.uint32, - device=torch.device("cuda:0"), - group_name="0", - ).fill_(0) - - _SymmetricMemory.memset32(t, offset=32, val=1, count=16) - self.assertTrue(t[:32].eq(0).all()) - self.assertTrue(t[32:48].eq(1).all()) - self.assertTrue(t[48:].eq(0).all()) - - with self.assertRaises(RuntimeError): - _SymmetricMemory.memset32(t, offset=-1, val=1, count=16) - - with self.assertRaises(RuntimeError): - _SymmetricMemory.memset32(t, offset=32, val=4294967296, count=16) - - with self.assertRaises(RuntimeError): - _SymmetricMemory.memset32(t, offset=32, val=1, count=-1) - - if __name__ == "__main__": run_tests() diff --git a/torch/csrc/distributed/c10d/CUDASymmetricMemoryOps.cu b/torch/csrc/distributed/c10d/CUDASymmetricMemoryOps.cu index ec514b3912ca02..ee619f821161f8 100644 --- a/torch/csrc/distributed/c10d/CUDASymmetricMemoryOps.cu +++ b/torch/csrc/distributed/c10d/CUDASymmetricMemoryOps.cu @@ -1,7 +1,8 @@ +#if defined(CUDART_VERSION) && CUDART_VERSION >= 12030 + #include #include #include -#include #ifndef AT_PER_OPERATOR_HEADERS #include @@ -10,10 +11,6 @@ #include #endif -#if !defined(USE_ROCM) && 
defined(PYTORCH_C10_DRIVER_API_SUPPORTED) -#include -#endif - #include #include @@ -111,7 +108,6 @@ void init_elementwise_launch_config( } } -#if defined(CUDART_VERSION) && CUDART_VERSION >= 12030 template static __global__ void multimem_all_reduce_kernel( T* input_mc_ptr, @@ -290,7 +286,6 @@ at::Tensor multimem_one_shot_all_reduce( auto out = at::empty_like(input); return multimem_one_shot_all_reduce_out(input, reduce_op, group_name, out); } -#endif // One-shot all-reduce is register-intensive because it stages values loaded // from peers in registers before performing reduction. Setting the thread @@ -496,56 +491,7 @@ at::Tensor two_shot_all_reduce_( return input; } -at::Tensor memset32_( - at::Tensor& input, - int64_t offset, - int64_t val, - int64_t count) { -#if !defined(USE_ROCM) && defined(PYTORCH_C10_DRIVER_API_SUPPORTED) - TORCH_CHECK( - input.dim() == 1 && input.is_contiguous() && - input.scalar_type() == c10::ScalarType::UInt32, - "CUDASymmetricMemoryUtils::memset32: input must be " - "a flat, contiguous uint32 tensor."); - - TORCH_CHECK( - offset > 0 && count > 0, - "CUDASymmetricMemoryUtils::memset32: " - "offset and count must be a positive integers.") - - TORCH_CHECK( - val >= 0 && - static_cast(val) <= std::numeric_limits::max(), - "CUDASymmetricMemoryUtils::memset32: " - "val must be in the range of [0, 4294967295] (uint32_t).") - - auto element_size = c10::elementSize(input.scalar_type()); - TORCH_CHECK( - offset + count <= input.numel(), - "CUDASymmetricMemoryUtils::memset32: offset + count (", - offset + count, - ") exceeded the numel of the input (", - input.numel(), - ")"); - - auto addr = reinterpret_cast(input.data_ptr()) + offset; - - c10::cuda::CUDAGuard guard(input.device()); - auto driver_api = c10::cuda::DriverAPI::get(); - C10_CUDA_DRIVER_CHECK(driver_api->cuMemsetD32Async_( - reinterpret_cast(addr), - val, - count, - at::cuda::getCurrentCUDAStream())); -#else - TORCH_CHECK( - false, "CUDASymmetricMemory requires PYTORCH_C10_DRIVER_API_SUPPORTED"); -#endif - return input; -} - TORCH_LIBRARY_FRAGMENT(symm_mem, m) { -#if defined(CUDART_VERSION) && CUDART_VERSION >= 12030 m.def( "multimem_all_reduce_(Tensor(a!) input, str reduce_op, str group_name) -> Tensor(a!)", torch::dispatch(c10::DispatchKey::CUDA, ::multimem_all_reduce_), @@ -585,11 +531,8 @@ TORCH_LIBRARY_FRAGMENT(symm_mem, m) { "two_shot_all_reduce_(Tensor(a!) input, str reduce_op, str group_name) -> Tensor(a!)", torch::dispatch(c10::DispatchKey::CUDA, ::two_shot_all_reduce_), {at::Tag::pt2_compliant_tag}); -#endif - m.def( - "memset32_(Tensor(a!) input, int offset, int val, int count) -> Tensor(a!)", - torch::dispatch(c10::DispatchKey::CUDA, ::memset32_), - {at::Tag::pt2_compliant_tag}); } } // namespace + +#endif diff --git a/torch/csrc/distributed/c10d/init.cpp b/torch/csrc/distributed/c10d/init.cpp index ea760b068ec2f6..b71aa7886bfdea 100644 --- a/torch/csrc/distributed/c10d/init.cpp +++ b/torch/csrc/distributed/c10d/init.cpp @@ -1122,23 +1122,7 @@ This class does not support ``__members__`` property.)"); "stream_write_value32", &SymmetricMemory::stream_write_value32, py::arg("addr"), - py::arg("val")) - // Util functions that are often used together with symmetric memory but - // not necessarily directly on symmetric memory. 
- .def_static( - "memset32", - [](at::Tensor& input, int64_t offset, int64_t val, int64_t count) { - // The range of `val` is checked inside the op - auto op = c10::Dispatcher::singleton() - .findSchemaOrThrow("symm_mem::memset32_", "") - .typed(); - return op.call(input, offset, val, count); - }, - py::arg("input"), - py::arg("offset"), - py::arg("val"), - py::arg("count") = 1); + py::arg("val")); auto store = py::class_<::c10d::Store, c10::intrusive_ptr<::c10d::Store>, PythonStore>( From d3fc13a9dd186ceb8d1b56b0968a41686ea645cd Mon Sep 17 00:00:00 2001 From: Natalia Gimelshein Date: Mon, 4 Nov 2024 16:43:33 +0000 Subject: [PATCH 009/503] use more elements per thread for narrow dtypes (#139449) Fix perf issue for narrow type by accessing more elements per thread Pull Request resolved: https://github.com/pytorch/pytorch/pull/139449 Approved by: https://github.com/Chillee, https://github.com/eqy --- aten/src/ATen/native/cuda/CUDALoops.cuh | 55 ++++++++++++++++++---- aten/src/ATen/native/cuda/Loops.cuh | 7 +-- aten/src/ATen/native/cuda/MemoryAccess.cuh | 29 ++++++------ aten/src/ATen/test/cuda_vectorized_test.cu | 2 +- test/test_reductions.py | 1 - 5 files changed, 67 insertions(+), 27 deletions(-) diff --git a/aten/src/ATen/native/cuda/CUDALoops.cuh b/aten/src/ATen/native/cuda/CUDALoops.cuh index c1cfe8791a796f..024416f25a21a1 100644 --- a/aten/src/ATen/native/cuda/CUDALoops.cuh +++ b/aten/src/ATen/native/cuda/CUDALoops.cuh @@ -52,13 +52,49 @@ namespace at::native { + +template +constexpr auto sum_of_sizes(args_t args, std::index_sequence) { + if constexpr (sizeof...(Is) == 0) { + return 0; + } else { + return (sizeof(std::tuple_element_t) + ...); + } +} + +template +constexpr auto elems_per_thread(){ + if constexpr (io_sizes == 1) { + return 16; + } else if constexpr (io_sizes < 4) { + return 8; + } else { + return 4; + } +} + +template +constexpr auto io_block_work_size() { + return num_threads() * elems_per_thread(); +} + +template +constexpr auto calc_io_size(){ + using traits = function_traits; + using args_t = typename traits::ArgsTuple; + constexpr auto input_size = at::native::sum_of_sizes(args_t{}, std::make_index_sequence>{}); + constexpr auto output_size = sizeof(typename traits::result_type); + return input_size + output_size; +} + template C10_LAUNCH_BOUNDS_1(num_threads()) __global__ void vectorized_elementwise_kernel(int N, func_t f, array_t data) { using traits = function_traits; - int remaining = N - block_work_size() * blockIdx.x; + constexpr auto io_size = calc_io_size(); + int remaining = N - io_block_work_size() * blockIdx.x; - if (remaining < block_work_size()) { // if this block handles the reminder, + if (remaining < io_block_work_size()) { // if this block handles the reminder, // just do a naive unrolled loop auto input_calc = TrivialOffsetCalculator(); auto output_calc = TrivialOffsetCalculator<1>(); @@ -69,19 +105,21 @@ __global__ void vectorized_elementwise_kernel(int N, func_t f, array_t data) { decltype(input_calc), decltype(output_calc), memory::LoadWithoutCast, - memory::StoreWithoutCast>( + memory::StoreWithoutCast, + elems_per_thread()>( data, remaining, input_calc, output_calc, loader, storer); elementwise_kernel_helper(f, policy); } else { // if this block has a full `block_work_size` data to handle, use // vectorized memory access elementwise_kernel_helper( - f, memory::policies::vectorized(data)); + f, memory::policies::vectorized()>(data)); } } template < typename func_t, typename array_t, + int elems_per_thread, typename inp_calc_t, typename 
out_calc_t, typename loader_t, @@ -97,7 +135,7 @@ __global__ void unrolled_elementwise_kernel( storer_t s) { int remaining = N - block_work_size() * blockIdx.x; auto policy = memory::policies:: - unroll( + unroll( data, remaining, ic, oc, l, s); elementwise_kernel_helper(f, policy); } @@ -110,7 +148,8 @@ static inline void launch_vectorized_kernel( array_t data) { TORCH_INTERNAL_ASSERT(N > 0 && N <= std::numeric_limits::max()); using traits = function_traits; - int64_t grid = (N + block_work_size() - 1) / block_work_size(); + constexpr auto io_size = calc_io_size(); + int64_t grid = (N + io_block_work_size() - 1) / io_block_work_size(); auto stream = at::cuda::getCurrentCUDAStream(); int vec_size = memory::can_vectorize_up_to(data); @@ -130,7 +169,7 @@ static inline void launch_vectorized_kernel( auto output_calc = TrivialOffsetCalculator<1>(); auto loader = memory::LoadWithoutCast(); auto storer = memory::StoreWithoutCast(); - unrolled_elementwise_kernel + unrolled_elementwise_kernel()> <<>>( N, f, data, input_calc, output_calc, loader, storer); C10_CUDA_KERNEL_LAUNCH_CHECK(); @@ -159,7 +198,7 @@ static inline void launch_unrolled_kernel( TORCH_INTERNAL_ASSERT(N > 0 && N <= std::numeric_limits::max()); int64_t grid = (N + block_work_size() - 1) / block_work_size(); auto stream = at::cuda::getCurrentCUDAStream(); - unrolled_elementwise_kernel + unrolled_elementwise_kernel <<>>(N, f, data, ic, oc, l, s); C10_CUDA_KERNEL_LAUNCH_CHECK(); } diff --git a/aten/src/ATen/native/cuda/Loops.cuh b/aten/src/ATen/native/cuda/Loops.cuh index d157d44ade9de4..1af48c15f298cd 100644 --- a/aten/src/ATen/native/cuda/Loops.cuh +++ b/aten/src/ATen/native/cuda/Loops.cuh @@ -46,18 +46,19 @@ __device__ inline void elementwise_kernel_helper(func_t f, policy_t policy) { using traits = function_traits; using return_t = typename traits::result_type; using args_t = typename traits::ArgsTuple; + constexpr int elems_per_thread = policy_t::tws; int idx = blockIdx.x; - return_t results[thread_work_size()]; - args_t args[thread_work_size()]; + return_t results[elems_per_thread]; + args_t args[elems_per_thread]; // load policy.load(args, idx); // compute #pragma unroll - for (int i = 0; i < thread_work_size(); i++) { + for (int i = 0; i < elems_per_thread; i++) { if (policy.check_inbounds(i)) { results[i] = c10::guts::apply(f, args[i]); } diff --git a/aten/src/ATen/native/cuda/MemoryAccess.cuh b/aten/src/ATen/native/cuda/MemoryAccess.cuh index ea16c44201c625..2d87488937254d 100644 --- a/aten/src/ATen/native/cuda/MemoryAccess.cuh +++ b/aten/src/ATen/native/cuda/MemoryAccess.cuh @@ -57,11 +57,11 @@ struct static_unroll { template struct vectorized_load_helper { template - static __device__ void apply(policy_t &self, args_t *args, int idx) { + static __device__ void apply(policy_t &self, args_t *args, int idx, int block_work_size) { using arg_t = std::tuple_element_t; // `data` hold the data_ptr for tensors [output, input0, input1, ...], so we // need a +1 offset to get the input - auto ptr = reinterpret_cast(self.data[arg_index + 1]) + block_work_size() * idx; + auto ptr = reinterpret_cast(self.data[arg_index + 1]) + block_work_size * idx; auto args_accessor = [&args] __device__ (int thread_unroll_idx) -> arg_t & { return std::get(args[thread_unroll_idx]); }; self.load_single_arg(args_accessor, ptr); } @@ -181,9 +181,7 @@ __device__ aligned_vector load_vector(const bool *base_ptr, uint namespace policies { -// Assumption: -// all tensors are contiguous, that is: stride == sizeof(type) for all tensors -template +template struct 
unroll { data_t data; @@ -192,6 +190,7 @@ struct unroll { out_calc_t output_offset_calculator; loader_t loader; storer_t storer; + static constexpr int tws = elems_per_thread; __device__ unroll(data_t data, int remaining, inp_calc_t ic, out_calc_t oc, loader_t l, storer_t s): data(data), remaining(remaining), input_offset_calculator(ic), output_offset_calculator(oc), loader(l), storer(s) {} @@ -205,11 +204,11 @@ struct unroll { constexpr int arity = std::tuple_size_v; int thread_idx = threadIdx.x; #pragma unroll - for (int i = 0; i < thread_work_size(); i++) { + for (int i = 0; i < elems_per_thread; i++) { if (thread_idx >= remaining) { return; } - int linear_idx = thread_idx + block_work_size() * idx; + int linear_idx = thread_idx + elems_per_thread * num_threads() * idx; auto offset = input_offset_calculator.get(linear_idx); detail::static_unroll::with_args(*this, args, offset, loader, i, num_outputs); thread_idx += num_threads(); @@ -220,11 +219,11 @@ struct unroll { __device__ inline void store(scalar_t *from, int idx) { int thread_idx = threadIdx.x; #pragma unroll - for (int i = 0; i < thread_work_size(); i++) { + for (int i = 0; i < elems_per_thread; i++) { if (thread_idx >= remaining) { return; } - int linear_idx = thread_idx + block_work_size() * idx; + int linear_idx = thread_idx + elems_per_thread * num_threads() * idx; int offset = output_offset_calculator.get(linear_idx)[0]; storer.store(from[i], data[0], offset); thread_idx += num_threads(); @@ -237,11 +236,12 @@ struct unroll { // Note: // Functions in vectorized policy does not do boundary check. It assumes the whole block // has its job to do. So the reminders should be handled by the caller manually. -template // vec_size: number of scalars, can be 1, 2, or 4. +template // vec_size: number of scalars, can be 1, 2, or 4. 
struct vectorized { - static_assert(thread_work_size() % vec_size == 0, "The workload per thread must be a multiple of vec_size"); - static constexpr int loop_size = thread_work_size() / vec_size; + static_assert(elems_per_thread % vec_size == 0, "The workload per thread must be a multiple of vec_size"); + static constexpr int loop_size = elems_per_thread / vec_size; + static constexpr int tws = elems_per_thread; data_t data; @@ -268,13 +268,13 @@ struct vectorized { template __device__ inline void load(args_t *args, int idx) { constexpr int arity = std::tuple_size_v; - detail::static_unroll::with_args(*this, args, idx); + detail::static_unroll::with_args(*this, args, idx, elems_per_thread * num_threads()); } template __device__ inline void store(scalar_t *from, int idx) { using vec_t = aligned_vector; - scalar_t *to = reinterpret_cast(data[0]) + block_work_size() * idx; + scalar_t *to = reinterpret_cast(data[0]) + elems_per_thread * num_threads() * idx; vec_t *to_ = reinterpret_cast(to); int thread_idx = threadIdx.x; #pragma unroll @@ -299,6 +299,7 @@ struct multi_outputs_unroll { out_calc_t output_offset_calculator; LoadWithoutCast loader; StoreWithoutCast storer; + static constexpr int tws = thread_work_size(); __device__ multi_outputs_unroll(data_t data, int remaining, inp_calc_t ic, out_calc_t oc): data(data), remaining(remaining), input_offset_calculator(ic), output_offset_calculator(oc) {} diff --git a/aten/src/ATen/test/cuda_vectorized_test.cu b/aten/src/ATen/test/cuda_vectorized_test.cu index 2bd192c07e6e52..9ac119f1e6805b 100644 --- a/aten/src/ATen/test/cuda_vectorized_test.cu +++ b/aten/src/ATen/test/cuda_vectorized_test.cu @@ -82,7 +82,7 @@ __global__ void vectorized_copy(scalar_t *dst, scalar_t *src) { data[0] = reinterpret_cast(dst); data[1] = reinterpret_cast(src); int idx = blockIdx.x; - using vectorized = policies::vectorized; + using vectorized = policies::vectorized; auto policy = vectorized(data); scalar_t buf[thread_work_size()]; #if !defined(USE_ROCM) diff --git a/test/test_reductions.py b/test/test_reductions.py index 8a1f5fd8e8e440..7bb0a79e59c268 100644 --- a/test/test_reductions.py +++ b/test/test_reductions.py @@ -1045,7 +1045,6 @@ def test_mode_boolean(self, device): a[:, (shape[1] - 1) // 2:] = True values, indices = a.mode(-1) self.assertEqual(values, torch.ones(shape[0], dtype=torch.bool)) - print(indices) indexed = a.gather(1, indices.unsqueeze(1)).squeeze(1) self.assertEqual(values, indexed) From 68c515b29278a1a532e79b68e1f89093c8b85a82 Mon Sep 17 00:00:00 2001 From: Bob Ren Date: Mon, 4 Nov 2024 08:55:29 -0800 Subject: [PATCH 010/503] don't run z3 analysis on backed symfloat nodes (#139568) Pull Request resolved: https://github.com/pytorch/pytorch/pull/139568 Approved by: https://github.com/ezyang ghstack dependencies: #139569, #139457 --- torch/fx/experimental/symbolic_shapes.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/torch/fx/experimental/symbolic_shapes.py b/torch/fx/experimental/symbolic_shapes.py index 646ba542c8bc16..eb838e799ffa4e 100644 --- a/torch/fx/experimental/symbolic_shapes.py +++ b/torch/fx/experimental/symbolic_shapes.py @@ -6150,6 +6150,8 @@ def compute_concrete_val() -> sympy.Basic: # 1. 'translation_validation' is set # 2. the corresponding 'fx_node' is not 'None' # 3. the guard should not be suppressed + # 4. the guard doesn't contain backed symfloat symbols + # since z3 can't handle floats # # If all of the above check, we create an FX node representing the # actual expression to be guarded. 
@@ -6160,6 +6162,7 @@ def compute_concrete_val() -> sympy.Basic: and fx_node is not None and not self._suppress_guards_tls() and not size_oblivious + and not any(symbol_is_type(s, SymT.FLOAT) for s in orig_expr.free_symbols) ): # TODO: does this even worked with unbacked :think: concrete_val = compute_concrete_val() From e080c89bdc21e19875358ce594f0ddf194cda20a Mon Sep 17 00:00:00 2001 From: Tugsbayasgalan Manlaibaatar Date: Sun, 3 Nov 2024 15:44:49 -0800 Subject: [PATCH 011/503] Make test_torchbind.py training IR compatible (#138658) In this diff, I make the test_torchbind.py tests handle the training IR. Today in the training IR, we don't see the effect token and HOP because this happens at the FunctionalTensorMode. Maybe in the future, we should move this logic up to the training IR so that writing passes etc. on the training IR is safer. But for migration purposes, I think it is OK for now. I also fixed three bugs: 1. ep.module() doesn't register all aliased constants in the module. 2. When we retrace, we need to fakify the original Torchbind object. 3. We don't run any DCE on the training IR, so we need to add some more torch ops to the verifier. Differential Revision: [D64853530](https://our.internmc.facebook.com/intern/diff/D64853530) Pull Request resolved: https://github.com/pytorch/pytorch/pull/138658 Approved by: https://github.com/ydwu4, https://github.com/zhxchen17 --- test/export/test_export.py | 15 +++++++++++++-- test/export/test_serialize.py | 1 + test/export/test_torchbind.py | 29 ++++++++++++++++++++--------- torch/_export/verifier.py | 1 + torch/export/_trace.py | 17 +++++++++++++++-- torch/export/_unlift.py | 16 +++++++++------- torch/export/exported_program.py | 19 +++++++++++-------- 7 files changed, 70 insertions(+), 28 deletions(-) diff --git a/test/export/test_export.py index 6a52cfccb1c734..891771ef37ac7b 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -1071,7 +1071,17 @@ def forward(self, x, y): self.assertEqual(ep.module()(*inputs), model(*inputs)) x = torch.zeros(64) y = torch.ones(64) - self.assertEqual(ep.module()(x, x), model(x, x)) + # This seems to be a bug with old export because when we pass in x, x + # as input, runtime assertion should fail. This is because we would create + # guard on y.shape[0] > x.shape[0] but somehow in old export, we dce this + # assertion.
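For context on the export path these test updates switch to, here is a rough sketch with a made-up module (not code from this patch): the training-IR flow pairs `torch.export.export_for_training` with an empty `run_decompositions`, where the old tests went through the pre-dispatch `_export` helper.

```python
import torch


class M(torch.nn.Module):  # placeholder module, for illustration only
    def forward(self, x):
        return x.sin() + 1


args = (torch.randn(3, 3),)
# training-IR path exercised by the updated torchbind tests
ep = torch.export.export_for_training(M(), args, strict=False).run_decompositions({})
print(ep.graph_module.code)
```

The updated tests below use exactly this pair of calls in place of `_export(..., pre_dispatch=True)`.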
+ if is_training_ir_test(self._testMethodName) and is_non_strict_test( + self._testMethodName + ): + with self.assertRaisesRegex(RuntimeError, "Runtime assertion failed for"): + ep.module()(x, x) + else: + self.assertEqual(ep.module()(x, x), model(x, x)) self.assertEqual(ep.module()(x, y), model(x, y)) def test_draft_export_checks_mutation_with_nan(self): @@ -7469,7 +7479,8 @@ def forward(self, x): def test_symint_tensor_return(self): class Module(torch.nn.Module): def forward(self, x): - return torch.ops.testlib.returns_tensor_symint(x)[0] + a, b = torch.ops.testlib.returns_tensor_symint(x) + return a, b self._test_export_same_as_eager(Module(), (torch.randn(4, 4),)) diff --git a/test/export/test_serialize.py b/test/export/test_serialize.py index aaacbf69b90f1c..af233a35b794be 100644 --- a/test/export/test_serialize.py +++ b/test/export/test_serialize.py @@ -806,6 +806,7 @@ def forward(self, a, b, c) -> torch.Tensor: dynamic_shapes = {"a": {0: dim0_ac}, "b": None, "c": {0: dim0_ac}} self.check_graph(DynamicShapeSimpleModel(), inputs, dynamic_shapes) + @unittest.expectedFailure # T206587081 def test_sym_bool(self): class Module(torch.nn.Module): def forward(self, x, y): diff --git a/test/export/test_torchbind.py b/test/export/test_torchbind.py index 997aeecd37ddaf..d19ec645e3396b 100644 --- a/test/export/test_torchbind.py +++ b/test/export/test_torchbind.py @@ -10,7 +10,6 @@ from torch._higher_order_ops.torchbind import enable_torchbind_tracing from torch._higher_order_ops.wrap import wrap from torch._library.fake_class_registry import FakeScriptObject -from torch.export import export from torch.export._trace import _export from torch.fx.experimental.proxy_tensor import make_fx from torch.testing._internal.common_utils import ( @@ -134,14 +133,16 @@ def _test_export_same_as_eager( ): kwargs = kwargs or {} - def export_wrapper(f, args, kwargs, strcit, pre_dispatch): + def export_wrapper(f, args, kwargs, strict, pre_dispatch): with enable_torchbind_tracing(): if pre_dispatch: + exported_program = torch.export.export_for_training( + f, args, kwargs, strict=strict + ).run_decompositions({}) + else: exported_program = _export( - f, args, kwargs, strict=strict, pre_dispatch=True + f, args, kwargs, strict=strict, pre_dispatch=False ) - else: - exported_program = export(f, args, kwargs, strict=strict) return exported_program exported_program = export_wrapper(f, args, kwargs, strict, pre_dispatch) @@ -314,7 +315,10 @@ def forward(self, token, x, cc): # aot_export_function runs the program twice # in run_functionalized_fw_and_collect_metadata and create_aot_dispatcher_function # We also have a re-tracing test, which doubles the count. 
- self.assertEqual(self.foo_add_tensor_counter, 4) + if pre_dispatch: + self.assertEqual(self.foo_add_tensor_counter, 6) + else: + self.assertEqual(self.foo_add_tensor_counter, 4) @parametrize("pre_dispatch", [True, False]) def test_input_as_custom_op_argument(self, pre_dispatch): @@ -693,7 +697,9 @@ def forward(self, tq, x): b = torch.randn(2, 2) tq.push(a) tq.push(b) - ep = torch.export.export(mod, (tq, torch.randn(2, 2)), strict=False) + ep = torch.export.export_for_training( + mod, (tq, torch.randn(2, 2)), strict=False + ).run_decompositions({}) self.assertExpectedInline( ep.graph_module.code.strip(), """\ @@ -721,6 +727,7 @@ def forward(self, token, p_linear_weight, p_linear_bias, tq, x): self.assertTrue(tq.pop() is a) self.assertTrue(tq.pop() is b) + @unittest.expectedFailure # T205481814 @skipIfCrossRef # arg names change with torch function mode def test_safe_to_trace_with_real(self): x = torch.randn(3, 3) @@ -745,7 +752,9 @@ def forward(self, L_safe_obj_ : torch.ScriptObject): ) with enable_torchbind_tracing(): - ep = torch.export.export(mod, (safe_obj,), strict=False) + ep = torch.export.export_for_training( + mod, (safe_obj,), strict=False + ).run_decompositions({}) self.assertExpectedInline( ep.graph_module.code.strip(), """\ @@ -1338,7 +1347,9 @@ def forward(self, obj, x): mod = TestMod() torch.compile(mod, backend=backend, fullgraph=True)(test_obj, torch.randn(3, 1)) - ep = torch.export.export(mod, (test_obj, torch.randn(3, 1)), strict=False) + ep = torch.export.export_for_training( + mod, (test_obj, torch.randn(3, 1)), strict=False + ).run_decompositions({}) self.assertExpectedInline( ep.graph_module.code.strip(), """\ diff --git a/torch/_export/verifier.py b/torch/_export/verifier.py index 7919cd51f824e5..84b8a37ad16836 100644 --- a/torch/_export/verifier.py +++ b/torch/_export/verifier.py @@ -190,6 +190,7 @@ def _allowed_op_types() -> Tuple[Type[Any], ...]: torch._C._set_grad_enabled, torch.amp.autocast_mode._enter_autocast, torch.amp.autocast_mode._exit_autocast, + torch.fx.experimental.symbolic_shapes.cast_symbool_to_symint_guardless, ) if not isinstance(op, _allowed_op_types()): diff --git a/torch/export/_trace.py b/torch/export/_trace.py index adb95cdcdc4dc7..e1e4d1bebe2a31 100644 --- a/torch/export/_trace.py +++ b/torch/export/_trace.py @@ -1496,8 +1496,6 @@ def wrapped_fn(*args): hook.remove() # type: ignore[possibly-undefined] - gm.graph.eliminate_dead_code() - # create graph signature input_names = _graph_input_names(gm) output_names = _graph_output_names(gm) @@ -1541,6 +1539,21 @@ def wrapped_fn(*args): kwargs=fake_kwargs, ) + # [NOTE] In training IR, we don't run + # any DCE as a result we preserve constant + # nodes in the graph. make_fx invariant is that + # they don't guarantee every node gets a meta['val'] + # field. Since the actual value is already hardcoded in + # graph, the node.meta here actually doesn't matter. But + # we do this to make spec verifier happy. 
+ for node in gm.graph.nodes: + if ( + node.op == "call_function" + and len(node.users) == 0 + and "val" not in node.meta + ): + node.meta["val"] = None + if isinstance(mod, torch.fx.GraphModule) and hasattr(mod, "meta"): gm.meta.update(mod.meta) diff --git a/torch/export/_unlift.py b/torch/export/_unlift.py index a422950fa4788e..fd1b3d15bd06e2 100644 --- a/torch/export/_unlift.py +++ b/torch/export/_unlift.py @@ -331,12 +331,12 @@ def _create_stateful_graph_module( detached_value = value.detach() original_tensor_to_detached_tensor[value] = detached_value value = detached_value - _assign_attr( - value, - stateful_gm, - const_name, - attr_kind=_AttrKind.CONSTANT, - ) + _assign_attr( + value, + stateful_gm, + const_name, + attr_kind=_AttrKind.CONSTANT, + ) # Fix up non-persistent buffers. torch.fx does not distinguish between # persistent and non-persistent buffers, so we must restore that distinction @@ -354,7 +354,9 @@ def _create_stateful_graph_module( def _unlift_exported_program_lifted_states(ep: ExportedProgram) -> torch.nn.Module: - ep = _remove_effect_tokens(ep) + # TODO T206340015 + if ep.verifiers[0].dialect != "TRAINING": + ep = _remove_effect_tokens(ep) new_gm = torch.fx.GraphModule(ep.graph_module, copy.deepcopy(ep.graph)) _register_attrs_to_new_gm(new_gm, ep.graph_signature, ep.state_dict, ep.constants) forward_arg_names = ( diff --git a/torch/export/exported_program.py b/torch/export/exported_program.py index c9214494ab50dd..a4a642f9e8d4e1 100644 --- a/torch/export/exported_program.py +++ b/torch/export/exported_program.py @@ -24,7 +24,7 @@ ) from torch._higher_order_ops.utils import autograd_not_implemented -from torch._library.fake_class_registry import FakeScriptObject +from torch._library.fake_class_registry import FakeScriptObject, maybe_to_fake_obj from torch._subclasses.fake_tensor import FakeTensorMode from torch.fx._utils import first_call_function_nn_module_stack from torch.fx.graph import _PyTreeCodeGen, _PyTreeInfo @@ -345,18 +345,21 @@ def _is_joint_ir_decomp(ep, joint_loss_index): if not _is_joint_ir_decomp(ep, joint_loss_index): mod = ep.module() - - fake_args = [] - for node in mod.graph.nodes: - if node.op == "placeholder": - fake_args.append(node.meta["val"]) - - fake_args_unwrapped = pytree.tree_unflatten(fake_args, mod._in_spec) # TODO T204030333 fake_mode = _detect_fake_mode_from_gm(ep.graph_module) if fake_mode is None: fake_mode = FakeTensorMode(shape_env=ShapeEnv(), export=True) + fake_args = [] + for node in mod.graph.nodes: + if node.op == "placeholder": + if isinstance(node.meta["val"], CustomObjArgument): + fake_args.append( + maybe_to_fake_obj(fake_mode, node.meta["val"].fake_val.real_obj) # type: ignore[union-attr] + ) + else: + fake_args.append(node.meta["val"]) + fake_args_unwrapped = pytree.tree_unflatten(fake_args, mod._in_spec) # Fix the graph output signature to be tuple if scalar out_spec = mod._out_spec From 6dada2136aa3cd8d3752c1b7f10abf5e97c838d1 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Mon, 4 Nov 2024 17:44:44 +0000 Subject: [PATCH 012/503] Revert "Refactor FxGraphDrawer to use HTML-like labels (#137726)" This reverts commit 1e738420296a84406cd0a1626074ea6447a6603a. 
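For reference, the drawer API touched by this revert is exercised roughly the way the regression test deleted below did; a minimal sketch (assuming pydot and the graphviz binaries are installed, and using a toy function rather than the test's model):

```python
import torch
from torch.fx.passes.graph_drawer import FxGraphDrawer


def f(x):
    return torch.relu(x) + 1


gm = torch.fx.symbolic_trace(f)       # any GraphModule works here
drawer = FxGraphDrawer(gm, name="f")  # record-shaped dot labels again after this revert
dot = drawer.get_main_dot_graph()
dot.write_svg("f.svg")                # pydot's write_* helpers shell out to graphviz
```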
Reverted https://github.com/pytorch/pytorch/pull/137726 on behalf of https://github.com/huydhn due to Sorry for reverting your change, but it looks like some internal components are failing after this change and need to be updated ([comment](https://github.com/pytorch/pytorch/pull/137726#issuecomment-2455332612)) --- .ci/docker/build.sh | 1 - .ci/docker/common/install_graphviz.sh | 16 --- .ci/docker/requirements-ci.txt | 5 - .ci/docker/ubuntu/Dockerfile | 7 -- test/fx/test_graph_drawer.py | 43 ------- test/test_fx.py | 1 - torch/_functorch/partitioners.py | 2 + torch/_inductor/config.py | 10 ++ torch/_inductor/debug.py | 5 +- torch/fx/passes/graph_drawer.py | 171 +++++++++++++++----------- 10 files changed, 113 insertions(+), 148 deletions(-) delete mode 100644 .ci/docker/common/install_graphviz.sh delete mode 100644 test/fx/test_graph_drawer.py diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh index ce8dbe8b0d8a1d..0c44c68248253a 100755 --- a/.ci/docker/build.sh +++ b/.ci/docker/build.sh @@ -265,7 +265,6 @@ case "$image" in SWIFTSHADER=yes CONDA_CMAKE=yes TRITON=yes - GRAPHVIZ=yes ;; pytorch-linux-focal-py3.9-gcc9) ANACONDA_PYTHON_VERSION=3.9 diff --git a/.ci/docker/common/install_graphviz.sh b/.ci/docker/common/install_graphviz.sh deleted file mode 100644 index ec44ba8ca978f9..00000000000000 --- a/.ci/docker/common/install_graphviz.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -set -ex - -source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh" - -if [ -n "${UBUNTU_VERSION}" ]; then - apt update - apt-get install -y graphviz -elif [ -n "${CENTOS_VERSION}" ]; then - dnf update - dnf install -y graphviz -else - echo "Unsupported Linux distribution" - exit 1 -fi diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt index 777bab46796e31..9c847d7cdfba97 100644 --- a/.ci/docker/requirements-ci.txt +++ b/.ci/docker/requirements-ci.txt @@ -205,11 +205,6 @@ xdoctest==1.1.0 #Pinned versions: 1.1.0 #test that import: -pydot==3.0.1 -#Description: Needed for testing FxGraphDrawer -#Pinned versions: -#test that import: - pygments==2.15.0 #Description: support doctest highlighting #Pinned versions: 2.12.0 diff --git a/.ci/docker/ubuntu/Dockerfile b/.ci/docker/ubuntu/Dockerfile index e05bfeef59d7d0..8b9eba7e87168d 100644 --- a/.ci/docker/ubuntu/Dockerfile +++ b/.ci/docker/ubuntu/Dockerfile @@ -170,13 +170,6 @@ RUN if [ -n "${ACL}" ]; then bash ./install_acl.sh; fi RUN rm install_acl.sh ENV INSTALLED_ACL ${ACL} -# (optional) install graphviz -ARG GRAPHVIZ -COPY ./common/install_graphviz.sh install_graphviz.sh -RUN if [ -n "${GRAPHVIZ}" ]; then bash ./install_graphviz.sh; fi -RUN rm install_graphviz.sh -ENV INSTALLED_GRAPHVIZ ${GRAPHVIZ} - # Install ccache/sccache (do this last, so we get priority in PATH) ARG SKIP_SCCACHE_INSTALL COPY ./common/install_cache.sh install_cache.sh diff --git a/test/fx/test_graph_drawer.py b/test/fx/test_graph_drawer.py deleted file mode 100644 index 260b75314ead55..00000000000000 --- a/test/fx/test_graph_drawer.py +++ /dev/null @@ -1,43 +0,0 @@ -# Owner(s): ["oncall: fx"] - -import os -import tempfile - -import torch -import torch._dynamo as dynamo -from torch.fx.passes.graph_drawer import FxGraphDrawer -from torch.nn import LayerNorm -from torch.testing._internal.common_utils import run_tests, TestCase - - -class TestGraphDrawer(TestCase): - def test_that_graph_with_subgraph_draws_successfully(self): - # covering regression: https://github.com/pytorch/pytorch/issues/137499 - if os.environ.get("INSTALLED_GRAPHVIZ", "") == "yes": - batch_size = 
32 - seq_length = 50 - hidden_size = 768 - layer_norm = LayerNorm(hidden_size) - - torch.set_grad_enabled(False) - - @torch.compile - def fn(inp, weight): - matmul_output = inp @ weight - final_output = layer_norm(matmul_output) - return final_output - - inp = torch.randn(batch_size, seq_length, hidden_size) - weight = torch.randn(hidden_size, hidden_size) - - graph_module = dynamo.export(fn)(inp, weight)[0] - - g = FxGraphDrawer(graph_module, name="fn") - dot = g.get_main_dot_graph() - out_name = tempfile.NamedTemporaryFile(delete=True).name + ".svg" - # This should succeed - dot.write_svg(out_name) - - -if __name__ == "__main__": - run_tests() diff --git a/test/test_fx.py b/test/test_fx.py index b2497a5f517cda..11fdea4cf3b2bb 100644 --- a/test/test_fx.py +++ b/test/test_fx.py @@ -45,7 +45,6 @@ from fx.test_pass_infra import TestPassManager # noqa: F401 from fx.test_common_passes import TestCommonPass # noqa: F401 from fx.test_cse_pass import TestCSEPass # noqa: F401 -from fx.test_graph_drawer import TestGraphDrawer # noqa: F401 from fx.test_matcher_utils import TestMatcher # noqa: F401 from fx.test_source_matcher_utils import TestSourceMatcher # noqa: F401 diff --git a/torch/_functorch/partitioners.py b/torch/_functorch/partitioners.py index 80b021363ea4ed..e36a02853c235c 100644 --- a/torch/_functorch/partitioners.py +++ b/torch/_functorch/partitioners.py @@ -1960,6 +1960,7 @@ def draw_graph( clear_meta: bool = True, prog: Optional[Union[str, List[str]]] = None, parse_stack_trace: bool = False, + dot_graph_shape: Optional[str] = None, ) -> None: if clear_meta: new_graph = copy.deepcopy(traced.graph) @@ -1974,6 +1975,7 @@ def draw_graph( traced, figname, parse_stack_trace=parse_stack_trace, + dot_graph_shape=dot_graph_shape, ) x = g.get_main_dot_graph() write_method = getattr(x, "write_" + ext.lstrip(".")) diff --git a/torch/_inductor/config.py b/torch/_inductor/config.py index e9d322519632b5..15316ef343d8ba 100644 --- a/torch/_inductor/config.py +++ b/torch/_inductor/config.py @@ -1270,6 +1270,16 @@ class trace: # SVG figure showing fx with fusion draw_orig_fx_graph = os.environ.get("INDUCTOR_ORIG_FX_SVG", "0") == "1" + # We draw our fx graphs with the "record" shape attribute by default. + # Sometimes, when the graph is very complex, we may hit dot errors like below: + # "flat edge between adjacent nodes one of which has a record shape - + # replace records with HTML-like labels" + # and thus fail to generate a graph. So, let's give the user an option + # to specify the shape attribute for the dot graph. For example, passing + # INDUCTOR_DOT_GRAPH_SHAPE_SVG = "none" would let us generate HTML-like lables + # to workaround the above failure. 
+ dot_graph_shape = os.environ.get("INDUCTOR_DOT_GRAPH_SHAPE_SVG", None) + # If not None, this is the URL that saves the SVG files of the input/output # graph of each pass that changed the graph # The nodes that are being transformed in each pass will be colored in yellow diff --git a/torch/_inductor/debug.py b/torch/_inductor/debug.py index 6a51c92c73682f..be0a15a981c9d2 100644 --- a/torch/_inductor/debug.py +++ b/torch/_inductor/debug.py @@ -91,7 +91,9 @@ def draw_buffers( gm = GraphModule({}, graph) legalize_graph(gm) gm.graph.lint() - draw_graph(gm, fname, clear_meta=False) + draw_graph( + gm, fname, clear_meta=False, dot_graph_shape=config.trace.dot_graph_shape + ) def create_fx_from_snodes(snodes: List[BaseSchedulerNode]) -> fx.Graph: @@ -535,6 +537,7 @@ def draw_orig_fx_graph( clear_meta=False, prog=GRAPHVIZ_COMMAND_SCALABLE, parse_stack_trace=True, + dot_graph_shape=config.trace.dot_graph_shape, ) def output_code(self, filename: str) -> None: diff --git a/torch/fx/passes/graph_drawer.py b/torch/fx/passes/graph_drawer.py index d019de678ffd4d..9a1710c9721ae5 100644 --- a/torch/fx/passes/graph_drawer.py +++ b/torch/fx/passes/graph_drawer.py @@ -76,11 +76,13 @@ def __init__( ignore_parameters_and_buffers: bool = False, skip_node_names_in_args: bool = True, parse_stack_trace: bool = False, + dot_graph_shape: Optional[str] = None, normalize_args: bool = False, ): self._name = name - # HTML-Like labels - self.dot_graph_shape = "none" + self.dot_graph_shape = ( + dot_graph_shape if dot_graph_shape is not None else "record" + ) self.normalize_args = normalize_args _WEIGHT_TEMPLATE["shape"] = self.dot_graph_shape @@ -218,10 +220,10 @@ def _get_node_label( ) -> str: def _get_str_for_args_kwargs(arg): if isinstance(arg, tuple): - prefix, suffix = "(", ")" + prefix, suffix = r"|args=(\l", r",\n)\l" arg_strs_list = [_format_arg(a, max_list_len=8) for a in arg] elif isinstance(arg, dict): - prefix, suffix = r"{", r"}" + prefix, suffix = r"|kwargs={\l", r",\n}\l" arg_strs_list = [ f"{k}: {_format_arg(v, max_list_len=8)}" for k, v in arg.items() ] @@ -233,33 +235,27 @@ def _get_str_for_args_kwargs(arg): arg_strs_list = [a for a in arg_strs_list if "%" not in a] if len(arg_strs_list) == 0: return "" - arg_strs = prefix + r", ".join(arg_strs_list) + suffix - return ( - arg_strs.replace("&", "&") - .replace("<", "<") - .replace(">", ">") - ) + arg_strs = prefix + r",\n".join(arg_strs_list) + suffix + if len(arg_strs_list) == 1: + arg_strs = arg_strs.replace(r"\l", "").replace(r"\n", "") + return arg_strs.replace("{", r"\{").replace("}", r"\}") - label = f"""< - - - - """ + label = "{" + f"name=%{node.name}|op_code={node.op}\n" if node.op == "call_module": leaf_module = self._get_leaf_node(module, node) - label += f'' - + label += r"\n" + self._typename(leaf_module) + r"\n|" extra = "" if hasattr(leaf_module, "__constants__"): - for c in leaf_module.__constants__: - label += ( - f"" - ) + extra = r"\n".join( + [ + f"{c}: {getattr(leaf_module, c)}" + for c in leaf_module.__constants__ + ] # type: ignore[union-attr] + ) + label += extra + r"\n" else: - label += ( - f"" - ) + label += f"|target={self._typename(node.target)}" + r"\n" if self.normalize_args: try: args, kwargs = normalize_function( # type: ignore[misc] @@ -274,16 +270,11 @@ def _get_str_for_args_kwargs(arg): args, kwargs = node.args, node.kwargs else: args, kwargs = node.args, node.kwargs - if len(args) > 0: - args_str = _get_str_for_args_kwargs(args) - if args_str: - label += f"" + label += _get_str_for_args_kwargs(args) if len(kwargs) > 0: 
- kwargs_str = _get_str_for_args_kwargs(kwargs) - if kwargs_str: - label += f"" - label += f"" + label += _get_str_for_args_kwargs(kwargs) + label += f"|num_users={len(node.users)}" + r"\n" tensor_meta = node.meta.get("tensor_meta") label += self._tensor_meta_to_label(tensor_meta) @@ -292,63 +283,101 @@ def _get_str_for_args_kwargs(arg): # print buf=buf0, n_origin=6 buf_meta = node.meta.get("buf_meta", None) if buf_meta is not None: - label += f"" - label += f"" + label += f"|buf={buf_meta.name}" + r"\n" + label += f"|n_origin={buf_meta.n_origin}" + r"\n" # for original fx graph # print file:lineno code if parse_stack_trace and node.stack_trace is not None: parsed_stack_trace = _parse_stack_trace(node.stack_trace) fname = self._shorten_file_name(parsed_stack_trace.file) - label += f"" + label += ( + f"|file={fname}:{parsed_stack_trace.lineno} {parsed_stack_trace.code}" + + r"\n" + ) - return label + "
%{node.name}
op_code{node.op}
{self._typename(leaf_module)}
{c}{getattr(leaf_module, c)}
target{self._typename(node.target)}
args{args_str}
kwargs{kwargs_str}
num_users{len(node.users)}
buf{buf_meta.name}
n_origin{buf_meta.n_origin}
file{fname}:{parsed_stack_trace.lineno} {parsed_stack_trace.code}
>" + return label + "}" def _tensor_meta_to_label(self, tm) -> str: if tm is None: return "" elif isinstance(tm, TensorMetadata): - return self._htmlize_tensor_meta(tm) - elif isinstance(tm, (list, tuple)): - return "".join(self._tensor_meta_to_label(item) for item in tm) + return self._stringify_tensor_meta(tm) + elif isinstance(tm, list): + result = "" + for item in tm: + result += self._tensor_meta_to_label(item) + return result elif isinstance(tm, dict): - return "".join(self._tensor_meta_to_label(v) for v in tm.values()) + result = "" + for v in tm.values(): + result += self._tensor_meta_to_label(v) + return result + elif isinstance(tm, tuple): + result = "" + for item in tm: + result += self._tensor_meta_to_label(item) + return result else: raise RuntimeError(f"Unsupported tensor meta type {type(tm)}") - def _htmlize_tensor_meta(self, tm: TensorMetadata) -> str: - result = f""" - dtype{tm.dtype} - shape{tuple(tm.shape)} - requires_grad{tm.requires_grad} - stride{tm.stride} - """ + def _stringify_tensor_meta(self, tm: TensorMetadata) -> str: + result = "" + if not hasattr(tm, "dtype"): + print("tm", tm) + result += "|" + "dtype" + "=" + str(tm.dtype) + r"\n" + result += "|" + "shape" + "=" + str(tuple(tm.shape)) + r"\n" + result += "|" + "requires_grad" + "=" + str(tm.requires_grad) + r"\n" + result += "|" + "stride" + "=" + str(tm.stride) + r"\n" if tm.is_quantized: assert tm.qparams is not None assert "qscheme" in tm.qparams qscheme = tm.qparams["qscheme"] - if qscheme in {torch.per_tensor_affine, torch.per_tensor_symmetric}: - result += f""" - q_scale{tm.qparams["scale"]} - q_zero_point{tm.qparams["zero_point"]} - """ + if qscheme in { + torch.per_tensor_affine, + torch.per_tensor_symmetric, + }: + result += "|" + "q_scale" + "=" + str(tm.qparams["scale"]) + r"\n" + result += ( + "|" + + "q_zero_point" + + "=" + + str(tm.qparams["zero_point"]) + + r"\n" + ) elif qscheme in { torch.per_channel_affine, torch.per_channel_symmetric, torch.per_channel_affine_float_qparams, }: - result += f""" - q_per_channel_scale{tm.qparams["scale"]} - q_per_channel_zero_point{tm.qparams["zero_point"]} - q_per_channel_axis{tm.qparams["axis"]} - """ + result += ( + "|" + + "q_per_channel_scale" + + "=" + + str(tm.qparams["scale"]) + + r"\n" + ) + result += ( + "|" + + "q_per_channel_zero_point" + + "=" + + str(tm.qparams["zero_point"]) + + r"\n" + ) + result += ( + "|" + + "q_per_channel_axis" + + "=" + + str(tm.qparams["axis"]) + + r"\n" + ) else: raise RuntimeError(f"Unsupported qscheme: {qscheme}") - result += f'qscheme{tm.qparams["qscheme"]}' + result += "|" + "qscheme" + "=" + str(tm.qparams["qscheme"]) + r"\n" return result - def _get_html_tensor_label(self, t: torch.Tensor) -> str: - return f"tensor{t.dtype}{list(t.shape)}" + def _get_tensor_label(self, t: torch.Tensor) -> str: + return str(t.dtype) + str(list(t.shape)) + r"\n" # when parse_stack_trace=True # print file:lineno code @@ -369,6 +398,7 @@ def _to_dot( # "TB" means top-to-bottom rank direction in layout dot_graph = pydot.Dot(name, rankdir="TB") + buf_name_to_subgraph = {} for node in graph_module.graph.nodes: @@ -401,26 +431,19 @@ def get_module_params_or_buffers(): for pname, ptensor in chain( leaf_module.named_parameters(), leaf_module.named_buffers() ): - pname1 = f"{node.name}.{pname}" - node_name = f'"{pname}"' - param_type = ( - "parameter" + pname1 = node.name + "." 
+ pname + label1 = ( + pname1 + "|op_code=get_" + "parameter" if isinstance(ptensor, torch.nn.Parameter) - else "buffer" + else "buffer" + r"\l" ) - label = f"""< - - - - {self._get_html_tensor_label(ptensor)} -
{pname1}
op_codeget_{param_type}
>""" dot_w_node = pydot.Node( - node_name, - label=label, + pname1, + label="{" + label1 + self._get_tensor_label(ptensor) + "}", **_WEIGHT_TEMPLATE, ) dot_graph.add_node(dot_w_node) - dot_graph.add_edge(pydot.Edge(node_name, node.name)) + dot_graph.add_edge(pydot.Edge(pname1, node.name)) if node.op == "call_module": leaf_module = self._get_leaf_node(graph_module, node) From 85c3c4132dcd11ad7c8be3d2203a69b4c231f538 Mon Sep 17 00:00:00 2001 From: rzou Date: Fri, 1 Nov 2024 12:39:22 -0700 Subject: [PATCH 013/503] no-op torch.library.custom_op APIs on torch.deploy (#139509) We forgot this case in the previous PR. Fixes https://github.com/pytorch/pytorch/issues/137536 Test Plan: - better tests Pull Request resolved: https://github.com/pytorch/pytorch/pull/139509 Approved by: https://github.com/williamwen42 --- test/test_custom_ops.py | 18 ++++++++++++++++++ torch/_library/custom_ops.py | 4 ++++ torch/_library/utils.py | 10 ++++++++++ torch/library.py | 24 +++++++----------------- 4 files changed, 39 insertions(+), 17 deletions(-) diff --git a/test/test_custom_ops.py b/test/test_custom_ops.py index f0ee8b65be6c5d..b998a2c2a1ff08 100644 --- a/test/test_custom_ops.py +++ b/test/test_custom_ops.py @@ -490,6 +490,24 @@ def sin_override(x): m.impl("sin", sin_override, "CompositeImplicitAutograd") x = torch.randn(3) y = torch.sin(x) + +# should be a no-op +@torch.library.custom_op("mylib::foobar", mutates_args={}) +def foobar(x: torch.Tensor) -> torch.Tensor: + return x.sin() + +# should be a no-op +@foobar.register_fake +def _(x): + return torch.empty_like(x) + +# should be a no-op +m2.define("foobarbaz9996(Tensor x) -> Tensor") + +# should be a no-op +@torch.library.register_fake("mylib4392::foobarbaz9996") +def _(x): + return torch.empty_like(x) """ script = script.strip() env = os.environ.copy() diff --git a/torch/_library/custom_ops.py b/torch/_library/custom_ops.py index d08c5d5d077899..2c57ceb9baf984 100644 --- a/torch/_library/custom_ops.py +++ b/torch/_library/custom_ops.py @@ -553,6 +553,10 @@ def register_autograd( self._setup_context_fn = setup_context def _register_to_dispatcher(self) -> None: + if torch._running_with_deploy(): + utils.warn_deploy(stacklevel=5) + return + lib = self._lib schema_str = self._name + self._schema cpp_schema = _C.parse_schema(schema_str) diff --git a/torch/_library/utils.py b/torch/_library/utils.py index 3d636f9b3a8596..82ebdad018d466 100644 --- a/torch/_library/utils.py +++ b/torch/_library/utils.py @@ -2,6 +2,7 @@ import dataclasses import inspect import sys +import warnings from typing import Any, Callable, Dict, Iterable, Iterator, Tuple, Union import torch @@ -10,6 +11,15 @@ from torch._ops import OpOverload +def warn_deploy(stacklevel=3): + warnings.warn( + "Python torch.library APIs do nothing under torch::deploy (multipy). 
" + "Please instead use C++ custom operator registration APIs.", + RuntimeWarning, + stacklevel=stacklevel, + ) + + @dataclasses.dataclass class Kernel: """Models a (function, source location)""" diff --git a/torch/library.py b/torch/library.py index d4224e62e456a2..378aca0d621a15 100644 --- a/torch/library.py +++ b/torch/library.py @@ -5,7 +5,6 @@ import re import sys import traceback -import warnings import weakref from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, Union from typing_extensions import deprecated @@ -54,15 +53,6 @@ def fallthrough_kernel(): raise NotImplementedError("fallthrough_kernel() should never be called.") -def _warn_deploy(): - warnings.warn( - "Python torch.library APIs do nothing under torch::deploy (multipy). " - "Please instead use C++ custom operator registration APIs.", - RuntimeWarning, - stacklevel=3, - ) - - class Library: """ A class to create libraries that can be used to register new operators or @@ -92,7 +82,7 @@ def __init__(self, ns, kind, dispatch_key=""): " is a reserved namespace. Please try creating a library with another name.", ) if torch._running_with_deploy(): - _warn_deploy() + _library.utils.warn_deploy() return frame = traceback.extract_stack(limit=3)[0] @@ -143,7 +133,7 @@ def define(self, schema, alias_analysis="", *, tags=()): >>> my_lib.define("sum(Tensor self) -> Tensor") """ if torch._running_with_deploy(): - _warn_deploy() + _library.utils.warn_deploy() return # This is added because we also want to disallow PURE_FUNCTION alias analysis which is a valid @@ -178,7 +168,7 @@ def define(self, schema, alias_analysis="", *, tags=()): def _register_fake(self, op_name, fn, _stacklevel=1): r"""Registers the fake impl for an operator defined in the library.""" if torch._running_with_deploy(): - _warn_deploy() + _library.utils.warn_deploy() return source = torch._library.utils.get_source(_stacklevel + 1) @@ -222,7 +212,7 @@ def _register_torch_dispatch_rule(self, op_name, torch_dispatch_class, fn): (mode, func: OpOverload, types: Tuple[type, ...], args, kwargs) -> Any """ if torch._running_with_deploy(): - _warn_deploy() + _library.utils.warn_deploy() return qualname = f"{self.ns}::{op_name}" @@ -243,7 +233,7 @@ def _impl_with_aoti_compile(self, op_name, dispatch_key=""): >>> my_lib._impl_with_aoti_compile("div.Tensor", "CPU") """ if torch._running_with_deploy(): - _warn_deploy() + _library.utils.warn_deploy() return if dispatch_key == "": @@ -300,7 +290,7 @@ def impl(self, op_name, fn, dispatch_key="", *, with_keyset=False): >>> my_lib.impl("div.Tensor", div_cpu, "CPU") """ if torch._running_with_deploy(): - _warn_deploy() + _library.utils.warn_deploy() return if not callable(fn): @@ -384,7 +374,7 @@ def fallback(self, fn, dispatch_key="", *, with_keyset=False): >>> my_lib.fallback(fallback_kernel, "Autocast") """ if torch._running_with_deploy(): - _warn_deploy() + _library.utils.warn_deploy() return if dispatch_key == "": From ae0e7042f65a8164567f6014172845d42b40e612 Mon Sep 17 00:00:00 2001 From: Tugsbayasgalan Manlaibaatar Date: Sun, 3 Nov 2024 15:44:49 -0800 Subject: [PATCH 014/503] Fix custom obj being input (#139209) Differential Revision: [D65158939](https://our.internmc.facebook.com/intern/diff/D65158939) Pull Request resolved: https://github.com/pytorch/pytorch/pull/139209 Approved by: https://github.com/ydwu4 ghstack dependencies: #138658 --- test/export/test_torchbind.py | 1 - torch/_export/utils.py | 6 +++++- torch/export/_trace.py | 2 ++ torch/export/exported_program.py | 9 ++++++--- 4 files changed, 
13 insertions(+), 5 deletions(-) diff --git a/test/export/test_torchbind.py b/test/export/test_torchbind.py index d19ec645e3396b..fd9f98199d37b7 100644 --- a/test/export/test_torchbind.py +++ b/test/export/test_torchbind.py @@ -727,7 +727,6 @@ def forward(self, token, p_linear_weight, p_linear_bias, tq, x): self.assertTrue(tq.pop() is a) self.assertTrue(tq.pop() is b) - @unittest.expectedFailure # T205481814 @skipIfCrossRef # arg names change with torch function mode def test_safe_to_trace_with_real(self): x = torch.randn(3, 3) diff --git a/torch/_export/utils.py b/torch/_export/utils.py index a34aea5519a0ad..86cb0bef955e7c 100644 --- a/torch/_export/utils.py +++ b/torch/_export/utils.py @@ -35,7 +35,7 @@ from torch.export import ExportedProgram from torch.export.graph_signature import ExportGraphSignature -from torch.export.graph_signature import InputKind, OutputKind +from torch.export.graph_signature import CustomObjArgument, InputKind, OutputKind from torch.utils._pytree import ( _register_pytree_node, Context, @@ -862,6 +862,10 @@ def _extract_pytree_key(x): if node.op == "placeholder": assert node.name in name_map node.name = node.target = name_map[node.name] + # if the constant obj is an input, we also need to update meta["val"] + # because this is created before the placeholder naming pass + if isinstance(node.meta["val"], CustomObjArgument): + node.meta["val"].name = node.name elif node.name in name_map: node.name = name_map[node.name] diff --git a/torch/export/_trace.py b/torch/export/_trace.py index e1e4d1bebe2a31..86ed188ae6bdf9 100644 --- a/torch/export/_trace.py +++ b/torch/export/_trace.py @@ -447,6 +447,8 @@ def _produce_aten_artifact( graph_signature, gm, _get_non_persistent_buffers(mod) ) + # script objects are always stored in constants no matter whether they're initial inputs or + # they're lifted in aot" before rewrite_script_object_meta constants = rewrite_script_object_meta(gm) constants.update(lift_constants_pass(gm, export_graph_signature, constant_attrs)) diff --git a/torch/export/exported_program.py b/torch/export/exported_program.py index a4a642f9e8d4e1..a5c7a519b8590e 100644 --- a/torch/export/exported_program.py +++ b/torch/export/exported_program.py @@ -353,9 +353,12 @@ def _is_joint_ir_decomp(ep, joint_loss_index): for node in mod.graph.nodes: if node.op == "placeholder": if isinstance(node.meta["val"], CustomObjArgument): - fake_args.append( - maybe_to_fake_obj(fake_mode, node.meta["val"].fake_val.real_obj) # type: ignore[union-attr] - ) + real_script_obj = None + if node.meta["val"].fake_val is None: + real_script_obj = ep.constants[node.meta["val"].name] + else: + real_script_obj = node.meta["val"].fake_val.real_obj + fake_args.append(maybe_to_fake_obj(fake_mode, real_script_obj)) else: fake_args.append(node.meta["val"]) From f55dfbcf87874ec3307b5d0ee4f815ec3f145c5e Mon Sep 17 00:00:00 2001 From: Mikayla Gawarecki Date: Fri, 1 Nov 2024 18:58:04 -0700 Subject: [PATCH 015/503] Remove hasattr(__slots__) for BUILD logic in weights_only unpickler (#139541) This is tested in PR stacked above in ```python python test/distributed/fsdp/test_fsdp_state_dict.py TestFSDPStateDict.test_torch_save_load ``` We cannot depend on whether `hasattr(..., __slots__)` to know whether a BUILD instruction has slotstate. For example, if a class subclasses ABC `hasattr(__slots__)` will be `True` but there might be no slots (and hence `state` will not be a tuple). 
So revert #138936 to following the pickle library's code ```python >>> from abc import ABC >>> hasattr(ABC, "__slots__") True ``` So ```python import torch from abc import ABC from dataclasses import dataclass class Foo(ABC): pass class FooWrapper(Foo): def __init__(self, x, y): self.x = x self.y = y f = FooWrapper(1, 2) torch.save(f, "temp.pt") with torch.serialization.safe_globals([FooWrapper]): torch.load("temp.pt") ``` Would fail on the previous code with ``` File "/data/users/mg1998/pytorch/torch/serialization.py", line 1934, in _load result = unpickler.load() File "/data/users/mg1998/pytorch/torch/_weights_only_unpickler.py", line 366, in load for k, v in slotstate.items(): ``` As there is actually no slotstate Pull Request resolved: https://github.com/pytorch/pytorch/pull/139541 Approved by: https://github.com/malfet ghstack dependencies: #138936, #139221, #139433 --- torch/_weights_only_unpickler.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/torch/_weights_only_unpickler.py b/torch/_weights_only_unpickler.py index e89f04429265a8..c624ad914e8fe5 100644 --- a/torch/_weights_only_unpickler.py +++ b/torch/_weights_only_unpickler.py @@ -361,15 +361,17 @@ def load(self): elif type(inst) in _get_user_allowed_globals().values(): if hasattr(inst, "__setstate__"): inst.__setstate__(state) - elif hasattr(inst, "__slots__"): - # if slots are defined, state will be a tuple (state, slotstate) - state, slotstate = state - for k, v in slotstate.items(): - setattr(inst, k, v) + else: + # mimics load_build in pickle + # https://github.com/python/cpython/blob/f0c6fccd08904787a39269367f09f263d496114c/Lib/pickle.py#L1854-L1867 + slotstate = None + if isinstance(state, tuple) and len(state) == 2: + state, slotstate = state if state: inst.__dict__.update(state) - else: - inst.__dict__.update(state) + if slotstate: + for k, v in slotstate.items(): + setattr(inst, k, v) else: raise UnpicklingError( "Can only build Tensor, Parameter, OrderedDict or types allowlisted " From ca43ecd5996b15178de88960d167ccc31458b607 Mon Sep 17 00:00:00 2001 From: Mikayla Gawarecki Date: Fri, 1 Nov 2024 18:58:05 -0700 Subject: [PATCH 016/503] Flip default on weights_only (#137602) Pull Request resolved: https://github.com/pytorch/pytorch/pull/137602 Approved by: https://github.com/malfet, https://github.com/albanD ghstack dependencies: #138936, #139221, #139433, #139541 --- .github/ci_commit_pins/torchbench.txt | 2 +- .github/ci_commit_pins/xla.txt | 2 +- .../sharded_tensor/test_sharded_tensor.py | 15 +++++--- test/distributed/fsdp/test_fsdp_state_dict.py | 23 +++++++++++- test/dynamo/test_compile.py | 8 +++-- test/dynamo/test_modules.py | 10 ++++-- test/load_torchscript_model.py | 3 +- test/test_mps.py | 6 ++-- test/test_nestedtensor.py | 19 ++++++++-- test/test_serialization.py | 2 +- torch/_weights_only_unpickler.py | 5 +-- torch/serialization.py | 35 +++++++------------ 12 files changed, 88 insertions(+), 42 deletions(-) diff --git a/.github/ci_commit_pins/torchbench.txt b/.github/ci_commit_pins/torchbench.txt index 21b3c3481f3988..4f922a0676eb2c 100644 --- a/.github/ci_commit_pins/torchbench.txt +++ b/.github/ci_commit_pins/torchbench.txt @@ -1 +1 @@ -e522b45cd4535b9dfe067aa68d7315755df38f48 +766a5e3a189384659fd35a68c3b17b88c761aaac diff --git a/.github/ci_commit_pins/xla.txt b/.github/ci_commit_pins/xla.txt index 9d412df07f46c2..03db6224c4139e 100644 --- a/.github/ci_commit_pins/xla.txt +++ b/.github/ci_commit_pins/xla.txt @@ -1 +1 @@ -2eb4a60ed14a38260b85b0c765161f0ce45be6d1 
+f71c02d1f457d58371e013632efb016c01bd1866 diff --git a/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py b/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py index 76d06a972bdf8c..730b2c2c0ac27b 100644 --- a/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py +++ b/test/distributed/_shard/sharded_tensor/test_sharded_tensor.py @@ -1245,7 +1245,8 @@ def test_state_dict(self): module_load._register_load_state_dict_pre_hook(pre_load_state_dict_hook, True) buffer.seek(0) - state_dict_deser = torch.load(buffer) + # weights_only=False as ShardedTensor weights_only is already tested in TestFSDPStateDict.test_torch_save_load + state_dict_deser = torch.load(buffer, weights_only=False) module_load.load_state_dict(state_dict_deser, strict=False) module_load._register_state_dict_hook(state_dict_hook) @@ -1289,7 +1290,8 @@ def test_state_dict_new_group(self): buffer.seek(0) with load_with_process_group(pg): - state_dict_deser = torch.load(buffer) + # ShardedTensor weights_only is already tested in TestFSDPStateDict.test_torch_save_load + state_dict_deser = torch.load(buffer, weights_only=False) module_load.load_state_dict(state_dict_deser, strict=False) # Verify after load. @@ -1361,20 +1363,23 @@ def test_load_state_dict_errors(self): if self.rank != 0: with self.assertRaisesRegex(RuntimeError, "Local rank at save time was"): with load_with_process_group(pg): - state_dict_deser = torch.load(buffer) + # ShardedTensor weights_only is already tested in TestFSDPStateDict.test_torch_save_load + state_dict_deser = torch.load(buffer, weights_only=False) else: with self.assertRaisesRegex( RuntimeError, "Local world size at save time was" ): with load_with_process_group(pg): - state_dict_deser = torch.load(buffer) + # ShardedTensor weights_only is already tested in TestFSDPStateDict.test_torch_save_load + state_dict_deser = torch.load(buffer, weights_only=False) dist.destroy_process_group() buffer.seek(0) with self.assertRaisesRegex( RuntimeError, "Need to initialize default process group" ): - state_dict_deser = torch.load(buffer) + # ShardedTensor weights_only is already tested in TestFSDPStateDict.test_torch_save_load + state_dict_deser = torch.load(buffer, weights_only=False) rpc.shutdown() @with_comms diff --git a/test/distributed/fsdp/test_fsdp_state_dict.py b/test/distributed/fsdp/test_fsdp_state_dict.py index 0fa1b38eef42bc..a246375caba8ff 100644 --- a/test/distributed/fsdp/test_fsdp_state_dict.py +++ b/test/distributed/fsdp/test_fsdp_state_dict.py @@ -16,6 +16,12 @@ Shard, ShardedTensor, ) +from torch.distributed._shard.sharded_tensor.metadata import ( + MEM_FORMAT_ENCODING, + ShardedTensorMetadata, + TensorProperties, +) +from torch.distributed._shard.sharding_spec import ChunkShardingSpec, ShardMetadata from torch.distributed._state_dict_utils import ( _all_gather_sharded_tensor, _gather_state_dict, @@ -37,6 +43,7 @@ from torch.distributed.fsdp._common_utils import FSDP_PREFIX from torch.distributed.fsdp._unshard_param_utils import FLAT_PARAM from torch.distributed.fsdp.wrap import enable_wrap, ModuleWrapPolicy, wrap +from torch.distributed.remote_device import _remote_device from torch.nn import Linear, Module, TransformerDecoderLayer, TransformerEncoderLayer from torch.nn.parallel import DistributedDataParallel from torch.optim import SGD @@ -1160,7 +1167,21 @@ def test_torch_save_load(self): checkpoint = io.BytesIO() torch.save(state_dict, checkpoint) checkpoint.seek(0) - state_dict_saved = torch.load(checkpoint) + with torch.serialization.safe_globals( + [ + 
Shard, + ShardMetadata, + ShardedTensor, + ShardedTensorMetadata, + TensorProperties, + MEM_FORMAT_ENCODING, + _remote_device, + getattr, + ShardedTensor.ProcessGroupState, + ChunkShardingSpec, + ] + ): + state_dict_saved = torch.load(checkpoint) for k, v in state_dict_saved.items(): if isinstance(v, ShardedTensor): self.assertEqual( diff --git a/test/dynamo/test_compile.py b/test/dynamo/test_compile.py index f28855c1ae2548..791ff7a67ffde3 100644 --- a/test/dynamo/test_compile.py +++ b/test/dynamo/test_compile.py @@ -46,7 +46,10 @@ def test_save(self): with tempfile.TemporaryDirectory() as tmpdirname: torch.save(model, os.path.join(tmpdirname, "model.pt")) - loaded_model = torch.load(os.path.join(tmpdirname, "model.pt")) + # weights_only=False as this is a legacy use case that loads a module + loaded_model = torch.load( + os.path.join(tmpdirname, "model.pt"), weights_only=False + ) loaded_model(torch.randn(1, 10)) def test_state_dict_save(self): @@ -58,7 +61,8 @@ def test_state_dict_save(self): torch.save(model.state_dict(), os.path.join(tmpdirname, "model.pt")) loaded_model = ToyModel() loaded_model.load_state_dict( - torch.load(os.path.join(tmpdirname, "model.pt")) + # weights_only=False as this is a legacy use case that loads a module + torch.load(os.path.join(tmpdirname, "model.pt"), weights_only=False) ) loaded_model(torch.randn(1, 10)) diff --git a/test/dynamo/test_modules.py b/test/dynamo/test_modules.py index 3a81380af3ea88..acdd687b6c7b1c 100644 --- a/test/dynamo/test_modules.py +++ b/test/dynamo/test_modules.py @@ -3002,7 +3002,10 @@ def test_save_and_load_inductor(self): with tempfile.TemporaryDirectory() as tmpdirname: torch.save(opt_mod, os.path.join(tmpdirname, "model.pt")) - loaded_model = torch.load(os.path.join(tmpdirname, "model.pt")) + # weights_only=False as this is a legacy use case that loads a module + loaded_model = torch.load( + os.path.join(tmpdirname, "model.pt"), weights_only=False + ) loaded_model(inp) self.assertTrue(same_two_models(loaded_model, mod, [inp])) self.assertTrue(same_two_models(loaded_model, opt_mod, [inp])) @@ -3020,7 +3023,10 @@ def test_save_and_load_all_backends(self): opt_mod = torch.compile(mod, backend=backend) with tempfile.TemporaryDirectory() as tmpdirname: torch.save(opt_mod, os.path.join(tmpdirname, "model.pt")) - loaded_model = torch.load(os.path.join(tmpdirname, "model.pt")) + # weights_only=False as this is a legacy use case that loads a module + loaded_model = torch.load( + os.path.join(tmpdirname, "model.pt"), weights_only=False + ) torch._dynamo.reset() # force recompiles torch._inductor.metrics.generated_kernel_count = 0 opt_mod(inp) diff --git a/test/load_torchscript_model.py b/test/load_torchscript_model.py index 807f27ffe76050..d362ae5dd93a00 100644 --- a/test/load_torchscript_model.py +++ b/test/load_torchscript_model.py @@ -5,7 +5,8 @@ if __name__ == "__main__": script_mod = torch.jit.load(sys.argv[1]) - mod = torch.load(sys.argv[1] + ".orig") + # weights_only=False as this is loading a sharded model + mod = torch.load(sys.argv[1] + ".orig", weights_only=False) print(script_mod) inp = torch.rand(2, 28 * 28) _ = mod(inp) diff --git a/test/test_mps.py b/test/test_mps.py index 8962ece03dc18d..5a5f7944d486e6 100644 --- a/test/test_mps.py +++ b/test/test_mps.py @@ -8825,7 +8825,8 @@ def test_module_backcompat(self): path = download_file('https://download.pytorch.org/test_data/linear.pt') with warnings.catch_warnings(): warnings.simplefilter('ignore', SourceChangeWarning) - m = torch.load(path) + # weights_only=False as this is a 
legacy use case that loads a module + m = torch.load(path, weights_only=False) input = torch.randn(2, 3, dtype=torch.float) self.assertEqual(m(input).size(), (2, 5)) @@ -8842,7 +8843,8 @@ def test_conv_backcompat(self): path = download_file('https://download.pytorch.org/test_data/legacy_conv2d.pt') with warnings.catch_warnings(): warnings.simplefilter('ignore', SourceChangeWarning) - m = torch.load(path, encoding='utf-8') + # weights_only=False as this is a legacy use case that loads a module + m = torch.load(path, encoding='utf-8', weights_only=False) input = torch.randn((1, 1, 1, 1), dtype=torch.float) self.assertEqual(m(input).size(), (1, 1, 1, 1)) diff --git a/test/test_nestedtensor.py b/test/test_nestedtensor.py index 1feca105d60f48..ba2af0927c8e12 100644 --- a/test/test_nestedtensor.py +++ b/test/test_nestedtensor.py @@ -1,6 +1,7 @@ # Owner(s): ["module: nestedtensor"] import ast +import contextlib import io import itertools import math @@ -3657,7 +3658,8 @@ def _make_tensor( ["contig", "noncontig_transposed", "noncontig_with_holes"], name_fn=lambda c: c, ) - def test_serialization(self, device, dtype, contiguity): + @parametrize("weights_only", [True, False]) + def test_serialization(self, device, dtype, contiguity, weights_only): # Test with 3 cases: # 1. contiguous # 2. non-contiguous transposed @@ -3693,8 +3695,21 @@ def test_serialization(self, device, dtype, contiguity): with tempfile.TemporaryFile() as f: torch.save(nt, f) + safe_globals = [ + torch.nested._internal.nested_tensor.NestedTensor, + torch.nested._internal.nested_tensor._rebuild_njt, + set, + torch._dynamo.decorators._DimRange, + ] f.seek(0) - nt_loaded = torch.load(f) + ctx = ( + torch.serialization.safe_globals(safe_globals) + if weights_only + else contextlib.nullcontext() + ) + + with ctx: + nt_loaded = torch.load(f, weights_only=weights_only) self.assertIsNot(nt, nt_loaded) # we expect a new offsets tensor -> different nested int upon load diff --git a/test/test_serialization.py b/test/test_serialization.py index 331b8c85f9c783..f24886ac4cd251 100644 --- a/test/test_serialization.py +++ b/test/test_serialization.py @@ -1196,7 +1196,7 @@ def test_weights_only_error(self, unsafe_global): f.seek(0) if unsafe_global: with self.assertRaisesRegex(pickle.UnpicklingError, - r"use `torch.serialization.add_safe_globals\(\[TwoTensor\]\)` to allowlist"): + r"use `torch.serialization.add_safe_globals\(\[TwoTensor\]\)` or .* to allowlist"): torch.load(f, weights_only=True) else: with self.assertRaisesRegex(pickle.UnpicklingError, diff --git a/torch/_weights_only_unpickler.py b/torch/_weights_only_unpickler.py index c624ad914e8fe5..a2d83425d2be60 100644 --- a/torch/_weights_only_unpickler.py +++ b/torch/_weights_only_unpickler.py @@ -322,8 +322,9 @@ def load(self): else: raise UnpicklingError( f"Unsupported global: GLOBAL {full_path} was not an allowed global by default. " - f"Please use `torch.serialization.add_safe_globals([{name}])` to allowlist " - "this global if you trust this class/function." + f"Please use `torch.serialization.add_safe_globals([{name}])` or the " + f"`torch.serialization.safe_globals([{name}])` context manager to allowlist this global " + "if you trust this class/function." 
) elif key[0] == NEWOBJ[0]: args = self.stack.pop() diff --git a/torch/serialization.py b/torch/serialization.py index 857e70c23a1a96..352514d541505a 100644 --- a/torch/serialization.py +++ b/torch/serialization.py @@ -67,6 +67,7 @@ "skip_data", ] +IS_FBCODE = not hasattr(torch.version, "git_version") DEFAULT_PROTOCOL = 2 @@ -92,6 +93,10 @@ MAP_SHARED, MAP_PRIVATE = None, None # type: ignore[assignment] +def _default_to_weights_only(pickle_module): + return pickle_module is None and not IS_FBCODE + + # _serialization_tls is used to store thread local state specific to serialization # that needs to be propagated to other files, in particular we use this for # (1) map_location (needed for wrapper subclasses/third party devices to torch._utils) @@ -1205,7 +1210,7 @@ def load( # documentation. We need it so that Sphinx doesn't leak `pickle`s path from # the build environment (e.g. ` str: "is not supported yet. Please call torch.load outside the skip_data context manager." ) + weights_only_not_set = weights_only is None + + if weights_only_not_set: + weights_only = _default_to_weights_only(pickle_module) + true_values = ["1", "y", "yes", "true"] # Add ability to force safe only or non-safe weight loads via environment variables force_weights_only_load = ( @@ -1364,7 +1374,8 @@ def _get_wo_message(message: str) -> str: elif force_weights_only_load: weights_only = True elif force_no_weights_only_load: - if weights_only is None: + # TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD can only override if callsite did not explicitly set weights_only + if weights_only_not_set: warnings.warn( "Environment variable TORCH_FORCE_NO_WEIGHTS_ONLY_LOAD detected, since the" "`weights_only` argument was not explicitly passed to `torch.load`, forcing weights_only=False.", @@ -1373,11 +1384,6 @@ def _get_wo_message(message: str) -> str: ) weights_only = False - if weights_only is None: - weights_only, warn_weights_only = False, True - else: - warn_weights_only = False - if weights_only: if pickle_module is not None: raise RuntimeError( @@ -1385,21 +1391,6 @@ def _get_wo_message(message: str) -> str: ) else: if pickle_module is None: - if warn_weights_only: - warnings.warn( - "You are using `torch.load` with `weights_only=False` (the current default value), which uses " - "the default pickle module implicitly. It is possible to construct malicious pickle data " - "which will execute arbitrary code during unpickling (See " - "https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). " - "In a future release, the default value for `weights_only` will be flipped to `True`. This " - "limits the functions that could be executed during unpickling. Arbitrary objects will no " - "longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the " - "user via `torch.serialization.add_safe_globals`. We recommend you start setting " - "`weights_only=True` for any use case where you don't have full control of the loaded file. " - "Please open an issue on GitHub for any issues related to this experimental feature.", - FutureWarning, - stacklevel=2, - ) pickle_module = pickle # make flipping default BC-compatible From 4930c4b71620fe6d76de15551b4d999a0b356637 Mon Sep 17 00:00:00 2001 From: Shunting Zhang Date: Mon, 4 Nov 2024 00:13:10 -0800 Subject: [PATCH 017/503] [inductor] patterns to remove pointless view/permute pairs (#139136) These are not artificial patterns I come up. They shows up in linear+CrossEntropyLoss graph. 
Consider this snippet: ``` class LinearAndCEL(nn.Module): def __init__(self): super().__init__() self.linear = nn.Linear(C, V) self.ce = nn.CrossEntropyLoss() def forward(self, x, y): return self.ce(self.linear(x).view(B * T, V), y.view(-1)) ``` `x` passed to `forward` is a 3D tensor of shape [B, T, C]. The `self.linear` will view x as [BxT, C] shape tensor first, do the matmul and produce a [BxT, V] tensor, and then view this output back to a 3D tensor with shape [B, T, V]. User code is gonna add another view op to convert the tensor shape to [B x T, V]. This generates a pair of redundant views . A pair of redundant permute happens in the backward part when we compute gradients. The view ops makes it hard to chunk linear+CEL. When the view op breaks up the dimension being chunked, what should the chunker do (even if we merge those dimension again later)? Removing these pointless view pairs makes the chunker simpler. And I think it's in general nice to do. Pull Request resolved: https://github.com/pytorch/pytorch/pull/139136 Approved by: https://github.com/Chillee, https://github.com/jansel --- .../test_replicate_with_compiler.py | 12 +++- test/inductor/test_compiled_autograd.py | 4 -- test/inductor/test_mkldnn_pattern_matcher.py | 3 +- test/inductor/test_pattern_matcher.py | 70 +++++++++++++++++++ torch/_inductor/fx_passes/joint_graph.py | 46 +++++++++++- torch/fx/experimental/symbolic_shapes.py | 15 ++++ 6 files changed, 141 insertions(+), 9 deletions(-) diff --git a/test/distributed/_composable/test_replicate_with_compiler.py b/test/distributed/_composable/test_replicate_with_compiler.py index 0a072ec4ab3ff3..da11b7490a840f 100644 --- a/test/distributed/_composable/test_replicate_with_compiler.py +++ b/test/distributed/_composable/test_replicate_with_compiler.py @@ -318,7 +318,11 @@ def bwd(loss): # todo: This pass mucks things up since Inductor thinks its inference # and can apply this. Should turn off these passes in compiled autograd @torch._inductor.config.patch( - reorder_for_locality=False, reorder_for_peak_memory=False + reorder_for_locality=False, + reorder_for_peak_memory=False, + # The correctness of this test relies on the pointless permute ops + # in the joint graph does not get eliminated.. + pattern_matcher=False, ) def test_bucketing_coalesced_op(self): # Gradient is None @@ -356,7 +360,11 @@ def test_bucketing_coalesced_op(self): # todo: This pass mucks things up since Inductor thinks its inference # and can apply this. Should turn off these passes in compiled autograd @torch._inductor.config.patch( - reorder_for_locality=False, reorder_for_peak_memory=False + reorder_for_locality=False, + reorder_for_peak_memory=False, + # The correctness of this test relies on the pointless permute ops + # in the joint graph does not get eliminated.. 
+ pattern_matcher=False, ) def test_bucketing_concat_op(self): # Gradient is None diff --git a/test/inductor/test_compiled_autograd.py b/test/inductor/test_compiled_autograd.py index 96a14959432390..59234dbf4c6b7f 100644 --- a/test/inductor/test_compiled_autograd.py +++ b/test/inductor/test_compiled_autograd.py @@ -2852,18 +2852,14 @@ def forward(model, x): "aot0_mm", "aot0_permute_3", "aot0_mm_1", - "aot0_permute_4", "aot0_sum_1", "aot0_view", - "aot0_permute_5", "aot0_le_1", "aot0_where_1", "aot0_permute_6", "aot0_mm_2", - "aot0_permute_7", "aot0_sum_2", "aot0_view_1", - "aot0_permute_8", ] found = 0 diff --git a/test/inductor/test_mkldnn_pattern_matcher.py b/test/inductor/test_mkldnn_pattern_matcher.py index 4ca78364a6e04e..08f885d130b2e1 100644 --- a/test/inductor/test_mkldnn_pattern_matcher.py +++ b/test/inductor/test_mkldnn_pattern_matcher.py @@ -623,10 +623,11 @@ def forward(self, x, y): is_inplace = binary_list[binary_fn][2] # view + linear + view(joint_graph+freeze pass) match_count = match_count + 5 if is_inplace else match_count + 3 - match_nodes = match_nodes + 7 if is_inplace else match_nodes + 5 + match_nodes = match_nodes + 8 if is_inplace else match_nodes + 5 mod = M(binary_fn, input_shape[-1], out_feature, bias).eval() v = torch.randn(input_shape) other = torch.randn(input_shape[:-1] + [out_feature]).to(dtype) + self._test_common( mod, ( diff --git a/test/inductor/test_pattern_matcher.py b/test/inductor/test_pattern_matcher.py index d6bfdbcc05f91b..b60a24b14523cc 100644 --- a/test/inductor/test_pattern_matcher.py +++ b/test/inductor/test_pattern_matcher.py @@ -27,6 +27,7 @@ from torch._inductor.test_case import run_tests, TestCase from torch._inductor.utils import run_and_get_code from torch._inductor.virtualized import V +from torch.fx.experimental.proxy_tensor import make_fx from torch.testing import FileCheck from torch.testing._internal.common_cuda import SM80OrLater from torch.testing._internal.common_device_type import expectedFailureXPU, skipCUDAIf @@ -40,6 +41,9 @@ from torch.utils import _pytree as pytree +aten = torch.ops.aten + + class TestPatternMatcher(TestCase): device_type = GPU_TYPE @@ -817,6 +821,72 @@ def fn(a, b): ] self.common(fn, args, 1, 3) + def test_pointless_view_pair(self): + def f(x): + x = aten.view.default(x, [3, 5, 7]) + x = aten.view.default(x, [15, 7]) + return x + + x = torch.randn(15, 7, device=GPU_TYPE) + gm = make_fx(f)(x) + self.assertEqual(count_calls(gm.graph), 2) + joint_graph.joint_graph_passes(gm) + self.assertEqual(count_calls(gm.graph), 0) + + def f(x): + x1 = aten.view.default(x, [3, 5, 7]) + x2 = aten.view.default(x1, [15, 7]) + return x1, x2 + + gm = make_fx(f)(x) + self.assertEqual(count_calls(gm.graph), 2) + joint_graph.joint_graph_passes(gm) + self.assertEqual(count_calls(gm.graph), 2) + + def test_pointless_permute_pair(self): + def f(x): + x = aten.permute.default(x, [1, 0]) + x = aten.permute.default(x, [1, 0]) + return x + + x = torch.randn(15, 7, device=GPU_TYPE) + gm = make_fx(f)(x) + self.assertEqual(count_calls(gm.graph), 2) + joint_graph.joint_graph_passes(gm) + self.assertEqual(count_calls(gm.graph), 0) + + def f(x): + x1 = aten.permute.default(x, [1, 0]) + x2 = aten.permute.default(x1, [1, 0]) + return x1, x2 + + gm = make_fx(f)(x) + self.assertEqual(count_calls(gm.graph), 2) + joint_graph.joint_graph_passes(gm) + self.assertEqual(count_calls(gm.graph), 2) + + def test_pointless_permute_pair_3d(self): + def f(x): + x = aten.permute.default(x, [1, 0, 2]) + x = aten.permute.default(x, [1, 0, 2]) + return x + + x = 
torch.randn(3, 5, 7, device=GPU_TYPE) + gm = make_fx(f)(x) + self.assertEqual(count_calls(gm.graph), 2) + joint_graph.joint_graph_passes(gm) + self.assertEqual(count_calls(gm.graph), 0) + + def f(x): + x1 = aten.permute.default(x, [1, 0, 2]) + x2 = aten.permute.default(x1, [1, 0, 2]) + return x1, x2 + + gm = make_fx(f)(x) + self.assertEqual(count_calls(gm.graph), 2) + joint_graph.joint_graph_passes(gm) + self.assertEqual(count_calls(gm.graph), 2) + def test_pointless_convert(self): def fn1(x): x = torch.ops.prims.convert_element_type.default(x, torch.float16) diff --git a/torch/_inductor/fx_passes/joint_graph.py b/torch/_inductor/fx_passes/joint_graph.py index 6716da64a59777..97e4d7b8fda72c 100644 --- a/torch/_inductor/fx_passes/joint_graph.py +++ b/torch/_inductor/fx_passes/joint_graph.py @@ -11,7 +11,10 @@ import torch.utils._pytree as pytree from torch._inductor.constant_folding import ConstantFolder from torch._inductor.fx_passes.dedupe_symint_uses import _SymHashingDict -from torch.fx.experimental.symbolic_shapes import statically_known_true +from torch.fx.experimental.symbolic_shapes import ( + _guard_sizes_oblivious, + statically_known_true, +) from torch.multiprocessing.reductions import StorageWeakRef from ...utils._ordered_set import OrderedSet @@ -570,11 +573,50 @@ def pointless_view(match: Match, arg, size): """Remove no-op view""" node = match.output_node() arg_size = list(node.args[0].meta["val"].shape) # type: ignore[union-attr] - if size == arg_size: + if _guard_sizes_oblivious(size, arg_size): node.replace_all_uses_with(node.args[0]) # type: ignore[arg-type] match.erase_nodes() +@register_graph_pattern( + CallFunction( + aten.view.default, + CallFunction(aten.view.default, KeywordArg("arg"), KeywordArg("size1")), + KeywordArg("size2"), + ), + pass_dict=patterns, +) +def pointless_view_pair(match: Match, arg, size1, size2): + """ + Remove a pair of views that are pointless. + """ + node = match.output_node() + arg_size = list(arg.meta["val"].shape) + if _guard_sizes_oblivious(arg_size, size2): + node.replace_all_uses_with(arg) + match.erase_nodes() + + +@register_graph_pattern( + CallFunction( + aten.permute.default, + CallFunction(aten.permute.default, KeywordArg("arg"), KeywordArg("perm1")), + KeywordArg("perm2"), + ), + pass_dict=patterns, +) +def pointless_permute_pair(match: Match, arg, perm1, perm2): + rank = len(perm1) + assert len(perm2) == rank + + for i in range(rank): + if perm1[perm2[i]] != i: + return # bail out + node = match.output_node() + node.replace_all_uses_with(arg) + match.erase_nodes() + + # When softmax is used with temperature or other scaling, we get the pattern # # scale(x) - scale(x).amax(dim, keepdim=True) diff --git a/torch/fx/experimental/symbolic_shapes.py b/torch/fx/experimental/symbolic_shapes.py index eb838e799ffa4e..753a74fc297be6 100644 --- a/torch/fx/experimental/symbolic_shapes.py +++ b/torch/fx/experimental/symbolic_shapes.py @@ -380,6 +380,21 @@ def guard_size_oblivious(expr: Union[torch.SymBool, bool]) -> bool: return expr +def _guard_sizes_oblivious( + lhs_sizes: Sequence[Union[torch.SymInt, bool]], + rhs_sizes: Sequence[Union[torch.SymInt, bool]], +) -> bool: + """ + Leverage guard_size_oblivious to compare if two lists of int/symint are equal. + Useful to compare sizes, strides etc. 
+ """ + + return len(lhs_sizes) == len(rhs_sizes) and all( + guard_size_oblivious(lhs_item == rhs_item) + for lhs_item, rhs_item in zip(lhs_sizes, rhs_sizes) + ) + + def check_consistent(new: _T, old: _T) -> None: """ Test that two "meta" values (typically either Tensor or SymInt) have From e76ce201779c4fefff37af16d8502a368a9040f3 Mon Sep 17 00:00:00 2001 From: Oguz Ulgen Date: Mon, 4 Nov 2024 18:39:03 +0000 Subject: [PATCH 018/503] Log to pt2 compile events (#139601) Summary: This option was added after I wrote the original diff, lets publish to pt2_compile_events Test Plan: CI Differential Revision: D65404910 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139601 Approved by: https://github.com/jamesjwu --- torch/_inductor/triton_bundler.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/torch/_inductor/triton_bundler.py b/torch/_inductor/triton_bundler.py index f16258d8dc0d5a..7835f168b3763d 100644 --- a/torch/_inductor/triton_bundler.py +++ b/torch/_inductor/triton_bundler.py @@ -130,7 +130,9 @@ def collect( cls._entries = None return [], None - with dynamo_timed(key="TritonBundler.collect", fwd_only=False): + with dynamo_timed( + key="TritonBundler.collect", fwd_only=False, log_pt2_compile_event=True + ): entries = cls._entries if entries is not None: result: List[TritonKernelArtifacts] = [] @@ -192,7 +194,11 @@ def read_and_emit( if not TritonBundler.is_enabled(): return None - with dynamo_timed(key="TritonBundler.read_and_emit", fwd_only=False): + with dynamo_timed( + key="TritonBundler.read_and_emit", + fwd_only=False, + log_pt2_compile_event=True, + ): kernel_names: List[str] = [] for artifacts in bundle: From 740054ffe658602ad30de57bb1d50ea8dcc8e0b2 Mon Sep 17 00:00:00 2001 From: Bin Bao Date: Sun, 3 Nov 2024 17:41:02 -0800 Subject: [PATCH 019/503] [AOTI][reland] Switch OSS dashboard to use aoti_compile_and_package (#139597) Summary: Reland https://github.com/pytorch/pytorch/pull/139154 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139597 Approved by: https://github.com/angelayi --- benchmarks/dynamo/common.py | 25 +++++++++++-------------- torch/_inductor/codecache.py | 8 ++++++-- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/benchmarks/dynamo/common.py b/benchmarks/dynamo/common.py index 0e7771c636a3d3..508672ba445b3a 100644 --- a/benchmarks/dynamo/common.py +++ b/benchmarks/dynamo/common.py @@ -987,9 +987,7 @@ def maybe_mark_profile(*args, **kwargs): with maybe_profile(args.export_profiler_trace) as p: if args.export_aot_inductor: - frozen_model_iter_fn = export_aot_inductor( - model, example_inputs, args.devices[0] - ) + frozen_model_iter_fn = export_aot_inductor(model, example_inputs) else: frozen_model_iter_fn = torch._dynamo.run(model_iter_fn) @@ -1487,7 +1485,7 @@ class AOTInductorModelCache: cache = {} @classmethod - def load(cls, model, example_inputs, device): + def load(cls, model, example_inputs): import torch._inductor import torch.export._trace from torch.export.dynamic_shapes import _tree_map_with_path @@ -1515,18 +1513,19 @@ def load(cls, model, example_inputs, device): _produce_dynamic_shapes_for_export, combined_args ) - gm = torch.export._trace._export( + ep = torch.export.export( model, example_args, example_kwargs, dynamic_shapes=dynamic_shapes, - pre_dispatch=True, strict=False, - ).module() + ) with torch.no_grad(): - so_path = torch._inductor.aot_compile(gm, example_args, example_kwargs) # type: ignore[arg-type] + package_path = torch._inductor.aoti_compile_and_package( + ep, 
example_args, example_kwargs + ) # type: ignore[arg-type] - cls.cache[key] = torch._export.aot_load(so_path, device) + cls.cache[key] = torch._inductor.aoti_load_package(package_path) return cls.cache[key] @@ -1554,8 +1553,8 @@ def opt_export(_, example_inputs): return opt_export -def export_aot_inductor(model, example_inputs, device): - optimized = AOTInductorModelCache.load(model, example_inputs, device) +def export_aot_inductor(model, example_inputs): + optimized = AOTInductorModelCache.load(model, example_inputs) def opt_aot_inductor(_, example_inputs, collect_outputs=False): example_args, example_kwargs = _normalize_bench_inputs(example_inputs) @@ -4585,9 +4584,7 @@ def run(runner, args, original_dir=None): elif args.backend or args.export_aot_inductor: if args.export_aot_inductor: assert not args.training, "AOTInductor only supports inference" - optimize_ctx = functools.partial( - export_aot_inductor, device=args.devices[0] - ) + optimize_ctx = functools.partial(export_aot_inductor) # AOTInductor doesn't support control flow yet runner.skip_models.update(runner.skip_models_due_to_control_flow) diff --git a/torch/_inductor/codecache.py b/torch/_inductor/codecache.py index ee8a671a6016bc..fc4cbba2a702d4 100644 --- a/torch/_inductor/codecache.py +++ b/torch/_inductor/codecache.py @@ -2056,8 +2056,12 @@ def _pad_to_alignment(raw_bytes: bytes) -> bytes: else: run_command_and_check(link_cmd) - for o_file in [output_o, consts_o]: - # remove .o files to save disk space since we already have the .so file + for o_file in [ + output_o, + consts_o, + os.path.splitext(consts_o)[0] + ".S", + ]: + # No need to package .o or .S into the output artifact os.remove(o_file) if use_mmap_weights: From 30a83ca9910e128e628d7fac207b18e81c03af19 Mon Sep 17 00:00:00 2001 From: Ryan Guo Date: Fri, 1 Nov 2024 18:04:38 -0700 Subject: [PATCH 020/503] [dynamo] Improve codegen for `DataPtrVariable` and fix tensor reference issue (#139487) This addresses https://github.com/pytorch/pytorch/pull/137677/files#r1799836499, which had to set `allow_cache=False` for codegen on `DataPtrVariable.base`, which is a `TensorVariable`, otherwise we observe failure of `test_no_grad_copy` when testing with Dynamo. I've seen `test_no_grad_copy` failing a few times, and every single time it's related to cyclic reference, my best guess is the cyclic reference holds some tensor object longer in memory than necessary, preventing the optimization introduced in #11165. This patch makes `OutputGraph.cleanup()` more aggressive by clearing out all fields that might reference a `VariableTracker`. As a result, we can remove the aforementioned `allow_cache=False`, which helps generate better code (e.g., in the case of `test_no_grad_copy`, it skipped generating a redundant graph whose only op is returning the input tensor; instead we just generate a single `LOAD_FAST`). 
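The mechanism behind the "cyclic reference" guess is just CPython's garbage collector: an object caught in a reference cycle is not freed when its last outside reference goes away, only when the cycle collector eventually runs. A minimal, self-contained sketch of that behavior (deliberately unrelated to the Dynamo internals, purely to illustrate what eagerly clearing these fields avoids):

```python
import gc
import weakref


class Holder:
    pass


h = Holder()
h.cycle = {"me": h}  # reference cycle: instance -> dict -> instance
ref = weakref.ref(h)

del h
print(ref() is not None)  # True: refcounting alone cannot free a cycle

gc.collect()
print(ref() is None)  # True: only the cycle collector reclaims it
```

If a `VariableTracker` caught in such a cycle keeps a tensor alive past the end of tracing, that extra reference is presumably what defeats the grad-stealing optimization mentioned above; clearing the fields in `cleanup()` breaks the cycles immediately instead of waiting for gc.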
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139487 Approved by: https://github.com/jansel, https://github.com/aakhundov --- torch/_dynamo/output_graph.py | 3 +++ torch/_dynamo/variables/tensor.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/torch/_dynamo/output_graph.py b/torch/_dynamo/output_graph.py index 4be62b1d7db5d4..61d1001d03cc08 100644 --- a/torch/_dynamo/output_graph.py +++ b/torch/_dynamo/output_graph.py @@ -1741,6 +1741,9 @@ def cleanup(self) -> None: self.register_finalizer_fns.clear() self.dynamo_flat_name_to_original_fqn.clear() self.tracing_context.clear() + self.input_source_to_var.clear() + self.unspec_variable_map.clear() + self.backward_state.clear() def set_torch_function_state(self, enabled: bool) -> None: self.torch_function_enabled = enabled diff --git a/torch/_dynamo/variables/tensor.py b/torch/_dynamo/variables/tensor.py index 4bbefc1c1dfd7b..7d07c195524cf7 100644 --- a/torch/_dynamo/variables/tensor.py +++ b/torch/_dynamo/variables/tensor.py @@ -1448,6 +1448,6 @@ def __init__( self.from_tensor = from_tensor def reconstruct(self, codegen): - codegen(self.from_tensor, allow_cache=False) + codegen(self.from_tensor) codegen.load_method("data_ptr") codegen.call_method(0) From 99413cd1a8036774cf3e5167f21ea215b44dc804 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Mon, 4 Nov 2024 19:43:53 +0000 Subject: [PATCH 021/503] [CMake] Fix local MPS builds (#139651) Not sure how it works on some machines, but clean build fails for me after https://github.com/pytorch/pytorch/pull/138636 was landed, even though it works fine on another machine. Solution is to create an empty file when one adds a dependency, but later this dependency will be updated by the build rule Pull Request resolved: https://github.com/pytorch/pytorch/pull/139651 Approved by: https://github.com/atalman --- caffe2/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 5e7e657ea6a61b..a99a0e428a6a21 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -684,7 +684,8 @@ list(APPEND Caffe2_CPU_SRCS ${TORCH_SRCS}) if(USE_MPS) list(APPEND Caffe2_CPU_SRCS ${Caffe2_MPS_SRCS}) if(CAN_COMPILE_METAL) - list(APPEND Caffe2_CPU_SRCS aten/src/ATen/metallib_dummy.cpp) + file(TOUCH ${CMAKE_BINARY_DIR}/aten/src/ATen/metallib_dummy.cpp) + list(APPEND Caffe2_CPU_SRCS ${CMAKE_BINARY_DIR}/aten/src/ATen/metallib_dummy.cpp) endif() endif() From 71dc5df93c88d336b119f3cab8e8d37a1a31da15 Mon Sep 17 00:00:00 2001 From: Will Constable Date: Fri, 1 Nov 2024 14:11:00 -0700 Subject: [PATCH 022/503] [pipelining] Fix 'last backward' counting for dI / dW (#139415) Since any stage can run a mixture of full backwards and split backwards, it is important to count the sum of (full_backwards + backward_weight) when comparing to num microbatches to determine last backward. 
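A condensed sketch of the counting scheme the schedules use after this change (`backward_counter`, `Counter`, and the comparison against the number of microbatches come from the diff below; `is_last_backward` and the `kind` strings are illustrative shorthand for the full/dI/dW actions): full backwards and weight-only backwards advance a per-stage counter, dI-only backwards do not, and the step on which the counter reaches the microbatch count is the one flagged as `last_backward` so DP gradient reduction fires exactly once per stage.

```python
from collections import Counter

backward_counter: Counter[int] = Counter()  # stage_index -> completed backwards


def is_last_backward(stage_index: int, kind: str, n_microbatches: int) -> bool:
    assert kind in ("full", "input", "weight")
    if kind in ("full", "weight"):
        # a microbatch only counts as finished once its weight grads are done
        backward_counter[stage_index] += 1
    # dI-only ("input") backwards never complete a microbatch on their own
    return kind != "input" and backward_counter[stage_index] == n_microbatches
```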
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139415 Approved by: https://github.com/H-Huang --- torch/distributed/pipelining/schedules.py | 46 +++++++++++++++++++---- torch/distributed/pipelining/stage.py | 38 +++++++++++-------- 2 files changed, 61 insertions(+), 23 deletions(-) diff --git a/torch/distributed/pipelining/schedules.py b/torch/distributed/pipelining/schedules.py index a1e19c85133e30..be6bbb7a3f4ed6 100644 --- a/torch/distributed/pipelining/schedules.py +++ b/torch/distributed/pipelining/schedules.py @@ -7,7 +7,7 @@ import logging import re from abc import ABC, abstractmethod -from collections import defaultdict +from collections import Counter, defaultdict from enum import Enum from typing import ( Any, @@ -613,7 +613,9 @@ def _step_microbatches( work.wait() loss = self._maybe_get_loss(self._stage, i) - self._stage.backward_one_chunk(i, loss=loss) + self._stage.backward_one_chunk( + i, loss=loss, last_backward=i == self._n_microbatches - 1 + ) ops = self._stage.get_bwd_send_ops(i) works = _sorted_batch_p2p(ops, desc="bwd_send") @@ -709,7 +711,11 @@ def _step_microbatches( # Backward one chunk loss = self._maybe_get_loss(self._stage, bwd_mb_index) - self._stage.backward_one_chunk(bwd_mb_index, loss=loss) + self._stage.backward_one_chunk( + bwd_mb_index, + loss=loss, + last_backward=bwd_mb_index == self._n_microbatches - 1, + ) # Get the bwd send ops, but don't fire, to be fused with the 1F below bwd_sends = self._stage.get_bwd_send_ops(bwd_mb_index) @@ -748,7 +754,11 @@ def _step_microbatches( # Backward one chunk loss = self._maybe_get_loss(self._stage, bwd_mb_index) - self._stage.backward_one_chunk(bwd_mb_index, loss=loss) + self._stage.backward_one_chunk( + bwd_mb_index, + loss=loss, + last_backward=bwd_mb_index == self._n_microbatches - 1, + ) # Clear previous chunk's backward sends (hopefully they have well finished) if send_work: @@ -1203,7 +1213,8 @@ def _step_microbatches( all_prev_ranks.add(self.stage_index_to_group_rank[stage_index - 1]) if stage_index < self._num_stages - 1: all_next_ranks.add(self.stage_index_to_group_rank[stage_index + 1]) - + # count either full_backward or backward_weight together, to determine when to sync DP grads + backward_counter: Counter[int] = Counter() for time_step, action in enumerate(self.pipeline_order[self.rank]): try: ops: List[dist.P2POp] = [] @@ -1226,10 +1237,13 @@ def _step_microbatches( # perform backward computation stage = stage_index_to_stage[stage_index] loss = self._maybe_get_loss(stage, mb_index) + backward_counter[stage_index] += 1 stage.backward_one_chunk( mb_index, loss=loss, full_backward=True, + last_backward=backward_counter[stage_index] + == self._n_microbatches, ) ops.extend(stage.get_bwd_send_ops(mb_index)) elif computation_type == _ComputationType.BACKWARD_INPUT: @@ -1240,12 +1254,18 @@ def _step_microbatches( mb_index, loss=loss, full_backward=False, + last_backward=False, ) ops.extend(stage.get_bwd_send_ops(mb_index)) elif computation_type == _ComputationType.BACKWARD_WEIGHT: # perform weight update stage = stage_index_to_stage[stage_index] - stage.backward_weight_one_chunk(mb_index) + backward_counter[stage_index] += 1 + stage.backward_weight_one_chunk( + mb_index, + last_backward=backward_counter[stage_index] + == self._n_microbatches, + ) else: raise ValueError(f"Unknown computation type {computation_type}") @@ -1459,6 +1479,8 @@ def _assert_unsharded(stage_idx: int): stage_idx in unsharded_stages ), f"Attempted to compute on sharded {stage_idx=}" + # count either full_backward or 
backward_weight together, to determine when to sync DP grads + backward_counter: Counter[int] = Counter() for time_step, action in enumerate(self.pipeline_order_with_comms[self.rank]): try: comp_type = action.computation_type @@ -1569,10 +1591,13 @@ def _assert_unsharded(stage_idx: int): ) bwd_recv_ops.pop((stage_idx, mb_index)).wait() loss = self._maybe_get_loss(stage, mb_index) + backward_counter[stage_idx] += 1 stage.backward_one_chunk( mb_index, loss=loss, full_backward=True, + last_backward=backward_counter[stage_idx] + == self._n_microbatches, ) # SEND/RECV op are avoided for special case with 2 adjacent stages on same rank # see [Note: V-schedule special case] @@ -1597,6 +1622,7 @@ def _assert_unsharded(stage_idx: int): mb_index, loss=loss, full_backward=False, + last_backward=False, ) # SEND/RECV op are avoided for special case with 2 adjacent stages on same rank # see [Note: V-schedule special case] @@ -1607,8 +1633,12 @@ def _assert_unsharded(stage_idx: int): elif comp_type == BACKWARD_WEIGHT: if stage_uses_fsdp: _assert_unsharded(stage_idx) - - stage.backward_weight_one_chunk(mb_index) + backward_counter[stage_idx] += 1 + stage.backward_weight_one_chunk( + mb_index, + last_backward=backward_counter[stage_idx] + == self._n_microbatches, + ) else: raise ValueError(f"{action=} is unknown or unsupported") except Exception as e: diff --git a/torch/distributed/pipelining/stage.py b/torch/distributed/pipelining/stage.py index bcd6c4c653ec6c..b5145981b3be6c 100644 --- a/torch/distributed/pipelining/stage.py +++ b/torch/distributed/pipelining/stage.py @@ -577,20 +577,15 @@ def forward_maybe_with_nosync(self, *args, **kwargs): out_val = self.submod(*args, **kwargs) return out_val - def backward_maybe_with_nosync(self, backward_type, bwd_kwargs: Dict): + def backward_maybe_with_nosync( + self, backward_type, bwd_kwargs: Dict, last_backward=False + ): """ Whether using PP with FSDP or DDP, there are some runtime differences between the last backward step and the other steps. Namely, we need to accumulate gradients on previous steps and reduce them on the last step, but there are additional state-variables and performance considerations depending on the data parallelism used. This helper should adapt any pipeline parallel schedule to work with common/supported data parallel libraries. """ - full_backward = bwd_kwargs["full_backward"] - if full_backward: - last_backward = self._seen_bwd_chunks == self.chunks - 1 # type: ignore[operator] - else: - # For backwards are split into weight and input, we will see twice as many bwd_chunks - # -1 because we skip the first bwd_chunk backward - last_backward = self._seen_bwd_chunks == 2 * self.chunks - 1 # type: ignore[operator] def perform_backward(backward_type): if backward_type == "full": @@ -726,7 +721,11 @@ def forward_one_chunk( return output def backward_one_chunk( - self, bwd_chunk_id: int, loss=None, full_backward: bool = True + self, + bwd_chunk_id: int, + loss=None, + full_backward: bool = True, + last_backward=False, ): """ Perform backward pass on the module. @@ -737,6 +736,9 @@ def backward_one_chunk( If full_backward is False, it is optional that `dw_runner` was provided to the PipelineStage at __init__ time, and a subsequent call to `backward_weight_one_chunk` is required to invoke dw_runner and complete the backward. + + last_backward is controlled by the schedule and signals synchronization of gradients across DP groups + after the last backward. 
""" self._check_chunk_id(bwd_chunk_id) @@ -773,7 +775,9 @@ def backward_one_chunk( if self.dw_builder: # TODO: We may want to change our semantics so we are allowed to ignore # the 'dw_builder' and call full_backward directly when it is a full_backward op. - self.grads_input, _ = self.backward_maybe_with_nosync("full", bwd_kwargs) + self.grads_input, _ = self.backward_maybe_with_nosync( + "full", bwd_kwargs, last_backward=last_backward + ) if full_backward: self.dw_builder()() else: @@ -781,7 +785,7 @@ def backward_one_chunk( else: if full_backward: self.grads_input, _ = self.backward_maybe_with_nosync( - "full", bwd_kwargs + "full", bwd_kwargs, last_backward=last_backward ) else: grads_input = [] @@ -795,7 +799,7 @@ def backward_one_chunk( # perform the partial backwards for the inputs with a custom backward function # when the "stage_ouput" is a loss, then it is a tensor, otherwise it is a tuple of tensors grads_input, param_groups = self.backward_maybe_with_nosync( - "input", bwd_kwargs + "input", bwd_kwargs, last_backward=last_backward ) # TODO: we dont need to save this, add to dw_runner? @@ -821,7 +825,7 @@ def backward_one_chunk( self._seen_bwd_chunks += 1 logger.debug("%s Backwarded chunk %s", self.log_prefix, bwd_chunk_id) - def backward_weight_one_chunk(self, bwd_chunk_id: int): + def backward_weight_one_chunk(self, bwd_chunk_id: int, last_backward=False): assert bwd_chunk_id in self.dw_runner, ( f"{self.log_prefix} Attempted to run backward_weight_one_chunk for chunk {bwd_chunk_id}" " without first calling `backward_one_chunk(full_backward=False)`" @@ -843,7 +847,9 @@ def backward_weight_one_chunk(self, bwd_chunk_id: int): "param_groups": param_groups, "full_backward": False, } - self.backward_maybe_with_nosync("weight", bwd_kwargs) + self.backward_maybe_with_nosync( + "weight", bwd_kwargs, last_backward=last_backward + ) else: # TODO: figure out a better way to do this: # if inputs does not require gradient, @@ -856,7 +862,9 @@ def backward_weight_one_chunk(self, bwd_chunk_id: int): "input_values": input_values, "full_backward": False, } - self.backward_maybe_with_nosync("full", bwd_kwargs) + self.backward_maybe_with_nosync( + "full", bwd_kwargs, last_backward=last_backward + ) self._seen_bwd_chunks += 1 From 1aa71be56c39908893273bd9558b127159e1ef3a Mon Sep 17 00:00:00 2001 From: Ze Sheng Date: Mon, 4 Nov 2024 20:16:16 +0000 Subject: [PATCH 023/503] [PT2] Decouple decompose_triton_kernel_wrapper_functional from decompose_auto_functionalized (#139526) As title. We may not always want to remove the `triton_kernel_wrapper_functional` for example the references of [`unsafe_remove_auto_functionalized_pass`](https://github.com/pytorch/pytorch/blob/c8ab9b06a28754247588d08cd56e324762fa225a/torch/export/_remove_auto_functionalized_pass.py#L48). 
Test Plan: CI & [D62592946](https://www.internalfb.com/diff/D62592946) Pull Request resolved: https://github.com/pytorch/pytorch/pull/139526 Approved by: https://github.com/zou3519 --- torch/_inductor/fx_passes/post_grad.py | 58 ++++++++++++++++---------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/torch/_inductor/fx_passes/post_grad.py b/torch/_inductor/fx_passes/post_grad.py index e2e6f1fbdc5a73..02df853c9d9f00 100644 --- a/torch/_inductor/fx_passes/post_grad.py +++ b/torch/_inductor/fx_passes/post_grad.py @@ -155,6 +155,9 @@ def post_grad_passes(gm: torch.fx.GraphModule, is_inference: bool): GraphTransformObserver(gm, "reinplace_inplaceable_ops").apply_graph_pass( reinplace_inplaceable_ops ) + GraphTransformObserver( + gm, "decompose_triton_kernel_wrapper_functional" + ).apply_graph_pass(decompose_triton_kernel_wrapper_functional) GraphTransformObserver(gm, "decompose_auto_functionalized").apply_graph_pass( decompose_auto_functionalized ) @@ -735,9 +738,9 @@ def remove_noop_ops(graph: torch.fx.Graph): graph.erase_node(node) -def decompose_auto_functionalized(graph): - """Decomposes auto_functionalized and triton_kernel_wrapper_functional - nodes into clones and the underlying mutation node. +def decompose_triton_kernel_wrapper_functional(graph): + """Decomposes triton_kernel_wrapper_functional nodes into clones and the underlying + mutation node. We assume that the reinplacing pass runs before this; the reinplacing pass tells us (via rewriting the arguments or .meta to those nodes) which @@ -746,14 +749,12 @@ def decompose_auto_functionalized(graph): graph_pass = PatternMatcherPass() @register_graph_pattern( - CallFunctionVarArgs(torch.ops.higher_order.auto_functionalized), + CallFunctionVarArgs(torch.ops.higher_order.triton_kernel_wrapper_functional), pass_dict=graph_pass, ) def _(match: Match, *args, **kwargs): - from torch._higher_order_ops.auto_functionalize import auto_functionalized_dense - - only_clone_these_tensors = tuple( - match.nodes[0].meta.get("only_clone_these_tensors", []) + from torch._higher_order_ops.triton_kernel_wrap import ( + triton_kernel_wrapper_functional_dense, ) flat_args, spec = pytree.tree_flatten((args, kwargs)) @@ -763,19 +764,38 @@ def _(match: Match, *args, **kwargs): # tracing a function with kwargs. def decomp(*flat_args): args, kwargs = pytree.tree_unflatten(flat_args, spec) - assert len(args) == 1 - mode = args[0] - return auto_functionalized_dense(mode, only_clone_these_tensors, **kwargs) + return (triton_kernel_wrapper_functional_dense(*args, **kwargs),) match.replace_by_example(decomp, flat_args, run_functional_passes=False) + graph_pass.apply(graph) + + for node in graph.find_nodes( + op="call_function", + target=torch.ops.higher_order.triton_kernel_wrapper_functional, + ): + raise AssertionError("triton_kernel_wrapper_functional was not removed") + + +def decompose_auto_functionalized(graph): + """Decomposes auto_functionalized nodes into clones and the underlying + mutation node. + + We assume that the reinplacing pass runs before this; the reinplacing pass + tells us (via rewriting the arguments or .meta to those nodes) which + Tensors we should clone and which Tensors are safe to reinplace. 
+ """ + graph_pass = PatternMatcherPass() + @register_graph_pattern( - CallFunctionVarArgs(torch.ops.higher_order.triton_kernel_wrapper_functional), + CallFunctionVarArgs(torch.ops.higher_order.auto_functionalized), pass_dict=graph_pass, ) def _(match: Match, *args, **kwargs): - from torch._higher_order_ops.triton_kernel_wrap import ( - triton_kernel_wrapper_functional_dense, + from torch._higher_order_ops.auto_functionalize import auto_functionalized_dense + + only_clone_these_tensors = tuple( + match.nodes[0].meta.get("only_clone_these_tensors", []) ) flat_args, spec = pytree.tree_flatten((args, kwargs)) @@ -785,7 +805,9 @@ def _(match: Match, *args, **kwargs): # tracing a function with kwargs. def decomp(*flat_args): args, kwargs = pytree.tree_unflatten(flat_args, spec) - return (triton_kernel_wrapper_functional_dense(*args, **kwargs),) + assert len(args) == 1 + mode = args[0] + return auto_functionalized_dense(mode, only_clone_these_tensors, **kwargs) match.replace_by_example(decomp, flat_args, run_functional_passes=False) @@ -829,12 +851,6 @@ def decomp(*flat_args): ): raise AssertionError("auto_functionalized_v2 was not removed") - for node in graph.find_nodes( - op="call_function", - target=torch.ops.higher_order.triton_kernel_wrapper_functional, - ): - raise AssertionError("triton_kernel_wrapper_functional was not removed") - @register_lowering_pattern( CallFunction( From 888110841c771b34d7bbbde12b21dedee7a14e70 Mon Sep 17 00:00:00 2001 From: Shunting Zhang Date: Mon, 4 Nov 2024 00:13:11 -0800 Subject: [PATCH 024/503] [inductor] don't fuse two nodes if likely increase peak memory (#138756) Partially fixing https://github.com/pytorch/pytorch/issues/138685 Add a (relatively safe?) heuristics to skip fusion if we can potentially increasing peak memory. The doc string mainly explains what this PR is doing: ``` The implementation is more like a heuristic since we don't really know if we are at peak or not when trying to fuse these two ndoes. The order of nodes may change later which makes the peak memory estimation hard. Here is how we decide the LOWER BOUND of extra memory allocation if we fuse these 2 nodes: 1. find all buffers read by each node with a single user. These buffers are supposed to be reused if we don't fuses these 2 nodes 2. find the intersection of these buffers for the two node and sum the total buffer size. If we don't fuse these two nodes, we can at lease avoid this much memory allocation. Note that the extra memory allocation is not necessarily causing peak memory increase. This is just a heuristic. We return true only if the saving for fusion can not trade off the extra memory allocation. 
``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/138756 Approved by: https://github.com/jansel ghstack dependencies: #139136 --- test/inductor/test_memory.py | 21 +++++++++++ torch/_inductor/scheduler.py | 68 ++++++++++++++++++++++++++++++++++-- 2 files changed, 87 insertions(+), 2 deletions(-) diff --git a/test/inductor/test_memory.py b/test/inductor/test_memory.py index 185095673a6b5c..82d71026688972 100644 --- a/test/inductor/test_memory.py +++ b/test/inductor/test_memory.py @@ -1,4 +1,5 @@ # Owner(s): ["module: inductor"] +import unittest from unittest import mock import torch @@ -202,6 +203,26 @@ def reorder_with_only_dfs( outp = compiled_model(self.inputs) self.assertTrue(same(outp, outp_corr)) + @unittest.skipIf( + not torch.cuda.is_available() + or torch.cuda.get_device_properties().total_memory < int(1e10), + "Need 10GB memory to be safe to run the test", + ) + def test_fusing_reductions_increase_peak_memory(self): + @torch.compile + def f(a, b, c): + return (a @ c).sum(dim=-1) + (b @ c).sum(dim=-1) + + a = torch.randn(1024 * 32, 16, device=GPU_TYPE) + b = torch.randn(1024 * 32, 16, device=GPU_TYPE) + c = torch.randn(16, 1024 * 32, device=GPU_TYPE) + torch.cuda.reset_peak_memory_stats() + f(a, b, c) + peak_mem = torch.cuda.max_memory_allocated() + + expected_bound = a.size(0) * c.size(1) * a.dtype.itemsize * 2 + self.assertLess(peak_mem, expected_bound) + if __name__ == "__main__": from torch._inductor.test_case import run_tests diff --git a/torch/_inductor/scheduler.py b/torch/_inductor/scheduler.py index f1584d431e0309..3fc04a14d0fffc 100644 --- a/torch/_inductor/scheduler.py +++ b/torch/_inductor/scheduler.py @@ -2778,6 +2778,66 @@ def found_path(node: BaseSchedulerNode) -> bool: def can_fusion_increase_peak_memory( self, node1: BaseSchedulerNode, node2: BaseSchedulerNode + ) -> bool: + """ + Return true if fusing the two nodes can potentially increase peak memory. + + The implementation is more like a heuristic since we don't really know if we are at peak + or not when trying to fuse these two nodes. The order of nodes may change later which makes the + peak memory estimation hard. + + Here is how we decide the LOWER BOUND of extra memory allocation if we fuse these 2 nodes: + 1. find all buffers read by each node with a single user. These buffers are supposed to + be reused if we don't fuse these 2 nodes + 2. find the intersection of these buffers for the two nodes and sum the total buffer size. + If we don't fuse these two nodes, we can at least avoid this much memory allocation. + Note that the extra memory allocation does not necessarily cause a peak memory increase. + This is just a heuristic. + + We return true only if the saving for fusion cannot trade off the extra memory allocation.
+ """ + + from .codegen.wrapper import buffer_reuse_key + + def _find_single_user_inputs( + node: BaseSchedulerNode, + ) -> List[ir.Buffer]: + output = [] + for rd in node.read_writes.reads: + name = rd.name + if name not in self.name_to_buf: + continue + buf = self.name_to_buf[name] + if len(buf.users) == 1: + output.append(buf.node) + return output + + # Check inputs that can be potentially reused + lhs_dep_nodes = _find_single_user_inputs(node1) + rhs_dep_nodes = _find_single_user_inputs(node2) + + lhs_reuse_keys = {buffer_reuse_key(buf) for buf in lhs_dep_nodes} + rhs_reuse_keys = {buffer_reuse_key(buf) for buf in rhs_dep_nodes} + + common_reuse_keys = lhs_reuse_keys.intersection(rhs_reuse_keys) + + memory_overhead = 0 + for key in common_reuse_keys: + try: + memory_overhead += int(key[2]) + except ValueError: + # not an interger. Fallback is to fuse + return False + + bw_saving = self.score_fusion_memory(node1, node2) + + # The factor 32 here is quite arbitrary. + if V.graph.sizevars.statically_known_gt(memory_overhead, 32 * bw_saving): + return True + return False + + def are_long_distant_nodes( + self, node1: BaseSchedulerNode, node2: BaseSchedulerNode ) -> bool: """ This function prevents fusion for nodes that can increase memory @@ -3046,6 +3106,10 @@ def can_fuse(self, node1: BaseSchedulerNode, node2: BaseSchedulerNode) -> bool: why("exceeds max fusion") return False # heuristic not needed for correctness + if self.can_fusion_increase_peak_memory(node1, node2): + why("Fusion will increase peak memory") + return False + if node1.get_operation_names() & node2.ancestors: # node2 depends on node1 outputs if not self.can_fuse_vertical(node1, node2): @@ -3059,8 +3123,8 @@ def can_fuse(self, node1: BaseSchedulerNode, node2: BaseSchedulerNode) -> bool: ): why("score_fusion_memory_threshold") return False - if self.can_fusion_increase_peak_memory(node1, node2): - why("will increase peak memory") + if self.are_long_distant_nodes(node1, node2): + why("Nodes are too far away. Fusing them may increase peak memory.") return False return self.get_backend(device).can_fuse_horizontal(node1, node2) From 91d38a5a82e428cb58aa24880371ad128e38ea39 Mon Sep 17 00:00:00 2001 From: atalman Date: Mon, 4 Nov 2024 21:13:15 +0000 Subject: [PATCH 025/503] Fix cuda Manylinux 2_28 docker images PATH setting (#139631) Enabling Manywheel builds here: https://github.com/pytorch/pytorch/pull/138732 During the build I observe the failure with cuda jobs: ``` -- Compiler does not support SVE extension. Will not build perfkernels. -- Found CUDA: /usr/local/cuda (found version "11.8") -- The CUDA compiler identification is unknown CMake Error at cmake/public/cuda.cmake:47 (enable_language): No CMAKE_CUDA_COMPILER could be found. Tell CMake where to find the compiler by setting either the environment variable "CUDACXX" or the CMake cache entry CMAKE_CUDA_COMPILER to the full path to the compiler, or to the compiler name if it is in the PATH. 
Call Stack (most recent call first): cmake/Dependencies.cmake:44 (include) CMakeLists.txt:851 (include) ``` The correct sequence is supposed to be: ``` -- Found CUDA: /usr/local/cuda (found version "11.8") -- The CUDA compiler identification is NVIDIA 11.8.89 -- Detecting CUDA compiler ABI info -- Detecting CUDA compiler ABI info - done -- Check for working CUDA compiler: /usr/local/cuda/bin/nvcc - skipped -- Detecting CUDA compile features -- Detecting CUDA compile features - done -- Found CUDAToolkit: /usr/local/cuda/include (found version "11.8.89") ``` The issue was found to be a missing PATH setting in the 2_28 Dockerfile. This section exists in the CentOS Dockerfile here: https://github.com/pytorch/pytorch/blob/main/.ci/docker/manywheel/Dockerfile#L174-L175 (Please note these Docker images are not used yet; https://github.com/pytorch/pytorch/pull/138732 should enable using them.) Pull Request resolved: https://github.com/pytorch/pytorch/pull/139631 Approved by: https://github.com/malfet, https://github.com/huydhn --- .ci/docker/manywheel/Dockerfile_2_28 | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.ci/docker/manywheel/Dockerfile_2_28 b/.ci/docker/manywheel/Dockerfile_2_28 index 0af2ee8e944569..655dc363548052 100644 --- a/.ci/docker/manywheel/Dockerfile_2_28 +++ b/.ci/docker/manywheel/Dockerfile_2_28 @@ -130,6 +130,8 @@ FROM cpu_final as cuda_final RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION} COPY --from=cuda /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION} COPY --from=magma /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION} +RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda +ENV PATH=/usr/local/cuda/bin:$PATH FROM common as rocm_final ARG ROCM_VERSION=3.7 From 7deec3942fc5ffce46ca537509f679bee0ec45d8 Mon Sep 17 00:00:00 2001 From: cyy Date: Mon, 4 Nov 2024 21:32:42 +0000 Subject: [PATCH 026/503] [6/N] Don't skip ASAN on some tests (#139565) Fixes #ISSUE_NUMBER Pull Request resolved: https://github.com/pytorch/pytorch/pull/139565 Approved by: https://github.com/ezyang --- aten/src/ATen/native/Scalar.cpp | 3 +++ test/test_meta.py | 10 ---------- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/aten/src/ATen/native/Scalar.cpp b/aten/src/ATen/native/Scalar.cpp index ec19449d4133e2..76cf4a0fad67a9 100644 --- a/aten/src/ATen/native/Scalar.cpp +++ b/aten/src/ATen/native/Scalar.cpp @@ -35,6 +35,9 @@ Scalar item(const Tensor& self) { #endif Scalar _local_scalar_dense_cpu(const Tensor& self) { + if (self.scalar_type() == kBool) { + return Scalar(static_cast(*reinterpret_cast(self.const_data_ptr()))); + } Scalar r; AT_DISPATCH_V2( self.scalar_type(), diff --git a/test/test_meta.py b/test/test_meta.py index 106355d435c37e..9e8ae75d9be013 100644 --- a/test/test_meta.py +++ b/test/test_meta.py @@ -18,7 +18,6 @@ TestCase, skipIfCrossRef, suppress_warnings, - TEST_WITH_ASAN, TEST_WITH_TORCHDYNAMO, run_tests, dtype_abbrs, @@ -1149,7 +1148,6 @@ def _fn(t, *args, **kwargs): return _fn - @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN") @skipIfCrossRef @suppress_warnings @ops(itertools.chain(op_db, foreach_op_db)) @@ -1196,7 +1194,6 @@ def test_meta_outplace(self, device, dtype, op): if op.name != "empty_like": self.assertEqual(ref, meta) - @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN") @skipIfCrossRef @suppress_warnings @ops(itertools.chain(op_db, foreach_op_db)) @@ -1261,21 +1258,18 @@ def _run_dispatch_meta_test(self, device, dtype, op, symbolic_meta, inplace, all func(*args, **kwargs, out=expected) -
@unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN") @skipIfCrossRef @suppress_warnings @ops(itertools.chain(op_db, foreach_op_db)) def test_dispatch_meta_outplace(self, device, dtype, op): self._run_dispatch_meta_test(device, dtype, op, symbolic_meta=False, inplace=False) - @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN") @skipIfCrossRef @suppress_warnings @ops(itertools.chain(op_db, foreach_op_db)) def test_dispatch_meta_inplace(self, device, dtype, op): self._run_dispatch_meta_test(device, dtype, op, symbolic_meta=False, inplace=True) - @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN") @skipIfCrossRef @suppress_warnings @ops(itertools.chain(op_db, foreach_op_db)) @@ -1283,14 +1277,12 @@ def test_dispatch_symbolic_meta_outplace(self, device, dtype, op): self._run_dispatch_meta_test(device, dtype, op, symbolic_meta=True, inplace=False) - @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN") @skipIfCrossRef @suppress_warnings @ops(itertools.chain(op_db, foreach_op_db)) def test_dispatch_symbolic_meta_inplace(self, device, dtype, op): self._run_dispatch_meta_test(device, dtype, op, symbolic_meta=True, inplace=True) - @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN") @skipIfCrossRef @suppress_warnings # only test one dtype, as output stride behavior is the same for all dtypes @@ -1300,7 +1292,6 @@ def test_dispatch_symbolic_meta_inplace(self, device, dtype, op): def test_dispatch_symbolic_meta_outplace_all_strides(self, device, dtype, op): self._run_dispatch_meta_test(device, dtype, op, symbolic_meta=True, inplace=False, all_stride_variants=True) - @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN") @skipIfCrossRef @suppress_warnings # only test one dtype, as output stride behavior is the same for all dtypes @@ -1310,7 +1301,6 @@ def test_dispatch_symbolic_meta_outplace_all_strides(self, device, dtype, op): def test_dispatch_symbolic_meta_inplace_all_strides(self, device, dtype, op): self._run_dispatch_meta_test(device, dtype, op, symbolic_meta=True, inplace=True, all_stride_variants=True) - @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN") @skipIfCrossRef @suppress_warnings # only test one dtype, as output stride behavior is the same for all dtypes From 3d93caf664fbdc3d92469e651629c6e81402a45b Mon Sep 17 00:00:00 2001 From: Will Constable Date: Mon, 4 Nov 2024 09:54:08 -0800 Subject: [PATCH 027/503] [c10d] Add thread-safety initialization warning (#139638) Pull Request resolved: https://github.com/pytorch/pytorch/pull/139638 Approved by: https://github.com/kwen2501, https://github.com/c-p-i-o, https://github.com/XilunWu --- docs/source/distributed.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/source/distributed.rst b/docs/source/distributed.rst index b0661f867c961c..5b3f60f97af42d 100644 --- a/docs/source/distributed.rst +++ b/docs/source/distributed.rst @@ -180,6 +180,10 @@ The package needs to be initialized using the :func:`torch.distributed.init_proc or :func:`torch.distributed.device_mesh.init_device_mesh` function before calling any other methods. Both block until all processes have joined. +.. warning:: + Initialization is not thread-safe. Process group creation should be performed from a single thread, to prevent + inconsistent 'UUID' assignment across ranks, and to prevent races during initialization that can lead to hangs. + .. autofunction:: is_available .. 
autofunction:: init_process_group From 080e0ca5843c293b2ccc87630694f93a37f52f0f Mon Sep 17 00:00:00 2001 From: Henry Tsang Date: Mon, 4 Nov 2024 22:06:07 +0000 Subject: [PATCH 028/503] [aoti tests] enable some aoti package tests for fbcode (#139359) Differential Revision: D65249372 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139359 Approved by: https://github.com/angelayi --- test/inductor/test_aot_inductor_package.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/test/inductor/test_aot_inductor_package.py b/test/inductor/test_aot_inductor_package.py index a0d2716710cdf2..f0953c44440567 100644 --- a/test/inductor/test_aot_inductor_package.py +++ b/test/inductor/test_aot_inductor_package.py @@ -38,12 +38,18 @@ def compile( @unittest.skipIf(sys.platform == "darwin", "No CUDA on MacOS") -@unittest.skipIf(IS_FBCODE, "This is for OSS only") @parameterized_class( [ {"device": "cpu", "package_cpp_only": False}, - {"device": "cpu", "package_cpp_only": True}, ] + + ( + [ + # FIXME: AssertionError: AOTInductor compiled library does not exist at + {"device": "cpu", "package_cpp_only": True} + ] + if not IS_FBCODE + else [] + ) + ( [ {"device": "cuda", "package_cpp_only": False}, From bf5cd8d0116d90d24b8acb38d578b8952dab22ef Mon Sep 17 00:00:00 2001 From: Felix Zimmermann Date: Mon, 4 Nov 2024 22:10:01 +0000 Subject: [PATCH 029/503] Tighten type hints for tensor arithmetic (#135392) Fixes #124015 Pull Request resolved: https://github.com/pytorch/pytorch/pull/135392 Approved by: https://github.com/ezyang --- tools/pyi/gen_pyi.py | 56 ++++++++++++------- torch/_decomp/decompositions.py | 3 +- .../fx_passes/efficient_conv_bn_eval.py | 1 + torch/ao/quantization/_equalize.py | 10 +--- 4 files changed, 42 insertions(+), 28 deletions(-) diff --git a/tools/pyi/gen_pyi.py b/tools/pyi/gen_pyi.py index bd2fcee5e51c4a..84a8163e215479 100644 --- a/tools/pyi/gen_pyi.py +++ b/tools/pyi/gen_pyi.py @@ -177,14 +177,18 @@ def should_bind_method(python_func: PythonSignatureNativeFunctionPair) -> bool: "copy_", ] -binary_ops = ( +shift_ops = ( + "lshift", + "rshift", + "ilshift", + "irshift", # inplace ops +) +arithmetic_ops = ( "add", "sub", "mul", "div", "pow", - "lshift", - "rshift", "mod", "truediv", "matmul", @@ -195,24 +199,26 @@ def should_bind_method(python_func: PythonSignatureNativeFunctionPair) -> bool: "rtruediv", "rfloordiv", "rpow", # reverse arithmetic + "iadd", + "idiv", + "imul", + "isub", + "ifloordiv", + "imod", # inplace ops +) +logic_ops = ( "and", "or", "xor", "rand", "ror", - "rxor", # logic - "iadd", + "rxor", # reverse logic "iand", - "idiv", - "ilshift", - "imul", "ior", - "irshift", - "isub", - "ixor", - "ifloordiv", - "imod", # inplace ops + "ixor", # inplace ops ) +binary_ops = shift_ops + arithmetic_ops + logic_ops + symmetric_comparison_ops = ("eq", "ne") asymmetric_comparison_ops = ("ge", "gt", "lt", "le") comparison_ops = symmetric_comparison_ops + asymmetric_comparison_ops @@ -232,14 +238,24 @@ def sig_for_ops(opname: str) -> list[str]: assert opname.endswith("__") and opname.startswith("__"), f"Unexpected op {opname}" name = opname[2:-2] - if name in binary_ops: - return [f"def {opname}(self, other: Any) -> Tensor: ..."] - elif name in comparison_ops: - sig = f"def {opname}(self, other: Any) -> Tensor: ..." - if name in symmetric_comparison_ops: + if name in arithmetic_ops: + return [ + f"def {opname}(self, other: Union[Tensor, Number, _complex]) -> Tensor: ..." 
+ ] + elif name in logic_ops: + return [f"def {opname}(self, other: Union[Tensor, _bool]) -> Tensor: ..."] + elif name in shift_ops: + return [f"def {opname}(self, other: Union[Tensor, _int]) -> Tensor: ..."] + elif name in symmetric_comparison_ops: + return [ # unsafe override https://github.com/python/mypy/issues/5704 - sig += " # type: ignore[override]" - return [sig] + f"def {opname}(self, other: Union[Tensor, Number, _complex]) -> Tensor: ... # type: ignore[override]", + f"def {opname}(self, other: Any) -> _bool: ...", + ] + elif name in asymmetric_comparison_ops: + return [ + f"def {opname}(self, other: Union[Tensor, Number, _complex]) -> Tensor: ..." + ] elif name in unary_ops: return [f"def {opname}(self) -> Tensor: ..."] elif name in to_py_type_ops: diff --git a/torch/_decomp/decompositions.py b/torch/_decomp/decompositions.py index ffa83a80bcc667..17b5897aed255e 100644 --- a/torch/_decomp/decompositions.py +++ b/torch/_decomp/decompositions.py @@ -2291,7 +2291,8 @@ def native_batch_norm_backward( mean = save_mean_cast invstd = save_invstd_cast if train: - assert save_mean_cast is not None and save_invstd_cast is not None + assert mean is not None and invstd is not None + else: assert running_mean_cast is not None and running_var_cast is not None mean = running_mean_cast diff --git a/torch/_inductor/fx_passes/efficient_conv_bn_eval.py b/torch/_inductor/fx_passes/efficient_conv_bn_eval.py index 4845142caab347..bc6ebbcd5cef6d 100644 --- a/torch/_inductor/fx_passes/efficient_conv_bn_eval.py +++ b/torch/_inductor/fx_passes/efficient_conv_bn_eval.py @@ -33,6 +33,7 @@ def efficient_conv_bn_eval( """ assert bn.running_var is not None + assert bn.running_mean is not None # These lines of code are designed to deal with various cases # like bn without affine transform, and conv without bias diff --git a/torch/ao/quantization/_equalize.py b/torch/ao/quantization/_equalize.py index 08316f755552b7..57a4cfdead2901 100644 --- a/torch/ao/quantization/_equalize.py +++ b/torch/ao/quantization/_equalize.py @@ -128,8 +128,7 @@ def cross_layer_equalization(module1, module2, output_axis=0, input_axis=1): "module type not supported:", type(module1), " ", type(module2) ) - conv1_has_bias = has_bias(module1) - bias = None + bias = get_module_bias(module1) if has_bias(module1) else None weight1 = get_module_weight(module1) weight2 = get_module_weight(module2) @@ -140,9 +139,6 @@ def cross_layer_equalization(module1, module2, output_axis=0, input_axis=1): number input channels of second arg" ) - if conv1_has_bias: - bias = get_module_bias(module1) - weight1_range = channel_range(weight1, output_axis) weight2_range = channel_range(weight2, input_axis) @@ -151,7 +147,7 @@ def cross_layer_equalization(module1, module2, output_axis=0, input_axis=1): scaling_factors = torch.sqrt(weight1_range / weight2_range) inverse_scaling_factors = torch.reciprocal(scaling_factors) - if conv1_has_bias: + if bias is not None: bias = bias * inverse_scaling_factors # formatting the scaling (1D) tensors to be applied on the given argument tensors @@ -168,7 +164,7 @@ def cross_layer_equalization(module1, module2, output_axis=0, input_axis=1): weight2 = weight2 * scaling_factors set_module_weight(module1, weight1) - if conv1_has_bias: + if bias is not None: set_module_bias(module1, bias) set_module_weight(module2, weight2) From 514c466cd98fc654bb49d5f54bba59998f59a038 Mon Sep 17 00:00:00 2001 From: Jane Xu Date: Mon, 4 Nov 2024 10:01:02 -0800 Subject: [PATCH 030/503] Redirect the custom ops landing page :D (#139634) Pull Request 
resolved: https://github.com/pytorch/pytorch/pull/139634 Approved by: https://github.com/zou3519 --- docs/source/notes/custom_operators.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/source/notes/custom_operators.rst b/docs/source/notes/custom_operators.rst index af3b015b582aea..af744263e7285d 100644 --- a/docs/source/notes/custom_operators.rst +++ b/docs/source/notes/custom_operators.rst @@ -3,4 +3,10 @@ PyTorch Custom Operators Landing Page ===================================== -`This page has moved. Click here for the new page. `_ +This page has moved. + +Redirecting to the new page... + +.. raw:: html + + From 54c69a785ba87bd3b99b96f9dfd5cd39aca85639 Mon Sep 17 00:00:00 2001 From: Yidi Wu Date: Fri, 1 Nov 2024 14:54:04 -0700 Subject: [PATCH 031/503] [hop free symbols][refactor] make bound_symbols a dictionary (#138345) Code refactoring only. Change all self.tx.output.bound_symbols to self.tx.output.root_tracer.bound_symbols. Pull Request resolved: https://github.com/pytorch/pytorch/pull/138345 Approved by: https://github.com/zou3519 --- torch/_dynamo/output_graph.py | 14 +++++++++----- torch/_dynamo/variables/builder.py | 13 ++++++++++--- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/torch/_dynamo/output_graph.py b/torch/_dynamo/output_graph.py index 61d1001d03cc08..d3ebedb86c6ef2 100644 --- a/torch/_dynamo/output_graph.py +++ b/torch/_dynamo/output_graph.py @@ -287,10 +287,6 @@ def __init__( # aren't explicit graph inputs. Used by shape guard self.tracked_fakes: List[TrackedFake] = [] - # List of symbols for which we have exact bindings in the arguments - # already - self.bound_symbols: Set[sympy.Symbol] = set() - shape_env = ShapeEnv( # Reference Cycle! # Share a reference to the list of TrackedFake. @@ -549,6 +545,10 @@ def input_name_to_proxy(self): def real_value_cache(self): return self.current_tracer.real_value_cache + @property + def bound_symbols(self): + return self.current_tracer.bound_symbols + # If you are here, and you're looking for create_graph_input, # to avoid ambiguity, please call one of the following: # - self.current_tracer.create_graph_input @@ -674,7 +674,6 @@ def bind_symint(s: torch.SymInt, prop): s0 = s.node.expr if s0 in self.bound_symbols: return - self.bound_symbols.add(s0) log.debug("bind_symint %s %s", s, prop.name()) # TODO: don't readd symint if we already have it in graph # (this is harmless because we do remove the unused ones later) @@ -684,6 +683,7 @@ def bind_symint(s: torch.SymInt, prop): before=True, source=prop, ) + self.root_tracer.bound_symbols[s0] = proxy set_example_value(proxy.node, s) assert isinstance(s, torch.SymInt) proxy.node.meta["grapharg"] = GraphArg( @@ -1880,6 +1880,10 @@ def __init__( # rewrite the HigherOrderOperator call using the traced body_fn. # Dicts maintain the order of args for the HigherOrderOperator call. self.lifted_freevars = {} + + # map symbols to their bound proxy placeholders. + self.bound_symbols: Dict[sympy.Symbol, torch.fx.Proxy] = {} + self.prev_inst = None # True if this tracer is currently tracing into torch.utils.checkpoint # as part of speculate_subgraph. 
diff --git a/torch/_dynamo/variables/builder.py b/torch/_dynamo/variables/builder.py index 3a62af5a8e7002..dc7c801e68b075 100644 --- a/torch/_dynamo/variables/builder.py +++ b/torch/_dynamo/variables/builder.py @@ -32,6 +32,8 @@ Union, ) +import sympy + import torch from torch import SymInt from torch._guards import GuardSource, TracingContext @@ -973,7 +975,11 @@ def build_key_value(i, k, v): ) # We bind the new_symint to graph input. set_example_value(sym_node_proxy.node, new_symint) - self.tx.output.bound_symbols.add(new_symint.node.expr) + sym_expr = new_symint.node.expr + assert isinstance( + sym_expr, sympy.Symbol + ), f"{sym_expr} is not a basic Symbol." + self.tx.output.root_tracer.bound_symbols[sym_expr] = sym_node_proxy self.tx.output.tracked_fakes.append( TrackedFake(new_symint, new_source, None) ) @@ -1757,7 +1763,6 @@ def wrap_symint(self, value): if TracingContext.get().force_unspec_int_unbacked_size_like: wrapped_value = shape_env.create_unbacked_symint() _constrain_range_for_size(wrapped_value) - self.tx.output.bound_symbols.add(wrapped_value.node.expr) self.tx.output.tracked_fakes.append( TrackedFake(wrapped_value, self.source, None) ) @@ -1803,7 +1808,6 @@ def wrap_symint(self, value): source=self.source, dynamic_dim=dynamic_dim, ) - self.tx.output.bound_symbols.add(wrapped_value.node.expr) self.tx.output.tracked_fakes.append( TrackedFake(wrapped_value, self.source, None) @@ -1825,6 +1829,9 @@ def wrap_symint(self, value): source=self.get_source(), ) + sym_expr = wrapped_value.node.expr + assert isinstance(sym_expr, sympy.Symbol), f"{sym_expr} is not a basic Symbol." + self.tx.output.root_tracer.bound_symbols[sym_expr] = proxy set_example_value(proxy.node, wrapped_value) unspec_var = SymNodeVariable(proxy, wrapped_value, **options) self.tx.output.unspec_variable_map[self.name] = unspec_var From dc3a6a9d0808b2554f41482f5b8fea3424acc929 Mon Sep 17 00:00:00 2001 From: Yidi Wu Date: Fri, 1 Nov 2024 14:54:05 -0700 Subject: [PATCH 032/503] [hop free symbols][refactor] make create_graph_input always take example_value (#138428) Code refactoring only. We move the wrap_to_fake_tensor_logic out of wrap_fx_proxy for placeholders to provide the invariant that **all graph inputs must set their example values when creating the inputs**. This invariant helps us to identify all the free symbols in the graph in top-level and sub-graphs. 
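A minimal way to observe this invariant from outside Dynamo (a sketch only; it assumes the graph handed to a custom backend keeps the `example_value` metadata that the placeholder assertion added later in this diff checks):

```python
import torch

captured = {}

def record_backend(gm, example_inputs):
    # Stash the Dynamo-produced GraphModule so we can inspect its placeholders.
    captured["gm"] = gm
    return gm.forward

@torch.compile(backend=record_backend)
def fn(x):
    return x.sin() + 1

fn(torch.randn(4))

# With this refactor, every graph input is created together with its example
# value, so each placeholder node should already carry it in node.meta.
for node in captured["gm"].graph.nodes:
    if node.op == "placeholder":
        assert "example_value" in node.meta
```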
Pull Request resolved: https://github.com/pytorch/pytorch/pull/138428 Approved by: https://github.com/ezyang, https://github.com/zou3519 ghstack dependencies: #138345 --- test/dynamo/test_autograd_function.py | 8 +- test/dynamo/test_higher_order_ops.py | 10 +- test/functorch/test_control_flow.py | 4 +- torch/_dynamo/output_graph.py | 22 +++-- torch/_dynamo/variables/builder.py | 102 +++++++++++++++----- torch/_dynamo/variables/higher_order_ops.py | 21 ++-- 6 files changed, 120 insertions(+), 47 deletions(-) diff --git a/test/dynamo/test_autograd_function.py b/test/dynamo/test_autograd_function.py index e9b913ed9559db..235b1ee1b5e476 100644 --- a/test/dynamo/test_autograd_function.py +++ b/test/dynamo/test_autograd_function.py @@ -546,14 +546,14 @@ def forward(self, L_x_: "f32[]", L_z_: "f32[]", L_weird_b: "f32[]", L_weird_c: " return (autograd_function_apply,) class fwd_body_0(torch.nn.Module): - def forward(self, ctx, x: "f32[]", z: "f32[]", l_weird_b: "f32[]", l_weird_c: "f32[]"): + def forward(self, ctx : torch.autograd.function.Function, x: "f32[]", z: "f32[]", l_weird_b: "f32[]", l_weird_c: "f32[]"): mul: "f32[]" = l_weird_b * l_weird_c clone: "f32[]" = x.clone(); x = None mul_1: "f32[]" = mul * clone; mul = clone = None return (mul_1, [l_weird_b, l_weird_c]) class bwd_body_0(torch.nn.Module): - def forward(self, ctx, grad: "f32[]", l_weird_b: "f32[]", l_weird_c: "f32[]"): + def forward(self, ctx : torch.autograd.function.Function, grad: "f32[]", l_weird_b: "f32[]", l_weird_c: "f32[]"): _set_grad_enabled = torch._C._set_grad_enabled(False); _set_grad_enabled = None mul: "f32[]" = grad * l_weird_b; l_weird_b = None @@ -1112,14 +1112,14 @@ def forward(self, L_x_: "f32[]", L_y_: "f32[]"): return (getitem, getitem_1) class fwd_body_0(torch.nn.Module): - def forward(self, ctx, x: "f32[]", y: "f32[]"): + def forward(self, ctx : torch.autograd.function.Function, x: "f32[]", y: "f32[]"): out1: "f32[]" = x.sin(); x = None out2: "f32[]" = y * 2; y = None return ((out1, out2), []) class bwd_body_0(torch.nn.Module): - def forward(self, ctx, grad1: "f32[]", grad2: "f32[]"): + def forward(self, ctx : torch.autograd.function.Function, grad1: "f32[]", grad2: "f32[]"): _set_grad_enabled = torch._C._set_grad_enabled(False); _set_grad_enabled = None cos: "f32[]" = grad1.cos(); grad1 = None diff --git a/test/dynamo/test_higher_order_ops.py b/test/dynamo/test_higher_order_ops.py index ff3c83863a5b02..bcfc07146e87e0 100644 --- a/test/dynamo/test_higher_order_ops.py +++ b/test/dynamo/test_higher_order_ops.py @@ -1197,7 +1197,7 @@ def forward(self, L_xs_ : torch.Tensor, L_y_ : torch.Tensor): self.assertExpectedInline( body_graph, """\ -def forward(self, child, l_y_): +def forward(self, child : torch.Tensor, l_y_ : torch.Tensor): child_1 = child[0]; child_1 = None map_body_0 = self.map_body_0 map_impl = torch.ops.higher_order.map_impl(map_body_0, [child], [l_y_]); map_body_0 = child = l_y_ = None @@ -1229,7 +1229,7 @@ def forward(self, L_x_ : torch.Tensor): self.assertExpectedInline( body_graph, """\ -def forward(self, child): +def forward(self, child : torch.Tensor): child_1 = child.sin() child_2 = child.sin(); child = None return (child_1, child_2)""", @@ -1270,7 +1270,7 @@ def forward(self, L_x_ : torch.Tensor): self.assertExpectedInline( body_graph, """\ -def forward(self, child): +def forward(self, child : torch.Tensor): return (child, child, child, child, child, child, child)""", ) @@ -1313,7 +1313,7 @@ def forward(self, L_x_ : torch.Tensor): self.assertExpectedInline( body_graph, """\ -def 
forward(self, child, const_unused): +def forward(self, child : torch.Tensor, const_unused : int): add = child + 3; child = None sin = torch.sin(add); add = None return (sin,)""", @@ -1347,7 +1347,7 @@ def forward(self, L_x_ : torch.Tensor): self.assertExpectedInline( body_graph, """\ -def forward(self, child, const_unused): +def forward(self, child : torch.Tensor, const_unused : int): add = child + 3; child = None sin = torch.sin(add); add = None return (sin,)""", diff --git a/test/functorch/test_control_flow.py b/test/functorch/test_control_flow.py index b6c0e103dfee48..91561308177365 100644 --- a/test/functorch/test_control_flow.py +++ b/test/functorch/test_control_flow.py @@ -3150,7 +3150,7 @@ def forward(self, L_iter_ : torch.Tensor, L_x_ : torch.Tensor, L_self_buffers_de self.assertExpectedInline( gm.cond_fn_0.code.strip(), """\ -def forward(self, l_iter_, l_x_, l_self_buffers_dec__cond_fn, l_self_modules_linear_parameters_bias__body_fn, l_self_modules_linear_parameters_weight__body_fn): +def forward(self, l_iter_ : torch.Tensor, l_x_ : torch.Tensor, l_self_buffers_dec__cond_fn, l_self_modules_linear_parameters_bias__body_fn, l_self_modules_linear_parameters_weight__body_fn): sub = l_iter_ - l_self_buffers_dec__cond_fn; l_iter_ = l_self_buffers_dec__cond_fn = None gt = sub > 0; sub = None return gt""", # noqa: B950 @@ -3158,7 +3158,7 @@ def forward(self, l_iter_, l_x_, l_self_buffers_dec__cond_fn, l_self_modules_lin self.assertExpectedInline( gm.body_fn_0.code.strip(), """\ -def forward(self, l_iter_, l_x_, l_self_buffers_dec__cond_fn, l_self_modules_linear_parameters_bias__body_fn, l_self_modules_linear_parameters_weight__body_fn): +def forward(self, l_iter_ : torch.Tensor, l_x_ : torch.Tensor, l_self_buffers_dec__cond_fn, l_self_modules_linear_parameters_bias__body_fn, l_self_modules_linear_parameters_weight__body_fn): child = l_iter_ - 1; l_iter_ = None child_1 = torch._C._nn.linear(l_x_, l_self_modules_linear_parameters_weight__body_fn, l_self_modules_linear_parameters_bias__body_fn); l_x_ = l_self_modules_linear_parameters_weight__body_fn = l_self_modules_linear_parameters_bias__body_fn = None return (child, child_1)""", # noqa: B950 diff --git a/torch/_dynamo/output_graph.py b/torch/_dynamo/output_graph.py index d3ebedb86c6ef2..dc576a34f81cb1 100644 --- a/torch/_dynamo/output_graph.py +++ b/torch/_dynamo/output_graph.py @@ -452,11 +452,14 @@ def get_backward_state_proxy(self): if self.backward_state_proxy is None: if self.export: unimplemented("backward_state does not support export") + example_value = BackwardState() self.backward_state_proxy = self.root_tracer.create_graph_input( - "dynamo_backward_state", BackwardState, source=BackwardStateSource() + "dynamo_backward_state", + type(example_value), + example_value, + source=BackwardStateSource(), ) self.backward_state_proxy.node.meta["grapharg"] = BackwardStateGraphArg() - set_example_value(self.backward_state_proxy.node, BackwardState()) self.backward_state_var = self.new_var() return self.backward_state_proxy @@ -679,12 +682,12 @@ def bind_symint(s: torch.SymInt, prop): # (this is harmless because we do remove the unused ones later) proxy = self.root_tracer.create_graph_input( str(s0), - torch.SymInt, + type(s), + s, before=True, source=prop, ) self.root_tracer.bound_symbols[s0] = proxy - set_example_value(proxy.node, s) assert isinstance(s, torch.SymInt) proxy.node.meta["grapharg"] = GraphArg( prop, @@ -2137,7 +2140,9 @@ def remove_node(self, node): # for SymInts that may occur in the tensor argument. 
# Remove this if https://github.com/pytorch/pytorch/issues/99007 gets # fixed. - def create_graph_input(self, name, type_expr=None, before=False, source=None): + def create_graph_input( + self, name, type_expr, example_value, before=False, source=None + ): log.debug( "create_graph_input %s %s", name, @@ -2181,6 +2186,7 @@ def create_graph_input(self, name, type_expr=None, before=False, source=None): ctx = self.graph.inserting_before(None) with ctx: proxy = self.create_proxy("placeholder", name, (), {}, type_expr=type_expr) + set_example_value(proxy.node, example_value) if self.input_name_to_proxy and before: k, v = self.input_name_to_proxy.popitem() self.input_name_to_proxy[name] = proxy @@ -2201,8 +2207,10 @@ def lift_tracked_freevar_to_input(self, proxy): # If that is the case, just return the already lifted Proxy. if proxy in self.lifted_freevars: return self.lifted_freevars[proxy] - new_proxy = self.create_graph_input(proxy.node.name) - set_example_value(new_proxy.node, proxy.node.meta["example_value"]) + example_value = proxy.node.meta["example_value"] + new_proxy = self.create_graph_input( + proxy.node.name, type(example_value), example_value + ) self.lifted_freevars[proxy] = new_proxy if self.parent is not None and proxy.tracer != self.parent: self.parent.lift_tracked_freevar_to_input(proxy) diff --git a/torch/_dynamo/variables/builder.py b/torch/_dynamo/variables/builder.py index dc7c801e68b075..02c9dd12854206 100644 --- a/torch/_dynamo/variables/builder.py +++ b/torch/_dynamo/variables/builder.py @@ -36,6 +36,7 @@ import torch from torch import SymInt +from torch._dynamo.utils import clone_input from torch._guards import GuardSource, TracingContext from torch._higher_order_ops.torchbind import call_torchbind from torch._ops import HigherOrderOperator @@ -99,7 +100,6 @@ from ..utils import ( _extract_tensor_dict, build_checkpoint_variable, - clone_input, common_constant_types, get_fake_value, get_locals_to_steal, @@ -962,6 +962,7 @@ def build_key_value(i, k, v): sym_node_proxy = self.tx.output.root_tracer.create_graph_input( re.sub(r"[^a-zA-Z0-9]+", "_", self.name), type(new_symint), + new_symint, source=new_source, ) @@ -1143,6 +1144,7 @@ def build_key_value(i, k, v): proxy = self.tx.output.root_tracer.create_graph_input( re.sub(r"[^a-zA-Z0-9]+", "_", self.name), type(value), + value, source=self.source, ) @@ -1187,6 +1189,7 @@ def build_key_value(i, k, v): proxy = self.tx.output.root_tracer.create_graph_input( re.sub(r"[^a-zA-Z0-9]+", "_", self.name), type(value), + fake_script_obj, source=self.source, ) @@ -1266,7 +1269,10 @@ def wrap_listlike(self, value: Union[tuple, list, odict_values, NamedTuple]): source = self.source assert isinstance(value, list) tensor_list_proxy = self.tx.output.root_tracer.create_graph_input( - re.sub(r"[^a-zA-Z0-9]+", "_", self.name), type(value), source=source + re.sub(r"[^a-zA-Z0-9]+", "_", self.name), + type(value), + value, + source=source, ) tensor_list_proxy.node.meta["steal_arg"] = True @@ -1586,18 +1592,6 @@ def wrap_tensor(self, value: torch.Tensor): # By this point, we should have deduplicated all tensors self.assert_not_wrapped_by_this_graph(value) - # tx.output has multiple tracers if we're introspecting HigherOrderOperator. - # When we've discovered an untracked tensor, then we actually need - # to get Dynamo to track the tensor (which is what this function does) - # and put it as a graph input on the root tracer. 
Later on, - # if the input is actually used in the body of the HigherOrderOperator, - # then the relevant SubgraphTracer will lift it to being an input of - # the subgraph. - # See NOTE [HigherOrderOperator tracing design] for more details. - - tensor_proxy = self.tx.output.root_tracer.create_graph_input( - re.sub(r"[^a-zA-Z0-9]+", "_", self.name), type(value), source=source - ) options = {} if type(value) in config.traceable_tensor_subclasses: options["torch_function_fn"] = build_torch_function_fn( @@ -1635,10 +1629,30 @@ def wrap_tensor(self, value: torch.Tensor): "requires some design around FSDP + torch.compile." ) + # tx.output has multiple tracers if we're introspecting HigherOrderOperator. + # When we've discovered an untracked tensor, then we actually need + # to get Dynamo to track the tensor (which is what this function does) + # and put it as a graph input on the root tracer. Later on, + # if the input is actually used in the body of the HigherOrderOperator, + # then the relevant SubgraphTracer will lift it to being an input of + # the subgraph. + # See NOTE [HigherOrderOperator tracing design] for more details. + + example_value = wrap_to_fake_tensor_and_record( + value, tx=self.tx, is_tensor=True, source=source + ) + tensor_proxy = self.tx.output.root_tracer.create_graph_input( + re.sub(r"[^a-zA-Z0-9]+", "_", self.name), + type(value), + example_value, + source=source, + ) + cache_real_value_when_export(self.tx, tensor_proxy, value) + tensor_variable = wrap_fx_proxy( tx=self.tx, proxy=tensor_proxy, - example_value=value, + example_value=example_value, subclass_type=subclass_type, source=source, **options, @@ -1724,15 +1738,25 @@ def wrap_numpy_ndarray(self, value): # that there's not another great way to do this atm. # This creates the right graphargs, as well as registration for guards in tensor names and shape env. LazyVariableTracker.realize_all(VariableBuilder(self.tx, source)(tensor_value)) + example_value = wrap_to_fake_tensor_and_record( + tensor_value, + tx=self.tx, + is_tensor=False, + source=source, + ) proxy = self.tx.output.root_tracer.create_graph_input( - re.sub(r"[^a-zA-Z0-9]+", "_", self.name), type(tensor_value), source=source + re.sub(r"[^a-zA-Z0-9]+", "_", self.name), + type(tensor_value), + example_value, + source=source, ) + cache_real_value_when_export(self.tx, proxy, tensor_value) options = {"source": source} numpy_ndarray_variable = wrap_fx_proxy_cls( target_cls=NumpyNdarrayVariable, tx=self.tx, proxy=proxy, - example_value=tensor_value, + example_value=example_value, **options, ) @@ -1826,13 +1850,13 @@ def wrap_symint(self, value): proxy = self.tx.output.root_tracer.create_graph_input( re.sub(r"[^a-zA-Z0-9]+", "_", self.name), type(wrapped_value), + wrapped_value, source=self.get_source(), ) sym_expr = wrapped_value.node.expr assert isinstance(sym_expr, sympy.Symbol), f"{sym_expr} is not a basic Symbol." self.tx.output.root_tracer.bound_symbols[sym_expr] = proxy - set_example_value(proxy.node, wrapped_value) unspec_var = SymNodeVariable(proxy, wrapped_value, **options) self.tx.output.unspec_variable_map[self.name] = unspec_var @@ -1893,21 +1917,27 @@ def wrap_symfloat(self, value): # Tensor. However, we never let the UnspecializedPythonVariable escape # here, so there should never actually be any guards against this # source. 
- options = {"source": FloatTensorSource(self.get_source()), "raw_value": value} + source = FloatTensorSource(self.get_source()) + options = {"source": source, "raw_value": value} # TODO: Maybe the tensor-ification should be built into the source, # rather than by special pattern match + example_value = wrap_to_fake_tensor_and_record( + wrapped_value, tx=self.tx, is_tensor=False, source=source + ) proxy = self.tx.output.root_tracer.create_graph_input( re.sub(r"[^a-zA-Z0-9]+", "_", self.name), type(wrapped_value), - source=self.get_source(), + example_value, + source=source, ) + cache_real_value_when_export(self.tx, proxy, wrapped_value) unspec_var = wrap_fx_proxy_cls( UnspecializedPythonVariable, tx=self.tx, proxy=proxy, - example_value=wrapped_value, + example_value=example_value, **options, ) assert isinstance(unspec_var, UnspecializedPythonVariable) @@ -1971,17 +2001,22 @@ def wrap_unspecialized_primitive(self, value): options = {"source": self.get_source()} options.update({"raw_value": value}) + example_value = wrap_to_fake_tensor_and_record( + wrapped_value, tx=self.tx, is_tensor=False, source=self.get_source() + ) proxy = self.tx.output.root_tracer.create_graph_input( re.sub(r"[^a-zA-Z0-9]+", "_", self.name), type(wrapped_value), + example_value, source=self.get_source(), ) + cache_real_value_when_export(self.tx, proxy, wrapped_value) unspec_var = wrap_fx_proxy_cls( UnspecializedPythonVariable, tx=self.tx, proxy=proxy, - example_value=wrapped_value, + example_value=example_value, **options, ) self.tx.output.unspec_variable_map[self.name] = unspec_var @@ -2069,6 +2104,17 @@ def wrap_fx_proxy( return result +def cache_real_value_when_export(tx, proxy, example_value): + if tx.export: + # The legacy behavior for real value cache with subclasses was + # to perform a clone WITHOUT preserving the subclass. It's + # not entirely clear this is what you actually want though. + with torch._C.DisableTorchFunctionSubclass(): + proxy.tracer.real_value_cache[proxy.node] = _clone_input( + example_value, tx.fake_mode + ) + + # Note: Unfortunate split due to some gross classes existing that subclass TensorVariable # Should be compositional instead # @@ -2147,7 +2193,16 @@ def _wrap_fx_preexisting_tensor( if "guards" in options and options["guards"] is not None: tx.output.guards.update(options["guards"]) - assert "example_value" not in proxy.node.meta, f"{proxy.node.meta['example_value']}" + # Placeholders always carry example_value in node.meta. + # non-placeholders always have no example_value in node.meta + if proxy.node.op == "placeholder": + assert ( + "example_value" in proxy.node.meta + ), f"placeholder {proxy} doesn't have 'example_value' in node.meta" + else: + assert ( + "example_value" not in proxy.node.meta + ), f"{proxy.node.meta['example_value']}" # See NOTE: [Deferring tensor pack/unpack hooks until runtime] with torch._dynamo.utils._disable_saved_tensors_hooks_during_tracing(): @@ -2155,6 +2210,7 @@ def _wrap_fx_preexisting_tensor( if maybe_get_fake_mode(tensor) is tx.fake_mode: pass else: + cache_real_value_when_export(tx, proxy, tensor) if tx.export: # The legacy behavior for real value cache with subclasses was # to perform a clone WITHOUT preserving the subclass. 
It's diff --git a/torch/_dynamo/variables/higher_order_ops.py b/torch/_dynamo/variables/higher_order_ops.py index af1dca5f01a7e3..6a6f3e51186474 100644 --- a/torch/_dynamo/variables/higher_order_ops.py +++ b/torch/_dynamo/variables/higher_order_ops.py @@ -199,20 +199,24 @@ def validate_args_and_maybe_create_graph_inputs( continue elif set_subgraph_inputs == "semi_automatic": if isinstance(a, AutogradFunctionContextVariable): + example_value = a.as_proxy().node.meta["example_value"] arg_name = ( a.as_proxy().node.name if sub_args_names is None else sub_args_names[idx] ) - tracer.create_graph_input(arg_name) + tracer.create_graph_input(arg_name, a.python_type(), example_value) elif a.maybe_fx_node() is not None: node = a.maybe_fx_node() + example_value = node.meta["example_value"] arg_name = ( a.as_proxy().node.name if sub_args_names is None else sub_args_names[idx] ) - new_proxy = tracer.create_graph_input(arg_name) + new_proxy = tracer.create_graph_input( + arg_name, a.python_type(), example_value + ) example_value = ( node.meta["example_value"] if "example_value" in node.meta @@ -237,26 +241,31 @@ def validate_args_and_maybe_create_graph_inputs( if sub_args_names is None else f"const_unused_{sub_args_names[idx]}" ) - tracer.create_graph_input(arg_name) + tracer.create_graph_input( + arg_name, a.python_type(), a.as_python_constant() + ) new_arg = a # Weird special case, we probably want to delete it or fold it # into the next case (of `a` being placeable into a graph) elif isinstance(a, AutogradFunctionContextVariable): + example_value = a.as_proxy().node.meta["example_value"] arg_name = ( a.as_proxy().node.name if sub_args_names is None else sub_args_names[idx] ) - tracer.create_graph_input(arg_name) + tracer.create_graph_input(arg_name, a.python_type(), example_value) new_arg = a # If `a` can be put into a graph elif a.maybe_fx_node() is not None: node = a.maybe_fx_node() - arg_name = node.name if sub_args_names is None else sub_args_names[idx] - new_proxy = tracer.create_graph_input(arg_name) example_value = ( node.meta["example_value"] if "example_value" in node.meta else None ) + arg_name = node.name if sub_args_names is None else sub_args_names[idx] + new_proxy = tracer.create_graph_input( + arg_name, a.python_type(), example_value + ) new_arg = wrap_fx_proxy_cls( target_cls=type(a), tx=tx, From ac20d0f8936eec70d3423433125208a7774347b2 Mon Sep 17 00:00:00 2001 From: Yidi Wu Date: Fri, 1 Nov 2024 14:54:05 -0700 Subject: [PATCH 033/503] [hop free symbols][refactor] make map's save_for_backward to handle int (#138558) Pull Request resolved: https://github.com/pytorch/pytorch/pull/138558 Approved by: https://github.com/zou3519 ghstack dependencies: #138345, #138428 --- torch/_higher_order_ops/map.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/torch/_higher_order_ops/map.py b/torch/_higher_order_ops/map.py index d57d68d5e473f7..dbf07b24964011 100644 --- a/torch/_higher_order_ops/map.py +++ b/torch/_higher_order_ops/map.py @@ -27,6 +27,8 @@ _unstack_pytree, clone_outputs_aliasing_inputs, prepare_fw_with_masks, + save_tensors_and_symints_for_backward, + saved_tensors_and_symints, ) @@ -157,7 +159,7 @@ def flat_fn(*flat_args): class MapAutogradOp(torch.autograd.Function): @staticmethod def forward(ctx, fw_graph, joint_graph, num_mapped_args, *flat_args): - ctx.save_for_backward(*flat_args) + save_tensors_and_symints_for_backward(ctx, flat_args) ctx._joint_graph = joint_graph ctx._num_mapped_args = num_mapped_args with torch._C._AutoDispatchBelowAutograd(): @@ 
-169,7 +171,7 @@ def forward(ctx, fw_graph, joint_graph, num_mapped_args, *flat_args): @staticmethod def backward(ctx, *flat_grads): - fw_args = ctx.saved_tensors + fw_args = saved_tensors_and_symints(ctx) fw_mapped_args = fw_args[: ctx._num_mapped_args] pos_args = fw_args[ctx._num_mapped_args :] From c5b79699e1aec4436745ba8c32b2d6e36951bff7 Mon Sep 17 00:00:00 2001 From: Yidi Wu Date: Fri, 1 Nov 2024 14:54:06 -0700 Subject: [PATCH 034/503] [hop free symbols] replace ctx.save_for_backward to support symints/ints (#138737) Pull Request resolved: https://github.com/pytorch/pytorch/pull/138737 Approved by: https://github.com/drisspg, https://github.com/zou3519, https://github.com/Chillee ghstack dependencies: #138345, #138428, #138558 --- test/inductor/test_flex_attention.py | 49 ++++++++++++++++- torch/_higher_order_ops/flex_attention.py | 66 +++++++++++++---------- torch/_higher_order_ops/utils.py | 4 +- 3 files changed, 90 insertions(+), 29 deletions(-) diff --git a/test/inductor/test_flex_attention.py b/test/inductor/test_flex_attention.py index e647558e70c28b..d0fc98b1ba6e16 100644 --- a/test/inductor/test_flex_attention.py +++ b/test/inductor/test_flex_attention.py @@ -3005,7 +3005,12 @@ def forward(self, x, block_mask=None): qkv = self.qkv(x).view(B, T, 3, self.n_head, self.head_dim) qkv = qkv.permute(2, 0, 3, 1, 4) q, k, v = qkv - y = flex_attention(q, k, v, block_mask=block_mask) + y = flex_attention( + q, + k, + v, + block_mask=block_mask, + ) return y.transpose(1, 2).contiguous().view(B, T, C) model = SimpleAttention().cuda() @@ -3033,6 +3038,48 @@ def forward(self, x, block_mask=None): self.assertEqual(torch._dynamo.utils.counters["aot_autograd"]["ok"], 2) + @supported_platform + def test_symbol_closure_in_score_mod(self): + class SimpleAttention(torch.nn.Module): + def __init__(self, dim=512, n_head=8): + super().__init__() + self.qkv = torch.nn.Linear(dim, 3 * dim) + self.n_head = n_head + self.head_dim = dim // n_head + + def forward(self, x, block_mask=None): + B, T, C = x.size() + qkv = self.qkv(x).view(B, T, 3, self.n_head, self.head_dim) + qkv = qkv.permute(2, 0, 3, 1, 4) + q, k, v = qkv + return flex_attention( + q, + k, + v, + score_mod=lambda s, b, h, q, k: s + B, + block_mask=block_mask, + ) + + model = SimpleAttention().cuda() + from torch._dynamo.testing import EagerAndRecordGraphs + + backend = EagerAndRecordGraphs() + model.compile(mode="default", dynamic=True, backend=backend) + sequence_len = 256 + + torch._dynamo.reset() + for batch_shape in [4, 16, 32]: + x = torch.randn(batch_shape, sequence_len, 512).cuda() + model(x) + self.assertEqual(len(backend.graphs), 1) + self.assertExpectedInline( + backend.graphs[0].score_mod_0.code.strip(), + """\ +def forward(self, child_4 : torch.Tensor, child_5 : torch.Tensor, child_6 : torch.Tensor, child_7 : torch.Tensor, child_8 : torch.Tensor, getitem : torch.SymInt): + add = child_4 + getitem; child_4 = getitem = None + return add""", + ) + @supported_platform def test_fw_bw_graph_correctness(self): cnt = CompileCounterWithBackend("aot_eager") diff --git a/torch/_higher_order_ops/flex_attention.py b/torch/_higher_order_ops/flex_attention.py index 56794cc1b93e90..f38cb6a0de3759 100644 --- a/torch/_higher_order_ops/flex_attention.py +++ b/torch/_higher_order_ops/flex_attention.py @@ -9,6 +9,8 @@ _has_potential_branch_input_mutation, autograd_not_implemented, reenter_make_fx, + save_tensors_and_symints_for_backward, + saved_tensors_and_symints, UnsupportedAliasMutationException, ) from torch._ops import HigherOrderOperator @@ 
-84,7 +86,7 @@ def __call__( mask_mod_other_buffers: Tuple = (), ) -> Tuple[torch.Tensor, torch.Tensor]: if not all( - isinstance(buf, torch.Tensor) + isinstance(buf, (torch.Tensor, torch.SymInt, int)) for buf in score_mod_other_buffers + mask_mod_other_buffers ): raise RuntimeError("Other buffers must be tensors.") @@ -414,7 +416,7 @@ def flex_attention_functionalize( assert isinstance(score_mod_other_buffers_unwrapped, tuple) assert isinstance(mask_mod_other_buffers_unwrapped, tuple) assert all( - isinstance(item, torch.Tensor) + isinstance(item, (torch.Tensor, torch.SymInt, int)) for item in score_mod_other_buffers_unwrapped + mask_mod_other_buffers_unwrapped ) @@ -502,14 +504,18 @@ def create_fw_bw_graph( with suspend_functionalization(), disable_functional_mode(): with disable_proxy_modes_tracing(): - def _from_fun(t: Tensor) -> Tensor: - return torch.empty_strided( - t.size(), - t.stride(), - device=t.device, - dtype=t.dtype, - requires_grad=t.requires_grad, - ) + def _from_fun( + t: Union[Tensor, torch.SymInt, int] + ) -> Union[Tensor, torch.SymInt, int]: + if isinstance(t, torch.Tensor): + return torch.empty_strided( + t.size(), + t.stride(), + device=t.device, + dtype=t.dtype, + requires_grad=t.requires_grad, + ) + return t # If someone runs this hop under the default compiler backend ("eager") # Then this path will be run with the actual user inputs. We convert them @@ -524,8 +530,14 @@ def _from_fun(t: Tensor) -> Tensor: unwrapped_score_mod_indexes = pytree.tree_map(_from_fun, index_values) unwrapped_other_buffers = pytree.tree_map(_from_fun, other_buffers) - assert all(isinstance(t, FakeTensor) for t in unwrapped_score_mod_indexes) - assert all(isinstance(t, FakeTensor) for t in unwrapped_other_buffers) + assert all( + isinstance(t, (FakeTensor, torch.SymInt, int)) + for t in unwrapped_score_mod_indexes + ) + assert all( + isinstance(t, (FakeTensor, torch.SymInt, int)) + for t in unwrapped_other_buffers + ) example_flat_out = pytree.tree_map( _from_fun, @@ -591,9 +603,6 @@ def forward( ctx._fw_graph = fw_graph ctx._joint_graph = joint_graph ctx._mask_graph = block_mask[-1] - # KV_BLOCK_SIZE and Q_BLOCK_SIZE are integers, so can't use ctx.save_for_backward - ctx._Q_BLOCK_SIZE = block_mask[8] - ctx._KV_BLOCK_SIZE = block_mask[9] ctx.scale = scale ctx.kernel_options = kernel_options ctx._score_mod_other_buffers_len = len(score_mod_other_buffers) @@ -610,21 +619,24 @@ def forward( mask_mod_other_buffers, ) - ctx.save_for_backward( - query, - key, - value, - out, - logsumexp, - *block_mask[:8], - *score_mod_other_buffers, - *mask_mod_other_buffers, + save_tensors_and_symints_for_backward( + ctx, + ( + query, + key, + value, + out, + logsumexp, + *block_mask[:10], + *score_mod_other_buffers, + *mask_mod_other_buffers, + ), ) return out, logsumexp @staticmethod def backward(ctx: Any, grad_out: Tensor, grad_logsumexp: Tensor) -> Tuple[Optional[Tensor], ...]: # type: ignore[override] - fw_args = ctx.saved_tensors + fw_args = saved_tensors_and_symints(ctx) ( query, key, @@ -639,13 +651,13 @@ def backward(ctx: Any, grad_out: Tensor, grad_logsumexp: Tensor) -> Tuple[Option q_indices, full_q_num_blocks, full_q_indices, + Q_BLOCK_SIZE, + KV_BLOCK_SIZE, *other_buffers, ) = fw_args fw_graph = ctx._fw_graph joint_graph = ctx._joint_graph mask_graph = ctx._mask_graph - KV_BLOCK_SIZE = ctx._KV_BLOCK_SIZE - Q_BLOCK_SIZE = ctx._Q_BLOCK_SIZE scale = ctx.scale kernel_options = ctx.kernel_options score_mod_other_buffers = tuple( diff --git a/torch/_higher_order_ops/utils.py 
b/torch/_higher_order_ops/utils.py index 549e1af54f9b68..f6a8d29d520c00 100644 --- a/torch/_higher_order_ops/utils.py +++ b/torch/_higher_order_ops/utils.py @@ -431,7 +431,9 @@ def _stack_pytree(pytrees): # iterating over the pos list and pop one item from the front of paritioned_args[pos[i]]. # We use t_idx and s_idx to keep track of the next index of the item we are going to pop for the two lists. def save_tensors_and_symints_for_backward(ctx, args): - assert all(isinstance(arg, (torch.Tensor, torch.SymInt, int)) for arg in args), args + assert all( + isinstance(arg, (torch.Tensor, torch.SymInt, int, type(None))) for arg in args + ), args partitioned_args: List[Any] = [[], []] pos = [] for i, arg in enumerate(args): From 397938b453c615360b763a81af756b056c7afed8 Mon Sep 17 00:00:00 2001 From: Yidi Wu Date: Fri, 1 Nov 2024 14:54:06 -0700 Subject: [PATCH 035/503] [hop free symbols][refactor] lift freevar to parent graph before lifting to subgraph (#138559) This refactoring is for getting a deterministic ordering of binding tensors and sizes of tensors. When seeing a free tensor x with shape (s0,) in a subgraph, the ordering of lifting changes from ``` lift_x_in_child, lift_s0_in_child, lift_s0_in_parent, lift_x_in_parent ``` to ``` lift_x_in_parent, lift_s0_in_parent, lift_x_in_child, lift_s0_in_child ``` This produces a deterministic ordering of handling the symints in lifted tensors. This is also the current contract of the dynamo top-level graph: we lift free_symbols in sizes after tensor x and insert the free symbols before the tensor x's proxy. Pull Request resolved: https://github.com/pytorch/pytorch/pull/138559 Approved by: https://github.com/zou3519 ghstack dependencies: #138345, #138428, #138558, #138737 --- torch/_dynamo/output_graph.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/torch/_dynamo/output_graph.py b/torch/_dynamo/output_graph.py index dc576a34f81cb1..f690fe3991ccdf 100644 --- a/torch/_dynamo/output_graph.py +++ b/torch/_dynamo/output_graph.py @@ -1873,7 +1873,8 @@ def __init__( # A dict mapping previously free variables (Proxy objects) # to new Proxy objects that wrap inputs to this subgraph. # - # This dict serves two purposes: + # This dict maps proxies in outer graphs to placeholders in current graph. + # It serves two purposes: # - Proxies are associated with VariableTrackers. If we see # the same VariableTracker twice (and it is a free variable), # then we want to use the same Proxy in the current subgraph to @@ -2207,13 +2208,18 @@ def lift_tracked_freevar_to_input(self, proxy): # If that is the case, just return the already lifted Proxy. if proxy in self.lifted_freevars: return self.lifted_freevars[proxy] + + # We first lift proxy to parent's graph then lift to current graph's input + # so that when we bind symints of the sizes in current graph, those symints + # would already be lifted as inputs to parent graph.
+ if proxy.tracer != self.parent: + self.parent.lift_tracked_freevar_to_input(proxy) + example_value = proxy.node.meta["example_value"] new_proxy = self.create_graph_input( proxy.node.name, type(example_value), example_value ) self.lifted_freevars[proxy] = new_proxy - if self.parent is not None and proxy.tracer != self.parent: - self.parent.lift_tracked_freevar_to_input(proxy) return new_proxy def maybe_lift_tracked_freevar_to_input(self, arg): From 87a379b61b5a9920606df25802d8938d7a2f0ba8 Mon Sep 17 00:00:00 2001 From: Tugsbayasgalan Manlaibaatar Date: Sun, 3 Nov 2024 15:44:49 -0800 Subject: [PATCH 036/503] Move pippy to training IR (#139233) Differential Revision: [D65282662](https://our.internmc.facebook.com/intern/diff/D65282662) Pull Request resolved: https://github.com/pytorch/pytorch/pull/139233 Approved by: https://github.com/kwen2501 ghstack dependencies: #138658, #139209 --- test/distributed/pipelining/test_unflatten.py | 2 +- torch/distributed/pipelining/_IR.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/distributed/pipelining/test_unflatten.py b/test/distributed/pipelining/test_unflatten.py index ef2e48d8ee9f49..9e63c3b8084cd7 100644 --- a/test/distributed/pipelining/test_unflatten.py +++ b/test/distributed/pipelining/test_unflatten.py @@ -20,7 +20,7 @@ def forward(self, x: torch.Tensor, constant=None) -> torch.Tensor: x = self.conv(x) x = self.lin0(x) pipe_split() - x.add_(constant) + x.add(constant) x = self.lin1(x) return self.relu(x) diff --git a/torch/distributed/pipelining/_IR.py b/torch/distributed/pipelining/_IR.py index 3010bccd377c94..72ed53f8a42697 100644 --- a/torch/distributed/pipelining/_IR.py +++ b/torch/distributed/pipelining/_IR.py @@ -1003,7 +1003,7 @@ def _trace_with_export( ) -> ExportedProgram: logger.info("Tracing model ...") try: - ep = torch.export.export( + ep = torch.export.export_for_training( mod, example_args, example_kwargs, From 23169a6bcc766cd4006c18a515f828bbedaaf148 Mon Sep 17 00:00:00 2001 From: Jane Xu Date: Mon, 4 Nov 2024 10:47:06 -0800 Subject: [PATCH 037/503] Disable foreach tests for complex128 internally (#139649) Pull Request resolved: https://github.com/pytorch/pytorch/pull/139649 Approved by: https://github.com/ngimel --- torch/testing/_internal/opinfo/core.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/torch/testing/_internal/opinfo/core.py b/torch/testing/_internal/opinfo/core.py index c14fa748a95dc1..393e2af43393dc 100644 --- a/torch/testing/_internal/opinfo/core.py +++ b/torch/testing/_internal/opinfo/core.py @@ -26,6 +26,7 @@ get_all_dtypes, ) from torch.testing._internal.common_utils import ( + IS_FBCODE, is_iterable_of_tensors, noncontiguous_like, OPINFO_SAMPLE_INPUT_INDEX, @@ -2817,7 +2818,14 @@ def __post_init__(self): foreach_method = foreach_method_inplace torch_ref_method = torch_ref_inplace - self.dtypes = _dispatch_dtypes(get_all_dtypes(include_qint=False)) + # We disable all complex128 tests internally for foreach due to reported flakiness + # tracked in #139648 + supported_dtypes = get_all_dtypes(include_qint=False) + if IS_FBCODE: + supported_dtypes = [ + x for x in supported_dtypes if x is not torch.complex128 + ] + self.dtypes = _dispatch_dtypes(supported_dtypes) self.op = foreach_method self.method_variant = foreach_method From 6add86a29f6d25c2468207be0f5f4937b27b7e3b Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Mon, 4 Nov 2024 23:30:15 +0000 Subject: [PATCH 038/503] Revert "Tighten type hints for tensor arithmetic (#135392)" This reverts 
commit bf5cd8d0116d90d24b8acb38d578b8952dab22ef. Reverted https://github.com/pytorch/pytorch/pull/135392 on behalf of https://github.com/ZainRizvi due to Sorry but this is breaking lint on trunk. See [GH job link](https://github.com/pytorch/pytorch/actions/runs/11673543178/job/32504499599) [HUD commit link](https://hud.pytorch.org/pytorch/pytorch/commit/bf5cd8d0116d90d24b8acb38d578b8952dab22ef) ([comment](https://github.com/pytorch/pytorch/pull/135392#issuecomment-2455908056)) --- tools/pyi/gen_pyi.py | 56 +++++++------------ torch/_decomp/decompositions.py | 3 +- .../fx_passes/efficient_conv_bn_eval.py | 1 - torch/ao/quantization/_equalize.py | 10 +++- 4 files changed, 28 insertions(+), 42 deletions(-) diff --git a/tools/pyi/gen_pyi.py b/tools/pyi/gen_pyi.py index 84a8163e215479..bd2fcee5e51c4a 100644 --- a/tools/pyi/gen_pyi.py +++ b/tools/pyi/gen_pyi.py @@ -177,18 +177,14 @@ def should_bind_method(python_func: PythonSignatureNativeFunctionPair) -> bool: "copy_", ] -shift_ops = ( - "lshift", - "rshift", - "ilshift", - "irshift", # inplace ops -) -arithmetic_ops = ( +binary_ops = ( "add", "sub", "mul", "div", "pow", + "lshift", + "rshift", "mod", "truediv", "matmul", @@ -199,26 +195,24 @@ def should_bind_method(python_func: PythonSignatureNativeFunctionPair) -> bool: "rtruediv", "rfloordiv", "rpow", # reverse arithmetic - "iadd", - "idiv", - "imul", - "isub", - "ifloordiv", - "imod", # inplace ops -) -logic_ops = ( "and", "or", "xor", "rand", "ror", - "rxor", # reverse logic + "rxor", # logic + "iadd", "iand", + "idiv", + "ilshift", + "imul", "ior", - "ixor", # inplace ops + "irshift", + "isub", + "ixor", + "ifloordiv", + "imod", # inplace ops ) -binary_ops = shift_ops + arithmetic_ops + logic_ops - symmetric_comparison_ops = ("eq", "ne") asymmetric_comparison_ops = ("ge", "gt", "lt", "le") comparison_ops = symmetric_comparison_ops + asymmetric_comparison_ops @@ -238,24 +232,14 @@ def sig_for_ops(opname: str) -> list[str]: assert opname.endswith("__") and opname.startswith("__"), f"Unexpected op {opname}" name = opname[2:-2] - if name in arithmetic_ops: - return [ - f"def {opname}(self, other: Union[Tensor, Number, _complex]) -> Tensor: ..." - ] - elif name in logic_ops: - return [f"def {opname}(self, other: Union[Tensor, _bool]) -> Tensor: ..."] - elif name in shift_ops: - return [f"def {opname}(self, other: Union[Tensor, _int]) -> Tensor: ..."] - elif name in symmetric_comparison_ops: - return [ + if name in binary_ops: + return [f"def {opname}(self, other: Any) -> Tensor: ..."] + elif name in comparison_ops: + sig = f"def {opname}(self, other: Any) -> Tensor: ..." + if name in symmetric_comparison_ops: # unsafe override https://github.com/python/mypy/issues/5704 - f"def {opname}(self, other: Union[Tensor, Number, _complex]) -> Tensor: ... # type: ignore[override]", - f"def {opname}(self, other: Any) -> _bool: ...", - ] - elif name in asymmetric_comparison_ops: - return [ - f"def {opname}(self, other: Union[Tensor, Number, _complex]) -> Tensor: ..." 
- ] + sig += " # type: ignore[override]" + return [sig] elif name in unary_ops: return [f"def {opname}(self) -> Tensor: ..."] elif name in to_py_type_ops: diff --git a/torch/_decomp/decompositions.py b/torch/_decomp/decompositions.py index 17b5897aed255e..ffa83a80bcc667 100644 --- a/torch/_decomp/decompositions.py +++ b/torch/_decomp/decompositions.py @@ -2291,8 +2291,7 @@ def native_batch_norm_backward( mean = save_mean_cast invstd = save_invstd_cast if train: - assert mean is not None and invstd is not None - + assert save_mean_cast is not None and save_invstd_cast is not None else: assert running_mean_cast is not None and running_var_cast is not None mean = running_mean_cast diff --git a/torch/_inductor/fx_passes/efficient_conv_bn_eval.py b/torch/_inductor/fx_passes/efficient_conv_bn_eval.py index bc6ebbcd5cef6d..4845142caab347 100644 --- a/torch/_inductor/fx_passes/efficient_conv_bn_eval.py +++ b/torch/_inductor/fx_passes/efficient_conv_bn_eval.py @@ -33,7 +33,6 @@ def efficient_conv_bn_eval( """ assert bn.running_var is not None - assert bn.running_mean is not None # These lines of code are designed to deal with various cases # like bn without affine transform, and conv without bias diff --git a/torch/ao/quantization/_equalize.py b/torch/ao/quantization/_equalize.py index 57a4cfdead2901..08316f755552b7 100644 --- a/torch/ao/quantization/_equalize.py +++ b/torch/ao/quantization/_equalize.py @@ -128,7 +128,8 @@ def cross_layer_equalization(module1, module2, output_axis=0, input_axis=1): "module type not supported:", type(module1), " ", type(module2) ) - bias = get_module_bias(module1) if has_bias(module1) else None + conv1_has_bias = has_bias(module1) + bias = None weight1 = get_module_weight(module1) weight2 = get_module_weight(module2) @@ -139,6 +140,9 @@ def cross_layer_equalization(module1, module2, output_axis=0, input_axis=1): number input channels of second arg" ) + if conv1_has_bias: + bias = get_module_bias(module1) + weight1_range = channel_range(weight1, output_axis) weight2_range = channel_range(weight2, input_axis) @@ -147,7 +151,7 @@ def cross_layer_equalization(module1, module2, output_axis=0, input_axis=1): scaling_factors = torch.sqrt(weight1_range / weight2_range) inverse_scaling_factors = torch.reciprocal(scaling_factors) - if bias is not None: + if conv1_has_bias: bias = bias * inverse_scaling_factors # formatting the scaling (1D) tensors to be applied on the given argument tensors @@ -164,7 +168,7 @@ def cross_layer_equalization(module1, module2, output_axis=0, input_axis=1): weight2 = weight2 * scaling_factors set_module_weight(module1, weight1) - if bias is not None: + if conv1_has_bias: set_module_bias(module1, bias) set_module_weight(module2, weight2) From 350bc2a16695483323d4d2c868c76f11db1c0b35 Mon Sep 17 00:00:00 2001 From: Henry Tsang Date: Mon, 4 Nov 2024 23:31:46 +0000 Subject: [PATCH 039/503] [export] Add support for symbool to make it usable for torch.cond (#138765) # Why? I want the following code to work. 
minimal repro: ``` class M(torch.nn.Module): def forward(self, dilate_flag): return dilate_flag.item() input1 = (torch.tensor([1], dtype=torch.bool, device="cuda"),) model = M().cuda() ep = torch.export.export(model, input1, strict=True) path = torch._inductor.aot_compile(ep.module(), input1) aot_model = torch._export.aot_load(path, device="cuda") actual_output = aot_model(*input1) ``` error: AssertionError: Encountered an unsupported object of type while writing the metadata for exported program second error will be handled by https://github.com/pytorch/pytorch/pull/138760 # Motivation I could technically bypass it with a torch.int tensor. However, it doesn't work with torch.cond. I want the following to work. It would also require https://github.com/pytorch/pytorch/pull/138760 for aot compile to work. ``` class M(torch.nn.Module): def __init__(self) -> None: super().__init__() self.dilate_flag = 0 def forward(self, dilate_flag): self.dilate_flag = dilate_flag.item() def true_fn(dilate_flag): return dilate_flag.clone() def false_fn(dilate_flag): return dilate_flag.clone() torch.cond( self.dilate_flag, true_fn, false_fn, (dilate_flag,), ) return self.dilate_flag input1 = (torch.tensor([1], dtype=torch.bool, device="cuda"),) input2 = (torch.tensor([0], dtype=torch.bool, device="cuda"),) inputs = (input1, input2) model = M().cuda() for input in inputs: expected_output = model(*input) ep = torch.export.export(model, input, strict=False) path = torch._inductor.aot_compile(ep.module(), input) aot_model = torch._export.aot_load(path, device="cuda") actual_output = aot_model(*input) assert ( expected_output == actual_output ), f"henry they are not equal {expected_output} != {actual_output}" ``` Differential Revision: D64867504 Pull Request resolved: https://github.com/pytorch/pytorch/pull/138765 Approved by: https://github.com/ydwu4 --- docs/source/export.rst | 2 + test/export/test_export.py | 44 +++++++++++++++++++++ test/inductor/test_aot_inductor.py | 16 ++++++++ test/inductor/test_aot_inductor_arrayref.py | 4 ++ torch/_export/__init__.py | 1 + torch/_export/verifier.py | 3 +- torch/export/exported_program.py | 3 ++ torch/export/graph_signature.py | 28 +++++++++++-- torch/export/unflatten.py | 9 ++++- 9 files changed, 103 insertions(+), 7 deletions(-) diff --git a/docs/source/export.rst b/docs/source/export.rst index da7d827b3d0353..6d6784c97c5263 100644 --- a/docs/source/export.rst +++ b/docs/source/export.rst @@ -888,6 +888,8 @@ API Reference .. autoclass:: InputSpec .. autoclass:: OutputKind .. autoclass:: OutputSpec +.. autoclass:: SymIntArgument +.. autoclass:: SymBoolArgument .. autoclass:: ExportGraphSignature .. 
automethod:: replace_all_uses diff --git a/test/export/test_export.py b/test/export/test_export.py index 891771ef37ac7b..7bbb0ad3f62d16 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -843,6 +843,28 @@ def forward(self, x, c): foo, bad_example_inp, dynamic_shapes=dynamic_shapes, strict=False ) + def test_symint_item(self): + class M(torch.nn.Module): + def forward(self, tensor): + return tensor.item() + + input = (torch.tensor([1], dtype=torch.int),) + + orig_res = M()(*input) + ep_res = torch.export.export(M(), input).module()(*input) + self.assertEqual(orig_res, ep_res) + + def test_symbool_item(self): + class M(torch.nn.Module): + def forward(self, tensor): + return tensor.item() + + input = (torch.tensor([1], dtype=torch.bool),) + + orig_res = M()(*input) + ep_res = torch.export.export(M(), input).module()(*input) + self.assertEqual(orig_res, ep_res) + def test_unbacked_to_cond(self): class M(torch.nn.Module): def forward(self, a): @@ -5324,6 +5346,28 @@ def forward(self, x): self.assertEqual(len(ep.graph_signature.input_specs), 4) self.assertTrue(torch.allclose(ep.module()(*inp), transform.module()(*inp))) + class Boo(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + self.a = torch.tensor(True) + + def forward(self, x): + list_tensor = [torch.tensor(False), torch.tensor(True)] + return x + self.a + list_tensor[0] + list_tensor[1] + + ep = export(Boo(), (torch.tensor(False),)) + + self.assertEqual(len(ep.graph_signature.input_specs), 4) + self.assertEqual(len(ep.state_dict), 0) + self.assertEqual(len(ep.constants), 3) + + inp = (torch.tensor(True),) + self.assertTrue(torch.allclose(ep.module()(*inp), Boo()(*inp))) + + transform = ep.run_decompositions() + self.assertEqual(len(ep.graph_signature.input_specs), 4) + self.assertTrue(torch.allclose(ep.module()(*inp), transform.module()(*inp))) + def test_tensor_attribute_zero_args(self): class Foo(torch.nn.Module): def __init__(self, value): diff --git a/test/inductor/test_aot_inductor.py b/test/inductor/test_aot_inductor.py index a408cd78f19756..a8a2c0603e11d2 100644 --- a/test/inductor/test_aot_inductor.py +++ b/test/inductor/test_aot_inductor.py @@ -2763,6 +2763,22 @@ def forward(self, inputs, targets, split_index=None): ) self.check_model(Model(), inputs) + def test_symint_item(self): + class Model(torch.nn.Module): + def forward(self, tensor): + return tensor.item() + + inputs = (torch.tensor([1], dtype=torch.int, device=self.device),) + self.check_model(Model(), inputs) + + def test_symbool_item(self): + class Model(torch.nn.Module): + def forward(self, tensor): + return tensor.item() + + inputs = (torch.tensor([0], dtype=torch.bool, device=self.device),) + self.check_model(Model(), inputs) + def test_constant_original_fqn_and_dtype(self): class FooBarModule(torch.nn.Module): def __init__(self) -> None: diff --git a/test/inductor/test_aot_inductor_arrayref.py b/test/inductor/test_aot_inductor_arrayref.py index 1f988f874270b9..64e7e872ff6939 100644 --- a/test/inductor/test_aot_inductor_arrayref.py +++ b/test/inductor/test_aot_inductor_arrayref.py @@ -175,6 +175,10 @@ def fail_minimal_arrayref_interface(is_skip=False): "test_size_from_multi_output": fail_stack_allocation(is_skip=True), "test_masked_select_dynamic": fail_stack_allocation(is_skip=True), "test_torchvision_transforms_functional_tensor_resize": fail_minimal_arrayref_interface(), + # TODO: AttributeError: 'ShapeAsConstantBuffer' object has no attribute 'dtype' + "test_symint_item": fail_minimal_arrayref_interface(is_skip=True), 
+ # TODO: AttributeError: 'ShapeAsConstantBuffer' object has no attribute 'dtype' + "test_symbool_item": fail_minimal_arrayref_interface(is_skip=True), } diff --git a/torch/_export/__init__.py b/torch/_export/__init__.py index 6bfa2cc7fb2474..99b8ba943e80b7 100644 --- a/torch/_export/__init__.py +++ b/torch/_export/__init__.py @@ -36,6 +36,7 @@ OutputKind, OutputSpec, SymIntArgument, + SymBoolArgument, TensorArgument, ) from torch.fx import traceback as fx_traceback diff --git a/torch/_export/verifier.py b/torch/_export/verifier.py index 84b8a37ad16836..d40618aa34e99b 100644 --- a/torch/_export/verifier.py +++ b/torch/_export/verifier.py @@ -12,6 +12,7 @@ CustomObjArgument, InputKind, SymIntArgument, + SymBoolArgument, TensorArgument, TokenArgument, ) @@ -308,7 +309,7 @@ def _verify_exported_program_signature(exported_program) -> None: ) for input_spec, node in zip(gs.input_specs, input_node_names): - if isinstance(input_spec.arg, (TensorArgument, SymIntArgument)): + if isinstance(input_spec.arg, (TensorArgument, SymIntArgument, SymBoolArgument)): if input_spec.arg.name != node: raise SpecViolationError( f"Input spec name {input_spec.arg.name} does not match node name {node}" diff --git a/torch/export/exported_program.py b/torch/export/exported_program.py index a5c7a519b8590e..4a64032347cadd 100644 --- a/torch/export/exported_program.py +++ b/torch/export/exported_program.py @@ -74,6 +74,7 @@ InputSpec, OutputKind, OutputSpec, + SymBoolArgument, SymIntArgument, TensorArgument, TokenArgument, @@ -466,6 +467,8 @@ def update_arg(old_arg, new_ph): return TensorArgument(name=new_ph.name) elif isinstance(old_arg, SymIntArgument): return SymIntArgument(name=new_ph.name) + elif isinstance(old_arg, SymBoolArgument): + return SymBoolArgument(name=new_ph.name) raise RuntimeError(f"Type of old_arg not supported: {type(old_arg)}") new_placeholders = [node for node in gm.graph.nodes if node.op == "placeholder"] diff --git a/torch/export/graph_signature.py b/torch/export/graph_signature.py index 4730cf6febcddf..4b99dc1b992e2e 100644 --- a/torch/export/graph_signature.py +++ b/torch/export/graph_signature.py @@ -20,6 +20,7 @@ "OutputKind", "OutputSpec", "SymIntArgument", + "SymBoolArgument", "TensorArgument", ] @@ -39,6 +40,11 @@ class SymIntArgument: name: str +@dataclasses.dataclass +class SymBoolArgument: + name: str + + @dataclasses.dataclass class CustomObjArgument: name: str @@ -55,6 +61,7 @@ class ConstantArgument: ArgumentSpec = Union[ TensorArgument, SymIntArgument, + SymBoolArgument, ConstantArgument, CustomObjArgument, TokenArgument, @@ -87,6 +94,7 @@ def __post_init__(self): ( TensorArgument, SymIntArgument, + SymBoolArgument, ConstantArgument, CustomObjArgument, TokenArgument, @@ -116,6 +124,7 @@ def __post_init__(self): ( TensorArgument, SymIntArgument, + SymBoolArgument, ConstantArgument, TokenArgument, CustomObjArgument, @@ -262,7 +271,10 @@ def user_inputs(self) -> Collection[Union[int, float, bool, None, str]]: if s.kind != InputKind.USER_INPUT: continue - if isinstance(s.arg, (TensorArgument, SymIntArgument, CustomObjArgument)): + if isinstance( + s.arg, + (TensorArgument, SymIntArgument, SymBoolArgument, CustomObjArgument), + ): user_inputs.append(s.arg.name) elif isinstance(s.arg, ConstantArgument): user_inputs.append(s.arg.value) @@ -278,7 +290,7 @@ def user_outputs(self) -> Collection[Union[int, float, bool, None, str]]: if s.kind != OutputKind.USER_OUTPUT: continue - if isinstance(s.arg, (TensorArgument, SymIntArgument)): + if isinstance(s.arg, (TensorArgument, SymIntArgument, 
SymBoolArgument)): user_outputs.append(s.arg.name) elif isinstance(s.arg, ConstantArgument): user_outputs.append(s.arg.value) @@ -425,7 +437,13 @@ def replace_all_uses(self, old: str, new: str): """ assert isinstance(old, str) assert isinstance(new, str) - arg_types = (TensorArgument, SymIntArgument, CustomObjArgument, TokenArgument) + arg_types = ( + TensorArgument, + SymIntArgument, + SymBoolArgument, + CustomObjArgument, + TokenArgument, + ) for o in self.output_specs: if isinstance(o.arg, arg_types): if o.arg.name == old: @@ -454,7 +472,7 @@ def _immutable_dict(items): def _make_argument_spec(node, token_names) -> ArgumentSpec: - from torch import ScriptObject, SymInt + from torch import ScriptObject, SymBool, SymInt from torch._library.fake_class_registry import FakeScriptObject from torch._subclasses.fake_tensor import FakeTensor @@ -472,6 +490,8 @@ def _make_argument_spec(node, token_names) -> ArgumentSpec: return TensorArgument(name=node.name) elif isinstance(val, SymInt): return SymIntArgument(name=node.name) + elif isinstance(val, SymBool): + return SymBoolArgument(name=node.name) elif isinstance(val, ScriptObject): return CustomObjArgument(name=node.name, class_fqn=val._type().qualified_name()) # type: ignore[attr-defined] elif isinstance(val, FakeScriptObject): diff --git a/torch/export/unflatten.py b/torch/export/unflatten.py index 32638d8f880c91..d25eb23162ef31 100644 --- a/torch/export/unflatten.py +++ b/torch/export/unflatten.py @@ -22,6 +22,7 @@ ExportGraphSignature, InputKind, ModuleCallSignature, + SymBoolArgument, SymIntArgument, TensorArgument, ) @@ -869,7 +870,9 @@ def __init__( elif input.name not in self.seen_nodes: input_nodes.append(None) else: - assert isinstance(input, (TensorArgument, SymIntArgument)) + assert isinstance( + input, (TensorArgument, SymIntArgument, SymBoolArgument) + ) input_nodes.append( self.parent.remap_input(self.seen_nodes[input.name]) ) @@ -975,7 +978,9 @@ def finalize_outputs(self): signature = self.module_call_graph.get(self.child_fqn) if signature is not None and self.parent is not None: for output in signature.outputs: - if isinstance(output, (TensorArgument, SymIntArgument)): + if isinstance( + output, (TensorArgument, SymIntArgument, SymBoolArgument) + ): if output.name in self.seen_nodes: orig_outputs.append(self.seen_nodes[output.name]) else: From 9919932783fc6904d97cdbb3813d53cd1f0ea9d0 Mon Sep 17 00:00:00 2001 From: Bob Ren Date: Mon, 4 Nov 2024 08:55:29 -0800 Subject: [PATCH 040/503] Specialize symfloats that flow through is_integer (#139572) Fixes `python test/dynamo/test_dynamic_shapes.py DynamicShapesFunctionTests.test_number_method_method_is_integer_num_type6_dynamic_shapes` when specialize_float = False Pull Request resolved: https://github.com/pytorch/pytorch/pull/139572 Approved by: https://github.com/ezyang ghstack dependencies: #139569, #139457, #139568 --- torch/_dynamo/utils.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/torch/_dynamo/utils.py b/torch/_dynamo/utils.py index f249483986c00e..fb838fe23ca3a7 100644 --- a/torch/_dynamo/utils.py +++ b/torch/_dynamo/utils.py @@ -2185,6 +2185,15 @@ def get_fake_value(node, tx, allow_non_graph_fake=False): # no matter it's lazy module or not, we should copy to fake mode. nnmodule = deepcopy_to_fake_tensor(nnmodule, tx.fake_mode) + if node.name in ["interpolate", "is_integer"]: + # We need to specialize symfloats for now. Eventually we should do a tensorify pass in dynamo. 
+ args = tuple( + float(arg) + if isinstance(arg, torch.SymFloat) and arg.node.hint is not None + else arg + for arg in args + ) + try: with tx.fake_mode, enable_python_dispatcher(): ret_val = wrap_fake_exception( From a678eaf1adac8f4a529a93c33de04237a179cf19 Mon Sep 17 00:00:00 2001 From: Pian Pawakapan Date: Mon, 4 Nov 2024 23:39:46 +0000 Subject: [PATCH 041/503] check fake/real mismatches during real tensor prop (#137747) Summary: While testing exportability for PT2 Inference models, we found various cases of invalid op inputs during tracing, for example errors like: `a and b must have same reduction dim`, `expected scalar type Long but found Int`, etc. Looking more closely, these happened to due the same few meta kernels & eager kernels producing mismatched outputs upstream (e.g. different output tensor dtype, int output). Adding checks to catch mismatched outputs in real tensor prop upstream, so errors are raised at the mismatched op, instead of the downstream ops taking them as inputs. Relies a lot on utils from [CrossRefFakeMode](https://github.com/pytorch/pytorch/blob/929797dedbf23376123ce95230c01a7e3b71e130/torch/_subclasses/fake_utils.py#L78) Follow ups: could add more checks, and maybe have a flag to only enable these for cases like draft mode, so perf doesn't suffer? Test Plan: test_export, test_fake_tensor Differential Revision: D64210055 Pull Request resolved: https://github.com/pytorch/pytorch/pull/137747 Approved by: https://github.com/zou3519 --- test/export/test_export.py | 114 +++++++++++++++++++++++ test/test_fake_tensor.py | 17 +++- torch/_meta_registrations.py | 6 ++ torch/_prims_common/__init__.py | 21 +++-- torch/_subclasses/fake_tensor.py | 96 +++++++++++++++++++ torch/_subclasses/fake_utils.py | 106 +++++++++++++-------- torch/fx/experimental/symbolic_shapes.py | 6 +- 7 files changed, 313 insertions(+), 53 deletions(-) diff --git a/test/export/test_export.py b/test/export/test_export.py index 7bbb0ad3f62d16..4d1d5aabf48fdb 100755 --- a/test/export/test_export.py +++ b/test/export/test_export.py @@ -1078,6 +1078,120 @@ def forward(self, x): ep_model = export(model, (x,), strict=False).module() self.assertTrue(torch.allclose(model(x), ep_model(x))) + def test_real_tensor_size_mismatch(self): + from torch._subclasses.fake_tensor import MetadataMismatchError + + class M(torch.nn.Module): + def forward(self, a, b): + return torch.ops.mylib.foo(a, b) + + @torch.library.custom_op("mylib::foo", mutates_args={}) + def foo(a: torch.Tensor, b: torch.Tensor) -> torch.Tensor: + return a + b + + @foo.register_fake + def foo_fake_impl(a, b): + m, n = a.shape + return torch.empty(n, m) # incorrectly permute + + error_type = ( + MetadataMismatchError + if is_non_strict_test(self._testMethodName) + else torch._dynamo.exc.TorchRuntimeError + ) + with torch._functorch.config.patch(fake_tensor_propagate_real_tensors=True): + # won't catch anything if dims are equal + export( + M(), + (torch.randn(4, 4), torch.randn(4, 4)), + ) + # catch concrete inequality + with self.assertRaisesRegex( + error_type, + "Real tensor propagation found an output size mismatch between fake shape 8 and real shape 4, " + "at output index 0, dimension 0 for func: mylib.foo.default", + ): + export( + M(), + (torch.randn(4, 8), torch.randn(4, 8)), + ) + # same test with dynamic shapes + d0 = Dim("d0") + d1 = Dim("d1") + export( + M(), + (torch.randn(4, 4), torch.randn(4, 4)), + dynamic_shapes={ + "a": (d0, d1), + "b": (d0, d1), + }, + ) + with self.assertRaisesRegex( + error_type, + "Real tensor propagation 
found an output size mismatch between fake shape s1 and real shape 4, " + "at output index 0, dimension 0 for func: mylib.foo.default", + ): + export( + M(), + (torch.randn(4, 8), torch.randn(4, 8)), + dynamic_shapes={ + "a": (d0, d1), + "b": (d0, d1), + }, + ) + + def test_real_tensor_alias_dtype_mismatch(self): + from torch._subclasses.fake_tensor import MetadataMismatchError + + error_type = ( + MetadataMismatchError + if is_non_strict_test(self._testMethodName) + else torch._dynamo.exc.TorchRuntimeError + ) + + # test alias case + class M(torch.nn.Module): + def forward(self, a): + return torch.ops.mylib.foo_alias(a) + + @torch.library.custom_op("mylib::foo_alias", mutates_args={}) + def foo_alias(a: torch.Tensor) -> torch.Tensor: + return a * 2 + + @foo_alias.register_fake + def foo_fake_impl(a): + return a + + with torch._functorch.config.patch(fake_tensor_propagate_real_tensors=True): + with self.assertRaisesRegex( + error_type, + r"Real tensor propagation found an aliasing mismatch between fake output (.*\n)*.* " + r"and real output (.*\n)*.* for func: mylib.foo_alias.default", + ): + ep = export(M(), (torch.randn(4, 4),)) + + # test dtype case + class N(torch.nn.Module): + def forward(self, a): + return torch.ops.mylib.foo_dtype(a) + + @torch.library.custom_op("mylib::foo_dtype", mutates_args={}) + def foo_dtype(a: torch.Tensor) -> torch.Tensor: + return a * 2 + + @foo_dtype.register_fake + def foo_fake_impl(a): + m, n = a.shape + return torch.empty([m, n], dtype=torch.int32) + + with torch._functorch.config.patch(fake_tensor_propagate_real_tensors=True): + with self.assertRaisesRegex( + error_type, + r"Real tensor propagation found a metadata mismatch between fake tensor (.*\n)*.* " + r"and real tensor (.*\n)*.* at output index 0, for func: mylib.foo_dtype.default", + ): + ep = export(N(), (torch.randn(4, 4),)) + def test_real_tensor_for_max_op(self): class Foo(torch.nn.Module): def forward(self, x, y): diff --git a/test/test_fake_tensor.py b/test/test_fake_tensor.py index e798682fd39489..e0e76419c9fd31 100644 --- a/test/test_fake_tensor.py +++ b/test/test_fake_tensor.py @@ -28,6 +28,7 @@ _CacheKeyState, DynamicOutputShapeException, extract_tensor_metadata, + MetadataMismatchError, FakeTensor, FakeTensorConverter, FakeTensorMode, @@ -1377,14 +1378,20 @@ def forward(self, arg1, arg2, arg3): try: with torch._subclasses.CrossRefFakeMode(): Repro()(*args) - except RuntimeError as e: + except MetadataMismatchError as e: # We expect the cross ref to succed for the first output to fail # for the rng state, see Note [Seed and Offset] self.assertTrue("output[0]" not in str(e)) - self.assertTrue( - "found mismatched tensor metadata for output[6]: Devices cpu and cuda:0 are not equal!" - in str(e) - ) + if self.__class__.__name__.startswith("PropagateRealTensors"): + self.assertTrue( + "Real tensor propagation found a metadata mismatch" + in str(e) + ) + else: + self.assertTrue( + "found mismatched tensor metadata for output" + in str(e) + ) # IMPORTANT!!! 
Always run even if CUDA is not available def test_fake_gpu_no_init(self): diff --git a/torch/_meta_registrations.py b/torch/_meta_registrations.py index 0da6b58bdb413f..35264096aa4372 100644 --- a/torch/_meta_registrations.py +++ b/torch/_meta_registrations.py @@ -2131,6 +2131,12 @@ def _compute_reduction_shape(self, dims, keepdim): def device_hint(tensor) -> "str": if isinstance(tensor, torch._subclasses.FakeTensor): return tensor.fake_device.type + elif ( + hasattr(tensor, "device") + and hasattr(tensor.device, "type") + and tensor.device.type != "meta" + ): + return tensor.device.type else: return "cuda" # default to cuda diff --git a/torch/_prims_common/__init__.py b/torch/_prims_common/__init__.py index 61d0ba13b88f15..29f3dacafaa9a2 100644 --- a/torch/_prims_common/__init__.py +++ b/torch/_prims_common/__init__.py @@ -136,6 +136,7 @@ def _maybe_get_pytype(t): def compare_tensor_meta( a: TensorLikeType, b: TensorLikeType, + check_sizes=True, check_strides=False, *, allow_rhs_unbacked=False, @@ -148,16 +149,20 @@ def compare_tensor_meta( In the future this will validate additional metadata, like strides. """ + from torch._subclasses.fake_tensor import MetadataMismatchError + assert isinstance(a, TensorLike) assert isinstance(b, TensorLike) - if not same_shape(a.shape, b.shape, allow_rhs_unbacked=allow_rhs_unbacked): + if check_sizes and not same_shape( + a.shape, b.shape, allow_rhs_unbacked=allow_rhs_unbacked + ): msg = f"Shapes {a.shape} and {b.shape} are not equal!" - raise AssertionError(msg) + raise MetadataMismatchError(msg) if a.dtype != b.dtype: msg = f"Dtypes {a.dtype} and {b.dtype} are not equal!" - raise AssertionError(msg) + raise MetadataMismatchError(msg) if a.device != b.device: # Handles special cuda:0 vs cuda case @@ -168,27 +173,27 @@ def compare_tensor_meta( pass else: msg = f"Devices {a.device} and {b.device} are not equal!" - raise AssertionError(msg) + raise MetadataMismatchError(msg) # Stride checking is currently disabled, see https://github.com/pytorch/pytorch/issues/78050 if check_strides: same_strides, idx = check_significant_strides(a, b) if not same_strides: msg = f"Stride mismatch! Strides are {a.stride()} and {b.stride()} (mismatched at {idx})!" - raise RuntimeError(msg) + raise MetadataMismatchError(msg) if a.storage_offset() != b.storage_offset(): msg = f"Storage offset mismatch! Storage offsets are {a.storage_offset()} and {b.storage_offset()}!" - raise RuntimeError(msg) + raise MetadataMismatchError(msg) if check_conj: if a.is_conj() != b.is_conj(): - raise RuntimeError( + raise MetadataMismatchError( f"Conj mismatch! is_conj is set to {a.is_conj()} and {b.is_conj()}" ) if a.is_neg() != b.is_neg(): - raise RuntimeError( + raise MetadataMismatchError( f"Neg mismatch! 
is_neg is set to {a.is_neg()} and {b.is_neg()}" ) diff --git a/torch/_subclasses/fake_tensor.py b/torch/_subclasses/fake_tensor.py index 610041d15eb879..9fbd52ccf2cef9 100644 --- a/torch/_subclasses/fake_tensor.py +++ b/torch/_subclasses/fake_tensor.py @@ -140,6 +140,11 @@ class UnsupportedOperatorException(RuntimeError): func: OpOverload +@dataclass +class MetadataMismatchError(RuntimeError): + reason: str + + def ordered_set(*items: T) -> Dict[T, Literal[True]]: return dict.fromkeys(items, True) @@ -2031,6 +2036,11 @@ def maybe_to_real_tensor( def maybe_propagate_real_tensors(fake_out: T) -> T: import sympy + from torch._subclasses.fake_utils import ( + _check_alias_info, + _check_fake_real_tensors, + ) + log.debug("maybe_propagate_real_tensors %s", func) def go(t: object, real_t: Tensor) -> None: @@ -2057,6 +2067,33 @@ def go(t: object, real_t: Tensor) -> None: assert self.shape_env is not None self.shape_env.set_unbacked_var_to_val(s, int(real_t)) + def _check_fake_real_vals(fake: Any, real: Any) -> None: + # use real values + ShapeEnv to check mismatches between potentially symbolic values + if isinstance(fake, (SymInt, SymFloat)): + # symbolic expression, ask ShapeEnv to substitute known backed/unbacked values + assert self.shape_env is not None + if ( + not fake.node.expr.free_symbols + - self.shape_env.var_to_val.keys() + - self.shape_env.unbacked_var_to_val.keys() + ): + if ( + self.shape_env._maybe_evaluate_static( + sympy.Eq(fake.node.expr, real), compute_hint=True + ) + is not sympy.S.true + ): + raise MetadataMismatchError( + f"mismatch between fake value {fake} and real value {real} " + ) + elif isinstance( + fake, (int, float, bool) + ): # concrete value, check direct equality + if fake != real: + raise MetadataMismatchError( + f"mismatch between fake value {fake} and real value {real} " + ) + if real_out is not nil: if ( not isinstance(fake_out, Tensor) @@ -2073,6 +2110,65 @@ def go(t: object, real_t: Tensor) -> None: else: tree_map_(go, fake_out, real_out) + # check fake/real alias info + try: + _check_alias_info( + "Real tensor propagation found", + real_out, + (real_args, real_kwargs), + fake_out, + (args, kwargs), + ) + except MetadataMismatchError as exc: + raise MetadataMismatchError( + f"Real tensor propagation found an aliasing mismatch between " + f"fake output {fake_out} and real output {real_out}, " + f" for func: {func}" + ) from exc + + # check fake/real tensor properies, sizes & output values + for i, (_real_out, _fake_out) in enumerate( + zip(pytree.tree_leaves(real_out), pytree.tree_leaves(fake_out)) + ): + if isinstance(_fake_out, torch.Tensor): + try: + _check_fake_real_tensors( + _fake_out, + _real_out, + context="Real tensor propagation found", + sizes=False, # manual check below + strides=False, # skip strides + storage_offset=True, + requires_grad=False, # issues with FakeTensorConverter preserving requires_grad + ) + except MetadataMismatchError as exc: + raise MetadataMismatchError( + f"Real tensor propagation found a metadata mismatch between " + f"fake tensor {_fake_out} and real tensor {_real_out}, " + f" at output index {i}, for func: {func}" + ) from exc + + for j, (s_fake, s_real) in enumerate( + zip(_fake_out.size(), _real_out.size()) + ): + try: + _check_fake_real_vals(s_fake, s_real) + except MetadataMismatchError as exc: + raise MetadataMismatchError( + f"Real tensor propagation found an output size mismatch between " + f"fake shape {s_fake} and real shape {s_real}, at output " + f"index {i}, dimension {j} for func: {func}" + ) from exc + 
else: + try: + _check_fake_real_vals(_fake_out, _real_out) + except MetadataMismatchError as exc: + raise MetadataMismatchError( + f"Real tensor propagation found an output value mismatch between " + f"fake output value {_fake_out} and real output value {_real_out}, " + f" at output index {i}, for func: {func}" + ) from exc + # If a data-dependent op is used in a decomposition, we # may need to get the unbacked settings "early" # TODO: Is this really needed? diff --git a/torch/_subclasses/fake_utils.py b/torch/_subclasses/fake_utils.py index abc66255e4e74a..9cf5777551ff50 100644 --- a/torch/_subclasses/fake_utils.py +++ b/torch/_subclasses/fake_utils.py @@ -10,6 +10,7 @@ from torch._subclasses.fake_tensor import ( FakeTensor, FakeTensorMode, + MetadataMismatchError, tree_flatten_only, UnsupportedFakeTensorException, ) @@ -48,6 +49,30 @@ def output_alias_each_other(outputs): return False +def _check_alias_info(context, real_out, real_in, fake_out, fake_in): + r_aliasing = outputs_alias_inputs(real_out, real_in) + f_aliasing = outputs_alias_inputs(fake_out, fake_in) + if r_aliasing != f_aliasing: + raise MetadataMismatchError( + f"{context} mismatch in outputs_alias_inputs check {f_aliasing} != {r_aliasing}" + ) + + r_identity_eq = outputs_are_inputs(real_out, real_in) + f_identity_eq = outputs_are_inputs(fake_out, fake_in) + if r_identity_eq != f_identity_eq: + raise MetadataMismatchError( + f"{context} mismatch in outputs_are_inputs check {f_identity_eq} != {r_identity_eq}" + ) + + r_output_alias_each_other = output_alias_each_other(real_out) + f_output_alias_each_other = output_alias_each_other(fake_out) + if r_output_alias_each_other != f_output_alias_each_other: + raise MetadataMismatchError( + f"{context} mismatch in outputs_alias_each_other check " + f"{f_output_alias_each_other} != {r_output_alias_each_other}" + ) + + def is_sdpa_error(func, idx, e): if ( ( @@ -144,6 +169,39 @@ def map_symint(s): return out +def _check_fake_real_tensors( + real_out: torch.Tensor, + fake_out: FakeTensor, + context="", + sizes=True, + strides=False, + storage_offset=True, + requires_grad=True, +): + if requires_grad: + if real_out.requires_grad != fake_out.requires_grad: + raise MetadataMismatchError( + f"{context} mismatched requires_grad-ness of outputs. 
" + f"This usually means that you have added autograd support " + f"for your operator at a dispatch key other than Autograd, " + f"which will lead to problems" + ) + + if torch._C._has_storage(real_out): + r_offset = real_out.storage_offset() + f_offset = fake_out.storage_offset() + if r_offset != f_offset: + raise MetadataMismatchError(f"{context} mismatched storage offset") + + torch._prims.utils.compare_tensor_meta( + real_out, + fake_out, + check_sizes=sizes, + check_strides=strides, + allow_rhs_unbacked=True, + ) + + class CrossRefFakeMode(TorchDispatchMode): def __init__( self, @@ -213,52 +271,26 @@ def __torch_dispatch__(self, func, types, args=(), kwargs=None): ), f"{context} mismatch in number of returns {len(f_flat)} != {len(r_flat)}" if self.check_aliasing: - r_aliasing = outputs_alias_inputs(r, (args, kwargs)) - f_aliasing = outputs_alias_inputs(fake_r, (fake_args, fake_kwargs)) - assert ( - r_aliasing == f_aliasing - ), f"{context} mismatch in outputs_alias_inputs check {f_aliasing} != {r_aliasing}" - - r_identity_eq = outputs_are_inputs(r, (args, kwargs)) - f_identity_eq = outputs_are_inputs(fake_r, (fake_args, fake_kwargs)) - assert ( - r_identity_eq == f_identity_eq - ), f"{context} mismatch in outputs_are_inputs check {f_identity_eq} != {r_identity_eq}" - - r_output_alias_each_other = output_alias_each_other(r) - f_output_alias_each_other = output_alias_each_other(fake_r) - assert r_output_alias_each_other == f_output_alias_each_other, ( - f"{context} mismatch in outputs_alias_each_other check " - f"{f_output_alias_each_other} != {r_output_alias_each_other}" + _check_alias_info( + context, r, (args, kwargs), fake_r, (fake_args, fake_kwargs) ) - for idx, (r_out, fake_out) in enumerate( + for idx, (r_out, f_out) in enumerate( zip(pytree.tree_leaves(r), pytree.tree_leaves(fake_r)) ): r_is_ten = isinstance(r_out, torch.Tensor) assert r_is_ten == isinstance( - fake_out, torch.Tensor + f_out, torch.Tensor ), f"{context} mismatched number of tensor outputs" if r_is_ten: - assert r_out.requires_grad == fake_out.requires_grad, ( - f"{context} mismatched requires_grad-ness of outputs. " - f"This usually means that you have added autograd support " - f"for your operator at a dispatch key other than Autograd, " - f"which will lead to problems" - ) - if torch._C._has_storage(r_out): - r_offset = r_out.storage_offset() - f_offset = fake_out.storage_offset() - assert ( - r_offset == f_offset - ), f"{context} mismatched storage offset" - try: - torch._prims.utils.compare_tensor_meta( + _check_fake_real_tensors( r_out, - fake_out, - check_strides=self.check_strides, - allow_rhs_unbacked=True, + f_out, + sizes=True, + strides=self.check_strides, + storage_offset=True, + requires_grad=True, ) except Exception as e: if is_sdpa_error(func, idx, e): @@ -268,5 +300,5 @@ def __torch_dispatch__(self, func, types, args=(), kwargs=None): if len(r_flat) == 1 else f"{context} mismatched tensor metadata for output[{idx}]: {e}" ) - raise RuntimeError(error_message) from e + raise MetadataMismatchError(error_message) from e return r diff --git a/torch/fx/experimental/symbolic_shapes.py b/torch/fx/experimental/symbolic_shapes.py index 753a74fc297be6..fb7028fb715b5f 100644 --- a/torch/fx/experimental/symbolic_shapes.py +++ b/torch/fx/experimental/symbolic_shapes.py @@ -5363,8 +5363,8 @@ def _maybe_evaluate_static( could then potentially guard on. 
Use compute_hint == True if you are trying to compute a non-binding - hint for the particular hint values of backed SymInts, e.g., if - s0 happens to be 3 this run, compute_hint will subsitute s0 with 3. + hint for the particular hint values of backed and unbacked SymInts, + e.g., if s0 happens to be 3 this run, compute_hint will subsitute s0 with 3. """ # axioms with compute hint NYE @@ -5373,7 +5373,7 @@ def _maybe_evaluate_static( expr = self.simplify(expr) if compute_hint: - expr = expr.xreplace(self.var_to_val) + expr = expr.xreplace(self.var_to_val).xreplace(self.unbacked_var_to_val) expr = canonicalize_bool_expr(expr) From 1565eba4b44d4fe921dc7715a080fe5dac19478a Mon Sep 17 00:00:00 2001 From: Eddie Yan Date: Mon, 4 Nov 2024 23:49:07 +0000 Subject: [PATCH 042/503] [cuDNN][SDPA] Match `query`'s memory layout ordering for `output` in cuDNN SDPA (#138354) For #138340 ~~We might consider more sophisticated logic here but the corresponding logic in other backends doesn't seem to do anything fancy for non BSHD/BHSD cases https://github.com/pytorch/pytorch/blob/ea8ea2f33fc65b33dc562f4b0430f8c79eb81d8d/aten/src/ATen/native/transformers/cuda/attention.cu#L1145~~ ended up going with a more general approach to much more or less arbitrary layouts Pull Request resolved: https://github.com/pytorch/pytorch/pull/138354 Approved by: https://github.com/drisspg --- aten/src/ATen/native/cudnn/MHA.cpp | 109 ++++++++++++++---- .../native/transformers/cuda/sdp_utils.cpp | 11 +- test/test_transformers.py | 36 +++++- 3 files changed, 131 insertions(+), 25 deletions(-) diff --git a/aten/src/ATen/native/cudnn/MHA.cpp b/aten/src/ATen/native/cudnn/MHA.cpp index c70a96f937cb81..7350d97b093877 100644 --- a/aten/src/ATen/native/cudnn/MHA.cpp +++ b/aten/src/ATen/native/cudnn/MHA.cpp @@ -292,6 +292,88 @@ auto fixSizeOneDimStrideSDPA( } return strides; } + +void alloc_with_matching_layout( + const Tensor& q, + Tensor& output, + const std::vector& shape) { + TORCH_INTERNAL_ASSERT( + shape.size() == q.sizes().size(), + "cuDNN SDPA alloc_with_matching_layout got requested shape ndim != q ndim"); + + if (std::equal(q.sizes().begin(), q.sizes().end(), shape.begin())) { + output = at::empty_like(q); + return; + } + + // get the "fill order," which is just an argsort on the strides + std::vector fill_order(shape.size()); + std::iota(fill_order.begin(), fill_order.end(), 0); + const auto q_strides = q.strides(); + std::stable_sort( + fill_order.begin(), fill_order.end(), [&q_strides](int idx1, int idx2) { + return q_strides[idx1] < q_strides[idx2]; + }); + std::vector ordered_strides(shape.size()); + int64_t current_stride = 1; + for (const int dim_idx : fill_order) { + ordered_strides[dim_idx] = current_stride; + current_stride *= shape[dim_idx]; + } + output = at::empty(at::IntArrayRef(shape), q.options()) + .as_strided( + at::IntArrayRef(shape), at::IntArrayRef(ordered_strides), 0); +} + +void permute_to_matching_layout(const Tensor& output, Tensor& grad_output) { + const int dims = output.sizes().size(); + std::vector outer_to_inner(dims); + std::iota(outer_to_inner.begin(), outer_to_inner.end(), 0); + const auto o_strides = output.strides(); + std::stable_sort( + outer_to_inner.begin(), + outer_to_inner.end(), + [&o_strides](int idx1, int idx2) { + return o_strides[idx1] > o_strides[idx2]; + }); + std::vector inverse(dims); + for (int d = 0; d < dims; d++) { + inverse[d] = std::find(outer_to_inner.begin(), outer_to_inner.end(), d) - + outer_to_inner.begin(); + } + grad_output = 
grad_output.permute(at::IntArrayRef(outer_to_inner)) + .contiguous() + .permute(at::IntArrayRef(inverse)); +} + +bool same_strides(const Tensor& t1, const Tensor& t2) { + std::vector t1_strides_no_ones; + std::vector t2_strides_no_ones; + const auto t1strides = t1.strides(); + const auto t2strides = t2.strides(); + const int dim = t1strides.size(); + if (dim != (int)t2strides.size()) { + return false; + } + const auto t1sizes = t1.sizes(); + const auto t2sizes = t2.sizes(); + + // we are going through strides backward here, but if both are backward it's + // comparable + for (int i = 0; i < dim; i++) { + if (t1sizes[i] > 1) { + t1_strides_no_ones.push_back(t1strides[i]); + } + if (t2sizes[i] > 1) { + t2_strides_no_ones.push_back(t2strides[i]); + } + } + return std::equal( + t1_strides_no_ones.begin(), + t1_strides_no_ones.end(), + t2_strides_no_ones.begin(), + t2_strides_no_ones.end()); +} } // namespace auto build_graph_and_tensors( @@ -553,7 +635,8 @@ void run_cudnn_SDP_fprop( Tensor& dropoutoffset) { cudnnHandle_t handle = getCudnnHandle(); if (!o.defined()) { - o = at::empty({b, h, s_q, d_v}, q.options()); + // q is passed to us in BHSD dim order + alloc_with_matching_layout(q, o, {b, h, s_q, d_v}); } if (return_softmaxstats && !softmaxstats.defined()) { @@ -660,30 +743,14 @@ void run_cudnn_SDP_bprop( } Tensor dO_ = dO; - if (!dO.strides()[dO.strides().size() - 1]) { - TORCH_WARN( - "cuDNN SDPA backward got an innermost stride of 0 in grad_out, which is unsupported." - " Materializing a contiguous tensor which will increase memory usage..."); - dO_ = dO.contiguous(); - } - if ( // handle trivial transposed case with a transposed dim of size 1 - // see also: https://github.com/pytorch/pytorch/issues/134001 - !(dO_.is_contiguous() && o.is_contiguous()) && - !std::equal( - o.strides().begin(), o.strides().end(), dO.strides().begin())) { - TORCH_WARN( + if (!same_strides(o, dO)) { + TORCH_WARN_ONCE( "cuDNN SDPA backward got grad_output.strides() != output.strides(), " "attempting to materialize a grad_output with matching strides..."); - if (o.is_contiguous()) { - dO_ = dO.contiguous(); - } else { - dO_ = dO.transpose(1, 2).contiguous().transpose(1, 2); - } + permute_to_matching_layout(o, dO_); } TORCH_INTERNAL_ASSERT( - (dO_.is_contiguous() && o.is_contiguous()) || - std::equal( - dO_.strides().begin(), dO_.strides().end(), o.strides().begin()), + same_strides(o, dO_), "cuDNN SDPA expected grad_output.strides() == output.strides(), " "the previous step probably failed to materialize a grad_output " "with matching strides..."); diff --git a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp index dae465c6d04313..615e36bfc351d6 100644 --- a/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp +++ b/aten/src/ATen/native/transformers/cuda/sdp_utils.cpp @@ -56,12 +56,21 @@ namespace { // TODO(eqy): more benchmarking to determine whether this should include sm86/89 // Needs to be kept in-sync with test_fused_chocie in test_transformers.py bool check_prefer_cudnn_attention() { -#if defined(CUDNN_VERSION) && CUDNN_VERSION >= 90000 + // TODO(eqy): Re-enable by default after upgrading to a release later than 9.5.0 + // see context: https://github.com/pytorch/pytorch/issues/138340 + // return false; +#if defined(CUDNN_VERSION) + +#if CUDNN_VERSION > 90000 auto dprops = at::cuda::getCurrentDeviceProperties(); return dprops->major >= 9; #else return false; #endif + +#else + return false; +#endif } // flash_attention V2 is universally faster than 
efficient_attention and Math diff --git a/test/test_transformers.py b/test/test_transformers.py index 5183e77931c83c..16756f96812b8b 100644 --- a/test/test_transformers.py +++ b/test/test_transformers.py @@ -2529,9 +2529,9 @@ def test_cudnn_attention_trivial_output_transpose(self, device): def test_cudnn_attention_nonmodulo64seqlen(self, device): # see also: https://github.com/pytorch/pytorch/issues/137347 mask = torch.randint(0, 2, (2, 1, 157, 6404)).to(device="cuda", dtype=torch.bool) - q = torch.randn(2, 32, 157, 128, device='cuda', dtype=torch.bfloat16, requires_grad=True) - k = torch.randn(2, 32, 6404, 128, device='cuda', dtype=torch.bfloat16, requires_grad=True) - v = torch.randn(2, 32, 6404, 128, device='cuda', dtype=torch.bfloat16, requires_grad=True) + q = torch.randn(2, 32, 157, 128, device='cuda', dtype=torch.float16, requires_grad=True) + k = torch.randn(2, 32, 6404, 128, device='cuda', dtype=torch.float16, requires_grad=True) + v = torch.randn(2, 32, 6404, 128, device='cuda', dtype=torch.float16, requires_grad=True) q_cpu = q.detach().clone().cpu() k_cpu = k.detach().clone().cpu() v_cpu = v.detach().clone().cpu() @@ -2564,6 +2564,36 @@ def test_cudnn_attention_nonmodulo64seqlen(self, device): torch.testing.assert_close(k.grad, k_cpu.grad.cuda(), atol=3e-3, rtol=2e-3) torch.testing.assert_close(v.grad, v_cpu.grad.cuda(), atol=3e-3, rtol=2e-3) + @skipIfRocm + @unittest.skipIf(not PLATFORM_SUPPORTS_CUDNN_ATTENTION, "cudnn Attention is not supported on this system") + def test_cudnn_attention_preserves_query_layout(self, device): + + def test_attention(backend: SDPBackend, permute_order: List[List[int]]): + BHSqD = [4, 16, 256, 64] + BHSkvD = [4, 16, 512, 64] + + shape_q = [BHSqD[idx] for idx in permute_order] + shape_kv = [BHSkvD[idx] for idx in permute_order] + reverse = [permute_order.index(idx) for idx in range(4)] + q = torch.randn(*shape_q, dtype=torch.bfloat16, device='cuda', requires_grad=True).permute(reverse) + k = torch.randn(*shape_kv, dtype=torch.bfloat16, device='cuda', requires_grad=True).permute(reverse) + v = torch.randn(*shape_kv, dtype=torch.bfloat16, device='cuda', requires_grad=True).permute(reverse) + self.assertEqual(q.shape, BHSqD) + self.assertEqual(k.shape, BHSkvD) + self.assertEqual(v.shape, BHSkvD) + + with sdpa_kernel(backend): + out = F.scaled_dot_product_attention(q, k, v) + self.assertTrue(out.permute(permute_order).is_contiguous()) + out.sum().backward() + + permute_orders = list() + permutable = [0, 1, 2] + permute_orders = itertools.permutations(permutable) + + for permute_order in permute_orders: + test_attention(SDPBackend.CUDNN_ATTENTION, list(permute_order) + [3]) + @unittest.skipIf(not PLATFORM_SUPPORTS_MEM_EFF_ATTENTION, "Fused SDPA was not built for this system") @parametrize("mask_dim", [1, 2, 3, 4]) def test_mem_efficient_attention_mask_variants(self, device, mask_dim: List[int]): From e947649e8f19dd84048a6145704a4ed47deca0de Mon Sep 17 00:00:00 2001 From: Mikayla Gawarecki Date: Fri, 1 Nov 2024 18:58:05 -0700 Subject: [PATCH 043/503] [BE] Change _marked_safe_globals_list to set (#139303) Prevent same global from being added multiple times Pull Request resolved: https://github.com/pytorch/pytorch/pull/139303 Approved by: https://github.com/janeyx99 ghstack dependencies: #138936, #139221, #139433, #139541, #137602 --- torch/_weights_only_unpickler.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/torch/_weights_only_unpickler.py b/torch/_weights_only_unpickler.py index 
a2d83425d2be60..9567b6d6129030 100644 --- a/torch/_weights_only_unpickler.py +++ b/torch/_weights_only_unpickler.py @@ -83,29 +83,27 @@ "nt", ] -_marked_safe_globals_list: List[Any] = [] +_marked_safe_globals_set: Set[Any] = set() def _add_safe_globals(safe_globals: List[Any]): - global _marked_safe_globals_list - _marked_safe_globals_list += safe_globals + global _marked_safe_globals_set + _marked_safe_globals_set = _marked_safe_globals_set.union(set(safe_globals)) def _get_safe_globals() -> List[Any]: - global _marked_safe_globals_list - return _marked_safe_globals_list + global _marked_safe_globals_set + return list(_marked_safe_globals_set) def _clear_safe_globals(): - global _marked_safe_globals_list - _marked_safe_globals_list = [] + global _marked_safe_globals_set + _marked_safe_globals_set = set() def _remove_safe_globals(globals_to_remove: List[Any]): - global _marked_safe_globals_list - _marked_safe_globals_list = list( - set(_marked_safe_globals_list) - set(globals_to_remove) - ) + global _marked_safe_globals_set + _marked_safe_globals_set = _marked_safe_globals_set - set(globals_to_remove) class _safe_globals: @@ -128,7 +126,7 @@ def __exit__(self, type, value, tb): # _get_allowed_globals due to the lru_cache def _get_user_allowed_globals(): rc: Dict[str, Any] = {} - for f in _marked_safe_globals_list: + for f in _marked_safe_globals_set: module, name = f.__module__, f.__name__ rc[f"{module}.{name}"] = f return rc From 3f248a57353288ac4df3a445ffa3ae0f952a6d33 Mon Sep 17 00:00:00 2001 From: Laith Sakka Date: Mon, 4 Nov 2024 23:56:12 +0000 Subject: [PATCH 044/503] Classify miss-inplaced tensors in logs. (#139240) Summary: use signpost logs, a followup is to remove the field possibly_missed_reinplacing_opportunities form dynamo compile table. 
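
For reviewers, a rough usage sketch of the counter abstraction this patch introduces (a minimal sketch, not the real call sites: the class and method names are taken from the diff below, but the example values and call ordering here are illustrative only; the actual callers are the reinplacing pass and convert_frame):

```python
# Hypothetical usage of the ReinplaceCounters API added in this patch.
from torch._dynamo.utils import ReinplaceCounters, ReInplaceTrigger

# The reinplacing pass records tensors it failed to reinplace, plus their
# statically known sizes in bytes, keyed by what triggered the analysis.
ReinplaceCounters.add_missed_opportunities(ReInplaceTrigger.AUTO_FUNC_V2, 1)
ReinplaceCounters.add_missed_bytes(ReInplaceTrigger.AUTO_FUNC_V2, 12)

# Tests and telemetry read the aggregated totals across all triggers.
total_tensors = ReinplaceCounters.get_total_missed()
total_bytes = ReinplaceCounters.get_total_missed_bytes()

# A single signpost event is emitted per compile (only if non-empty),
# and the counters are reset between compiles.
ReinplaceCounters.log()
ReinplaceCounters.clear()
```
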
Differential Revision: D65180194 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139240 Approved by: https://github.com/zou3519 --- test/inductor/test_inplacing_pass.py | 18 ++++----- torch/_dynamo/convert_frame.py | 29 ++------------ torch/_dynamo/utils.py | 52 +++++++++++++++++++++++++- torch/_inductor/fx_passes/reinplace.py | 37 +++++++++++------- 4 files changed, 86 insertions(+), 50 deletions(-) diff --git a/test/inductor/test_inplacing_pass.py b/test/inductor/test_inplacing_pass.py index 4c3e57d9be5144..ed09e81af48338 100644 --- a/test/inductor/test_inplacing_pass.py +++ b/test/inductor/test_inplacing_pass.py @@ -6,7 +6,7 @@ import torch._inductor.config as inductor_config from functorch import make_fx from torch import Tensor -from torch._dynamo.utils import counters +from torch._dynamo.utils import ReinplaceCounters from torch._higher_order_ops.auto_functionalize import ( auto_functionalized, auto_functionalized_v2, @@ -31,11 +31,11 @@ def num_reinplacing_failures(): - return counters["inductor"]["possibly_missed_reinplacing_opportunities"] + return ReinplaceCounters.get_total_missed() def miss_inplaced_bytes(): - return counters["inductor"]["possibly_missed_reinplacing_bytes"] + return ReinplaceCounters.get_total_missed_bytes() @torch.library.custom_op("_reinplacing::sin", mutates_args={"result"}) @@ -85,7 +85,7 @@ def boo(x: torch.Tensor) -> None: class TestReinplacingPassCorrectness(InductorTestCase): def setUp(self): - counters.clear() + ReinplaceCounters.clear() return super().setUp() def _test(self, f): @@ -138,7 +138,7 @@ def f(x, y): self._test(f) def test_counters_functionalize_old(self): - counters.clear() + ReinplaceCounters.clear() def f(x): out = torch.empty_like(x) @@ -158,7 +158,7 @@ def f(x): self.assertEqual(miss_inplaced_bytes(), 12) def test_counters_functionalize_v2(self): - counters.clear() + ReinplaceCounters.clear() def f(x): out = torch.empty_like(x) @@ -314,7 +314,7 @@ def test_multi_output_intermediate(self): with inductor_config.patch( {"enable_auto_functionalized_v2": enable_v2} ): - counters.clear() + ReinplaceCounters.clear() def f(x): out1 = torch.empty_like(x) @@ -329,7 +329,7 @@ def f(x): self.assertEqual(num_reinplacing_failures(), 0) def test_multiple_mutations(self): - counters.clear() + ReinplaceCounters.clear() def f(x, out): sin(x, out) @@ -345,7 +345,7 @@ def f(x, out): self.assertEqual(num_reinplacing_failures(), 0) def test_multiple_intermediate(self): - counters.clear() + ReinplaceCounters.clear() def f(x): out = torch.empty_like(x) diff --git a/torch/_dynamo/convert_frame.py b/torch/_dynamo/convert_frame.py index 847cbf7e7253df..6388adc0f4e0b0 100644 --- a/torch/_dynamo/convert_frame.py +++ b/torch/_dynamo/convert_frame.py @@ -971,12 +971,7 @@ def format_guard_failures() -> str: fail_reason: Optional[str] = None fail_user_frame_filename: Optional[str] = None fail_user_frame_lineno: Optional[int] = None - start_possibly_missed_reinplacing_opportunities = torch._dynamo.utils.counters[ - "inductor" - ]["possibly_missed_reinplacing_opportunities"] - start_possibly_missed_reinplacing_bytes = torch._dynamo.utils.counters[ - "inductor" - ]["start_possibly_missed_reinplacing_bytes"] + torch._dynamo.utils.ReinplaceCounters.clear() guarded_code = None try: guarded_code = compile_inner(code, one_graph, hooks, transform) @@ -1054,33 +1049,17 @@ def format_guard_failures() -> str: compliant_custom_ops = { op.__qualname__ for op in output.compliant_custom_ops } - possibly_missed_reinplacing_opportunities = ( - 
torch._dynamo.utils.counters["inductor"][ - "possibly_missed_reinplacing_opportunities" - ] - - start_possibly_missed_reinplacing_opportunities - ) remote_cache_time_saved = frame_phase_timing[frame_key].get( "remote_cache_time_saved", 0 ) - possibly_missed_reinplacing_bytes = ( - torch._dynamo.utils.counters["inductor"][ - "possibly_missed_reinplacing_bytes" - ] - - start_possibly_missed_reinplacing_bytes - ) - if possibly_missed_reinplacing_bytes != 0: - signpost_event( - "inductor", - "auto_functionalize", - {"missed_reinplacing_bytes": possibly_missed_reinplacing_bytes}, - ) remote_fx_graph_cache_get_time = frame_phase_timing[frame_key].get( "remote_fx_graph_cache_get", None ) remote_fx_graph_cache_put_time = frame_phase_timing[frame_key].get( "remote_fx_graph_cache_put", None ) + torch._dynamo.utils.ReinplaceCounters.log() + else: guard_count = None shape_env_guard_count = None @@ -1096,7 +1075,6 @@ def format_guard_failures() -> str: restart_reasons = set() # If compilation failed, the entire time is wasted dynamo_time_before_restart = duration_ns / 1e9 - possibly_missed_reinplacing_opportunities = None remote_cache_time_saved = None remote_fx_graph_cache_get_time = None remote_fx_graph_cache_put_time = None @@ -1161,7 +1139,6 @@ def clean_for_json(d: Dict[str, Any]) -> Dict[str, Any]: restart_reasons, dynamo_time_before_restart, guarded_code is not None, - possibly_missed_reinplacing_opportunities, remote_cache_time_saved, structured_logging_overhead_s, config.suppress_errors, diff --git a/torch/_dynamo/utils.py b/torch/_dynamo/utils.py index fb838fe23ca3a7..35db326f3a9d47 100644 --- a/torch/_dynamo/utils.py +++ b/torch/_dynamo/utils.py @@ -73,7 +73,11 @@ from torch._dispatch.python import enable_python_dispatcher from torch._guards import Source, TracingContext from torch._subclasses.meta_utils import is_sparse_compressed -from torch._utils_internal import log_chromium_event_internal, log_compilation_event +from torch._utils_internal import ( + log_chromium_event_internal, + log_compilation_event, + signpost_event, +) from torch.fx._utils import _format_graph_code, lazy_format_graph_code from torch.nn.modules.lazy import LazyModuleMixin from torch.utils._triton import has_triton, has_triton_package @@ -143,6 +147,51 @@ timer_counter = itertools.count() +# Abstraction on top of counters. +class ReInplaceTrigger(enum.Enum): + AUTO_FUNC_V1 = 1 + AUTO_FUNC_V2 = 2 + TRITON_OPS = 3 + + +class ReinplaceCounters: + _values: DefaultDict[str, int] = collections.defaultdict(int) + + # Track sizes of known not re-inplaced tensors (exclude dynamic shapes). + @classmethod + def add_missed_bytes(cls, trigger: ReInplaceTrigger, bytes: int): + cls._values[f"missed_bytes_{trigger.name}"] += bytes + + # Track number of not re-inplaced tensors. + @classmethod + def add_missed_opportunities(cls, trigger: ReInplaceTrigger, count: int): + cls._values[f"missed_tensors_{trigger}"] += count + + @classmethod + def clear(cls): + cls._values.clear() + + @classmethod + def get_total_missed(cls): + sum = 0 + for trigger in ReInplaceTrigger: + sum += cls._values.get(f"missed_tensors_{trigger}", 0) + return sum + + @classmethod + def get_total_missed_bytes(cls): + sum = 0 + for trigger in ReInplaceTrigger: + sum += cls._values.get(f"missed_bytes_{trigger.name}", 0) + return sum + + @classmethod + def log(cls): + # if not empty log. 
+ if cls._values: + signpost_event("inductor", "reinplace_counters", cls._values) + + def tabulate( rows: Union[List[Tuple[str, object]], List[List[object]]], headers: Union[Tuple[str, ...], List[str]], @@ -843,7 +892,6 @@ class CompilationMetrics: # to install any guarded code. True means we actually decided to install # a compiled frame has_guarded_code: Optional[bool] = None - possibly_missed_reinplacing_opportunities: Optional[int] = None remote_cache_time_saved_s: Optional[float] = None structured_logging_overhead_s: Optional[float] = None config_suppress_errors: Optional[bool] = None diff --git a/torch/_inductor/fx_passes/reinplace.py b/torch/_inductor/fx_passes/reinplace.py index 8a7f06ed2a4b75..16b257fdf32e0b 100644 --- a/torch/_inductor/fx_passes/reinplace.py +++ b/torch/_inductor/fx_passes/reinplace.py @@ -8,6 +8,7 @@ import torch from torch._dispatch.python import enable_python_dispatcher +from torch._dynamo.utils import ReinplaceCounters, ReInplaceTrigger from torch._higher_order_ops.triton_kernel_wrap import ( kernel_side_table, triton_kernel_wrapper_functional, @@ -497,7 +498,12 @@ def can_inplace(node, mutated_arg): ) def log_inplace_results( - node_name, old_tensors_to_clone, tensors_to_clone, missed_args, missed_nodes + node_name, + old_tensors_to_clone, + tensors_to_clone, + missed_args, + missed_nodes, + trigger, ): # Total size of possibly_missed_reinplacing_opportunities for tensors with static shapes. missed_bytes = 0 @@ -531,17 +537,14 @@ def bytes(node): missed_args, missed_bytes, ) - torch._dynamo.utils.counters["inductor"][ - "possibly_missed_reinplacing_opportunities" - ] += len(missed_args) - torch._dynamo.utils.counters["inductor"][ - "possibly_missed_reinplacing_bytes" - ] += missed_bytes + + ReinplaceCounters.add_missed_opportunities(trigger, len(missed_args)) + ReinplaceCounters.add_missed_bytes(trigger, missed_bytes) replace_dict: Dict[torch.fx.Node, torch.fx.Node] = {} def reinplace_and_refine_tensors_to_clone( - old_tensors_to_clone, kwargs, node_name, auto_functionalize_v2=False + old_tensors_to_clone, kwargs, node_name, trigger ): tensors_to_clone: List[str] = [] storage_of_reinplaced_args = set() @@ -580,7 +583,7 @@ def tensor_with_same_storage_already_reinplaced(arg): copy_node = copy_args_to_copy_nodes.get((mutated_arg, node)) if copy_node is not None: replace_dict[copy_node] = copy_node.args[0] - if not auto_functionalize_v2: + if not trigger == ReInplaceTrigger.AUTO_FUNC_V2: for user in node.users: # For auto_functionalize_v2, arg is the index of the base, where base at index i corresponds to # output atindex size(out)+i. @@ -602,7 +605,12 @@ def tensor_with_same_storage_already_reinplaced(arg): tensors_to_clone.append(arg) log_inplace_results( - node_name, old_tensors_to_clone, tensors_to_clone, missed_args, missed_nodes + node_name, + old_tensors_to_clone, + tensors_to_clone, + missed_args, + missed_nodes, + trigger, ) return tensors_to_clone @@ -628,7 +636,7 @@ def tensor_with_same_storage_already_reinplaced(arg): bases_to_clone, base_tensors_dct, node.target, - auto_functionalize_v2=True, + ReInplaceTrigger.AUTO_FUNC_V2, ) # Stash the metadata. There is a pass later on where we decompose # auto_functionalized into clones + a mutable op; this metadata @@ -647,7 +655,7 @@ def tensor_with_same_storage_already_reinplaced(arg): tensors_to_clone, node.kwargs, _mutable_op._name, - auto_functionalize_v2=False, + ReInplaceTrigger.AUTO_FUNC_V1, ) # Stash the metadata. 
There is a pass later on where we decompose @@ -679,7 +687,10 @@ def tensor_with_same_storage_already_reinplaced(arg): # This pass iterates over them and sees which ones are safe # to eliminate (i.e. no longer need the clones) tensors_to_clone = reinplace_and_refine_tensors_to_clone( - node.kwargs["tensors_to_clone"], node.kwargs["kwargs"], kernel_name + node.kwargs["tensors_to_clone"], + node.kwargs["kwargs"], + kernel_name, + ReInplaceTrigger.TRITON_OPS, ) kwargs = dict(node.kwargs) From 64d9ee88d7da0a63247559e9789075da2fcc7929 Mon Sep 17 00:00:00 2001 From: cyy Date: Mon, 4 Nov 2024 23:57:39 +0000 Subject: [PATCH 045/503] [11/N] Fix extra warnings brought by clang-tidy-17 (#139599) Follows #139385 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139599 Approved by: https://github.com/sraikund16 --- .clang-tidy | 3 ++- .lintrunner.toml | 3 +++ aten/src/ATen/core/CachingHostAllocator.h | 1 + aten/src/ATen/cuda/CUDASparseBlas.h | 2 ++ aten/src/ATen/cuda/CUDASparseDescriptors.cpp | 2 ++ aten/src/ATen/cudnn/AutocastRNN.cpp | 2 +- aten/src/ATen/cudnn/Descriptors.cpp | 13 ++++++++----- aten/src/ATen/cudnn/Descriptors.h | 7 ++++--- aten/src/ATen/cudnn/Types.cpp | 4 +--- aten/src/ATen/native/nested/NestedTensorUtils.h | 14 +++++++------- aten/src/ATen/templates/TensorBody.h | 4 ++-- c10/util/intrusive_ptr.h | 8 +++++--- tools/onnx/templates/rules.h.in | 1 + torch/csrc/autograd/python_autograd.h | 1 + torch/csrc/autograd/python_fft_functions.h | 1 + torch/csrc/autograd/python_linalg_functions.h | 1 + torch/csrc/autograd/python_sparse_functions.h | 1 + torch/csrc/autograd/python_special_functions.h | 2 +- torch/csrc/cuda/Module.h | 1 + torch/csrc/cuda/nccl.cpp | 2 ++ torch/csrc/cuda/shared/cudnn.cpp | 3 +-- torch/csrc/profiler/collection.h | 2 +- torch/csrc/profiler/kineto_client_interface.h | 2 +- torch/csrc/profiler/kineto_shim.cpp | 2 +- torch/csrc/profiler/orchestration/observer.cpp | 4 ++-- torch/csrc/profiler/orchestration/observer.h | 2 +- 26 files changed, 54 insertions(+), 34 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 1f7521ce76005d..deff8f0fb8d3d3 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -35,13 +35,14 @@ cppcoreguidelines-*, hicpp-exception-baseclass, hicpp-avoid-goto, misc-*, +-misc-confusable-identifiers, -misc-const-correctness, -misc-include-cleaner, -misc-use-anonymous-namespace, -misc-unused-parameters, -misc-no-recursion, -misc-non-private-member-variables-in-classes, --misc-confusable-identifiers, +-misc-unused-using-decls, modernize-*, -modernize-macro-to-enum, -modernize-return-braced-init-list, diff --git a/.lintrunner.toml b/.lintrunner.toml index ea5fc7a4f468ac..3c628701bfbb48 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -227,6 +227,9 @@ exclude_patterns = [ '**/generated/**', '**/*pb.h', '**/*inl.h', + 'aten/src/ATen/cpu/FlushDenormal.cpp', + 'aten/src/ATen/cpu/Utils.cpp', + 'aten/src/ATen/cpu/vml.h', 'aten/src/ATen/CPUFixedAllocator.h', 'aten/src/ATen/Parallel*.h', 'c10/xpu/**/*.h', diff --git a/aten/src/ATen/core/CachingHostAllocator.h b/aten/src/ATen/core/CachingHostAllocator.h index bf617ef806c65e..87b57b4abaa103 100644 --- a/aten/src/ATen/core/CachingHostAllocator.h +++ b/aten/src/ATen/core/CachingHostAllocator.h @@ -40,6 +40,7 @@ struct alignas(64) FreeBlockList { namespace { // Max cached block sizes: (1 << MAX_SIZE_INDEX) bytes + // NOLINTNEXTLINE(misc-definitions-in-headers) constexpr size_t MAX_SIZE_INDEX = 64; } diff --git a/aten/src/ATen/cuda/CUDASparseBlas.h b/aten/src/ATen/cuda/CUDASparseBlas.h index 
c99d42c9a7de88..a098496491d155 100644 --- a/aten/src/ATen/cuda/CUDASparseBlas.h +++ b/aten/src/ATen/cuda/CUDASparseBlas.h @@ -12,6 +12,7 @@ #include #include +// NOLINTBEGIN(misc-misplaced-const) namespace at::cuda::sparse { #define CUSPARSE_CSRGEAM2_BUFFERSIZE_ARGTYPES(scalar_t) \ @@ -316,3 +317,4 @@ void bsrsm2_solve>( #endif // AT_USE_HIPSPARSE_TRIANGULAR_SOLVE } // namespace at::cuda::sparse +// NOLINTEND(misc-misplaced-const) diff --git a/aten/src/ATen/cuda/CUDASparseDescriptors.cpp b/aten/src/ATen/cuda/CUDASparseDescriptors.cpp index b662996f3bc860..426f43c36ae57b 100644 --- a/aten/src/ATen/cuda/CUDASparseDescriptors.cpp +++ b/aten/src/ATen/cuda/CUDASparseDescriptors.cpp @@ -8,6 +8,7 @@ namespace at::cuda::sparse { cusparseStatus_t destroyConstDnMat(const cusparseDnMatDescr* dnMatDescr) { + // NOLINTNEXTLINE(*const-cast) return cusparseDestroyDnMat(const_cast(dnMatDescr)); } @@ -83,6 +84,7 @@ cusparseDnMatDescr_t createRawDnMatDescriptor(const Tensor& input, int64_t batch #endif auto batch_stride = ndim > 2 && batch_offset >= 0 ? input_strides[ndim - 3] : 0; + // NOLINTNEXTLINE(*const-cast) void* data_ptr = is_const ? const_cast(input.const_data_ptr()) : input.data_ptr(); void* values_ptr = static_cast(data_ptr) + batch_offset * batch_stride * input.itemsize(); diff --git a/aten/src/ATen/cudnn/AutocastRNN.cpp b/aten/src/ATen/cudnn/AutocastRNN.cpp index 71cd199b33790e..84571c9b45dcff 100644 --- a/aten/src/ATen/cudnn/AutocastRNN.cpp +++ b/aten/src/ATen/cudnn/AutocastRNN.cpp @@ -18,7 +18,7 @@ Autocast wrapper for CuDNN RNNs (the weight reflattening needs special attention // To be registered for the "_cudnn_rnn(...)" schema. // _cudnn_rnn is autograd-exposed (test_autocast_cudnn_rnn in test_cuda.py includes a test to confirm) -std::tuple +static std::tuple _cudnn_rnn_cast_reflatten(const Tensor & input, TensorList weight, int64_t weight_stride0, diff --git a/aten/src/ATen/cudnn/Descriptors.cpp b/aten/src/ATen/cudnn/Descriptors.cpp index 8c2a4467a479c2..d7c32ac2cf3340 100644 --- a/aten/src/ATen/cudnn/Descriptors.cpp +++ b/aten/src/ATen/cudnn/Descriptors.cpp @@ -6,6 +6,7 @@ #include #include +// NOLINTBEGIN(*c-arrays*) namespace at::native { namespace { @@ -101,7 +102,7 @@ std::ostream& operator<<(std::ostream & out, const TensorDescriptor& d) { int nbDims = 0; int dimA[CUDNN_DIM_MAX]; int strideA[CUDNN_DIM_MAX]; - cudnnDataType_t dtype; + cudnnDataType_t dtype{}; cudnnGetTensorNdDescriptor(d.desc(), CUDNN_DIM_MAX, &dtype, &nbDims, dimA, strideA); out << " type = " << cudnnTypeToString(dtype) << "\n"; out << " nbDims = " << nbDims << "\n"; @@ -143,7 +144,7 @@ void FilterDescriptor::set(const at::Tensor &t, const at::MemoryFormat memory_fo size[i] = (int) 1; } dim = std::max(dim, pad); - cudnnTensorFormat_t filter_format; + cudnnTensorFormat_t filter_format{}; switch(memory_format) { case at::MemoryFormat::Contiguous: filter_format = CUDNN_TENSOR_NCHW; @@ -155,7 +156,8 @@ void FilterDescriptor::set(const at::Tensor &t, const at::MemoryFormat memory_fo default: TORCH_INTERNAL_ASSERT(false, "unsupported memory_format for cuDNN filters"); } - set(getDataType(t), (int) dim, size, filter_format); + // NOLINTNEXTLINE(*narrowing-conversions) + set(getDataType(t), static_cast(dim), size, filter_format); } std::string cudnnMemoryFormatToString(cudnnTensorFormat_t tformat) { @@ -175,8 +177,8 @@ std::ostream& operator<<(std::ostream & out, const FilterDescriptor& d) { out << "FilterDescriptor " << static_cast(d.desc()) << "\n"; int nbDims = 0; int dimA[CUDNN_DIM_MAX]; - cudnnDataType_t dtype; - 
cudnnTensorFormat_t tformat; + cudnnDataType_t dtype{}; + cudnnTensorFormat_t tformat{}; cudnnGetFilterNdDescriptor(d.desc(), CUDNN_DIM_MAX, &dtype, &tformat, &nbDims, dimA); out << " type = " << cudnnTypeToString(dtype) << "\n"; out << " tensor_format = " << cudnnMemoryFormatToString(tformat) << "\n"; @@ -193,3 +195,4 @@ std::ostream& operator<<(std::ostream & out, const FilterDescriptor& d) { void FilterDescriptor::print() { std::cout << *this; } } +// NOLINTEND(*c-arrays*) diff --git a/aten/src/ATen/cudnn/Descriptors.h b/aten/src/ATen/cudnn/Descriptors.h index 8773af62fd62d2..6c2492b12e6b9b 100644 --- a/aten/src/ATen/cudnn/Descriptors.h +++ b/aten/src/ATen/cudnn/Descriptors.h @@ -92,6 +92,7 @@ struct DescriptorDeleter { // initialized the first time you call set() or any other initializing // function. template +// NOLINTNEXTLINE(bugprone-exception-escape) class TORCH_CUDA_CPP_API Descriptor { public: // TODO: Figure out why const-correctness doesn't work here @@ -128,7 +129,7 @@ class TORCH_CUDA_CPP_API RNNDataDescriptor : public Descriptor< void set(const at::Tensor &t, cudnnRNNDataLayout_t layout, int maxSeqLength, int batchSize, int vectorSize, const int* seqLengthArray); private: void set(cudnnDataType_t dataType, cudnnRNNDataLayout_t layout, int maxSeqLength, int batchSize, int vectorSize, const int* seqLengthArray) { - AT_CUDNN_CHECK(cudnnSetRNNDataDescriptor(mut_desc(), dataType, layout, maxSeqLength, batchSize, vectorSize, seqLengthArray, NULL)); + AT_CUDNN_CHECK(cudnnSetRNNDataDescriptor(mut_desc(), dataType, layout, maxSeqLength, batchSize, vectorSize, seqLengthArray, nullptr)); } }; @@ -224,6 +225,7 @@ struct TORCH_CUDA_CPP_API SpatialTransformerDescriptor } }; +// NOLINTNEXTLINE(bugprone-exception-escape) struct TORCH_CUDA_CPP_API DropoutDescriptor : public Descriptor< cudnnDropoutStruct, @@ -244,9 +246,8 @@ struct TORCH_CUDA_CPP_API DropoutDescriptor } // Restore a dropout descriptor given a dropout probability and existing RNG state. 
- void set(cudnnHandle_t handle, float dropout, at::Tensor state_) { + void set(cudnnHandle_t handle, float dropout, const at::Tensor& state) { TORCH_INTERNAL_ASSERT(dropout > 0, "dropout must be nonzero; otherwise call set_no_dropout"); - state = state_; void *state_ptr = state.data_ptr(); size_t state_size = state.size(0); // NB: The seed doesn't actually matter, so we give a dummy value diff --git a/aten/src/ATen/cudnn/Types.cpp b/aten/src/ATen/cudnn/Types.cpp index 4269f1dc0d4f13..f6e080c433d604 100644 --- a/aten/src/ATen/cudnn/Types.cpp +++ b/aten/src/ATen/cudnn/Types.cpp @@ -5,7 +5,7 @@ namespace at::native { cudnnDataType_t getCudnnDataTypeFromScalarType(const at::ScalarType dtype) { - if (dtype == c10::kQInt8) { + if (dtype == c10::kQInt8 || dtype == at::kChar) { return CUDNN_DATA_INT8; } else if (dtype == at::kFloat) { return CUDNN_DATA_FLOAT; @@ -19,8 +19,6 @@ cudnnDataType_t getCudnnDataTypeFromScalarType(const at::ScalarType dtype) { return CUDNN_DATA_INT32; } else if (dtype == at::kByte) { return CUDNN_DATA_UINT8; - } else if (dtype == at::kChar) { - return CUDNN_DATA_INT8; } std::string msg("getCudnnDataTypeFromScalarType() not supported for "); msg += toString(dtype); diff --git a/aten/src/ATen/native/nested/NestedTensorUtils.h b/aten/src/ATen/native/nested/NestedTensorUtils.h index 4630aa3fde0926..e36ae8a372f9d5 100644 --- a/aten/src/ATen/native/nested/NestedTensorUtils.h +++ b/aten/src/ATen/native/nested/NestedTensorUtils.h @@ -32,7 +32,7 @@ struct NestedTensorImpl; // The following functions are used to construct nested tensors from buffers and // metadata. -inline at::Tensor wrap_buffer(at::Tensor buffer, at::Tensor nested_sizes) { +inline at::Tensor wrap_buffer(const at::Tensor& buffer, const at::Tensor& nested_sizes) { TORCH_CHECK( buffer.dim() == 1, "Expected given buffer to be 1dim, but got ", @@ -41,19 +41,19 @@ inline at::Tensor wrap_buffer(at::Tensor buffer, at::Tensor nested_sizes) { TORCH_CHECK( buffer.is_contiguous(), "Expected given buffer to be contiguous."); return at::detail::make_tensor( - std::move(buffer), std::move(nested_sizes)); + buffer, nested_sizes); } // TODO: Figure out if we need a non-moving wrap_buffer() inline at::Tensor wrap_buffer( - at::Tensor buffer, + const at::Tensor& buffer, at::Tensor nested_sizes, at::Tensor nested_strides, at::Tensor storage_offsets) { TORCH_INTERNAL_ASSERT_DEBUG_ONLY( buffer.is_contiguous(), "Given buffer must be contiguous."); return at::detail::make_tensor( - std::move(buffer), + buffer, std::move(nested_sizes), std::move(nested_strides), std::move(storage_offsets)); @@ -95,9 +95,9 @@ inline at::Tensor create_nested_view_tensor( return at::detail::make_tensor( c10::TensorImpl::VIEW, base, - nested_sizes, - nested_strides, - storage_offsets); + std::move(nested_sizes), + std::move(nested_strides), + std::move(storage_offsets)); } // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/aten/src/ATen/templates/TensorBody.h b/aten/src/ATen/templates/TensorBody.h index 2e1520392ef927..7956ffb6aefd35 100644 --- a/aten/src/ATen/templates/TensorBody.h +++ b/aten/src/ATen/templates/TensorBody.h @@ -195,7 +195,7 @@ class TORCH_API Tensor: public TensorBase { // // TODO: temporarily disabled - Tensor& operator=(const TensorBase& x) & { + Tensor& operator=(const TensorBase& x) & noexcept { impl_ = x.getIntrusivePtr(); return *this; } @@ -204,7 +204,7 @@ class TORCH_API Tensor: public TensorBase { return *this; } - Tensor& operator=(const Tensor &x) & { + Tensor& operator=(const 
Tensor &x) & noexcept { return operator=(static_cast(x)); } Tensor& operator=(Tensor &&x) & noexcept { diff --git a/c10/util/intrusive_ptr.h b/c10/util/intrusive_ptr.h index e1d551930e162d..288b19df0a6c88 100644 --- a/c10/util/intrusive_ptr.h +++ b/c10/util/intrusive_ptr.h @@ -664,15 +664,17 @@ struct MaybeOwnedTraits> { toDestroy.release(); } - static const owned_type& referenceFromBorrow(const borrow_type& borrow) { + static const owned_type& referenceFromBorrow( + const borrow_type& borrow) noexcept { return borrow; } - static const owned_type* pointerFromBorrow(const borrow_type& borrow) { + static const owned_type* pointerFromBorrow( + const borrow_type& borrow) noexcept { return &borrow; } - static bool debugBorrowIsValid(const borrow_type& /*borrow*/) { + static bool debugBorrowIsValid(const borrow_type& /*borrow*/) noexcept { return true; } }; diff --git a/tools/onnx/templates/rules.h.in b/tools/onnx/templates/rules.h.in index c4ec775b83fc08..5d3e26012c4d5e 100644 --- a/tools/onnx/templates/rules.h.in +++ b/tools/onnx/templates/rules.h.in @@ -1,4 +1,5 @@ #pragma once +#include /** ${generated_comment} diff --git a/torch/csrc/autograd/python_autograd.h b/torch/csrc/autograd/python_autograd.h index a854d30c895ce7..73401b15ce3b1a 100644 --- a/torch/csrc/autograd/python_autograd.h +++ b/torch/csrc/autograd/python_autograd.h @@ -1,5 +1,6 @@ #ifndef THP_AUTOGRAD_H #define THP_AUTOGRAD_H +#include PyObject* THPAutograd_initExtension(PyObject* _unused, PyObject* unused); void THPAutograd_initFunctions(); diff --git a/torch/csrc/autograd/python_fft_functions.h b/torch/csrc/autograd/python_fft_functions.h index b95d25effcbb4f..1ce94653e1cba2 100644 --- a/torch/csrc/autograd/python_fft_functions.h +++ b/torch/csrc/autograd/python_fft_functions.h @@ -1,4 +1,5 @@ #pragma once +#include namespace torch::autograd { diff --git a/torch/csrc/autograd/python_linalg_functions.h b/torch/csrc/autograd/python_linalg_functions.h index 685c87bb6d2a85..9477556279d0d3 100644 --- a/torch/csrc/autograd/python_linalg_functions.h +++ b/torch/csrc/autograd/python_linalg_functions.h @@ -1,4 +1,5 @@ #pragma once +#include namespace torch::autograd { diff --git a/torch/csrc/autograd/python_sparse_functions.h b/torch/csrc/autograd/python_sparse_functions.h index d97018c51981c7..02e3b071eab31a 100644 --- a/torch/csrc/autograd/python_sparse_functions.h +++ b/torch/csrc/autograd/python_sparse_functions.h @@ -1,4 +1,5 @@ #pragma once +#include namespace torch::autograd { diff --git a/torch/csrc/autograd/python_special_functions.h b/torch/csrc/autograd/python_special_functions.h index d036ce4383b562..a58235214bc943 100644 --- a/torch/csrc/autograd/python_special_functions.h +++ b/torch/csrc/autograd/python_special_functions.h @@ -1,5 +1,5 @@ #pragma once - +#include namespace torch::autograd { void initSpecialFunctions(PyObject* module); diff --git a/torch/csrc/cuda/Module.h b/torch/csrc/cuda/Module.h index 0c89e4bc65f259..f3a5ccb925e4d4 100644 --- a/torch/csrc/cuda/Module.h +++ b/torch/csrc/cuda/Module.h @@ -1,5 +1,6 @@ #ifndef THCP_CUDA_MODULE_INC #define THCP_CUDA_MODULE_INC +#include PyObject* THCPModule_getDevice_wrap(PyObject* self); PyObject* THCPModule_setDevice_wrap(PyObject* self, PyObject* arg); diff --git a/torch/csrc/cuda/nccl.cpp b/torch/csrc/cuda/nccl.cpp index 15c4273b686719..d7d8c9385e97f4 100644 --- a/torch/csrc/cuda/nccl.cpp +++ b/torch/csrc/cuda/nccl.cpp @@ -273,6 +273,7 @@ struct NcclCommList { devices.data())); } NcclCommList(NcclCommList&& foo) = default; + // 
NOLINTNEXTLINE(bugprone-exception-escape) ~NcclCommList() { if (comms) { for (const auto i : c10::irange(ndevices)) { @@ -457,6 +458,7 @@ AutoNcclGroup::AutoNcclGroup(ncclComm_t comm, bool comm_nonblocking) #endif } +// NOLINTNEXTLINE(bugprone-exception-escape) AutoNcclGroup::~AutoNcclGroup() noexcept(false) { #if defined(NCCL_MAJOR) && (NCCL_MAJOR >= 2) if (comm_nonblocking_ && comm_ != nullptr) { diff --git a/torch/csrc/cuda/shared/cudnn.cpp b/torch/csrc/cuda/shared/cudnn.cpp index 30a1383455be1e..f56899107fd56b 100644 --- a/torch/csrc/cuda/shared/cudnn.cpp +++ b/torch/csrc/cuda/shared/cudnn.cpp @@ -4,7 +4,6 @@ #if defined(USE_CUDNN) || defined(USE_ROCM) #include -#include #include namespace { @@ -22,7 +21,7 @@ version_tuple getCompileVersion() { version_tuple getRuntimeVersion() { #ifndef USE_STATIC_CUDNN - int major, minor, patch; + int major = 0, minor = 0, patch = 0; cudnnGetProperty(MAJOR_VERSION, &major); cudnnGetProperty(MINOR_VERSION, &minor); cudnnGetProperty(PATCH_LEVEL, &patch); diff --git a/torch/csrc/profiler/collection.h b/torch/csrc/profiler/collection.h index 0b5bad4d2b4953..01f02fa94fb6c2 100644 --- a/torch/csrc/profiler/collection.h +++ b/torch/csrc/profiler/collection.h @@ -378,7 +378,7 @@ struct TORCH_API Result : public std::enable_shared_from_this { } template - void visit_if_base(Fn&& fn) const { + void visit_if_base(const Fn& fn) const { visit([&](const auto& extra_fields) { using extra_fields_t = typename std::remove_cv_t< typename std::remove_reference_t>; diff --git a/torch/csrc/profiler/kineto_client_interface.h b/torch/csrc/profiler/kineto_client_interface.h index 6d32825608522f..6cfabfd111cf5c 100644 --- a/torch/csrc/profiler/kineto_client_interface.h +++ b/torch/csrc/profiler/kineto_client_interface.h @@ -6,6 +6,6 @@ namespace torch { // declare global_kineto_init for libtorch_cpu.so to call -TORCH_API void global_kineto_init(void); +TORCH_API void global_kineto_init(); } // namespace torch diff --git a/torch/csrc/profiler/kineto_shim.cpp b/torch/csrc/profiler/kineto_shim.cpp index 4cbaf7c9a5309b..1bdff80b9b91e3 100644 --- a/torch/csrc/profiler/kineto_shim.cpp +++ b/torch/csrc/profiler/kineto_shim.cpp @@ -222,7 +222,7 @@ bool collectivesProfilerExists() { #ifdef USE_KINETO static const std::string setTraceID(const std::string& trace_id) { - if (trace_id == "") { + if (trace_id.empty()) { return ""; } std::stringstream configss; diff --git a/torch/csrc/profiler/orchestration/observer.cpp b/torch/csrc/profiler/orchestration/observer.cpp index c4d25f5f0786de..4b443ccc23ee45 100644 --- a/torch/csrc/profiler/orchestration/observer.cpp +++ b/torch/csrc/profiler/orchestration/observer.cpp @@ -39,7 +39,7 @@ ProfilerConfig::ProfilerConfig( bool with_flops, bool with_modules, ExperimentalConfig experimental_config, - const std::string& trace_id) + std::string trace_id) : state{state}, experimental_config{std::move(experimental_config)}, report_input_shapes{report_input_shapes}, @@ -47,7 +47,7 @@ ProfilerConfig::ProfilerConfig( with_stack{with_stack}, with_flops{with_flops}, with_modules{with_modules}, - trace_id{trace_id} {} + trace_id{std::move(trace_id)} {} bool ProfilerConfig::disabled() const { return state == torch::profiler::impl::ProfilerState::Disabled; diff --git a/torch/csrc/profiler/orchestration/observer.h b/torch/csrc/profiler/orchestration/observer.h index ef7b4d4566ea89..c3beb4cca4d0c7 100644 --- a/torch/csrc/profiler/orchestration/observer.h +++ b/torch/csrc/profiler/orchestration/observer.h @@ -104,7 +104,7 @@ struct TORCH_API ProfilerConfig { bool 
with_flops = false, bool with_modules = false, ExperimentalConfig experimental_config = ExperimentalConfig(), - const std::string& trace_id = ""); + std::string trace_id = ""); bool disabled() const; bool global() const; From 4a3ee964274901464f47ff133cec5e48fb4e33c6 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Tue, 5 Nov 2024 00:13:48 +0000 Subject: [PATCH 046/503] Revert "Don't use deprecated type properties in UpsampleKernel (#139399)" This reverts commit 9d096e4d9ffc2b57a19cbefd5d4b5cce7306945b. Reverted https://github.com/pytorch/pytorch/pull/139399 on behalf of https://github.com/ZainRizvi due to Change reverted internally due to broken builds. See D65378845 ([comment](https://github.com/pytorch/pytorch/pull/139358#issuecomment-2455959040)) --- aten/src/ATen/native/cpu/UpSampleKernel.cpp | 18 +++++++++--------- torch/csrc/jit/mobile/flatbuffer_loader.cpp | 4 ++-- .../csrc/jit/runtime/register_special_ops.cpp | 2 +- torch/csrc/jit/serialization/unpickler.cpp | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/aten/src/ATen/native/cpu/UpSampleKernel.cpp b/aten/src/ATen/native/cpu/UpSampleKernel.cpp index 74fb38779ea156..3cc02b5077665e 100644 --- a/aten/src/ATen/native/cpu/UpSampleKernel.cpp +++ b/aten/src/ATen/native/cpu/UpSampleKernel.cpp @@ -735,8 +735,8 @@ struct HelperInterpBase { for ([[maybe_unused]] const auto j : c10::irange(interp_size)) { output.emplace_back( - empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); - output.emplace_back(empty(new_shape, at::device(kCPU).dtype(output_type))); + empty(new_shape, CPU(c10::CppTypeToScalarType()))); + output.emplace_back(empty(new_shape, CPU(output_type))); } } @@ -878,16 +878,16 @@ struct HelperInterpBase { // Bounds approach as in PIL: xmin/xmax output.emplace_back( - empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); + empty(new_shape, CPU(c10::CppTypeToScalarType()))); output.emplace_back( - empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); + empty(new_shape, CPU(c10::CppTypeToScalarType()))); output.emplace_back( - empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); + empty(new_shape, CPU(c10::CppTypeToScalarType()))); { // Weights new_shape[reshape_dim] = output_size * max_interp_size; - auto wts = empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType())); + auto wts = empty(new_shape, CPU(c10::CppTypeToScalarType())); auto strides = wts.strides().vec(); strides[reshape_dim] = 0; new_shape[reshape_dim] = output_size; @@ -895,7 +895,7 @@ struct HelperInterpBase { output.emplace_back(wts); // Weights indices output.emplace_back( - empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); + empty(new_shape, CPU(c10::CppTypeToScalarType()))); } int64_t* idx_ptr_xmin = output[0].data_ptr(); @@ -1050,9 +1050,9 @@ struct HelperInterpNearest : public HelperInterpBase { for ([[maybe_unused]] const auto j : c10::irange(interp_size)) { output.emplace_back( - empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); + empty(new_shape, CPU(c10::CppTypeToScalarType()))); // Defines weights for consistency, but not used - output.emplace_back(at::ones(new_shape, at::device(kCPU).dtype(output_type))); + output.emplace_back(at::ones(new_shape, CPU(output_type))); } } diff --git a/torch/csrc/jit/mobile/flatbuffer_loader.cpp b/torch/csrc/jit/mobile/flatbuffer_loader.cpp index f56b5818ecaccf..246bee03d6fdd6 100644 --- a/torch/csrc/jit/mobile/flatbuffer_loader.cpp +++ 
b/torch/csrc/jit/mobile/flatbuffer_loader.cpp @@ -469,8 +469,8 @@ IValue parseBasic( at::Tensor parseTensorFromMetadata( FlatbufferLoader* loader, const mobile::serialization::TensorMetadata* tensor_md) { - auto type = static_cast(tensor_md->scalar_type()); - auto options = at::device(at::kCPU).dtype(type); + at::ScalarType type = static_cast(tensor_md->scalar_type()); + auto options = at::CPU(type).options(); at::Tensor tensor; if (tensor_md->quantized_schema() != nullptr) { // is quantized diff --git a/torch/csrc/jit/runtime/register_special_ops.cpp b/torch/csrc/jit/runtime/register_special_ops.cpp index 0f2447e05a9f8d..783aaf87ef7d7d 100644 --- a/torch/csrc/jit/runtime/register_special_ops.cpp +++ b/torch/csrc/jit/runtime/register_special_ops.cpp @@ -293,7 +293,7 @@ RegisterOperators reg({ DEFINE_TORCH_TENSOR_OP( bool, bool, - at::empty({}, at::device(at::kCPU).dtype(at::kBool)).fill_(scalar_val)) + at::empty({}, at::CPU(at::kBool).options()).fill_(scalar_val)) DEFINE_TORCH_TENSOR_OP( float, double, diff --git a/torch/csrc/jit/serialization/unpickler.cpp b/torch/csrc/jit/serialization/unpickler.cpp index 5a81a25c358e20..fc95f7fe9a4a65 100644 --- a/torch/csrc/jit/serialization/unpickler.cpp +++ b/torch/csrc/jit/serialization/unpickler.cpp @@ -586,7 +586,7 @@ PickleOpCode Unpickler::readInstruction() { storage = storage_context_->getStorage(key); } else { int64_t numel = args.at(4).toInt(); - auto dtype = scalarTypeToTypeMeta(type); + caffe2::TypeMeta dtype = at::CPU(type).typeMeta(); at::DataPtr storage_ptr; if (numel > 0) { @@ -608,7 +608,7 @@ PickleOpCode Unpickler::readInstruction() { } } - auto options = at::device(at::kCPU).dtype(type); + auto options = at::CPU(type).options(); if (use_storage_device_) { options = options.device(storage.device()); device = storage.device(); From 1b6f0b2a00cf411d708e0fda40883724cd06e4ff Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Tue, 5 Nov 2024 00:13:48 +0000 Subject: [PATCH 047/503] Revert "[BE] And delete `DeprecatedTypProperties` cast (#139358)" This reverts commit 92a2a9ded22ef20a49e8c31dc2add93b40e8a78c. Reverted https://github.com/pytorch/pytorch/pull/139358 on behalf of https://github.com/ZainRizvi due to Change reverted internally due to broken builds. 
See D65378845 ([comment](https://github.com/pytorch/pytorch/pull/139358#issuecomment-2455959040)) --- aten/src/ATen/Dispatch.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/aten/src/ATen/Dispatch.h b/aten/src/ATen/Dispatch.h index 30114e42d3de78..c70d68fb93e77d 100644 --- a/aten/src/ATen/Dispatch.h +++ b/aten/src/ATen/Dispatch.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include @@ -102,6 +102,13 @@ inline at::ScalarType scalar_type(at::ScalarType s) { return s; } +C10_DEPRECATED_MESSAGE( + "passing at::DeprecatedTypeProperties to an AT_DISPATCH macro is deprecated, " + "pass an at::ScalarType instead") +inline at::ScalarType scalar_type(const at::DeprecatedTypeProperties& t) { + return t.scalarType(); +} + } // namespace detail // The AT_DISPATCH_* family of macros provides the ability to From b82a51bc6b1170da3db8f67816799f3a47530ff8 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 30 Oct 2024 17:57:15 -0700 Subject: [PATCH 048/503] [BE] And delete `DeprecatedTypProperties` cast (#139358) Pull Request resolved: https://github.com/pytorch/pytorch/pull/139358 Approved by: https://github.com/ezyang ghstack dependencies: #139353 --- aten/src/ATen/Dispatch.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/aten/src/ATen/Dispatch.h b/aten/src/ATen/Dispatch.h index c70d68fb93e77d..30114e42d3de78 100644 --- a/aten/src/ATen/Dispatch.h +++ b/aten/src/ATen/Dispatch.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include @@ -102,13 +102,6 @@ inline at::ScalarType scalar_type(at::ScalarType s) { return s; } -C10_DEPRECATED_MESSAGE( - "passing at::DeprecatedTypeProperties to an AT_DISPATCH macro is deprecated, " - "pass an at::ScalarType instead") -inline at::ScalarType scalar_type(const at::DeprecatedTypeProperties& t) { - return t.scalarType(); -} - } // namespace detail // The AT_DISPATCH_* family of macros provides the ability to From 0058f7100222523fa8b9f74af9ea7d341a6458b4 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 31 Oct 2024 08:50:33 -0700 Subject: [PATCH 049/503] Don't use deprecated type properties in UpsampleKernel (#139399) By replacing `at::CPU(dtype)` pattern with `at::device(kCPU).dtype(dtype)` pattern Pull Request resolved: https://github.com/pytorch/pytorch/pull/139399 Approved by: https://github.com/Skylion007 ghstack dependencies: #139353, #139358 --- aten/src/ATen/native/cpu/UpSampleKernel.cpp | 18 +++++++++--------- torch/csrc/jit/mobile/flatbuffer_loader.cpp | 4 ++-- .../csrc/jit/runtime/register_special_ops.cpp | 2 +- torch/csrc/jit/serialization/unpickler.cpp | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/aten/src/ATen/native/cpu/UpSampleKernel.cpp b/aten/src/ATen/native/cpu/UpSampleKernel.cpp index 3cc02b5077665e..74fb38779ea156 100644 --- a/aten/src/ATen/native/cpu/UpSampleKernel.cpp +++ b/aten/src/ATen/native/cpu/UpSampleKernel.cpp @@ -735,8 +735,8 @@ struct HelperInterpBase { for ([[maybe_unused]] const auto j : c10::irange(interp_size)) { output.emplace_back( - empty(new_shape, CPU(c10::CppTypeToScalarType()))); - output.emplace_back(empty(new_shape, CPU(output_type))); + empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); + output.emplace_back(empty(new_shape, at::device(kCPU).dtype(output_type))); } } @@ -878,16 +878,16 @@ struct HelperInterpBase { // Bounds approach as in PIL: xmin/xmax output.emplace_back( - empty(new_shape, CPU(c10::CppTypeToScalarType()))); + empty(new_shape, 
at::device(kCPU).dtype(c10::CppTypeToScalarType()))); output.emplace_back( - empty(new_shape, CPU(c10::CppTypeToScalarType()))); + empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); output.emplace_back( - empty(new_shape, CPU(c10::CppTypeToScalarType()))); + empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); { // Weights new_shape[reshape_dim] = output_size * max_interp_size; - auto wts = empty(new_shape, CPU(c10::CppTypeToScalarType())); + auto wts = empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType())); auto strides = wts.strides().vec(); strides[reshape_dim] = 0; new_shape[reshape_dim] = output_size; @@ -895,7 +895,7 @@ struct HelperInterpBase { output.emplace_back(wts); // Weights indices output.emplace_back( - empty(new_shape, CPU(c10::CppTypeToScalarType()))); + empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); } int64_t* idx_ptr_xmin = output[0].data_ptr(); @@ -1050,9 +1050,9 @@ struct HelperInterpNearest : public HelperInterpBase { for ([[maybe_unused]] const auto j : c10::irange(interp_size)) { output.emplace_back( - empty(new_shape, CPU(c10::CppTypeToScalarType()))); + empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); // Defines weights for consistency, but not used - output.emplace_back(at::ones(new_shape, CPU(output_type))); + output.emplace_back(at::ones(new_shape, at::device(kCPU).dtype(output_type))); } } diff --git a/torch/csrc/jit/mobile/flatbuffer_loader.cpp b/torch/csrc/jit/mobile/flatbuffer_loader.cpp index 246bee03d6fdd6..f56b5818ecaccf 100644 --- a/torch/csrc/jit/mobile/flatbuffer_loader.cpp +++ b/torch/csrc/jit/mobile/flatbuffer_loader.cpp @@ -469,8 +469,8 @@ IValue parseBasic( at::Tensor parseTensorFromMetadata( FlatbufferLoader* loader, const mobile::serialization::TensorMetadata* tensor_md) { - at::ScalarType type = static_cast(tensor_md->scalar_type()); - auto options = at::CPU(type).options(); + auto type = static_cast(tensor_md->scalar_type()); + auto options = at::device(at::kCPU).dtype(type); at::Tensor tensor; if (tensor_md->quantized_schema() != nullptr) { // is quantized diff --git a/torch/csrc/jit/runtime/register_special_ops.cpp b/torch/csrc/jit/runtime/register_special_ops.cpp index 783aaf87ef7d7d..0f2447e05a9f8d 100644 --- a/torch/csrc/jit/runtime/register_special_ops.cpp +++ b/torch/csrc/jit/runtime/register_special_ops.cpp @@ -293,7 +293,7 @@ RegisterOperators reg({ DEFINE_TORCH_TENSOR_OP( bool, bool, - at::empty({}, at::CPU(at::kBool).options()).fill_(scalar_val)) + at::empty({}, at::device(at::kCPU).dtype(at::kBool)).fill_(scalar_val)) DEFINE_TORCH_TENSOR_OP( float, double, diff --git a/torch/csrc/jit/serialization/unpickler.cpp b/torch/csrc/jit/serialization/unpickler.cpp index fc95f7fe9a4a65..5a81a25c358e20 100644 --- a/torch/csrc/jit/serialization/unpickler.cpp +++ b/torch/csrc/jit/serialization/unpickler.cpp @@ -586,7 +586,7 @@ PickleOpCode Unpickler::readInstruction() { storage = storage_context_->getStorage(key); } else { int64_t numel = args.at(4).toInt(); - caffe2::TypeMeta dtype = at::CPU(type).typeMeta(); + auto dtype = scalarTypeToTypeMeta(type); at::DataPtr storage_ptr; if (numel > 0) { @@ -608,7 +608,7 @@ PickleOpCode Unpickler::readInstruction() { } } - auto options = at::CPU(type).options(); + auto options = at::device(at::kCPU).dtype(type); if (use_storage_device_) { options = options.device(storage.device()); device = storage.device(); From 639162f39a8a9b17fced61795dfcd93cdcb1c044 Mon Sep 17 00:00:00 2001 From: Edward Yang Date: Tue, 5 
Nov 2024 00:30:07 +0000 Subject: [PATCH 050/503] Add cache size to pt2_compile_events (#139627) Summary: I realized I wanted to check "are my cache entries/IO unreasonably large" and there's no easy way to do it. This lets me do it. Test Plan: servicelab Differential Revision: D65390363 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139627 Approved by: https://github.com/c00w --- torch/_dynamo/pgo.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/torch/_dynamo/pgo.py b/torch/_dynamo/pgo.py index 300f9a5897602f..0943fa4c53ac9d 100644 --- a/torch/_dynamo/pgo.py +++ b/torch/_dynamo/pgo.py @@ -541,6 +541,7 @@ def hit(ty: str) -> DefaultDict[CodeId, CodeState]: with open(path, "rb") as f: try: _CODE_STATE = pickle.load(f) + chromium_log.add_event_data(name, cache_size_bytes=f.tell()) except Exception: log.warning( "get_code_state failed while reading %s", path, exc_info=True @@ -569,6 +570,7 @@ def hit(ty: str) -> DefaultDict[CodeId, CodeState]: data = cache_data["data"] assert isinstance(data, str) payload = base64.b64decode(data) + chromium_log.add_event_data(name, cache_size_bytes=len(payload)) _CODE_STATE = pickle.loads(payload) except Exception: log.warning( @@ -631,6 +633,7 @@ def put_local_code_state(cache_key: str) -> None: with FileLock(lock_path, timeout=LOCK_TIMEOUT): with open(tmp_path, "wb") as f: pickle.dump(_CODE_STATE, f) + chromium_log.add_event_data(name, cache_size_bytes=f.tell()) os.rename(tmp_path, path) log.info( "put_code_state: wrote local %s, %d entries", path, len(_CODE_STATE) @@ -655,6 +658,7 @@ def put_remote_code_state(cache_key: str) -> None: return content = pickle.dumps(_CODE_STATE) + chromium_log.add_event_data(name, cache_size_bytes=len(content)) cache_data: JsonDataTy = { "data": base64.b64encode(content).decode("ascii"), } From 3672c688e3d181d6659d49cfab99528285720206 Mon Sep 17 00:00:00 2001 From: CaoE Date: Tue, 5 Nov 2024 00:55:17 +0000 Subject: [PATCH 051/503] Fix layout for SetSourceTensorKernel (#137973) Fixes #136837. `aten.set_.source_Tensor` will make the size and stride of the first input and output follow that of the second input: https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/native/TensorShape.cpp#L440. If the layouts of the two inputs are different, the following `assert_size_stride` will fail. 
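To make the failure mode concrete, here is a minimal standalone sketch (not part of this patch; the shapes and the `main` wrapper are illustrative only) of the `set_` behaviour the fix accounts for:

```cpp
// After set_, `self` reports the *source* tensor's sizes and strides, so the
// generated output layout has to be frozen from the storage tensor rather
// than from `self`.
#include <ATen/ATen.h>
#include <iostream>

int main() {
  at::Tensor self = at::empty({4, 4}).t();  // transposed view: strides {1, 4}
  at::Tensor src  = at::empty({4, 4});      // contiguous: strides {4, 1}
  self.set_(src);                           // self now shares src's storage and layout
  std::cout << self.stride(0) << ", " << self.stride(1) << std::endl;  // prints 4, 1
  return 0;
}
```

If the wrapper code asserted the old `{1, 4}` strides on the output, the `assert_size_stride` check would trip, which is the failure described above.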
Pull Request resolved: https://github.com/pytorch/pytorch/pull/137973 Approved by: https://github.com/jgong5, https://github.com/jansel --- test/inductor/test_cpu_repro.py | 60 +++++++++++++++++++++++++++++++++ torch/_inductor/ir.py | 4 +-- 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/test/inductor/test_cpu_repro.py b/test/inductor/test_cpu_repro.py index b8ed2c6644a398..37aba2794a171b 100644 --- a/test/inductor/test_cpu_repro.py +++ b/test/inductor/test_cpu_repro.py @@ -639,6 +639,66 @@ def test_lstm_packed_change_input_sizes_cpu( change_input_sizes=True, ) + def test_set_source_Tensor(self): + class MaskedConv2d(torch.nn.Conv2d): + def __init__( + self, + *, + in_channels: int, + out_channels: int, + kernel_size: int, + padding: int = 0, + ) -> None: + super().__init__( + in_channels, out_channels, kernel_size, padding=padding + ) + mask = torch.zeros_like(self.weight) + + mask[:, :, : kernel_size // 2, :] = 1 + mask[:, :, kernel_size // 2, : kernel_size // 2] = 1 + self.register_buffer("mask", mask) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + with torch.no_grad(): + self.weight.data *= self.mask + return super().forward(x) + + class M(torch.nn.Module): + def __init__( + self, num_channels: int, num_colors: int, H: int, W: int + ) -> None: + super().__init__() + self.num_channels = num_channels + self.num_colors = num_colors + self.H = H + self.W = W + kernel_size = 7 + padding = (kernel_size - 1) // 2 + # 1 7x7 Mask + layers = [ + MaskedConv2d( + in_channels=self.num_channels, + out_channels=64, + kernel_size=kernel_size, + padding=padding, + ), + ] + self.model = nn.Sequential(*layers) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x = x.permute(0, 3, 1, 2) + return self.model(x) + + model = M(H=32, W=32, num_channels=4, num_colors=2) + fn_opt = torch._dynamo.optimize("inductor")(model) + v = (torch.rand(10, 32, 32, 4) > 0.5).to(torch.float32) + inps = [ + v.clone(), + ] + result, code = run_and_get_cpp_code(fn_opt, *inps) + self.assertTrue("aten.set_.source_Tensor" in code) + self.assertEqual(model(*inps), result) + @torch._dynamo.config.patch(dynamic_shapes=True) @torch._dynamo.config.patch(assume_static_by_default=False) @torch._dynamo.config.patch(allow_rnn=True) diff --git a/torch/_inductor/ir.py b/torch/_inductor/ir.py index 83bad00258a3f8..c4ce9b4c376837 100644 --- a/torch/_inductor/ir.py +++ b/torch/_inductor/ir.py @@ -5720,9 +5720,9 @@ def __init__(self, variable, new_size): class SetSourceTensorKernel(ExternKernelAlloc): def __init__(self, self_tensor, storage_tensor): - self_tensor.freeze_layout() + storage_tensor.freeze_layout() super().__init__( - self_tensor.get_layout(), + storage_tensor.get_layout(), [self_tensor, storage_tensor], python_kernel_name="torch.ops.aten.set_.source_Tensor", op_overload=torch.ops.aten.set_.source_Tensor, From 967cef294b07ffab231548bc6f10cf733fbc2e97 Mon Sep 17 00:00:00 2001 From: David Berard Date: Mon, 4 Nov 2024 09:59:05 -0800 Subject: [PATCH 052/503] [inductor][triton 3.2] fix test_codegen_config_option_dont_assume_alignment for triton 3.2 (#139640) "divisible_by_16" was renamed "divisibility_16". Found in #139206. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139640 Approved by: https://github.com/aakhundov --- test/inductor/test_torchinductor.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/test/inductor/test_torchinductor.py b/test/inductor/test_torchinductor.py index c6bf9c27c8d498..de0d12408ff163 100644 --- a/test/inductor/test_torchinductor.py +++ b/test/inductor/test_torchinductor.py @@ -11882,14 +11882,20 @@ def test_codegen_config_option_dont_assume_alignment(self): def fn(x: torch.Tensor) -> torch.Tensor: return x.sin() + x.cos() + def get_divisible_by_16(cfg): + # attribute was renamed between triton versions, from "divisible_by_16" to "divisibility_16" + if hasattr(cfg, "divisibility_16"): + return cfg.divisibility_16 + return cfg.divisible_by_16 + # We want code that assumes alignment if the initial input is 16-byte aligned for offset in (0, 1, 2, 3, 4): base = torch.randn(64 * 64 + 64, dtype=torch.float32, device=GPU_TYPE) inps = torch.as_strided(base, (64, 64), (64, 1), offset) torch._dynamo.reset() kernels = self.get_kernels(fn, [inps]) - arguments_that_are_divisible_by_16 = ( - kernels[0].triton_meta["configs"][0].divisible_by_16 + arguments_that_are_divisible_by_16 = get_divisible_by_16( + kernels[0].triton_meta["configs"][0] ) # NO_ALIGN ALIGN ALIGN @@ -11905,8 +11911,8 @@ def fn(x: torch.Tensor) -> torch.Tensor: torch._dynamo.reset() inp = torch.randn((64, 64), device=GPU_TYPE) kernels = self.get_kernels(fn, [inp]) - arguments_that_are_divisible_by_16 = ( - kernels[0].triton_meta["configs"][0].divisible_by_16 + arguments_that_are_divisible_by_16 = get_divisible_by_16( + kernels[0].triton_meta["configs"][0] ) self.assertEqual(arguments_that_are_divisible_by_16, (0, 1, 2)) From eaf92b2484b20a58b19b4bd0171609c9c60afeb1 Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 5 Nov 2024 01:16:35 +0000 Subject: [PATCH 053/503] [Python 3.13 CD] Enable Aarch64 py3.13 builds (#138629) Adding CD aarch64. 
Part of: https://github.com/pytorch/pytorch/issues/130249 Pull Request resolved: https://github.com/pytorch/pytorch/pull/138629 Approved by: https://github.com/ZainRizvi --- .../scripts/generate_binary_build_matrix.py | 6 +- ...linux-aarch64-binary-manywheel-nightly.yml | 120 ++++++++++++++++++ 2 files changed, 124 insertions(+), 2 deletions(-) diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py index 423cf0248cec77..145df940631293 100644 --- a/.github/scripts/generate_binary_build_matrix.py +++ b/.github/scripts/generate_binary_build_matrix.py @@ -370,13 +370,15 @@ def generate_wheels_matrix( # TODO: Enable python 3.13 on rocm, aarch64, windows if ( gpu_arch_type == "rocm" - or os not in ["linux", "linux-s390x", "macos-arm64"] + or os not in ["linux", "linux-s390x", "linux-aarch64", "macos-arm64"] ) and python_version in ["3.13", "3.13t"]: continue # TODO: Enable python 3.13t on xpu and cpu-s390x or MacOS if ( - gpu_arch_type in ["xpu", "cpu-s390x"] or os == "macos-arm64" + gpu_arch_type in ["xpu", "cpu-s390x"] + or os == "macos-arm64" + or os == "linux-aarch64" ) and python_version == "3.13t": continue diff --git a/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml index f6776167835268..479137f235335f 100644 --- a/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml +++ b/.github/workflows/generated-linux-aarch64-binary-manywheel-nightly.yml @@ -525,3 +525,123 @@ jobs: conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }} uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_13-cpu-aarch64-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + BUILDER_ROOT: /builder + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu-aarch64 + DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main + use_split_build: False + DESIRED_PYTHON: "3.13" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.m7g.4xlarge.ephemeral + ALPINE_IMAGE: "arm64v8/alpine" + build_name: manywheel-py3_13-cpu-aarch64 + build_environment: linux-aarch64-binary-manywheel + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.5.8; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.1.3; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.147; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.1.9; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.1.170; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and 
platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.127; platform_system == 'Linux' and platform_machine == 'x86_64' + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_13-cpu-aarch64-test: # Testing + if: ${{ github.repository_owner == 'pytorch' }} + needs: + - manywheel-py3_13-cpu-aarch64-build + - get-label-type + uses: ./.github/workflows/_binary-test-linux.yml + with: + PYTORCH_ROOT: /pytorch + BUILDER_ROOT: /builder + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu-aarch64 + DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main + use_split_build: False + DESIRED_PYTHON: "3.13" + build_name: manywheel-py3_13-cpu-aarch64 + build_environment: linux-aarch64-binary-manywheel + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.2xlarge + ALPINE_IMAGE: "arm64v8/alpine" + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_13-cpu-aarch64-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_13-cpu-aarch64-test + with: + PYTORCH_ROOT: /pytorch + BUILDER_ROOT: /builder + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cpu + GPU_ARCH_TYPE: cpu-aarch64 + DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cpu-aarch64-main + use_split_build: False + DESIRED_PYTHON: "3.13" + build_name: manywheel-py3_13-cpu-aarch64 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + conda-pytorchbot-token: ${{ secrets.CONDA_PYTORCHBOT_TOKEN }} + conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }} + uses: ./.github/workflows/_binary-upload.yml + + manywheel-py3_13-cuda-aarch64-build: + if: ${{ github.repository_owner == 'pytorch' }} + uses: ./.github/workflows/_binary-build-linux.yml + needs: get-label-type + with: + PYTORCH_ROOT: /pytorch + BUILDER_ROOT: /builder + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu124 + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main + DESIRED_DEVTOOLSET: cxx11-abi + use_split_build: False + DESIRED_PYTHON: "3.13" + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + runs_on: linux.arm64.m7g.4xlarge.ephemeral + ALPINE_IMAGE: "arm64v8/alpine" + build_name: manywheel-py3_13-cuda-aarch64 + build_environment: linux-aarch64-binary-manywheel + timeout-minutes: 420 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + manywheel-py3_13-cuda-aarch64-upload: # Uploading + if: ${{ github.repository_owner == 'pytorch' }} + permissions: + id-token: write + contents: read + needs: manywheel-py3_13-cuda-aarch64-build + with: + PYTORCH_ROOT: /pytorch + BUILDER_ROOT: /builder + PACKAGE_TYPE: manywheel + # TODO: This is a legacy variable that we eventually want to get rid of in + # favor of GPU_ARCH_VERSION + DESIRED_CUDA: cu124 + GPU_ARCH_TYPE: cuda-aarch64 + DOCKER_IMAGE: pytorch/manylinuxaarch64-builder:cuda12.4-main + DESIRED_DEVTOOLSET: cxx11-abi + use_split_build: False + DESIRED_PYTHON: "3.13" + build_name: manywheel-py3_13-cuda-aarch64 + secrets: + github-token: ${{ secrets.GITHUB_TOKEN }} + conda-pytorchbot-token: ${{ 
secrets.CONDA_PYTORCHBOT_TOKEN }} + conda-pytorchbot-token-test: ${{ secrets.CONDA_PYTORCHBOT_TOKEN_TEST }} + uses: ./.github/workflows/_binary-upload.yml From 299dbcde610c924c49836da5acf149707b0c8d79 Mon Sep 17 00:00:00 2001 From: chuanqiw Date: Tue, 5 Nov 2024 01:23:21 +0000 Subject: [PATCH 054/503] [CI] Fix xpu ci test with s3 cache (#139604) Fix a regression caused by https://github.com/pytorch/pytorch/pull/121323 Works for #114850 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139604 Approved by: https://github.com/atalman, https://github.com/malfet --- .github/workflows/_xpu-test.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/_xpu-test.yml b/.github/workflows/_xpu-test.yml index 036a2c8eeca85f..6b80b1767e26f0 100644 --- a/.github/workflows/_xpu-test.yml +++ b/.github/workflows/_xpu-test.yml @@ -152,6 +152,8 @@ jobs: NUM_TEST_SHARDS: ${{ matrix.num_shards }} REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }} SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2 + SCCACHE_REGION: us-east-1 + SCCACHE_S3_KEY_PREFIX: ${{ github.workflow }} DOCKER_IMAGE: ${{ inputs.docker-image }} XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }} @@ -159,6 +161,8 @@ jobs: TESTS_TO_INCLUDE: ${{ inputs.tests-to-include }} timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }} run: | + # Fetch aws credential from IMDs + eval "$(python3 .github/scripts/get_aws_session_tokens.py)" set -x TEST_COMMAND=.ci/pytorch/test.sh @@ -181,6 +185,9 @@ jobs: -e BRANCH \ -e SHA1 \ -e AWS_DEFAULT_REGION \ + -e AWS_ACCESS_KEY_ID \ + -e AWS_SECRET_ACCESS_KEY \ + -e AWS_SESSION_TOKEN \ -e IN_WHEEL_TEST \ -e SHARD_NUMBER \ -e TEST_CONFIG \ @@ -195,6 +202,8 @@ jobs: -e NO_TD \ -e MAX_JOBS="$(nproc --ignore=2)" \ -e SCCACHE_BUCKET \ + -e SCCACHE_REGION \ + -e SCCACHE_S3_KEY_PREFIX \ -e XLA_CLANG_CACHE_S3_BUCKET_NAME \ -e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \ -e PYTORCH_TEST_RERUN_DISABLED_TESTS \ From c92de3b5dfa36b974fb1a65b193d55a0f91ba4ec Mon Sep 17 00:00:00 2001 From: CaoE Date: Tue, 5 Nov 2024 01:26:25 +0000 Subject: [PATCH 055/503] Add BRGEMM API versioning to be compatible with different oneDNN versions (#138184) oneDNN v3.6 updated the ukernel APIs of `brgemm` and `brgemm_pack_B`. Considering the upgrade of oneDNN, ukernel API versioning is needed to be compatible with different oneDNN versions. 
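For readers unfamiliar with the ukernel API change, the difference that the version macros below encode looks roughly like this (a hedged sketch, not code from this patch: the header path and the `dnnl::memory::data_type` spelling are assumptions, while the constructor and setter calls mirror the hunks in `CPUBlas.cpp`):

```cpp
#include <oneapi/dnnl/dnnl_ukernel.hpp>  // assumed ukernel header location

using dt = dnnl::memory::data_type;

dnnl::ukernel::brgemm make_brgemm(int64_t M, int64_t N, int64_t K, int64_t bs,
                                  int64_t ld_a, int64_t ld_b, int64_t ld_c,
                                  dt dt_a, dt dt_b, dt dt_c, bool add_C) {
#if defined(ONEDNN_UKERNEL_1)
  // oneDNN 3.5.x: alpha/beta are trailing constructor arguments; beta encodes add_C.
  return dnnl::ukernel::brgemm(M, N, K, bs, ld_a, ld_b, ld_c,
                               dt_a, dt_b, dt_c, /*alpha=*/1, /*beta=*/add_C ? 1 : 0);
#else
  // oneDNN 3.6+: no alpha/beta; accumulation becomes a setter, and the kernel
  // is finalized before generate() is called.
  dnnl::ukernel::brgemm brg(M, N, K, bs, ld_a, ld_b, ld_c, dt_a, dt_b, dt_c);
  brg.set_add_C(add_C);
  brg.finalize();
  return brg;
#endif
}
```

The packing helper is renamed in the same release, which is why the `pack_t` alias below is versioned as well (`brgemm_pack_B` for 3.5.x, `transform` for 3.6+).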
Pull Request resolved: https://github.com/pytorch/pytorch/pull/138184 Approved by: https://github.com/jgong5, https://github.com/peterbell10 --- aten/src/ATen/native/CPUBlas.cpp | 100 +++++++++++------- aten/src/ATen/native/CPUBlas.h | 5 +- .../ATen/native/cpu/FlashAttentionKernel.cpp | 12 +-- 3 files changed, 71 insertions(+), 46 deletions(-) diff --git a/aten/src/ATen/native/CPUBlas.cpp b/aten/src/ATen/native/CPUBlas.cpp index a074c854ab7bd7..a593ad10468529 100644 --- a/aten/src/ATen/native/CPUBlas.cpp +++ b/aten/src/ATen/native/CPUBlas.cpp @@ -45,12 +45,21 @@ extern "C" void zaxpy_(int *n, void *a, const void *x, int *incx, void *y, int * #endif // USE_FBGEMM #if AT_MKLDNN_ENABLED() -#include -#endif // oneDNN - -#define ONEDNN_UKERNEL_ENABLED (DNNL_VERSION_MAJOR >=3 && DNNL_VERSION_MINOR >=5) +#include +// Add uKernel API versioning to be compatible with different oneDNN versions +// oneDNN 3.6.x updates the ukernel APIs of brgemm and brgemm_pack_B +// brgemm_pack_B is changed to transform and the setting of brgemm beta is changed to set_add_C +#if (IDEEP_VERSION_MAJOR == 3 && IDEEP_VERSION_MINOR == 5) +#define ONEDNN_UKERNEL_1 +#elif (IDEEP_VERSION_MAJOR >= 3 && IDEEP_VERSION_MINOR >= 6) +#define ONEDNN_UKERNEL_2 +#endif +#if ((defined(ONEDNN_UKERNEL_1) || defined(ONEDNN_UKERNEL_2)) && (defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC)))) +#define ONEDNN_UKERNEL_ENABLED +#endif +#endif // AT_MKLDNN_ENABLED() -#if ONEDNN_UKERNEL_ENABLED && (defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC))) +#if defined(ONEDNN_UKERNEL_ENABLED) #include #include #endif // oneDNN BRGEMM @@ -847,7 +856,7 @@ void copy(int64_t n, const c10::complex *x, int64_t incx, c10::complex std::size_t UnsafeUkernelKeyHasher::operator()(const BrgemmKey& key) const { - // Use beta, M, N, and K to compute hash to reduce the overhead as - // batch size, alpha, and data types are unlikely to change within the same kernel and - // leading dimensions are likely to be related to M, K, N or use fixed values. - std::size_t h = std::hash()(key.beta + 1); - h = std::hash()(key.M) ^ (h << 1); + // Use M, N, K add_C, and ldc to compute hash to reduce the overhead as + // batch size and data types are unlikely to change within the same kernel and + // lda/ldb are likely to be related to M, K, N or use fixed values. + std::size_t h = std::hash()(key.M); h = std::hash()(key.N) ^ (h << 1); h = std::hash()(key.K) ^ (h << 1); + h = std::hash()(key.add_C) ^ (h << 1); h = std::hash()(key.ldc) ^ (h << 1); return h; } @@ -1000,9 +1006,9 @@ struct GemmHelper { ScalarType dt_a, ScalarType dt_b, ScalarType dt_c, - const float alpha, - const float beta) { + const bool add_C) { // Create brgemm +#if defined(ONEDNN_UKERNEL_1) brg = dnnl::ukernel::brgemm( M, N, @@ -1014,8 +1020,23 @@ struct GemmHelper { get_dnnl_dtype(dt_a), get_dnnl_dtype(dt_b), get_dnnl_dtype(dt_c), - alpha, - beta); + 1, + add_C ? 1 : 0); +#elif defined(ONEDNN_UKERNEL_2) + brg = dnnl::ukernel::brgemm( + M, + N, + K, + bs, + ld_a, + ld_b, + ld_c, + get_dnnl_dtype(dt_a), + get_dnnl_dtype(dt_b), + get_dnnl_dtype(dt_c)); + brg.set_add_C(add_C); + brg.finalize(); +#endif // Create a scratchpad buffer for the brgemm execution scratchpad = std::vector(brg.get_scratchpad_size()); // Prepare default vector of pairs of tensors A and B offsets for each batch. 
@@ -1037,8 +1058,7 @@ struct Brgemm : public KernelCache { int64_t ld_a, int64_t ld_b, int64_t ld_c, - const float alpha, - const float beta, + const bool add_C, const scalar_t_a* A, const scalar_t_b* B, scalar_t_c* C) { @@ -1053,8 +1073,7 @@ struct Brgemm : public KernelCache { c10::CppTypeToScalarType::value, c10::CppTypeToScalarType::value, c10::CppTypeToScalarType::value, - alpha, - beta); + add_C); // Fetch/create GemmHelper object auto&& value = fetch_or_create(key, [&]() { auto&& v = std::make_shared( @@ -1068,13 +1087,14 @@ struct Brgemm : public KernelCache { c10::CppTypeToScalarType::value, c10::CppTypeToScalarType::value, c10::CppTypeToScalarType::value, - alpha, - beta); + add_C); (*v).brg.generate(); return std::move(v); }); if (get_current() != value) { +#if defined(ONEDNN_UKERNEL_1) dnnl::ukernel::brgemm::release_hw_context(); +#endif ((*value).brg).set_hw_context(); get_current() = value; } @@ -1099,7 +1119,11 @@ struct Brgemm : public KernelCache { } }; +#if defined(ONEDNN_UKERNEL_1) using pack_t = dnnl::ukernel::brgemm_pack_B; +#elif defined(ONEDNN_UKERNEL_2) +using pack_t = dnnl::ukernel::transform; +#endif struct Pack : public KernelCache { static inline void call( int64_t K, @@ -1113,7 +1137,11 @@ struct Pack : public KernelCache { auto&& key = PackKey(K, N, ld_in, ld_out, dt_in, dt_out); auto&& pack = fetch_or_create(key, [&]() { auto&& p = std::make_shared( +#if defined(ONEDNN_UKERNEL_1) K, N, ld_in, ld_out, get_dnnl_dtype(dt_in), get_dnnl_dtype(dt_out)); +#elif defined(ONEDNN_UKERNEL_2) + K, N, dnnl::ukernel::pack_type::no_trans, ld_in, ld_out, get_dnnl_dtype(dt_in), get_dnnl_dtype(dt_out)); +#endif if (need_pack(dt_in)) { (*p).generate(); } @@ -1146,15 +1174,14 @@ void brgemm( int64_t ld_a, int64_t ld_b, int64_t ld_c, - const float alpha, - const float beta, + const bool add_C, const at::Half* A, const at::Half* B, float* C) { -#if ONEDNN_UKERNEL_ENABLED && (defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC))) +#if defined(ONEDNN_UKERNEL_ENABLED) if (Brgemm::device_check(ScalarType::Half)) { Brgemm::call( - M, N, K, ld_a, ld_b, ld_c, alpha, beta, A, B, C); + M, N, K, ld_a, ld_b, ld_c, add_C, A, B, C); return; } #endif @@ -1163,8 +1190,9 @@ void brgemm( } void brgemm_release() { -#if ONEDNN_UKERNEL_ENABLED && (defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC))) +#if defined(ONEDNN_UKERNEL_ENABLED) dnnl::ukernel::brgemm::release_hw_context(); + Brgemm::get_current() = nullptr; #endif } @@ -1177,7 +1205,7 @@ void pack( ScalarType dt_out, const void* in, void* out) { -#if ONEDNN_UKERNEL_ENABLED && (defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC))) +#if defined(ONEDNN_UKERNEL_ENABLED) Pack::call(K, N, ld_in, ld_out, dt_in, dt_out, in, out); #else TORCH_CHECK(false, "pack is only supported on X64 with oneDNN ukernel enabled"); @@ -1185,7 +1213,7 @@ void pack( } bool need_pack(ScalarType dt_in) { -#if ONEDNN_UKERNEL_ENABLED && (defined(__x86_64__) || (defined(_M_X64) && !defined(_M_ARM64EC))) +#if defined(ONEDNN_UKERNEL_ENABLED) return Pack::need_pack(dt_in); #else return false; diff --git a/aten/src/ATen/native/CPUBlas.h b/aten/src/ATen/native/CPUBlas.h index 3518596ab053a1..d49fe40409f814 100644 --- a/aten/src/ATen/native/CPUBlas.h +++ b/aten/src/ATen/native/CPUBlas.h @@ -189,7 +189,7 @@ void copy(int64_t n, const c10::complex *x, int64_t incx, c10::complex 0, qk_reduced_data, value_reorder_ptr + i * num_head * kv_padding_size * rHeadSize + @@ -791,10 +789,10 @@ void cpu_flash_attention( // Move to the next query 
data_index_step(i, batchSize, j, num_head, k, qSlice); } + if (need_pack) { + cpublas::brgemm_release(); + } }); - if (need_pack) { - cpublas::brgemm_release(); - } } template From 5008d15ae9473aef0858bd5a8d181f666ca4ec26 Mon Sep 17 00:00:00 2001 From: cyy Date: Tue, 5 Nov 2024 01:58:09 +0000 Subject: [PATCH 056/503] [2/N] Remove usage of C array (#139589) Follows #139567 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139589 Approved by: https://github.com/ezyang --- torch/csrc/Device.cpp | 12 ++++++------ torch/csrc/Dtype.cpp | 12 ++++++------ torch/csrc/Exceptions.cpp | 17 ++++++++--------- torch/csrc/Stream.cpp | 18 +++++++++--------- torch/csrc/TypeInfo.cpp | 26 ++++++++------------------ 5 files changed, 37 insertions(+), 48 deletions(-) diff --git a/torch/csrc/Device.cpp b/torch/csrc/Device.cpp index c2b2f1d93171f8..6e84d49539c574 100644 --- a/torch/csrc/Device.cpp +++ b/torch/csrc/Device.cpp @@ -219,14 +219,12 @@ typedef PyObject* (*getter)(PyObject*, void*); // NB: If you edit these properties/methods, update torch/_C/__init__.pyi.in -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays) -static struct PyGetSetDef THPDevice_properties[] = { +static const std::initializer_list THPDevice_properties = { {"type", (getter)THPDevice_type, nullptr, nullptr, nullptr}, {"index", (getter)THPDevice_index, nullptr, nullptr, nullptr}, {nullptr}}; -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays) -static PyMethodDef THPDevice_methods[] = { +static const std::initializer_list THPDevice_methods = { {"__reduce__", THPDevice_reduce, METH_NOARGS, nullptr}, {"__enter__", THPDevice_enter, METH_NOARGS, nullptr}, {"__exit__", THPDevice_exit, METH_VARARGS, nullptr}, @@ -266,9 +264,11 @@ PyTypeObject THPDeviceType = { 0, /* tp_weaklistoffset */ nullptr, /* tp_iter */ nullptr, /* tp_iternext */ - THPDevice_methods, /* tp_methods */ + // NOLINTNEXTLINE(*const-cast) + const_cast(std::data(THPDevice_methods)), /* tp_methods */ nullptr, /* tp_members */ - THPDevice_properties, /* tp_getset */ + // NOLINTNEXTLINE(*const-cast) + const_cast(std::data(THPDevice_properties)), /* tp_getset */ nullptr, /* tp_base */ nullptr, /* tp_dict */ nullptr, /* tp_descr_get */ diff --git a/torch/csrc/Dtype.cpp b/torch/csrc/Dtype.cpp index 9a1b00c9727925..f1298e368de2df 100644 --- a/torch/csrc/Dtype.cpp +++ b/torch/csrc/Dtype.cpp @@ -98,8 +98,7 @@ static PyObject* THPDtype_to_complex(PyObject* _self, PyObject* noargs) { typedef PyObject* (*getter)(PyObject*, void*); -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays) -static struct PyGetSetDef THPDtype_properties[] = { +static const std::initializer_list THPDtype_properties = { {"is_floating_point", (getter)THPDtype_is_floating_point, nullptr, @@ -110,8 +109,7 @@ static struct PyGetSetDef THPDtype_properties[] = { {"itemsize", (getter)THPDtype_itemsize, nullptr, nullptr, nullptr}, {nullptr}}; -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays) -static PyMethodDef THPDtype_methods[] = { +static const std::initializer_list THPDtype_methods = { {"__reduce__", THPDtype_reduce, METH_NOARGS, nullptr}, {"to_real", THPDtype_to_real, METH_NOARGS, nullptr}, {"to_complex", THPDtype_to_complex, METH_NOARGS, nullptr}, @@ -150,9 +148,11 @@ PyTypeObject THPDtypeType = { 0, /* 
tp_weaklistoffset */ nullptr, /* tp_iter */ nullptr, /* tp_iternext */ - THPDtype_methods, /* tp_methods */ + // NOLINTNEXTLINE(*const-cast) + const_cast(std::data(THPDtype_methods)), /* tp_methods */ nullptr, /* tp_members */ - THPDtype_properties, /* tp_getset */ + // NOLINTNEXTLINE(*const-cast) + const_cast(std::data(THPDtype_properties)), /* tp_getset */ nullptr, /* tp_base */ nullptr, /* tp_dict */ nullptr, /* tp_descr_get */ diff --git a/torch/csrc/Exceptions.cpp b/torch/csrc/Exceptions.cpp index a33717fe82175f..5258181262da68 100644 --- a/torch/csrc/Exceptions.cpp +++ b/torch/csrc/Exceptions.cpp @@ -204,15 +204,14 @@ std::string processErrorMsg(std::string str) { } static std::string formatMessage(const char* format, va_list fmt_args) { - static const size_t ERROR_BUF_SIZE = 1024; - // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays) - char error_buf[ERROR_BUF_SIZE]; - vsnprintf(error_buf, ERROR_BUF_SIZE, format, fmt_args); - - // Ensure that the string is null terminated - error_buf[sizeof(error_buf) / sizeof(*error_buf) - 1] = 0; - - return std::string(error_buf); + constexpr size_t ERROR_BUF_SIZE = 1024; + std::string error_buf(ERROR_BUF_SIZE, '\0'); + auto res = vsnprintf(error_buf.data(), ERROR_BUF_SIZE, format, fmt_args); + if (res < 0) { + res = 0; + } + error_buf.resize(res); + return error_buf; } void translate_exception_to_python(const std::exception_ptr& e_ptr) { diff --git a/torch/csrc/Stream.cpp b/torch/csrc/Stream.cpp index 27dd4b3fcb1318..1fbcd74153522b 100644 --- a/torch/csrc/Stream.cpp +++ b/torch/csrc/Stream.cpp @@ -289,8 +289,7 @@ static PyObject* THPStream_richcompare( return result; } -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables) -static struct PyMemberDef THPStream_members[] = { +static const std::initializer_list THPStream_members = { {"stream_id", T_LONGLONG, offsetof(THPStream, stream_id), @@ -308,13 +307,11 @@ static struct PyMemberDef THPStream_members[] = { nullptr}, {nullptr}}; -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables) -static struct PyGetSetDef THPStream_properties[] = { +static const std::initializer_list THPStream_properties = { {"device", (getter)THPStream_get_device, nullptr, nullptr, nullptr}, {nullptr}}; -// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables) -static PyMethodDef THPStream_methods[] = { +static const std::initializer_list THPStream_methods = { {"query", THPStream_query, METH_NOARGS, nullptr}, {"synchronize", THPStream_synchronize, METH_NOARGS, nullptr}, {"wait_event", THPStream_wait_event, METH_O, nullptr}, @@ -355,9 +352,12 @@ static PyTypeObject THPStreamType = { 0, /* tp_weaklistoffset */ nullptr, /* tp_iter */ nullptr, /* tp_iternext */ - THPStream_methods, /* tp_methods */ - THPStream_members, /* tp_members */ - THPStream_properties, /* tp_getset */ + // NOLINTNEXTLINE(*const-cast) + const_cast(std::data(THPStream_methods)), /* tp_methods */ + // NOLINTNEXTLINE(*const-cast) + const_cast(std::data(THPStream_members)), /* tp_members */ + // NOLINTNEXTLINE(*const-cast) + const_cast(std::data(THPStream_properties)), /* tp_getset */ nullptr, /* tp_base */ nullptr, /* tp_dict */ nullptr, /* tp_descr_get */ diff --git a/torch/csrc/TypeInfo.cpp b/torch/csrc/TypeInfo.cpp index e6a03e5d326a2e..479d88ac206684 100644 --- a/torch/csrc/TypeInfo.cpp +++ b/torch/csrc/TypeInfo.cpp 
@@ -273,8 +273,7 @@ static PyObject* THPIInfo_str(THPIInfo* self) { return !PyErr_Occurred() ? THPUtils_packString(oss.str().c_str()) : nullptr; } -// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables,cppcoreguidelines-avoid-c-arrays) -static struct PyGetSetDef THPFInfo_properties[] = { +static const std::initializer_list THPFInfo_properties = { {"bits", (getter)THPDTypeInfo_bits, nullptr, nullptr, nullptr}, {"eps", (getter)THPFInfo_eps, nullptr, nullptr, nullptr}, {"max", (getter)THPFInfo_max, nullptr, nullptr, nullptr}, @@ -289,11 +288,6 @@ static struct PyGetSetDef THPFInfo_properties[] = { {"dtype", (getter)THPFInfo_dtype, nullptr, nullptr, nullptr}, {nullptr}}; -// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables,cppcoreguidelines-avoid-c-arrays) -static PyMethodDef THPFInfo_methods[] = { - {nullptr} /* Sentinel */ -}; - PyTypeObject THPFInfoType = { PyVarObject_HEAD_INIT(nullptr, 0) "torch.finfo", /* tp_name */ @@ -322,9 +316,10 @@ PyTypeObject THPFInfoType = { 0, /* tp_weaklistoffset */ nullptr, /* tp_iter */ nullptr, /* tp_iternext */ - THPFInfo_methods, /* tp_methods */ + nullptr, /* tp_methods */ nullptr, /* tp_members */ - THPFInfo_properties, /* tp_getset */ + // NOLINTNEXTLINE(*const-cast) + const_cast(std::data(THPFInfo_properties)), /* tp_getset */ nullptr, /* tp_base */ nullptr, /* tp_dict */ nullptr, /* tp_descr_get */ @@ -335,19 +330,13 @@ PyTypeObject THPFInfoType = { THPFInfo_pynew, /* tp_new */ }; -// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables,cppcoreguidelines-avoid-c-arrays) -static struct PyGetSetDef THPIInfo_properties[] = { +static const std::initializer_list THPIInfo_properties = { {"bits", (getter)THPDTypeInfo_bits, nullptr, nullptr, nullptr}, {"max", (getter)THPIInfo_max, nullptr, nullptr, nullptr}, {"min", (getter)THPIInfo_min, nullptr, nullptr, nullptr}, {"dtype", (getter)THPIInfo_dtype, nullptr, nullptr, nullptr}, {nullptr}}; -// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables,cppcoreguidelines-avoid-c-arrays) -static PyMethodDef THPIInfo_methods[] = { - {nullptr} /* Sentinel */ -}; - PyTypeObject THPIInfoType = { PyVarObject_HEAD_INIT(nullptr, 0) "torch.iinfo", /* tp_name */ @@ -376,9 +365,10 @@ PyTypeObject THPIInfoType = { 0, /* tp_weaklistoffset */ nullptr, /* tp_iter */ nullptr, /* tp_iternext */ - THPIInfo_methods, /* tp_methods */ + nullptr, /* tp_methods */ nullptr, /* tp_members */ - THPIInfo_properties, /* tp_getset */ + // NOLINTNEXTLINE(*const-cast) + const_cast(std::data(THPIInfo_properties)), /* tp_getset */ nullptr, /* tp_base */ nullptr, /* tp_dict */ nullptr, /* tp_descr_get */ From 9039fbb47ecfc93df74a014a209e5929d10fd2a3 Mon Sep 17 00:00:00 2001 From: Andrew Gu Date: Mon, 4 Nov 2024 10:59:50 -0800 Subject: [PATCH 057/503] [FSDP2] Make module-to-state mapping use weakrefs (#139650) Without this, `del model` does not free memory of a module with FSDP2 applied. 
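For intuition, a minimal standalone sketch (hypothetical names, not the change itself; the real mapping lives in `torch/distributed/_composable_state.py` below) contrasting a strong-keyed dict with the weakref-based mapping:

```
import gc
import weakref

import torch.nn as nn


class _State:  # stand-in for the per-module FSDP state object
    pass


strong_mapping = {}                          # old behavior: strong refs on both sides
weak_mapping = weakref.WeakKeyDictionary()   # new behavior: weak key, weak value

module = nn.Linear(8, 8)
state = _State()
strong_mapping[module] = state
weak_mapping[module] = weakref.ref(state)

del module
gc.collect()  # FSDP setups can have ref cycles, so collect explicitly

print(len(strong_mapping))  # 1 -> the dict still keeps the module (and its params) alive
print(len(weak_mapping))    # 0 -> the entry disappeared together with the module
```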
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139650 Approved by: https://github.com/yf225 --- .../fsdp/test_fully_shard_memory.py | 31 +++++++++++++++++++ torch/distributed/_composable_state.py | 15 ++++++--- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/test/distributed/_composable/fsdp/test_fully_shard_memory.py b/test/distributed/_composable/fsdp/test_fully_shard_memory.py index 7dba4ce7350897..88e00e66c5e3e0 100644 --- a/test/distributed/_composable/fsdp/test_fully_shard_memory.py +++ b/test/distributed/_composable/fsdp/test_fully_shard_memory.py @@ -1,6 +1,7 @@ # Owner(s): ["oncall: distributed"] import functools +import gc import torch from torch.distributed._composable.fsdp import ( @@ -197,6 +198,36 @@ def _test_fully_shard_training_memory( expected_mem_mb += (2 * model_sharded_numel) * 4 / 1e6 + buffer_mb self.assertLessEqual(mem_mb - base_mem_mb, expected_mem_mb) + @skip_if_lt_x_gpu(2) + def test_fully_shard_del_memory(self): + base_mem_mb = self._get_peak_active_memory_mb() + vocab_size = 32 + model_args = ModelArgs( + vocab_size=vocab_size, n_layers=3, dim=768, n_heads=12, weight_tying=False + ) + model = Transformer(model_args) + # Initializing the model on CPU should not change the GPU memory usage + post_model_init_mem_mb = self._get_peak_active_memory_mb() + self.assertEqual(base_mem_mb, post_model_init_mem_mb) + + for module in model.modules(): + if isinstance(module, TransformerBlock): + fully_shard(module) + fully_shard(model) + unsharded_numel = sum(p.numel() for p in model.parameters()) + sharded_numel = unsharded_numel // self.world_size + buffer_mb = 4 + mem_mb = self._get_curr_active_memory_mb() + expected_mb = sharded_numel * 4 / 1e6 + buffer_mb + self.assertLessEqual(mem_mb - base_mem_mb, expected_mb) + + # Deleting the model should free all of the FSDP-managed GPU memory + del model + # Manually call garbage collection since there are ref cycles in FSDP + gc.collect() + mem_mb = self._get_curr_active_memory_mb() + self.assertEqual(mem_mb, base_mem_mb) + def _get_peak_active_memory_mb(self) -> int: mem_stats = torch.cuda.memory_stats() return round(mem_stats["active_bytes.all.peak"] / 1e6) diff --git a/torch/distributed/_composable_state.py b/torch/distributed/_composable_state.py index f50da98f8c63e2..6d2b8baed766ff 100644 --- a/torch/distributed/_composable_state.py +++ b/torch/distributed/_composable_state.py @@ -1,4 +1,5 @@ -from typing import cast, Dict, Optional +import weakref +from typing import cast, Optional import torch.nn as nn @@ -7,13 +8,15 @@ class _State: pass -_module_state_mapping: Dict[nn.Module, _State] = {} +_module_state_mapping: weakref.WeakKeyDictionary[ + nn.Module, weakref.ReferenceType[_State] +] = weakref.WeakKeyDictionary() def _insert_module_state(module: nn.Module, state: _State) -> None: global _module_state_mapping assert module not in _module_state_mapping, f"Inserting {module} more than once." 
- _module_state_mapping[module] = state + _module_state_mapping[module] = weakref.ref(state) def _get_module_state(module: nn.Module) -> Optional[_State]: @@ -32,6 +35,10 @@ def _get_module_state(module: nn.Module) -> Optional[_State]: else: # https://github.com/pytorch/pytorch/issues/107054 if module in _module_state_mapping: - return _module_state_mapping[module] + state_ref = _module_state_mapping[module] + state = state_ref() + if state is None: + raise AssertionError("State has already been garbage collected") + return state else: return None From 1e9390a30ac29ee3a4a75c184059c5c4cb3d5f0b Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 5 Nov 2024 03:25:33 +0000 Subject: [PATCH 058/503] Add setuptools and wheel to cp312, cp313 and cp313t for Manylinux2_28 builds (#139636) Install setuptools and wheel dependencies for cp312, cp313, cp313t on Manylinux 2_28 images. This should resolve ``` ModuleNotFoundError: No module named 'setuptools' ``` On PR: https://github.com/pytorch/pytorch/pull/138732 This issue was addressed on XPU images already. We should apply the same fix for the rest of the images instead of keeping it XPU specific. Pull Request resolved: https://github.com/pytorch/pytorch/pull/139636 Approved by: https://github.com/huydhn, https://github.com/chuanqi129 --- .ci/docker/manywheel/Dockerfile_2_28 | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.ci/docker/manywheel/Dockerfile_2_28 b/.ci/docker/manywheel/Dockerfile_2_28 index 655dc363548052..2e2998c6770ed3 100644 --- a/.ci/docker/manywheel/Dockerfile_2_28 +++ b/.ci/docker/manywheel/Dockerfile_2_28 @@ -120,6 +120,11 @@ ARG DEVTOOLSET_VERSION=11 # Ensure the expected devtoolset is used ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH +# Install setuptools and wheel for python 3.12/3.13 +RUN for cpython_version in "cp312-cp312" "cp313-cp313" "cp313-cp313t"; do \ + /opt/python/${cpython_version}/bin/python -m pip install setuptools wheel; \ + done; + # cmake-3.18.4 from pip RUN yum install -y python3-pip && \ @@ -152,8 +157,6 @@ ENV XPU_DRIVER_TYPE ROLLING # cmake-3.28.4 from pip RUN python3 -m pip install --upgrade pip && \ python3 -mpip install cmake==3.28.4 -# Install setuptools and wheel for python 3.13 -RUN /opt/python/cp313-cp313/bin/python -m pip install setuptools wheel ADD ./common/install_xpu.sh install_xpu.sh RUN bash ./install_xpu.sh && rm install_xpu.sh RUN pushd /opt/_internal && tar -xJf static-libs-for-embedding-only.tar.xz && popd From a766d84a3c1fe78f246c8e4da2f85b249824151b Mon Sep 17 00:00:00 2001 From: Gabriel Ferns Date: Tue, 5 Nov 2024 03:44:07 +0000 Subject: [PATCH 059/503] Allow inplacing buffer when other users are inconsequential (#138383) Summary: I think we can inplace a buffer if all of the users of said buffer are "inconsequential", defined as having been removed, being completed, or being part of the ancestors set. In particular, this allows LayerNorm to inplace its input buffer. Implements: https://github.com/pytorch/pytorch/issues/132826 Test Plan: New unit test of matmul followed by LayerNorm, make sure there's an inplaced buffer. 
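For intuition, a minimal standalone sketch (hypothetical names; the real check in `torch/_inductor/scheduler.py` carries additional conditions) of the "inconsequential users" test described in the summary:

```
def can_inplace(users, current_op, ancestors, removed_ops, completed_ops):
    """Return True if current_op may overwrite its input buffer in place."""
    # A user is "inconsequential" if its op was removed, has already completed,
    # or is an ancestor of the op being scheduled, so it cannot read the
    # buffer after we overwrite it.
    inconsequential = ancestors | removed_ops | completed_ops
    remaining = [u for u in users if u not in inconsequential]
    # Only the op being scheduled may still need the buffer.
    return remaining == [current_op]


# LayerNorm-after-matmul case: the matmul output is read by the mean/var
# reduction (an ancestor) and by the normalization op being scheduled now.
print(can_inplace(["mean_var_reduction", "normalize"], "normalize",
                  ancestors={"mean_var_reduction"}, removed_ops=set(),
                  completed_ops=set()))  # True -> reuse the matmul buffer in place
print(can_inplace(["normalize", "unrelated_later_reader"], "normalize",
                  ancestors=set(), removed_ops=set(),
                  completed_ops=set()))  # False -> someone else still reads it
```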
Pull Request resolved: https://github.com/pytorch/pytorch/pull/138383 Approved by: https://github.com/eellison --- test/inductor/test_benchmark_fusion.py | 4 ++-- test/inductor/test_cooperative_reductions.py | 2 +- test/inductor/test_torchinductor.py | 24 +++++++++++++------- torch/_inductor/scheduler.py | 15 +++++++----- 4 files changed, 28 insertions(+), 17 deletions(-) diff --git a/test/inductor/test_benchmark_fusion.py b/test/inductor/test_benchmark_fusion.py index 8f8fbcd9274d95..f72be2373f43ce 100644 --- a/test/inductor/test_benchmark_fusion.py +++ b/test/inductor/test_benchmark_fusion.py @@ -169,8 +169,8 @@ def foo(m, inp): for c in out_code[0], out_code2[0]: FileCheck().check("async_compile.wait").check("DeviceGuard").check_count( - "empty_strided_cuda", 2, exactly=True - ).check("return").run(c) + "empty_strided_cuda", 1, exactly=True + ).check_regex("buf[0-9]* = buf[0-9]*; del buf[0-9]*").check("return").run(c) def test_tield_kernel_fusion(self): def f(x): diff --git a/test/inductor/test_cooperative_reductions.py b/test/inductor/test_cooperative_reductions.py index aeefced18c3fbc..700865c381d4ad 100644 --- a/test/inductor/test_cooperative_reductions.py +++ b/test/inductor/test_cooperative_reductions.py @@ -99,7 +99,7 @@ def fn(x): if "async_compile.multi_kernel" in source_code: return self.assertEqual(source_code.count("triton_helpers.x_grid_barrier"), 16) - self.assertEqual(source_code.count("empty_strided_cuda"), 8) + self.assertEqual(source_code.count("empty_strided_cuda"), 5) def test_reduce_split(self): def fn(a, b): diff --git a/test/inductor/test_torchinductor.py b/test/inductor/test_torchinductor.py index de0d12408ff163..46e1c27e91acc1 100644 --- a/test/inductor/test_torchinductor.py +++ b/test/inductor/test_torchinductor.py @@ -12700,18 +12700,26 @@ def fn(x): _, (code,) = run_and_get_code(torch.compile(fn), inp) FileCheck().check("copy_").check_same("True").run(code) - @config.patch(inplace_buffers=True) - def test_layer_norm_should_not_inplace(self): - # https://github.com/pytorch/pytorch/issues/120217 - D = 16 + def test_layer_norm_inplaces_after_matmul(self): + # https://github.com/pytorch/pytorch/issues/132826 + batch_size = 32 + seq_length = 50 + hidden_size = 768 - def fn(x): - return nn.LayerNorm([D], dtype=torch.float16)(x) + layer_norm = torch.nn.LayerNorm(hidden_size, device=GPU_TYPE) + + def fn(inp, weight): + matmul_output = inp @ weight + final_output = layer_norm(matmul_output) + return final_output - inps = [torch.rand(D, dtype=torch.float16)] + inps = [ + torch.randn(batch_size, seq_length, hidden_size, device=GPU_TYPE), + torch.randn(hidden_size, hidden_size, device=GPU_TYPE), + ] fn_opt = torch.compile(fn) code = run_and_get_triton_code(fn_opt, *inps) - self.assertTrue("in_out_ptr" not in code) + self.assertTrue(len(re.findall(r"in_out_ptr\d+", code)) > 0) self.assertEqual(fn_opt(*inps), fn(*inps)) class RNNTest(TestCase): diff --git a/torch/_inductor/scheduler.py b/torch/_inductor/scheduler.py index 3fc04a14d0fffc..020e2792509b19 100644 --- a/torch/_inductor/scheduler.py +++ b/torch/_inductor/scheduler.py @@ -411,6 +411,14 @@ def decide_inplace_update(self) -> None: for node in self.scheduler.name_to_fused_node[self.get_name()].get_nodes() } + ordered_reads = sorted(self.read_writes.reads, key=lambda x: x.name) + # NOTE remove V.graph.removed_operations once deps issue is fixed + inconsequential_nodes = ( + self.ancestors + | V.graph.removed_operations + | self.scheduler.completed_operations + ) + for buf in self.get_outputs(): buf_node = buf.node 
assert buf_node is not None @@ -431,16 +439,11 @@ def decide_inplace_update(self) -> None: and V.graph.wrapper_code.can_reuse(input_buf, self) and not isinstance(input_buf.defining_op, NopKernelSchedulerNode) ): - # If the writers of input_buf are in the same FusedSchedulerNode as the current op, then there is - # no need to inplace. - if input_buf.defining_op.get_name() in fused_nodes: - continue - assert input_buf.users is not None remaining_uses = [ x for x in input_buf.users - if x.node.get_name() not in self.scheduler.completed_operations + if x.node.get_name() not in inconsequential_nodes ] if ( len(remaining_uses) == 1 From fdfd4c50bad8b7d12416d45233ad990c45cf7ef9 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Tue, 5 Nov 2024 04:48:12 +0000 Subject: [PATCH 060/503] Assign owners to periodic and slow jobs (#139519) As an outcome of https://fburl.com/gdoc/voce5o06, I want to assign owner(s) to any periodic or slows job that are still needed but couldn't run more frequently (too $$$, capacity constraint, don't fail that often). They include: * multigpu * debug build * ROCm (distributed, slow) @malfet @soulitzer I put down your names as the owners of debug build and slowgradcheck respectively. Please let me know if you are ok with that, or if you have a better option in mind. Any jobs there without an owner are owned by us (PT Dev Infra) ### Testing The owners are show up in the job name https://hud.pytorch.org/pr/139519 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139519 Approved by: https://github.com/malfet --- .github/workflows/periodic.yml | 26 +++++++++++++------------- .github/workflows/slow.yml | 20 ++++++++++---------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/.github/workflows/periodic.yml b/.github/workflows/periodic.yml index c37d686afbc977..1063df69be5d65 100644 --- a/.github/workflows/periodic.yml +++ b/.github/workflows/periodic.yml @@ -121,7 +121,7 @@ jobs: cuda-arch-list: 8.6 test-matrix: | { include: [ - { config: "multigpu", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.12xlarge.nvidia.gpu" }, + { config: "multigpu", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.12xlarge.nvidia.gpu", owners: ["oncall:distributed"] }, ]} build-with-debug: false @@ -145,11 +145,11 @@ jobs: build-with-debug: true test-matrix: | { include: [ - { config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, - { config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, - { config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, - { config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, - { config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" }, + { config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] }, + { config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] }, + { config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] 
}, + { config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] }, + { config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu", owners: ["oncall:debug-build"] }, ]} linux-focal-cuda11_8-py3_10-gcc9-debug-test: @@ -183,9 +183,9 @@ jobs: docker-image-name: pytorch-linux-focal-rocm-n-py3 test-matrix: | { include: [ - { config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu" }, - { config: "distributed", shard: 2, num_shards: 3, runner: "linux.rocm.gpu" }, - { config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu" }, + { config: "distributed", shard: 1, num_shards: 3, runner: "linux.rocm.gpu", owners: ["module:rocm", "oncall:distributed"] }, + { config: "distributed", shard: 2, num_shards: 3, runner: "linux.rocm.gpu", owners: ["module:rocm", "oncall:distributed"] }, + { config: "distributed", shard: 3, num_shards: 3, runner: "linux.rocm.gpu", owners: ["module:rocm", "oncall:distributed"] }, ]} linux-focal-rocm6_2-py3_10-test: @@ -246,7 +246,7 @@ jobs: cuda-arch-list: 8.6 test-matrix: | { include: [ - { config: "multigpu", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.12xlarge.nvidia.gpu" }, + { config: "multigpu", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g5.12xlarge.nvidia.gpu", owners: ["oncall:distributed"] }, ]} build-with-debug: false @@ -274,9 +274,9 @@ jobs: cuda-arch-list: '7.5' test-matrix: | { include: [ - { config: "distributed", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" }, - { config: "distributed", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" }, - { config: "distributed", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" }, + { config: "distributed", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu", owners: ["oncall:distributed"] }, + { config: "distributed", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu", owners: ["oncall:distributed"] }, + { config: "distributed", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu", owners: ["oncall:distributed"] }, ]} linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build-test: diff --git a/.github/workflows/slow.yml b/.github/workflows/slow.yml index 6dcb616e54bb64..5aeaf0f13a8c17 100644 --- a/.github/workflows/slow.yml +++ b/.github/workflows/slow.yml @@ -58,14 +58,14 @@ jobs: cuda-arch-list: 8.6 test-matrix: | { include: [ - { config: "default", shard: 1, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "default", shard: 2, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "default", shard: 3, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "default", shard: 4, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "default", shard: 5, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "default", shard: 6, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "default", shard: 7, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu" }, - { config: "default", shard: 8, 
num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu" }, + { config: "default", shard: 1, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, + { config: "default", shard: 2, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, + { config: "default", shard: 3, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, + { config: "default", shard: 4, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, + { config: "default", shard: 5, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, + { config: "default", shard: 6, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, + { config: "default", shard: 7, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, + { config: "default", shard: 8, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, ]} linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-test: @@ -142,8 +142,8 @@ jobs: docker-image-name: pytorch-linux-focal-rocm-n-py3 test-matrix: | { include: [ - { config: "slow", shard: 1, num_shards: 2, runner: "linux.rocm.gpu" }, - { config: "slow", shard: 2, num_shards: 2, runner: "linux.rocm.gpu" }, + { config: "slow", shard: 1, num_shards: 2, runner: "linux.rocm.gpu", owners: ["module:rocm"] }, + { config: "slow", shard: 2, num_shards: 2, runner: "linux.rocm.gpu", owners: ["module:rocm"] }, ]} linux-focal-rocm6_2-py3_10-test: From fe4fa1df9fa981f31f85170e40a754479759267f Mon Sep 17 00:00:00 2001 From: Animesh Jain Date: Mon, 4 Nov 2024 12:50:20 -0800 Subject: [PATCH 061/503] [dynamo][eval_frame] Set the callback to None earlier for guard eval (#139655) xref - https://fb.workplace.com/groups/1075192433118967/permalink/1536570810314458/ Pull Request resolved: https://github.com/pytorch/pytorch/pull/139655 Approved by: https://github.com/jansel, https://github.com/williamwen42 --- torch/csrc/dynamo/eval_frame.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/torch/csrc/dynamo/eval_frame.c b/torch/csrc/dynamo/eval_frame.c index f59bf878879146..f59c8fa880c82a 100644 --- a/torch/csrc/dynamo/eval_frame.c +++ b/torch/csrc/dynamo/eval_frame.c @@ -611,6 +611,12 @@ static PyObject* _custom_eval_frame( PyObject* backend = get_backend(callback); + + // We don't run the current custom_eval_frame behavior for guards. + // So we temporarily set the callback to Py_None to drive the correct behavior + // in the shim. + eval_frame_callback_set(Py_None); + // A callback of Py_False indicates "run only" mode, the cache is checked, but // we never compile. // Also, if extra is marked as "cache_limit_hit", run in "run only" mode @@ -645,6 +651,8 @@ static PyObject* _custom_eval_frame( PyCodeObject* cached_code = (PyCodeObject*)maybe_cached_code; // used cached version DEBUG_TRACE("cache hit %s", get_frame_name(frame)); + // Re-enable custom behavior + eval_frame_callback_set(callback); *should_clear_frame = 1; return eval_custom_code(tstate, frame, cached_code, trace_annotation, throw_flag, 0); } @@ -652,11 +660,6 @@ static PyObject* _custom_eval_frame( DEBUG_CHECK(PyDict_CheckExact(frame->f_globals)); DEBUG_CHECK(PyDict_CheckExact(frame->f_builtins)); - // We don't run the current custom_eval_frame behavior for guards. - // So we temporarily set the callback to Py_None to drive the correct behavior - // in the shim. 
- eval_frame_callback_set(Py_None); - _PytorchRecordFunctionState* rf = _pytorch_record_function_enter(cache_lookup_profiler_str); PyObject* maybe_cached_code = NULL; const char* trace_annotation = ""; From c8a55eea889831db0d3d917cdb7bcfa235d86bd4 Mon Sep 17 00:00:00 2001 From: Meet Vadakkanchery Date: Tue, 5 Nov 2024 05:24:36 +0000 Subject: [PATCH 062/503] [DCP] Fix process_group logging for DCP methods (#139428) Summary: Currently, we incorrectly log process_group for DCP based events. We rely on [c10d_logger.py](https://fburl.com/v4mdme9z) to fill in information about process_group (e.g. backend, nccl_version if available). In [checkpoint/logger.py](https://fburl.com/yho9nqbu) we pass the `msg_dict` to c10d_logger which never contains the `process_group` param, so [c10d_logger](https://fburl.com/zlw2ukxp) logs information about the default process_group which is always `NCCL`. Test Plan: Before: Always defaults to NCCL even though GLOO is passed by caller. {F1950847585} After: GLOO backend shows up. {F1950848375} Differential Revision: D65255871 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139428 Approved by: https://github.com/teja-rao, https://github.com/mhorowitz --- torch/distributed/checkpoint/logger.py | 2 +- torch/distributed/checkpoint/state_dict_loader.py | 1 + torch/distributed/checkpoint/state_dict_saver.py | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/torch/distributed/checkpoint/logger.py b/torch/distributed/checkpoint/logger.py index ee7ae4d9a5b949..ee617e9323db64 100644 --- a/torch/distributed/checkpoint/logger.py +++ b/torch/distributed/checkpoint/logger.py @@ -54,7 +54,7 @@ def _msg_dict_from_dcp_method_args(*args, **kwargs) -> Dict[str, Any]: def _get_msg_dict(func_name, *args, **kwargs) -> Dict[str, Any]: msg_dict = _msg_dict_from_dcp_method_args(*args, **kwargs) - msg_dict.update(c10d_logger._get_msg_dict(func_name, **msg_dict)) + msg_dict.update(c10d_logger._get_msg_dict(func_name, *args, **kwargs)) return msg_dict diff --git a/torch/distributed/checkpoint/state_dict_loader.py b/torch/distributed/checkpoint/state_dict_loader.py index 299eae839db567..6a915b78487801 100644 --- a/torch/distributed/checkpoint/state_dict_loader.py +++ b/torch/distributed/checkpoint/state_dict_loader.py @@ -206,6 +206,7 @@ def _load_state_dict( ckpt_kwargs = {} if (ckpt_id := getattr(storage_reader, "checkpoint_id", None)) is not None: ckpt_kwargs["checkpoint_id"] = ckpt_id + ckpt_kwargs["process_group"] = distW.group @_dcp_method_logger(**ckpt_kwargs) def local_step(): diff --git a/torch/distributed/checkpoint/state_dict_saver.py b/torch/distributed/checkpoint/state_dict_saver.py index 307c6d8d4a9605..8df80c21b42d00 100644 --- a/torch/distributed/checkpoint/state_dict_saver.py +++ b/torch/distributed/checkpoint/state_dict_saver.py @@ -280,6 +280,7 @@ def _save_state_dict( ckpt_kwargs = {} if (ckpt_id := getattr(storage_writer, "checkpoint_id", None)) is not None: ckpt_kwargs["checkpoint_id"] = ckpt_id + ckpt_kwargs["process_group"] = distW.group @_dcp_method_logger(**ckpt_kwargs) def local_step(): From 9e14d865734a3a71712c1015755182ba437be5f8 Mon Sep 17 00:00:00 2001 From: CaoE Date: Tue, 5 Nov 2024 05:33:29 +0000 Subject: [PATCH 063/503] [Inductor][CPP] Add oneDNN BRGEMM config for Half cpp gemm template (#136255) `kernel_micro_gemm` generated using BRGEMM: ``` template inline void kernel_micro_gemm( const half* __restrict__ A, const half* __restrict__ B, float* __restrict__ C, int64_t M, int64_t N, int64_t K, int64_t lda, int64_t ldb, int64_t ldc 
) { at::native::cpublas::brgemm( M, N, K, lda, ldb, ldc, 1.f, accum ? 1.f : 0.f, A, B, C); } ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/136255 Approved by: https://github.com/jgong5, https://github.com/jansel --- aten/src/ATen/cpu/Utils.cpp | 8 +++ aten/src/ATen/cpu/Utils.h | 3 + aten/src/ATen/native/CPUBlas.h | 2 +- test/inductor/test_cpu_select_algorithm.py | 14 ++++- torch/_C/_cpu.pyi | 1 + torch/_dynamo/trace_rules.py | 2 + torch/_inductor/codegen/cpp_gemm_template.py | 9 ++- torch/_inductor/codegen/cpp_micro_gemm.py | 63 ++++++++++++++++++++ torch/cpu/__init__.py | 5 ++ torch/csrc/cpu/Module.cpp | 1 + 10 files changed, 104 insertions(+), 4 deletions(-) diff --git a/aten/src/ATen/cpu/Utils.cpp b/aten/src/ATen/cpu/Utils.cpp index 60ad78143f7334..b7b99e50d91b7b 100644 --- a/aten/src/ATen/cpu/Utils.cpp +++ b/aten/src/ATen/cpu/Utils.cpp @@ -49,6 +49,14 @@ bool is_amx_tile_supported() { #endif } +bool is_amx_fp16_supported() { +#if !defined(__s390x__) && !defined(__powerpc__) + return is_amx_tile_supported() && cpuinfo_has_x86_amx_fp16(); +#else + return false; +#endif +} + bool init_amx() { if (!is_amx_tile_supported()) { return false; diff --git a/aten/src/ATen/cpu/Utils.h b/aten/src/ATen/cpu/Utils.h index 27f9be3b3ffd02..1214e1e0ce6d9a 100644 --- a/aten/src/ATen/cpu/Utils.h +++ b/aten/src/ATen/cpu/Utils.h @@ -18,6 +18,9 @@ TORCH_API bool is_avx512_bf16_supported(); // Detect if CPU support Advanced Matrix Extension. TORCH_API bool is_amx_tile_supported(); +// Detect if CPU support Advanced Matrix Extension for fp16. +TORCH_API bool is_amx_fp16_supported(); + // Enable the system to use AMX instructions. TORCH_API bool init_amx(); diff --git a/aten/src/ATen/native/CPUBlas.h b/aten/src/ATen/native/CPUBlas.h index d49fe40409f814..16bcb246dc69df 100644 --- a/aten/src/ATen/native/CPUBlas.h +++ b/aten/src/ATen/native/CPUBlas.h @@ -206,7 +206,7 @@ TORCH_API void brgemm( float* C); // Release brgemm hardware context -void brgemm_release(); +TORCH_API void brgemm_release(); // Pack B matrix to get better performance if needed void pack( diff --git a/test/inductor/test_cpu_select_algorithm.py b/test/inductor/test_cpu_select_algorithm.py index 5a3355291e3c39..d3155808b09329 100644 --- a/test/inductor/test_cpu_select_algorithm.py +++ b/test/inductor/test_cpu_select_algorithm.py @@ -134,6 +134,12 @@ def _check_amx_counter(self, vec_amx): else: self.assertEqual(counters["inductor"]["cpp_micro_gemm_amx_counter"], 0) + def _check_brgemm_counter(self, vec_amx): + if vec_amx and torch.cpu._is_amx_fp16_supported(): + self.assertTrue(counters["inductor"]["cpp_micro_brgemm_counter"] > 0) + else: + self.assertEqual(counters["inductor"]["cpp_micro_brgemm_counter"], 0) + class TestSelectAlgorithm(BaseTestSelectAlgorithm): common = check_model @@ -800,7 +806,7 @@ def forward(self, arg7_1): @parametrize("in_features", (1024,)) @parametrize("out_features", (1024, 1025)) @parametrize("bias", (True, False)) - @dtypes(torch.bfloat16) + @dtypes(torch.bfloat16, torch.half) def test_linear_amx(self, batch_size, in_features, out_features, bias, dtype): class M(torch.nn.Module): def __init__(self, bias): @@ -817,7 +823,11 @@ def forward(self, x): self.common(mod, (v,), atol=atol, rtol=rtol) self.assertEqual(counters["inductor"]["select_algorithm_autotune"], 1) vec_amx = VecAMX() - self._check_amx_counter(vec_amx) + # Currently brgemm config is only added for half + if dtype == torch.half: + self._check_brgemm_counter(vec_amx) + else: + self._check_amx_counter(vec_amx) 
@inductor_config.patch({"freezing": True}) @patches diff --git a/torch/_C/_cpu.pyi b/torch/_C/_cpu.pyi index ddd9c4a95ec0af..f03164bfa00de9 100644 --- a/torch/_C/_cpu.pyi +++ b/torch/_C/_cpu.pyi @@ -7,6 +7,7 @@ def _is_avx512_supported() -> _bool: ... def _is_avx512_vnni_supported() -> _bool: ... def _is_avx512_bf16_supported() -> _bool: ... def _is_amx_tile_supported() -> _bool: ... +def _is_amx_fp16_supported() -> _bool: ... def _init_amx() -> _bool: ... def _is_arm_sve_supported() -> _bool: ... def _L1d_cache_size() -> _int: ... diff --git a/torch/_dynamo/trace_rules.py b/torch/_dynamo/trace_rules.py index 79887c7bd92a80..b46d8719c54c61 100644 --- a/torch/_dynamo/trace_rules.py +++ b/torch/_dynamo/trace_rules.py @@ -422,6 +422,7 @@ "torch._C._cpu._is_avx512_vnni_supported", "torch._C._cpu._is_avx512_bf16_supported", "torch._C._cpu._is_amx_tile_supported", + "torch._C._cpu._is_amx_fp16_supported", "torch._C._cpu._init_amx", "torch._C._cpu._is_arm_sve_supported", "torch._C._crash_if_aten_asan", @@ -2449,6 +2450,7 @@ "torch._C._cpu._is_avx512_vnni_supported", "torch._C._cpu._is_avx512_bf16_supported", "torch._C._cpu._is_amx_tile_supported", + "torch._C._cpu._is_amx_fp16_supported", "torch.cpu._init_amx", "torch._C._cpu._is_arm_sve_supported", "torch.cpu.current_device", diff --git a/torch/_inductor/codegen/cpp_gemm_template.py b/torch/_inductor/codegen/cpp_gemm_template.py index dcb582488f8312..f1b0b2c30b9a9b 100644 --- a/torch/_inductor/codegen/cpp_gemm_template.py +++ b/torch/_inductor/codegen/cpp_gemm_template.py @@ -22,7 +22,12 @@ ) from ..virtualized import ops, V from .cpp import get_export_declaration -from .cpp_micro_gemm import CppMicroGemmAMX, create_micro_gemm, LayoutType +from .cpp_micro_gemm import ( + CppMicroBrgemm, + CppMicroGemmAMX, + create_micro_gemm, + LayoutType, +) from .cpp_template import CppTemplate from .cpp_template_kernel import CppTemplateKernel from .cpp_utils import ( @@ -1070,6 +1075,8 @@ def get_reindexer(epilogue_node): self.log_blockings() if isinstance(micro_gemm, CppMicroGemmAMX): counters["inductor"]["cpp_micro_gemm_amx_counter"] += 1 + if isinstance(micro_gemm, CppMicroBrgemm): + counters["inductor"]["cpp_micro_brgemm_counter"] += 1 L1_cache_size = torch._C._cpu._L1d_cache_size() # per core cache size in Bytes assert L1_cache_size > 0, f"Expect L1_cache_size > 0 but got {L1_cache_size}" diff --git a/torch/_inductor/codegen/cpp_micro_gemm.py b/torch/_inductor/codegen/cpp_micro_gemm.py index 3b17ff9a50d026..152e8278d6eb14 100644 --- a/torch/_inductor/codegen/cpp_micro_gemm.py +++ b/torch/_inductor/codegen/cpp_micro_gemm.py @@ -786,6 +786,69 @@ def get_b_layout(self): return LayoutType.VNNI2 +# extra check for CppMicroBrgemm +def check_brgemm_extra(config, m, n, k, alpha, num_threads): + assert config.input_dtype == torch.half and config.output_dtype == torch.float + vnni_size = 2 + # use brgemm for Half when amx_fp16 is supported + return torch.cpu._is_amx_fp16_supported() and k % vnni_size == 0 and alpha == 1 + + +@register_micro_gemm( + *generate_gemm_config( + VecAMX, + [(32, 32, 32), (48, 16, 32), (16, 48, 32)], + input_dtype=torch.half, + output_dtype=torch.float, + extra_check=check_brgemm_extra, + ), +) +class CppMicroBrgemm(CppMicroGemm): + """ + This class generates the code for micro gemm using oneDNN brgemm. + It supports input types of torch.half. + """ + + TEMPLATE_ENTRY = r""" +#include +{{declare_kernel}} { + at::native::cpublas::brgemm( + M, N, K, + lda, ldb, ldc, + 1.f, accum ? 
1.f : 0.f, + A, + B, + C); +} +""" + + def codegen_define(self, kernel: CppTemplateKernel) -> str: + options = { + "declare_kernel": self.get_kernel_declaration(), + "kernel": kernel, + "block_m": self.register_blocking.block_m, + "block_n": self.register_blocking.block_n, + "block_k": self.register_blocking.block_k, + "restrict_keyword": get_restrict_keyword(), + **self.get_common_options(), + } + result = "" + result += KernelTemplate._template_from_string(self.TEMPLATE_ENTRY).render( + options + ) + return result + + def codegen_finalize( + self, + kernel: CppTemplateKernel, + ) -> str: + return "at::native::cpublas::brgemm_release();" + + def get_b_layout(self): + assert self.input_dtype == torch.half and torch.cpu._is_amx_fp16_supported() + return LayoutType.VNNI2 + + def create_micro_gemm( name, m, diff --git a/torch/cpu/__init__.py b/torch/cpu/__init__.py index f62ddda893b3bd..67ebb633802f58 100644 --- a/torch/cpu/__init__.py +++ b/torch/cpu/__init__.py @@ -55,6 +55,11 @@ def _is_amx_tile_supported() -> bool: return torch._C._cpu._is_amx_tile_supported() +def _is_amx_fp16_supported() -> bool: + r"""Returns a bool indicating if CPU supports AMX FP16.""" + return torch._C._cpu._is_amx_fp16_supported() + + def _init_amx() -> bool: r"""Initializes AMX instructions.""" return torch._C._cpu._init_amx() diff --git a/torch/csrc/cpu/Module.cpp b/torch/csrc/cpu/Module.cpp index 23abb3abae9469..5e3f4b5b18bb05 100644 --- a/torch/csrc/cpu/Module.cpp +++ b/torch/csrc/cpu/Module.cpp @@ -13,6 +13,7 @@ void initModule(PyObject* module) { cpu.def("_is_avx512_vnni_supported", at::cpu::is_avx512_vnni_supported); cpu.def("_is_avx512_bf16_supported", at::cpu::is_avx512_bf16_supported); cpu.def("_is_amx_tile_supported", at::cpu::is_amx_tile_supported); + cpu.def("_is_amx_fp16_supported", at::cpu::is_amx_fp16_supported); cpu.def("_init_amx", at::cpu::init_amx); cpu.def("_is_arm_sve_supported", at::cpu::is_arm_sve_supported); cpu.def("_L1d_cache_size", at::cpu::L1d_cache_size); From de4216bfda2afbd5b3f74c99058039a77ad7ec81 Mon Sep 17 00:00:00 2001 From: Laith Sakka Date: Mon, 4 Nov 2024 18:09:36 -0800 Subject: [PATCH 064/503] increase add_loop benchmark and refresh all results! 
(#139703) see comments end of https://github.com/pytorch/pytorch/pull/138756 I am also refreshing all values Pull Request resolved: https://github.com/pytorch/pytorch/pull/139703 Approved by: https://github.com/bobrenjc93 --- .../pr_time_benchmarks/expected_results.csv | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/benchmarks/dynamo/pr_time_benchmarks/expected_results.csv b/benchmarks/dynamo/pr_time_benchmarks/expected_results.csv index 75c1a945704ca7..f60dcd76815e51 100644 --- a/benchmarks/dynamo/pr_time_benchmarks/expected_results.csv +++ b/benchmarks/dynamo/pr_time_benchmarks/expected_results.csv @@ -1,40 +1,40 @@ -add_loop_eager,compile_time_instruction_count,3027000000,0.015 +add_loop_eager,compile_time_instruction_count,3037000000,0.015 -add_loop_eager_dynamic,compile_time_instruction_count,5596000000,0.025 +add_loop_eager_dynamic,compile_time_instruction_count,5624000000,0.025 -add_loop_inductor,compile_time_instruction_count,24260000000,0.015 +add_loop_inductor,compile_time_instruction_count,24600000000,0.015 -add_loop_inductor_dynamic_gpu,compile_time_instruction_count,40380000000,0.025 +add_loop_inductor_dynamic_gpu,compile_time_instruction_count,40740000000,0.025 -add_loop_inductor_gpu,compile_time_instruction_count,23010000000,0.015 +add_loop_inductor_gpu,compile_time_instruction_count,23330000000,0.015 -basic_modules_ListOfLinears_eager,compile_time_instruction_count,1028000000,0.015 +basic_modules_ListOfLinears_eager,compile_time_instruction_count,1032000000,0.015 -basic_modules_ListOfLinears_inductor,compile_time_instruction_count,19170000000,0.015 +basic_modules_ListOfLinears_inductor,compile_time_instruction_count,19150000000,0.015 -basic_modules_ListOfLinears_inductor_gpu_force_shape_pad,compile_time_instruction_count,15810000000,0.015 +basic_modules_ListOfLinears_inductor_gpu_force_shape_pad,compile_time_instruction_count,15770000000,0.015 -basic_modules_ListOfLinears_inductor_gpu,compile_time_instruction_count,16760000000,0.2 +basic_modules_ListOfLinears_inductor_gpu,compile_time_instruction_count,16060000000,0.2 -update_hint_regression,compile_time_instruction_count,1743000000,0.02 +update_hint_regression,compile_time_instruction_count,1750000000,0.02 @@ -42,24 +42,24 @@ sum_floordiv_regression,compile_time_instruction_count,1160000000,0.015 -symint_sum,compile_time_instruction_count,3293000000,0.015 +symint_sum,compile_time_instruction_count,3319000000,0.015 -aotdispatcher_inference_nosubclass_cpu,compile_time_instruction_count,2001000000,0.015 +aotdispatcher_inference_nosubclass_cpu,compile_time_instruction_count,2000000000,0.015 -aotdispatcher_inference_subclass_cpu,compile_time_instruction_count,5778000000,0.015 +aotdispatcher_inference_subclass_cpu,compile_time_instruction_count,5779000000,0.015 -aotdispatcher_partitioner_cpu,compile_time_instruction_count,8989000000,0.015 +aotdispatcher_partitioner_cpu,compile_time_instruction_count,9010000000,0.015 -aotdispatcher_training_nosubclass_cpu,compile_time_instruction_count,3822000000,0.015 +aotdispatcher_training_nosubclass_cpu,compile_time_instruction_count,3824000000,0.015 -aotdispatcher_training_subclass_cpu,compile_time_instruction_count,10260000000,0.015 +aotdispatcher_training_subclass_cpu,compile_time_instruction_count,10270000000,0.015 From 9aaf3a04fa84e1ab336b6ba5c57d967c42ca05cc Mon Sep 17 00:00:00 2001 From: "Chen, Zejun" Date: Tue, 5 Nov 2024 05:46:11 +0000 Subject: [PATCH 065/503] [profiler][UT] instantiate profiler UTs for devices and enable UTs for xpu profiler 
(#134316) This PR enables the profiler related UT to be device-agnostic. It instantiates the profiler UTs for different device types and enable them on XPU backend. Pull Request resolved: https://github.com/pytorch/pytorch/pull/134316 Approved by: https://github.com/etaf, https://github.com/aaronenyeshi, https://github.com/gujinghui --- test/profiler/test_cpp_thread.py | 168 ++++++++++++++++-- test/profiler/test_execution_trace.py | 23 ++- test/profiler/test_memory_profiler.py | 32 +++- test/profiler/test_profiler_tree.py | 15 +- torch/testing/_internal/common_device_type.py | 4 +- torch/testing/_internal/common_utils.py | 5 + 6 files changed, 219 insertions(+), 28 deletions(-) diff --git a/test/profiler/test_cpp_thread.py b/test/profiler/test_cpp_thread.py index 527fc5aca1c050..9dbecf994a4fa5 100644 --- a/test/profiler/test_cpp_thread.py +++ b/test/profiler/test_cpp_thread.py @@ -1,6 +1,7 @@ # Owner(s): ["oncall: profiler"] import os +import unittest from unittest import skipIf import torch @@ -32,6 +33,7 @@ KinetoProfiler = None IterationCount = 5 ActivateIteration = 2 +device = None def blueprint(text): @@ -56,17 +58,20 @@ def onIterationStart(self, iteration: int) -> None: KinetoProfiler.step() def emulateTraining(self, iteration: int, thread_id: int) -> None: + global device # blueprint(f"training iteration {iteration} in thread {thread_id}") - device = torch.device("cuda") - # device = torch.device("cpu") + torch_device = getattr(torch, device) + assert hasattr(torch_device, "synchronize") + sync_func = torch_device.synchronize + with torch.autograd.profiler.record_function("user_function"): a = torch.ones(1, device=device) b = torch.ones(1, device=device) torch.add(a, b).cpu() - torch.cuda.synchronize() + sync_func() -class CppThreadTest(TestCase): +class CppThreadTestCUDA(TestCase): ThreadCount = 20 # set to 2 for debugging EventHandler = None TraceObject = None @@ -74,8 +79,8 @@ class CppThreadTest(TestCase): @classmethod def setUpClass(cls) -> None: super(TestCase, cls).setUpClass() - CppThreadTest.EventHandler = PythonProfilerEventHandler() - cpp.ProfilerEventHandler.Register(CppThreadTest.EventHandler) + CppThreadTestCUDA.EventHandler = PythonProfilerEventHandler() + cpp.ProfilerEventHandler.Register(CppThreadTestCUDA.EventHandler) @classmethod def tearDownClass(cls): @@ -85,6 +90,8 @@ def tearDownClass(cls): def setUp(self) -> None: if not torch.cuda.is_available(): self.skipTest("Test machine does not have cuda") + global device + device = "cuda" # this clears off events from initialization self.start_profiler(False) @@ -103,7 +110,7 @@ def start_profiler(self, profile_memory): ) def set_trace(self, trace_obj) -> None: - CppThreadTest.TraceObject = trace_obj + CppThreadTestCUDA.TraceObject = trace_obj def assert_text(self, condition, text, msg): if condition: @@ -114,7 +121,7 @@ def assert_text(self, condition, text, msg): def check_trace(self, expected, mem=False) -> None: blueprint("verifying trace") - event_list = CppThreadTest.TraceObject.events() + event_list = CppThreadTestCUDA.TraceObject.events() for key, values in expected.items(): count = values[0] min_count = count * (ActivateIteration - 1) @@ -160,7 +167,7 @@ def check_trace(self, expected, mem=False) -> None: IS_WINDOWS, "Failing on windows cuda, see https://github.com/pytorch/pytorch/pull/130037 for slightly more context", ) - def test_with_enable_profiler_in_child_thread(self) -> None: + def test_with_enable_profiler_in_child_thread_cuda(self) -> None: self.start_profiler(False) 
cpp.start_threads(self.ThreadCount, IterationCount, True) self.check_trace( @@ -174,7 +181,7 @@ def test_with_enable_profiler_in_child_thread(self) -> None: IS_WINDOWS, "Failing on windows cuda, see https://github.com/pytorch/pytorch/pull/130037 for slightly more context", ) - def test_without_enable_profiler_in_child_thread(self) -> None: + def test_without_enable_profiler_in_child_thread_cuda(self) -> None: self.start_profiler(False) cpp.start_threads(self.ThreadCount, IterationCount, False) self.check_trace( @@ -188,7 +195,146 @@ def test_without_enable_profiler_in_child_thread(self) -> None: IS_WINDOWS, "Failing on windows cuda, see https://github.com/pytorch/pytorch/pull/130037 for slightly more context", ) - def test_profile_memory(self) -> None: + def test_profile_memory_cuda(self) -> None: + self.start_profiler(True) + cpp.start_threads(self.ThreadCount, IterationCount, True) + self.check_trace( + { + "aten::add": [self.ThreadCount, "CPU"], + }, + mem=True, + ) + + +# Here duplicate the CppThreadTest to enable the xpu cases because the +# instantiate_device_type_tests will call class method setUpClass. +# In function setUpClass, the instantiated class(e.g CppThreadTestCPU, CppThreadTestXPU) +# needs to be called to get it member EventHandler, while in this period, +# the input class in argument cls is CppThreadTest, which is not defined any more. +# We cannot detect which instantiated class is being created in setUpClass, so duplicate here +# for enabling xpu test cases +class CppThreadTestXPU(TestCase): + ThreadCount = 20 # set to 2 for debugging + EventHandler = None + TraceObject = None + + @classmethod + def setUpClass(cls) -> None: + super(TestCase, cls).setUpClass() + CppThreadTestXPU.EventHandler = PythonProfilerEventHandler() + cpp.ProfilerEventHandler.Register(CppThreadTestXPU.EventHandler) + + @classmethod + def tearDownClass(cls): + if not is_fbcode(): + torch.testing._internal.common_utils.remove_cpp_extensions_build_root() + + def setUp(self) -> None: + if not torch.xpu.is_available(): + self.skipTest("Test machine does not have xpu") + global device + device = "xpu" + + # this clears off events from initialization + self.start_profiler(False) + cpp.start_threads(1, IterationCount, False) + + def start_profiler(self, profile_memory): + global KinetoProfiler + KinetoProfiler = torch.profiler.profile( + schedule=torch.profiler.schedule( + wait=1, warmup=1, active=ActivateIteration, repeat=1 + ), + on_trace_ready=self.set_trace, + with_stack=True, + profile_memory=profile_memory, + record_shapes=True, + ) + + def set_trace(self, trace_obj) -> None: + CppThreadTestXPU.TraceObject = trace_obj + + def assert_text(self, condition, text, msg): + if condition: + print(f"\33[32m{text}\33[0m") + else: + print(f"\33[31m{text}\33[0m") + self.assertTrue(condition, msg) + + def check_trace(self, expected, mem=False) -> None: + blueprint("verifying trace") + event_list = CppThreadTestXPU.TraceObject.events() + for key, values in expected.items(): + count = values[0] + min_count = count * (ActivateIteration - 1) + device = values[1] + filtered = filter( + lambda ev: ev.name == key + and str(ev.device_type) == f"DeviceType.{device}", + event_list, + ) + + if mem: + actual = 0 + for ev in filtered: + sev = str(ev) + has_cuda_memory_usage = ( + sev.find("xpu_memory_usage=0 ") < 0 + and sev.find("xpu_memory_usage=") > 0 + ) + if has_cuda_memory_usage: + actual += 1 + self.assert_text( + actual >= min_count, + f"{key}: {actual} >= {min_count}", + "not enough event with xpu_memory_usage 
set", + ) + else: + actual = len(list(filtered)) + if count == 1: # test_without + count *= ActivateIteration + self.assert_text( + actual == count, + f"{key}: {actual} == {count}", + "baseline event count incorrect", + ) + else: + self.assert_text( + actual >= min_count, + f"{key}: {actual} >= {min_count}", + "not enough event recorded", + ) + + @unittest.skip( + reason="The XPU Profiler will not cover this case for now. Will support it in next period." + ) + def test_with_enable_profiler_in_child_thread_xpu(self) -> None: + self.start_profiler(False) + cpp.start_threads(self.ThreadCount, IterationCount, True) + self.check_trace( + { + "aten::add": [self.ThreadCount, "CPU"], + "user_function": [self.ThreadCount, "XPU"], + } + ) + + @unittest.skip( + reason="The XPU Profiler will not cover this case for now. Will support it in next period." + ) + def test_without_enable_profiler_in_child_thread_xpu(self) -> None: + self.start_profiler(False) + cpp.start_threads(self.ThreadCount, IterationCount, False) + self.check_trace( + { + "aten::add": [1, "CPU"], + "user_function": [1, "XPU"], + } + ) + + @unittest.skip( + reason="The XPU Profiler will not cover this case for now. Will support it in next period." + ) + def test_profile_memory_xpu(self) -> None: self.start_profiler(True) cpp.start_threads(self.ThreadCount, IterationCount, True) self.check_trace( diff --git a/test/profiler/test_execution_trace.py b/test/profiler/test_execution_trace.py index da52d17845c637..e869a4796852b0 100644 --- a/test/profiler/test_execution_trace.py +++ b/test/profiler/test_execution_trace.py @@ -41,6 +41,7 @@ skipIfHpu, skipIfTorchDynamo, TEST_HPU, + TEST_XPU, TestCase, ) from torch.utils._triton import has_triton @@ -131,6 +132,7 @@ def trace_handler(p): use_device = ( torch.profiler.ProfilerActivity.CUDA + or torch.profiler.ProfilerActivity.XPU in supported_activities() or torch.profiler.ProfilerActivity.HPU in supported_activities() ) # Create a temp file to save execution trace and kineto data. @@ -202,6 +204,7 @@ def test_execution_trace_alone(self, device): use_device = ( torch.profiler.ProfilerActivity.CUDA or torch.profiler.ProfilerActivity.HPU in supported_activities() + or torch.profiler.ProfilerActivity.XPU in supported_activities() ) # Create a temp file to save execution trace data. fp = tempfile.NamedTemporaryFile("w+t", suffix=".et.json", delete=False) @@ -241,8 +244,10 @@ def test_execution_trace_alone(self, device): @unittest.skipIf( sys.version_info >= (3, 12), "torch.compile is not supported on python 3.12+" ) - @unittest.skipIf(not TEST_CUDA or not has_triton(), "need CUDA and triton to run") - @skipIfHpu + @unittest.skipIf( + (not has_triton()) or (not TEST_CUDA and not TEST_XPU), + "need triton and device(CUDA or XPU) availability to run", + ) def test_execution_trace_with_pt2(self, device): @torchdynamo.optimize("inductor") def fn(a, b, c): @@ -290,6 +295,7 @@ def fn(a, b, c): def test_execution_trace_start_stop(self, device): use_device = ( torch.profiler.ProfilerActivity.CUDA + or torch.profiler.ProfilerActivity.XPU in supported_activities() or torch.profiler.ProfilerActivity.HPU in supported_activities() ) # Create a temp file to save execution trace data. 
@@ -328,6 +334,7 @@ def test_execution_trace_start_stop(self, device): def test_execution_trace_repeat_in_loop(self, device): use_device = ( torch.profiler.ProfilerActivity.CUDA + or torch.profiler.ProfilerActivity.XPU in supported_activities() or torch.profiler.ProfilerActivity.HPU in supported_activities() ) iter_list = {3, 4, 6, 8} @@ -361,8 +368,7 @@ def test_execution_trace_repeat_in_loop(self, device): assert found_root_node assert event_count == expected_loop_events - @skipIfHpu - def test_execution_trace_no_capture(self, device): + def test_execution_trace_no_capture(self): fp = tempfile.NamedTemporaryFile("w+t", suffix=".et.json", delete=False) fp.close() et = ExecutionTraceObserver().register_callback(fp.name) @@ -377,8 +383,7 @@ def test_execution_trace_no_capture(self, device): assert found_root_node @skipIfTorchDynamo("https://github.com/pytorch/pytorch/issues/124500") - @skipIfHpu - def test_execution_trace_nested_tensor(self, device): + def test_execution_trace_nested_tensor(self): fp = tempfile.NamedTemporaryFile("w+t", suffix=".et.json", delete=False) fp.close() @@ -404,9 +409,13 @@ def fn(nt): devices = ["cpu", "cuda"] +if TEST_XPU: + devices.append("xpu") if TEST_HPU: devices.append("hpu") -instantiate_device_type_tests(TestExecutionTrace, globals(), only_for=devices) +instantiate_device_type_tests( + TestExecutionTrace, globals(), allow_xpu="xpu" in devices, only_for=devices +) if __name__ == "__main__": run_tests() diff --git a/test/profiler/test_memory_profiler.py b/test/profiler/test_memory_profiler.py index a074a29b60c51a..c0595109f5aebd 100644 --- a/test/profiler/test_memory_profiler.py +++ b/test/profiler/test_memory_profiler.py @@ -3,12 +3,20 @@ import gc import itertools as it import textwrap +import unittest from typing import Callable, Dict, Iterator, List, Optional, Tuple import torch from torch._C._profiler import _EventType, _TensorMetadata from torch.profiler import _memory_profiler, _utils -from torch.testing._internal.common_utils import run_tests, skipIfTorchDynamo, TestCase +from torch.testing._internal.common_device_type import instantiate_device_type_tests +from torch.testing._internal.common_utils import ( + ALLOW_XPU_PROFILING_TEST, + DEVICE_LIST_SUPPORT_PROFILING_TEST, + run_tests, + skipIfTorchDynamo, + TestCase, +) from torch.utils import _pytree as pytree @@ -1553,14 +1561,21 @@ def id_for_testing(key): destroy GRADIENT 13(v0) 1024 kB""", ) - def test_memory_timeline_no_id(self) -> None: + +@skipIfTorchDynamo("TorchDynamo changes Python calls that memory profiling relies on.") +class TestMemoryProfilerTimeline(TestCase): + @unittest.skipIf( + torch.xpu.is_available(), + "The XPU Profiler will not cover this case for now. Will support it in next period.", + ) + def test_memory_timeline_no_id(self, device) -> None: # On CPU the default behavior is to simply forward to malloc. That # means that when we free `x` the allocator doesn't actually know how # many bytes are in the allocation, and thus there's no point to # calling `c10::reportMemoryUsageToProfiler`. So in order to test that - # memory profiler processes this case correctly we need to use CUDA + # memory profiler processes this case correctly we need to use device # where we do always keep a record. 
- x = torch.ones((1024,), device="cuda" if torch.cuda.is_available() else "cpu") + x = torch.ones((1024,), device=device) with profile() as prof: # We never see `x` used so we don't know the storage is for a @@ -1595,7 +1610,7 @@ def test_memory_timeline_no_id(self) -> None: actual = [(action, size) for _, action, _, size in memory_profile.timeline] # See above. - if not torch.cuda.is_available(): + if device == "cpu": expected = expected[2:] for event in expected: self.assertTrue( @@ -1609,5 +1624,12 @@ def test_memory_timeline_no_id(self) -> None: ) +instantiate_device_type_tests( + TestMemoryProfilerTimeline, + globals(), + only_for=DEVICE_LIST_SUPPORT_PROFILING_TEST, + allow_xpu=ALLOW_XPU_PROFILING_TEST, +) + if __name__ == "__main__": run_tests() diff --git a/test/profiler/test_profiler_tree.py b/test/profiler/test_profiler_tree.py index 7de38519feca62..0ac262a0a43691 100644 --- a/test/profiler/test_profiler_tree.py +++ b/test/profiler/test_profiler_tree.py @@ -260,7 +260,10 @@ def assertTreesMatch(self, actual: str, expected: str, allow_failure: bool = Fal # TODO: Add logic for CUDA version of test @ProfilerTree.test - @unittest.skipIf(torch.cuda.is_available(), "Test not working for CUDA") + @unittest.skipIf( + torch.cuda.is_available() or torch.xpu.is_available(), + "Test not working for CUDA and XPU", + ) def test_profiler_experimental_tree(self): t1, t2 = torch.ones(1, requires_grad=True), torch.ones(1, requires_grad=True) with torch.profiler.profile() as p: @@ -315,7 +318,10 @@ def test_profiler_experimental_tree(self): # TODO: Add logic for CUDA version of test @ProfilerTree.test - @unittest.skipIf(torch.cuda.is_available(), "Test not working for CUDA") + @unittest.skipIf( + torch.cuda.is_available() or torch.xpu.is_available(), + "Test not working for CUDA and XPU", + ) def test_profiler_experimental_tree_with_record_function(self): with torch.profiler.profile() as p: with torch.autograd.profiler.record_function("Top level Annotation"): @@ -365,7 +371,10 @@ def test_profiler_experimental_tree_with_record_function(self): # TODO: Add logic for CUDA version of test @ProfilerTree.test - @unittest.skipIf(torch.cuda.is_available(), "Test not working for CUDA") + @unittest.skipIf( + torch.cuda.is_available() or torch.xpu.is_available(), + "Test not working for CUDA and XPU", + ) def test_profiler_experimental_tree_with_memory(self): t1, t2 = torch.ones(1, requires_grad=True), torch.ones(1, requires_grad=True) with torch.profiler.profile(profile_memory=True) as p: diff --git a/torch/testing/_internal/common_device_type.py b/torch/testing/_internal/common_device_type.py index cea74aa21a78d6..57d9b5bb4e39b4 100644 --- a/torch/testing/_internal/common_device_type.py +++ b/torch/testing/_internal/common_device_type.py @@ -384,7 +384,7 @@ def _init_and_get_primary_device(cls): try: return cls.get_primary_device() except Exception: - # For CUDATestBase, XLATestBase, and possibly others, the primary device won't be available + # For CUDATestBase, XPUTestBase, XLATestBase, and possibly others, the primary device won't be available # until setUpClass() sets it. Call that manually here if needed. 
if hasattr(cls, "setUpClass"): cls.setUpClass() @@ -667,7 +667,7 @@ def get_all_devices(cls): @classmethod def setUpClass(cls): - cls.primary_device = "xpu:0" + cls.primary_device = f"xpu:{torch.xpu.current_device()}" def _should_stop_test_suite(self): return False diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index deb46a2f337c45..52d74633021c29 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -301,6 +301,11 @@ def maybe_load_json(filename): NATIVE_DEVICES = ('cpu', 'cuda', 'xpu', 'meta', torch._C._get_privateuse1_backend_name()) +# used for managing devices testing for torch profiler UTs +# for now cpu, cuda and xpu are added for testing torch profiler UTs +DEVICE_LIST_SUPPORT_PROFILING_TEST = ('cpu', 'cuda', 'xpu') +ALLOW_XPU_PROFILING_TEST = True + check_names = ['orin', 'concord', 'galen', 'xavier', 'nano', 'jetson', 'tegra'] IS_JETSON = any(name in platform.platform() for name in check_names) From b2f5a5311b6dbf36d05078e53ac1723fd272ab3b Mon Sep 17 00:00:00 2001 From: Eli Simhayev Date: Tue, 5 Nov 2024 05:59:41 +0000 Subject: [PATCH 066/503] RMSNorms docs - remove biases initialization (#139620) RMSNorm doesn't use a bias in `elementwise_affine`, so I've removed it from the documentation. Pull Request resolved: https://github.com/pytorch/pytorch/pull/139620 Approved by: https://github.com/mikaylagawarecki --- torch/nn/modules/normalization.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/torch/nn/modules/normalization.py b/torch/nn/modules/normalization.py index f6569f2915eb7c..af7314507bd66e 100644 --- a/torch/nn/modules/normalization.py +++ b/torch/nn/modules/normalization.py @@ -345,8 +345,7 @@ class RMSNorm(Module): normalize over the last dimension which is expected to be of that specific size. eps: a value added to the denominator for numerical stability. Default: :func:`torch.finfo(x.dtype).eps` elementwise_affine: a boolean value that when set to ``True``, this module - has learnable per-element affine parameters initialized to ones (for weights) - and zeros (for biases). Default: ``True``. + has learnable per-element affine parameters initialized to ones (for weights). Default: ``True``. 
Shape: - Input: :math:`(N, *)` From 51a3d6dbc3efd2312ae65f3fc79e2def37a560b6 Mon Sep 17 00:00:00 2001 From: Aaron Orenstein Date: Mon, 4 Nov 2024 14:51:16 -0800 Subject: [PATCH 067/503] Fix existing lint issues in ir.py (#139237) - Remove stale mypy "type: ignores" - Made ir.py pass the rest of the lints Pull Request resolved: https://github.com/pytorch/pytorch/pull/139237 Approved by: https://github.com/Skylion007 --- torch/_inductor/ir.py | 114 +++++++++++++------------- torch/utils/benchmark/utils/fuzzer.py | 2 +- 2 files changed, 56 insertions(+), 60 deletions(-) diff --git a/torch/_inductor/ir.py b/torch/_inductor/ir.py index c4ce9b4c376837..37f769ae3bddcc 100644 --- a/torch/_inductor/ir.py +++ b/torch/_inductor/ir.py @@ -299,9 +299,9 @@ def ir_node_to_tensor( size = [shape_fn(s) for s in x.get_size()] stride: StrideType if is_storage_and_layout(x): - stride = [shape_fn(s) for s in x.get_layout().stride] # type: ignore[misc, union-attr] + stride = [shape_fn(s) for s in x.get_layout().stride] else: - stride = FlexibleLayout.contiguous_strides(size) # type: ignore[assignment] + stride = FlexibleLayout.contiguous_strides(size) dtype = x.get_dtype() device = x.get_device() size = convert_shape_to_symint(size) @@ -416,7 +416,7 @@ def get_numel(self): return sympy_product(self.get_size()) def is_zero_elements(self): - return V.graph.sizevars.is_expr_static_and_true(sympy.Eq(self.get_numel(), 0)) # type: ignore[arg-type] + return V.graph.sizevars.is_expr_static_and_true(sympy.Eq(self.get_numel(), 0)) def realize(self): """ @@ -902,7 +902,7 @@ def _is_static(x): # We don't support unbacked symints return ReductionHint.DEFAULT, 1 - device_interface = get_interface_for_device(get_device_type(device)) # type: ignore[arg-type] # next PR + device_interface = get_interface_for_device(get_device_type(device)) # type: ignore[arg-type] device_properties = device_interface.Worker.get_device_properties(device) if get_device_type(device) == "xpu": num_sm = device_properties.gpu_subslice_count @@ -1140,7 +1140,7 @@ def value_fn(index, rindex): return fn @classmethod - def create( # type: ignore[override] + def create( cls, device: torch.device, dst_dtype: torch.dtype, @@ -1345,7 +1345,7 @@ def _multilayer_wrap_loader( ): reindex = View.dynamic_reshape_indexer(reduction_ranges, [reduction_numel]) need_mask = not V.graph.sizevars.is_expr_static_and_true( - sympy.Eq(reduction_numel % split, 0) # type: ignore[arg-type] + sympy.Eq(reduction_numel % split, 0) ) def wrapper_fn(index, reduction_index): @@ -1488,7 +1488,7 @@ def create_multilayer( wrapper_fn, ranges, reduction_ranges, - [*ranges, split], # type: ignore[list-item] + [*ranges, split], [block_size], reduction_type, split, @@ -1720,7 +1720,7 @@ def create_multilayer( # type: ignore[override] """ reduction_numel = sympy_product(reduction_ranges) need_mask = not V.graph.sizevars.is_expr_static_and_true( - sympy.Eq(reduction_numel % split, 0) # type: ignore[arg-type] + sympy.Eq(reduction_numel % split, 0) ) if need_mask and reduction_type != "welford_combine": @@ -1760,7 +1760,7 @@ def constant(idx, reduction_idx, value): ) for loader in inner_fns ), - [*ranges, split], # type: ignore[list-item] + [*ranges, split], [block_size], reduction_type, reduction_hint, @@ -1785,7 +1785,7 @@ def intermediate_loader_fn(index, reduction_index, loader): for i in intermediates ), ranges, - [split], # type: ignore[list-item] + [split], # welford_reduce turns one input into three outputs, which are combined with welford_combine "welford_combine", reduction_hint, @@ 
-1886,7 +1886,7 @@ def create( assert len(dtypes) == len(inner_fns) # Scan with a single element is just a copy - if sizevars.is_expr_static_and_true(sympy.Le(scan_numel, 1)): # type: ignore[arg-type] + if sizevars.is_expr_static_and_true(sympy.Le(scan_numel, 1)): return [ Pointwise.create( device=device, @@ -2081,7 +2081,7 @@ def create( assert len(dtypes) == len(inner_fns) # Sort with a single element is just a copy - if sizevars.is_expr_static_and_true(sympy.Le(sort_numel, 1)): # type: ignore[arg-type] + if sizevars.is_expr_static_and_true(sympy.Le(sort_numel, 1)): return [ Pointwise.create( device=device, @@ -2420,7 +2420,7 @@ def get_size(self): def make_reindexer(self): inv = {j: i for i, j in enumerate(self.dims)} - inv = [inv[i] for i in range(len(self.dims))] # type: ignore[index] + inv = [inv[i] for i in range(len(self.dims))] assert OrderedSet(inv) == OrderedSet(range(len(self.dims))) def reindex(index): @@ -2648,12 +2648,12 @@ def _dynamic_reshape_indexer(old_size, new_size): while stack_old: size_old = stack_old.pop() - V.graph.sizevars.guard_equals(size_old, 1) # type: ignore[arg-type] + V.graph.sizevars.guard_equals(size_old, 1) view_expr.append(sympy.S.Zero) while stack_new: var, size_new = stack_new.pop() - V.graph.sizevars.guard_equals(size_new, 1) # type: ignore[arg-type] + V.graph.sizevars.guard_equals(size_new, 1) view_expr.reverse() assert len(view_expr) == len(old_size) @@ -2661,7 +2661,7 @@ def _dynamic_reshape_indexer(old_size, new_size): def reindex(index): assert len(index) == len(vars), (len(index), len(vars)) replacements = dict(zip(vars, index)) - return tuple(sympy_subs(x, replacements) for x in view_expr) # type: ignore[arg-type] + return tuple(sympy_subs(x, replacements) for x in view_expr) return reindex @@ -2987,7 +2987,7 @@ def is_channels_last_contiguous(shape, strides): if ndim not in [4, 5] or shape[1] == 1: return False for left, right, size in zip( - strides, make_channels_last_strides_for(shape), shape # type: ignore[arg-type] + strides, make_channels_last_strides_for(shape), shape ): if size != 1 and left != right: return False @@ -3141,7 +3141,7 @@ def __eq__(self, other) -> bool: ) def storage_size(self) -> sympy.Expr: - return compute_required_storage_length(self.size, self.stride, self.offset) # type: ignore[arg-type, return-value] + return compute_required_storage_length(self.size, self.stride, self.offset) class FixedLayout(Layout): @@ -3160,9 +3160,9 @@ def __init__( super().__init__( device=device, dtype=dtype, - size=size, # type: ignore[arg-type] + size=size, stride=stride, - offset=offset, # type: ignore[arg-type] + offset=offset, ) def make_indexer(self): @@ -3342,7 +3342,7 @@ def maybe_guard_aligned(self): return True from .utils import ALIGNMENT - return V.graph.sizevars.statically_known_multiple_of(offset, ALIGNMENT) # type: ignore[arg-type] + return V.graph.sizevars.statically_known_multiple_of(offset, ALIGNMENT) class CommBufferType(Enum): @@ -3561,7 +3561,7 @@ def freeze_layout_with_exact_strides(self, exact_strides, allow_padding=False): ) def is_zero_elements(self): - return V.graph.sizevars.is_expr_static_and_true(sympy.Eq(self.get_numel(), 0)) # type: ignore[arg-type] + return V.graph.sizevars.is_expr_static_and_true(sympy.Eq(self.get_numel(), 0)) def make_loader(self): # Loading from a zero-element buffer is a no-op @@ -3775,7 +3775,7 @@ def get_fill_order(self): else: indices = index_vars stride_lengths = [ - V.graph.sizevars.stride_hints(expr, indices) for expr in reads # type: ignore[arg-type] + 
V.graph.sizevars.stride_hints(expr, indices) for expr in reads ] from .scheduler import pick_loop_order @@ -4603,7 +4603,9 @@ def set_cpp_kernel_name(self, cpp_kernel_name: Optional[str] = None): from .codegen.wrapper import get_cpp_op_schema self.cpp_kernel_overload_name = kernel._schema.overload_name - self.cpp_kernel_key = f"{self.cpp_kernel_name.replace('::', '_')}_{self.cpp_kernel_overload_name}" # type: ignore[union-attr] + self.cpp_kernel_key = ( + f"{self.cpp_kernel_name.replace('::', '_')}_{self.cpp_kernel_overload_name}" + ) try: self.cpp_op_schema = get_cpp_op_schema(kernel) except Exception: @@ -4901,7 +4903,7 @@ def require_strides( want_contiguous=False, stride_order=None, allow_padding=allow_padding, - exact_strides=exact_strides, # type: ignore[arg-type] # int|Expr vs int|Integer + exact_strides=exact_strides, ) return x elif isinstance(x.get_layout(), FixedLayout) and ( @@ -4971,7 +4973,7 @@ def require_strides( want_contiguous=False, stride_order=order, allow_padding=allow_padding, - exact_strides=exact_strides, # type: ignore[arg-type] # int|Expr vs int|Integer + exact_strides=exact_strides, ) if order: assert is_stride_order_storage_and_layout(x, order) @@ -5063,9 +5065,7 @@ def codegen_const_args(self, names: Optional[List[str]] = None): if self.arg_properties and idx < len(self.arg_properties) else None ) - result.append( - V.graph.wrapper_code.val_to_arg_str(x, type_) # type: ignore[arg-type] - ) + result.append(V.graph.wrapper_code.val_to_arg_str(x, type_)) return result else: return map(V.graph.wrapper_code.val_to_arg_str, self.constant_args) @@ -5089,11 +5089,7 @@ def codegen_args(self): self.arg_properties ), "Invalid access to ExternKernel.arg_properties" type_ = self.arg_properties[i].get("type") - args.append( - V.graph.wrapper_code.val_to_arg_str( # type: ignore[arg-type] - x, type_ - ) - ) + args.append(V.graph.wrapper_code.val_to_arg_str(x, type_)) else: args.append(V.graph.wrapper_code.val_to_arg_str(x)) if need_codegen_constant_args: @@ -5129,14 +5125,10 @@ def codegen_kwargs(self, skip_out=False): if self.allarg_properties and arg_name in self.allarg_properties else None ) - kwargs.append( - V.graph.wrapper_code.val_to_arg_str( # type: ignore[arg-type] - v, type_ - ) - ) + kwargs.append(V.graph.wrapper_code.val_to_arg_str(v, type_)) else: kwargs = [ - f"{k}={V.graph.wrapper_code.val_to_arg_str(v)}" # type: ignore[misc] + f"{k}={V.graph.wrapper_code.val_to_arg_str(v)}" for k, v in self.kwargs.items() ] return kwargs @@ -5189,7 +5181,7 @@ def canonicalize(self): _, add_var = var_builder("c") replacement = dict(zip(index_vars, reindex([add_var(x) for x in new_sizes]))) - index = sympy_subs(sympy.expand(index), replacement) # type: ignore[arg-type] + index = sympy_subs(sympy.expand(index), replacement) return index, tuple(new_sizes) def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: @@ -5474,9 +5466,11 @@ def codegen(self, wrapper): constexpr_indices_set = set(constexpr_indices) REMOVED = object() raw_args = [ - (idx, arg) - if (arg is not None) or (arg is None and idx in constexpr_indices_set) - else (idx, REMOVED) + ( + (idx, arg) + if (arg is not None) or (arg is None and idx in constexpr_indices_set) + else (idx, REMOVED) + ) for idx, arg in enumerate(raw_args) ] removed_none_args = [idx for idx, val in raw_args if val == REMOVED] @@ -5546,7 +5540,7 @@ def __init__(self, *, kernel_idx, grid, tma_descriptor_metadata, kernel_args): super().__init__( None, - NoneLayout(device=self.device), # type: ignore[arg-type] + 
NoneLayout(device=self.device), inputs, tuple(constant_args), kwargs, @@ -5615,7 +5609,7 @@ def get_unbacked_symbol_defs(self) -> OrderedSet[sympy.Symbol]: def __init__(self, op_overload, x, *constant_args): super().__init__( None, - NoneLayout(device=x.get_device()), # type: ignore[arg-type] + NoneLayout(device=x.get_device()), self.unwrap_storage([x]), constant_args, op_overload=op_overload, @@ -5667,7 +5661,7 @@ def create(cls, dst, src, non_blocking: bool = False): inputs = [cls.realize_input(t) for t in [dst, src]] constant_args = (non_blocking,) result = InplaceCopyFallback( - NoneLayout(device=dst.get_device()), # type: ignore[arg-type] + NoneLayout(device=dst.get_device()), inputs, constant_args, ) @@ -5706,7 +5700,7 @@ def __init__(self, variable, new_size): assert isinstance(new_size, int), "TODO: dynamic shapes" super().__init__( None, - NoneLayout(device=variable.get_device()), # type: ignore[arg-type] + NoneLayout(device=variable.get_device()), self.unwrap_storage([variable]), constant_args=(new_size,), ) @@ -5802,7 +5796,7 @@ def __init__( super().__init__( None, - NoneLayout(device=x.get_device()), # type: ignore[arg-type] + NoneLayout(device=x.get_device()), self.unwrap_storage(tensors), constant_args, {"reduce": reduce, "include_self": include_self}, @@ -5850,7 +5844,7 @@ def __init__(self, op_overload, x, indices, values, accumulate): cpp_kernel_name = "aoti_torch_index_put_out" super().__init__( None, - NoneLayout(device=x.get_device()), # type: ignore[arg-type] + NoneLayout(device=x.get_device()), self.unwrap_storage(tensors), (accumulate,), python_kernel_name="aten.index_put_", @@ -5911,7 +5905,9 @@ def should_allocate(self): def __init__(self, sym, keypath, data): data.realize() - super().__init__(None, NoneLayout(device=torch.device("cpu")), self.unwrap_storage([data])) # type: ignore[arg-type] + super().__init__( + None, NoneLayout(device=torch.device("cpu")), self.unwrap_storage([data]) + ) self.sym = sym self.keypath = keypath @@ -5937,10 +5933,10 @@ def __init__(self, scalar, msg): super().__init__( # Buffer(name, layotu) None, - NoneLayout(device=torch.device("cpu")), # type: ignore[arg-type] + NoneLayout(device=torch.device("cpu")), # InputsKernel(inputs) [], - ) # type: ignore[arg-type] + ) self.scalar = scalar self.msg = msg @@ -6243,7 +6239,7 @@ def export_extern_kernel_node(self): return [*args, *ordered_kwargs] serializer = GraphModuleSerializer(None, None) # type: ignore[arg-type] - named_arguments = serializer.serialize_inputs(self.op_overload, args, kwargs) # type: ignore[arg-type] + named_arguments = serializer.serialize_inputs(self.op_overload, args, kwargs) # serialize_outputs def handle_single_output(return_type, output): @@ -6749,7 +6745,7 @@ def handle_sym_expr(stride): with V.set_graph_handler(subgraph.graph): subgraph.graph.run(*fake_operands) - outputs = subgraph.graph.graph_outputs # type: ignore[union-attr] + outputs = subgraph.graph.graph_outputs device = operands[0].get_device() invoke_subgraph = InvokeSubgraph( subgraph=subgraph, @@ -6807,8 +6803,8 @@ def __init__( super().__init__( name=None, - layout=layout, # type: ignore[arg-type] - inputs=inputs, # type: ignore[list-item] + layout=layout, + inputs=inputs, ) self.name = V.graph.register_buffer(self) @@ -6923,8 +6919,8 @@ def __init__( super().__init__( name=None, - layout=layout, # type: ignore[arg-type] - inputs=carried_inputs + additional_inputs, # type: ignore[list-item] + layout=layout, + inputs=carried_inputs + additional_inputs, ) self.name = V.graph.register_buffer(self) diff 
--git a/torch/utils/benchmark/utils/fuzzer.py b/torch/utils/benchmark/utils/fuzzer.py index 5f69196960c26e..831de4508ec26a 100644 --- a/torch/utils/benchmark/utils/fuzzer.py +++ b/torch/utils/benchmark/utils/fuzzer.py @@ -373,7 +373,7 @@ def __init__( """ import numpy as np if seed is None: - seed = np.random.RandomState().randint(0, 2 ** 32 - 1, dtype=np.int64) + seed = int(np.random.RandomState().randint(0, 2 ** 32 - 1, dtype=np.int64)) self._seed = seed self._parameters = Fuzzer._unpack(parameters, FuzzedParameter) self._tensors = Fuzzer._unpack(tensors, FuzzedTensor) From 6ad52db8c8d4704a545f9b4b4743f251c0ae2e8c Mon Sep 17 00:00:00 2001 From: Laith Sakka Date: Mon, 4 Nov 2024 11:31:01 -0800 Subject: [PATCH 068/503] use torch.sym_sum instead of incremental sum in _cat_meta (#139653) Pull Request resolved: https://github.com/pytorch/pytorch/pull/139653 Approved by: https://github.com/ezyang --- torch/_prims/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/torch/_prims/__init__.py b/torch/_prims/__init__.py index 8a102876eec24e..62eddf3b97d209 100644 --- a/torch/_prims/__init__.py +++ b/torch/_prims/__init__.py @@ -1773,12 +1773,12 @@ def _cat_meta(tensors: Sequence[TensorLikeType], dim: int) -> TensorLikeType: # Verifies same shape (except in the concat dimension) assert dim >= 0 shape = tensors[0].shape - concat_length = 0 + sym_sum_args = [] for tensor_idx, tensor in enumerate(tensors): assert len(shape) == len(tensor.shape) for idx, (common_length, length) in enumerate(zip(shape, tensor.shape)): if idx == dim: - concat_length = concat_length + length + sym_sum_args.append(length) else: torch._check( length == common_length, @@ -1788,7 +1788,7 @@ def _cat_meta(tensors: Sequence[TensorLikeType], dim: int) -> TensorLikeType: ) new_shape = list(tensors[0].shape).copy() - new_shape[dim] = concat_length + new_shape[dim] = torch.sym_sum(sym_sum_args) return TensorMeta( tensors[0], shape=new_shape, From 356fc41ae04816fe137bc85ccca4087ede291e3e Mon Sep 17 00:00:00 2001 From: "xinan.lin" Date: Mon, 4 Nov 2024 18:29:11 -0800 Subject: [PATCH 069/503] [Intel GPU] Avoid target_link_libraries twice for torch_xpu_ops which will potentially cause multiple definition symbol linker error. (#139024) [Intel GPU] Avoid target_link_libraries twice for torch_xpu_ops which will potentially cause multiple definition symbol linker error. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139024 Approved by: https://github.com/EikanWang, https://github.com/fengyuan14, https://github.com/jansel --- caffe2/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index a99a0e428a6a21..8c14876b625cac 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -1084,12 +1084,12 @@ if(USE_XPU) target_link_libraries(torch_xpu PRIVATE torch_xpu_ops) if(MSVC) # Windows - target_link_libraries(torch_xpu PRIVATE - "-WHOLEARCHIVE:\"$\"") + target_link_options(torch_xpu PRIVATE + "-WHOLEARCHIVE:$") else() # Linux - target_link_libraries(torch_xpu PRIVATE - "-Wl,--whole-archive,\"$\" -Wl,--no-whole-archive") + target_link_options(torch_xpu PRIVATE + "-Wl,--whole-archive,$,--no-whole-archive") endif() # Set cached ${ATen_XPU_INCLUDE_DIRS} to torch From ffb7a08921239784636dc1e41f69990522c4d76e Mon Sep 17 00:00:00 2001 From: zeshengzong Date: Tue, 5 Nov 2024 08:42:35 +0000 Subject: [PATCH 070/503] Fix torch.histc not checking min > max on cuda for int8 tensors (#139372) Fixes #139360 https://github.com/pytorch/pytorch/blob/86e6513c86c49f219cfc8c539de2ccf9de97ccf0/aten/src/ATen/native/cuda/SummaryOps.cu#L323-L324 Assign `min` and `max` to with low-precision input_t variable `minvalue` and `maxvalue` cause wrong comparing result in following check in here: https://github.com/pytorch/pytorch/blob/86e6513c86c49f219cfc8c539de2ccf9de97ccf0/aten/src/ATen/native/cuda/SummaryOps.cu#L353 ![image](https://github.com/user-attachments/assets/0d5c87f4-3dc6-48bb-bcc8-b1803e7cd487) Change type of `minvalue` and `maxvalue` to fix it, similar like in line: https://github.com/pytorch/pytorch/blob/86e6513c86c49f219cfc8c539de2ccf9de97ccf0/aten/src/ATen/native/cuda/SummaryOps.cu#L280-L282 **Test Result** ```bash $ pytest test/test_reductions.py -vv ``` ![image](https://github.com/user-attachments/assets/6b5d0d48-ebc2-4a8c-85f4-dbad147c086c) ```bash $ lintrunner ``` ![image](https://github.com/user-attachments/assets/f97c2d6d-78ea-4439-a1ba-907bc9defad7) Pull Request resolved: https://github.com/pytorch/pytorch/pull/139372 Approved by: https://github.com/eqy --- aten/src/ATen/native/cuda/SummaryOps.cu | 6 +++-- test/test_reductions.py | 24 +++++++++++++++++++ .../_internal/common_methods_invocations.py | 2 +- 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/aten/src/ATen/native/cuda/SummaryOps.cu b/aten/src/ATen/native/cuda/SummaryOps.cu index f9ceb9bdf7e0b3..d70443ad35d46f 100644 --- a/aten/src/ATen/native/cuda/SummaryOps.cu +++ b/aten/src/ATen/native/cuda/SummaryOps.cu @@ -320,8 +320,10 @@ Tensor _histc_cuda_template( std::nullopt /* layout */, DeviceType::CUDA, std::nullopt /* pin_memory */); - input_t minvalue = min; - input_t maxvalue = max; + using bounds_t = at::acc_type; + bounds_t minvalue = min; + bounds_t maxvalue = max; + if (min == max && self.numel() > 0) { minvalue = *self.min().cpu().const_data_ptr(); maxvalue = *self.max().cpu().const_data_ptr(); diff --git a/test/test_reductions.py b/test/test_reductions.py index 7bb0a79e59c268..57486414010222 100644 --- a/test/test_reductions.py +++ b/test/test_reductions.py @@ -3116,6 +3116,30 @@ def test_histc_lowp(self, device, dtype): actual) self.assertEqual(actual.dtype, dtype) + @dtypes(torch.uint8, torch.int8, torch.int, torch.long, torch.float, torch.double) + def test_histc_min_max_errors(self, device, dtype): + with self.assertRaisesRegex(RuntimeError, "max must be larger than min"): + 
torch.histc(torch.tensor([1., 2., 3.], dtype=dtype, device=device), bins=4, min=5, max=1) + + @dtypes(torch.float, torch.double) + def test_histc_min_max_corner_cases(self, device, dtype): + actual = torch.histc( + torch.tensor([1., 2, 1], dtype=dtype, device=device), + bins=4, min=5, max=5) + self.assertEqual( + torch.tensor([2, 0, 0, 1], dtype=dtype, device=device), + actual) + + @onlyCUDA + @dtypes(torch.uint8, torch.int8, torch.int, torch.long) + def test_histc_min_max_corner_cases_cuda(self, device, dtype): + actual = torch.histc( + torch.tensor([1., 2, 1], dtype=dtype, device=device), + bins=4, min=5, max=5) + self.assertEqual( + torch.tensor([2, 0, 0, 1], dtype=dtype, device=device), + actual) + """ Runs torch.histogram and numpy.histogram on the specified input parameters and asserts that their output is equal. diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index 0df201fb6f1339..b9ec84b8cd9cf4 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -19375,7 +19375,7 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): )), OpInfo('histc', dtypes=floating_types_and(torch.bfloat16, torch.float16), - dtypesIfCUDA=floating_types_and(torch.int8, torch.int16, torch.int32, torch.int64), + dtypesIfCUDA=floating_types_and(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64), sample_inputs_func=sample_inputs_histc, supports_out=True, supports_autograd=False, From e84d1121ad66a453c8c24fcc098625e2e9764fca Mon Sep 17 00:00:00 2001 From: Xuehai Pan Date: Tue, 5 Nov 2024 15:29:47 +0800 Subject: [PATCH 071/503] Deprecate `torch._utils.is_compiling()` and `torch._dynamo.external_utils.is_compiling()` (#127690) This PR is split from PR #126898. 
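A minimal sketch of the deprecation shim this stack applies (the stacked PR is referenced below), assuming a hypothetical `legacy_is_compiling` helper standing in for `torch._utils.is_compiling` / `torch._dynamo.external_utils.is_compiling`; the real change simply decorates those existing helpers and points callers at `torch.compiler.is_compiling`:

```python
# Minimal sketch (not the actual patch): `legacy_is_compiling` is a
# hypothetical stand-in for the deprecated helpers; new call sites should
# query torch.compiler.is_compiling() directly.
from typing_extensions import deprecated

import torch


@deprecated(
    "`legacy_is_compiling` is deprecated. Use `torch.compiler.is_compiling` instead.",
    category=FutureWarning,
)
def legacy_is_compiling() -> bool:
    # The shim just forwards to the public API.
    return torch.compiler.is_compiling()


def use_eager_fast_path() -> bool:
    # Preferred pattern after this change: call the public API directly.
    return not torch.compiler.is_compiling()
```

Calling the shim keeps working but emits a `FutureWarning`, so downstream code can migrate to the public API at its own pace.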
- #126898 ------ Pull Request resolved: https://github.com/pytorch/pytorch/pull/127690 Approved by: https://github.com/Skylion007, https://github.com/malfet --- test/dynamo/test_skip_non_tensor.py | 10 +++++----- test/export/test_torchbind.py | 2 +- test/functorch/test_memory_efficient_fusion.py | 2 +- test/inductor/test_distributed_patterns.py | 4 ++-- test/test_nestedtensor.py | 4 ++-- test/test_optim.py | 2 +- torch/_dynamo/decorators.py | 3 +-- torch/_dynamo/external_utils.py | 5 +++++ torch/_functorch/apis.py | 6 +++--- torch/_functorch/eager_transforms.py | 4 ++-- torch/_higher_order_ops/associative_scan.py | 2 +- torch/_higher_order_ops/scan.py | 2 +- torch/_utils.py | 6 +++++- .../algorithms/ddp_comm_hooks/default_hooks.py | 2 +- torch/distributed/tensor/parallel/_utils.py | 11 +++++------ torch/nn/modules/module.py | 4 +--- torch/nn/parallel/distributed.py | 4 ++-- torch/optim/_adafactor.py | 4 ++-- torch/optim/adadelta.py | 8 ++++---- torch/optim/adagrad.py | 2 +- torch/optim/adam.py | 8 ++++---- torch/optim/adamax.py | 8 ++++---- torch/optim/adamw.py | 8 ++++---- torch/optim/asgd.py | 6 +++--- torch/optim/nadam.py | 6 +++--- torch/optim/optimizer.py | 11 +++++------ torch/optim/radam.py | 6 +++--- torch/optim/rmsprop.py | 8 ++++---- torch/optim/rprop.py | 8 ++++---- torch/optim/sgd.py | 2 +- torch/testing/_internal/optests/generate_tests.py | 2 +- 31 files changed, 82 insertions(+), 78 deletions(-) diff --git a/test/dynamo/test_skip_non_tensor.py b/test/dynamo/test_skip_non_tensor.py index 72153d26a1ff09..48c4022ef28fb6 100644 --- a/test/dynamo/test_skip_non_tensor.py +++ b/test/dynamo/test_skip_non_tensor.py @@ -12,12 +12,12 @@ def user_function(): - return torch._utils.is_compiling() + return torch.compiler.is_compiling() def user_generator(): for _ in range(1): - yield torch._utils.is_compiling() + yield torch.compiler.is_compiling() return @@ -38,7 +38,7 @@ def forward(self, x): global _variable, _variable_2 if self.mode == 1: - if torch._utils.is_compiling(): + if torch.compiler.is_compiling(): _variable += 1 else: _variable_2 += 1 @@ -46,7 +46,7 @@ def forward(self, x): if user_function(): _variable += 1 elif self.mode == 3: - lambda_f = lambda: torch._utils.is_compiling() # noqa: E731 + lambda_f = lambda: torch.compiler.is_compiling() # noqa: E731 if lambda_f(): _variable += 1 elif self.mode == 4: @@ -163,7 +163,7 @@ def __len__(self): def test_do_not_skip_side_effects(self): # https://github.com/pytorch/pytorch/issues/110765 - # By invoking torch._utils.is_compiling(), + # By invoking torch.compiler.is_compiling(), # there may be side-effects inconsistent with eager when # compiling. 
Thus we force dynamo to commit the graph, # even if it does not perform any tensor operation diff --git a/test/export/test_torchbind.py b/test/export/test_torchbind.py index fd9f98199d37b7..53e2ffcc454c87 100644 --- a/test/export/test_torchbind.py +++ b/test/export/test_torchbind.py @@ -1315,7 +1315,7 @@ def f(tq, x): f(_empty_tensor_queue(), x), torch.compile(f, backend=backend)(_empty_tensor_queue(), x), ) - if not torch._dynamo.is_compiling() and backend == "eager": + if not torch.compiler.is_compiling() and backend == "eager": self.assertExpectedInline( backend.graphs[0].code.strip(), """\ diff --git a/test/functorch/test_memory_efficient_fusion.py b/test/functorch/test_memory_efficient_fusion.py index bfca66d333b963..d07fb136f5e7cf 100644 --- a/test/functorch/test_memory_efficient_fusion.py +++ b/test/functorch/test_memory_efficient_fusion.py @@ -278,7 +278,7 @@ def test_hash_with_numbers(self): # Test to repro issue with fx_graph_cse when # hash((primals_2, 1.0)) == hash((primals_2, 1)) - if torch._dynamo.is_compiling(): + if torch.compiler.is_compiling(): self.skipTest("Unsupported if test run is compiled") def f(inpt, osize): diff --git a/test/inductor/test_distributed_patterns.py b/test/inductor/test_distributed_patterns.py index fd446370434e94..31f91dce0a1296 100644 --- a/test/inductor/test_distributed_patterns.py +++ b/test/inductor/test_distributed_patterns.py @@ -91,13 +91,13 @@ def bw_post_hook(mod, gI, gO): def init_module_bw_hooks(allow_eager): def bw_pre_hook(mod, gO): - assert allow_eager or torch._dynamo.is_compiling() + assert allow_eager or torch.compiler.is_compiling() assert mod.weight.size() == (10, 10) mod.hook_count_pre.add_(1) return (torch.sin(gO[0] + 1.2),) def bw_post_hook(mod, gI, gO): - assert allow_eager or torch._dynamo.is_compiling() + assert allow_eager or torch.compiler.is_compiling() assert mod.weight.size() == (10, 10) mod.hook_count_post.add_(1) return (torch.sin(gI[0] + 3.4),) diff --git a/test/test_nestedtensor.py b/test/test_nestedtensor.py index ba2af0927c8e12..59d51ceb3bc076 100644 --- a/test/test_nestedtensor.py +++ b/test/test_nestedtensor.py @@ -4354,7 +4354,7 @@ def test_jagged_op_different_output_shape_dim( nt = torch.nested.as_nested_tensor(ts, layout=torch.jagged) out = func(nt, dim=rd, keepdim=keepdim) ref_shape = ref_shape_keepdim if keepdim else ref_shape_no_keepdim - if not torch.compiler.is_compiling: # if not using torch dynamo + if not torch.compiler.is_compiling(): # if not using torch dynamo self.assertEqual(len(out.shape), len(ref_shape)) for o, r in zip(out.shape, ref_shape): if r is not None: @@ -4597,7 +4597,7 @@ def test_layer_norm_reduce_ragged_idx_1( # requires_grad = False does not currently work with dynamo tests and throws this error: # AssertionError: SymInts must use SymNodeVariable. # If the underlying value is static, we will create a ConstantVariable and specialize. 
- if torch._dynamo.is_compiling() and not requires_grad: + if torch.compiler.is_compiling() and not requires_grad: return tensor_lists = self._get_example_tensor_lists( diff --git a/test/test_optim.py b/test/test_optim.py index 046b8728e3c004..211e91ab36b85e 100644 --- a/test/test_optim.py +++ b/test/test_optim.py @@ -288,7 +288,7 @@ def test_param_group_with_lrscheduler_goes_right_direction( inpt = torch.randn(5, device=device, dtype=dtype) # avoid endless recompiles by wrapping LR in a tensor if we're compiling - lr = torch.tensor(0.01) if torch._utils.is_compiling() else 0.01 + lr = torch.tensor(0.01) if torch.compiler.is_compiling() else 0.01 optimizer = optim_cls([{"params": [weight]}, {"params": [bias], "lr": lr}]) schedulers = [scheduler_c(optimizer) for scheduler_c in schedulers_c] diff --git a/torch/_dynamo/decorators.py b/torch/_dynamo/decorators.py index 73a942c6fbab74..fbf0770c17609a 100644 --- a/torch/_dynamo/decorators.py +++ b/torch/_dynamo/decorators.py @@ -19,7 +19,6 @@ RunOnlyContext, ) from .exc import IncorrectUsage -from .external_utils import is_compiling from .utils import is_function @@ -546,7 +545,7 @@ def mark_static(t, index=None): instances of the nn.Module can have different values of the attributes. The key point here is that the attributes are static. """ - if is_compiling(): + if torch.compiler.is_compiling(): if index is None: for s in t.size(): comptime.force_static(s) diff --git a/torch/_dynamo/external_utils.py b/torch/_dynamo/external_utils.py index 1c353efab73c98..534c9640a64643 100644 --- a/torch/_dynamo/external_utils.py +++ b/torch/_dynamo/external_utils.py @@ -3,6 +3,7 @@ import functools import warnings from typing import Any, Callable, List, Optional, Union +from typing_extensions import deprecated import torch import torch.utils._pytree as pytree @@ -14,6 +15,10 @@ np = None # type: ignore[assignment] +@deprecated( + "`torch._dynamo.external_utils.is_compiling` is deprecated. Use `torch.compiler.is_compiling` instead.", + category=FutureWarning, +) def is_compiling() -> bool: """ Indicates whether we are tracing/compiling with torch.compile() or torch.export(). diff --git a/torch/_functorch/apis.py b/torch/_functorch/apis.py index d906f3c906c989..db252d8ca6d7f7 100644 --- a/torch/_functorch/apis.py +++ b/torch/_functorch/apis.py @@ -191,7 +191,7 @@ def vmap( vmap does not provide general autobatching or handle variable-length sequences out of the box. """ - from torch._dynamo import is_compiling + from torch.compiler import is_compiling _check_randomness_arg(randomness) if not (chunk_size is None or chunk_size > 0): @@ -393,7 +393,7 @@ def grad(func: Callable, argnums: argnums_t = 0, has_aux: bool = False) -> Calla """ # To avoid cyclical dependency. 
import torch._functorch.eager_transforms as eager_transforms - from torch._dynamo import is_compiling + from torch.compiler import is_compiling def wrapper(*args, **kwargs): return eager_transforms.grad_impl(func, argnums, has_aux, args, kwargs) @@ -435,8 +435,8 @@ def grad_and_value( See :func:`grad` for examples """ - from torch._dynamo import is_compiling from torch._functorch import eager_transforms + from torch.compiler import is_compiling def wrapper(*args, **kwargs): return eager_transforms.grad_and_value_impl( diff --git a/torch/_functorch/eager_transforms.py b/torch/_functorch/eager_transforms.py index d389c7fda78949..7a7f724b3f3689 100644 --- a/torch/_functorch/eager_transforms.py +++ b/torch/_functorch/eager_transforms.py @@ -764,7 +764,7 @@ def compute_jacobian_preallocate_and_copy(): # Dynamo does not support HOP composition if their inner function is # annotated with @functools.wraps(...). We circumvent this issue by applying # wraps only if we're not tracing with dynamo. - if not torch._dynamo.is_compiling(): + if not torch.compiler.is_compiling(): wrapper_fn = wraps(func)(wrapper_fn) return wrapper_fn @@ -1344,7 +1344,7 @@ def push_jvp(basis): # Dynamo does not support HOP composition if their inner function is # annotated with @functools.wraps(...). We circumvent this issue by applying # wraps only if we're not tracing with dynamo. - if not torch._dynamo.is_compiling(): + if not torch.compiler.is_compiling(): wrapper_fn = wraps(func)(wrapper_fn) return wrapper_fn diff --git a/torch/_higher_order_ops/associative_scan.py b/torch/_higher_order_ops/associative_scan.py index d58d6b26bd33f7..204a17d6e0a1f9 100644 --- a/torch/_higher_order_ops/associative_scan.py +++ b/torch/_higher_order_ops/associative_scan.py @@ -132,7 +132,7 @@ def add(x: torch.Tensor, y: torch.Tensor): "Combine_mode must either 'pointwise' or 'generic', but got {combine_mode}" ) - if not torch._dynamo.is_compiling(): + if not torch.compiler.is_compiling(): with _set_compilation_env(), torch._dynamo.utils.disable_cache_limit(): return torch.compile(associative_scan, fullgraph=True)( combine_fn, xs, dim, reverse=reverse, combine_mode=combine_mode diff --git a/torch/_higher_order_ops/scan.py b/torch/_higher_order_ops/scan.py index a5a08fea26a317..ec117788ab85a8 100644 --- a/torch/_higher_order_ops/scan.py +++ b/torch/_higher_order_ops/scan.py @@ -191,7 +191,7 @@ def run_flattened_scan(combine_fn, leaves_init, leaves_xs, dim, reverse): combine_fn, leaves_init, leaves_xs, dim, reverse, additional_inputs=[] ) - if not torch._dynamo.is_compiling(): + if not torch.compiler.is_compiling(): from torch._dynamo.backends.debugging import ( make_eager_backend_with_torch_function_mode, ) diff --git a/torch/_utils.py b/torch/_utils.py index e5c3a14ca81d7d..b3bda10851d6b0 100644 --- a/torch/_utils.py +++ b/torch/_utils.py @@ -7,7 +7,7 @@ import warnings from collections import defaultdict from typing import Any, Callable, DefaultDict, Generic, List, Optional -from typing_extensions import ParamSpec +from typing_extensions import deprecated, ParamSpec import torch @@ -882,6 +882,10 @@ def classproperty(func): return _ClassPropertyDescriptor(func) +@deprecated( + "`torch._utils.is_compiling` is deprecated. Use `torch.compiler.is_compiling` instead.", + category=FutureWarning, +) def is_compiling() -> bool: """ Indicates whether we are tracing/compiling with torch.compile() or torch.export(). 
diff --git a/torch/distributed/algorithms/ddp_comm_hooks/default_hooks.py b/torch/distributed/algorithms/ddp_comm_hooks/default_hooks.py index b012c94ffcaa8d..ecf2b55f3ec50a 100644 --- a/torch/distributed/algorithms/ddp_comm_hooks/default_hooks.py +++ b/torch/distributed/algorithms/ddp_comm_hooks/default_hooks.py @@ -75,7 +75,7 @@ def decompress(fut): decompressed_tensor.copy_(value) return decompressed_tensor - if torch._utils.is_compiling(): + if torch.compiler.is_compiling(): grad = dist._functional_collectives.all_reduce( compressed_tensor, "sum", group_to_use ) diff --git a/torch/distributed/tensor/parallel/_utils.py b/torch/distributed/tensor/parallel/_utils.py index f50b5dd64768d0..2b5aaeabe85cbf 100644 --- a/torch/distributed/tensor/parallel/_utils.py +++ b/torch/distributed/tensor/parallel/_utils.py @@ -7,15 +7,14 @@ from torch.distributed.tensor.placement_types import Placement -try: - from torch._dynamo.external_utils import is_compiling as is_torchdynamo_compiling -except Exception: +LayoutsType = Union[Placement, Tuple[Placement, ...]] - def is_torchdynamo_compiling(): # type: ignore[misc] - return False +def is_torchdynamo_compiling() -> bool: + # Use local function to avoid circular imports + from torch.compiler import is_compiling -LayoutsType = Union[Placement, Tuple[Placement, ...]] + return is_compiling() def _deprecate_warnings(func_name: str, extra_msg: str) -> None: diff --git a/torch/nn/modules/module.py b/torch/nn/modules/module.py index dc3a85a03d3cab..7b3970944edfe5 100644 --- a/torch/nn/modules/module.py +++ b/torch/nn/modules/module.py @@ -1828,8 +1828,6 @@ def inner(): return result - from torch.compiler import is_compiling - # This is technically not behavior equivalent when compiling, but it's # incredibly unlikely we will ever support throwing an exception in NN # module, and then catching it here, and then reraising it, and then @@ -1837,7 +1835,7 @@ def inner(): # The reraise here just gunks up our exception handling for no good # reason. Don't try to run the always called hooks in event of # exception. - if is_compiling(): + if torch.compiler.is_compiling(): return inner() try: diff --git a/torch/nn/parallel/distributed.py b/torch/nn/parallel/distributed.py index aad7e6c5402cf8..a850cb4187a93e 100644 --- a/torch/nn/parallel/distributed.py +++ b/torch/nn/parallel/distributed.py @@ -1487,7 +1487,7 @@ def _lazy_init(self): def _should_disable_cpp_reducer(self) -> bool: return self._use_python_reducer and ( - torch._utils.is_compiling() or self._force_to_disable_cpp_reducer + torch.compiler.is_compiling() or self._force_to_disable_cpp_reducer ) def _pre_forward(self, *inputs, **kwargs): @@ -1500,7 +1500,7 @@ def _pre_forward(self, *inputs, **kwargs): h.remove() self._accum_grad_hooks.clear() - if not self._lazy_init_ran and not torch._utils.is_compiling(): + if not self._lazy_init_ran and not torch.compiler.is_compiling(): self._lazy_init() if self._delay_all_reduce_all_params: diff --git a/torch/optim/_adafactor.py b/torch/optim/_adafactor.py index 65f41d6ab182ed..340c3f3f26974b 100644 --- a/torch/optim/_adafactor.py +++ b/torch/optim/_adafactor.py @@ -505,7 +505,7 @@ def _multi_tensor_adafactor( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. 
- if not torch._utils.is_compiling() and device_state_steps[0].is_cpu: + if not torch.compiler.is_compiling() and device_state_steps[0].is_cpu: torch._foreach_add_( device_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) @@ -624,7 +624,7 @@ def adafactor( See :class:`~torch.optim.Adafactor` for details. """ - if not torch._utils.is_compiling() and not all( + if not torch.compiler.is_compiling() and not all( isinstance(t, torch.Tensor) for t in state_steps ): raise RuntimeError( diff --git a/torch/optim/adadelta.py b/torch/optim/adadelta.py index 60c37680aeb57f..249fe53dce2c78 100644 --- a/torch/optim/adadelta.py +++ b/torch/optim/adadelta.py @@ -259,7 +259,7 @@ def _single_tensor_adadelta( has_complex: bool, ): # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices( supports_xla=False ) @@ -315,7 +315,7 @@ def _multi_tensor_adadelta( assert not differentiable, "_foreach ops don't support autograd" # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices( supports_xla=False ) @@ -352,7 +352,7 @@ def _multi_tensor_adadelta( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. - if not torch._utils.is_compiling() and device_state_steps[0].is_cpu: + if not torch.compiler.is_compiling() and device_state_steps[0].is_cpu: torch._foreach_add_( device_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) @@ -423,7 +423,7 @@ def adadelta( # this check is slow during compilation, so we skip it # if it's strictly needed we can add this check back in dynamo - if not torch._utils.is_compiling() and not all( + if not torch.compiler.is_compiling() and not all( isinstance(t, torch.Tensor) for t in state_steps ): raise RuntimeError( diff --git a/torch/optim/adagrad.py b/torch/optim/adagrad.py index c45df14727c69a..8e08b62d1a19f8 100644 --- a/torch/optim/adagrad.py +++ b/torch/optim/adagrad.py @@ -451,7 +451,7 @@ def _multi_tensor_adagrad( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. 
- if not torch._utils.is_compiling() and device_state_steps[0].is_cpu: + if not torch.compiler.is_compiling() and device_state_steps[0].is_cpu: torch._foreach_add_( device_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) diff --git a/torch/optim/adam.py b/torch/optim/adam.py index 23337e63525680..518831229f8bf1 100644 --- a/torch/optim/adam.py +++ b/torch/optim/adam.py @@ -353,7 +353,7 @@ def _single_tensor_adam( step_t = state_steps[i] # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert ( param.device.type == step_t.device.type @@ -466,7 +466,7 @@ def _multi_tensor_adam( ) # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices( supports_xla=False ) @@ -520,7 +520,7 @@ def _multi_tensor_adam( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. - if not torch._utils.is_compiling() and device_state_steps[0].is_cpu: + if not torch.compiler.is_compiling() and device_state_steps[0].is_cpu: torch._foreach_add_( device_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) @@ -762,7 +762,7 @@ def adam( # this check is slow during compilation, so we skip it # if it's strictly needed we can add this check back in dynamo - if not torch._utils.is_compiling() and not all( + if not torch.compiler.is_compiling() and not all( isinstance(t, torch.Tensor) for t in state_steps ): raise RuntimeError( diff --git a/torch/optim/adamax.py b/torch/optim/adamax.py index 4459d033c1e36b..f03e3151bdb0cf 100644 --- a/torch/optim/adamax.py +++ b/torch/optim/adamax.py @@ -248,7 +248,7 @@ def _single_tensor_adamax( step_t = state_steps[i] # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert ( param.device.type == step_t.device.type @@ -320,7 +320,7 @@ def _multi_tensor_adamax( return # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices( supports_xla=False ) @@ -358,7 +358,7 @@ def _multi_tensor_adamax( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. - if not torch._utils.is_compiling() and grouped_state_steps[0].is_cpu: + if not torch.compiler.is_compiling() and grouped_state_steps[0].is_cpu: torch._foreach_add_( grouped_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) @@ -435,7 +435,7 @@ def adamax( See :class:`~torch.optim.Adamax` for details. 
""" - if not torch._utils.is_compiling() and not all( + if not torch.compiler.is_compiling() and not all( isinstance(t, torch.Tensor) for t in state_steps ): raise RuntimeError( diff --git a/torch/optim/adamw.py b/torch/optim/adamw.py index fc6aec32b2e307..5deffcabd071af 100644 --- a/torch/optim/adamw.py +++ b/torch/optim/adamw.py @@ -350,7 +350,7 @@ def _single_tensor_adamw( step_t = state_steps[i] # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert ( param.device.type == step_t.device.type @@ -463,7 +463,7 @@ def _multi_tensor_adamw( ) # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices( supports_xla=False ) @@ -516,7 +516,7 @@ def _multi_tensor_adamw( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. - if not torch._utils.is_compiling() and device_state_steps[0].is_cpu: + if not torch.compiler.is_compiling() and device_state_steps[0].is_cpu: torch._foreach_add_( device_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) @@ -744,7 +744,7 @@ def adamw( See :class:`~torch.optim.AdamW` for details. """ - if not torch._utils.is_compiling() and not all( + if not torch.compiler.is_compiling() and not all( isinstance(t, torch.Tensor) for t in state_steps ): raise RuntimeError( diff --git a/torch/optim/asgd.py b/torch/optim/asgd.py index 32a52cf9ac4ee5..373a578fbf34a9 100644 --- a/torch/optim/asgd.py +++ b/torch/optim/asgd.py @@ -219,7 +219,7 @@ def _single_tensor_asgd( step_t = state_steps[i] # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert ( param.device.type @@ -292,7 +292,7 @@ def _multi_tensor_asgd( assert not differentiable, "_foreach ops don't support autograd" # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices( supports_xla=False ) @@ -333,7 +333,7 @@ def _multi_tensor_asgd( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. 
- if not torch._utils.is_compiling() and grouped_state_steps[0].is_cpu: + if not torch.compiler.is_compiling() and grouped_state_steps[0].is_cpu: torch._foreach_add_( grouped_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) diff --git a/torch/optim/nadam.py b/torch/optim/nadam.py index 2dd7e130c0d6cb..3828e2b410e952 100644 --- a/torch/optim/nadam.py +++ b/torch/optim/nadam.py @@ -310,7 +310,7 @@ def _single_tensor_nadam( exp_avg_sq = torch.view_as_real(exp_avg_sq) # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert ( param.device.type == mu_product.device.type == step_t.device.type @@ -396,7 +396,7 @@ def _multi_tensor_nadam( assert not differentiable, "_foreach ops don't support autograd" # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices( supports_xla=False ) @@ -437,7 +437,7 @@ def _multi_tensor_nadam( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. - if not torch._utils.is_compiling() and grouped_state_steps[0].is_cpu: + if not torch.compiler.is_compiling() and grouped_state_steps[0].is_cpu: torch._foreach_add_( grouped_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) diff --git a/torch/optim/optimizer.py b/torch/optim/optimizer.py index f3b7e7dac0af84..34bc62e605c013 100644 --- a/torch/optim/optimizer.py +++ b/torch/optim/optimizer.py @@ -26,7 +26,6 @@ import torch import torch.utils.hooks as hooks -from torch._utils import is_compiling from torch.utils._foreach_utils import ( _get_foreach_kernels_supported_devices, _get_fused_kernels_supported_devices, @@ -100,14 +99,14 @@ def _use_grad(self, *args, **kwargs): def _get_value(x): # item is significantly faster than a cpu tensor in eager mode - if not torch.jit.is_scripting() and is_compiling(): + if not torch.jit.is_scripting() and torch.compiler.is_compiling(): return x else: return x.item() if isinstance(x, torch.Tensor) else x def _stack_if_compiling(x): - if not torch.jit.is_scripting() and is_compiling(): + if not torch.jit.is_scripting() and torch.compiler.is_compiling(): return torch.stack(x) else: return x @@ -139,7 +138,7 @@ def wrapper(func): # the capturable flag. If capturable=True, this is not a problem. @functools.wraps(func) def maybe_fallback(*args, **kwargs): - if is_compiling() and ( + if torch.compiler.is_compiling() and ( not kwargs.get("capturable", False) and has_state_steps and (args[state_steps_ind] and args[state_steps_ind][0].is_cuda) @@ -429,7 +428,7 @@ def _cuda_graph_capture_health_check(self) -> None: # Thus, when compiling, inductor will determine if cudagraphs # can be enabled based on whether there is input mutation or CPU tensors. if ( - not is_compiling() + not torch.compiler.is_compiling() and torch.backends.cuda.is_built() and torch.cuda.is_available() ): @@ -516,7 +515,7 @@ def _group_tensors_by_device_and_dtype( Skips this step if we are compiling since this will occur during inductor lowering. 
""" - if is_compiling(): + if torch.compiler.is_compiling(): return {(None, None): (tensorlistlist, list(range(len(tensorlistlist[0]))))} else: return _group_tensors_by_device_and_dtype(tensorlistlist, with_indices) # type: ignore[return-value, arg-type] diff --git a/torch/optim/radam.py b/torch/optim/radam.py index 9a36a2be1841db..bcab947c8e44b7 100644 --- a/torch/optim/radam.py +++ b/torch/optim/radam.py @@ -276,7 +276,7 @@ def _single_tensor_radam( step_t = state_steps[i] # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert ( param.device.type == step_t.device.type @@ -374,7 +374,7 @@ def _multi_tensor_radam( assert not differentiable, "_foreach ops don't support autograd" # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices( supports_xla=False ) @@ -404,7 +404,7 @@ def _multi_tensor_radam( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. - if not torch._utils.is_compiling() and grouped_state_steps[0].is_cpu: + if not torch.compiler.is_compiling() and grouped_state_steps[0].is_cpu: torch._foreach_add_( grouped_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) diff --git a/torch/optim/rmsprop.py b/torch/optim/rmsprop.py index f839ba0f021c67..1e82e3cbc3437a 100644 --- a/torch/optim/rmsprop.py +++ b/torch/optim/rmsprop.py @@ -284,7 +284,7 @@ def _single_tensor_rmsprop( step = state_steps[i] # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert ( param.device.type == step.device.type @@ -357,7 +357,7 @@ def _multi_tensor_rmsprop( assert not differentiable, "_foreach ops don't support autograd" # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert all( p.device.type == step.device.type @@ -402,7 +402,7 @@ def _multi_tensor_rmsprop( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. 
- if not torch._utils.is_compiling() and grouped_state_steps[0].is_cpu: + if not torch.compiler.is_compiling() and grouped_state_steps[0].is_cpu: torch._foreach_add_( grouped_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) @@ -489,7 +489,7 @@ def rmsprop( """ # this check is slow during compilation, so we skip it # if it's strictly needed we can add this check back in dynamo - if not torch._utils.is_compiling() and not all( + if not torch.compiler.is_compiling() and not all( isinstance(t, torch.Tensor) for t in state_steps ): raise RuntimeError( diff --git a/torch/optim/rprop.py b/torch/optim/rprop.py index 538c8ac0a861d8..ed7a744d7d1f3c 100644 --- a/torch/optim/rprop.py +++ b/torch/optim/rprop.py @@ -243,7 +243,7 @@ def _single_tensor_rprop( step = state_steps[i] # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert ( param.device.type == step.device.type @@ -309,7 +309,7 @@ def _multi_tensor_rprop( assert not differentiable, "_foreach ops don't support autograd" # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch._utils.is_compiling() and capturable: + if not torch.compiler.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert all( p.device.type == step.device.type @@ -337,7 +337,7 @@ def _multi_tensor_rprop( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. - if not torch._utils.is_compiling() and grouped_state_steps[0].is_cpu: + if not torch.compiler.is_compiling() and grouped_state_steps[0].is_cpu: torch._foreach_add_( grouped_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) @@ -427,7 +427,7 @@ def rprop( """ # this check is slow during compilation, so we skip it # if it's strictly needed we can add this check back in dynamo - if not torch._utils.is_compiling() and not all( + if not torch.compiler.is_compiling() and not all( isinstance(t, torch.Tensor) for t in state_steps ): raise RuntimeError( diff --git a/torch/optim/sgd.py b/torch/optim/sgd.py index ab70f08b44113c..75cd3279070b4c 100644 --- a/torch/optim/sgd.py +++ b/torch/optim/sgd.py @@ -435,7 +435,7 @@ def _multi_tensor_sgd( if not device_has_sparse_grad: # handle internal item() call if lr is a tensor - if isinstance(lr, torch.Tensor) and torch._utils.is_compiling(): + if isinstance(lr, torch.Tensor) and torch.compiler.is_compiling(): grads_x_lr = torch._foreach_mul(device_grads, -lr) torch._foreach_add_(device_params, grads_x_lr) else: diff --git a/torch/testing/_internal/optests/generate_tests.py b/torch/testing/_internal/optests/generate_tests.py index 7820fed19ccc32..e9798ef89de198 100644 --- a/torch/testing/_internal/optests/generate_tests.py +++ b/torch/testing/_internal/optests/generate_tests.py @@ -565,7 +565,7 @@ def __torch_function__(self, func, types, args=(), kwargs=None): if ( torch.jit.is_tracing() or torch.jit.is_scripting() - or torch._dynamo.is_compiling() + or torch.compiler.is_compiling() ): return func(*args, **kwargs) # Pre-existing code may not use the .default overload. 
If we see an From f551d905522c4b6dab6a08df4be4fd6bf26f0784 Mon Sep 17 00:00:00 2001 From: Robert Hardwick Date: Tue, 5 Nov 2024 12:46:38 +0000 Subject: [PATCH 072/503] Fix for gcc10 torch.compile compiler error when march=aarch64+sve (#137795) Disable tree vectorize in vec_convert.h for gcc10 and aarch64+sve which causes compiler error to occur. ``` /tmp/tmpuqk7lj9j/zx/czx2eyturb6j6m727xhvknkjbdu3y5nqqk66wgxcjkwnxuzvpm5r.cpp:3:18: internal compiler error: in vect_get_vector_types_for_stmt, at tree-vect-stmts.c:12252 3 | extern "C" void kernel(const float* in_ptr0, ``` Fixes #137775 I've not linked a gcc bug report yet as they require a minimal reproducer to be made. Pull Request resolved: https://github.com/pytorch/pytorch/pull/137795 Approved by: https://github.com/malfet --- aten/src/ATen/cpu/vec/vec_base.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/aten/src/ATen/cpu/vec/vec_base.h b/aten/src/ATen/cpu/vec/vec_base.h index 2b29caf5edd61a..bf6d10f6a4a750 100644 --- a/aten/src/ATen/cpu/vec/vec_base.h +++ b/aten/src/ATen/cpu/vec/vec_base.h @@ -1,4 +1,8 @@ #pragma once +#if defined(__GNUC__) && __GNUC__ == 10 && __GNUC_MINOR__ <= 2 && defined(__ARM_FEATURE_SVE) +// Workaround for https: //gcc.gnu.org/bugzilla/show_bug.cgi?id=117161 +#pragma GCC optimize("no-tree-vectorize") +#endif // DO NOT DEFINE STATIC DATA IN THIS HEADER! // See Note [Do not compile initializers with AVX] From 546318e5599ecb6b0ea074cbcd14b6d5a6f96733 Mon Sep 17 00:00:00 2001 From: cyy Date: Tue, 5 Nov 2024 14:01:01 +0000 Subject: [PATCH 073/503] [7/N] Don't skip ASAN on some tests (#139675) Follows #139565 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139675 Approved by: https://github.com/ezyang --- aten/src/ATen/native/Scalar.cpp | 2 ++ test/test_decomp.py | 3 --- torch/csrc/utils/python_scalars.h | 4 +++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/aten/src/ATen/native/Scalar.cpp b/aten/src/ATen/native/Scalar.cpp index 76cf4a0fad67a9..e62b31cfb0c4f0 100644 --- a/aten/src/ATen/native/Scalar.cpp +++ b/aten/src/ATen/native/Scalar.cpp @@ -35,6 +35,8 @@ Scalar item(const Tensor& self) { #endif Scalar _local_scalar_dense_cpu(const Tensor& self) { + // Don't use bool*, since it may take out-of-range byte as bool. + // Instead, we cast explicitly to avoid ASAN error. 
if (self.scalar_type() == kBool) { return Scalar(static_cast(*reinterpret_cast(self.const_data_ptr()))); } diff --git a/test/test_decomp.py b/test/test_decomp.py index 1d21256a763f95..8e0f07e3d38aeb 100644 --- a/test/test_decomp.py +++ b/test/test_decomp.py @@ -545,7 +545,6 @@ class TestDecomp(TestCase): # NB: This actually overlaps with test_comprehensive, but it only # runs on things that are definitely decomposed so it's a lot faster # to run - @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN") @onlyNativeDeviceTypes @skipIfCrossRef @suppress_warnings @@ -553,7 +552,6 @@ class TestDecomp(TestCase): def test_quick(self, device, dtype, op): self.do_cross_ref(device, dtype, op, run_all=False) - @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN") @skipOps("TestDecomp", "test_quick_core_backward", core_backward_failures) @onlyNativeDeviceTypes @skipIfCrossRef @@ -663,7 +661,6 @@ def test_rrelu_with_noise(self, device): self.assertEqual(ref, res) self.assertEqual(noise_ref, noise_res) - @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN") @suppress_warnings @tf32_off() # only tests RNNs since we have py dispsatcher decomps for them diff --git a/torch/csrc/utils/python_scalars.h b/torch/csrc/utils/python_scalars.h index 997425ac7de2ba..eeeebb709c93c7 100644 --- a/torch/csrc/utils/python_scalars.h +++ b/torch/csrc/utils/python_scalars.h @@ -137,7 +137,9 @@ inline PyObject* load_scalar(const void* data, at::ScalarType scalarType) { return PyComplex_FromCComplex( *reinterpret_cast((c10::complex*)data)); case at::kBool: - return PyBool_FromLong(*(bool*)data); + // Don't use bool*, since it may take out-of-range byte as bool. + // Instead, we cast explicitly to avoid ASAN error. + return PyBool_FromLong(static_cast(*(uint8_t*)data)); case at::kBFloat16: return PyFloat_FromDouble( at::convert(*(at::BFloat16*)data)); From 349cd49406f4962343d0c83af989f95c68e4ddc6 Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Mon, 4 Nov 2024 20:37:35 -0800 Subject: [PATCH 074/503] Fix compiler collective TORCH_TRACE and improve code state printing (#139716) Signed-off-by: Edward Z. 
Yang Pull Request resolved: https://github.com/pytorch/pytorch/pull/139716 Approved by: https://github.com/yf225 --- torch/_dynamo/output_graph.py | 8 +--- torch/_dynamo/pgo.py | 71 +++++++++++++++++-------------- torch/_dynamo/symbolic_convert.py | 5 +++ 3 files changed, 47 insertions(+), 37 deletions(-) diff --git a/torch/_dynamo/output_graph.py b/torch/_dynamo/output_graph.py index f690fe3991ccdf..e2d07350062f34 100644 --- a/torch/_dynamo/output_graph.py +++ b/torch/_dynamo/output_graph.py @@ -2,10 +2,8 @@ import collections import contextlib import copy -import dataclasses import functools import itertools -import json import logging import operator import re @@ -1318,11 +1316,9 @@ def run_compiler_collective(self, tx): "artifact", metadata_fn=lambda: { "name": "compiler_collective", - "encoding": "json", + "encoding": "string", }, - payload_fn=lambda: json.dumps( - dataclasses.asdict(ds.local_state), - ), + payload_fn=lambda: ds.local_state.render(), ) with torch.cuda.device(compile_pg.rank() % torch.cuda.device_count()): all_states = [None] * compile_pg.size() diff --git a/torch/_dynamo/pgo.py b/torch/_dynamo/pgo.py index 0943fa4c53ac9d..9af240a4cd03b4 100644 --- a/torch/_dynamo/pgo.py +++ b/torch/_dynamo/pgo.py @@ -4,7 +4,6 @@ import copy import dataclasses import enum -import json import logging import os import pickle @@ -170,6 +169,35 @@ class FrameStateSizeEntry: AutoDynamic, AutoUnset, Tuple[Union[int, AutoDynamic, InferStride], ...] ] = dataclasses.field(default=auto_unset) + def render(self) -> str: + # Special cases + def render_single(s: Union[int, AutoDynamic, AutoUnset, InferStride]) -> str: + if s is auto_dynamic: + return "?" + elif s is auto_unset: + # This basically shouldn't happen, this is for debugging + return "auto unset" + elif isinstance(s, InferStride): + return f"S({s.dim})" + else: + return str(s) + + def render_tuple(ss: Tuple[Union[int, AutoDynamic, InferStride], ...]) -> str: + return "[" + ", ".join(render_single(s) for s in ss) + "]" + + # Common cases + if self.size is auto_dynamic and self.stride is auto_dynamic: + if self.scalar is auto_dynamic: + return "fully dynamic scalar or tensor" + else: + return f"scalar {self.scalar}" + elif self.scalar is auto_dynamic: + if isinstance(self.size, tuple) and isinstance(self.stride, tuple): + return f"tensor size={render_tuple(self.size)} stride={render_tuple(self.stride)}" + + # Fallback + return "unusual {repr(self)}" + def is_size_dynamic(self, dim: int) -> bool: if self.size is auto_dynamic: return True @@ -474,33 +502,14 @@ def get_remote_cache() -> Optional[RemoteCache[JsonDataTy]]: ) -# TODO: this dump format sucks but apparently it's very difficult to json.dumps -# while not indenting inner lists SIGH - - -def _key_asdict(x: object) -> object: - if isinstance(x, CodeId): - return f"{x.filename}:{x.firstlineno}:{x.name}" - else: - return x - - -def _asdict(x: object) -> object: - if isinstance(x, (dict, defaultdict)): - return {_key_asdict(k): _asdict(v) for k, v in x.items()} - elif isinstance(x, (list, tuple)): - return [_asdict(v) for v in x] - elif dataclasses.is_dataclass(x): - return { - field.name: _asdict(getattr(x, field.name)) - for field in dataclasses.fields(x) - } - elif x is auto_unset: - return "auto_unset" - elif x is auto_dynamic: - return "auto_dynamic" - else: - return x +def render_code_state(cs: DefaultDict[CodeId, CodeState]) -> str: + return "\n".join( + f"{k.filename}:{k.firstlineno}:{k.name}:\n" + + "\n".join( + f" {src}: {fs.render()}" for src, fs in v.automatic_dynamic.items() 
+ ) + for k, v in cs.items() + ) def get_code_state() -> DefaultDict[CodeId, CodeState]: @@ -524,7 +533,7 @@ def hit(ty: str) -> DefaultDict[CodeId, CodeState]: trace_structured_artifact( f"get_{ty}_code_state", "string", - lambda: json.dumps(_asdict(_CODE_STATE), indent=1), + lambda: render_code_state(_CODE_STATE), ) _INIT_CODE_STATE = copy.deepcopy(_CODE_STATE) return _CODE_STATE @@ -641,7 +650,7 @@ def put_local_code_state(cache_key: str) -> None: trace_structured_artifact( "put_local_code_state", "string", - lambda: json.dumps(_asdict(_CODE_STATE), indent=1), + lambda: render_code_state(_CODE_STATE), ) @@ -670,7 +679,7 @@ def put_remote_code_state(cache_key: str) -> None: trace_structured_artifact( "put_remote_code_state", "string", - lambda: json.dumps(_asdict(_CODE_STATE), indent=1), + lambda: render_code_state(_CODE_STATE), ) diff --git a/torch/_dynamo/symbolic_convert.py b/torch/_dynamo/symbolic_convert.py index 869495bbf832f7..b76148798c5d48 100644 --- a/torch/_dynamo/symbolic_convert.py +++ b/torch/_dynamo/symbolic_convert.py @@ -230,6 +230,11 @@ class LocalState: default_factory=dict ) + def render(self) -> str: + return "\n".join( + f"{k}: {v.render()}" for k, v in self.automatic_dynamic.items() + ) + # Mutable box that is shared across restarts @dataclasses.dataclass From 53f164cae509eb84b6fbd5b520394f545e55aacf Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 5 Nov 2024 15:12:41 +0000 Subject: [PATCH 075/503] [CUDA][CI][cusparselt] Only CUDA 11.8 ships the libcusparseLt.so.0, CUDA 12 would use PYPI libcusparselt (#138547) since nvidia-cusparselt-cu12 is available and nvidia-cusparselt-cu11 is not available Related: #138175 Pull Request resolved: https://github.com/pytorch/pytorch/pull/138547 Approved by: https://github.com/atalman --- .ci/manywheel/build_cuda.sh | 7 ++++++- .github/scripts/generate_binary_build_matrix.py | 1 + .../generated-linux-binary-manywheel-main.yml | 2 +- .../generated-linux-binary-manywheel-nightly.yml | 12 ++++++------ 4 files changed, 14 insertions(+), 8 deletions(-) diff --git a/.ci/manywheel/build_cuda.sh b/.ci/manywheel/build_cuda.sh index 4eda14a393da75..e8d2bdba75e31a 100644 --- a/.ci/manywheel/build_cuda.sh +++ b/.ci/manywheel/build_cuda.sh @@ -118,7 +118,9 @@ DEPS_SONAME=( "libgomp.so.1" ) -if [[ $USE_CUSPARSELT == "1" ]]; then +# CUDA 11.8 have to ship the libcusparseLt.so.0 with the binary +# since nvidia-cusparselt-cu11 is not available in PYPI +if [[ $USE_CUSPARSELT == "1" && $CUDA_VERSION == "11.8" ]]; then DEPS_SONAME+=( "libcusparseLt.so.0" ) @@ -145,6 +147,7 @@ if [[ $CUDA_VERSION == "12.1" || $CUDA_VERSION == "12.4" ]]; then "/usr/local/cuda/lib64/libcudnn.so.9" "/usr/local/cuda/lib64/libcublas.so.12" "/usr/local/cuda/lib64/libcublasLt.so.12" + "/usr/local/cuda/lib64/libcusparseLt.so.0" "/usr/local/cuda/lib64/libcudart.so.12" "/usr/local/cuda/lib64/libnvToolsExt.so.1" "/usr/local/cuda/lib64/libnvrtc.so.12" @@ -161,6 +164,7 @@ if [[ $CUDA_VERSION == "12.1" || $CUDA_VERSION == "12.4" ]]; then "libcudnn.so.9" "libcublas.so.12" "libcublasLt.so.12" + "libcusparseLt.so.0" "libcudart.so.12" "libnvToolsExt.so.1" "libnvrtc.so.12" @@ -178,6 +182,7 @@ if [[ $CUDA_VERSION == "12.1" || $CUDA_VERSION == "12.4" ]]; then '$ORIGIN/../../nvidia/curand/lib' '$ORIGIN/../../nvidia/cusolver/lib' '$ORIGIN/../../nvidia/cusparse/lib' + '$ORIGIN/../../cusparselt/lib' '$ORIGIN/../../nvidia/nccl/lib' '$ORIGIN/../../nvidia/nvtx/lib' ) diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py index 
145df940631293..56d8d182cb218c 100644 --- a/.github/scripts/generate_binary_build_matrix.py +++ b/.github/scripts/generate_binary_build_matrix.py @@ -64,6 +64,7 @@ "nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | " "nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | " "nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | " + "nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | " "nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | " "nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'" ), diff --git a/.github/workflows/generated-linux-binary-manywheel-main.yml b/.github/workflows/generated-linux-binary-manywheel-main.yml index f34186e1ec6ed1..4b8d26d54bc122 100644 --- a/.github/workflows/generated-linux-binary-manywheel-main.yml +++ b/.github/workflows/generated-linux-binary-manywheel-main.yml @@ -107,7 +107,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_9-cuda12_1 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_9-cuda12_1-test: # Testing diff --git 
a/.github/workflows/generated-linux-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-binary-manywheel-nightly.yml index 3688be9d2cc523..c750531d18d88e 100644 --- a/.github/workflows/generated-linux-binary-manywheel-nightly.yml +++ b/.github/workflows/generated-linux-binary-manywheel-nightly.yml @@ -273,7 +273,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_9-cuda12_1 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_9-cuda12_1-test: # Testing @@ -959,7 +959,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_10-cuda12_1 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine 
== 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_10-cuda12_1-test: # Testing @@ -1645,7 +1645,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_11-cuda12_1 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine 
== 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_11-cuda12_1-test: # Testing @@ -2401,7 +2401,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_12-cuda12_1 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_12-cuda12_1-test: # Testing @@ -3087,7 +3087,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_13-cuda12_1 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | 
nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_13-cuda12_1-test: # Testing @@ -3553,7 +3553,7 @@ jobs: runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" build_name: manywheel-py3_13t-cuda12_1 build_environment: linux-binary-manywheel - PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' + PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | 
nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparselt-cu12==0.6.2; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.21.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' secrets: github-token: ${{ secrets.GITHUB_TOKEN }} manywheel-py3_13t-cuda12_1-test: # Testing From 13eb3b3f6fa19289141b2d154b137041dd0c2a8f Mon Sep 17 00:00:00 2001 From: "Junjie Wang (PyTorch)" Date: Tue, 5 Nov 2024 15:28:03 +0000 Subject: [PATCH 076/503] [Torch Elastic] Fix the bug caused by wrong host address in creating TCPStore server inside dynamic rendezvous (#139702) Summary: During dynamic rendezvous, we shouldn't use the address from the store but just use `self._this_node.addr` directly because sometimes, the store host is not the host of rank0. Passing wrong host will cause timeout error. This is a follow up fix to S463164, for internal tests, we disable the TCPStore sharing for now. Test Plan: CI. Differential Revision: D65453312 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139702 Approved by: https://github.com/XilunWu --- .../distributed/elastic/rendezvous/dynamic_rendezvous.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/torch/distributed/elastic/rendezvous/dynamic_rendezvous.py b/torch/distributed/elastic/rendezvous/dynamic_rendezvous.py index f99542ff5c2a94..c6d2362cb1f4a3 100644 --- a/torch/distributed/elastic/rendezvous/dynamic_rendezvous.py +++ b/torch/distributed/elastic/rendezvous/dynamic_rendezvous.py @@ -1198,20 +1198,15 @@ def next_rendezvous(self) -> RendezvousInfo: # To avoid race in get_free_port because we release the port after the call, # we want to create a TCPStore server soon afterwards. server_port = 0 - addr = ( - self._store.host - if isinstance(self._store, dist.TCPStore) - else self._this_node.addr - ) if rank == 0: self._shared_tcp_store_server = self._create_tcp_store_server( - addr, server_port + self._this_node.addr, server_port ) server_port = self._shared_tcp_store_server.port self._bootstrap_store_info = RendezvousStoreInfo.build( rank, store, - local_addr=addr, + local_addr=self._this_node.addr, server_port=server_port, # For non-0 rank, this is a no-op ) From e0156f9faac7a041d3557b8fdda79709415110fe Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Tue, 5 Nov 2024 07:22:52 -0500 Subject: [PATCH 077/503] HACK: use FB proxy for testowners (#139473) I got fed up with this always timing out when I didn't have correct proxy settings. Signed-off-by: Edward Z. 
Yang Pull Request resolved: https://github.com/pytorch/pytorch/pull/139473 Approved by: https://github.com/malfet --- tools/linter/adapters/testowners_linter.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tools/linter/adapters/testowners_linter.py b/tools/linter/adapters/testowners_linter.py index b4c35b8ad91bab..7f6e2efd34c1ec 100755 --- a/tools/linter/adapters/testowners_linter.py +++ b/tools/linter/adapters/testowners_linter.py @@ -13,6 +13,7 @@ import argparse import json +import urllib.error from enum import Enum from typing import Any, NamedTuple from urllib.request import urlopen @@ -46,11 +47,18 @@ class LintMessage(NamedTuple): def get_pytorch_labels() -> Any: - labels = ( - urlopen("https://ossci-metrics.s3.amazonaws.com/pytorch_labels.json") - .read() - .decode("utf-8") - ) + url = "https://ossci-metrics.s3.amazonaws.com/pytorch_labels.json" + try: + labels = urlopen(url).read().decode("utf-8") + except urllib.error.URLError: + # This is an FB-only hack, if the json isn't available we may + # need to use a forwarding proxy to get out + proxy_url = "http://fwdproxy:8080" + proxy_handler = urllib.request.ProxyHandler( + {"http": proxy_url, "https": proxy_url} + ) + context = urllib.request.build_opener(proxy_handler) + labels = context.open(url).read().decode("utf-8") return json.loads(labels) From a2bc2e38f9b7142271119f7f8dca1f220f931d28 Mon Sep 17 00:00:00 2001 From: cyy Date: Tue, 5 Nov 2024 16:00:25 +0000 Subject: [PATCH 078/503] Use clang-tidy 17 (#139678) Fixes #ISSUE_NUMBER Pull Request resolved: https://github.com/pytorch/pytorch/pull/139678 Approved by: https://github.com/Skylion007 --- .clang-tidy | 2 +- c10/core/Allocator.h | 2 +- c10/test/util/Metaprogramming_test.cpp | 1 + c10/test/util/logging_test.cpp | 1 + c10/util/Bitset.h | 1 + c10/util/signal_handler.cpp | 1 + tools/linter/adapters/s3_init_config.json | 4 ++-- torch/csrc/autograd/python_nn_functions.h | 2 +- 8 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index deff8f0fb8d3d3..3b03412a405095 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -64,5 +64,5 @@ readability-string-compare, HeaderFilterRegex: '^(aten/|c10/|torch/).*$' WarningsAsErrors: '*' CheckOptions: - misc-header-include-cycle.IgnoredFilesList: 'format.h;ivalue.h;custom_class.h;Dict.h;List.h' + misc-header-include-cycle.IgnoredFilesList: 'format.h;ivalue.h;custom_class.h;Dict.h;List.h;IListRef.h' ... 
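(Illustrative aside, not part of the patch: the forwarding-proxy fallback added to the test-owners linter a few hunks up is the standard urllib opener pattern. The sketch below reuses the fwdproxy placeholder address from that diff; the helper name fetch is made up.)

```
import urllib.error
import urllib.request

URL = "https://ossci-metrics.s3.amazonaws.com/pytorch_labels.json"

def fetch(url: str, proxy: str = "http://fwdproxy:8080") -> str:
    try:
        return urllib.request.urlopen(url).read().decode("utf-8")
    except urllib.error.URLError:
        # Retry through a forwarding proxy for both schemes.
        handler = urllib.request.ProxyHandler({"http": proxy, "https": proxy})
        opener = urllib.request.build_opener(handler)
        return opener.open(url).read().decode("utf-8")
```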
diff --git a/c10/core/Allocator.h b/c10/core/Allocator.h index bdb8c719fbc53b..c881d104934b44 100644 --- a/c10/core/Allocator.h +++ b/c10/core/Allocator.h @@ -234,7 +234,7 @@ struct C10_API InefficientStdFunctionContext { deleter_(std::move(rhs.deleter_)) {} InefficientStdFunctionContext& operator=( const InefficientStdFunctionContext&) = delete; - // NOLINTNEXTLINE(performance-noexcept-move-constructor) + // NOLINTNEXTLINE(*-noexcept-move-*) InefficientStdFunctionContext& operator=( InefficientStdFunctionContext&& rhs) { this->~InefficientStdFunctionContext(); diff --git a/c10/test/util/Metaprogramming_test.cpp b/c10/test/util/Metaprogramming_test.cpp index 1a1819d9d5c92d..ad301462bd5147 100644 --- a/c10/test/util/Metaprogramming_test.cpp +++ b/c10/test/util/Metaprogramming_test.cpp @@ -230,6 +230,7 @@ TEST(MetaprogrammingTest, TupleMap_mapsToDifferentTypes) { TEST(MetaprogrammingTest, TupleMap_differentiatesLRValueReferences) { struct Mapper { + // NOLINTNEXTLINE(*move*) std::string operator()(std::string&& a) const { return "moved"; } diff --git a/c10/test/util/logging_test.cpp b/c10/test/util/logging_test.cpp index ca1fab0528cd2f..5798b37c18e385 100644 --- a/c10/test/util/logging_test.cpp +++ b/c10/test/util/logging_test.cpp @@ -23,6 +23,7 @@ TEST(LoggingTest, TestEnforceFalse) { CAFFE_ENFORCE(false, "This throws."); // This should never be triggered. ADD_FAILURE(); + // NOLINTNEXTLINE(*catch*) } catch (const ::c10::Error&) { } std::swap(FLAGS_caffe2_use_fatal_for_enforce, kFalse); diff --git a/c10/util/Bitset.h b/c10/util/Bitset.h index 1acb89afd929d4..782cefbd922e06 100644 --- a/c10/util/Bitset.h +++ b/c10/util/Bitset.h @@ -57,6 +57,7 @@ struct bitset final { // Call the given functor with the index of each bit that is set template + // NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) void for_each_set_bit(Func&& func) const { bitset cur = *this; size_t index = cur.find_first_set(); diff --git a/c10/util/signal_handler.cpp b/c10/util/signal_handler.cpp index e457238713c54f..7132f08588ce72 100644 --- a/c10/util/signal_handler.cpp +++ b/c10/util/signal_handler.cpp @@ -37,6 +37,7 @@ std::atomic sighupCount(0); std::atomic hookedUpCount(0); void handleSignal(int signal) { + // NOLINTNEXTLINE(bugprone-switch-missing-default-case) switch (signal) { // TODO: what if the previous handler uses sa_sigaction? 
case SIGHUP: diff --git a/tools/linter/adapters/s3_init_config.json b/tools/linter/adapters/s3_init_config.json index 5a0ceb85ff3a02..94bd7b679b5fe8 100644 --- a/tools/linter/adapters/s3_init_config.json +++ b/tools/linter/adapters/s3_init_config.json @@ -30,8 +30,8 @@ "hash": "4ed664cf50bb9fddec2d4170b3d7bbe0135dc5648acbd620b61c8d25a5a2fdb7" }, "Linux": { - "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/15.0.6/clang-tidy", - "hash": "8defeb3a2698caca60251f9d682bc08374f1a37eec77d515533affdd03f93add" + "download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/17.0.6/clang-tidy", + "hash": "a93110b0d58b430bb7ce86c8497f2528e1d44eed25d546557e7ec45c44ddfeb7" } }, "actionlint": { diff --git a/torch/csrc/autograd/python_nn_functions.h b/torch/csrc/autograd/python_nn_functions.h index 54dc6e1b293b1f..2fc4f4727e39e7 100644 --- a/torch/csrc/autograd/python_nn_functions.h +++ b/torch/csrc/autograd/python_nn_functions.h @@ -1,5 +1,5 @@ #pragma once - +#include namespace torch::autograd { void initNNFunctions(PyObject* module); From 4d5cc1b4efa2c07a798c2b216990056153c6fe3a Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Tue, 5 Nov 2024 16:22:30 +0000 Subject: [PATCH 079/503] Revert "[dynamo][guards] Consider tensors as immutable for dict tag matches (#139560)" This reverts commit e6ff07f00e04a9b58efb86a3dd70ed7280ae8522. Reverted https://github.com/pytorch/pytorch/pull/139560 on behalf of https://github.com/ZainRizvi due to Sorry but this seems to be breaking internal tests. Please see D65430317 for more details ([comment](https://github.com/pytorch/pytorch/pull/139560#issuecomment-2457620720)) --- test/dynamo/test_modules.py | 48 ------------------------------------ torch/_dynamo/config.py | 4 --- torch/csrc/dynamo/guards.cpp | 8 +----- 3 files changed, 1 insertion(+), 59 deletions(-) diff --git a/test/dynamo/test_modules.py b/test/dynamo/test_modules.py index acdd687b6c7b1c..e082004eea4a8a 100644 --- a/test/dynamo/test_modules.py +++ b/test/dynamo/test_modules.py @@ -3136,54 +3136,6 @@ def fn(x): res = opt_fn(x) self.assertEqual(ref, res) - @patch.object( - torch._dynamo.config, "skip_tensor_guards_with_matching_dict_tags", False - ) - def test_param_requires_grad(self): - def adjust_model(model): - to_freeze = model.num_iter % 2 == 0 - if to_freeze: - for param in model.layer2.parameters(): - param.requires_grad = False - else: - for param in model.layer2.parameters(): - param.requires_grad = True - - class MyModule(torch.nn.Module): - def __init__(self, input_size, hidden_size, output_size): - super().__init__() - - self.layer1 = torch.nn.Linear(hidden_size, hidden_size) - self.layer2 = torch.nn.Linear(hidden_size, hidden_size) - - self.num_iter = 0 - - def forward(self, x): - x = self.layer2(x + self.layer1.bias) - - self.num_iter += 1 - return x - - input_size = 1024 - hidden_size = 1024 - output_size = 1 - num_samples = 2048 - features = torch.randn(num_samples, input_size) - - model = MyModule(input_size, hidden_size, output_size) - - cnt = torch._dynamo.testing.CompileCounter() - opt_model = torch.compile(model, backend=cnt, fullgraph=True) - - for _ in range(3): - model.zero_grad(True) - adjust_model(model) - res = opt_model(features) - res.sum().backward() - - # Check that we have recompiled twice, which leads to 3 frames - self.assertEqual(cnt.frame_count, 3) - if __name__ == "__main__": from torch._dynamo.test_case import run_tests diff --git a/torch/_dynamo/config.py b/torch/_dynamo/config.py index 3ab8200ced0ef2..5c36654ae5d0d9 
100644 --- a/torch/_dynamo/config.py +++ b/torch/_dynamo/config.py @@ -331,10 +331,6 @@ def _get_optimize_ddp_mode(): # notice and lead to incorrect result. skip_no_tensor_aliasing_guards_on_parameters = True -# Considers a tensor immutable if it is one of the values of a dictionary, and -# the dictionary tag is same across invocation calls. -skip_tensor_guards_with_matching_dict_tags = True - # If True, raises exception if TorchDynamo is called with a context manager raise_on_ctx_manager_usage = True diff --git a/torch/csrc/dynamo/guards.cpp b/torch/csrc/dynamo/guards.cpp index bd93f582f38f34..6c81bdfc785c2f 100644 --- a/torch/csrc/dynamo/guards.cpp +++ b/torch/csrc/dynamo/guards.cpp @@ -886,11 +886,6 @@ std::string get_exception_message() { } bool is_immutable_object(py::handle example_value) { - static py::object config_module = py::module_::import("torch._dynamo.config"); - bool is_tensor_immutable = - config_module.attr("skip_tensor_guards_with_matching_dict_tags") - .cast(); - if (PyTuple_Check(example_value.ptr())) { // Check that each element is immutable for (Py_ssize_t i = 0; i < PyTuple_Size(example_value.ptr()); ++i) { @@ -901,11 +896,10 @@ bool is_immutable_object(py::handle example_value) { } return true; } - return PyLong_Check(example_value.ptr()) || PyFloat_Check(example_value.ptr()) || PyBool_Check(example_value.ptr()) || PyUnicode_Check(example_value.ptr()) || - (is_tensor_immutable && THPVariable_Check(example_value.ptr())); + THPVariable_Check(example_value.ptr()); } bool is_parameter(py::handle tensor) { From c0d21b65818314ea18c2424c1a97e7b890bfb81a Mon Sep 17 00:00:00 2001 From: Oguz Ulgen Date: Tue, 5 Nov 2024 17:00:40 +0000 Subject: [PATCH 080/503] End TritonBundle on non-cache write codepaths (#139698) Summary: When we bypass cache write on inductor, we were also forgetting to reset the bundle, this moves resetting the bundle into post_compile step so it gets uniformly reset. This diff also turns on the cache for internal so that we can do a code rollout. Test Plan: updated tests Differential Revision: D65457224 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139698 Approved by: https://github.com/ezyang --- test/inductor/test_codecache.py | 100 ++++++++++++++++++++++-------- torch/_inductor/codecache.py | 30 ++++++--- torch/_inductor/graph.py | 2 - torch/_inductor/triton_bundler.py | 20 +++++- 4 files changed, 111 insertions(+), 41 deletions(-) diff --git a/test/inductor/test_codecache.py b/test/inductor/test_codecache.py index a83322f6154f15..758225534d0e65 100644 --- a/test/inductor/test_codecache.py +++ b/test/inductor/test_codecache.py @@ -123,7 +123,8 @@ def reset(self): @parametrize("dtype", (torch.float32, torch.bfloat16)) @parametrize("dynamic", (False, True)) @parametrize("bundle_triton", (False, True)) - def test_cache_load_function(self, device, dtype, dynamic, bundle_triton): + @parametrize("grad", (False, True)) + def test_cache_load_function(self, device, dtype, dynamic, bundle_triton, grad): """ Verify that we can populate and load functions from the cache. 
""" @@ -132,23 +133,43 @@ def test_cache_load_function(self, device, dtype, dynamic, bundle_triton): if device == "cuda" and dtype == torch.bfloat16 and not SM80OrLater: raise unittest.SkipTest("requires SM80 or later") + grad_multiplier = 2 if grad else 1 + def fn(x, y): - return (x * 2, y @ y) + yy = y @ y + return x * 2 + yy.view(25) - a = torch.rand(25, dtype=dtype, device=device) - b = torch.rand(5, 5, dtype=dtype, device=device) + a_orig = torch.rand(25, dtype=dtype, device=device) + b_orig = torch.rand(5, 5, dtype=dtype, device=device) with config.patch(bundle_triton_into_fx_graph_cache=bundle_triton): compiled_fn = torch.compile(fn, dynamic=dynamic) + a1 = a_orig.clone().requires_grad_(grad) + b1 = b_orig.clone().requires_grad_(grad) + a2 = a_orig.clone().requires_grad_(grad) + b2 = b_orig.clone().requires_grad_(grad) + # A first call should miss in the cache. - self.assertEqual(fn(a, b), compiled_fn(a, b)) - self.assertEqual(counters["inductor"]["fxgraph_cache_miss"], 1) + eager_result = fn(a1, b1) + compiled_result = compiled_fn(a2, b2) + self.assertEqual(eager_result, compiled_result) + if grad: + eager_result.sum().backward() + compiled_result.sum().backward() + self.assertEqual(a1.grad, a2.grad) + self.assertEqual(b1.grad, b2.grad) + self.assertEqual( + counters["inductor"]["fxgraph_cache_miss"], grad_multiplier * 1 + ) self.assertEqual(counters["inductor"]["fxgraph_cache_hit"], 0) self.assertEqual(counters["inductor"]["fxgraph_lookup_write_file"], 0) if bundle_triton and device != "cpu": - self.assertEqual(counters["inductor"]["triton_bundler_save_kernel"], 7) + self.assertEqual( + counters["inductor"]["triton_bundler_save_kernel"], + grad_multiplier * 7, + ) self.assertEqual( counters["inductor"]["triton_bundler_read_and_emit_kernel"], 0 ) @@ -161,15 +182,37 @@ def fn(x, y): PyCodeCache.cache_clear() shutil.rmtree(os.path.join(cache_dir(), "triton"), ignore_errors=True) - self.assertEqual(fn(a, b), compiled_fn(a, b)) - self.assertEqual(counters["inductor"]["fxgraph_cache_miss"], 1) - self.assertEqual(counters["inductor"]["fxgraph_cache_hit"], 1) - self.assertEqual(counters["inductor"]["fxgraph_lookup_write_file"], 1) + a1 = a_orig.clone().requires_grad_(grad) + b1 = b_orig.clone().requires_grad_(grad) + a2 = a_orig.clone().requires_grad_(grad) + b2 = b_orig.clone().requires_grad_(grad) + + eager_result = fn(a1, b1) + compiled_result = compiled_fn(a2, b2) + self.assertEqual(eager_result, compiled_result) + if grad: + eager_result.sum().backward() + compiled_result.sum().backward() + self.assertEqual(a1.grad, a2.grad) + self.assertEqual(b1.grad, b2.grad) + self.assertEqual( + counters["inductor"]["fxgraph_cache_miss"], grad_multiplier * 1 + ) + self.assertEqual( + counters["inductor"]["fxgraph_cache_hit"], grad_multiplier * 1 + ) + self.assertEqual( + counters["inductor"]["fxgraph_lookup_write_file"], grad_multiplier * 1 + ) if bundle_triton and device != "cpu": - self.assertEqual(counters["inductor"]["triton_bundler_save_kernel"], 7) self.assertEqual( - counters["inductor"]["triton_bundler_read_and_emit_kernel"], 7 + counters["inductor"]["triton_bundler_save_kernel"], + grad_multiplier * 7, + ) + self.assertEqual( + counters["inductor"]["triton_bundler_read_and_emit_kernel"], + grad_multiplier * 7, ) @requires_triton() @@ -448,29 +491,34 @@ def fn2(q, k, v): @requires_triton() @config.patch({"fx_graph_cache": True}) @config.patch({"fx_graph_remote_cache": False}) - def test_triton_higher_order_op_bypass(self): + @parametrize("bundle_triton", (False, True)) + 
@parametrize("grad", (False, True)) + def test_triton_higher_order_op_bypass(self, bundle_triton, grad): """ - Verify that we bypass the cache when we have a triton higher order ops. + Verify that we bypass the cache when we have a triton higher order ops + and that bundler start/end works with a cache bypass. """ def fn(x, y): - output = torch.zeros_like(x) - n_elements = output.numel() + n_elements = x.numel() grid = lambda meta: ( # noqa: E731 triton.cdiv(n_elements, meta["BLOCK_SIZE"]), ) - add_kernel[grid](x, y, output, n_elements, BLOCK_SIZE=4) - return output + add_kernel[grid](x, y, x, n_elements, BLOCK_SIZE=4) + return x - compiled_fn = torch.compile(fn, fullgraph=True) + with config.patch(bundle_triton_into_fx_graph_cache=bundle_triton): + compiled_fn = torch.compile(fn, fullgraph=True) - x = torch.randn(4, device=GPU_TYPE) - y = torch.randn(4, device=GPU_TYPE) - compiled_fn(x, y) + x = torch.randn(4, device=GPU_TYPE, requires_grad=grad) + y = torch.randn(4, device=GPU_TYPE, requires_grad=grad) + result = compiled_fn(x, y) + if grad: + result.sum().backward() - self.assertEqual(counters["inductor"]["fxgraph_cache_miss"], 0) - self.assertEqual(counters["inductor"]["fxgraph_cache_hit"], 0) - self.assertGreater(counters["inductor"]["fxgraph_cache_bypass"], 0) + self.assertEqual(counters["inductor"]["fxgraph_cache_miss"], 0) + self.assertEqual(counters["inductor"]["fxgraph_cache_hit"], 0) + self.assertGreater(counters["inductor"]["fxgraph_cache_bypass"], 0) @config.patch({"fx_graph_cache": True}) @config.patch({"fx_graph_remote_cache": False}) diff --git a/torch/_inductor/codecache.py b/torch/_inductor/codecache.py index fc4cbba2a702d4..8519a93d2f7cee 100644 --- a/torch/_inductor/codecache.py +++ b/torch/_inductor/codecache.py @@ -1145,9 +1145,12 @@ def iterate_over_candidates() -> Generator[CompiledFxGraph, None, None]: triton_bundler_meta = TritonBundler.read_and_emit(bundle) if (meta := triton_bundler_meta) is not None: cache_info["triton_bundler_meta"] = str(meta) - get_chromium_event_logger().add_event_data( - "inductor_compile", cached_kernel_names=meta.cached_kernel_names - ) + logger = get_chromium_event_logger() + if "inductor_compile" in logger.get_stack(): + # TODO: Clean up autograd cache integration + logger.add_event_data( + "inductor_compile", cached_kernel_names=meta.cached_kernel_names + ) inductor_meta = autotune_cache.inductor_meta_from_config() AutotuneCacheBundler.begin_compile(inductor_meta, code=code) @@ -1504,13 +1507,20 @@ def load( # type: ignore[no-untyped-def] assert compiled_graph is None assert key_info is not None start_time = cache_info["cache_event_time"] - compiled_graph = compile_fx_fn( - gm, example_inputs, inputs_to_check, fx_kwargs - ) - compiled_graph._time_taken_ns = time_ns() - start_time - cache_key = key_info[0] - compiled_graph._fx_graph_cache_key = cache_key - compiled_graph._triton_bundle, triton_bundler_meta = TritonBundler.collect() + TritonBundler.begin_compile() + try: + compiled_graph = compile_fx_fn( + gm, example_inputs, inputs_to_check, fx_kwargs + ) + compiled_graph._time_taken_ns = time_ns() - start_time + cache_key = key_info[0] + compiled_graph._fx_graph_cache_key = cache_key + ( + compiled_graph._triton_bundle, + triton_bundler_meta, + ) = TritonBundler.collect() + finally: + TritonBundler.end_compile() if triton_bundler_meta is not None: cache_info["triton_bundler_meta"] = str(triton_bundler_meta) cache_info["time_taken_ns"] = compiled_graph._time_taken_ns diff --git a/torch/_inductor/graph.py b/torch/_inductor/graph.py 
index 67c72fc2f3042b..86b5807392944f 100644 --- a/torch/_inductor/graph.py +++ b/torch/_inductor/graph.py @@ -97,7 +97,6 @@ from .runtime.autotune_cache import AutotuneCacheBundler from .scheduler import BaseSchedulerNode from .sizevars import SizeVarAllocator -from .triton_bundler import TritonBundler from .utils import ( convert_shape_to_inductor, gather_origins, @@ -1966,7 +1965,6 @@ def _compile_to_module(self) -> ModuleType: inductor_meta = autotune_cache.inductor_meta_from_config() AutotuneCacheBundler.begin_compile(inductor_meta, code=code) - TritonBundler.begin_compile() try: linemap = [(line_no, node.stack_trace) for line_no, node in linemap] # type: ignore[misc] diff --git a/torch/_inductor/triton_bundler.py b/torch/_inductor/triton_bundler.py index 7835f168b3763d..ff33730079c4fe 100644 --- a/torch/_inductor/triton_bundler.py +++ b/torch/_inductor/triton_bundler.py @@ -70,6 +70,8 @@ class TritonBundler: - TritonBundler.begin_compile is called when we start compiling in Inductor - TritonBundler.put is called each time a Triton Kernel is compiled - TritonBundler.collect is called when a cache entry is being generated + - TritonBundler.end_compile is called to indicate bundling is completed, + collect will execute this function as well. - TritonBundler.read_and_emit is called when a cache entry is read """ @@ -92,7 +94,9 @@ def is_enabled() -> bool: if not config.is_fbcode(): return False - return justknobs_check("pytorch/remote_cache:bundle_triton_into_fx_graph_cache") + return justknobs_check( + "pytorch/remote_cache:bundle_triton_into_fx_graph_cache_v2" + ) @classmethod def begin_compile(cls) -> None: @@ -102,9 +106,19 @@ def begin_compile(cls) -> None: """ if not TritonBundler.is_enabled(): return + log.debug("TritonBundler.begin_compile is called") assert cls._entries is None cls._entries = [] + @classmethod + def end_compile(cls) -> None: + """ + Finalizes the TritonBundler. If collect is not yet called, it + discards the current bundle. + """ + log.debug("TritonBundler.end_compile is called") + cls._entries = None + @classmethod def put(cls, kernel_hash: str, device: int) -> None: """ @@ -127,7 +141,7 @@ def collect( This function also finalizes the current bundle. """ if not TritonBundler.is_enabled(): - cls._entries = None + cls.end_compile() return [], None with dynamo_timed( @@ -171,7 +185,7 @@ def collect( artifacts, ) ) - cls._entries = None + cls.end_compile() return result, TritonBundlerMetadata(kernel_names) return [], None From 5860c8ebd155bd06666d87811847b73040b55f7b Mon Sep 17 00:00:00 2001 From: atalman Date: Tue, 5 Nov 2024 17:21:24 +0000 Subject: [PATCH 081/503] Use Manylinux2_28 for wheel builds (#138732) Fixes https://github.com/pytorch/pytorch/issues/123649 Use Manylinux 2_28 Docker builds for PyTorch Nightly builds This moves the wheels to a Docker image that uses : ``quay.io/pypa/manylinux_2_28_x86_64`` as a base rather then ``centos:7`` which is EOL on June 30, 2024. 
Information: https://github.com/pypa/manylinux#manylinux_2_28-almalinux-8-based manylinux_2_28 (AlmaLinux 8 based) Toolchain: GCC 13 Built wheels are also expected to be compatible with other distros using glibc 2.28 or later, including: Debian 10+ Ubuntu 18.10+ Fedora 29+ CentOS/RHEL 8+ This migration should enable us to migrate to latest CUDNN version, and land this PR: https://github.com/pytorch/pytorch/pull/137978 Pull Request resolved: https://github.com/pytorch/pytorch/pull/138732 Approved by: https://github.com/Skylion007, https://github.com/malfet --- .../scripts/generate_binary_build_matrix.py | 4 +- .../generated-linux-binary-manywheel-main.yml | 12 +- ...nerated-linux-binary-manywheel-nightly.yml | 150 +++++++++--------- ...rated-macos-arm64-binary-wheel-nightly.yml | 10 +- 4 files changed, 88 insertions(+), 88 deletions(-) diff --git a/.github/scripts/generate_binary_build_matrix.py b/.github/scripts/generate_binary_build_matrix.py index 56d8d182cb218c..fa9dec1c78c4a6 100644 --- a/.github/scripts/generate_binary_build_matrix.py +++ b/.github/scripts/generate_binary_build_matrix.py @@ -156,7 +156,7 @@ def arch_type(arch_version: str) -> str: WHEEL_CONTAINER_IMAGES = { **{ - gpu_arch: f"pytorch/manylinux-builder:cuda{gpu_arch}-{DEFAULT_TAG}" + gpu_arch: f"pytorch/manylinux2_28-builder:cuda{gpu_arch}-{DEFAULT_TAG}" for gpu_arch in CUDA_ARCHES }, **{ @@ -164,7 +164,7 @@ def arch_type(arch_version: str) -> str: for gpu_arch in ROCM_ARCHES }, "xpu": f"pytorch/manylinux2_28-builder:xpu-{DEFAULT_TAG}", - "cpu": f"pytorch/manylinux-builder:cpu-{DEFAULT_TAG}", + "cpu": f"pytorch/manylinux2_28-builder:cpu-{DEFAULT_TAG}", "cpu-cxx11-abi": f"pytorch/manylinuxcxx11-abi-builder:cpu-cxx11-abi-{DEFAULT_TAG}", "cpu-aarch64": f"pytorch/manylinuxaarch64-builder:cpu-aarch64-{DEFAULT_TAG}", "cpu-s390x": f"pytorch/manylinuxs390x-builder:cpu-s390x-{DEFAULT_TAG}", diff --git a/.github/workflows/generated-linux-binary-manywheel-main.yml b/.github/workflows/generated-linux-binary-manywheel-main.yml index 4b8d26d54bc122..71926572a1f5bf 100644 --- a/.github/workflows/generated-linux-binary-manywheel-main.yml +++ b/.github/workflows/generated-linux-binary-manywheel-main.yml @@ -54,7 +54,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.9" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -78,7 +78,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.9" build_name: manywheel-py3_9-cuda11_8 @@ -101,7 +101,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.9" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -125,7 +125,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.9" build_name: manywheel-py3_9-cuda12_1 @@ -148,7 +148,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: 
pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.9" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -172,7 +172,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.9" build_name: manywheel-py3_9-cuda12_4 diff --git a/.github/workflows/generated-linux-binary-manywheel-nightly.yml b/.github/workflows/generated-linux-binary-manywheel-nightly.yml index c750531d18d88e..d99deca2d73f8f 100644 --- a/.github/workflows/generated-linux-binary-manywheel-nightly.yml +++ b/.github/workflows/generated-linux-binary-manywheel-nightly.yml @@ -58,7 +58,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.9" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -80,7 +80,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.9" build_name: manywheel-py3_9-cpu @@ -103,7 +103,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.9" build_name: manywheel-py3_9-cpu @@ -196,7 +196,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.9" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -220,7 +220,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.9" build_name: manywheel-py3_9-cuda11_8 @@ -244,7 +244,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.9" build_name: manywheel-py3_9-cuda11_8 @@ -267,7 +267,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.9" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -291,7 +291,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.9" build_name: manywheel-py3_9-cuda12_1 @@ -315,7 +315,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.9" build_name: manywheel-py3_9-cuda12_1 @@ -338,7 +338,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: 
pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.9" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -362,7 +362,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.9" build_name: manywheel-py3_9-cuda12_4 @@ -386,7 +386,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.9" build_name: manywheel-py3_9-cuda12_4 @@ -744,7 +744,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.10" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -766,7 +766,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.10" build_name: manywheel-py3_10-cpu @@ -789,7 +789,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.10" build_name: manywheel-py3_10-cpu @@ -882,7 +882,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.10" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -906,7 +906,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.10" build_name: manywheel-py3_10-cuda11_8 @@ -930,7 +930,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.10" build_name: manywheel-py3_10-cuda11_8 @@ -953,7 +953,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.10" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -977,7 +977,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.10" build_name: manywheel-py3_10-cuda12_1 @@ -1001,7 +1001,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.10" build_name: manywheel-py3_10-cuda12_1 @@ -1024,7 +1024,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: 
pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.10" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -1048,7 +1048,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.10" build_name: manywheel-py3_10-cuda12_4 @@ -1072,7 +1072,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.10" build_name: manywheel-py3_10-cuda12_4 @@ -1430,7 +1430,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.11" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -1452,7 +1452,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.11" build_name: manywheel-py3_11-cpu @@ -1475,7 +1475,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.11" build_name: manywheel-py3_11-cpu @@ -1568,7 +1568,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.11" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -1592,7 +1592,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.11" build_name: manywheel-py3_11-cuda11_8 @@ -1616,7 +1616,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.11" build_name: manywheel-py3_11-cuda11_8 @@ -1639,7 +1639,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.11" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -1663,7 +1663,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.11" build_name: manywheel-py3_11-cuda12_1 @@ -1687,7 +1687,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.11" build_name: manywheel-py3_11-cuda12_1 @@ -1710,7 +1710,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + 
DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.11" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -1733,7 +1733,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.11" build_name: manywheel-py3_11-cuda12_1-full @@ -1757,7 +1757,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.11" build_name: manywheel-py3_11-cuda12_1-full @@ -1780,7 +1780,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.11" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -1804,7 +1804,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.11" build_name: manywheel-py3_11-cuda12_4 @@ -1828,7 +1828,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.11" build_name: manywheel-py3_11-cuda12_4 @@ -2186,7 +2186,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.12" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -2208,7 +2208,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.12" build_name: manywheel-py3_12-cpu @@ -2231,7 +2231,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.12" build_name: manywheel-py3_12-cpu @@ -2324,7 +2324,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.12" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -2348,7 +2348,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.12" build_name: manywheel-py3_12-cuda11_8 @@ -2372,7 +2372,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.12" build_name: manywheel-py3_12-cuda11_8 @@ -2395,7 +2395,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: 
pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.12" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -2419,7 +2419,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.12" build_name: manywheel-py3_12-cuda12_1 @@ -2443,7 +2443,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.12" build_name: manywheel-py3_12-cuda12_1 @@ -2466,7 +2466,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.12" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -2490,7 +2490,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.12" build_name: manywheel-py3_12-cuda12_4 @@ -2514,7 +2514,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.12" build_name: manywheel-py3_12-cuda12_4 @@ -2872,7 +2872,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.13" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -2894,7 +2894,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.13" build_name: manywheel-py3_13-cpu @@ -2917,7 +2917,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.13" build_name: manywheel-py3_13-cpu @@ -3010,7 +3010,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.13" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -3034,7 +3034,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.13" build_name: manywheel-py3_13-cuda11_8 @@ -3058,7 +3058,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.13" build_name: manywheel-py3_13-cuda11_8 @@ -3081,7 +3081,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - 
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.13" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -3105,7 +3105,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.13" build_name: manywheel-py3_13-cuda12_1 @@ -3129,7 +3129,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.13" build_name: manywheel-py3_13-cuda12_1 @@ -3152,7 +3152,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.13" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -3176,7 +3176,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.13" build_name: manywheel-py3_13-cuda12_4 @@ -3200,7 +3200,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.13" build_name: manywheel-py3_13-cuda12_4 @@ -3338,7 +3338,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.13t" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -3360,7 +3360,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.13t" build_name: manywheel-py3_13t-cpu @@ -3383,7 +3383,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main use_split_build: False DESIRED_PYTHON: "3.13t" build_name: manywheel-py3_13t-cpu @@ -3476,7 +3476,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.13t" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -3500,7 +3500,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.13t" build_name: manywheel-py3_13t-cuda11_8 @@ -3524,7 +3524,7 @@ jobs: DESIRED_CUDA: cu118 GPU_ARCH_VERSION: 11.8 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda11.8-main use_split_build: False DESIRED_PYTHON: "3.13t" build_name: manywheel-py3_13t-cuda11_8 @@ -3547,7 +3547,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 
GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.13t" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -3571,7 +3571,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.13t" build_name: manywheel-py3_13t-cuda12_1 @@ -3595,7 +3595,7 @@ jobs: DESIRED_CUDA: cu121 GPU_ARCH_VERSION: 12.1 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.1-main use_split_build: False DESIRED_PYTHON: "3.13t" build_name: manywheel-py3_13t-cuda12_1 @@ -3618,7 +3618,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.13t" runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" @@ -3642,7 +3642,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.13t" build_name: manywheel-py3_13t-cuda12_4 @@ -3666,7 +3666,7 @@ jobs: DESIRED_CUDA: cu124 GPU_ARCH_VERSION: 12.4 GPU_ARCH_TYPE: cuda - DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cuda12.4-main use_split_build: False DESIRED_PYTHON: "3.13t" build_name: manywheel-py3_13t-cuda12_4 diff --git a/.github/workflows/generated-macos-arm64-binary-wheel-nightly.yml b/.github/workflows/generated-macos-arm64-binary-wheel-nightly.yml index 687e716eae471a..b528f2416d27a3 100644 --- a/.github/workflows/generated-macos-arm64-binary-wheel-nightly.yml +++ b/.github/workflows/generated-macos-arm64-binary-wheel-nightly.yml @@ -139,7 +139,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main DESIRED_PYTHON: "3.9" build_name: wheel-py3_9-cpu use_s3: False @@ -255,7 +255,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main DESIRED_PYTHON: "3.10" build_name: wheel-py3_10-cpu use_s3: False @@ -371,7 +371,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main DESIRED_PYTHON: "3.11" build_name: wheel-py3_11-cpu use_s3: False @@ -487,7 +487,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main DESIRED_PYTHON: "3.12" build_name: wheel-py3_12-cpu use_s3: False @@ -603,7 +603,7 @@ jobs: # favor of GPU_ARCH_VERSION DESIRED_CUDA: cpu GPU_ARCH_TYPE: cpu - DOCKER_IMAGE: pytorch/manylinux-builder:cpu-main + DOCKER_IMAGE: pytorch/manylinux2_28-builder:cpu-main DESIRED_PYTHON: "3.13" build_name: wheel-py3_13-cpu use_s3: False From 41e4d88584c4ed0708cd1d93c71cd4ee2e1bbbb5 Mon Sep 17 00:00:00 2001 From: "Junjie Wang (PyTorch)" Date: Tue, 5 Nov 2024 17:40:27 +0000 Subject: [PATCH 082/503] 
[logging][ez] Add timer logging for pickling and unpickle for object based collective (#139757) Summary: As discussed, we want to measure the time spent during pickling and unpickle. Test Plan: CI Reviewed By: wz337 Differential Revision: D65462767 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139757 Approved by: https://github.com/awgu, https://github.com/Skylion007, https://github.com/fegin, https://github.com/c-p-i-o --- torch/distributed/distributed_c10d.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/torch/distributed/distributed_c10d.py b/torch/distributed/distributed_c10d.py index 3ee7a2dbac3786..61cb222233108c 100644 --- a/torch/distributed/distributed_c10d.py +++ b/torch/distributed/distributed_c10d.py @@ -2812,6 +2812,7 @@ def reduce(tensor, dst, op=ReduceOp.SUM, group=None, async_op=False): work.wait() +@_time_logger def _object_to_tensor(obj, device, group): f = io.BytesIO() _pickler(f).dump(obj) @@ -2831,6 +2832,7 @@ def _object_to_tensor(obj, device, group): return byte_tensor, local_size +@_time_logger def _tensor_to_object(tensor, tensor_size, group): if get_debug_level() == DebugLevel.DETAIL and is_nccl_available(): backend = get_backend(group) From cc25b6d7ba540df81552db6cf7c13f9310c4c741 Mon Sep 17 00:00:00 2001 From: Sam Ginzburg Date: Mon, 4 Nov 2024 18:18:53 -0800 Subject: [PATCH 083/503] [inductor] Error on unsupported autotuner configs (#139658) Pull Request resolved: https://github.com/pytorch/pytorch/pull/139658 Approved by: https://github.com/aakhundov --- test/inductor/test_triton_kernels.py | 50 +++++++++++++++++++ torch/_higher_order_ops/triton_kernel_wrap.py | 13 ++++- 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/test/inductor/test_triton_kernels.py b/test/inductor/test_triton_kernels.py index 759f46e3c8ac39..cfb24382574926 100644 --- a/test/inductor/test_triton_kernels.py +++ b/test/inductor/test_triton_kernels.py @@ -3377,6 +3377,56 @@ def grid(META): "grid_wrapper_for_op_zeros_0" ).check_next("return (256").check_next("return (64").run(output) + @requires_gpu + def test_autotune_no_pre_or_post_hook(self): + def init_to_zero(name): + return lambda nargs: nargs[name].zero_() + + # pre_hook requires running arbitrary code at runtime, which we cannot handle at this time + # https://github.com/pytorch/pytorch/issues/139059 + @triton.autotune( + configs=[ + triton.Config( + {"BLOCK_SIZE": 1024}, + num_warps=4, + num_stages=2, + pre_hook=init_to_zero("output_ptr"), + ) + ], + key=["n_elements"], + ) + @triton.jit + def add_kernel(x_ptr, y_ptr, output_ptr, n_elements, BLOCK_SIZE: tl.constexpr): + pid = tl.program_id(axis=0) + + block_start = pid * BLOCK_SIZE + offsets = block_start + tl.arange(0, BLOCK_SIZE) + mask = offsets < n_elements + + x = tl.load(x_ptr + offsets, mask=mask) + y = tl.load(y_ptr + offsets, mask=mask) + output = x + y + tl.atomic_add(output_ptr + offsets, output, mask=mask) + + def add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + output = torch.ones(x.shape, device=x.device, dtype=x.dtype) + n_elements = output.numel() + grid = lambda meta: (triton.cdiv(n_elements, meta["BLOCK_SIZE"]),) + add_kernel[grid](x, y, output, n_elements) + return output + + x = torch.ones((4096,), device=GPU_TYPE, dtype=torch.float16) + y = torch.ones((4096,), device=GPU_TYPE, dtype=torch.float16) + + # should always pass + assert add(x, y).mean() == 2, "Problem with add kernel" + + # this should cause an exception, since pre_hook is not allowed + msg = "pre_hook is not supported in triton.Autotune Configs" + with 
self.assertRaisesRegex(torch._dynamo.exc.Unsupported, msg): + add_compiled = torch.compile(add, mode="reduce-overhead", fullgraph=True) + add_compiled(x, y).mean() + common_utils.instantiate_parametrized_tests(KernelTests) common_utils.instantiate_parametrized_tests(CustomOpTests) diff --git a/torch/_higher_order_ops/triton_kernel_wrap.py b/torch/_higher_order_ops/triton_kernel_wrap.py index 5780a2fb638c59..f04da77b88bd07 100644 --- a/torch/_higher_order_ops/triton_kernel_wrap.py +++ b/torch/_higher_order_ops/triton_kernel_wrap.py @@ -1038,7 +1038,6 @@ def init_variable( # We only support configs and keys arguments of triton.autotune # Make sure other arguments are defaulted defaults = inspect.signature(Autotuner.__init__).parameters - # Newer version of triton change attribute name from warmup to num_warmup and rep to num_rep. # The call to get_first_attr is to maintain backward-compatibility. if ( @@ -1073,6 +1072,18 @@ def init_variable( self.raise_unsupported( "Only configs and keys are supported for triton.autotune" ) + if ( + not torch._inductor.config.unsafe_ignore_unsupported_triton_autotune_args + and ( + # pre_hook requires running arbitrary code at runtime, which we cannot handle at this time + # https://github.com/pytorch/pytorch/issues/139059 + # Check Config passed to autotuner in configs + any(cfg.pre_hook is not None for cfg in kernel.configs) + ) + ): + self.raise_unsupported( + "pre_hook is not supported in triton.Autotune Configs" + ) def call_getitem( self, From de509abe1c2d90c92ad15ec57523b09394c88cfb Mon Sep 17 00:00:00 2001 From: Angela Yi Date: Tue, 5 Nov 2024 18:16:03 +0000 Subject: [PATCH 084/503] [export] Dedup data-dependent errors based on stacktrace (#139540) Summary: Dedup the data-dependent errors based on the stacktrace it points to. Right now we just display every propagate-real-tensor log that shows up, but we actually can dedup them if they are due to the same piece of code (ex. there could multiple calls to a piece of code that does some data dependent computation). This occurred when trying out draft export on the PT2I model zoo. For a specific model, previously we would get ~3k data dependent errors, but after deduping based on the stacktrace we now only get 4 errors. 
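A rough sketch of the dedup strategy (standalone illustration only; `should_report` is a made-up helper, while `hash_stack` mirrors the helper added in the diff below):

```python
from typing import Any, Dict, List

def hash_stack(stack: List[Dict[str, str]]) -> str:
    # Two errors raised from the same piece of user code hash to the same key.
    return ";".join(f'line: {s["line"]} filename: {s["filename"]}' for s in stack)

data_dependent_logs: Dict[str, Dict[str, Any]] = {}

def should_report(log_contents: Dict[str, Any]) -> bool:
    # Only the first occurrence of a given call site gets reported.
    key = hash_stack(log_contents["stack"])
    if key in data_dependent_logs:
        return False
    data_dependent_logs[key] = log_contents
    return True
```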
Test Plan: CI Differential Revision: D65374254 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139540 Approved by: https://github.com/pianpwk, https://github.com/zou3519 --- test/export/test_draft_export.py | 23 ++++++++++++++++++++ torch/export/_draft_export.py | 37 ++++++++++++++++++++++---------- 2 files changed, 49 insertions(+), 11 deletions(-) diff --git a/test/export/test_draft_export.py b/test/export/test_draft_export.py index 11eb0bf7025a43..5a53566fc9d862 100644 --- a/test/export/test_draft_export.py +++ b/test/export/test_draft_export.py @@ -142,6 +142,29 @@ def forward(self, a, b, c): inp = (torch.randn(3, 3), torch.randn(3, 3), torch.tensor(2)) self.assertEqual(ep.module()(*inp), M()(*inp)) + def test_dedup_data_dependent_failure(self): + class M(torch.nn.Module): + def forward(self, x, y, z): + res = 0 + for v in [x, y]: + if v.item() > 10: + res += v * v + else: + res += v + v + + return z * res + + inp = (torch.tensor(5), torch.tensor(3), torch.tensor(2)) + + ep, report = draft_export(M(), inp) + self.assertTrue(len(report.failures) > 0) + self.assertEqual( + report.failures[0].failure_type, FailureType.DATA_DEPENDENT_ERROR + ) + + inp = (torch.tensor(4), torch.tensor(2), torch.tensor(6)) + self.assertEqual(ep.module()(*inp), M()(*inp)) + def test_offsets(self): class M(torch.nn.Module): def forward(self, x): diff --git a/torch/export/_draft_export.py b/torch/export/_draft_export.py index 4f37404a0634e2..531e993bd905e3 100644 --- a/torch/export/_draft_export.py +++ b/torch/export/_draft_export.py @@ -41,6 +41,7 @@ def uninteresting_files() -> Set[str]: torch._logging._internal, torch._subclasses.meta_utils, torch._subclasses.fake_tensor, + torch._subclasses.functional_tensor, ] return {inspect.getfile(m) for m in mods} @@ -62,6 +63,7 @@ def filter_stack( stack: List[Dict[str, str]], str_to_filename: Dict[str, str] ) -> List[Dict[str, str]]: for i, s in enumerate(reversed(stack)): + s["filename"] = str(s["filename"]) if s["filename"] not in str_to_filename: continue if str_to_filename[s["filename"]] not in uninteresting_files(): @@ -69,6 +71,10 @@ def filter_stack( return stack[-3:] +def hash_stack(stack: List[Dict[str, str]]) -> str: + return ";".join(f'line: {s["line"]} filename: {s["filename"]}' for s in stack) + + class FailureReport: def __init__( self, failure_type: FailureType, data: Dict[str, Any], xfail: bool = False @@ -234,21 +240,32 @@ def draft_export( str_to_filename: Dict[str, str] = {} failures: List[FailureReport] = [] custom_ops_logs: Dict[str, Dict[str, Any]] = {} # Dedup custom ops + data_dependent_logs: Dict[ + str, Dict[str, Any] + ] = {} # Dedup data dependent errors based on stacktrace for log_name, log_contents in capture_structured_log.logs: - if log_name == "propagate_real_tensors": - failure_type = FailureType.DATA_DEPENDENT_ERROR + failure_type = None + if log_name == "propagate_real_tensors": log_contents["stack"] = filter_stack( log_contents["stack"], str_to_filename ) + if hash_stack(log_contents["stack"]) in data_dependent_logs: + continue + + data_dependent_logs[hash_stack(log_contents["stack"])] = log_contents + failure_type = FailureType.DATA_DEPENDENT_ERROR + elif log_name == "str": filename, idx = log_contents str_to_filename[str(idx)] = filename continue + elif log_name == "guard_added": if new_shapes is None: continue + failure_type = FailureType.CONSTRAINT_VIOLATION_ERROR if len(log_contents["symbol_to_sources"]) == 0: # We only want to include guards added that are relevant to @@ -260,12 +277,18 @@ def draft_export( 
log_contents["stack"], str_to_filename ) log_contents["new_dynamic_shapes"] = new_shapes + elif log_name == "generated_fake_kernel": + if log_contents["op"] in custom_ops_logs: + continue + + failure_type = FailureType.MISSING_FAKE_KERNEL custom_ops_logs[log_contents["op"]] = log_contents - continue + else: raise RuntimeError(f"Unknown log name: {log_name}") + assert failure_type is not None failures.append( FailureReport( failure_type, @@ -273,14 +296,6 @@ def draft_export( ) ) - for custom_op_log in custom_ops_logs.values(): - failures.append( - FailureReport( - FailureType.MISSING_FAKE_KERNEL, - custom_op_log, - ) - ) - report = DraftExportReport(failures, str_to_filename) ep._report = report From 9dc5851f5dbc7c42043bdcf8116bef5e3ea5e36f Mon Sep 17 00:00:00 2001 From: Tomasz Bohutyn Date: Tue, 5 Nov 2024 18:19:51 +0000 Subject: [PATCH 085/503] handle more devices in method_type method of TensorVariable (#138078) Fixes #138077 Pull Request resolved: https://github.com/pytorch/pytorch/pull/138078 Approved by: https://github.com/jgong5, https://github.com/ezyang --- torch/_dynamo/variables/tensor.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/torch/_dynamo/variables/tensor.py b/torch/_dynamo/variables/tensor.py index 7d07c195524cf7..1957c4890a3663 100644 --- a/torch/_dynamo/variables/tensor.py +++ b/torch/_dynamo/variables/tensor.py @@ -664,10 +664,12 @@ def method_type(self, dtype=None, non_blocking=False, **kwargs): tensortype = next( k for k, v in tensortype_to_dtype.items() if self.dtype in v ) - if self.device.type == "cuda": - return ConstantVariable.create(f"torch.cuda.{tensortype.__name__}") - else: + if self.device.type == "cpu": return ConstantVariable.create(f"torch.{tensortype.__name__}") + else: + return ConstantVariable.create( + f"torch.{self.device.type}.{tensortype.__name__}" + ) elif ( dtype is not None and fqn(type(dtype.as_python_constant())) == "torch.tensortype" From 27ec3921bc66a92c4817c0ab45c42ff5a351b17d Mon Sep 17 00:00:00 2001 From: rzou Date: Fri, 1 Nov 2024 13:21:12 -0700 Subject: [PATCH 086/503] Optimize mutable torch.library.custom_op overhead (#139513) We don't need to do a loop over all the args, kwargs in the AdInplaceOrView key; we just need to bump the version on the args, kwargs that are mutable. 
On the benchmark mentioned in https://github.com/pytorch/pytorch/issues/139494 this made the time go from ``` mutate2 = 61.72943878173828 no_mutate2 = 36.89440155029297 mutate = 236.3092498779297 no_mutate = 59.31964874267578 ``` to ``` mutate2 = 47.976478576660156 no_mutate2 = 38.37468719482422 mutate = 71.21315002441406 no_mutate = 59.7432975769043 ``` Test Plan: - existing tests Pull Request resolved: https://github.com/pytorch/pytorch/pull/139513 Approved by: https://github.com/bdhirsh ghstack dependencies: #139509 --- torch/_library/custom_ops.py | 25 ++++++++++++++----------- torch/_library/utils.py | 14 +++++++++++++- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/torch/_library/custom_ops.py b/torch/_library/custom_ops.py index 2c57ceb9baf984..b0bc2ea9458c9b 100644 --- a/torch/_library/custom_ops.py +++ b/torch/_library/custom_ops.py @@ -594,19 +594,13 @@ def fake_impl(*args, **kwargs): schema = self._opoverload._schema if schema.is_mutable: + mutated_idxs, mutated_keys = utils.mutated_args_kwargs(schema) def adinplaceorview_impl(keyset, *args, **kwargs): - for arg, val in utils.zip_schema(schema, args, kwargs): - if not arg.alias_info: - continue - if not arg.alias_info.is_write: - continue - if isinstance(val, Tensor): - torch.autograd.graph.increment_version(val) - elif isinstance(val, (tuple, list)): - for v in val: - if isinstance(v, Tensor): - torch.autograd.graph.increment_version(v) + for idx in mutated_idxs: + increment_version(args[idx]) + for key in mutated_keys: + increment_version(kwargs[key]) with _C._AutoDispatchBelowADInplaceOrView(): return self._opoverload.redispatch( keyset & _C._after_ADInplaceOrView_keyset, *args, **kwargs @@ -740,6 +734,15 @@ def wrapped_func(keyset, *args, **kwargs): return register(func) +def increment_version(val: Any) -> None: + if isinstance(val, Tensor): + torch.autograd.graph.increment_version(val) + elif isinstance(val, (tuple, list)): + for v in val: + if isinstance(v, Tensor): + torch.autograd.graph.increment_version(v) + + # NOTE: [Supporting decorator and non-decorator usage] # # Some APIs may be both used as a decorator and not as a decorator. diff --git a/torch/_library/utils.py b/torch/_library/utils.py index 82ebdad018d466..9c12ec9ebb0bc1 100644 --- a/torch/_library/utils.py +++ b/torch/_library/utils.py @@ -3,7 +3,7 @@ import inspect import sys import warnings -from typing import Any, Callable, Dict, Iterable, Iterator, Tuple, Union +from typing import Any, Callable, Dict, Iterable, Iterator, List, Tuple, Union import torch import torch.utils._pytree as pytree @@ -463,3 +463,15 @@ def has_fake_kernel(op: torch._ops.OpOverload) -> bool: if opdef._abstract_fn is not None: return True return False + + +def mutated_args_kwargs(schema: _C.FunctionSchema) -> Tuple[List[int], List[str]]: + idxs = [] + keys = [] + for i, info in enumerate(schema.arguments): + if info.alias_info is not None and info.alias_info.is_write: + if info.kwarg_only: + keys.append(info.name) + else: + idxs.append(i) + return idxs, keys From 87059d4547551f197731f5c084e3be6054797578 Mon Sep 17 00:00:00 2001 From: Boyuan Feng Date: Tue, 5 Nov 2024 18:38:17 +0000 Subject: [PATCH 087/503] [AOTAutograd] Handle edge cases for donated buffer & enable in oss (#139669) This PR enables donated buffer in OSS and handles two edge cases: 1. While donated buffer relies on storage to check alias, sparse tensor subclasses does not provide access to storage. So we skip sparse tensor subclasses for donated buffer. 2. Handles missing "val" from n.meta. 
This is observed from `inductor/test_fused_attention.py::SDPAPatternRewriterCpuTests::test_sdpa_rewriter_11_cpu`, `functorch/test_aotdispatch.py::TestAOTAutograd::test_input_mutation_simple_with_none_and_nontensor`, and `inductor/test_compiled_autograd.py::TestCompiledAutograd::test_trace_run_with_rng_state`. Pull Request resolved: https://github.com/pytorch/pytorch/pull/139669 Approved by: https://github.com/bdhirsh --- test/dynamo/test_aot_autograd.py | 2 ++ .../jit_compile_runtime_wrappers.py | 25 +++++++++++++++---- torch/_functorch/config.py | 2 +- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/test/dynamo/test_aot_autograd.py b/test/dynamo/test_aot_autograd.py index 65ef6bcd6fe760..ce44463ba014ef 100644 --- a/test/dynamo/test_aot_autograd.py +++ b/test/dynamo/test_aot_autograd.py @@ -959,6 +959,8 @@ def mini_backend(gm, sample_inputs): out_test = m_compiled(*sample_inputs) self.assertEqual(out_ref, out_test) + # set donated_buffer=False due to create_graph=True + @torch._functorch.config.patch("donated_buffer", False) def test_eager_sequence_nr(self): class Model(torch.nn.Module): def __init__(self) -> None: diff --git a/torch/_functorch/_aot_autograd/jit_compile_runtime_wrappers.py b/torch/_functorch/_aot_autograd/jit_compile_runtime_wrappers.py index 4e6263e153b5d5..75bb60da680298 100644 --- a/torch/_functorch/_aot_autograd/jit_compile_runtime_wrappers.py +++ b/torch/_functorch/_aot_autograd/jit_compile_runtime_wrappers.py @@ -23,6 +23,7 @@ from torch._guards import CompileContext, TracingContext from torch._logging import getArtifactLogger, trace_structured from torch._subclasses import FakeTensor +from torch._subclasses.meta_utils import is_sparse_any from torch.fx.experimental._backward_state import BackwardState from torch.fx.experimental.proxy_tensor import is_sym_node from torch.fx.experimental.symbolic_shapes import fx_placeholder_vals @@ -290,14 +291,19 @@ def collect_fw_donated_buffer_idxs( storage_refs = set() for t in itertools.chain(fw_ins, user_fw_outs, bw_outs): - if isinstance(t, FakeTensor): + # Only access storage if a tensor has storage (not sparse) + if t is not None and isinstance(t, FakeTensor) and not is_sparse_any(t): storage_refs.add(StorageWeakRef(t.untyped_storage())) num_saved_tensor = len(saved_tensors) donated_buffer_idxs = [] for i in range(num_saved_tensor): t = saved_tensors[i] - if StorageWeakRef(t.untyped_storage()) not in storage_refs: + if ( + t is not None + and not is_sparse_any(t) + and StorageWeakRef(t.untyped_storage()) not in storage_refs + ): donated_buffer_idxs.append(i) return donated_buffer_idxs @@ -316,9 +322,18 @@ def collect_bw_donated_buffer_idxs( bw_outs = next(reversed(bw_module.graph.find_nodes(op="output"))).args[0] fw_outs = next(reversed(fw_module.graph.find_nodes(op="output"))).args[0] - fw_ins = [n.meta["val"] if hasattr(n, "meta") else None for n in fw_ins] - fw_outs = [n.meta["val"] if hasattr(n, "meta") else None for n in fw_outs] - bw_outs = [n.meta["val"] if hasattr(n, "meta") else None for n in bw_outs] + fw_ins = [ + n.meta["val"] if (hasattr(n, "meta") and "val" in n.meta) else None + for n in fw_ins + ] + fw_outs = [ + n.meta["val"] if (hasattr(n, "meta") and "val" in n.meta) else None + for n in fw_outs + ] + bw_outs = [ + n.meta["val"] if (hasattr(n, "meta") and "val" in n.meta) else None + for n in bw_outs + ] user_fw_outs = fw_outs[: fw_metadata.num_forward] saved_tensors = fw_outs[fw_metadata.tensors_saved_for_backwards_slice] diff --git a/torch/_functorch/config.py b/torch/_functorch/config.py 
index 73c0f3b71823d7..240eeaf96e0741 100644 --- a/torch/_functorch/config.py +++ b/torch/_functorch/config.py @@ -199,7 +199,7 @@ def remote_autograd_cache_default() -> Optional[bool]: # This controls whether we collect donated buffer. This flag must be set # False if a user wants to retain_graph=True for backward. -donated_buffer = False +donated_buffer = False if is_fbcode() else True # Controls the default graph output format used by draw_graph # Supported formats are defined here https://graphviz.org/docs/outputs/ From ee42a997455d8ae1f9c2c5d99b49f25d016e5fc3 Mon Sep 17 00:00:00 2001 From: Yifu Wang Date: Mon, 4 Nov 2024 13:39:43 -0800 Subject: [PATCH 088/503] [SymmetricMemory] introduce a binding for cuMemset32Async (#138755) ## This Stack This stack does the following things to support `xformers`-style, comm-aware Triton kernels: - Exposes `signal_pad`s as tensors in Python - Adds a binding for `cuMemsetAsync` These in combination aims to provide users with more flexibility to express custom signaling/synchronization patterns. ## This PR Make `cuMemset32Async` available via `_SymmetricMemory.memset32`. We chose `cuMemset32Async` over `cudaMemsetAsync` because it allows for `uint32_t`-wise memset. This provides users with better flexibility. To enable this, we also added the following cuda driver APIs in `c10::cuda::DriverAPI`: - `cuDevicePrimaryCtxRetain` - for obtaining the primary context of a device in the form of `CUcontext`. - `cuCtxGetCurrent`/`cuCtxSetCurrent` - for setting and restoring the context for cuda driver APIs such as `cuMemset32Async`. Pull Request resolved: https://github.com/pytorch/pytorch/pull/138755 Approved by: https://github.com/weifengpy, https://github.com/eqy, https://github.com/lw --- .lintrunner.toml | 2 - c10/cuda/driver_api.h | 1 + caffe2/CMakeLists.txt | 1 + test/distributed/test_symmetric_memory.py | 29 +++++++ .../c10d/CUDASymmetricMemoryOps.cu | 83 ++++++++++++++++--- torch/csrc/distributed/c10d/init.cpp | 18 +++- 6 files changed, 121 insertions(+), 13 deletions(-) diff --git a/.lintrunner.toml b/.lintrunner.toml index 3c628701bfbb48..191a905719ad6b 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -70,8 +70,6 @@ include_patterns = [ 'aten/src/ATen/native/cudnn/*.cpp', 'c10/**/*.h', 'c10/**/*.cpp', - 'distributed/c10d/*DMAConnectivity.*', - 'distributed/c10d/*SymmetricMemory.*', 'torch/csrc/**/*.h', 'torch/csrc/**/*.hpp', 'torch/csrc/**/*.cpp', diff --git a/c10/cuda/driver_api.h b/c10/cuda/driver_api.h index d698beada411f8..65cbdfe878dc0a 100644 --- a/c10/cuda/driver_api.h +++ b/c10/cuda/driver_api.h @@ -30,6 +30,7 @@ _(cuMemGetAllocationGranularity) \ _(cuMemExportToShareableHandle) \ _(cuMemImportFromShareableHandle) \ + _(cuMemsetD32Async) \ _(cuStreamWriteValue32) \ _(cuGetErrorString) diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index 8c14876b625cac..8c137cc90528f8 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -562,6 +562,7 @@ if(USE_CUDA) ${TORCH_SRC_DIR}/csrc/distributed/c10d/intra_node_comm.cpp ${TORCH_SRC_DIR}/csrc/distributed/c10d/CudaDMAConnectivity.cpp ${TORCH_SRC_DIR}/csrc/distributed/c10d/CUDASymmetricMemory.cu + ${TORCH_SRC_DIR}/csrc/distributed/c10d/CUDASymmetricMemoryOps.cu ${TORCH_SRC_DIR}/csrc/distributed/c10d/ProcessGroupNCCL.cpp PROPERTIES COMPILE_FLAGS "-DPYTORCH_C10_DRIVER_API_SUPPORTED=1" ) diff --git a/test/distributed/test_symmetric_memory.py b/test/distributed/test_symmetric_memory.py index e9bfcdcc9bcfc4..7e506286736862 100644 --- a/test/distributed/test_symmetric_memory.py +++ 
b/test/distributed/test_symmetric_memory.py @@ -24,9 +24,11 @@ from torch.testing._internal.common_utils import ( instantiate_parametrized_tests, parametrize, + requires_cuda, run_tests, skip_but_pass_in_sandcastle_if, skipIfRocm, + TestCase, ) @@ -849,5 +851,32 @@ def func_3(x): self.assertNotIn("return (buf0", code_3) +class SymmMemSingleProcTest(TestCase): + @skipIfRocm + @requires_cuda + def test_memset32(self): + t = _SymmetricMemory.empty_strided_p2p( + (64,), + (1,), + dtype=torch.uint32, + device=torch.device("cuda:0"), + group_name="0", + ).fill_(0) + + _SymmetricMemory.memset32(t, offset=32, val=1, count=16) + self.assertTrue(t[:32].eq(0).all()) + self.assertTrue(t[32:48].eq(1).all()) + self.assertTrue(t[48:].eq(0).all()) + + with self.assertRaises(RuntimeError): + _SymmetricMemory.memset32(t, offset=-1, val=1, count=16) + + with self.assertRaises(RuntimeError): + _SymmetricMemory.memset32(t, offset=32, val=4294967296, count=16) + + with self.assertRaises(RuntimeError): + _SymmetricMemory.memset32(t, offset=32, val=1, count=-1) + + if __name__ == "__main__": run_tests() diff --git a/torch/csrc/distributed/c10d/CUDASymmetricMemoryOps.cu b/torch/csrc/distributed/c10d/CUDASymmetricMemoryOps.cu index ee619f821161f8..d1598692d0aa7b 100644 --- a/torch/csrc/distributed/c10d/CUDASymmetricMemoryOps.cu +++ b/torch/csrc/distributed/c10d/CUDASymmetricMemoryOps.cu @@ -1,8 +1,14 @@ -#if defined(CUDART_VERSION) && CUDART_VERSION >= 12030 - #include #include #include +#include +#include + +#if !defined(USE_ROCM) && defined(PYTORCH_C10_DRIVER_API_SUPPORTED) +#include +#endif + +#if defined(CUDART_VERSION) && CUDART_VERSION >= 12030 #ifndef AT_PER_OPERATOR_HEADERS #include @@ -11,8 +17,6 @@ #include #endif -#include - #include #include @@ -491,7 +495,61 @@ at::Tensor two_shot_all_reduce_( return input; } +} // namespace +#endif // #if defined(CUDART_VERSION) && CUDART_VERSION >= 12030 + +namespace { + +at::Tensor memset32_( + at::Tensor& input, + int64_t offset, + int64_t val, + int64_t count) { +#if !defined(USE_ROCM) && defined(PYTORCH_C10_DRIVER_API_SUPPORTED) + TORCH_CHECK( + input.dim() == 1 && input.is_contiguous() && + input.scalar_type() == c10::ScalarType::UInt32, + "symm_mem::memset32_: input must be a flat, contiguous uint32 tensor."); + + TORCH_CHECK( + offset > 0 && count > 0, + "symm_mem::memset32_: offset and count must be positive integers."); + + TORCH_CHECK( + val >= 0 && + static_cast(val) <= std::numeric_limits::max(), + "symm_mem::memset32_: val must be in the range of " + "[0, 4294967295] (uint32_t).") + + auto element_size = c10::elementSize(input.scalar_type()); + TORCH_CHECK( + offset + count < input.numel(), + "symm_mem::memset32_: offset + count (", + offset + count, + ") exceeded the numel of the input (", + input.numel(), + ")"); + + auto addr = reinterpret_cast(input.data_ptr()) + offset; + + c10::cuda::CUDAGuard guard(input.device()); + auto driver_api = c10::cuda::DriverAPI::get(); + C10_CUDA_DRIVER_CHECK(driver_api->cuMemsetD32Async_( + reinterpret_cast(addr), + val, + count, + at::cuda::getCurrentCUDAStream())); +#else + TORCH_CHECK( + false, "CUDASymmetricMemory requires PYTORCH_C10_DRIVER_API_SUPPORTED"); +#endif + return input; +} + +} // namespace + TORCH_LIBRARY_FRAGMENT(symm_mem, m) { +#if defined(CUDART_VERSION) && CUDART_VERSION >= 12030 m.def( "multimem_all_reduce_(Tensor(a!) 
input, str reduce_op, str group_name) -> Tensor(a!)", torch::dispatch(c10::DispatchKey::CUDA, ::multimem_all_reduce_), @@ -519,8 +577,12 @@ TORCH_LIBRARY_FRAGMENT(symm_mem, m) { "one_shot_all_reduce(Tensor input, str reduce_op, str group_name) -> Tensor", {at::Tag::pt2_compliant_tag}); - m.impl("one_shot_all_reduce", torch::dispatch(c10::DispatchKey::Meta, ::one_shot_all_reduce_meta)); - m.impl("one_shot_all_reduce", torch::dispatch(c10::DispatchKey::CUDA, ::one_shot_all_reduce)); + m.impl( + "one_shot_all_reduce", + torch::dispatch(c10::DispatchKey::Meta, ::one_shot_all_reduce_meta)); + m.impl( + "one_shot_all_reduce", + torch::dispatch(c10::DispatchKey::CUDA, ::one_shot_all_reduce)); m.def( "one_shot_all_reduce_out(Tensor input, str reduce_op, str group_name, Tensor(a!) out) -> Tensor(a!)", @@ -531,8 +593,9 @@ TORCH_LIBRARY_FRAGMENT(symm_mem, m) { "two_shot_all_reduce_(Tensor(a!) input, str reduce_op, str group_name) -> Tensor(a!)", torch::dispatch(c10::DispatchKey::CUDA, ::two_shot_all_reduce_), {at::Tag::pt2_compliant_tag}); -} - -} // namespace - #endif + m.def( + "memset32_(Tensor(a!) input, int offset, int val, int count) -> Tensor(a!)", + torch::dispatch(c10::DispatchKey::CUDA, ::memset32_), + {at::Tag::pt2_compliant_tag}); +} diff --git a/torch/csrc/distributed/c10d/init.cpp b/torch/csrc/distributed/c10d/init.cpp index b71aa7886bfdea..ea760b068ec2f6 100644 --- a/torch/csrc/distributed/c10d/init.cpp +++ b/torch/csrc/distributed/c10d/init.cpp @@ -1122,7 +1122,23 @@ This class does not support ``__members__`` property.)"); "stream_write_value32", &SymmetricMemory::stream_write_value32, py::arg("addr"), - py::arg("val")); + py::arg("val")) + // Util functions that are often used together with symmetric memory but + // not necessarily directly on symmetric memory. + .def_static( + "memset32", + [](at::Tensor& input, int64_t offset, int64_t val, int64_t count) { + // The range of `val` is checked inside the op + auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("symm_mem::memset32_", "") + .typed(); + return op.call(input, offset, val, count); + }, + py::arg("input"), + py::arg("offset"), + py::arg("val"), + py::arg("count") = 1); auto store = py::class_<::c10d::Store, c10::intrusive_ptr<::c10d::Store>, PythonStore>( From 5f2ed505eba1862b26a6a69f8e0493a26a7c9071 Mon Sep 17 00:00:00 2001 From: Ke Wen Date: Tue, 5 Nov 2024 08:59:55 -0800 Subject: [PATCH 089/503] [PGNCCL] Watchdog prints call-time traceback when reporting timeout (#139659) ### Motivation Today, watchdog only reports that it found a collective timeout: ``` [rank1]:[E1104 14:02:18.767594328 ProcessGroupNCCL.cpp:688] [Rank 1] Watchdog caught collective operation timeout: WorkNCCL(SeqNum=1, OpType=ALLREDUCE, NumelIn=200, NumelOut=200, Timeout(ms)=5000) ran for 5096 milliseconds before timing out. ``` While this is nice, it is hard to associate the error with user's program or library stack. ### This PR This PR gives watchdog the ability to report the call-time stack of the collective, so that it would be easier to track the error back to the program's behavior. The call-time stack was recorded by Flight Recorder with minimal overhead (for details, please read this [doc](https://dev-discuss.pytorch.org/t/fast-combined-c-python-torchscript-inductor-tracebacks/1158) written by @zdevito ). In `ProcessGroupNCCL`, we are only tracking / reporting the python part so that it fits most PyTorch users. ### Demo [stack_demo.py](https://gist.github.com/kwen2501/6758e18d305d67fc6f3f926217825c09). 
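A minimal sketch of such a script is below. This is a hypothetical reconstruction that mirrors the function names in the tracebacks shown further down (`bar`, `baz`, `foo`, `main`), not the actual contents of the linked gist: the two ranks take different branches in `foo()` and issue mismatched collectives, so the collective never completes and the watchdog reports a timeout.

```python
# Hypothetical repro sketch -- the linked gist is the authoritative demo.
# Ranks 0 and 1 issue different collectives, so both hang until the watchdog
# reports a timeout (and, with this PR, the call-time stack trace).
import os
from datetime import timedelta

import torch
import torch.distributed as dist


def bar(t):
    dist.all_reduce(t)  # rank 0 ends up blocked here


def baz(t):
    out = torch.empty(t.numel() * dist.get_world_size(), device=t.device)
    dist.all_gather_into_tensor(out, t)  # rank 1 issues a different collective


def foo(t):
    if dist.get_rank() == 0:
        bar(t)
    else:
        baz(t)


def main():
    torch.cuda.set_device(int(os.environ["LOCAL_RANK"]))
    # Short timeout (an arbitrary choice for the demo) so the watchdog fires quickly.
    dist.init_process_group("nccl", timeout=timedelta(seconds=5))
    t = torch.ones(200, device="cuda")
    foo(t)


if __name__ == "__main__":
    main()
```

Run it with: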
``` TORCH_NCCL_TRACE_BUFFER_SIZE=100 torchrun --nproc-per-node 2 stack_demo.py ``` `TORCH_NCCL_TRACE_BUFFER_SIZE` is for turning on the Flight Recorder. Output: ``` [rank0]:[E1104 14:19:27.591610653 ProcessGroupNCCL.cpp:695] Stack trace of the timedout collective operation: #0 all_reduce from /data/users/kw2501/pytorch/torch/distributed/distributed_c10d.py:2696 #1 wrapper from /data/users/kw2501/pytorch/torch/distributed/c10d_logger.py:83 #2 bar from /data/users/kw2501/sync_async/repro.py:15 #3 foo from /data/users/kw2501/sync_async/repro.py:24 #4 main from /data/users/kw2501/sync_async/repro.py:34 #5 from /data/users/kw2501/sync_async/repro.py:40 [rank1]:[E1104 14:19:27.771430164 ProcessGroupNCCL.cpp:695] Stack trace of the timedout collective operation: #0 all_gather_into_tensor from /data/users/kw2501/pytorch/torch/distributed/distributed_c10d.py:3630 #1 wrapper from /data/users/kw2501/pytorch/torch/distributed/c10d_logger.py:83 #2 baz from /data/users/kw2501/sync_async/repro.py:20 #3 foo from /data/users/kw2501/sync_async/repro.py:26 #4 main from /data/users/kw2501/sync_async/repro.py:34 #5 from /data/users/kw2501/sync_async/repro.py:40 ``` From the log above, we can tell that `bar()` and `baz()` are the places where the two ranks divert. Pull Request resolved: https://github.com/pytorch/pytorch/pull/139659 Approved by: https://github.com/wconstab, https://github.com/fduwjj --- torch/csrc/distributed/c10d/NCCLUtils.cpp | 44 +++++++++++++++++++ torch/csrc/distributed/c10d/NCCLUtils.hpp | 7 +++ .../distributed/c10d/ProcessGroupNCCL.cpp | 28 ++++++++++++ 3 files changed, 79 insertions(+) diff --git a/torch/csrc/distributed/c10d/NCCLUtils.cpp b/torch/csrc/distributed/c10d/NCCLUtils.cpp index 00bd235c866668..98ec54d77aa245 100644 --- a/torch/csrc/distributed/c10d/NCCLUtils.cpp +++ b/torch/csrc/distributed/c10d/NCCLUtils.cpp @@ -376,6 +376,33 @@ void DebugInfoWriter::registerWriter(std::unique_ptr writer) { writer_ = std::move(writer); } +// Returns the traceback of current entry, in string form. +// Note: `getTraceback` invokes `torch::symbolize`, which may need to acquire +// the GIL. If you don't want to block the current thread or take the risk of a +// GIL deadlock, you can use an asynchronous calling mechanism like std::async. +std::string NCCLTraceBuffer::Entry::getTraceback() { + torch::CapturedTraceback* traceback = traceback_.get(); + torch::SymbolizedTracebacks s_tbs = torch::symbolize({traceback}); + // We use 0 because we only have one traceback here. + const auto& s_tb = s_tbs.tracebacks.at(0); + std::stringstream oss; + for (auto idx : c10::irange(s_tb.size())) { + auto frame_id = s_tb[idx]; + const auto& frame = s_tbs.all_frames.at(frame_id); + oss << "#" << idx << " " << frame.funcname << " from " << frame.filename + << ":" << frame.lineno << '\n'; + } + /* Resulted format is like: + #0 all_reduce from pytorch/torch/distributed/distributed_c10d.py:2696 + #1 wrapper from pytorch/torch/distributed/c10d_logger.py:83 + #2 bar from /home/user/repro.py:15 + #3 foo from /home/user/repro.py:24 + #4 main from /home/user/repro.py:34 + #5 from /home/user/repro.py:40 + */ + return oss.str(); +} + std::optional NCCLTraceBuffer::record( size_t pg_id, const std::tuple& pg_name, @@ -495,6 +522,23 @@ std::vector NCCLTraceBuffer::dump_entries() { return result; } +// Returns the entry with the given id, if it exists. Otherwise, returns +// std::nullopt. 
+std::optional NCCLTraceBuffer::getEntry( + std::optional id) { + if (!enabled_ || !id) { + return std::nullopt; + } + + std::unique_lock guard(mutex_); + Entry entry = entries_.at(*id % max_entries_); + if (entry.id_ == *id) { + return entry; + } else { + return std::nullopt; + } +} + void NCCLTraceBuffer::retire_id( std::optional id, bool compute_duration) { diff --git a/torch/csrc/distributed/c10d/NCCLUtils.hpp b/torch/csrc/distributed/c10d/NCCLUtils.hpp index 32361e17580f5f..69ca82adec5727 100644 --- a/torch/csrc/distributed/c10d/NCCLUtils.hpp +++ b/torch/csrc/distributed/c10d/NCCLUtils.hpp @@ -663,6 +663,9 @@ struct NCCLTraceBuffer { c10::SmallVector sizes_; // flattened from inputs, outputs bool retired_ = false; // is this work entry no longer in the workMetaList_? // a retired but not completed event has timed out + + // Returns the traceback of current entry, in string form. + std::string getTraceback(); }; bool enabled_ = false; @@ -699,6 +702,10 @@ struct NCCLTraceBuffer { std::vector dump_entries(); + // Returns the entry with the given id, if it exists. Otherwise, returns + // std::nullopt. + std::optional getEntry(std::optional id); + /* Mark an Event as completed and free its events. This is called by the watchdog thread, and is asynchronous from the diff --git a/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp b/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp index 4d0b9f17751fae..0da9f3fcb083e8 100644 --- a/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp +++ b/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp @@ -690,6 +690,34 @@ bool ProcessGroupNCCL::WorkNCCL::checkTimeout( " milliseconds before timing out."); LOG(ERROR) << exceptionMsg; + + // Get the stack trace of the work at call time + // First step we get the corresponding record entry from FR, based on work's + // trace_id_ + std::optional entry = + NCCLTraceBuffer::get()->getEntry(trace_id_); + if (entry.has_value()) { + auto entryVal = entry.value(); + // Get stack trace from FR entry, in string format + // Note: `getTraceback` call below invokes `torch::symbolize`, which may + // need to acquire the GIL. In order for watchdog to be block-free, we make + // the call with std::async. + auto future = std::async( + std::launch::async, [&entryVal]() { return entryVal.getTraceback(); }); + // Wait for the future to complete or timeout + auto status = future.wait_for(std::chrono::seconds(8)); + if (status == std::future_status::ready) { + std::string tracebackStr = future.get(); + LOG(ERROR) << "Stack trace of the timedout collective operation: \n" + << tracebackStr; + } // else, symbolizer probably timed out, we skip logging the stack trace. + } else { + LOG(ERROR) + << "Stack trace of the timedout collective not found, " + << "potentially because FlightRecorder is disabled. " + << "You can enable it by setting TORCH_NCCL_TRACE_BUFFER_SIZE to a non-zero value."; + } + std::exception_ptr exception_ptr = std::make_exception_ptr(C10_BUILD_ERROR(DistBackendError, exceptionMsg)); setException(exception_ptr); From 693a0a1bd4d08c2efe938d4f7cce8ff2a78d6505 Mon Sep 17 00:00:00 2001 From: Ryan Guo Date: Mon, 4 Nov 2024 16:31:28 -0800 Subject: [PATCH 090/503] [dynamo][NFC] Rename `mutable_local` and add documentation (#139339) This patch addresses the renaming part of #133027, specifically, it renames the following and adds documentation for relevant classes. 1. `VariableTracker.mutable_local` to `mutation_type` 2. `MatableLocal `to `ValueMutationNew` 3. `MutableSideEffects `to `ValueMutationExisting` 4. 
`MutableLocalSource` to `SourceType` 5. `MutableLocalSource.Local` to `New` Note that (2), (3) and (5) are mainly to bring consistency between them and `AttributeMutationNew`, `AttributeMutationExisting`. Pull Request resolved: https://github.com/pytorch/pytorch/pull/139339 Approved by: https://github.com/jansel, https://github.com/mlazos, https://github.com/anijain2305 --- .../source/torch.compiler_dynamo_deepdive.rst | 2 +- torch/_dynamo/codegen.py | 4 +- torch/_dynamo/output_graph.py | 2 +- torch/_dynamo/side_effects.py | 107 ++++++++++++------ torch/_dynamo/symbolic_convert.py | 26 ++--- torch/_dynamo/variables/base.py | 56 +++++---- torch/_dynamo/variables/builder.py | 22 ++-- torch/_dynamo/variables/builtin.py | 45 +++++--- torch/_dynamo/variables/dicts.py | 20 ++-- torch/_dynamo/variables/functions.py | 4 +- torch/_dynamo/variables/iter.py | 38 ++++--- torch/_dynamo/variables/lists.py | 34 +++--- torch/_dynamo/variables/misc.py | 10 +- torch/_dynamo/variables/nn_module.py | 18 +-- torch/_dynamo/variables/tensor.py | 2 +- torch/_dynamo/variables/user_defined.py | 18 +-- 16 files changed, 241 insertions(+), 167 deletions(-) diff --git a/docs/source/torch.compiler_dynamo_deepdive.rst b/docs/source/torch.compiler_dynamo_deepdive.rst index 0fb5f920723d74..4bf4633d3e4e42 100644 --- a/docs/source/torch.compiler_dynamo_deepdive.rst +++ b/docs/source/torch.compiler_dynamo_deepdive.rst @@ -317,7 +317,7 @@ all of Python bytecodes. As an example, we can see the implementation of def BUILD_LIST(self, inst): items = self.popn(inst.argval) - self.push(ListVariable(items, mutable_local=MutableLocal())) + self.push(ListVariable(items, mutation_type=ValueMutationNew())) This is the bytecode generated by constructions like ``l = [2, 3, 4]``. In this case, since there are three elements, the generated bytecode is diff --git a/torch/_dynamo/codegen.py b/torch/_dynamo/codegen.py index dd83f56c346159..fe7069c65bb0f9 100644 --- a/torch/_dynamo/codegen.py +++ b/torch/_dynamo/codegen.py @@ -140,9 +140,9 @@ def __call__(self, value, allow_cache=True): # value.source will get mutated to hold `value` # mutable_side_effects_from_source=False is used to codegen the mutation # mutable_side_effects_from_source=True is used to codegen a reference - from .side_effects import MutableSideEffects + from .side_effects import ValueMutationExisting - if isinstance(value.mutable_local, MutableSideEffects): + if isinstance(value.mutation_type, ValueMutationExisting): self(value.source) return diff --git a/torch/_dynamo/output_graph.py b/torch/_dynamo/output_graph.py index e2d07350062f34..201748da5f2102 100644 --- a/torch/_dynamo/output_graph.py +++ b/torch/_dynamo/output_graph.py @@ -936,7 +936,7 @@ def handle_aliases_for_stolen_lists(self, tx): if not ( ( x not in self.side_effects.store_attr_mutations - or isinstance(x.mutable_local, AttributeMutationExisting) + or isinstance(x.mutation_type, AttributeMutationExisting) ) and isinstance(x.source, GetItemSource) and isinstance(x.source.base, LocalSource) diff --git a/torch/_dynamo/side_effects.py b/torch/_dynamo/side_effects.py index cdf0649a7cbc43..f71f153f243665 100644 --- a/torch/_dynamo/side_effects.py +++ b/torch/_dynamo/side_effects.py @@ -22,42 +22,75 @@ from .utils import is_frozen_dataclass, nn_module_new, object_new from .variables.base import ( is_side_effect_safe, - MutableLocalBase, - MutableLocalSource, + MutationType, + SourceType, VariableTracker, ) from .variables.user_defined import FrozenDataClassVariable -class MutableSideEffects(MutableLocalBase): +class 
ValueMutationExisting(MutationType): """ - VariableTracker.mutable_local marker to indicate a list passed as - an input that if we mutate we need to re-apply those mutations after - the graph runs. + This case of VariableTracker.mutation_type marker indicates + 1. Dynamo allows mutation on the value itself (rather than its attributes). + 2. The value exists before Dynamo tracing started. + + For instance, Dynamo could model a pre-existing list with this marker, + indicating that if we encounter mutations to this list, we need to buffer + and re-apply those mutations after the graph runs, since the list might be + used afterwards in Python. """ + # A flag to indicate whether mutation happened on the associated + # `VariableTracker`. This enables SideEffects to accurately and quickly + # filter out which pre-existing values it needs to generate mutation for. + is_modified: bool + def __init__(self, is_modified: bool = False): - super().__init__(MutableLocalSource.Existing) + super().__init__(SourceType.Existing) self.is_modified = is_modified -class AttributeMutation(MutableLocalBase): +class AttributeMutation(MutationType): """ - VariableTracker.mutable_local marker to track changes to attributes + This case of VariableTracker.mutation_type marker indicates that Dynamo + allows mutation on the value's attributes. """ - def __init__(self, typ: MutableLocalSource): + def __init__(self, typ: SourceType): super().__init__(typ) class AttributeMutationExisting(AttributeMutation): + """ + This case of VariableTracker.mutation_type marker indicates + 1. Dynamo allows mutation on the value's attributes. + 2. The value exists before Dynamo tracing started. + + For instance, Dynamo could model a pre-existing object with this marker, + indicating that if we encounter mutations to this object, we need to buffer + then re-apply those mutations after the graph runs, since the object might + be used afterwards in Python. + """ + def __init__(self): - super().__init__(MutableLocalSource.Existing) + super().__init__(SourceType.Existing) class AttributeMutationNew(AttributeMutation): + """ + This case of VariableTracker.mutation_type marker indicates + 1. Dynamo allows mutation on the value's attributes. + 2. The value is created by the bytecode Dynamo is tracing through. + + For instance, Dynamo could model a newly created object with this marker, + indicating that while we need to model mutations to this object, we don't + have to emit bytecode for these mutations if the object doesn't escape into + the Python world. 
+ """ + def __init__(self, cls_source: Optional[Source] = None): - super().__init__(MutableLocalSource.Local) + super().__init__(SourceType.New) self.cls_source = cls_source @@ -164,7 +197,7 @@ def check_allowed_side_effect(self, item): return True if self.should_allow_side_effects_under_checkpoint(): return True - if not is_side_effect_safe(item.mutable_local): + if not is_side_effect_safe(item.mutation_type): unimplemented( "HigherOrderOperator: Mutating a variable not in the current scope (SideEffects)" ) @@ -209,7 +242,7 @@ def cls_supports_mutation_side_effects(cls): ) def is_attribute_mutation(self, item): - return isinstance(item.mutable_local, AttributeMutation) + return isinstance(item.mutation_type, AttributeMutation) def has_pending_mutation(self, item): return self.is_attribute_mutation(item) and bool( @@ -222,17 +255,17 @@ def has_pending_mutation_of_attr(self, item, name): ) and name in self.store_attr_mutations.get(item, ()) def is_modified(self, item): - if isinstance(item.mutable_local, AttributeMutationNew): + if isinstance(item.mutation_type, AttributeMutationNew): return True if self.is_attribute_mutation(item): return item in self.store_attr_mutations - return item.mutable_local.is_modified + return item.mutation_type.is_modified def _track_obj( self, item: Any, variable: VariableTracker, - mutable_cls=MutableSideEffects, + mutation_type_cls=ValueMutationExisting, ): """Start tracking a new variable for mutation""" assert variable.source is not None @@ -246,7 +279,7 @@ def _track_obj( f"Source of previously tracked object: {self.id_to_variable[id(item)].source}." ) - variable.mutable_local = mutable_cls() + variable.mutation_type = mutation_type_cls() self.id_to_variable[id(item)] = variable self.keepalive.append(item) return variable @@ -258,7 +291,9 @@ def track_object_existing( item: Any, variable: VariableTracker, ): - return self._track_obj(item, variable, mutable_cls=AttributeMutationExisting) + return self._track_obj( + item, variable, mutation_type_cls=AttributeMutationExisting + ) def track_object_new( self, @@ -282,7 +317,7 @@ def track_object_new( unimplemented(f"Unable to construct the object of type {user_cls}") variable = variable_cls( obj, - mutable_local=AttributeMutationNew(cls_source), + mutation_type=AttributeMutationNew(cls_source), **options, ) self.id_to_variable[id(obj)] = variable @@ -320,7 +355,7 @@ def track_cell_new( ): obj = object() variable = variables.NewCellVariable( - mutable_local=AttributeMutationNew(), + mutation_type=AttributeMutationNew(), ) self.id_to_variable[id(obj)] = variable self.keepalive.append(obj) @@ -328,7 +363,7 @@ def track_cell_new( def track_cell_existing(self, source: Source, item: Any): variable = variables.NewCellVariable( - mutable_local=AttributeMutationExisting(), + mutation_type=AttributeMutationExisting(), source=source, ) self.id_to_variable[id(item)] = variable @@ -337,7 +372,7 @@ def track_cell_existing(self, source: Source, item: Any): def track_global_existing(self, source: Source, item: Any): variable = variables.NewGlobalVariable( - mutable_local=AttributeMutationExisting(), + mutation_type=AttributeMutationExisting(), source=source, ) self.id_to_variable[id(item)] = variable @@ -363,7 +398,7 @@ def track_tensor_variables_from_runahead_side_effects(self, other): def prune_dead_object_new(self, tx): live_new_objects: Set[VariableTracker] = set() - # use this to avoid cycles in mutable_local (though I'm not sure if that + # use this to avoid cycles in mutation_type (though I'm not sure if that # can 
actually happen). visited: Set[VariableTracker] = set({}) @@ -372,7 +407,7 @@ def visit(var: VariableTracker): return visited.add(var) # Object may have been mutated, store this mutation. - if isinstance(var.mutable_local, AttributeMutationNew): + if isinstance(var.mutation_type, AttributeMutationNew): live_new_objects.add(var) # It's possible that we have mutated the value of this variable # to be another one. The new value is in store_attr_mutations. @@ -383,14 +418,14 @@ def visit(var: VariableTracker): ) def is_live(var: VariableTracker): - if isinstance(var.mutable_local, AttributeMutationNew): + if isinstance(var.mutation_type, AttributeMutationNew): return var in live_new_objects return True pre_existing_vars = [ var for var in self.id_to_variable.values() - if not isinstance(var.mutable_local, AttributeMutationNew) + if not isinstance(var.mutation_type, AttributeMutationNew) ] # The only live side effects come from returns (tx.stack), any intermediates @@ -417,15 +452,15 @@ def is_live(var: VariableTracker): def mutation(self, var): self.check_allowed_side_effect(var) - if isinstance(var.mutable_local, MutableSideEffects): - var.mutable_local.is_modified = True + if isinstance(var.mutation_type, ValueMutationExisting): + var.mutation_type.is_modified = True def _get_modified_vars(self): return [var for var in self.id_to_variable.values() if self.is_modified(var)] def codegen_save_tempvars(self, cg: PyCodegen): for var in self._get_modified_vars(): - if isinstance(var.mutable_local, AttributeMutationNew) and isinstance( + if isinstance(var.mutation_type, AttributeMutationNew) and isinstance( var, variables.NewCellVariable ): cg.add_push_null( @@ -434,13 +469,13 @@ def codegen_save_tempvars(self, cg: PyCodegen): cg.extend_output(create_call_function(0, False)) cg.add_cache(var) var.source = LocalSource(cg.tempvars[var]) # type: ignore[attr-defined] - elif isinstance(var.mutable_local, AttributeMutationNew): + elif isinstance(var.mutation_type, AttributeMutationNew): if isinstance(var, variables.AutogradFunctionContextVariable): unimplemented("AutogradFunctionContextVariable escaped") cg.add_push_null( lambda: cg.load_import_from(utils.__name__, "object_new") ) - cg(var.mutable_local.cls_source) + cg(var.mutation_type.cls_source) cg.extend_output(create_call_function(1, False)) cg.add_cache(var) var.source = LocalSource(cg.tempvars[var]) @@ -467,7 +502,7 @@ def register_hook(self, tensor, hook, handle, name): assert isinstance(hook, variables.VariableTracker) assert ( isinstance(handle, variables.RemovableHandleVariable) - and handle.mutable_local + and handle.mutation_type ) assert hasattr(torch.Tensor, name) idx = len(self.tensor_hooks.keys()) @@ -536,11 +571,11 @@ def gen_fn(): cg.add_cache(handle) def get_ca_final_callbacks_var(self): - from .variables.base import MutableLocal + from .variables.base import ValueMutationNew if self.ca_final_callbacks_var is None: self.ca_final_callbacks_var = variables.ListVariable( - [], mutable_local=MutableLocal() + [], mutation_type=ValueMutationNew() ) return self.ca_final_callbacks_var @@ -677,7 +712,7 @@ def codegen_update_mutated(self, cg: PyCodegen): ) elif isinstance(value, variables.DeletedVariable): if isinstance( - var.mutable_local, AttributeMutationExisting + var.mutation_type, AttributeMutationExisting ) and hasattr(getattr(var, "value", None), name): cg.tx.output.update_co_names(name) cg(var.source) diff --git a/torch/_dynamo/symbolic_convert.py b/torch/_dynamo/symbolic_convert.py index b76148798c5d48..5b4b233b902435 100644 --- 
a/torch/_dynamo/symbolic_convert.py +++ b/torch/_dynamo/symbolic_convert.py @@ -70,7 +70,7 @@ LazyString, proxy_args_kwargs, ) -from .variables.base import MutableLocal, typestr, VariableTracker +from .variables.base import typestr, ValueMutationNew, VariableTracker from .variables.builder import FrameStateSizeEntry, wrap_fx_proxy from .variables.builtin import BuiltinVariable from .variables.constant import ConstantVariable @@ -1956,13 +1956,13 @@ def BUILD_SLICE(self, inst): def BUILD_LIST(self, inst): items = self.popn(inst.argval) - self.push(ListVariable(items, mutable_local=MutableLocal())) + self.push(ListVariable(items, mutation_type=ValueMutationNew())) def BUILD_SET(self, inst): if config.inject_BUILD_SET_unimplemented_TESTING_ONLY: unimplemented("missing: BUILD_SET") items = self.popn(inst.argval) - new_set = SetVariable(items, mutable_local=MutableLocal()) + new_set = SetVariable(items, mutation_type=ValueMutationNew()) self.push(new_set) def BUILD_LIST_UNPACK(self, inst, cls=ListVariable): @@ -1973,7 +1973,7 @@ def BUILD_LIST_UNPACK(self, inst, cls=ListVariable): items.extend(seq.force_unpack_var_sequence(self)) except NotImplementedError: unimplemented(f"BUILD_LIST_UNPACK {seq}") - self.push(cls(items, mutable_local=MutableLocal())) + self.push(cls(items, mutation_type=ValueMutationNew())) def BUILD_TUPLE_UNPACK(self, inst): self.BUILD_LIST_UNPACK(inst, cls=TupleVariable) @@ -1983,7 +1983,7 @@ def BUILD_TUPLE_UNPACK(self, inst): def BUILD_MAP(self, inst): items = self.popn(inst.argval * 2) d = dict(zip(items[::2], items[1::2])) - self.push(ConstDictVariable(d, mutable_local=MutableLocal())) + self.push(ConstDictVariable(d, mutation_type=ValueMutationNew())) def BUILD_MAP_UNPACK(self, inst): items = self.popn(inst.argval) @@ -1996,7 +1996,7 @@ def BUILD_MAP_UNPACK(self, inst): self.push( ConstDictVariable( result, - mutable_local=MutableLocal(), + mutation_type=ValueMutationNew(), ) ) @@ -2014,7 +2014,7 @@ def BUILD_CONST_KEY_MAP(self, inst): self.push( ConstDictVariable( dict(zip(keys, values)), - mutable_local=MutableLocal(), + mutation_type=ValueMutationNew(), ) ) @@ -2030,7 +2030,7 @@ def SET_ADD(self, inst): assert inst.argval > 0 obj = self.stack[-inst.arg] assert isinstance(obj, SetVariable) - assert obj.mutable_local + assert obj.mutation_type return obj.call_method(self, "add", [v], {}) def SET_UPDATE(self, inst): @@ -2038,7 +2038,7 @@ def SET_UPDATE(self, inst): assert inst.argval > 0 obj = self.stack[-inst.arg] assert isinstance(obj, SetVariable) - assert obj.mutable_local + assert obj.mutation_type obj.call_method(self, "update", [v], {}) def LIST_APPEND(self, inst): @@ -2046,7 +2046,7 @@ def LIST_APPEND(self, inst): assert inst.argval > 0 obj = self.stack[-inst.arg].realize() assert isinstance(obj, ListVariable) - assert obj.mutable_local + assert obj.mutation_type self.output.side_effects.mutation(obj) obj.items.append(v) @@ -2248,7 +2248,7 @@ def LIST_EXTEND(self, inst): assert inst.argval > 0 obj = self.stack[-inst.arg] assert isinstance(obj, ListVariable) - assert obj.mutable_local + assert obj.mutation_type obj.call_method(self, "extend", [v], {}) def LIST_TO_TUPLE(self, inst): @@ -2259,7 +2259,7 @@ def DICT_MERGE(self, inst): assert inst.argval > 0 obj = self.stack[-inst.arg].realize() assert isinstance(obj, ConstDictVariable) - assert obj.mutable_local + assert obj.mutation_type obj.call_method(self, "update", [v], {}) DICT_UPDATE = DICT_MERGE @@ -3286,7 +3286,7 @@ def get_trace_call_log_str(): assert tracer.symbolic_result.as_python_constant() is None 
return ListIteratorVariable( tracer.generated_items, - mutable_local=MutableLocal(), + mutation_type=ValueMutationNew(), ) else: return tracer.symbolic_result diff --git a/torch/_dynamo/variables/base.py b/torch/_dynamo/variables/base.py index 4572131553ea07..350c63f9e67935 100644 --- a/torch/_dynamo/variables/base.py +++ b/torch/_dynamo/variables/base.py @@ -15,25 +15,36 @@ from .symbolic_convert import InstructionTranslator, InstructionTranslatorBase -class MutableLocalSource(Enum): +class SourceType(Enum): """ - If the VariableTracker.mutable_local represents a Variable that: + This Enum divides VariableTracker into 2 cases, depending on the variable + it represents: - already existed that Dynamo began tracking while introspection (Existing) - - is a new variable that is created during Dynamo introspection (Local) + - is a new variable that is created during Dynamo introspection (New) + + In general, we have these invariants: + 1. for `VariableTracker` associated with `Existing`, its `source` field must not be None. + 2. for `VariableTracker` associated with `New`, most of the time its + `source` field is None, except for cases like side effect codegen for + `AttributeMutationNew`, during which we generate a + `LocalSource('tmp...')` for such variable, to facilitate codegen. """ Existing = 0 - Local = 1 + New = 1 -class MutableLocalBase: +class MutationType: """ - Base class for Variable.mutable_local + Base class for Variable.mutation_type. It encodes information about + 1. The type of mutation Dynamo allows on the variable. + 2. Whether the value represented by this variable already existed before + Dynamo tracing. """ - def __init__(self, typ: MutableLocalSource) -> None: + def __init__(self, typ: SourceType) -> None: # In HigherOrderOperator tracing, we need to distinguish - # between MutableLocals inside the HigherOrderOperator and + # between MutationTypes inside the HigherOrderOperator and # ones outside it. For example, it is not safe to mutate # `a` in the following example because it was constructed # in a different scope. @@ -55,23 +66,28 @@ def __init__(self, typ: MutableLocalSource) -> None: # Dynamo introspection of a HigherOrderOp. # The exact number corresponds to the level # of nested HigherOrderOps. - if typ is MutableLocalSource.Existing: + if typ is SourceType.Existing: self.scope = 0 - elif typ is MutableLocalSource.Local: + elif typ is SourceType.New: self.scope = current_scope_id() else: - unimplemented(f"Unsupported MutableLocalSource: {typ}") + unimplemented(f"Unsupported SourceType: {typ}") -class MutableLocal(MutableLocalBase): +class ValueMutationNew(MutationType): """ - Marker used to indicate this (list, iter, etc) was constructed in - local scope and can be mutated safely in analysis without leaking - state. + This case of VariableTracker.mutation_type marker indicates + 1. Dynamo allows mutation on the value itself (rather than its attributes). + 2. The value is created by the bytecode Dynamo is tracing through. + + For instance, Dynamo could model a newly created list with this marker, + indicating that while we need to model mutations to this list, we don't have + to emit bytecode for these mutations if the list doesn't escape into the + Python world. 
""" def __init__(self) -> None: - super().__init__(MutableLocalSource.Local) + super().__init__(SourceType.New) def __hash__(self): return id(self) @@ -84,7 +100,7 @@ def _is_top_level_scope(scope_id): return scope_id == 1 -def is_side_effect_safe(m: MutableLocalBase): +def is_side_effect_safe(m: MutationType): scope_id = current_scope_id() # In the top-level scope (if no HigherOrderOperators are involved), @@ -130,7 +146,7 @@ class VariableTracker(metaclass=VariableTrackerMeta): "value", "guards", "source", - "mutable_local", + "mutation_type", "parents_tracker", "user_code_variable_name", } @@ -381,11 +397,11 @@ def __init__( self, *, source: Source = None, - mutable_local: MutableLocal = None, + mutation_type: MutationType = None, ) -> None: super().__init__() self.source = source - self.mutable_local = mutable_local + self.mutation_type = mutation_type def typestr(*objs): diff --git a/torch/_dynamo/variables/builder.py b/torch/_dynamo/variables/builder.py index 02c9dd12854206..3650b321024dbe 100644 --- a/torch/_dynamo/variables/builder.py +++ b/torch/_dynamo/variables/builder.py @@ -124,7 +124,7 @@ unwrap_with_attr_name_if_wrapper, wrap_fake_exception, ) -from .base import MutableLocal, typestr, VariableTracker, VariableTrackerMeta +from .base import typestr, ValueMutationNew, VariableTracker, VariableTrackerMeta from .constant import ConstantVariable, EnumVariable from .ctx_manager import ( AutocastModeVariable, @@ -1311,7 +1311,7 @@ def wrap_listlike(self, value: Union[tuple, list, odict_values, NamedTuple]): tensor_list_proxy.node.meta["grapharg"] = grapharg result = BaseListVariable.cls_for_instance(value)( - output, mutable_local=MutableLocal() + output, mutation_type=ValueMutationNew() ) if istype(value, list): return self.set_source_and_track_mutable(value, result) @@ -1326,7 +1326,7 @@ def wrap_tuple_iterator(self, value: tuple_iterator): for i in range(tuple_iterator_len(value)) ] result = TupleIteratorVariable( - output, mutable_local=MutableLocal(), source=self.source + output, mutation_type=ValueMutationNew(), source=self.source ) return self.set_source_and_track_mutable(value, result) @@ -1335,7 +1335,7 @@ def wrap_range_iterator(self, value: range_iterator): self.install_guards(GuardBuilder.TYPE_MATCH) # Get all the values from the range iterator items = [ConstantVariable.create(v) for v in copy.deepcopy(value)] - return ListIteratorVariable(items, mutable_local=MutableLocal()) + return ListIteratorVariable(items, mutation_type=ValueMutationNew()) def wrap_slice_range(self, value: Union[slice, range]): items = [ @@ -2359,7 +2359,7 @@ def handle_traced_output(example_value, tx, proxy, options, subclass_type, targe elif istype(example_value, tuple): return TupleVariable(unpacked, **options) elif istype(example_value, (list, immutable_list)): - return ListVariable(unpacked, mutable_local=MutableLocal(), **options) + return ListVariable(unpacked, mutation_type=ValueMutationNew(), **options) else: assert example_value.__class__.__module__ == "torch.return_types" or hasattr( example_value, "_fields" @@ -2917,15 +2917,15 @@ def make_type_handlers(): for t in common_constant_types: handlers[t] = lambda tx, value: ConstantVariable(value) handlers[set] = lambda tx, value: SetVariable( - [create(tx, x) for x in value], mutable_local=MutableLocal() + [create(tx, x) for x in value], mutation_type=ValueMutationNew() ) handlers[dict] = lambda tx, value: ConstDictVariable( {create(tx, k): create(tx, v) for k, v in value.items()}, type(value), - mutable_local=MutableLocal(), + 
mutation_type=ValueMutationNew(), ) handlers[list] = lambda tx, value: ListVariable( - [create(tx, x) for x in value], mutable_local=MutableLocal() + [create(tx, x) for x in value], mutation_type=ValueMutationNew() ) handlers[tuple] = lambda tx, value: TupleVariable( [create(tx, x) for x in value] @@ -2942,17 +2942,17 @@ def make_type_handlers(): handlers[ torch.distributions.constraints._Real ] = lambda tx, value: UserDefinedObjectVariable( - value, mutable_local=MutableLocal() + value, mutation_type=ValueMutationNew() ) handlers[ torch.distributions.constraints._Interval ] = lambda tx, value: UserDefinedObjectVariable( - value, mutable_local=MutableLocal() + value, mutation_type=ValueMutationNew() ) handlers[ torch.distributions.constraints.Constraint ] = lambda tx, value: UserDefinedObjectVariable( - value, mutable_local=MutableLocal() + value, mutation_type=ValueMutationNew() ) def passthrough(tx: "InstructionTranslator", value): diff --git a/torch/_dynamo/variables/builtin.py b/torch/_dynamo/variables/builtin.py index 135b492e624850..cd223a4617d301 100644 --- a/torch/_dynamo/variables/builtin.py +++ b/torch/_dynamo/variables/builtin.py @@ -42,7 +42,7 @@ proxy_args_kwargs, tensortype_to_dtype, ) -from .base import MutableLocal, VariableTracker +from .base import ValueMutationNew, VariableTracker from .constant import ConstantVariable from .ctx_manager import EventVariable, StreamVariable from .dicts import ( @@ -399,7 +399,8 @@ def size_add_handler(tx: "InstructionTranslator", a, b): (BaseListVariable, ConstantVariable, ListIteratorVariable), ), lambda tx, a, b: ListVariable( - [*a.items, *b.unpack_var_sequence(tx)], mutable_local=MutableLocal() + [*a.items, *b.unpack_var_sequence(tx)], + mutation_type=ValueMutationNew(), ), ), ( @@ -410,7 +411,7 @@ def size_add_handler(tx: "InstructionTranslator", a, b): op_handlers[operator.add].extend(list_like_addition_handlers) def list_iadd_handler(tx: "InstructionTranslator", a, b): - if not a.mutable_local or not b.has_unpack_var_sequence(tx): + if not a.mutation_type or not b.has_unpack_var_sequence(tx): # Handler doesn't apply return None @@ -441,7 +442,7 @@ def expand_list_like(tx: "InstructionTranslator", lst, const): lst, const = const, lst return lst.__class__( items=lst.items * const.as_python_constant(), - mutable_local=MutableLocal(), + mutation_type=ValueMutationNew(), ) list_like_expansion_handlers = [ @@ -1269,7 +1270,7 @@ def _call_iter_tuple_list( if obj is None: return cls( [], - mutable_local=MutableLocal(), + mutation_type=ValueMutationNew(), ) elif obj.has_unpack_var_sequence(tx): if obj.source and not is_constant_source(obj.source): @@ -1289,7 +1290,7 @@ def _call_iter_tuple_list( return cls( list(obj.unpack_var_sequence(tx)), - mutable_local=MutableLocal(), + mutation_type=ValueMutationNew(), ) def _call_tuple_list(self, tx, obj=None, *args, **kwargs): @@ -1297,7 +1298,7 @@ def _call_tuple_list(self, tx, obj=None, *args, **kwargs): cls = variables.BaseListVariable.cls_for(self.fn) return cls( list(obj.force_unpack_var_sequence(tx)), - mutable_local=MutableLocal(), + mutation_type=ValueMutationNew(), ) else: return self._call_iter_tuple_list(tx, obj, *args, **kwargs) @@ -1364,9 +1365,11 @@ def call_custom_dict(tx: "InstructionTranslator", user_cls, *args, **kwargs): assert len(args) == 1 arg = args[0] if isinstance(arg, dict): - return ConstDictVariable(arg, user_cls, mutable_local=MutableLocal()) + return ConstDictVariable( + arg, user_cls, mutation_type=ValueMutationNew() + ) elif isinstance(arg, 
variables.ConstDictVariable): - return arg.clone(user_cls=user_cls, mutable_local=MutableLocal()) + return arg.clone(user_cls=user_cls, mutation_type=ValueMutationNew()) elif isinstance( arg, ( @@ -1380,7 +1383,9 @@ def call_custom_dict(tx: "InstructionTranslator", user_cls, *args, **kwargs): x.force_unpack_var_sequence(tx) for x in arg.force_unpack_var_sequence(tx) ) - return ConstDictVariable(items, user_cls, mutable_local=MutableLocal()) + return ConstDictVariable( + items, user_cls, mutation_type=ValueMutationNew() + ) elif isinstance(arg, variables.MutableMappingVariable): # This is applicable for user defined objects which seem like dict, but are not really dicts. For # example, TensorDict derives from MutableMapping. For such cases, we can directly inline the .items @@ -1406,7 +1411,7 @@ def call_custom_dict(tx: "InstructionTranslator", user_cls, *args, **kwargs): elif not args and kwargs: items = {ConstantVariable.create(k): v for k, v in kwargs.items()} return variables.ConstDictVariable( - items, user_cls=user_cls, mutable_local=MutableLocal() + items, user_cls=user_cls, mutation_type=ValueMutationNew() ) unimplemented(f"{user_cls.__name__}(): {args} {kwargs}") @@ -1433,13 +1438,15 @@ def call_custom_dict_fromkeys( if isinstance(arg, dict): arg = [ConstantVariable.create(k) for k in arg.keys()] return DictVariableType( - dict.fromkeys(arg, value), user_cls, mutable_local=MutableLocal() + dict.fromkeys(arg, value), user_cls, mutation_type=ValueMutationNew() ) elif arg.has_force_unpack_var_sequence(tx): keys = arg.force_unpack_var_sequence(tx) if all(is_hashable(v) for v in keys): return DictVariableType( - dict.fromkeys(keys, value), user_cls, mutable_local=MutableLocal() + dict.fromkeys(keys, value), + user_cls, + mutation_type=ValueMutationNew(), ) unimplemented(f"{user_cls.__name__}.fromkeys(): {args} {kwargs}") @@ -1447,14 +1454,14 @@ def call_set(self, tx: "InstructionTranslator", *args, **kwargs): # Can we merge this implementation and call_dict's one? 
assert not kwargs if not args: - return SetVariable([], mutable_local=MutableLocal()) + return SetVariable([], mutation_type=ValueMutationNew()) assert len(args) == 1 arg = args[0] if isinstance(arg, variables.SetVariable): - return arg.clone(mutable_local=MutableLocal()) + return arg.clone(mutation_type=ValueMutationNew()) elif arg.has_force_unpack_var_sequence(tx): items = arg.force_unpack_var_sequence(tx) - return SetVariable(items, mutable_local=MutableLocal()) + return SetVariable(items, mutation_type=ValueMutationNew()) elif isinstance(arg, variables.UserDefinedObjectVariable) and isinstance( arg.value, KeysView ): @@ -1491,7 +1498,9 @@ def call_zip(self, tx: "InstructionTranslator", *args, **kwargs): arg.unpack_var_sequence(tx) if arg.has_unpack_var_sequence(tx) else arg for arg in args ] - return variables.ZipVariable(args, strict=strict, mutable_local=MutableLocal()) + return variables.ZipVariable( + args, strict=strict, mutation_type=ValueMutationNew() + ) def call_len(self, tx: "InstructionTranslator", *args, **kwargs): return args[0].call_method(tx, "__len__", args[1:], kwargs) @@ -1596,7 +1605,7 @@ def call_map(self, tx: "InstructionTranslator", fn, *seqs): seq.unpack_var_sequence(tx) if seq.has_unpack_var_sequence(tx) else seq for seq in seqs ] - return variables.MapVariable(fn, seqs, mutable_local=MutableLocal()) + return variables.MapVariable(fn, seqs, mutation_type=ValueMutationNew()) def call_filter(self, tx: "InstructionTranslator", fn, seq): if seq.has_unpack_var_sequence(tx): diff --git a/torch/_dynamo/variables/dicts.py b/torch/_dynamo/variables/dicts.py index b1688060db3ac2..ad8c62d7d6924e 100644 --- a/torch/_dynamo/variables/dicts.py +++ b/torch/_dynamo/variables/dicts.py @@ -16,7 +16,7 @@ from ..guards import GuardBuilder, install_guard from ..source import AttrSource, GetItemSource, is_from_local_source from ..utils import dict_keys, dict_values, istype, specialize_symnode -from .base import MutableLocal, VariableTracker +from .base import ValueMutationNew, VariableTracker from .constant import ConstantVariable @@ -304,16 +304,16 @@ def call_method( return DictValues(self) elif name == "copy": assert not (args or kwargs) - return self.clone(items=self.items.copy(), mutable_local=MutableLocal()) + return self.clone(items=self.items.copy(), mutation_type=ValueMutationNew()) elif name == "__len__": assert not (args or kwargs) return ConstantVariable.create(len(self.items)) - elif name == "__setitem__" and arg_hashable and self.mutable_local: + elif name == "__setitem__" and arg_hashable and self.mutation_type: assert not kwargs and len(args) == 2 tx.output.side_effects.mutation(self) self.items[Hashable(args[0])] = args[1] return ConstantVariable.create(None) - elif name == "__delitem__" and arg_hashable and self.mutable_local: + elif name == "__delitem__" and arg_hashable and self.mutation_type: self.should_reconstruct_all = True tx.output.side_effects.mutation(self) self.items.__delitem__(Hashable(args[0])) @@ -324,7 +324,7 @@ def call_method( return ConstantVariable(None) else: return args[1] - elif name == "pop" and arg_hashable and self.mutable_local: + elif name == "pop" and arg_hashable and self.mutation_type: self.should_reconstruct_all = True tx.output.side_effects.mutation(self) return self.items.pop(Hashable(args[0])) @@ -333,7 +333,7 @@ def call_method( tx.output.side_effects.mutation(self) self.items.clear() return ConstantVariable.create(None) - elif name == "update" and self.mutable_local: + elif name == "update" and self.mutation_type: 
is_args_supported = len(args) == 1 and isinstance( args[0], ( @@ -368,7 +368,7 @@ def call_method( return self.getitem_const(tx, args[0]) elif name == "__contains__" and len(args) == 1: return ConstantVariable.create(args[0] in self) - elif name == "setdefault" and arg_hashable and self.mutable_local: + elif name == "setdefault" and arg_hashable and self.mutation_type: assert not kwargs assert len(args) <= 2 value = self.maybe_getitem_const(args[0]) @@ -547,7 +547,7 @@ def call_method( TupleVariable, ), ) - and self.mutable_local + and self.mutation_type ): if isinstance(args[0], (ListVariable, TupleVariable)): arg = SetVariable(args[0].unpack_var_sequence(tx)) @@ -733,7 +733,7 @@ def _call_hasattr_customobj( pass if name in self.items or hasattr(self.user_cls, name): return ConstantVariable(True) - elif istype(self.mutable_local, MutableLocal) and self.source is None: + elif istype(self.mutation_type, ValueMutationNew) and self.source is None: # Something created locally can't have any extra fields on it return ConstantVariable(False) elif self.source: @@ -747,7 +747,7 @@ def _call_hasattr_customobj( except KeyError: pass unimplemented( - f"hasattr({self.__class__.__name__}, {name}) {self.mutable_local} {self.source}" + f"hasattr({self.__class__.__name__}, {name}) {self.mutation_type} {self.source}" ) diff --git a/torch/_dynamo/variables/functions.py b/torch/_dynamo/variables/functions.py index a178b0a5956c0f..ade86959d7f8ca 100644 --- a/torch/_dynamo/variables/functions.py +++ b/torch/_dynamo/variables/functions.py @@ -35,7 +35,7 @@ istype, make_cell, ) -from .base import MutableLocal, typestr, VariableTracker +from .base import typestr, ValueMutationNew, VariableTracker from .constant import ConstantVariable @@ -682,7 +682,7 @@ def call_function( **{k: v.as_python_constant() for k, v in kwargs.items()}, ) return self.fold_through_function_to_wrapper().get(self.value)( - value, mutable_local=MutableLocal() + value, mutation_type=ValueMutationNew() ) elif ( self.value is functools.wraps diff --git a/torch/_dynamo/variables/iter.py b/torch/_dynamo/variables/iter.py index bb9faef582ae78..f506f45bce3286 100644 --- a/torch/_dynamo/variables/iter.py +++ b/torch/_dynamo/variables/iter.py @@ -14,7 +14,7 @@ unimplemented, UserError, ) -from .base import MutableLocal, VariableTracker +from .base import ValueMutationNew, VariableTracker from .constant import ConstantVariable @@ -51,7 +51,9 @@ def call_function( items = [] for item in itertools.product(*seqs): items.append(variables.TupleVariable(list(item))) - return variables.ListIteratorVariable(items, mutable_local=MutableLocal()) + return variables.ListIteratorVariable( + items, mutation_type=ValueMutationNew() + ) elif self.value is itertools.accumulate: from .builtin import BuiltinVariable @@ -96,7 +98,9 @@ def call_function( ) items.append(acc) - return variables.ListIteratorVariable(items, mutable_local=MutableLocal()) + return variables.ListIteratorVariable( + items, mutation_type=ValueMutationNew() + ) elif ( self.value is itertools.combinations and not kwargs @@ -110,7 +114,9 @@ def call_function( items = [] for item in itertools.combinations(iterable, r): items.append(variables.TupleVariable(list(item))) - return variables.ListIteratorVariable(items, mutable_local=MutableLocal()) + return variables.ListIteratorVariable( + items, mutation_type=ValueMutationNew() + ) elif self.value is itertools.groupby: if any(kw != "key" for kw in kwargs.keys()): unimplemented( @@ -154,10 +160,10 @@ def retrieve_const_key(key): if 
variables.ConstantVariable.is_literal(k) else k, variables.ListIteratorVariable( - list(v), mutable_local=MutableLocal() + list(v), mutation_type=ValueMutationNew() ), ], - mutable_local=MutableLocal(), + mutation_type=ValueMutationNew(), ) ) except Exception as e: @@ -165,20 +171,26 @@ def retrieve_const_key(key): "Unexpected failure when calling itertools.groupby", from_exc=e, ) - return variables.ListIteratorVariable(result, mutable_local=MutableLocal()) + return variables.ListIteratorVariable( + result, mutation_type=ValueMutationNew() + ) elif self.value is itertools.repeat: if len(args) < 2: return variables.RepeatIteratorVariable( - *args, mutable_local=MutableLocal() + *args, mutation_type=ValueMutationNew() ) return tx.inline_user_function_return( VariableTracker.build(tx, polyfills.repeat), args, kwargs ) elif self.value is itertools.count: - return variables.CountIteratorVariable(*args, mutable_local=MutableLocal()) + return variables.CountIteratorVariable( + *args, mutation_type=ValueMutationNew() + ) elif self.value is itertools.cycle: - return variables.CycleIteratorVariable(*args, mutable_local=MutableLocal()) + return variables.CycleIteratorVariable( + *args, mutation_type=ValueMutationNew() + ) elif self.value is itertools.dropwhile: return variables.UserFunctionVariable(polyfills.dropwhile).call_function( tx, args, kwargs @@ -251,7 +263,7 @@ def __init__(self, item: int = 0, step: int = 1, **kwargs) -> None: self.step = step def next_variable(self, tx): - assert self.mutable_local + assert self.mutation_type old_item = self.item tx.output.side_effects.mutation(self) self.item = self.item.call_method(tx, "__add__", [self.step], {}) @@ -289,7 +301,7 @@ def __init__( self.item = item def next_variable(self, tx): - assert self.mutable_local + assert self.mutation_type if self.iterator is not None: try: @@ -362,7 +374,7 @@ def unpack_var_sequence(self, tx) -> List["VariableTracker"]: return [variables.TupleVariable(list(var)) for var in zipped] def next_variable(self, tx): - assert self.mutable_local + assert self.mutation_type old_index = self.index args = [] diff --git a/torch/_dynamo/variables/lists.py b/torch/_dynamo/variables/lists.py index 372e32793b84a4..5c8906542bd75d 100644 --- a/torch/_dynamo/variables/lists.py +++ b/torch/_dynamo/variables/lists.py @@ -26,7 +26,7 @@ odict_values, set_example_value, ) -from .base import MutableLocal, VariableTracker +from .base import ValueMutationNew, VariableTracker from .constant import ConstantVariable from .functions import UserFunctionVariable, UserMethodVariable from .iter import IteratorVariable @@ -101,7 +101,7 @@ def getitem_const(self, tx: "InstructionTranslator", arg: VariableTracker): return self.clone( items=self.items[index], source=None, - mutable_local=MutableLocal() if self.mutable_local else None, + mutation_type=ValueMutationNew() if self.mutation_type else None, ) else: assert isinstance(index, (int, torch.SymInt)) @@ -273,7 +273,7 @@ def compute_item(index): variables.ConstantVariable.create(x) for x in [sub_start, sub_stop, sub_step] ], - mutable_local=MutableLocal() if self.mutable_local else None, + mutation_type=ValueMutationNew() if self.mutation_type else None, ) return result @@ -324,7 +324,7 @@ def call_method( ) -> "VariableTracker": from .tensor import SymNodeVariable - if name == "append" and self.mutable_local: + if name == "append" and self.mutation_type: assert not kwargs (arg,) = args tx.output.side_effects.mutation(self) @@ -332,7 +332,7 @@ def call_method( return ConstantVariable.create(None) 
elif ( name == "extend" - and self.mutable_local + and self.mutation_type and args and args[0].has_force_unpack_var_sequence(tx) ): @@ -342,7 +342,7 @@ def call_method( tx.output.side_effects.mutation(self) self.items.extend(seq) return ConstantVariable.create(None) - elif name == "insert" and self.mutable_local: + elif name == "insert" and self.mutation_type: assert not kwargs idx, value = args if isinstance(idx, SymNodeVariable): @@ -352,18 +352,18 @@ def call_method( tx.output.side_effects.mutation(self) self.items.insert(const_idx, value) return ConstantVariable.create(None) - elif name == "pop" and self.mutable_local: + elif name == "pop" and self.mutation_type: assert not kwargs tx.output.side_effects.mutation(self) return self.items.pop(*[a.as_python_constant() for a in args]) - elif name == "clear" and self.mutable_local: + elif name == "clear" and self.mutation_type: assert not kwargs and not args tx.output.side_effects.mutation(self) self.items.clear() return ConstantVariable.create(None) elif ( name == "__setitem__" - and self.mutable_local + and self.mutation_type and args and args[0].is_python_constant() ): @@ -380,8 +380,8 @@ def call_method( assert not kwargs assert not args items = list(self.items) - return self.modified(items, mutable_local=MutableLocal()) - elif name == "reverse" and self.mutable_local: + return self.modified(items, mutation_type=ValueMutationNew()) + elif name == "reverse" and self.mutation_type: assert not kwargs assert not args self.items.reverse() @@ -414,7 +414,7 @@ def call_method( ) -> "VariableTracker": if ( name == "__setitem__" - and self.mutable_local + and self.mutation_type and args and args[0].is_python_constant() ): @@ -506,7 +506,7 @@ def call_method( ) -> "VariableTracker": if ( name == "__setitem__" - and self.mutable_local + and self.mutation_type and args and args[0].is_python_constant() ): @@ -527,7 +527,7 @@ def call_method( if ( name == "extendleft" - and self.mutable_local + and self.mutation_type and len(args) > 0 and args[0].has_force_unpack_var_sequence(tx) ): @@ -538,12 +538,12 @@ def call_method( self.items[:] = [*reversed(prefix), *self.items] slice_within_maxlen = slice(None, maxlen) result = ConstantVariable.create(None) - elif name == "popleft" and self.mutable_local: + elif name == "popleft" and self.mutation_type: assert not args assert not kwargs tx.output.side_effects.mutation(self) result, *self.items[:] = self.items - elif name == "appendleft" and len(args) > 0 and self.mutable_local: + elif name == "appendleft" and len(args) > 0 and self.mutation_type: assert len(args) == 1 assert not kwargs tx.output.side_effects.mutation(self) @@ -897,7 +897,7 @@ def __repr__(self) -> str: return f"{self.__class__.__name__}(length={len(self.items)}, index={repr(self.index)})" def next_variable(self, tx): - assert self.mutable_local + assert self.mutation_type old_index = self.index if old_index >= len(self.items): raise_observed_exception(StopIteration, tx) diff --git a/torch/_dynamo/variables/misc.py b/torch/_dynamo/variables/misc.py index 7f4ad96601a731..a26ea51b223c7a 100644 --- a/torch/_dynamo/variables/misc.py +++ b/torch/_dynamo/variables/misc.py @@ -166,7 +166,7 @@ def call_method( if ( isinstance(objvar, variables.UserDefinedObjectVariable) - and isinstance(objvar.mutable_local, AttributeMutationNew) + and isinstance(objvar.mutation_type, AttributeMutationNew) and not (args or kwargs) ): with do_not_convert_to_tracable_parameter(): @@ -371,7 +371,7 @@ def create(callable, **kwargs): if kwargs: 
unimplemented(f"inspect.signature with {kwargs}") return InspectSignatureVariable( - callable, mutable_local=variables.base.MutableLocal() + callable, mutation_type=variables.base.ValueMutationNew() ) def __init__(self, inspected: VariableTracker, **kwargs) -> None: @@ -520,7 +520,7 @@ def __init__( self.bound_arguments_var = variables.ConstDictVariable( arguments_dict, type(bound_arguments.arguments), - mutable_local=variables.base.MutableLocal(), + mutation_type=variables.base.ValueMutationNew(), ) self.signature = signature @@ -1573,7 +1573,9 @@ def call_function(self, tx: "InstructionTranslator", args, kwargs): elif kwargs: unimplemented("random.Random() with kwargs") seed = variables.ConstantVariable.create(None) if len(args) == 0 else args[0] - return RandomVariable(seed=seed, mutable_local=variables.base.MutableLocal()) + return RandomVariable( + seed=seed, mutation_type=variables.base.ValueMutationNew() + ) class RandomVariable(VariableTracker): diff --git a/torch/_dynamo/variables/nn_module.py b/torch/_dynamo/variables/nn_module.py index 4116885bb449b7..a20d081a8bed6f 100644 --- a/torch/_dynamo/variables/nn_module.py +++ b/torch/_dynamo/variables/nn_module.py @@ -42,7 +42,7 @@ unpatched_nn_module_call, unpatched_nn_module_call_impl, ) -from .base import MutableLocal, typestr, VariableTracker +from .base import typestr, ValueMutationNew, VariableTracker from .functions import invoke_and_store_as_constant from .lazy import LazyVariableTracker from .lists import SliceVariable @@ -550,7 +550,7 @@ def wrap_values(items): source=NNModuleSource(gen_source(self.source, name)), ) ) - return ListIteratorVariable(result, mutable_local=MutableLocal()) + return ListIteratorVariable(result, mutation_type=ValueMutationNew()) def named_embed(name, obj): return TupleVariable( @@ -580,7 +580,7 @@ def gen_source(source, name): result = [] for name, submod in module.named_children(): result.append(named_embed(name, submod)) - return ListIteratorVariable(result, mutable_local=MutableLocal()) + return ListIteratorVariable(result, mutation_type=ValueMutationNew()) elif name == "named_parameters": tx.output.guard_on_key_order.add( AttrSource(self.source, "_parameters").name() @@ -590,7 +590,7 @@ def gen_source(source, name): **get_kwargs("prefix", "recurse") ): result.append(named_embed(name, param)) - return ListIteratorVariable(result, mutable_local=MutableLocal()) + return ListIteratorVariable(result, mutation_type=ValueMutationNew()) elif name == "named_buffers": tx.output.guard_on_key_order.add(AttrSource(self.source, "_buffers").name()) result = [] @@ -598,7 +598,7 @@ def gen_source(source, name): **get_kwargs("prefix", "recurse", "remove_duplicate") ): result.append(named_embed(name, buffer)) - return ListIteratorVariable(result, mutable_local=MutableLocal()) + return ListIteratorVariable(result, mutation_type=ValueMutationNew()) elif name == "named_modules": tx.output.guard_on_key_order.add(AttrSource(self.source, "_modules").name()) result = [] @@ -606,7 +606,7 @@ def gen_source(source, name): **get_kwargs("memo", "prefix", "remove_duplicate") ): result.append(named_embed(name, submod)) - return ListIteratorVariable(result, mutable_local=MutableLocal()) + return ListIteratorVariable(result, mutation_type=ValueMutationNew()) elif name == "children": tx.output.guard_on_key_order.add(AttrSource(self.source, "_modules").name()) assert not (args or kwargs) @@ -627,7 +627,7 @@ def gen_source(source, name): result = [] for name in module.keys(): result.append(ConstantVariable.create(name)) - return 
ListIteratorVariable(result, mutable_local=MutableLocal()) + return ListIteratorVariable(result, mutation_type=ValueMutationNew()) elif name == "values": assert not (args or kwargs) return wrap_values(module.items()) @@ -636,7 +636,7 @@ def gen_source(source, name): result = [] for name, submod in module.items(): result.append(named_embed(name, submod)) - return ListIteratorVariable(result, mutable_local=MutableLocal()) + return ListIteratorVariable(result, mutation_type=ValueMutationNew()) elif name == "__len__": assert not (args or kwargs) return ConstantVariable.create(len(module)) @@ -963,7 +963,7 @@ def collect_parameters(module_var, recurse): deduplicated_params = list(dict.fromkeys(params_list).keys()) return variables.ListIteratorVariable( - deduplicated_params, mutable_local=MutableLocal() + deduplicated_params, mutation_type=ValueMutationNew() ) else: raise AssertionError( diff --git a/torch/_dynamo/variables/tensor.py b/torch/_dynamo/variables/tensor.py index 1957c4890a3663..019d6a0df41499 100644 --- a/torch/_dynamo/variables/tensor.py +++ b/torch/_dynamo/variables/tensor.py @@ -1070,7 +1070,7 @@ def _register_hook_trampoline(tensor, bw_state): ) handle_variable = variables.RemovableHandleVariable( - mutable_local=variables.base.MutableLocal(), + mutation_type=variables.base.ValueMutationNew(), ) tx.output.side_effects.register_hook(self, hook, handle_variable, name) return handle_variable diff --git a/torch/_dynamo/variables/user_defined.py b/torch/_dynamo/variables/user_defined.py index 893e1fa0446708..73b2db6b8a95a3 100644 --- a/torch/_dynamo/variables/user_defined.py +++ b/torch/_dynamo/variables/user_defined.py @@ -54,7 +54,7 @@ tensortype_to_dtype, unpatched_nn_module_getattr, ) -from .base import MutableLocal, VariableTracker +from .base import ValueMutationNew, VariableTracker from .dicts import DefaultDictVariable @@ -303,7 +303,7 @@ def call_method( and not kwargs and "__subclasses__" not in self.value.__dict__ ): - options = {"mutable_local": MutableLocal()} + options = {"mutation_type": ValueMutationNew()} subs_as_vars: List[VariableTracker] = [] for sub in self.value.__subclasses__(): source = AttrSource(tx.import_source(sub.__module__), sub.__name__) @@ -368,7 +368,7 @@ def call_function( {}, collections.defaultdict, args[0], - mutable_local=MutableLocal(), + mutation_type=ValueMutationNew(), ) elif is_typeddict(self.value): if self.value.__optional_keys__: @@ -397,7 +397,7 @@ def call_function( else: unimplemented("deque() with invalid kwargs not supported") return variables.lists.DequeVariable( - items, maxlen=maxlen, mutable_local=MutableLocal() + items, maxlen=maxlen, mutation_type=ValueMutationNew() ) elif self.value is functools.partial: if not args: @@ -521,7 +521,7 @@ def call_function( var.call_method(tx, "__init__", args, kwargs) return var elif variables.CustomizedDictVariable.is_matching_cls(self.value): - options = {"mutable_local": MutableLocal()} + options = {"mutation_type": ValueMutationNew()} return variables.CustomizedDictVariable.create( self.value, args, kwargs, options ) @@ -533,7 +533,7 @@ def call_function( variables.BuiltinVariable(list).call_function(tx, args, kwargs).items, user_cls=self.value, user_cls_source=self.source, - mutable_local=MutableLocal(), + mutation_type=ValueMutationNew(), ) elif ( self.value in self._in_graph_classes() @@ -571,7 +571,7 @@ def call_function( return tensor_variable elif issubclass(self.value, enum.Enum) and len(args) == 1 and not kwargs: - options = {"mutable_local": MutableLocal()} + options = 
{"mutation_type": ValueMutationNew()} return variables.EnumVariable.create(self.value, args[0], options) elif self.value is random.Random: if len(args) == 1 and isinstance(args[0], variables.ConstantVariable): @@ -1361,13 +1361,13 @@ class RemovableHandleVariable(VariableTracker): def __init__( self, - mutable_local=None, + mutation_type=None, # index of the registration in the side_effects owned register_hook/handle list, used during removal. idx=None, **kwargs, ) -> None: super().__init__(**kwargs) - self.mutable_local = mutable_local + self.mutation_type = mutation_type self.idx = idx def call_method(self, tx: "InstructionTranslator", method_name, args, kwargs): From 0ba3962b800ccb3526371c7519e68be68e338bd2 Mon Sep 17 00:00:00 2001 From: Ryan Guo Date: Mon, 4 Nov 2024 16:31:29 -0800 Subject: [PATCH 091/503] [dynamo][NFC] Move `MutationType` classes into `variables/base.py` (#139340) As title, this addresses https://github.com/pytorch/pytorch/pull/137905/files#r1806800222. Pull Request resolved: https://github.com/pytorch/pytorch/pull/139340 Approved by: https://github.com/anijain2305 ghstack dependencies: #139339 --- torch/_dynamo/side_effects.py | 71 ++------------------------------- torch/_dynamo/variables/base.py | 65 ++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 67 deletions(-) diff --git a/torch/_dynamo/side_effects.py b/torch/_dynamo/side_effects.py index f71f153f243665..f5fd82a6f1d6ff 100644 --- a/torch/_dynamo/side_effects.py +++ b/torch/_dynamo/side_effects.py @@ -21,79 +21,16 @@ from .source import GlobalSource, LocalSource, Source from .utils import is_frozen_dataclass, nn_module_new, object_new from .variables.base import ( + AttributeMutation, + AttributeMutationExisting, + AttributeMutationNew, is_side_effect_safe, - MutationType, - SourceType, + ValueMutationExisting, VariableTracker, ) from .variables.user_defined import FrozenDataClassVariable -class ValueMutationExisting(MutationType): - """ - This case of VariableTracker.mutation_type marker indicates - 1. Dynamo allows mutation on the value itself (rather than its attributes). - 2. The value exists before Dynamo tracing started. - - For instance, Dynamo could model a pre-existing list with this marker, - indicating that if we encounter mutations to this list, we need to buffer - and re-apply those mutations after the graph runs, since the list might be - used afterwards in Python. - """ - - # A flag to indicate whether mutation happened on the associated - # `VariableTracker`. This enables SideEffects to accurately and quickly - # filter out which pre-existing values it needs to generate mutation for. - is_modified: bool - - def __init__(self, is_modified: bool = False): - super().__init__(SourceType.Existing) - self.is_modified = is_modified - - -class AttributeMutation(MutationType): - """ - This case of VariableTracker.mutation_type marker indicates that Dynamo - allows mutation on the value's attributes. - """ - - def __init__(self, typ: SourceType): - super().__init__(typ) - - -class AttributeMutationExisting(AttributeMutation): - """ - This case of VariableTracker.mutation_type marker indicates - 1. Dynamo allows mutation on the value's attributes. - 2. The value exists before Dynamo tracing started. - - For instance, Dynamo could model a pre-existing object with this marker, - indicating that if we encounter mutations to this object, we need to buffer - then re-apply those mutations after the graph runs, since the object might - be used afterwards in Python. 
- """ - - def __init__(self): - super().__init__(SourceType.Existing) - - -class AttributeMutationNew(AttributeMutation): - """ - This case of VariableTracker.mutation_type marker indicates - 1. Dynamo allows mutation on the value's attributes. - 2. The value is created by the bytecode Dynamo is tracing through. - - For instance, Dynamo could model a newly created object with this marker, - indicating that while we need to model mutations to this object, we don't - have to emit bytecode for these mutations if the object doesn't escape into - the Python world. - """ - - def __init__(self, cls_source: Optional[Source] = None): - super().__init__(SourceType.New) - self.cls_source = cls_source - - def _manual_update_dict(dict_from, dict_to): for k, v in dict_from.items(): dict_to[k] = v diff --git a/torch/_dynamo/variables/base.py b/torch/_dynamo/variables/base.py index 350c63f9e67935..690492087c2ff4 100644 --- a/torch/_dynamo/variables/base.py +++ b/torch/_dynamo/variables/base.py @@ -96,6 +96,71 @@ def __eq__(self, other): return self is other +class ValueMutationExisting(MutationType): + """ + This case of VariableTracker.mutation_type marker indicates + 1. Dynamo allows mutation on the value itself (rather than its attributes). + 2. The value exists before Dynamo tracing started. + + For instance, Dynamo could model a pre-existing list with this marker, + indicating that if we encounter mutations to this list, we need to buffer + and re-apply those mutations after the graph runs, since the list might be + used afterwards in Python. + """ + + # A flag to indicate whether mutation happened on the associated + # `VariableTracker`. This enables SideEffects to accurately and quickly + # filter out which pre-existing values it needs to generate mutation for. + is_modified: bool + + def __init__(self, is_modified: bool = False): + super().__init__(SourceType.Existing) + self.is_modified = is_modified + + +class AttributeMutation(MutationType): + """ + This case of VariableTracker.mutation_type marker indicates that Dynamo + allows mutation on the value's attributes. + """ + + def __init__(self, typ: SourceType): + super().__init__(typ) + + +class AttributeMutationExisting(AttributeMutation): + """ + This case of VariableTracker.mutation_type marker indicates + 1. Dynamo allows mutation on the value's attributes. + 2. The value exists before Dynamo tracing started. + + For instance, Dynamo could model a pre-existing object with this marker, + indicating that if we encounter mutations to this object, we need to buffer + then re-apply those mutations after the graph runs, since the object might + be used afterwards in Python. + """ + + def __init__(self): + super().__init__(SourceType.Existing) + + +class AttributeMutationNew(AttributeMutation): + """ + This case of VariableTracker.mutation_type marker indicates + 1. Dynamo allows mutation on the value's attributes. + 2. The value is created by the bytecode Dynamo is tracing through. + + For instance, Dynamo could model a newly created object with this marker, + indicating that while we need to model mutations to this object, we don't + have to emit bytecode for these mutations if the object doesn't escape into + the Python world. 
+ """ + + def __init__(self, cls_source: Optional[Source] = None): + super().__init__(SourceType.New) + self.cls_source = cls_source + + def _is_top_level_scope(scope_id): return scope_id == 1 From 2b3a227b3527582289111449f974fc6c331124af Mon Sep 17 00:00:00 2001 From: Ryan Guo Date: Mon, 4 Nov 2024 16:31:30 -0800 Subject: [PATCH 092/503] [dynamo] Add `is_mutable()` and `is_immutable()` methods to `VariableTracker` (#139341) This patch adds 2 simple methods `VariableTracker.is_mutable()` and `VariableTracker.is_immutable()`, which helps clarify intention. For instance, rather than writing ```python if var.mutation_type: ... ``` After this patch one can write ```python if var.is_mutable(): ... ``` This patch also simplifies `mutation_type` propagation in some `ListVariable` methods. Pull Request resolved: https://github.com/pytorch/pytorch/pull/139341 Approved by: https://github.com/mlazos, https://github.com/anijain2305 ghstack dependencies: #139339, #139340 --- torch/_dynamo/side_effects.py | 2 +- torch/_dynamo/symbolic_convert.py | 10 +++++----- torch/_dynamo/variables/base.py | 8 ++++++++ torch/_dynamo/variables/builtin.py | 2 +- torch/_dynamo/variables/dicts.py | 12 ++++++------ torch/_dynamo/variables/iter.py | 6 +++--- torch/_dynamo/variables/lists.py | 26 +++++++++++++------------- 7 files changed, 37 insertions(+), 29 deletions(-) diff --git a/torch/_dynamo/side_effects.py b/torch/_dynamo/side_effects.py index f5fd82a6f1d6ff..0898c21b998a9a 100644 --- a/torch/_dynamo/side_effects.py +++ b/torch/_dynamo/side_effects.py @@ -439,7 +439,7 @@ def register_hook(self, tensor, hook, handle, name): assert isinstance(hook, variables.VariableTracker) assert ( isinstance(handle, variables.RemovableHandleVariable) - and handle.mutation_type + and handle.is_mutable() ) assert hasattr(torch.Tensor, name) idx = len(self.tensor_hooks.keys()) diff --git a/torch/_dynamo/symbolic_convert.py b/torch/_dynamo/symbolic_convert.py index 5b4b233b902435..6aebda16f5ab6a 100644 --- a/torch/_dynamo/symbolic_convert.py +++ b/torch/_dynamo/symbolic_convert.py @@ -2030,7 +2030,7 @@ def SET_ADD(self, inst): assert inst.argval > 0 obj = self.stack[-inst.arg] assert isinstance(obj, SetVariable) - assert obj.mutation_type + assert obj.is_mutable() return obj.call_method(self, "add", [v], {}) def SET_UPDATE(self, inst): @@ -2038,7 +2038,7 @@ def SET_UPDATE(self, inst): assert inst.argval > 0 obj = self.stack[-inst.arg] assert isinstance(obj, SetVariable) - assert obj.mutation_type + assert obj.is_mutable() obj.call_method(self, "update", [v], {}) def LIST_APPEND(self, inst): @@ -2046,7 +2046,7 @@ def LIST_APPEND(self, inst): assert inst.argval > 0 obj = self.stack[-inst.arg].realize() assert isinstance(obj, ListVariable) - assert obj.mutation_type + assert obj.is_mutable() self.output.side_effects.mutation(obj) obj.items.append(v) @@ -2248,7 +2248,7 @@ def LIST_EXTEND(self, inst): assert inst.argval > 0 obj = self.stack[-inst.arg] assert isinstance(obj, ListVariable) - assert obj.mutation_type + assert obj.is_mutable() obj.call_method(self, "extend", [v], {}) def LIST_TO_TUPLE(self, inst): @@ -2259,7 +2259,7 @@ def DICT_MERGE(self, inst): assert inst.argval > 0 obj = self.stack[-inst.arg].realize() assert isinstance(obj, ConstDictVariable) - assert obj.mutation_type + assert obj.is_mutable() obj.call_method(self, "update", [v], {}) DICT_UPDATE = DICT_MERGE diff --git a/torch/_dynamo/variables/base.py b/torch/_dynamo/variables/base.py index 690492087c2ff4..d2bd2837bda603 100644 --- a/torch/_dynamo/variables/base.py +++ 
b/torch/_dynamo/variables/base.py @@ -444,6 +444,14 @@ def next_variable(self, tx): def is_strict_mode(self, tx): return tx.strict_checks_fn and tx.strict_checks_fn(self) + def is_mutable(self): + """Whether Dynamo allows mutation on this variable.""" + return not self.is_immutable() + + def is_immutable(self): + """Whether Dynamo bans mutation on this variable.""" + return self.mutation_type is None + @staticmethod def build( tx: "InstructionTranslatorBase", diff --git a/torch/_dynamo/variables/builtin.py b/torch/_dynamo/variables/builtin.py index cd223a4617d301..c395bc0c00c00c 100644 --- a/torch/_dynamo/variables/builtin.py +++ b/torch/_dynamo/variables/builtin.py @@ -411,7 +411,7 @@ def size_add_handler(tx: "InstructionTranslator", a, b): op_handlers[operator.add].extend(list_like_addition_handlers) def list_iadd_handler(tx: "InstructionTranslator", a, b): - if not a.mutation_type or not b.has_unpack_var_sequence(tx): + if a.is_immutable() or not b.has_unpack_var_sequence(tx): # Handler doesn't apply return None diff --git a/torch/_dynamo/variables/dicts.py b/torch/_dynamo/variables/dicts.py index ad8c62d7d6924e..ce60951c2f92a4 100644 --- a/torch/_dynamo/variables/dicts.py +++ b/torch/_dynamo/variables/dicts.py @@ -308,12 +308,12 @@ def call_method( elif name == "__len__": assert not (args or kwargs) return ConstantVariable.create(len(self.items)) - elif name == "__setitem__" and arg_hashable and self.mutation_type: + elif name == "__setitem__" and arg_hashable and self.is_mutable(): assert not kwargs and len(args) == 2 tx.output.side_effects.mutation(self) self.items[Hashable(args[0])] = args[1] return ConstantVariable.create(None) - elif name == "__delitem__" and arg_hashable and self.mutation_type: + elif name == "__delitem__" and arg_hashable and self.is_mutable(): self.should_reconstruct_all = True tx.output.side_effects.mutation(self) self.items.__delitem__(Hashable(args[0])) @@ -324,7 +324,7 @@ def call_method( return ConstantVariable(None) else: return args[1] - elif name == "pop" and arg_hashable and self.mutation_type: + elif name == "pop" and arg_hashable and self.is_mutable(): self.should_reconstruct_all = True tx.output.side_effects.mutation(self) return self.items.pop(Hashable(args[0])) @@ -333,7 +333,7 @@ def call_method( tx.output.side_effects.mutation(self) self.items.clear() return ConstantVariable.create(None) - elif name == "update" and self.mutation_type: + elif name == "update" and self.is_mutable(): is_args_supported = len(args) == 1 and isinstance( args[0], ( @@ -368,7 +368,7 @@ def call_method( return self.getitem_const(tx, args[0]) elif name == "__contains__" and len(args) == 1: return ConstantVariable.create(args[0] in self) - elif name == "setdefault" and arg_hashable and self.mutation_type: + elif name == "setdefault" and arg_hashable and self.is_mutable(): assert not kwargs assert len(args) <= 2 value = self.maybe_getitem_const(args[0]) @@ -547,7 +547,7 @@ def call_method( TupleVariable, ), ) - and self.mutation_type + and self.is_mutable() ): if isinstance(args[0], (ListVariable, TupleVariable)): arg = SetVariable(args[0].unpack_var_sequence(tx)) diff --git a/torch/_dynamo/variables/iter.py b/torch/_dynamo/variables/iter.py index f506f45bce3286..7119b7f9ea2df8 100644 --- a/torch/_dynamo/variables/iter.py +++ b/torch/_dynamo/variables/iter.py @@ -263,7 +263,7 @@ def __init__(self, item: int = 0, step: int = 1, **kwargs) -> None: self.step = step def next_variable(self, tx): - assert self.mutation_type + assert self.is_mutable() old_item = self.item 
tx.output.side_effects.mutation(self) self.item = self.item.call_method(tx, "__add__", [self.step], {}) @@ -301,7 +301,7 @@ def __init__( self.item = item def next_variable(self, tx): - assert self.mutation_type + assert self.is_mutable() if self.iterator is not None: try: @@ -374,7 +374,7 @@ def unpack_var_sequence(self, tx) -> List["VariableTracker"]: return [variables.TupleVariable(list(var)) for var in zipped] def next_variable(self, tx): - assert self.mutation_type + assert self.is_mutable() old_index = self.index args = [] diff --git a/torch/_dynamo/variables/lists.py b/torch/_dynamo/variables/lists.py index 5c8906542bd75d..707c2cebd477c5 100644 --- a/torch/_dynamo/variables/lists.py +++ b/torch/_dynamo/variables/lists.py @@ -324,7 +324,7 @@ def call_method( ) -> "VariableTracker": from .tensor import SymNodeVariable - if name == "append" and self.mutation_type: + if name == "append" and self.is_mutable(): assert not kwargs (arg,) = args tx.output.side_effects.mutation(self) @@ -332,7 +332,7 @@ def call_method( return ConstantVariable.create(None) elif ( name == "extend" - and self.mutation_type + and self.is_mutable() and args and args[0].has_force_unpack_var_sequence(tx) ): @@ -342,7 +342,7 @@ def call_method( tx.output.side_effects.mutation(self) self.items.extend(seq) return ConstantVariable.create(None) - elif name == "insert" and self.mutation_type: + elif name == "insert" and self.is_mutable(): assert not kwargs idx, value = args if isinstance(idx, SymNodeVariable): @@ -352,18 +352,18 @@ def call_method( tx.output.side_effects.mutation(self) self.items.insert(const_idx, value) return ConstantVariable.create(None) - elif name == "pop" and self.mutation_type: + elif name == "pop" and self.is_mutable(): assert not kwargs tx.output.side_effects.mutation(self) return self.items.pop(*[a.as_python_constant() for a in args]) - elif name == "clear" and self.mutation_type: + elif name == "clear" and self.is_mutable(): assert not kwargs and not args tx.output.side_effects.mutation(self) self.items.clear() return ConstantVariable.create(None) elif ( name == "__setitem__" - and self.mutation_type + and self.is_mutable() and args and args[0].is_python_constant() ): @@ -381,7 +381,7 @@ def call_method( assert not args items = list(self.items) return self.modified(items, mutation_type=ValueMutationNew()) - elif name == "reverse" and self.mutation_type: + elif name == "reverse" and self.is_mutable(): assert not kwargs assert not args self.items.reverse() @@ -414,7 +414,7 @@ def call_method( ) -> "VariableTracker": if ( name == "__setitem__" - and self.mutation_type + and self.is_mutable() and args and args[0].is_python_constant() ): @@ -506,7 +506,7 @@ def call_method( ) -> "VariableTracker": if ( name == "__setitem__" - and self.mutation_type + and self.is_mutable() and args and args[0].is_python_constant() ): @@ -527,7 +527,7 @@ def call_method( if ( name == "extendleft" - and self.mutation_type + and self.is_mutable() and len(args) > 0 and args[0].has_force_unpack_var_sequence(tx) ): @@ -538,12 +538,12 @@ def call_method( self.items[:] = [*reversed(prefix), *self.items] slice_within_maxlen = slice(None, maxlen) result = ConstantVariable.create(None) - elif name == "popleft" and self.mutation_type: + elif name == "popleft" and self.is_mutable(): assert not args assert not kwargs tx.output.side_effects.mutation(self) result, *self.items[:] = self.items - elif name == "appendleft" and len(args) > 0 and self.mutation_type: + elif name == "appendleft" and len(args) > 0 and self.is_mutable(): 
assert len(args) == 1 assert not kwargs tx.output.side_effects.mutation(self) @@ -897,7 +897,7 @@ def __repr__(self) -> str: return f"{self.__class__.__name__}(length={len(self.items)}, index={repr(self.index)})" def next_variable(self, tx): - assert self.mutation_type + assert self.is_mutable() old_index = self.index if old_index >= len(self.items): raise_observed_exception(StopIteration, tx) From 01bcf37123a8ac8ed6c4cbb1258645d284f40836 Mon Sep 17 00:00:00 2001 From: Ryan Guo Date: Mon, 4 Nov 2024 15:26:25 -0800 Subject: [PATCH 093/503] [dynamo][NFC] Remove some dead code paths (#139674) As title. Pull Request resolved: https://github.com/pytorch/pytorch/pull/139674 Approved by: https://github.com/Skylion007, https://github.com/anijain2305, https://github.com/mlazos --- torch/_dynamo/output_graph.py | 4 +--- torch/_dynamo/variables/misc.py | 6 +----- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/torch/_dynamo/output_graph.py b/torch/_dynamo/output_graph.py index 201748da5f2102..97e74a0f2dafbd 100644 --- a/torch/_dynamo/output_graph.py +++ b/torch/_dynamo/output_graph.py @@ -1066,10 +1066,8 @@ def append_prefix_insts(): } root = FakeRootModule(nn_modules_proxies) # Add all the local vars to the "stack" so restore at the end - restore_vars = [] + restore_vars: List[str] = [] val_to_names: Dict[VariableTracker, List[str]] = {} - if stack_values: - val_to_names[stack_values[-1]] = [] # NB: Typically (i.e., for graph compile from RETURN_VALUE), # symbolic_locals will be empty at this point, as prune_dead_locals # will clear out all of symbolic_locals because RETURN_VALUE is the diff --git a/torch/_dynamo/variables/misc.py b/torch/_dynamo/variables/misc.py index a26ea51b223c7a..5372328eb7e262 100644 --- a/torch/_dynamo/variables/misc.py +++ b/torch/_dynamo/variables/misc.py @@ -56,11 +56,10 @@ class NO_SUCH_SUBOBJ: class SuperVariable(VariableTracker): _nonvar_fields = { - "specialized", *VariableTracker._nonvar_fields, } - def __init__(self, typevar, objvar=None, specialized=False, **kwargs) -> None: + def __init__(self, typevar, objvar=None, **kwargs) -> None: super().__init__(**kwargs) # typevar is the fist argument to super(). In the case where no argument # is provided to super(), it is the __class__ object where @@ -71,7 +70,6 @@ def __init__(self, typevar, objvar=None, specialized=False, **kwargs) -> None: # to the current function where super() is called from (self for regular method, # cls for a classmethod) self.objvar = objvar - self.specialized = specialized # directly get attr from self.typevar if true def reconstruct(self, codegen): codegen.add_push_null(lambda: codegen(variables.BuiltinVariable(super))) @@ -84,8 +82,6 @@ def reconstruct(self, codegen): def _resolved_getattr_and_source(self, tx: "InstructionTranslator", name): assert self.objvar, "1-arg super not implemented" - if self.specialized: - return getattr(self.typevar.as_python_constant(), name) search_type = self.typevar.as_python_constant() # The rest of this function does two things: From b9f0563aafe9e21e71f5da6d7a6ffd62d9403641 Mon Sep 17 00:00:00 2001 From: rzou Date: Tue, 5 Nov 2024 08:09:55 -0800 Subject: [PATCH 094/503] Add repro instructions to fx_graph_runnable.py (#139481) This PR adds some instructions for how to add a TARGETS file to run the fx_graph_runnable script. I'm planning to add some followups that will add additional imports for custom ops and use autodeps to get the dependencies, but I figure this PR is an easy first step. 
Test Plan: - pytest test/dynamo/test_structured_trace.py - Does anyone have suggestions for how to test this? Pull Request resolved: https://github.com/pytorch/pytorch/pull/139481 Approved by: https://github.com/eellison --- torch/_dynamo/repro/after_aot.py | 35 ++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/torch/_dynamo/repro/after_aot.py b/torch/_dynamo/repro/after_aot.py index c5d7a87f48560c..4d501581c95622 100644 --- a/torch/_dynamo/repro/after_aot.py +++ b/torch/_dynamo/repro/after_aot.py @@ -25,6 +25,7 @@ backend_accuracy_fails, BuckTargetWriter, cast_to_fp64, + extra_deps, extra_imports, generate_config_string, helper_for_dump_minify, @@ -36,6 +37,7 @@ NopInputReader, same_two_models, ) +from torch._dynamo.trace_rules import is_fbcode from torch._dynamo.utils import clone_inputs, counters, same from torch.fx.experimental.proxy_tensor import make_fx from torch.fx.experimental.symbolic_shapes import ( @@ -225,6 +227,38 @@ def inner_debug_fn(real_inputs): # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # +def maybe_fbcode_instructions(): + if is_fbcode: + extra_deps_formatted = "\n".join([f' "{dep}",' for dep in extra_deps]) + if len(extra_deps_formatted) > 0: + extra_deps_formatted = "\n" + extra_deps_formatted + return f"""\ +\"\"\" +To run this script in fbcode: +- Create a directory (//scripts/{{your_unixname}}/repro) +- Put this file in scripts/{{your_unixname}}/repro/fx_graph_runnable.py +- Add a TARGETS file that looks like the following +- `buck2 run //scripts/{{your_unixname}}/repro:repro` + +NOTE: you may need additional deps to actually be able to run the script. +``` +# Contents of TARGETS file +load("@fbcode_macros//build_defs:python_binary.bzl", "python_binary") + +python_binary( + name = "repro", + main_src = "fx_graph_runnable.py", + deps = [ + "//caffe2:torch",{extra_deps_formatted} + ], +) +``` +\"\"\" +""" + else: + return "" + + def generate_compiler_repro_string( gm, args, *, stable_output=False, save_dir=None, stable_hash=False ): @@ -243,6 +277,7 @@ def generate_compiler_repro_string( {extra_imports} +{maybe_fbcode_instructions()} """ ) if not stable_output: From d549ddfb149189aaf734c1b287c3d9f0f6b3cd9d Mon Sep 17 00:00:00 2001 From: Chirag Pandya Date: Tue, 5 Nov 2024 20:14:18 +0000 Subject: [PATCH 095/503] [fr][rfc] use a logger to control output for flight recorder analyzer (#139656) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: Use a logger to control output to console. This is useful for hiding out debug/detail messages from the console v/s showing everything together. Test Plan: Ran `torchfrtrace` with various switches. 
The `-v` verbose swtch ``` torchfrtrace --prefix "trace_" /tmp/ -v loaded 2 files in 0.2567298412322998s built groups, memberships Not all ranks joining collective 3 at entry 2 group info: 0:default_pg collective: nccl:all_reduce missing ranks: {1} input sizes: [[4, 5]] output sizes: [[4, 5]] expected ranks: 2 collective state: scheduled collective stack trace: at /home/cpio/test/c.py:66 appending a non-matching collective built collectives, nccl_calls Groups id desc size -------------------- ---------- ------ 09000494312501845833 default_pg 2 Memberships group_id global_rank -------------------- ------------- 09000494312501845833 0 09000494312501845833 1 Collectives id group_id ---- ---------- 0 0 1 0 NCCLCalls id collective_id group_id global_rank traceback_id collective_type sizes ---- --------------- ---------- ------------- -------------- ----------------- -------- 0 0 0 0 0 nccl:all_reduce [[3, 4]] 1 0 0 1 0 nccl:all_reduce [[3, 4]] 2 1 0 0 0 nccl:all_reduce [[3, 4]] 3 1 0 1 0 nccl:all_reduce [[3, 4]] 4 0 0 0 nccl:all_reduce [[4, 5]] ``` Without the verbose switch ``` ❯ torchfrtrace --prefix "trace_" /tmp/ Not all ranks joining collective 3 at entry 2 group info: 0:default_pg collective: nccl:all_reduce missing ranks: {1} input sizes: [[4, 5]] output sizes: [[4, 5]] expected ranks: 2 collective state: scheduled collective stack trace: at /home/cpio/test/c.py:66 ``` With the `-j` switch: ``` ❯ torchfrtrace --prefix "trace_" /tmp/ -j Rank 0 Rank 1 ------------------------------------------------- ------------------------------------------------- all_reduce(input_sizes=[[3, 4]], state=completed) all_reduce(input_sizes=[[3, 4]], state=completed) all_reduce(input_sizes=[[3, 4]], state=completed) all_reduce(input_sizes=[[3, 4]], state=completed) all_reduce(input_sizes=[[4, 5]], state=scheduled) ``` Differential Revision: D65438520 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139656 Approved by: https://github.com/fduwjj --- tools/flight_recorder/components/builder.py | 81 ++++++++++++------- .../components/config_manager.py | 8 ++ tools/flight_recorder/components/loader.py | 11 ++- tools/flight_recorder/components/utils.py | 63 ++++++++++++--- 4 files changed, 123 insertions(+), 40 deletions(-) diff --git a/tools/flight_recorder/components/builder.py b/tools/flight_recorder/components/builder.py index bd5b65e62c279e..639c1246a97dc2 100644 --- a/tools/flight_recorder/components/builder.py +++ b/tools/flight_recorder/components/builder.py @@ -25,6 +25,7 @@ check_size_alltoall, check_version, find_coalesced_group, + FlightRecorderLogger, format_frames, get_version_detail, just_print_entries, @@ -33,10 +34,14 @@ ) +# Set up logging +logger: FlightRecorderLogger = FlightRecorderLogger() + + try: from tabulate import tabulate except ModuleNotFoundError: - print("tabulate is not installed. Proceeding without it.") + logger.warning("tabulate is not installed. Proceeding without it.") # Define a no-op tabulate function def tabulate(data: Any, headers: Any = None) -> Any: # type: ignore[misc] @@ -311,13 +316,20 @@ def build_collectives( # case one: not every rank join the collective or in the flight recorder. 
if (candidate_ranks | found_ranks) != expected_ranks: mismatch[pg_name] += 1 - print( - f"Not all ranks joining collective {collective_seq_id} at entry {record_id}", - f" for group {pg_desc} collective {profiling_name} ", - f"Missing ranks are {expected_ranks - (candidate_ranks | found_ranks)} ", - f"{input_sizes} {output_sizes} {len(expected_ranks)} {collective_state} ", - f"\nCollective stack traces: \n{collective_frames}", + logger.info( + "Not all ranks joining collective %s at entry %s", + collective_seq_id, + record_id, ) + logger.info("group info: %s", pg_desc) + logger.info("collective: %s", profiling_name) + missing_ranks = expected_ranks - (candidate_ranks | found_ranks) + logger.info("missing ranks: %s", missing_ranks) + logger.info("input sizes: %s", input_sizes) + logger.info("output sizes: %s", output_sizes) + logger.info("expected ranks: %d", len(expected_ranks)) + logger.info("collective state: %s", collective_state) + logger.info("collective stack trace: \n %s", collective_frames) candidate_ranks.update(found_ranks) candidate_idx.update(found_idx) found_idx.clear() @@ -338,13 +350,18 @@ def build_collectives( if fail_check: # When we see errors in all_to_all, it's hard to tell which rank is the source of the error. mismatch[pg_name] += 1 - print( - f"Input/output mismatch in the collective {collective_seq_id} ", - f"at entry {record_id} for group {pg_desc} collective {profiling_name} ", - f"input_numel {input_numel} output_numel {output_numel} ", - f"{input_sizes} {output_sizes} {len(expected_ranks)} {collective_state} ", - f"\nCollective stack traces: \n{collective_frames}", + logger.info( + "Input/output mismatch in the collective %s at entry %s", + collective_seq_id, + record_id, ) + logger.info("group info: %s", pg_desc) + logger.info("collective: %s", profiling_name) + logger.info("input sizes: %s", input_sizes) + logger.info("output sizes: %s", output_sizes) + logger.info("expected ranks: %d", len(expected_ranks)) + logger.info("collective state: %s", collective_state) + logger.info("collective stack trace: \n%s", collective_frames) candidate_ranks.update(found_ranks) candidate_idx.update(found_idx) found_idx.clear() @@ -366,13 +383,17 @@ def build_collectives( error_msg = ", ".join( f"Culprit rank {error[0]}; {str(error[1])}" for error in errors ) - print( - f"Collective {collective_seq_id} at entry {record_id} errors", - f" for group {pg_desc} collective {profiling_name} ", - f"{input_sizes} {output_sizes} {len(expected_ranks)} {collective_state} ", - f"\nFound errors: {error_msg}.\n", - f"\nCollective stack traces: \n{collective_frames} ", + logger.info( + "Collective %s at entry %s errors", collective_seq_id, record_id ) + logger.info("group info: %s", pg_desc) + logger.info("collective: %s", profiling_name) + logger.info("input sizes: %s", input_sizes) + logger.info("output sizes: %s", output_sizes) + logger.info("expected ranks: %d", len(expected_ranks)) + logger.info("collective state: %s", collective_state) + logger.info("error message: %s", error_msg) + logger.info("collective stack trace: \n%s", collective_frames) candidate_ranks.update(found_ranks) candidate_idx.update(found_idx) found_idx.clear() @@ -402,7 +423,7 @@ def build_collectives( # -> since its not a complete collective, no entry goes into collectives but we still record a nccl call # TODO should there be a way to mark 'mismatches'? else: - print("appending a non-matching collective") + logger.debug("appending a non-matching collective") # TODO: figure out a better for mismatch. 
# Also, shall we add seq Id as well? for r in candidate_ranks: @@ -418,7 +439,9 @@ def build_collectives( ) if mismatch[pg_name] > MISMATCH_TAIL: - print(f"Too many mismatches for process_group {pg_name}:{desc}, aborting") + logger.error( + "Too many mismatches for process_group %s: %s aborting", pg_name, desc + ) sys.exit(-1) return tracebacks, collectives, nccl_calls @@ -445,7 +468,7 @@ def build_db( groups, _groups, memberships, _memberships, _pg_guids = build_groups_memberships( pg_config ) - print("built groups, memberships") + logger.debug("built groups, memberships") if args.just_print_entries: just_print_entries(entries, _groups, _memberships, _pg_guids, args) @@ -456,12 +479,16 @@ def build_db( tracebacks, collectives, nccl_calls = build_collectives( entries, _groups, _memberships, _pg_guids, version ) - print("built collectives, nccl_calls") + logger.debug("built collectives, nccl_calls") if args.verbose: - print("Groups\n", tabulate(groups, headers=Group._fields)) - print("Memberships\n", tabulate(memberships, headers=Membership._fields)) - print("Collectives\n", tabulate(collectives, headers=Collective._fields)) - print("NCCLCalls\n", tabulate(nccl_calls, headers=NCCLCall._fields)) + logger.debug("Groups") + logger.debug(tabulate(groups, headers=Group._fields)) + logger.debug("Memberships") + logger.debug(tabulate(memberships, headers=Membership._fields)) + logger.debug("Collectives") + logger.debug(tabulate(collectives, headers=Collective._fields)) + logger.debug("NCCLCalls") + logger.debug(tabulate(nccl_calls, headers=NCCLCall._fields)) db = Database( tracebacks=tracebacks, collectives=collectives, diff --git a/tools/flight_recorder/components/config_manager.py b/tools/flight_recorder/components/config_manager.py index d4912c427b3a83..5a203bbe3806e3 100644 --- a/tools/flight_recorder/components/config_manager.py +++ b/tools/flight_recorder/components/config_manager.py @@ -5,8 +5,14 @@ # LICENSE file in the root directory of this source tree. 
import argparse +import logging from typing import Optional, Sequence +from tools.flight_recorder.components.utils import FlightRecorderLogger + + +logger = FlightRecorderLogger() + class JobConfig: """ @@ -64,4 +70,6 @@ def parse_args( assert ( args.just_print_entries ), "Not support selecting pg filters without printing entries" + if args.verbose: + logger.set_log_level(logging.DEBUG) return args diff --git a/tools/flight_recorder/components/loader.py b/tools/flight_recorder/components/loader.py index 442b000532f1f7..30daad78752746 100644 --- a/tools/flight_recorder/components/loader.py +++ b/tools/flight_recorder/components/loader.py @@ -13,6 +13,11 @@ from collections import defaultdict from typing import Any, Dict, List, Optional, Set, Tuple, Union +from tools.flight_recorder.components.utils import FlightRecorderLogger + + +logger: FlightRecorderLogger = FlightRecorderLogger() + def read_dump(prefix: str, filename: str) -> Dict[str, Union[str, int, List[Any]]]: basename = os.path.basename(filename) @@ -52,7 +57,7 @@ def _determine_prefix(files: List[str]) -> str: possible_prefixes[p].add(int(r)) if len(possible_prefixes) == 1: prefix = next(iter(possible_prefixes)) - print(f"Inferred common prefix {prefix}") + logger.debug("Inferred common prefix %s", prefix) return prefix else: raise ValueError( @@ -68,6 +73,7 @@ def read_dir( details = {} t0 = time.time() version = "" + filecount = 0 assert os.path.isdir(folder), f"folder {folder} does not exist" for root, _, files in os.walk(folder): if prefix is None: @@ -76,9 +82,10 @@ def read_dir( if f.find(prefix) != 0: continue details[f] = read_dump(prefix, os.path.join(root, f)) + filecount += 1 if not version: version = str(details[f]["version"]) tb = time.time() assert len(details) > 0, f"no files loaded from {folder} with prefix {prefix}" - print(f"loaded {len(files)} files in {tb - t0}s") + logger.debug("loaded %s files in %ss", filecount, tb - t0) return details, version diff --git a/tools/flight_recorder/components/utils.py b/tools/flight_recorder/components/utils.py index 7f2af5eeb29ec8..37a6e100b601d0 100644 --- a/tools/flight_recorder/components/utils.py +++ b/tools/flight_recorder/components/utils.py @@ -5,22 +5,62 @@ # LICENSE file in the root directory of this source tree. 
import argparse +import logging import math -from typing import Any, Dict, List, Set, Tuple +from typing import Any, Callable, Dict, List, Optional, Set, Tuple -from tools.flight_recorder.components.types import ( - Group, - MatchState, - Membership, - Op, - P2P, -) +from .types import Group, MatchState, Membership, Op, P2P + + +class FlightRecorderLogger: + _instance: Optional[Any] = None + logger: logging.Logger + + def __init__(self) -> None: + self.logger: logging.Logger = logging.getLogger("Flight Recorder") + + def __new__(cls) -> Any: + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._instance.logger = logging.getLogger("Flight Recorder") + cls._instance.logger.setLevel(logging.INFO) + formatter = logging.Formatter("%(message)s") + ch = logging.StreamHandler() + ch.setFormatter(formatter) + cls._instance.logger.addHandler(ch) + return cls._instance + + def set_log_level(self, level: int) -> None: + self.logger.setLevel(level) + + @property + def debug(self) -> Callable[..., None]: + return self.logger.debug + + @property + def info(self) -> Callable[..., None]: + return self.logger.info + + @property + def warning(self) -> Callable[..., None]: + return self.logger.warning + + @property + def error(self) -> Callable[..., None]: + return self.logger.error + + @property + def critical(self) -> Callable[..., None]: + return self.logger.critical + + +logger = FlightRecorderLogger() try: from tabulate import tabulate except ModuleNotFoundError: - print("tabulate is not installed. Proceeding without it.") + logger.debug("tabulate is not installed. Proceeding without it.") def format_frame(frame: Dict[str, str]) -> str: @@ -121,7 +161,8 @@ def visualize_ops( row = [] i += 1 title = "Match" if match else "MISMATCH" - print(f"{title}\n", tabulate(table)) # type: ignore[operator] + logger.info("%s \n", title) + logger.info("%s", tabulate(table)) # type: ignore[operator] # TODO can't verify seq_id bc there might have been valid seq deltas between ranks even within a pg. 
for op_list in all_ops.values(): @@ -248,7 +289,7 @@ def just_print_entries( if progress: rows.append(row) - print(tabulate(rows, headers=headers)) + logger.info(tabulate(rows, headers=headers)) def check_no_missing_dump_files( From ae86939425b338bc9ca9852e89f01d3e7a6edc6f Mon Sep 17 00:00:00 2001 From: Ting Lu Date: Tue, 5 Nov 2024 20:15:48 +0000 Subject: [PATCH 096/503] [aarch64] add CUDA 12.6 to docker for sbsa wheel (#138562) Add cuda 12.6 installation for sbsa docker Related to #138440 Pull Request resolved: https://github.com/pytorch/pytorch/pull/138562 Approved by: https://github.com/atalman --- .ci/docker/common/install_cuda_aarch64.sh | 82 +++++++++++++++++++++-- 1 file changed, 76 insertions(+), 6 deletions(-) diff --git a/.ci/docker/common/install_cuda_aarch64.sh b/.ci/docker/common/install_cuda_aarch64.sh index 82395421851d21..dbc574df83478f 100644 --- a/.ci/docker/common/install_cuda_aarch64.sh +++ b/.ci/docker/common/install_cuda_aarch64.sh @@ -4,6 +4,7 @@ set -ex NCCL_VERSION=v2.21.5-1 +CUDNN_VERSION=9.1.0.70 function install_cusparselt_062 { # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html @@ -17,7 +18,7 @@ function install_cusparselt_062 { } function install_124 { - echo "Installing CUDA 12.4.1 and cuDNN 9.1 and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2" + echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2" rm -rf /usr/local/cuda-12.4 /usr/local/cuda # install CUDA 12.4.1 in the same container wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux_sbsa.run @@ -28,10 +29,10 @@ function install_124 { # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement mkdir tmp_cudnn && cd tmp_cudnn - wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz -O cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz - tar xf cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz - cp -a cudnn-linux-sbsa-9.1.0.70_cuda12-archive/include/* /usr/local/cuda/include/ - cp -a cudnn-linux-sbsa-9.1.0.70_cuda12-archive/lib/* /usr/local/cuda/lib64/ + wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz + tar xf cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz + cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ + cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ cd .. 
rm -rf tmp_cudnn @@ -74,18 +75,87 @@ function prune_124 { $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a ##################################################################################### - # CUDA 12.1 prune visual tools + # CUDA 12.4 prune visual tools ##################################################################################### export CUDA_BASE="/usr/local/cuda-12.4/" rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/ } +function install_126 { + echo "Installing CUDA 12.6.2 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2" + rm -rf /usr/local/cuda-12.6 /usr/local/cuda + # install CUDA 12.6.2 in the same container + wget -q https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.35.03_linux_sbsa.run + chmod +x cuda_12.6.2_560.35.03_linux_sbsa.run + ./cuda_12.6.2_560.35.03_linux_sbsa.run --toolkit --silent + rm -f cuda_12.6.2_560.35.03_linux_sbsa.run + rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda + + # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement + mkdir tmp_cudnn && cd tmp_cudnn + wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz + tar xf cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz + cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ + cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ + cd .. + rm -rf tmp_cudnn + + # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses + # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build + git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git + cd nccl && make -j src.build + cp -a build/include/* /usr/local/cuda/include/ + cp -a build/lib/* /usr/local/cuda/lib64/ + cd .. 
+ rm -rf nccl + + install_cusparselt_062 + + ldconfig +} + +function prune_126 { + echo "Pruning CUDA 12.6" + ##################################################################################### + # CUDA 12.6 prune static libs + ##################################################################################### + export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune" + export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64" + + export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" + export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" + + if [[ -n "$OVERRIDE_GENCODE" ]]; then + export GENCODE=$OVERRIDE_GENCODE + fi + if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then + export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN + fi + + # all CUDA libs except CuDNN and CuBLAS + ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \ + | xargs -I {} bash -c \ + "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" + + # prune CuDNN and CuBLAS + $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a + $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a + + ##################################################################################### + # CUDA 12.6 prune visual tools + ##################################################################################### + export CUDA_BASE="/usr/local/cuda-12.6/" + rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/ +} + # idiomatic parameter and option handling in sh while test $# -gt 0 do case "$1" in 12.4) install_124; prune_124 ;; + 12.6) install_126; prune_126 + ;; *) echo "bad argument $1"; exit 1 ;; esac From 6734cb7bf2c1763118dcc430cee6110a88f8f849 Mon Sep 17 00:00:00 2001 From: Yidi Wu Date: Mon, 4 Nov 2024 14:05:22 -0800 Subject: [PATCH 097/503] [hop free symbols] refactor tensor.to_list implementation to call wrap_fx_proxy. (#139663) Refactoring only. Previously, we manually cal SymNodeVariable.create, now we handle it with wrap_fx_proxy. This unifies the handling of operations that produce symints in wrap_fx_proxy. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139663 Approved by: https://github.com/zou3519 ghstack dependencies: #138345, #138428, #138558, #138737, #138559 --- torch/_dynamo/variables/tensor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torch/_dynamo/variables/tensor.py b/torch/_dynamo/variables/tensor.py index 019d6a0df41499..bf5e7ddfbeaec3 100644 --- a/torch/_dynamo/variables/tensor.py +++ b/torch/_dynamo/variables/tensor.py @@ -755,6 +755,7 @@ def method_numpy(self, *, force=False): def method_tolist(self): from ..symbolic_convert import InstructionTranslator + from .builder import wrap_fx_proxy tx = InstructionTranslator.current_tx() @@ -765,7 +766,7 @@ def wrap(i, sub_proxy): with unittest.mock.patch.object( tx.fake_mode, "allow_scalar_outputs", True ): - return SymNodeVariable.create( + return wrap_fx_proxy( tx, sub_proxy.item(), ) From 59b66944d4287ca154508c25691fde95b5cecb36 Mon Sep 17 00:00:00 2001 From: Jean Schmidt <4520845+jeanschmidt@users.noreply.github.com> Date: Tue, 5 Nov 2024 21:24:28 +0100 Subject: [PATCH 098/503] Migrate inductor-perf-test-nightly.yml to use linux.aws.a100 (#139657) Co-authored-by: Huy Do --- .../workflows/inductor-perf-test-nightly.yml | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/inductor-perf-test-nightly.yml b/.github/workflows/inductor-perf-test-nightly.yml index eed18b6746e96f..51c57a4e8f015f 100644 --- a/.github/workflows/inductor-perf-test-nightly.yml +++ b/.github/workflows/inductor-perf-test-nightly.yml @@ -87,18 +87,18 @@ jobs: cuda-arch-list: '8.0' test-matrix: | { include: [ - { config: "inductor_huggingface_perf", shard: 1, num_shards: 3, runner: "linux.gcp.a100.large" }, - { config: "inductor_huggingface_perf", shard: 2, num_shards: 3, runner: "linux.gcp.a100.large" }, - { config: "inductor_huggingface_perf", shard: 3, num_shards: 3, runner: "linux.gcp.a100.large" }, - { config: "inductor_timm_perf", shard: 1, num_shards: 5, runner: "linux.gcp.a100.large" }, - { config: "inductor_timm_perf", shard: 2, num_shards: 5, runner: "linux.gcp.a100.large" }, - { config: "inductor_timm_perf", shard: 3, num_shards: 5, runner: "linux.gcp.a100.large" }, - { config: "inductor_timm_perf", shard: 4, num_shards: 5, runner: "linux.gcp.a100.large" }, - { config: "inductor_timm_perf", shard: 5, num_shards: 5, runner: "linux.gcp.a100.large" }, - { config: "inductor_torchbench_perf", shard: 1, num_shards: 4, runner: "linux.gcp.a100.large" }, - { config: "inductor_torchbench_perf", shard: 2, num_shards: 4, runner: "linux.gcp.a100.large" }, - { config: "inductor_torchbench_perf", shard: 3, num_shards: 4, runner: "linux.gcp.a100.large" }, - { config: "inductor_torchbench_perf", shard: 4, num_shards: 4, runner: "linux.gcp.a100.large" }, + { config: "inductor_huggingface_perf", shard: 1, num_shards: 3, runner: "linux.aws.a100" }, + { config: "inductor_huggingface_perf", shard: 2, num_shards: 3, runner: "linux.aws.a100" }, + { config: "inductor_huggingface_perf", shard: 3, num_shards: 3, runner: "linux.aws.a100" }, + { config: "inductor_timm_perf", shard: 1, num_shards: 5, runner: "linux.aws.a100" }, + { config: "inductor_timm_perf", shard: 2, num_shards: 5, runner: "linux.aws.a100" }, + { config: "inductor_timm_perf", shard: 3, num_shards: 5, runner: "linux.aws.a100" }, + { config: "inductor_timm_perf", shard: 4, num_shards: 5, runner: "linux.aws.a100" }, + { config: "inductor_timm_perf", shard: 5, num_shards: 5, runner: "linux.aws.a100" }, + { config: 
"inductor_torchbench_perf", shard: 1, num_shards: 4, runner: "linux.aws.a100" }, + { config: "inductor_torchbench_perf", shard: 2, num_shards: 4, runner: "linux.aws.a100" }, + { config: "inductor_torchbench_perf", shard: 3, num_shards: 4, runner: "linux.aws.a100" }, + { config: "inductor_torchbench_perf", shard: 4, num_shards: 4, runner: "linux.aws.a100" }, ]} selected-test-configs: ${{ inputs.benchmark_configs }} secrets: From 1a701853098b0f6e50d7e35e5e39b2f78b428970 Mon Sep 17 00:00:00 2001 From: Jun Luo Date: Tue, 5 Nov 2024 20:58:20 +0000 Subject: [PATCH 099/503] Add Autograd Fallback for MTIA (#139211) Summary: As title. Test Plan: OSS and internal CIs. Differential Revision: D65022481 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139211 Approved by: https://github.com/jvandebon --- aten/src/ATen/core/VariableFallbackKernel.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/aten/src/ATen/core/VariableFallbackKernel.cpp b/aten/src/ATen/core/VariableFallbackKernel.cpp index 87e0f67bd8f088..390d9189190e06 100644 --- a/aten/src/ATen/core/VariableFallbackKernel.cpp +++ b/aten/src/ATen/core/VariableFallbackKernel.cpp @@ -76,6 +76,10 @@ TORCH_LIBRARY_IMPL(_, AutogradCUDA, m) { m.fallback(AUTOGRAD_FALLBACK); } +TORCH_LIBRARY_IMPL(_, AutogradMTIA, m) { + m.fallback(AUTOGRAD_FALLBACK); +} + TORCH_LIBRARY_IMPL(_, AutogradXLA, m) { m.fallback(AUTOGRAD_FALLBACK); } From 75eeefbfab3862abe887e1d85a0b1b18c227d9f3 Mon Sep 17 00:00:00 2001 From: Howard Huang Date: Tue, 5 Nov 2024 10:08:46 -0800 Subject: [PATCH 100/503] [pp] pipelining + dcp unit test (#139633) Currently there aren't any unit tests for PP and DCP, this unit test could be useful for quick experimentation in issues like (https://github.com/pytorch/torchtitan/issues/474). `python test/distributed/_composable/test_composability/test_pp_composability.py -k test_pp_and_dcp` Pull Request resolved: https://github.com/pytorch/pytorch/pull/139633 Approved by: https://github.com/wconstab --- .../test_pp_composability.py | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/test/distributed/_composable/test_composability/test_pp_composability.py b/test/distributed/_composable/test_composability/test_pp_composability.py index 93895e4d3ae509..fe0f85d7d044f0 100644 --- a/test/distributed/_composable/test_composability/test_pp_composability.py +++ b/test/distributed/_composable/test_composability/test_pp_composability.py @@ -1,14 +1,21 @@ # Owner(s): ["oncall: distributed"] import copy import os +from typing import TYPE_CHECKING import torch +import torch.distributed.checkpoint as dcp import torch.nn as nn from torch.distributed._composable.fsdp.fully_shard import ( fully_shard, MixedPrecisionPolicy, ) from torch.distributed._tensor import DTensor +from torch.distributed.checkpoint import FileSystemReader +from torch.distributed.checkpoint.default_planner import _EmptyStateDictLoadPlanner +from torch.distributed.checkpoint.state_dict import get_state_dict, set_state_dict +from torch.distributed.checkpoint.state_dict_loader import _load_state_dict +from torch.distributed.checkpoint.stateful import Stateful from torch.distributed.device_mesh import init_device_mesh from torch.distributed.pipelining import PipelineStage from torch.distributed.pipelining.schedules import ( @@ -32,6 +39,11 @@ run_tests, skip_but_pass_in_sandcastle_if, ) +from torch.testing._internal.distributed.checkpoint_utils import with_temp_dir + + +if TYPE_CHECKING: + from torch.distributed.checkpoint.metadata import STATE_DICT_TYPE # MLP Layer 
@@ -253,6 +265,98 @@ def build_stage(stage_idx, num_stages): torch.distributed.destroy_process_group() + @requires_nccl() + @skip_if_lt_x_gpu(4) + @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "Test requires 4+ GPUs") + def test_pp_and_dcp(self): + """ + Test that pipeline parallelism and distributed checkpointing can be used together and + with saved correct FQNs + """ + + class AppState(Stateful): + def __init__(self, model, optimizer): + self.model = model + self.optimizer = optimizer + + def state_dict(self): + # this line automatically manages FSDP FQN's, as well as sets the default state dict type to FSDP.SHARDED_STATE_DICT + model_state_dict, optimizer_state_dict = get_state_dict( + self.model, self.optimizer + ) + return {"model": model_state_dict, "optim": optimizer_state_dict} + + def load_state_dict(self, state_dict): + # sets our state dicts on the model and optimizer, now that we've loaded + set_state_dict( + self.model, + self.optimizer, + model_state_dict=state_dict["model"], + optim_state_dict=state_dict["optim"], + ) + + class PPModelChunk(nn.Module): + def __init__(self, layers: nn.ModuleDict, start_index: int, end_index: int): + super().__init__() + # Filter layers based on start_index and end_index + self.layers = nn.ModuleDict( + {str(i): layers[str(i)] for i in range(start_index, end_index)} + ) + + def forward(self, x): + for layer in self.layers.values(): + x = layer(x) + return x + + device = torch.device("cuda", self.device) + torch.cuda.set_device(self.device) + store = torch.distributed.FileStore(self.file_name, self.world_size) + torch.distributed.init_process_group( + backend="nccl", + store=store, + rank=self.rank, + world_size=self.world_size, + device_id=device, + ) + # create "entire model" + total_layers = 8 + dim = 10 + full_model = nn.ModuleDict( + {f"{i}": MLPModule(dim) for i in range(total_layers)} + ) + # Calculate start and end indices based on rank + start_index = self.rank * 2 + end_index = start_index + 2 + pp_model = PPModelChunk(full_model, start_index, end_index) + + pp_model.to(self.device) + opt = torch.optim.Adam(pp_model.parameters(), lr=0.1) + + # perform work in a temp dir that is cleaned up after the test + @with_temp_dir + def _dcp_test(self): + state_dict = {"app": AppState(pp_model, opt)} + dcp.save(state_dict, checkpoint_id=self.temp_dir) + # temp checkpoint + sd: STATE_DICT_TYPE = {} + _load_state_dict( + sd, + storage_reader=FileSystemReader(self.temp_dir), + planner=_EmptyStateDictLoadPlanner(), + ) + # Check parameter names in sd and compare with pp_model + pp_model_param_names = set(pp_model.state_dict().keys()) + sd_param_names = set(sd["app"]["model"].keys()) + # Verify each parameter name in pp_model is contained in sd + for param_name in pp_model_param_names: + self.assertIn( + param_name, + sd_param_names, + f"Parameter name '{param_name}' not found in state_dict.", + ) + + _dcp_test(self) + instantiate_parametrized_tests(ComposabilityTest) From b09eb6ed6a22476746d8b7d5f6e464e34f89747a Mon Sep 17 00:00:00 2001 From: Animesh Jain Date: Tue, 5 Nov 2024 10:03:21 -0800 Subject: [PATCH 101/503] [dynamo][guards] Consider tensors as immutable for dict tag matches (#139560) This is a bug on the main exposed by https://github.com/pytorch/pytorch/issues/139476 We have dict tag optimization where if the dict tag does not change, we skip guards on all the items of the dict that are "immutable". We considered tensors as immutable in such scenarios. 
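
Concretely, the stale-guard scenario looks roughly like the regression test
added below: flipping `requires_grad` in place on a parameter mutates the
tensor but leaves the owning dict's tag unchanged, so the skipped guard can
keep a stale graph alive. A hypothetical minimal sketch (not part of this
patch):

```python
import torch

model = torch.nn.Linear(16, 16)
compiled = torch.compile(model)
x = torch.randn(4, 16)

compiled(x)  # first compile, parameters have requires_grad=True

for p in model.parameters():
    p.requires_grad_(False)  # in-place change; the dict holding the params keeps its tag

# With tensors treated as immutable under the dict-tag optimization, this call
# may reuse the previously compiled graph instead of recompiling.
compiled(x)
```
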
This is critical for guard eval performance, because generally users don't change their parameters. If I try to remove this optimization, we see slowdowns, e.g., 3.03x to 2.95x on the conv_mixer TIMM benchmark.

So, I am adding a flag which keeps the current state but allows users to remove this optimization. Not ideal, but given how performance-critical guard evaluation is, we are in the gray area of the unsoundness vs. performance tradeoff.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/139560
Approved by: https://github.com/jansel
---
 test/dynamo/test_modules.py  | 49 ++++++++++++++++++++++++++++++++++++
 torch/_dynamo/config.py      |  4 +++
 torch/csrc/dynamo/guards.cpp |  8 +++++-
 3 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/test/dynamo/test_modules.py b/test/dynamo/test_modules.py
index e082004eea4a8a..bbba9e91d65d45 100644
--- a/test/dynamo/test_modules.py
+++ b/test/dynamo/test_modules.py
@@ -3136,6 +3136,55 @@ def fn(x):
         res = opt_fn(x)
         self.assertEqual(ref, res)
 
+    @patch.object(
+        torch._dynamo.config, "skip_tensor_guards_with_matching_dict_tags", False
+    )
+    @patch.object(torch._dynamo.config, "inline_inbuilt_nn_modules", True)
+    def test_param_requires_grad(self):
+        def adjust_model(model):
+            to_freeze = model.num_iter % 2 == 0
+            if to_freeze:
+                for param in model.layer2.parameters():
+                    param.requires_grad = False
+            else:
+                for param in model.layer2.parameters():
+                    param.requires_grad = True
+
+        class MyModule(torch.nn.Module):
+            def __init__(self, input_size, hidden_size, output_size):
+                super().__init__()
+
+                self.layer1 = torch.nn.Linear(hidden_size, hidden_size)
+                self.layer2 = torch.nn.Linear(hidden_size, hidden_size)
+
+                self.num_iter = 0
+
+            def forward(self, x):
+                x = self.layer2(x + self.layer1.bias)
+
+                self.num_iter += 1
+                return x
+
+        input_size = 1024
+        hidden_size = 1024
+        output_size = 1
+        num_samples = 2048
+        features = torch.randn(num_samples, input_size)
+
+        model = MyModule(input_size, hidden_size, output_size)
+
+        cnt = torch._dynamo.testing.CompileCounter()
+        opt_model = torch.compile(model, backend=cnt, fullgraph=True)
+
+        for _ in range(3):
+            model.zero_grad(True)
+            adjust_model(model)
+            res = opt_model(features)
+            res.sum().backward()
+
+        # Check that we have recompiled twice, which leads to 3 frames
+        self.assertEqual(cnt.frame_count, 3)
+
 
 if __name__ == "__main__":
     from torch._dynamo.test_case import run_tests
diff --git a/torch/_dynamo/config.py b/torch/_dynamo/config.py
index 5c36654ae5d0d9..3ab8200ced0ef2 100644
--- a/torch/_dynamo/config.py
+++ b/torch/_dynamo/config.py
@@ -331,6 +331,10 @@ def _get_optimize_ddp_mode():
 # notice and lead to incorrect result.
 skip_no_tensor_aliasing_guards_on_parameters = True
 
+# Considers a tensor immutable if it is one of the values of a dictionary, and
+# the dictionary tag is same across invocation calls.
+skip_tensor_guards_with_matching_dict_tags = True + # If True, raises exception if TorchDynamo is called with a context manager raise_on_ctx_manager_usage = True diff --git a/torch/csrc/dynamo/guards.cpp b/torch/csrc/dynamo/guards.cpp index 6c81bdfc785c2f..bd93f582f38f34 100644 --- a/torch/csrc/dynamo/guards.cpp +++ b/torch/csrc/dynamo/guards.cpp @@ -886,6 +886,11 @@ std::string get_exception_message() { } bool is_immutable_object(py::handle example_value) { + static py::object config_module = py::module_::import("torch._dynamo.config"); + bool is_tensor_immutable = + config_module.attr("skip_tensor_guards_with_matching_dict_tags") + .cast(); + if (PyTuple_Check(example_value.ptr())) { // Check that each element is immutable for (Py_ssize_t i = 0; i < PyTuple_Size(example_value.ptr()); ++i) { @@ -896,10 +901,11 @@ bool is_immutable_object(py::handle example_value) { } return true; } + return PyLong_Check(example_value.ptr()) || PyFloat_Check(example_value.ptr()) || PyBool_Check(example_value.ptr()) || PyUnicode_Check(example_value.ptr()) || - THPVariable_Check(example_value.ptr()); + (is_tensor_immutable && THPVariable_Check(example_value.ptr())); } bool is_parameter(py::handle tensor) { From d26dcda35eebde63cffbc967dff9b5b7683e4a43 Mon Sep 17 00:00:00 2001 From: Sam Ginzburg Date: Tue, 5 Nov 2024 10:25:11 -0800 Subject: [PATCH 102/503] [test] Fix Triton test to use the correct divisibility attr (#139772) Pull Request resolved: https://github.com/pytorch/pytorch/pull/139772 Approved by: https://github.com/bertmaher --- test/inductor/test_torchinductor.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/test/inductor/test_torchinductor.py b/test/inductor/test_torchinductor.py index 46e1c27e91acc1..ce6c4c02b42d4f 100644 --- a/test/inductor/test_torchinductor.py +++ b/test/inductor/test_torchinductor.py @@ -251,6 +251,13 @@ def register_ops_with_aoti_compile(ns, op_set, dispatch_key, torch_compile_op_li continue +def get_divisible_by_16(cfg): + # attribute was renamed between triton versions, from "divisible_by_16" to "divisibility_16" + if hasattr(cfg, "divisibility_16"): + return cfg.divisibility_16 + return cfg.divisible_by_16 + + class TestCase(InductorTestCase): @classmethod def setUpClass(cls): @@ -11870,8 +11877,8 @@ def fn(a: torch.Tensor) -> torch.Tensor: self.assertEqual(len(kernels), 2) for kernel_id, expected in expected_divisible.items(): - divisible_by_16 = ( - kernels[kernel_id].triton_meta["configs"][0].divisible_by_16 + divisible_by_16 = get_divisible_by_16( + kernels[kernel_id].triton_meta["configs"][0] ) self.assertEqual(divisible_by_16, expected) @@ -11882,12 +11889,6 @@ def test_codegen_config_option_dont_assume_alignment(self): def fn(x: torch.Tensor) -> torch.Tensor: return x.sin() + x.cos() - def get_divisible_by_16(cfg): - # attribute was renamed between triton versions, from "divisible_by_16" to "divisibility_16" - if hasattr(cfg, "divisibility_16"): - return cfg.divisibility_16 - return cfg.divisible_by_16 - # We want code that assumes alignment if the initial input is 16-byte aligned for offset in (0, 1, 2, 3, 4): base = torch.randn(64 * 64 + 64, dtype=torch.float32, device=GPU_TYPE) From 16da289402a7d21c1683c0d4f7133afd5c12fb35 Mon Sep 17 00:00:00 2001 From: drisspg Date: Tue, 5 Nov 2024 10:40:19 -0800 Subject: [PATCH 103/503] [Workspace Inductor] Fix dynamic shapes (#139777) # Summary Arg ordering was wrong for when dynamic shapes is enabled and we pass in the additional size args Pull Request resolved: 
https://github.com/pytorch/pytorch/pull/139777 Approved by: https://github.com/eellison ghstack dependencies: #139157 --- torch/_inductor/autotune_process.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch/_inductor/autotune_process.py b/torch/_inductor/autotune_process.py index 8064343234229e..13e6898b1b1369 100644 --- a/torch/_inductor/autotune_process.py +++ b/torch/_inductor/autotune_process.py @@ -689,8 +689,8 @@ def run_with_workspace(): run_method( *input_tensors, output_tensor, - workspace_tensor, *extra_args, + workspace_tensor, grid=self.grid, **warmup_arg, stream=stream, From f63ee13f2c9e2f9a60f0862c016eee8ec067e934 Mon Sep 17 00:00:00 2001 From: wz337 Date: Tue, 5 Nov 2024 22:56:33 +0000 Subject: [PATCH 104/503] [Test][DTensor] Skip test_dtensor_mm if ROCm (#139719) Seems there are some numeric issues when running on ROCm. ``` PYTORCH_TEST_WITH_ROCM=1 python test/distributed/_tensor/test_matrix_ops.py DistMatrixOpsTest.test_dtensor_mm ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/139719 Approved by: https://github.com/XilunWu --- test/distributed/_tensor/test_matrix_ops.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/distributed/_tensor/test_matrix_ops.py b/test/distributed/_tensor/test_matrix_ops.py index 20298dce11abc9..e8b5eb2ff18f2c 100644 --- a/test/distributed/_tensor/test_matrix_ops.py +++ b/test/distributed/_tensor/test_matrix_ops.py @@ -16,7 +16,7 @@ Shard, ) from torch.distributed.tensor.debug import CommDebugMode -from torch.testing._internal.common_utils import run_tests +from torch.testing._internal.common_utils import run_tests, skipIfRocm from torch.testing._internal.distributed._tensor.common_dtensor import ( DTensorTestBase, skip_unless_torch_gpu, @@ -340,6 +340,7 @@ def test_scaled_dot_product_attention(self): self.assertTrue(dist_value.grad.placements[0].is_shard(dim=1)) self.assertEqual(dist_value.grad.full_tensor(), value.grad) + @skipIfRocm @skip_unless_torch_gpu @with_comms() def test_dtensor_mm(self): From 1d28b8b6d52c69607bedf0a7c00b9a859adf6f4d Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Tue, 5 Nov 2024 23:10:38 +0000 Subject: [PATCH 105/503] Revert "Deprecate `torch._utils.is_compiling()` and `torch._dynamo.external_utils.is_compiling()` (#127690)" This reverts commit e84d1121ad66a453c8c24fcc098625e2e9764fca. Reverted https://github.com/pytorch/pytorch/pull/127690 on behalf of https://github.com/ZainRizvi due to Sorry but this is breaking internally. 
More details in D65483292 ([comment](https://github.com/pytorch/pytorch/pull/127690#issuecomment-2458381056)) --- test/dynamo/test_skip_non_tensor.py | 10 +++++----- test/export/test_torchbind.py | 2 +- test/functorch/test_memory_efficient_fusion.py | 2 +- test/inductor/test_distributed_patterns.py | 4 ++-- test/test_nestedtensor.py | 4 ++-- test/test_optim.py | 2 +- torch/_dynamo/decorators.py | 3 ++- torch/_dynamo/external_utils.py | 5 ----- torch/_functorch/apis.py | 6 +++--- torch/_functorch/eager_transforms.py | 4 ++-- torch/_higher_order_ops/associative_scan.py | 2 +- torch/_higher_order_ops/scan.py | 2 +- torch/_utils.py | 6 +----- .../algorithms/ddp_comm_hooks/default_hooks.py | 2 +- torch/distributed/tensor/parallel/_utils.py | 11 ++++++----- torch/nn/modules/module.py | 4 +++- torch/nn/parallel/distributed.py | 4 ++-- torch/optim/_adafactor.py | 4 ++-- torch/optim/adadelta.py | 8 ++++---- torch/optim/adagrad.py | 2 +- torch/optim/adam.py | 8 ++++---- torch/optim/adamax.py | 8 ++++---- torch/optim/adamw.py | 8 ++++---- torch/optim/asgd.py | 6 +++--- torch/optim/nadam.py | 6 +++--- torch/optim/optimizer.py | 11 ++++++----- torch/optim/radam.py | 6 +++--- torch/optim/rmsprop.py | 8 ++++---- torch/optim/rprop.py | 8 ++++---- torch/optim/sgd.py | 2 +- torch/testing/_internal/optests/generate_tests.py | 2 +- 31 files changed, 78 insertions(+), 82 deletions(-) diff --git a/test/dynamo/test_skip_non_tensor.py b/test/dynamo/test_skip_non_tensor.py index 48c4022ef28fb6..72153d26a1ff09 100644 --- a/test/dynamo/test_skip_non_tensor.py +++ b/test/dynamo/test_skip_non_tensor.py @@ -12,12 +12,12 @@ def user_function(): - return torch.compiler.is_compiling() + return torch._utils.is_compiling() def user_generator(): for _ in range(1): - yield torch.compiler.is_compiling() + yield torch._utils.is_compiling() return @@ -38,7 +38,7 @@ def forward(self, x): global _variable, _variable_2 if self.mode == 1: - if torch.compiler.is_compiling(): + if torch._utils.is_compiling(): _variable += 1 else: _variable_2 += 1 @@ -46,7 +46,7 @@ def forward(self, x): if user_function(): _variable += 1 elif self.mode == 3: - lambda_f = lambda: torch.compiler.is_compiling() # noqa: E731 + lambda_f = lambda: torch._utils.is_compiling() # noqa: E731 if lambda_f(): _variable += 1 elif self.mode == 4: @@ -163,7 +163,7 @@ def __len__(self): def test_do_not_skip_side_effects(self): # https://github.com/pytorch/pytorch/issues/110765 - # By invoking torch.compiler.is_compiling(), + # By invoking torch._utils.is_compiling(), # there may be side-effects inconsistent with eager when # compiling. 
Thus we force dynamo to commit the graph, # even if it does not perform any tensor operation diff --git a/test/export/test_torchbind.py b/test/export/test_torchbind.py index 53e2ffcc454c87..fd9f98199d37b7 100644 --- a/test/export/test_torchbind.py +++ b/test/export/test_torchbind.py @@ -1315,7 +1315,7 @@ def f(tq, x): f(_empty_tensor_queue(), x), torch.compile(f, backend=backend)(_empty_tensor_queue(), x), ) - if not torch.compiler.is_compiling() and backend == "eager": + if not torch._dynamo.is_compiling() and backend == "eager": self.assertExpectedInline( backend.graphs[0].code.strip(), """\ diff --git a/test/functorch/test_memory_efficient_fusion.py b/test/functorch/test_memory_efficient_fusion.py index d07fb136f5e7cf..bfca66d333b963 100644 --- a/test/functorch/test_memory_efficient_fusion.py +++ b/test/functorch/test_memory_efficient_fusion.py @@ -278,7 +278,7 @@ def test_hash_with_numbers(self): # Test to repro issue with fx_graph_cse when # hash((primals_2, 1.0)) == hash((primals_2, 1)) - if torch.compiler.is_compiling(): + if torch._dynamo.is_compiling(): self.skipTest("Unsupported if test run is compiled") def f(inpt, osize): diff --git a/test/inductor/test_distributed_patterns.py b/test/inductor/test_distributed_patterns.py index 31f91dce0a1296..fd446370434e94 100644 --- a/test/inductor/test_distributed_patterns.py +++ b/test/inductor/test_distributed_patterns.py @@ -91,13 +91,13 @@ def bw_post_hook(mod, gI, gO): def init_module_bw_hooks(allow_eager): def bw_pre_hook(mod, gO): - assert allow_eager or torch.compiler.is_compiling() + assert allow_eager or torch._dynamo.is_compiling() assert mod.weight.size() == (10, 10) mod.hook_count_pre.add_(1) return (torch.sin(gO[0] + 1.2),) def bw_post_hook(mod, gI, gO): - assert allow_eager or torch.compiler.is_compiling() + assert allow_eager or torch._dynamo.is_compiling() assert mod.weight.size() == (10, 10) mod.hook_count_post.add_(1) return (torch.sin(gI[0] + 3.4),) diff --git a/test/test_nestedtensor.py b/test/test_nestedtensor.py index 59d51ceb3bc076..ba2af0927c8e12 100644 --- a/test/test_nestedtensor.py +++ b/test/test_nestedtensor.py @@ -4354,7 +4354,7 @@ def test_jagged_op_different_output_shape_dim( nt = torch.nested.as_nested_tensor(ts, layout=torch.jagged) out = func(nt, dim=rd, keepdim=keepdim) ref_shape = ref_shape_keepdim if keepdim else ref_shape_no_keepdim - if not torch.compiler.is_compiling(): # if not using torch dynamo + if not torch.compiler.is_compiling: # if not using torch dynamo self.assertEqual(len(out.shape), len(ref_shape)) for o, r in zip(out.shape, ref_shape): if r is not None: @@ -4597,7 +4597,7 @@ def test_layer_norm_reduce_ragged_idx_1( # requires_grad = False does not currently work with dynamo tests and throws this error: # AssertionError: SymInts must use SymNodeVariable. # If the underlying value is static, we will create a ConstantVariable and specialize. 
- if torch.compiler.is_compiling() and not requires_grad: + if torch._dynamo.is_compiling() and not requires_grad: return tensor_lists = self._get_example_tensor_lists( diff --git a/test/test_optim.py b/test/test_optim.py index 211e91ab36b85e..046b8728e3c004 100644 --- a/test/test_optim.py +++ b/test/test_optim.py @@ -288,7 +288,7 @@ def test_param_group_with_lrscheduler_goes_right_direction( inpt = torch.randn(5, device=device, dtype=dtype) # avoid endless recompiles by wrapping LR in a tensor if we're compiling - lr = torch.tensor(0.01) if torch.compiler.is_compiling() else 0.01 + lr = torch.tensor(0.01) if torch._utils.is_compiling() else 0.01 optimizer = optim_cls([{"params": [weight]}, {"params": [bias], "lr": lr}]) schedulers = [scheduler_c(optimizer) for scheduler_c in schedulers_c] diff --git a/torch/_dynamo/decorators.py b/torch/_dynamo/decorators.py index fbf0770c17609a..73a942c6fbab74 100644 --- a/torch/_dynamo/decorators.py +++ b/torch/_dynamo/decorators.py @@ -19,6 +19,7 @@ RunOnlyContext, ) from .exc import IncorrectUsage +from .external_utils import is_compiling from .utils import is_function @@ -545,7 +546,7 @@ def mark_static(t, index=None): instances of the nn.Module can have different values of the attributes. The key point here is that the attributes are static. """ - if torch.compiler.is_compiling(): + if is_compiling(): if index is None: for s in t.size(): comptime.force_static(s) diff --git a/torch/_dynamo/external_utils.py b/torch/_dynamo/external_utils.py index 534c9640a64643..1c353efab73c98 100644 --- a/torch/_dynamo/external_utils.py +++ b/torch/_dynamo/external_utils.py @@ -3,7 +3,6 @@ import functools import warnings from typing import Any, Callable, List, Optional, Union -from typing_extensions import deprecated import torch import torch.utils._pytree as pytree @@ -15,10 +14,6 @@ np = None # type: ignore[assignment] -@deprecated( - "`torch._dynamo.external_utils.is_compiling` is deprecated. Use `torch.compiler.is_compiling` instead.", - category=FutureWarning, -) def is_compiling() -> bool: """ Indicates whether we are tracing/compiling with torch.compile() or torch.export(). diff --git a/torch/_functorch/apis.py b/torch/_functorch/apis.py index db252d8ca6d7f7..d906f3c906c989 100644 --- a/torch/_functorch/apis.py +++ b/torch/_functorch/apis.py @@ -191,7 +191,7 @@ def vmap( vmap does not provide general autobatching or handle variable-length sequences out of the box. """ - from torch.compiler import is_compiling + from torch._dynamo import is_compiling _check_randomness_arg(randomness) if not (chunk_size is None or chunk_size > 0): @@ -393,7 +393,7 @@ def grad(func: Callable, argnums: argnums_t = 0, has_aux: bool = False) -> Calla """ # To avoid cyclical dependency. 
import torch._functorch.eager_transforms as eager_transforms - from torch.compiler import is_compiling + from torch._dynamo import is_compiling def wrapper(*args, **kwargs): return eager_transforms.grad_impl(func, argnums, has_aux, args, kwargs) @@ -435,8 +435,8 @@ def grad_and_value( See :func:`grad` for examples """ + from torch._dynamo import is_compiling from torch._functorch import eager_transforms - from torch.compiler import is_compiling def wrapper(*args, **kwargs): return eager_transforms.grad_and_value_impl( diff --git a/torch/_functorch/eager_transforms.py b/torch/_functorch/eager_transforms.py index 7a7f724b3f3689..d389c7fda78949 100644 --- a/torch/_functorch/eager_transforms.py +++ b/torch/_functorch/eager_transforms.py @@ -764,7 +764,7 @@ def compute_jacobian_preallocate_and_copy(): # Dynamo does not support HOP composition if their inner function is # annotated with @functools.wraps(...). We circumvent this issue by applying # wraps only if we're not tracing with dynamo. - if not torch.compiler.is_compiling(): + if not torch._dynamo.is_compiling(): wrapper_fn = wraps(func)(wrapper_fn) return wrapper_fn @@ -1344,7 +1344,7 @@ def push_jvp(basis): # Dynamo does not support HOP composition if their inner function is # annotated with @functools.wraps(...). We circumvent this issue by applying # wraps only if we're not tracing with dynamo. - if not torch.compiler.is_compiling(): + if not torch._dynamo.is_compiling(): wrapper_fn = wraps(func)(wrapper_fn) return wrapper_fn diff --git a/torch/_higher_order_ops/associative_scan.py b/torch/_higher_order_ops/associative_scan.py index 204a17d6e0a1f9..d58d6b26bd33f7 100644 --- a/torch/_higher_order_ops/associative_scan.py +++ b/torch/_higher_order_ops/associative_scan.py @@ -132,7 +132,7 @@ def add(x: torch.Tensor, y: torch.Tensor): "Combine_mode must either 'pointwise' or 'generic', but got {combine_mode}" ) - if not torch.compiler.is_compiling(): + if not torch._dynamo.is_compiling(): with _set_compilation_env(), torch._dynamo.utils.disable_cache_limit(): return torch.compile(associative_scan, fullgraph=True)( combine_fn, xs, dim, reverse=reverse, combine_mode=combine_mode diff --git a/torch/_higher_order_ops/scan.py b/torch/_higher_order_ops/scan.py index ec117788ab85a8..a5a08fea26a317 100644 --- a/torch/_higher_order_ops/scan.py +++ b/torch/_higher_order_ops/scan.py @@ -191,7 +191,7 @@ def run_flattened_scan(combine_fn, leaves_init, leaves_xs, dim, reverse): combine_fn, leaves_init, leaves_xs, dim, reverse, additional_inputs=[] ) - if not torch.compiler.is_compiling(): + if not torch._dynamo.is_compiling(): from torch._dynamo.backends.debugging import ( make_eager_backend_with_torch_function_mode, ) diff --git a/torch/_utils.py b/torch/_utils.py index b3bda10851d6b0..e5c3a14ca81d7d 100644 --- a/torch/_utils.py +++ b/torch/_utils.py @@ -7,7 +7,7 @@ import warnings from collections import defaultdict from typing import Any, Callable, DefaultDict, Generic, List, Optional -from typing_extensions import deprecated, ParamSpec +from typing_extensions import ParamSpec import torch @@ -882,10 +882,6 @@ def classproperty(func): return _ClassPropertyDescriptor(func) -@deprecated( - "`torch._utils.is_compiling` is deprecated. Use `torch.compiler.is_compiling` instead.", - category=FutureWarning, -) def is_compiling() -> bool: """ Indicates whether we are tracing/compiling with torch.compile() or torch.export(). 
diff --git a/torch/distributed/algorithms/ddp_comm_hooks/default_hooks.py b/torch/distributed/algorithms/ddp_comm_hooks/default_hooks.py index ecf2b55f3ec50a..b012c94ffcaa8d 100644 --- a/torch/distributed/algorithms/ddp_comm_hooks/default_hooks.py +++ b/torch/distributed/algorithms/ddp_comm_hooks/default_hooks.py @@ -75,7 +75,7 @@ def decompress(fut): decompressed_tensor.copy_(value) return decompressed_tensor - if torch.compiler.is_compiling(): + if torch._utils.is_compiling(): grad = dist._functional_collectives.all_reduce( compressed_tensor, "sum", group_to_use ) diff --git a/torch/distributed/tensor/parallel/_utils.py b/torch/distributed/tensor/parallel/_utils.py index 2b5aaeabe85cbf..f50b5dd64768d0 100644 --- a/torch/distributed/tensor/parallel/_utils.py +++ b/torch/distributed/tensor/parallel/_utils.py @@ -7,14 +7,15 @@ from torch.distributed.tensor.placement_types import Placement -LayoutsType = Union[Placement, Tuple[Placement, ...]] +try: + from torch._dynamo.external_utils import is_compiling as is_torchdynamo_compiling +except Exception: + def is_torchdynamo_compiling(): # type: ignore[misc] + return False -def is_torchdynamo_compiling() -> bool: - # Use local function to avoid circular imports - from torch.compiler import is_compiling - return is_compiling() +LayoutsType = Union[Placement, Tuple[Placement, ...]] def _deprecate_warnings(func_name: str, extra_msg: str) -> None: diff --git a/torch/nn/modules/module.py b/torch/nn/modules/module.py index 7b3970944edfe5..dc3a85a03d3cab 100644 --- a/torch/nn/modules/module.py +++ b/torch/nn/modules/module.py @@ -1828,6 +1828,8 @@ def inner(): return result + from torch.compiler import is_compiling + # This is technically not behavior equivalent when compiling, but it's # incredibly unlikely we will ever support throwing an exception in NN # module, and then catching it here, and then reraising it, and then @@ -1835,7 +1837,7 @@ def inner(): # The reraise here just gunks up our exception handling for no good # reason. Don't try to run the always called hooks in event of # exception. - if torch.compiler.is_compiling(): + if is_compiling(): return inner() try: diff --git a/torch/nn/parallel/distributed.py b/torch/nn/parallel/distributed.py index a850cb4187a93e..aad7e6c5402cf8 100644 --- a/torch/nn/parallel/distributed.py +++ b/torch/nn/parallel/distributed.py @@ -1487,7 +1487,7 @@ def _lazy_init(self): def _should_disable_cpp_reducer(self) -> bool: return self._use_python_reducer and ( - torch.compiler.is_compiling() or self._force_to_disable_cpp_reducer + torch._utils.is_compiling() or self._force_to_disable_cpp_reducer ) def _pre_forward(self, *inputs, **kwargs): @@ -1500,7 +1500,7 @@ def _pre_forward(self, *inputs, **kwargs): h.remove() self._accum_grad_hooks.clear() - if not self._lazy_init_ran and not torch.compiler.is_compiling(): + if not self._lazy_init_ran and not torch._utils.is_compiling(): self._lazy_init() if self._delay_all_reduce_all_params: diff --git a/torch/optim/_adafactor.py b/torch/optim/_adafactor.py index 340c3f3f26974b..65f41d6ab182ed 100644 --- a/torch/optim/_adafactor.py +++ b/torch/optim/_adafactor.py @@ -505,7 +505,7 @@ def _multi_tensor_adafactor( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. 
- if not torch.compiler.is_compiling() and device_state_steps[0].is_cpu: + if not torch._utils.is_compiling() and device_state_steps[0].is_cpu: torch._foreach_add_( device_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) @@ -624,7 +624,7 @@ def adafactor( See :class:`~torch.optim.Adafactor` for details. """ - if not torch.compiler.is_compiling() and not all( + if not torch._utils.is_compiling() and not all( isinstance(t, torch.Tensor) for t in state_steps ): raise RuntimeError( diff --git a/torch/optim/adadelta.py b/torch/optim/adadelta.py index 249fe53dce2c78..60c37680aeb57f 100644 --- a/torch/optim/adadelta.py +++ b/torch/optim/adadelta.py @@ -259,7 +259,7 @@ def _single_tensor_adadelta( has_complex: bool, ): # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices( supports_xla=False ) @@ -315,7 +315,7 @@ def _multi_tensor_adadelta( assert not differentiable, "_foreach ops don't support autograd" # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices( supports_xla=False ) @@ -352,7 +352,7 @@ def _multi_tensor_adadelta( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. - if not torch.compiler.is_compiling() and device_state_steps[0].is_cpu: + if not torch._utils.is_compiling() and device_state_steps[0].is_cpu: torch._foreach_add_( device_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) @@ -423,7 +423,7 @@ def adadelta( # this check is slow during compilation, so we skip it # if it's strictly needed we can add this check back in dynamo - if not torch.compiler.is_compiling() and not all( + if not torch._utils.is_compiling() and not all( isinstance(t, torch.Tensor) for t in state_steps ): raise RuntimeError( diff --git a/torch/optim/adagrad.py b/torch/optim/adagrad.py index 8e08b62d1a19f8..c45df14727c69a 100644 --- a/torch/optim/adagrad.py +++ b/torch/optim/adagrad.py @@ -451,7 +451,7 @@ def _multi_tensor_adagrad( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. 
- if not torch.compiler.is_compiling() and device_state_steps[0].is_cpu: + if not torch._utils.is_compiling() and device_state_steps[0].is_cpu: torch._foreach_add_( device_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) diff --git a/torch/optim/adam.py b/torch/optim/adam.py index 518831229f8bf1..23337e63525680 100644 --- a/torch/optim/adam.py +++ b/torch/optim/adam.py @@ -353,7 +353,7 @@ def _single_tensor_adam( step_t = state_steps[i] # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert ( param.device.type == step_t.device.type @@ -466,7 +466,7 @@ def _multi_tensor_adam( ) # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices( supports_xla=False ) @@ -520,7 +520,7 @@ def _multi_tensor_adam( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. - if not torch.compiler.is_compiling() and device_state_steps[0].is_cpu: + if not torch._utils.is_compiling() and device_state_steps[0].is_cpu: torch._foreach_add_( device_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) @@ -762,7 +762,7 @@ def adam( # this check is slow during compilation, so we skip it # if it's strictly needed we can add this check back in dynamo - if not torch.compiler.is_compiling() and not all( + if not torch._utils.is_compiling() and not all( isinstance(t, torch.Tensor) for t in state_steps ): raise RuntimeError( diff --git a/torch/optim/adamax.py b/torch/optim/adamax.py index f03e3151bdb0cf..4459d033c1e36b 100644 --- a/torch/optim/adamax.py +++ b/torch/optim/adamax.py @@ -248,7 +248,7 @@ def _single_tensor_adamax( step_t = state_steps[i] # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert ( param.device.type == step_t.device.type @@ -320,7 +320,7 @@ def _multi_tensor_adamax( return # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices( supports_xla=False ) @@ -358,7 +358,7 @@ def _multi_tensor_adamax( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. - if not torch.compiler.is_compiling() and grouped_state_steps[0].is_cpu: + if not torch._utils.is_compiling() and grouped_state_steps[0].is_cpu: torch._foreach_add_( grouped_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) @@ -435,7 +435,7 @@ def adamax( See :class:`~torch.optim.Adamax` for details. 
""" - if not torch.compiler.is_compiling() and not all( + if not torch._utils.is_compiling() and not all( isinstance(t, torch.Tensor) for t in state_steps ): raise RuntimeError( diff --git a/torch/optim/adamw.py b/torch/optim/adamw.py index 5deffcabd071af..fc6aec32b2e307 100644 --- a/torch/optim/adamw.py +++ b/torch/optim/adamw.py @@ -350,7 +350,7 @@ def _single_tensor_adamw( step_t = state_steps[i] # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert ( param.device.type == step_t.device.type @@ -463,7 +463,7 @@ def _multi_tensor_adamw( ) # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices( supports_xla=False ) @@ -516,7 +516,7 @@ def _multi_tensor_adamw( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. - if not torch.compiler.is_compiling() and device_state_steps[0].is_cpu: + if not torch._utils.is_compiling() and device_state_steps[0].is_cpu: torch._foreach_add_( device_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) @@ -744,7 +744,7 @@ def adamw( See :class:`~torch.optim.AdamW` for details. """ - if not torch.compiler.is_compiling() and not all( + if not torch._utils.is_compiling() and not all( isinstance(t, torch.Tensor) for t in state_steps ): raise RuntimeError( diff --git a/torch/optim/asgd.py b/torch/optim/asgd.py index 373a578fbf34a9..32a52cf9ac4ee5 100644 --- a/torch/optim/asgd.py +++ b/torch/optim/asgd.py @@ -219,7 +219,7 @@ def _single_tensor_asgd( step_t = state_steps[i] # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert ( param.device.type @@ -292,7 +292,7 @@ def _multi_tensor_asgd( assert not differentiable, "_foreach ops don't support autograd" # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices( supports_xla=False ) @@ -333,7 +333,7 @@ def _multi_tensor_asgd( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. 
- if not torch.compiler.is_compiling() and grouped_state_steps[0].is_cpu: + if not torch._utils.is_compiling() and grouped_state_steps[0].is_cpu: torch._foreach_add_( grouped_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) diff --git a/torch/optim/nadam.py b/torch/optim/nadam.py index 3828e2b410e952..2dd7e130c0d6cb 100644 --- a/torch/optim/nadam.py +++ b/torch/optim/nadam.py @@ -310,7 +310,7 @@ def _single_tensor_nadam( exp_avg_sq = torch.view_as_real(exp_avg_sq) # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert ( param.device.type == mu_product.device.type == step_t.device.type @@ -396,7 +396,7 @@ def _multi_tensor_nadam( assert not differentiable, "_foreach ops don't support autograd" # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices( supports_xla=False ) @@ -437,7 +437,7 @@ def _multi_tensor_nadam( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. - if not torch.compiler.is_compiling() and grouped_state_steps[0].is_cpu: + if not torch._utils.is_compiling() and grouped_state_steps[0].is_cpu: torch._foreach_add_( grouped_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) diff --git a/torch/optim/optimizer.py b/torch/optim/optimizer.py index 34bc62e605c013..f3b7e7dac0af84 100644 --- a/torch/optim/optimizer.py +++ b/torch/optim/optimizer.py @@ -26,6 +26,7 @@ import torch import torch.utils.hooks as hooks +from torch._utils import is_compiling from torch.utils._foreach_utils import ( _get_foreach_kernels_supported_devices, _get_fused_kernels_supported_devices, @@ -99,14 +100,14 @@ def _use_grad(self, *args, **kwargs): def _get_value(x): # item is significantly faster than a cpu tensor in eager mode - if not torch.jit.is_scripting() and torch.compiler.is_compiling(): + if not torch.jit.is_scripting() and is_compiling(): return x else: return x.item() if isinstance(x, torch.Tensor) else x def _stack_if_compiling(x): - if not torch.jit.is_scripting() and torch.compiler.is_compiling(): + if not torch.jit.is_scripting() and is_compiling(): return torch.stack(x) else: return x @@ -138,7 +139,7 @@ def wrapper(func): # the capturable flag. If capturable=True, this is not a problem. @functools.wraps(func) def maybe_fallback(*args, **kwargs): - if torch.compiler.is_compiling() and ( + if is_compiling() and ( not kwargs.get("capturable", False) and has_state_steps and (args[state_steps_ind] and args[state_steps_ind][0].is_cuda) @@ -428,7 +429,7 @@ def _cuda_graph_capture_health_check(self) -> None: # Thus, when compiling, inductor will determine if cudagraphs # can be enabled based on whether there is input mutation or CPU tensors. if ( - not torch.compiler.is_compiling() + not is_compiling() and torch.backends.cuda.is_built() and torch.cuda.is_available() ): @@ -515,7 +516,7 @@ def _group_tensors_by_device_and_dtype( Skips this step if we are compiling since this will occur during inductor lowering. 
""" - if torch.compiler.is_compiling(): + if is_compiling(): return {(None, None): (tensorlistlist, list(range(len(tensorlistlist[0]))))} else: return _group_tensors_by_device_and_dtype(tensorlistlist, with_indices) # type: ignore[return-value, arg-type] diff --git a/torch/optim/radam.py b/torch/optim/radam.py index bcab947c8e44b7..9a36a2be1841db 100644 --- a/torch/optim/radam.py +++ b/torch/optim/radam.py @@ -276,7 +276,7 @@ def _single_tensor_radam( step_t = state_steps[i] # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert ( param.device.type == step_t.device.type @@ -374,7 +374,7 @@ def _multi_tensor_radam( assert not differentiable, "_foreach ops don't support autograd" # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices( supports_xla=False ) @@ -404,7 +404,7 @@ def _multi_tensor_radam( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. - if not torch.compiler.is_compiling() and grouped_state_steps[0].is_cpu: + if not torch._utils.is_compiling() and grouped_state_steps[0].is_cpu: torch._foreach_add_( grouped_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) diff --git a/torch/optim/rmsprop.py b/torch/optim/rmsprop.py index 1e82e3cbc3437a..f839ba0f021c67 100644 --- a/torch/optim/rmsprop.py +++ b/torch/optim/rmsprop.py @@ -284,7 +284,7 @@ def _single_tensor_rmsprop( step = state_steps[i] # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert ( param.device.type == step.device.type @@ -357,7 +357,7 @@ def _multi_tensor_rmsprop( assert not differentiable, "_foreach ops don't support autograd" # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert all( p.device.type == step.device.type @@ -402,7 +402,7 @@ def _multi_tensor_rmsprop( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. 
- if not torch.compiler.is_compiling() and grouped_state_steps[0].is_cpu: + if not torch._utils.is_compiling() and grouped_state_steps[0].is_cpu: torch._foreach_add_( grouped_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) @@ -489,7 +489,7 @@ def rmsprop( """ # this check is slow during compilation, so we skip it # if it's strictly needed we can add this check back in dynamo - if not torch.compiler.is_compiling() and not all( + if not torch._utils.is_compiling() and not all( isinstance(t, torch.Tensor) for t in state_steps ): raise RuntimeError( diff --git a/torch/optim/rprop.py b/torch/optim/rprop.py index ed7a744d7d1f3c..538c8ac0a861d8 100644 --- a/torch/optim/rprop.py +++ b/torch/optim/rprop.py @@ -243,7 +243,7 @@ def _single_tensor_rprop( step = state_steps[i] # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert ( param.device.type == step.device.type @@ -309,7 +309,7 @@ def _multi_tensor_rprop( assert not differentiable, "_foreach ops don't support autograd" # If compiling, the compiler will handle cudagraph checks, see note [torch.compile x capturable] - if not torch.compiler.is_compiling() and capturable: + if not torch._utils.is_compiling() and capturable: capturable_supported_devices = _get_capturable_supported_devices() assert all( p.device.type == step.device.type @@ -337,7 +337,7 @@ def _multi_tensor_rprop( # If steps are on CPU, foreach will fall back to the slow path, which is a for-loop calling t.add(1) over # and over. 1 will then be wrapped into a Tensor over and over again, which is slower than if we just # wrapped it once now. The alpha is required to assure we go to the right overload. - if not torch.compiler.is_compiling() and grouped_state_steps[0].is_cpu: + if not torch._utils.is_compiling() and grouped_state_steps[0].is_cpu: torch._foreach_add_( grouped_state_steps, torch.tensor(1.0, device="cpu"), alpha=1.0 ) @@ -427,7 +427,7 @@ def rprop( """ # this check is slow during compilation, so we skip it # if it's strictly needed we can add this check back in dynamo - if not torch.compiler.is_compiling() and not all( + if not torch._utils.is_compiling() and not all( isinstance(t, torch.Tensor) for t in state_steps ): raise RuntimeError( diff --git a/torch/optim/sgd.py b/torch/optim/sgd.py index 75cd3279070b4c..ab70f08b44113c 100644 --- a/torch/optim/sgd.py +++ b/torch/optim/sgd.py @@ -435,7 +435,7 @@ def _multi_tensor_sgd( if not device_has_sparse_grad: # handle internal item() call if lr is a tensor - if isinstance(lr, torch.Tensor) and torch.compiler.is_compiling(): + if isinstance(lr, torch.Tensor) and torch._utils.is_compiling(): grads_x_lr = torch._foreach_mul(device_grads, -lr) torch._foreach_add_(device_params, grads_x_lr) else: diff --git a/torch/testing/_internal/optests/generate_tests.py b/torch/testing/_internal/optests/generate_tests.py index e9798ef89de198..7820fed19ccc32 100644 --- a/torch/testing/_internal/optests/generate_tests.py +++ b/torch/testing/_internal/optests/generate_tests.py @@ -565,7 +565,7 @@ def __torch_function__(self, func, types, args=(), kwargs=None): if ( torch.jit.is_tracing() or torch.jit.is_scripting() - or torch.compiler.is_compiling() + or torch._dynamo.is_compiling() ): return func(*args, **kwargs) # Pre-existing code may not use the .default overload. 
If we see an From c0d642a2959d283186df84a23eea700114f4829d Mon Sep 17 00:00:00 2001 From: Shuqiang Zhang Date: Tue, 5 Nov 2024 10:34:44 -0800 Subject: [PATCH 106/503] [pgnccl][simple] log started work numel (#139773) Summary: We saw some cases that the same work was started on multiple ranks, but did not complete. This info could give us more info if the numel matches Test Plan: CI Tags: Pull Request resolved: https://github.com/pytorch/pytorch/pull/139773 Approved by: https://github.com/Skylion007, https://github.com/kwen2501 --- torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp | 4 ++++ torch/csrc/distributed/c10d/TraceUtils.h | 3 +++ 2 files changed, 7 insertions(+) diff --git a/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp b/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp index 0da9f3fcb083e8..5bccc815ba484d 100644 --- a/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp +++ b/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp @@ -1952,6 +1952,8 @@ void ProcessGroupNCCL::watchdogHandler() { pgStatus_->lastCompletedNumelIn; data.integers["last_completed_numel_out"] = pgStatus_->lastCompletedNumelOut; + data.integers["last_started_numel_in"] = pgStatus_->lastStartedNumelIn; + data.integers["last_started_numel_out"] = pgStatus_->lastStartedNumelOut; // logging strings data.strings["last_enqueued_work_name"] = pgStatus_->lastEnqueuedWorkName; data.strings["last_started_work_name"] = pgStatus_->lastStartedWorkName; @@ -2090,6 +2092,8 @@ void ProcessGroupNCCL::watchdogHandler() { work.isStarted()) { pgStatus_->lastStartedSeq = static_cast(work.seq_); pgStatus_->lastStartedWorkName = opTypeToString(work.opType_); + pgStatus_->lastStartedNumelIn = work.numelIn_; + pgStatus_->lastStartedNumelOut = work.numelOut_; } // Clean up completed work diff --git a/torch/csrc/distributed/c10d/TraceUtils.h b/torch/csrc/distributed/c10d/TraceUtils.h index b211fc83564a8c..fcd00fc6bca8c7 100644 --- a/torch/csrc/distributed/c10d/TraceUtils.h +++ b/torch/csrc/distributed/c10d/TraceUtils.h @@ -42,6 +42,9 @@ struct ProcessGroupStatus { // the sizes of the last work completed size_t lastCompletedNumelIn; size_t lastCompletedNumelOut; + // the sizes of the last work started + size_t lastStartedNumelIn; + size_t lastStartedNumelOut; }; inline std::string getTraceStartKey(const std::string& pgName, int rank) { From 86d7d39bffd3b7b099310fb351b2b36f99981d6f Mon Sep 17 00:00:00 2001 From: Mikayla Gawarecki Date: Tue, 5 Nov 2024 23:19:06 +0000 Subject: [PATCH 107/503] Forward fix D65441551 for T206731737 (#139767) Test Plan: - Differential Revision: D65482429 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139767 Approved by: https://github.com/awgu --- torch/serialization.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/torch/serialization.py b/torch/serialization.py index 352514d541505a..8d8ae774e3df83 100644 --- a/torch/serialization.py +++ b/torch/serialization.py @@ -67,8 +67,6 @@ "skip_data", ] -IS_FBCODE = not hasattr(torch.version, "git_version") - DEFAULT_PROTOCOL = 2 LONG_SIZE = struct.Struct("=l").size @@ -94,7 +92,8 @@ def _default_to_weights_only(pickle_module): - return pickle_module is None and not IS_FBCODE + is_fbcode = not hasattr(torch.version, "git_version") + return pickle_module is None and not is_fbcode # _serialization_tls is used to store thread local state specific to serialization From faab564bdacacbe12e4824ddd112f7b26f9fee1c Mon Sep 17 00:00:00 2001 From: Tongzhou Wang <5674597+ssnl@users.noreply.github.com> Date: Tue, 5 Nov 2024 23:26:36 +0000 Subject: 
[PATCH 108/503] [doc] Fix grammar in export.ir_spec.rst (#139584) Pull Request resolved: https://github.com/pytorch/pytorch/pull/139584 Approved by: https://github.com/zou3519 --- docs/source/export.ir_spec.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/export.ir_spec.rst b/docs/source/export.ir_spec.rst index 13a498b44df8ae..fb43ea847c86c8 100644 --- a/docs/source/export.ir_spec.rst +++ b/docs/source/export.ir_spec.rst @@ -212,7 +212,7 @@ A ``call_function`` node represents a call to an operator. 2. In Export IR, constant arguments will be embedded within the graph. 3. In FX graph, a get_attr node can represent reading any attribute stored in - the graph module. However, in Export IR this is restricted to readign only + the graph module. However, in Export IR this is restricted to reading only submodules as all parameters/buffers will be passed in as inputs to the graph module. @@ -435,9 +435,9 @@ The following types are defined as **leaf type**: * - Scalar - Any numerical types from Python, including integral types, floating point types, and zero dimensional tensors. * - int - - Python int (binded as int64_t in C++) + - Python int (bound as int64_t in C++) * - float - - Python float (binded as double in C++) + - Python float (bound as double in C++) * - bool - Python bool * - str From eec153a69c83919ddf8b50134dc956a9e0cc9a99 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 5 Nov 2024 14:31:07 -0800 Subject: [PATCH 109/503] [BE][Attention] Factor out common code (#139788) - Compute attention mask before the switch - Introduce `query_device_type` variable - Refactor some of MPS-math checks into easily readable boolean names Pull Request resolved: https://github.com/pytorch/pytorch/pull/139788 Approved by: https://github.com/Skylion007, https://github.com/drisspg --- .../ATen/native/transformers/attention.cpp | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/aten/src/ATen/native/transformers/attention.cpp b/aten/src/ATen/native/transformers/attention.cpp index 5becd2a782f7df..8deb5cf9799813 100644 --- a/aten/src/ATen/native/transformers/attention.cpp +++ b/aten/src/ATen/native/transformers/attention.cpp @@ -710,24 +710,26 @@ Tensor scaled_dot_product_attention( bool is_causal, std::optional scale, bool enable_gqa) { + using sdp::SDPBackend; validate_sdpa_input(query_, key, value, attn_mask_, dropout_p, is_causal, scale); int64_t choice_int = static_cast(sdp::SDPBackend::math); if (_fused_sdp_choice_stub.is_device_supported(query_.device().type())) { choice_int = _fused_sdp_choice_stub(query_.device().type(), query_, key, value, attn_mask_, dropout_p, is_causal, scale, enable_gqa); } - sdp::SDPBackend backend = static_cast(choice_int); + const auto query_device_type = query_.device().type(); + const auto backend = static_cast(choice_int); + const auto convert_attn_func = backend != SDPBackend::cudnn_attention ? 
convert_boolean_attn_mask : convert_boolean_attn_mask_cudnn; + auto attn_mask = convert_attn_func(attn_mask_, query_.dtype()); switch (backend) { - case sdp::SDPBackend::cudnn_attention: { - std::optional attn_mask = convert_boolean_attn_mask_cudnn(attn_mask_, query_.dtype()); + case SDPBackend::cudnn_attention: { bool compute_logsumexp = should_compute_logsumexp(query_, key, value); auto out_lse_softmax = at::_scaled_dot_product_cudnn_attention( query_, key, value, attn_mask, compute_logsumexp, dropout_p, is_causal, false /*return_debug_mask*/, scale); return std::get<0>(out_lse_softmax); } - case sdp::SDPBackend::flash_attention: { - std::optional attn_mask = convert_boolean_attn_mask(attn_mask_, query_.dtype()); - if(query_.device().type() == DeviceType::CUDA){ + case SDPBackend::flash_attention: { + if(query_device_type == DeviceType::CUDA){ c10::SymInt og_size = query_.sym_size(-1); Tensor query_padded = pad_last_dim<8, false>(query_); Tensor key_padded = pad_last_dim<8, false>(key); @@ -742,8 +744,7 @@ Tensor scaled_dot_product_attention( return std::get<0>(at::_scaled_dot_product_flash_attention_for_cpu( query_, key, value, dropout_p, is_causal, attn_mask, scale)); } - case sdp::SDPBackend::efficient_attention: { - std::optional attn_mask = convert_boolean_attn_mask(attn_mask_, query_.dtype()); + case SDPBackend::efficient_attention: { bool compute_logsumexp = should_compute_logsumexp(query_, key, value); if (attn_mask.has_value()) { attn_mask.value() = preprocess_mask(attn_mask.value(), query_, key, value);; @@ -752,18 +753,19 @@ Tensor scaled_dot_product_attention( query_, key, value, attn_mask, compute_logsumexp, dropout_p, is_causal, scale); return std::get<0>(out_and_lse); } - case sdp::SDPBackend::overrideable: { - std::optional attn_mask = convert_boolean_attn_mask(attn_mask_, query_.dtype()); + case SDPBackend::overrideable: { auto out_lse_softmax = at::_scaled_dot_product_fused_attention_overrideable( query_, key, value, attn_mask, dropout_p, is_causal, false /*return_debug_mask*/, scale); return std::get<0>(out_lse_softmax); } - case sdp::SDPBackend::math: { - std::optional attn_mask = convert_boolean_attn_mask(attn_mask_, query_.dtype()); - if ((!GradMode::is_enabled() || (!query_.requires_grad() && !key.requires_grad() && !value.requires_grad())) - && query_.device().type() == DeviceType::MPS && dropout_p == 0.0 - && query_.is_contiguous() && key.is_contiguous() && value.is_contiguous() - && !query_.is_nested() && !key.is_nested() && !value.is_nested()) { + case SDPBackend::math: { + const auto any_nested = query_.is_nested() || key.is_nested() || value.is_nested(); + const bool any_inputs_require_grad = query_.requires_grad() || key.requires_grad() || value.requires_grad(); + const auto all_contiguous = query_.is_contiguous() && key.is_contiguous() && value.is_contiguous(); + if (query_device_type == DeviceType::MPS && dropout_p == 0.0 + && !(GradMode::is_enabled() && any_inputs_require_grad) + && all_contiguous + && !any_nested) { return std::get<0>(at::_scaled_dot_product_attention_math_for_mps( query_, key, From d1c26b07819637415a8c5ead903968b462e02964 Mon Sep 17 00:00:00 2001 From: Thomas Bohnstingl Date: Tue, 5 Nov 2024 23:38:18 +0000 Subject: [PATCH 110/503] Improvements for associative_scan - slicing of xs (#138858) In this PR, the combine_fn is consistently called with a slice along the scan dim. 
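As a usage sketch only (not part of this change): the test suite touched below uses exactly this pattern, an elementwise `add` combine function whose arguments are now always slices of `xs` along the scan dim, compared against `torch.cumsum`. `combine_mode="generic"` is used in the sketch because the `"pointwise"` mode is CUDA-only in these tests.

```
# Hedged sketch mirroring the add/cumsum pattern used in the tests below.
import torch
from torch._higher_order_ops.associative_scan import associative_scan


def add(x: torch.Tensor, y: torch.Tensor):
    # x and y are slices of `xs` along the scan dim (dim 0 here)
    return x + y


x = torch.randn(3, 10, 2)
out = associative_scan(add, x, 0, combine_mode="generic")

# For a non-reversed add scan this matches the eager cumsum reference
# that the tests compare against.
torch.testing.assert_close(out, torch.cumsum(x, 0))
```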
It implements part of https://github.com/pytorch/pytorch/pull/136966 Pull Request resolved: https://github.com/pytorch/pytorch/pull/138858 Approved by: https://github.com/ydwu4 --- test/functorch/test_control_flow.py | 1392 +++++++++++++------ test/inductor/test_control_flow.py | 3 +- torch/_dynamo/trace_rules.py | 1 + torch/_dynamo/variables/higher_order_ops.py | 27 +- torch/_higher_order_ops/associative_scan.py | 138 +- torch/_higher_order_ops/utils.py | 5 + 6 files changed, 1057 insertions(+), 509 deletions(-) diff --git a/test/functorch/test_control_flow.py b/test/functorch/test_control_flow.py index 91561308177365..c798d32eedeea0 100644 --- a/test/functorch/test_control_flow.py +++ b/test/functorch/test_control_flow.py @@ -7,7 +7,10 @@ import torch.utils._pytree as pytree from functorch.experimental import control_flow from functorch.experimental.control_flow import cond, UnsupportedAliasMutationException -from torch._higher_order_ops.associative_scan import associative_scan +from torch._higher_order_ops.associative_scan import ( + _fake_associative_scan, + associative_scan, +) from torch._higher_order_ops.scan import _fake_scan, scan from torch._higher_order_ops.while_loop import while_loop from torch._subclasses.functional_tensor import ( @@ -86,34 +89,6 @@ def _fake_while_loop(cond_fn, body_fn, operands): return operands -def _fake_associative_scan(combine_fn, xs, dim, reverse=False): - inp_leaves, spec = pytree.tree_flatten(xs) - result_flat = [] - num_leaves = len(inp_leaves) - op = reversed if reverse else lambda x: x - - for ind in op(range(inp_leaves[0].size(dim))): - r = [ - inp_leaves[leave_ind][(slice(None),) * dim + (ind,)] - for leave_ind in range(num_leaves) - ] - if (ind > 0 and not reverse) or ( - ind < (inp_leaves[0].size(dim) - 1) and reverse - ): - r = combine_fn( - pytree.tree_unflatten(result_flat[-1], spec), - pytree.tree_unflatten(r, spec), - ) - r_flat, _ = pytree.tree_flatten(r) - result_flat.append(r_flat) - - results = [ - torch.stack([e[leave_ind] for e in op(result_flat)], dim) - for leave_ind in range(num_leaves) - ] - return pytree.tree_unflatten(results, spec) - - def compile_mode_helper(fct, compile_mode): if compile_mode == "compile": return torch.compile(fct, fullgraph=True, dynamic=False) @@ -1280,62 +1255,6 @@ def fwbw(map_op, f, x, y): fake_outs = fwbw(_fake_map, f, x, y) self.assertEqual(true_outs, fake_outs) - # TODO: provide an implementation for all compile modes and re-enable all test - @unittest.skipIf(not SM70OrLater, "triton") - @requires_cuda - @parametrize("reverse", [False, True]) - @parametrize("compile_mode", ["none", "compile", "compile_dynamic_shape"]) - @parametrize("combine_mode", ["pointwise", "generic"]) - @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) - # Skipping the combination of combine_mode=pointwise and device=cpu - # as the current implementation of pointwise does only support CUDA device - @decorateIf( - unittest.skip, - lambda params: ( - params["combine_mode"] == "pointwise" - and (params["device"] == torch.device("cpu") or torch.version.hip) - ), - ) - def test_associative_scan_compile( - self, combine_mode, reverse, compile_mode, device - ): - x = torch.randn(3, 10, 2, device=device) - - scan_fct = compile_mode_helper(associative_scan, compile_mode) - - for op, op_pt in [ - (get_scan_combine_fn("add", True), torch.cumsum), - (get_scan_combine_fn("mul", True), torch.cumprod), - ]: - result = scan_fct(op, x, 0, reverse=reverse, combine_mode=combine_mode) - result_exp = _fake_associative_scan(op, 
xs=x, dim=0, reverse=reverse) - self.assertEqual(result, result_exp) - if not reverse: - result_exp_PT = op_pt(x, 0) - self.assertEqual(result, result_exp_PT) - - # Jax Examples - x = torch.arange(0, 4, device=device) - cumsum1 = scan_fct( - get_scan_combine_fn("add", True), - x, - 0, - reverse=reverse, - combine_mode=combine_mode, - ) - cumsum_exp = _fake_associative_scan( - get_scan_combine_fn("add", True), x, 0, reverse=reverse - ) - if not reverse: - self.assertEqual( - cumsum1, torch.tensor([0.0, 1.0, 3.0, 6.0], dtype=torch.int64) - ) - else: - self.assertEqual( - cumsum1, torch.tensor([6.0, 6.0, 5.0, 3.0], dtype=torch.int64) - ) - self.assertEqual(cumsum1, cumsum_exp) - def test_scan_y_less_ndim_then_dim(self): def combine_fn(carry, x): return carry @ x, (carry @ x).sum() @@ -1520,46 +1439,6 @@ def test_scan_dtype(self, reverse, compile_mode, device, dtype): ], ) - @unittest.skipIf(not SM70OrLater, "triton") - @requires_cuda - @parametrize("reverse", [False, True]) - @parametrize("combine_mode", ["pointwise", "generic"]) - @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) - # Skipping the combination of combine_mode=pointwise and device=cpu - # as the current implementation of pointwise does only support CUDA device - @decorateIf( - unittest.skip, - lambda params: ( - params["combine_mode"] == "pointwise" - and (params["device"] == torch.device("cpu") or torch.version.hip) - ), - ) - def test_associative_scan_dim(self, combine_mode, reverse, device): - import random - - random.seed(10) - - num_dims = [random.randint(2, 5) for _ in range(10)] - for num_dim in num_dims: - shapes = [random.randint(1, 10) for _ in range(num_dim)] - rnd_scan_dim = random.randint(0, num_dim - 1) - x = torch.randn(*shapes, device=device) - - for op, op_pt in [ - (get_scan_combine_fn("add", True), torch.cumsum), - (get_scan_combine_fn("mul", True), torch.cumprod), - ]: - result = associative_scan( - op, x, rnd_scan_dim, reverse=reverse, combine_mode=combine_mode - ) - result_exp = _fake_associative_scan( - op, x, rnd_scan_dim, reverse=reverse - ) - self.assertEqual(result, result_exp) - if not reverse: - result_exp_PT = op_pt(x, rnd_scan_dim) - self.assertEqual(result, result_exp_PT) - @requires_cuda @parametrize("reverse", [False, True]) @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) @@ -1596,46 +1475,6 @@ def test_scan_dim(self, reverse, device): res_list[1] = res_list[1].movedim(0, rnd_scan_dim) self.assertEqual(res_list[1], result_exp_PT) - @skipIfRocm(msg="Unsupported on ROCM yet") - @unittest.skipIf(not SM70OrLater, "triton") - @requires_cuda - @parametrize("combine_mode", ["pointwise", "generic"]) - @parametrize("reverse", [False, True]) - @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) - # Skipping the combination of combine_mode=pointwise and device=cpu - # as the current implementation of pointwise does only support CUDA device - @decorateIf( - unittest.skip, - lambda params: ( - params["combine_mode"] == "pointwise" - and (params["device"] == torch.device("cpu") or torch.version.hip) - ), - ) - def test_associative_scan_binary_operator(self, combine_mode, reverse, device): - state_dim = 20 - timesteps = 10 - projected_inputs = torch.randn( - timesteps, state_dim, requires_grad=True, device=device - ) - A = torch.randn(state_dim, requires_grad=True, device=device) - elements = (A.repeat((timesteps, 1)), projected_inputs) - - result1 = associative_scan( - get_scan_combine_fn("s5_operator", True), - elements, - 0, - combine_mode=combine_mode, 
- reverse=reverse, - ) - expected_result = _fake_associative_scan( - get_scan_combine_fn("s5_operator", True), elements, 0, reverse=reverse - ) - self.assertEqual( - result1, - expected_result, - ) - self.assertEqual([r.device.type for r in result1], [device.type] * len(result1)) - @requires_cuda @parametrize("reverse", [False, True]) @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) @@ -1678,38 +1517,6 @@ def test_scan_binary_operator(self, reverse, device): ) self.assertEqual(result, expected_result) - @skipIfRocm(msg="Unsupported on ROCM yet") - @unittest.skipIf(not SM70OrLater, "triton") - @requires_cuda - @parametrize("combine_mode", ["pointwise", "generic"]) - @parametrize("reverse", [False, True]) - @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) - # Skipping the combination of combine_mode=pointwise and device=cpu - # as the current implementation of pointwise does only support CUDA device - @decorateIf( - unittest.skip, - lambda params: ( - params["combine_mode"] == "pointwise" - and (params["device"] == torch.device("cpu") or torch.version.hip) - ), - ) - def test_associative_scan_tuple(self, combine_mode, reverse, device): - x = torch.randn(3, 2, 2, device=device) - y = torch.randn(3, 2, 2, device=device) - inp = (x, y) - - result1 = associative_scan( - get_scan_combine_fn("tuple_fct", True), - inp, - 0, - reverse=reverse, - combine_mode=combine_mode, - ) - expected_result = _fake_associative_scan( - get_scan_combine_fn("tuple_fct", True), inp, 0, reverse=reverse - ) - self.assertEqual(result1, expected_result) - @skipIfRocm(msg="Unsupported on ROCM yet") @requires_cuda @parametrize("reverse", [False, True]) @@ -1751,72 +1558,6 @@ def fct_different_output_tuple(x, y): self.assertEqual(result_diff, expected_result) self.assertEqual(result_diff[1], result_same[1][1]) - @unittest.skipIf(not SM70OrLater, "triton") - @requires_cuda - @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) - def test_associative_scan_wrong_pytree(self, device): - def fct_wrong_pytree(x, y): - return { - "i": x["i"] * y["j"][0][0], - "k": 0.0, - "j": ([x["j"][1][0]["o"]], [{"o": torch.sin(x["i"])}]), - } - - x = torch.randn(3, 2, 2, device=device) - y = torch.randn(3, 2, 2, device=device) - z = torch.randn(3, 2, 2, device=device) - inp = {"i": x, "j": ([y], [{"o": z}])} - - with self.assertRaisesRegex( - # Should be: RuntimeError, - # r"The number of leaves of the pytree of the output of the operator - # needs to match the lenght of the pytree of the input", - torch._dynamo.exc.Unsupported, - "Observed exception.*", - ): - result = associative_scan(fct_wrong_pytree, inp, 0, combine_mode="generic") - - @unittest.skipIf(not SM70OrLater, "triton") - @requires_cuda - @parametrize("combine_mode", ["pointwise", "generic"]) - @parametrize("reverse", [False, True]) - @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) - # Skipping the combination of combine_mode=pointwise and device=cpu - # as the current implementation of pointwise does only support CUDA device - @decorateIf( - unittest.skip, - lambda params: ( - params["combine_mode"] == "pointwise" - and (params["device"] == torch.device("cpu") or torch.version.hip) - ), - ) - def test_associative_scan_complex_pytree(self, combine_mode, reverse, device): - def fct_pointwise(x, y): - return { - "i": x["i"] * y["i"], - "j": ( - [x["j"][0][0] * y["j"][0][0]], - [{"o": x["j"][1][0]["o"] + y["j"][1][0]["o"]}], - ), - } - - x = torch.randn(3, 2, 2, device=device) - y = torch.randn(3, 2, 2, 
device=device) - z = torch.randn(3, 2, 2, device=device) - inp = {"i": x, "j": ([y], [{"o": z}])} - - result = associative_scan( - get_scan_combine_fn("complex_pointwise", True), - inp, - 0, - combine_mode=combine_mode, - reverse=reverse, - ) - expected_result = _fake_associative_scan( - get_scan_combine_fn("complex_pointwise", True), inp, 0, reverse=reverse - ) - self.assertEqual(result, expected_result) - @requires_cuda def test_scan_wrong_pytree(self): # Init and input have same pytree @@ -1881,157 +1622,57 @@ def test_scan_complex_pytree(self, reverse, device): ) self.assertEqual(result, expected_result) - # TODO: provide an implementation for all compile modes and re-enable all test + # TODO: Does not work because of the usage of vmap witin associative_scan + # The parameterization is commented out for the moment and the test is marked with expected fail + # Fails with: AssertionError: scan is not an OpOverload + @skipIfRocm(msg="Unsupported on ROCM yet") @unittest.skipIf(not SM70OrLater, "triton") @requires_cuda - @parametrize("combine_mode", ["pointwise", "generic"]) - @parametrize("compile_mode", ["none", "compile", "compile_dynamic_shape"]) - @parametrize("reverse", [False, True]) - @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) - # Skipping the combination of combine_mode=pointwise and device=cpu - # as the current implementation of pointwise does only support CUDA device - @decorateIf( - unittest.skip, - lambda params: ( - params["combine_mode"] == "pointwise" - and (params["device"] == torch.device("cpu") or torch.version.hip) - ), - ) - def test_associative_scan_downstream_scan_matmul( - self, combine_mode, compile_mode, reverse, device - ): - # Chain with matmul - def chain_fct(inp): - W = torch.ones(2, 5, device=device) - o = associative_scan( + @unittest.expectedFailure + def test_scan_associative_scan(self): + combine_mode = "generic" + compile_mode_scan = "compile" + compile_mode_associative_scan = "none" + reverse = True + reverse_associative_scan = True + device = torch.device("cuda") + + scan_fct = compile_mode_helper(scan, compile_mode_scan) + associative_scan_fct = compile_mode_helper( + associative_scan, compile_mode_associative_scan + ) + init = torch.randn(10, 5, device=device) + inp = torch.randn(3, 10, 5, device=device) + + def body(x, y): + val = associative_scan_fct( get_scan_combine_fn("add", True), - inp, - 1, - reverse=reverse, + y, + 0, + reverse=reverse_associative_scan, combine_mode=combine_mode, ) - return o @ W + return x + y, x + val - fct_cmp = compile_mode_helper(chain_fct, compile_mode) + result = scan_fct(body, init, inp, dim=0, reverse=reverse) + expected_result = _fake_scan( + body, + init, + inp, + 0, + reverse=reverse, + ) - inp = torch.randn(3, 10, 2, device=device) - expected_result = _fake_associative_scan( - get_scan_combine_fn("add", True), inp, 1, reverse=reverse - ) @ torch.ones(2, 5, device=device) - result1 = fct_cmp(inp) - self.assertEqual(result1, expected_result) + self.assertEqual(result, expected_result) # TODO: provide an implementation for all compile modes and re-enable all test - @unittest.skipIf(not SM70OrLater, "triton") @requires_cuda - @parametrize("combine_mode", ["pointwise", "generic"]) - @parametrize("compile_mode", ["none", "compile", "compile_dynamic_shape"]) + @parametrize("compile_mode", ["none", "eager"]) @parametrize("reverse", [False, True]) @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) - # Skipping the combination of combine_mode=pointwise and device=cpu - # as the 
current implementation of pointwise does only support CUDA device - @decorateIf( - unittest.skip, - lambda params: ( - params["combine_mode"] == "pointwise" - and (params["device"] == torch.device("cpu") or torch.version.hip) - ), - ) - def test_associative_scan_downstream_scan_scan( - self, combine_mode, compile_mode, reverse, device - ): - # Chain with scan - def chain_fct_same_dim(inp): - o1 = associative_scan( - get_scan_combine_fn("add", True), - inp, - 1, - combine_mode=combine_mode, - reverse=reverse, - ) - o2 = associative_scan( - get_scan_combine_fn("add", True), - o1, - 1, - combine_mode=combine_mode, - reverse=reverse, - ) - return o2 - - fct_cmp = compile_mode_helper(chain_fct_same_dim, compile_mode) - + def test_scan_downstream_scan_matmul(self, compile_mode, reverse, device): inp = torch.randn(3, 10, 2, device=device) - - expected_result = _fake_associative_scan( - get_scan_combine_fn("add", True), - _fake_associative_scan( - get_scan_combine_fn("add", True), inp, 1, reverse=reverse - ), - 1, - reverse=reverse, - ) - result1 = fct_cmp(inp) - self.assertEqual(result1, expected_result) - - # TODO: provide an implementation for all compile modes and re-enable all test - @unittest.skipIf(not SM70OrLater, "triton") - @requires_cuda - @parametrize("combine_mode", ["pointwise", "generic"]) - @parametrize("compile_mode", ["none", "compile", "compile_dynamic_shape"]) - @parametrize("reverse", [False, True]) - @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) - # Skipping the combination of combine_mode=pointwise and device=cpu - # as the current implementation of pointwise does only support CUDA device - @decorateIf( - unittest.skip, - lambda params: ( - params["combine_mode"] == "pointwise" - and (params["device"] == torch.device("cpu") or torch.version.hip) - ), - ) - def test_associative_scan_downstream_scan_scan_different_dim( - self, combine_mode, compile_mode, reverse, device - ): - # Chain with scan on different dim - def chain_fct_different_dim(inp): - o1 = associative_scan( - get_scan_combine_fn("add", True), - inp, - 1, - combine_mode=combine_mode, - reverse=reverse, - ) - o2 = associative_scan( - get_scan_combine_fn("add", True), - o1, - 0, - combine_mode=combine_mode, - reverse=reverse, - ) - return o2 - - fct_cmp = compile_mode_helper(chain_fct_different_dim, compile_mode) - - inp = torch.randn(3, 10, 2, device=device) - expected_result = _fake_associative_scan( - get_scan_combine_fn("add", True), - _fake_associative_scan( - get_scan_combine_fn("add", True), inp, 1, reverse=reverse - ), - 0, - reverse=reverse, - ) - result1 = fct_cmp(inp) - self.assertEqual(result1, expected_result) - - # TODO: provide an implementation for all compile modes and re-enable all test - @requires_cuda - @parametrize("compile_mode", ["none", "eager"]) - @parametrize("reverse", [False, True]) - @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) - def test_scan_downstream_scan_matmul(self, compile_mode, reverse, device): - inp = torch.randn(3, 10, 2, device=device) - init = torch.randn(3, 2, device=device) + init = torch.randn(3, 2, device=device) for ind in range(2): # Chain with matmul @@ -2055,8 +1696,8 @@ def chain_fct(inp): dim=1, reverse=reverse, )[ind] @ torch.ones(2, 5, device=device) - result1 = fct_cmp(inp) - self.assertEqual(result1, expected_result) + result = fct_cmp(inp) + self.assertEqual(result, expected_result) # TODO: provide an implementation for all compile modes and re-enable all test @requires_cuda @@ -2105,57 +1746,8 @@ def 
chain_fct_different_dim(inp): dim=0, reverse=reverse, ) - result1 = fct_cmp(inp) - self.assertEqual(result1, expected_result) - - @unittest.skipIf(not SM70OrLater, "triton") - @requires_cuda - @parametrize("reverse", [False, True]) - @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) - # Skipping the combination of associative_scan and device=cpu - # as the current implementation of pointwise does only support CUDA device - @decorateIf( - unittest.skip, - lambda params: (params["device"] == torch.device("cpu")), - ) - def test_associative_scan_non_pointwise(self, reverse, device): - x = torch.randn(3, 10, 2, device=device) - # Expected to fail, as the pointwise combine_mode does not allow non-pointwise operations - with self.assertRaisesRegex( - Exception, - "For combine_mode='pointwise', the combine_fn needs to be pointwise", - ): - out = associative_scan( - get_scan_combine_fn("non_pointwise", True), - x, - 0, - reverse=reverse, - combine_mode="pointwise", - ) - - @unittest.skipIf(not SM70OrLater, "triton") - @requires_cuda - @parametrize("reverse", [False, True]) - @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) - # Skipping the combination of associative_scan and device=cpu - # as the current implementation of pointwise does only support CUDA device - @decorateIf( - unittest.skip, - lambda params: (params["device"] == torch.device("cpu")), - ) - def test_associative_scan_non_pointwise_generic(self, reverse, device): - x = torch.randn(3, 10, 2, device=device) - result_expected = _fake_associative_scan( - get_scan_combine_fn("non_pointwise", True), x, 0, reverse=reverse - ) - result1 = associative_scan( - get_scan_combine_fn("non_pointwise", True), - x, - 0, - reverse=reverse, - combine_mode="generic", - ) - self.assertEqual(result1, result_expected) + result = fct_cmp(inp) + self.assertEqual(result, expected_result) @requires_cuda @parametrize("reverse", [False, True]) @@ -2811,6 +2403,897 @@ def forward(self, L_init_ : torch.Tensor, L_xs_ : torch.Tensor): ) +class AssociativeScanModels: + @staticmethod + def get_scan_fct(compile_mode, combine_mode): + # Compile the associative_scan according to the provided compile_mode + if compile_mode != "fake": + compile_mode = "none" + assoc_scan_comp = compile_mode_helper(associative_scan, compile_mode) + + def scan_fct(combine_fn, xs, dim, reverse): + return assoc_scan_comp(combine_fn, xs, dim, reverse, combine_mode) + + else: + scan_fct = _fake_associative_scan + return scan_fct + + class CombineFn(torch.nn.Module): + def __init__(self, combine_fn, dim, reverse, combine_mode, compile_mode): + super().__init__() + + self.scan_fct = AssociativeScanModels.get_scan_fct( + compile_mode, combine_mode + ) + self.combine_fn = combine_fn + self.dim = dim + self.reverse = reverse + + def forward(self, inputs): + results = self.scan_fct(self.combine_fn, inputs, self.dim, self.reverse) + return results + + class Simple(torch.nn.Module): + def __init__(self, dim, reverse, combine_mode, compile_mode): + super().__init__() + + kwargs = { + "dim": dim, + "reverse": reverse, + "combine_mode": combine_mode, + "compile_mode": compile_mode, + } + self.combine_fns = [ + AssociativeScanModels.CombineFn( + get_scan_combine_fn("add", True), **kwargs + ), + AssociativeScanModels.CombineFn( + get_scan_combine_fn("mul", True), **kwargs + ), + ] + + def forward(self, inputs): + results = [] + for combine_fn in self.combine_fns: + results.append(combine_fn(inputs)) + return results + + class ChainFn(torch.nn.Module): + def 
__init__(self, combine_fn, dim, reverse, combine_mode, compile_mode): + super().__init__() + + chain_len = len(combine_fn) + kwargs = { + "combine_fn": combine_fn, + "dim": dim, + "reverse": reverse, + "combine_mode": combine_mode, + } + + # Prepare the kwargs as a list. + self.nested_tuple = [] + for ind in range(chain_len): + kwargs_el = {} + for key, val in kwargs.items(): + # Check if val is a list and if it has the same length as combine_fn + # If so, then use the individual elements. + # If not, duplicate the first element. + if type(val) == list and len(val) == chain_len: + kwargs_el[key] = val[ind] + else: + kwargs_el[key] = val + + scan_fct = AssociativeScanModels.get_scan_fct( + compile_mode, kwargs_el["combine_mode"] + ) + combine_fn = kwargs_el["combine_fn"] + del kwargs_el["combine_fn"] + del kwargs_el["combine_mode"] + self.nested_tuple.append((combine_fn, scan_fct, kwargs_el)) + + def forward(self, inputs): + results = inputs + for combine_fn, scan_fct, kwargs in self.nested_tuple: + results = combine_fn(scan_fct, results, **kwargs) + return results + + class NestedFn(torch.nn.Module): + def forward(self, scan_fct, inputs, **kwargs): + combine_fn = kwargs["combine_fn"] + + # Remove combine_fn from kwargs + del kwargs["combine_fn"] + + results = scan_fct(combine_fn, inputs, **kwargs) + + return results + + +@unittest.skipIf(IS_WINDOWS, "Windows not supported for this test") +@skipIfNoDynamoSupport +class AssociativeScanTests(TestCase): + def setUp(self): + torch._dynamo.reset() + super().setUp() + + def _run_test(self, model, model_fake, inputs): + result = model(inputs) + result_exp = model_fake(inputs) + self.assertEqual(result, result_exp) + + # Return the result of the functions under test for further investigations + return result + + def _prepare_fake_kwargs(self, original_kwargs): + kwargs_fake = original_kwargs.copy() + kwargs_fake["compile_mode"] = "fake" + return kwargs_fake + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @parametrize("reverse", [False, True]) + @parametrize("compile_mode", ["none", "eager", "compile", "compile_dynamic_shape"]) + @parametrize("combine_mode", ["pointwise", "generic"]) + @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) + # Skipping the combination of combine_mode=pointwise and device=cpu + # as the current implementation of pointwise does only support CUDA device + @decorateIf( + unittest.skip, + lambda params: ( + params["combine_mode"] == "pointwise" + and (params["device"] == torch.device("cpu") or torch.version.hip) + ), + ) + def test_associative_scan_compile( + self, combine_mode, reverse, compile_mode, device + ): + x = torch.randn(3, 10, 2, device=device) + kwargs = { + "dim": 0, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_mode": combine_mode, + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + results = self._run_test( + model=AssociativeScanModels.Simple(**kwargs), + model_fake=AssociativeScanModels.Simple(**kwargs_fake), + inputs=x, + ) + + if not reverse: + results_torch = [] + for op_pt in [torch.cumsum, torch.cumprod]: + results_torch.append(op_pt(x, 0)) + self.assertEqual(results, results_torch) + + # Jax Examples + x = torch.arange(0, 4, device=device) + kwargs = { + "dim": 0, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_fn": get_scan_combine_fn("add", True), + "combine_mode": combine_mode, + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + result = self._run_test( + model=AssociativeScanModels.CombineFn(**kwargs), + 
model_fake=AssociativeScanModels.CombineFn(**kwargs_fake), + inputs=x, + ) + + if not reverse: + results_torch = torch.tensor([0.0, 1.0, 3.0, 6.0], dtype=torch.int64) + else: + results_torch = torch.tensor([6.0, 6.0, 5.0, 3.0], dtype=torch.int64) + + self.assertEqual(result, results_torch) + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @parametrize("reverse", [False, True]) + @parametrize("compile_mode", ["none", "eager", "compile", "compile_dynamic_shape"]) + @parametrize("combine_mode", ["pointwise", "generic"]) + @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) + # Skipping the combination of combine_mode=pointwise and device=cpu + # as the current implementation of pointwise does only support CUDA device + @decorateIf( + unittest.skip, + lambda params: ( + params["combine_mode"] == "pointwise" + and (params["device"] == torch.device("cpu") or torch.version.hip) + ), + ) + def test_associative_scan_dim(self, combine_mode, compile_mode, reverse, device): + import random + + random.seed(1234) + + num_dims = [random.randint(2, 5) for _ in range(4)] + for num_dim in num_dims: + shapes = [random.randint(1, 9) for _ in range(num_dim)] + rnd_scan_dim = random.randint(0, num_dim - 1) + x = torch.randn(*shapes, device=device) + + kwargs = { + "dim": rnd_scan_dim, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_mode": combine_mode, + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + results = self._run_test( + model=AssociativeScanModels.Simple(**kwargs), + model_fake=AssociativeScanModels.Simple(**kwargs_fake), + inputs=x, + ) + + if not reverse: + results_torch = [] + for op_pt in [torch.cumsum, torch.cumprod]: + results_torch.append(op_pt(x, 0)) + self.assertEqual(results, results_torch) + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + # This test is expected to fail, as there may be an issue with the underlying triton implementation + # See https://github.com/pytorch/pytorch/issues/137943 + @unittest.expectedFailure + def test_associative_scan_dim_shape_failure(self): + num_dims = [2] + for num_dim in num_dims: + shapes = [9 for _ in range(num_dim)] + rnd_scan_dim = 0 + x = torch.randn(*shapes, device=torch.device("cuda")) + + kwargs = { + "dim": rnd_scan_dim, + "reverse": True, + "compile_mode": "none", + "combine_mode": "generic", + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + self._run_test( + model=AssociativeScanModels.Simple(**kwargs), + model_fake=AssociativeScanModels.Simple(**kwargs_fake), + inputs=x, + ) + + @skipIfRocm(msg="Unsupported on ROCM yet") + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @parametrize("compile_mode", ["none", "eager", "compile", "compile_dynamic_shape"]) + @parametrize("combine_mode", ["pointwise", "generic"]) + @parametrize("reverse", [False, True]) + @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) + # Skipping the combination of combine_mode=pointwise and device=cpu + # as the current implementation of pointwise does only support CUDA device + @decorateIf( + unittest.skip, + lambda params: ( + params["combine_mode"] == "pointwise" + and (params["device"] == torch.device("cpu") or torch.version.hip) + ), + ) + def test_associative_scan_tuple(self, compile_mode, combine_mode, reverse, device): + x = torch.randn(3, 2, 2, device=device) + y = torch.randn(3, 2, 2, device=device) + inp = (x, y) + + kwargs = { + "dim": 0, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_fn": get_scan_combine_fn("tuple_fct", True), + 
"combine_mode": combine_mode, + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + self._run_test( + model=AssociativeScanModels.CombineFn(**kwargs), + model_fake=AssociativeScanModels.CombineFn(**kwargs_fake), + inputs=inp, + ) + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @parametrize("compile_mode", ["none", "eager", "compile", "compile_dynamic_shape"]) + @parametrize("combine_mode", ["pointwise", "generic"]) + @parametrize("reverse", [False, True]) + @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) + def test_associative_scan_expand_in_combine_fn( + self, compile_mode, combine_mode, reverse, device + ): + x = torch.randn(3, 2, 2, device=device) + + def combine_fn(x, y): + return x * torch.sum(y, -1).expand(x.shape) + + kwargs = { + "dim": 0, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_fn": combine_fn, + "combine_mode": "generic", + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + self._run_test( + model=AssociativeScanModels.CombineFn(**kwargs), + model_fake=AssociativeScanModels.CombineFn(**kwargs_fake), + inputs=x, + ) + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @parametrize("compile_mode", ["none", "eager", "compile", "compile_dynamic_shape"]) + @parametrize("reverse", [False, True]) + @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) + def test_associative_scan_non_contiguous_tensor( + self, compile_mode, reverse, device + ): + x = torch.arange(30, device=device).view(10, 3).t() + assert not x.is_contiguous() + + kwargs = { + "dim": 0, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_fn": get_scan_combine_fn("add", True), + "combine_mode": "generic", + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + self._run_test( + model=AssociativeScanModels.CombineFn(**kwargs), + model_fake=AssociativeScanModels.CombineFn(**kwargs_fake), + inputs=x, + ) + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @parametrize("compile_mode", ["none", "eager", "compile", "compile_dynamic_shape"]) + @parametrize("combine_mode", ["pointwise", "generic"]) + @parametrize("reverse", [False, True]) + @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) + # Skipping the combination of combine_mode=pointwise and device=cpu + # as the current implementation of pointwise does only support CUDA device + @decorateIf( + unittest.skip, + lambda params: ( + params["combine_mode"] == "pointwise" + and (params["device"] == torch.device("cpu") or torch.version.hip) + ), + ) + def test_associative_scan_complex_pytree( + self, compile_mode, combine_mode, reverse, device + ): + x = torch.randn(3, 2, 2, device=device) + y = torch.randn(3, 2, 2, device=device) + z = torch.randn(3, 2, 2, device=device) + inp = {"i": x, "j": ([y], [{"o": z}])} + + kwargs = { + "dim": 0, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_fn": get_scan_combine_fn("complex_pointwise", True), + "combine_mode": combine_mode, + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + self._run_test( + model=AssociativeScanModels.CombineFn(**kwargs), + model_fake=AssociativeScanModels.CombineFn(**kwargs_fake), + inputs=inp, + ) + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @parametrize("combine_mode", ["pointwise", "generic"]) + @parametrize("compile_mode", ["none", "eager", "compile", "compile_dynamic_shape"]) + @parametrize("reverse", [False, True]) + @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) + # Skipping the combination of 
combine_mode=pointwise and device=cpu + # as the current implementation of pointwise does only support CUDA device + @decorateIf( + unittest.skip, + lambda params: ( + params["combine_mode"] == "pointwise" + and (params["device"] == torch.device("cpu") or torch.version.hip) + ), + ) + def test_associative_scan_downstream_scan_matmul( + self, combine_mode, compile_mode, reverse, device + ): + def first_chain_fct(scan_fct, inp, **kwargs): + o = scan_fct(get_scan_combine_fn("add", True), inp, **kwargs) + return o + + def second_chain_fct(scan_fct, inp, **kwargs): + W = torch.ones(2, 5, device=device) + return inp @ W + + inp = torch.randn(3, 10, 2, device=device) + kwargs = { + "dim": 1, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_fn": [first_chain_fct, second_chain_fct], + "combine_mode": combine_mode, + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + self._run_test( + model=AssociativeScanModels.ChainFn(**kwargs), + model_fake=AssociativeScanModels.ChainFn(**kwargs_fake), + inputs=inp, + ) + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @parametrize("combine_mode", ["pointwise", "generic"]) + @parametrize("compile_mode", ["none", "eager", "compile", "compile_dynamic_shape"]) + @parametrize("reverse", [False, True]) + @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) + # Skipping the combination of combine_mode=pointwise and device=cpu + # as the current implementation of pointwise does only support CUDA device + @decorateIf( + unittest.skip, + lambda params: ( + params["combine_mode"] == "pointwise" + and (params["device"] == torch.device("cpu") or torch.version.hip) + ), + ) + def test_associative_scan_downstream_scan_scan( + self, combine_mode, compile_mode, reverse, device + ): + def first_chain_fct(scan_fct, inp, **kwargs): + o1 = scan_fct(get_scan_combine_fn("add", True), inp, **kwargs) + return o1 + + def second_chain_fct(scan_fct, inp, **kwargs): + o2 = scan_fct(get_scan_combine_fn("add", True), inp, **kwargs) + return o2 + + inp = torch.randn(3, 10, 2, device=device) + + kwargs = { + "dim": 1, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_fn": [first_chain_fct, second_chain_fct], + "combine_mode": combine_mode, + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + self._run_test( + model=AssociativeScanModels.ChainFn(**kwargs), + model_fake=AssociativeScanModels.ChainFn(**kwargs_fake), + inputs=inp, + ) + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @parametrize("combine_mode", ["pointwise", "generic"]) + @parametrize("compile_mode", ["none", "eager", "compile", "compile_dynamic_shape"]) + @parametrize("reverse_first", [False, True]) + @parametrize("same_direction", [False, True]) + @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) + # Skipping the combination of combine_mode=pointwise and device=cpu + # as the current implementation of pointwise does only support CUDA device + @decorateIf( + unittest.skip, + lambda params: ( + params["combine_mode"] == "pointwise" + and (params["device"] == torch.device("cpu") or torch.version.hip) + ), + ) + def test_associative_scan_downstream_scan_scan_different_dim( + self, combine_mode, compile_mode, reverse_first, same_direction, device + ): + reverse_second = reverse_first if same_direction else not reverse_first + + def first_chain_fct(scan_fct, inp, **kwargs): + o1 = scan_fct(get_scan_combine_fn("add", True), inp, **kwargs) + return o1 + + def second_chain_fct(scan_fct, inp, **kwargs): + o2 = 
scan_fct(get_scan_combine_fn("add", True), inp, **kwargs) + return o2 + + inp = torch.randn(3, 10, 2, device=device) + + kwargs = { + "dim": [1, 0], + "reverse": [reverse_first, reverse_second], + "compile_mode": compile_mode, + "combine_fn": [first_chain_fct, second_chain_fct], + "combine_mode": [combine_mode, combine_mode], + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + self._run_test( + model=AssociativeScanModels.ChainFn(**kwargs), + model_fake=AssociativeScanModels.ChainFn(**kwargs_fake), + inputs=inp, + ) + + # TODO: Does not work because of the usage of vmap witin associative_scan + # TODO: Re-enable additional parameters again once this issues has been resolved + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @unittest.expectedFailure + def test_associative_scan_nested(self): + combine_mode = "pointwise" + compile_mode = "eager" + reverse_first = False + same_direction = False + device = torch.device("cuda") + + reverse_second = reverse_first if same_direction else not reverse_first + + def first_nested_fct(x, y): + y_new = associative_scan( + second_nested_fct, + y, + 0, + reverse=reverse_second, + combine_mode=combine_mode, + ) + return x + y_new + + def first_nested_fct_fake(x, y): + y_new = _fake_associative_scan( + second_nested_fct, y, 0, reverse=reverse_second + ) + return x + y_new + + def second_nested_fct(x, y): + return x * y + + inp = torch.randn(3, 10, 2, device=device) + + kwargs = { + "dim": 0, + "reverse": reverse_first, + "compile_mode": compile_mode, + "combine_fn": first_nested_fct, + "combine_mode": combine_mode, + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + kwargs_fake["combine_fn"] = first_nested_fct_fake + self._run_test( + model=AssociativeScanModels.NestedFn(**kwargs), + model_fake=AssociativeScanModels.NestedFn(**kwargs_fake), + inputs=inp, + ) + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @parametrize("compile_mode", ["none", "eager", "compile", "compile_dynamic_shape"]) + @parametrize("loop_type", ["for"]) + @parametrize("reverse", [False, True]) + @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) + def test_associative_scan_loop_in_combine_fn( + self, compile_mode, loop_type, reverse, device + ): + def combine_fn(x, y): + cnt = torch.zeros_like(y[0, :]) + if loop_type == "while": + + def cond_fn(ind, loop_val): + return (loop_val < 5)[0] + + def body_fn(ind, loop_val): + return ind + 1, loop_val + torch.abs(ind) + + new_ind, cnt = torch.while_loop( + cond_fn=cond_fn, + body_fn=body_fn, + carried_inputs=( + torch.zeros(1, dtype=torch.int32, device=cnt.device), + cnt, + ), + ) + else: + for ind in range(10): + cnt += torch.abs(y[ind]) + return x * cnt + + inp = torch.randn(3, 10, 1, device=device) * 2 + + kwargs = { + "dim": 0, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_fn": combine_fn, + "combine_mode": "generic", + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + self._run_test( + model=AssociativeScanModels.CombineFn(**kwargs), + model_fake=AssociativeScanModels.CombineFn(**kwargs_fake), + inputs=inp, + ) + + # TODO: Does not work because of the usage of vmap witin associative_scan + # TODO: Re-enable additional parameters again once this issues has been resolved + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @unittest.expectedFailure + def test_associative_scan_loop_in_combine_fn_failure(self): + compile_mode = "none" + loop_type = "while" + reverse = False + device = torch.device("cuda") + + def combine_fn(x, y): + cnt = 
torch.zeros_like(y[0, :]) + if loop_type == "while": + + def cond_fn(ind, loop_val): + return (loop_val < 5)[0] + + def body_fn(ind, loop_val): + return ind + 1, loop_val + torch.abs(ind) + + inp = torch.randn(3, 10, 1, device=device) * 2 + + kwargs = { + "dim": 0, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_fn": combine_fn, + "combine_mode": "generic", + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + self._run_test( + model=AssociativeScanModels.CombineFn(**kwargs), + model_fake=AssociativeScanModels.CombineFn(**kwargs_fake), + inputs=inp, + ) + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @parametrize("compile_mode", ["none", "eager", "compile", "compile_dynamic_shape"]) + @parametrize("reverse", [False, True]) + @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) + def test_associative_scan_cond_in_combine_fn(self, compile_mode, reverse, device): + def combine_fn(x, y): + val = cond(torch.sum(y) > 0.0, lambda y: y + 0.0, lambda y: 1.0 - y, (y,)) + return x * val + + inp = torch.randn(3, 10, 1, device=device) + + kwargs = { + "dim": 0, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_fn": combine_fn, + "combine_mode": "generic", + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + self._run_test( + model=AssociativeScanModels.CombineFn(**kwargs), + model_fake=AssociativeScanModels.CombineFn(**kwargs_fake), + inputs=inp, + ) + + # TODO: Does not work because of the usage of vmap witin associative_scan + # TODO: Re-enable additional parameters again once this issues has been resolved + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @unittest.expectedFailure + def test_associative_scan_map_in_combine_fn(self): + compile_mode = "none" + reverse = False + device = torch.device("cuda") + + def combine_fn(x, y): + def body(x, y): + return x + y + + y_init = y[0] + y_new = control_flow.map(body, y, y_init) + return x * y_new + + inp = torch.randn(3, 10, 1, device=device) + + kwargs = { + "dim": 0, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_fn": combine_fn, + "combine_mode": "generic", + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + self._run_test( + model=AssociativeScanModels.CombineFn(**kwargs), + model_fake=AssociativeScanModels.CombineFn(**kwargs_fake), + inputs=inp, + ) + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @parametrize("compile_mode", ["none", "eager", "compile", "compile_dynamic_shape"]) + @parametrize("reverse", [False, True]) + @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) + def test_associative_scan_vmap_in_combine_fn(self, compile_mode, reverse, device): + def combine_fn(x, y): + def body(x): + return x**2 + + mapped_body = torch.vmap(body, 0, 0) + y_new = mapped_body(y) + return x + y_new + + inp = torch.randn(3, 10, 2, device=device) + + kwargs = { + "dim": 0, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_fn": combine_fn, + "combine_mode": "generic", + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + self._run_test( + model=AssociativeScanModels.CombineFn(**kwargs), + model_fake=AssociativeScanModels.CombineFn(**kwargs_fake), + inputs=inp, + ) + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @parametrize("reverse", [False, True]) + @parametrize("compile_mode", ["none", "eager", "compile", "compile_dynamic_shape"]) + @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) + # Skipping the combination of associative_scan and device=cpu + # as the current 
implementation of pointwise does only support CUDA device + @decorateIf( + unittest.skip, + lambda params: (params["device"] == torch.device("cpu")), + ) + def test_associative_scan_non_pointwise_generic( + self, reverse, compile_mode, device + ): + x = torch.randn(3, 10, 2, device=device) + + kwargs = { + "dim": 0, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_fn": get_scan_combine_fn("non_pointwise", True), + "combine_mode": "generic", + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + self._run_test( + model=AssociativeScanModels.CombineFn(**kwargs), + model_fake=AssociativeScanModels.CombineFn(**kwargs_fake), + inputs=x, + ) + + @skipIfRocm(msg="Unsupported on ROCM yet") + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + @parametrize("compile_mode", ["none", "eager", "compile", "compile_dynamic_shape"]) + @parametrize("combine_mode", ["pointwise", "generic"]) + @parametrize("reverse", [False, True]) + @parametrize("device", [torch.device("cpu"), torch.device("cuda")]) + # Skipping the combination of combine_mode=pointwise and device=cpu + # as the current implementation of pointwise does only support CUDA device + @decorateIf( + unittest.skip, + lambda params: ( + params["combine_mode"] == "pointwise" + and (params["device"] == torch.device("cpu") or torch.version.hip) + ), + ) + def test_associative_scan_binary_operator( + self, compile_mode, combine_mode, reverse, device + ): + state_dim = 20 + timesteps = 10 + projected_inputs = torch.randn( + timesteps, state_dim, requires_grad=True, device=device + ) + A = torch.randn(state_dim, requires_grad=True, device=device) + elements = (A.repeat((timesteps, 1)), projected_inputs) + + kwargs = { + "dim": 0, + "reverse": reverse, + "compile_mode": compile_mode, + "combine_fn": get_scan_combine_fn("s5_operator", True), + "combine_mode": combine_mode, + } + kwargs_fake = self._prepare_fake_kwargs(kwargs) + self._run_test( + model=AssociativeScanModels.CombineFn(**kwargs), + model_fake=AssociativeScanModels.CombineFn(**kwargs_fake), + inputs=elements, + ) + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + def test_associative_scan_sparse_tensor(self): + x = torch.tensor( + [[[0.0, 0], [1.0, 2.0]], [[0.0, 0], [3.0, 4.0]], [[0.0, 0], [5.0, 6.0]]] + ).to_sparse() + + with self.assertRaisesRegex( + RuntimeError, + "torch.compile does not support sparse Tensors", + ): + result = associative_scan( + get_scan_combine_fn("add", True), + x, + 0, + ) + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + def test_associative_scan_combine_fn_wrong_meta_in_combine_fn(self): + device = torch.device("cuda") + B, N, C, H, W = 3, 3, 2, 3, 3 + x = torch.randn(B, N, C, H, W, device=device) + + def fct_wrong_dtype(x, y): + return (x + y).to(torch.int64) + + def fct_wrong_device(x, y): + return (x + y).to( + torch.device("cpu") if device.type == "cuda" else torch.device("cuda") + ) + + def fct_wrong_stride(x, y): + return (x + y).to(memory_format=torch.channels_last) + + for fct in [fct_wrong_dtype, fct_wrong_device, fct_wrong_stride]: + with self.assertRaisesRegex( + # Should be: RuntimeError, + # "The pytree of the output of the operator needs to match the xs pytree" + torch._dynamo.exc.Unsupported, + "Observed exception.*", + ): + result = associative_scan(fct, x, 0) + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + def test_associative_scan_wrong_pytree(self): + def fct_wrong_pytree(x, y): + return { + "i": x["i"] * y["j"][0][0], + "k": 0.0, + "j": ([x["j"][1][0]["o"]], [{"o": 
torch.sin(x["i"])}]), + } + + x = torch.randn(3, 2, 2) + y = torch.randn(3, 2, 2) + z = torch.randn(3, 2, 2) + inp = {"i": x, "j": ([y], [{"o": z}])} + + with self.assertRaisesRegex( + # Should be: RuntimeError, + # r"The number of leaves of the pytree of the output of the operator + # needs to match the lenght of the pytree of the input", + torch._dynamo.exc.Unsupported, + "Observed exception.*", + ): + result = associative_scan(fct_wrong_pytree, inp, 0, combine_mode="generic") + + @unittest.skipIf(not SM70OrLater, "triton") + @requires_cuda + def test_associative_scan_non_pointwise(self): + x = torch.randn(3, 10, 2, device=torch.device("cuda")) + # Expected to fail, as the pointwise combine_mode does not allow non-pointwise operations + with self.assertRaisesRegex( + Exception, + "For combine_mode='pointwise', the combine_fn needs to be pointwise", + ): + out = associative_scan( + get_scan_combine_fn("non_pointwise", True), + x, + 0, + combine_mode="pointwise", + ) + + @unittest.skipIf(IS_WINDOWS, "Windows not supported for this test") @skipIfNoDynamoSupport class TestControlFlowTraced(TestCase): @@ -5567,6 +6050,7 @@ def forward(self, a, b1, b2, c): instantiate_parametrized_tests(TestControlFlowTraced) instantiate_parametrized_tests(TestControlFlow) +instantiate_parametrized_tests(AssociativeScanTests) if __name__ == "__main__": run_tests() diff --git a/test/inductor/test_control_flow.py b/test/inductor/test_control_flow.py index ae7d51264244c9..b24c524ec4a135 100644 --- a/test/inductor/test_control_flow.py +++ b/test/inductor/test_control_flow.py @@ -829,7 +829,8 @@ def test_associative_scan_CUDA_flip(self, combine_mode, backend, device): def fct(x: torch.Tensor, y: torch.Tensor): return x + y - for n in range(10): + # for n in range(10): + for n in [9]: x = torch.arange(n, device=device) torch.compiler.reset() associative_scan1 = torch.compile( diff --git a/torch/_dynamo/trace_rules.py b/torch/_dynamo/trace_rules.py index b46d8719c54c61..b2c7d39c11f4e5 100644 --- a/torch/_dynamo/trace_rules.py +++ b/torch/_dynamo/trace_rules.py @@ -3216,6 +3216,7 @@ def _module_dir(m: types.ModuleType): "torch._higher_order_ops.while_loop", "torch._higher_order_ops.associative_scan", "torch._higher_order_ops.scan", + "torch._higher_order_ops.utils", "torch.nn.attention.flex_attention", "torch.ao.quantization.pt2e.export_utils", "torch.ao.quantization.pt2e.qat_utils", diff --git a/torch/_dynamo/variables/higher_order_ops.py b/torch/_dynamo/variables/higher_order_ops.py index 6a6f3e51186474..e73f90810b04be 100644 --- a/torch/_dynamo/variables/higher_order_ops.py +++ b/torch/_dynamo/variables/higher_order_ops.py @@ -1062,6 +1062,8 @@ def call_function( args: List[VariableTracker], kwargs: Dict[str, VariableTracker], ) -> VariableTracker: + from torch._higher_order_ops.utils import first_slice_copy + from .builder import wrap_fx_proxy args, kwargs = LazyVariableTracker.realize_all((args, kwargs)) @@ -1079,29 +1081,10 @@ def arg_extractor(combine_fn, xs, dim): # Trace the subgraph # TODO: Fix these pointless new_empty calls appearing in the dynamo output graph. + # The sub_args is a slice of original input, e.g. if input.size is (3, 4), and scan dim=0 + # the sub_args shape will be (4, ). 
sub_args = [ - leaf.call_method( - tx, - "new_empty", - args=( - VariableTracker.build( - tx, - ( - leaf.size - if leaf.size is not None - else BuiltinVariable(getattr) - .call_function( - tx, [leaf, ConstantVariable.create("shape")], {} - ) - .items - ), - ), - ), - kwargs={ - "dtype": VariableTracker.build(tx, leaf.dtype), - "requires_grad": VariableTracker.build(tx, leaf.requires_grad), - }, - ) + _make_inlined(tx, first_slice_copy)(leaf, dim) for leaf in itertools.chain(xs.items, xs.items) ] ( diff --git a/torch/_higher_order_ops/associative_scan.py b/torch/_higher_order_ops/associative_scan.py index d58d6b26bd33f7..c59ce340cb8091 100644 --- a/torch/_higher_order_ops/associative_scan.py +++ b/torch/_higher_order_ops/associative_scan.py @@ -1,7 +1,7 @@ # mypy: allow-untyped-defs import functools import itertools -from typing import Callable, List +from typing import Any, Callable, List import torch import torch._prims_common as utils @@ -12,6 +12,7 @@ _maybe_run_with_interpreter, _set_compilation_env, autograd_not_implemented, + first_slice_copy, reenter_make_fx, unique_graph_id, ) @@ -124,11 +125,11 @@ def add(x: torch.Tensor, y: torch.Tensor): """ if not callable(combine_fn): - raise RuntimeError("Combine_fn must be a callable, but got {combine_fn}") + raise ValueError("Combine_fn must be a callable, but got {combine_fn}") if not isinstance(dim, int): - raise RuntimeError("Dim must be an int, but got " + str(type(dim))) + raise ValueError("Dim must be an int, but got " + str(type(dim))) if combine_mode not in ["pointwise", "generic"]: - raise RuntimeError( + raise ValueError( "Combine_mode must either 'pointwise' or 'generic', but got {combine_mode}" ) @@ -146,41 +147,94 @@ def add(x: torch.Tensor, y: torch.Tensor): ) if len(leaves) == 0: - raise RuntimeError("Expected at least 1 xs leaf") + raise ValueError("Expected at least 1 xs leaf") if any(not isinstance(x, torch.Tensor) for x in leaves): - raise RuntimeError("xs leaves must be a Tensor") + raise ValueError("xs leaves must be a Tensor") + if any(x.is_sparse for x in leaves): + raise ValueError("xs leaves must dense Tensors, consider using `to_dense()`") + if any(x.ndim < dim for x in leaves): + raise ValueError( + "All xs leaves must at least have 'dim' number of dimensions and scan dimension > 0" + ) + if any(x.shape[dim] == 0 for x in leaves): + raise ValueError( + "All xs leaves must at least have 'dim' number of dimensions and scan dimension > 0" + ) if reverse: leaves = [torch.flip(elem, [dim]) for elem in leaves] - shape = leaves[0].shape - ndim = len(shape) + ndim = leaves[0].ndim dim = utils.canonicalize_dim(ndim, dim) + shape = leaves[0].shape for x in leaves[1:]: assert x.shape == shape, "All xs tensors must have the same shape" + # Call the combine_fn with only a slice along the scan dim + # and check whether the output leaves have the same slice dimensions + sliced_leaves = [first_slice_copy(leaf, dim) for leaf in leaves] + sliced_shape = sliced_leaves[0].shape + out = combine_fn( - pytree.tree_unflatten(leaves, spec), - pytree.tree_unflatten(leaves, spec), + pytree.tree_unflatten(sliced_leaves, spec), + pytree.tree_unflatten(sliced_leaves, spec), ) - out_leaves, tree_out = pytree.tree_flatten(out) + out_leaves = pytree.tree_leaves(out) if len(leaves) != len(out_leaves): raise RuntimeError( "The number of leaves of the pytree of the output of the operator needs to match the length of the pytree of the input" ) - if any(x.shape != shape for x in out_leaves): + if any( + x.shape != sliced_shape + or x.dtype != 
x_sliced.dtype + or x.device != x_sliced.device + or x.stride() != x_sliced.stride() + for x, x_sliced in zip(out_leaves, sliced_leaves) + ): raise RuntimeError( - "The pytree of the output of the operator needs to match the xs pytree" + f"The metadata of the output of the operator needs to match the meta data of the xs pytree" + f"\n xs metadata : {[(x.shape, x.dtype, x.device, x.stride()) for x in sliced_leaves]}" + f"\n operator output metadata: {[(x.shape, x.dtype, x.device, x.stride()) for x in out_leaves]}" ) - combine_fn = functools.partial( - wrap_combine_fn_flat, combine_fn=combine_fn, spec=spec, num_leaves=len(leaves) - ) - if combine_mode == "generic": + # The generic_associative_scan implementation calls the combine_fn with a `batch` along the scan dimension + # For example, consider: + # def add(x: torch.Tensor, y: torch.Tensor): + # return x + y + # leaves = torch.tensor([[0.0, 1.0, 2.0, 3.0] + # [0.0, 1.0, 2.0, 3.0]]) + # which has shape 2 x 4; + # dim = 1; + # In the first iteration of `_scan` the combine_fn gets invoked with + # combine_fn([torch.tensor([[0.0, 2.0], + # [0.0, 2.0]])], + # [torch.tensor([[1.0, 3.0], + # [1.0, 3.0]])]) + # The arguments are of shape 2 x 2, but can be evaluated in parallel along the scan dimension. + # TODO: In case of the additional inputs, we the in_dims should be set to None + combine_fn = functools.partial( + wrap_combine_fn_flat, + combine_fn=torch.vmap( + combine_fn, + in_dims=( + pytree.tree_unflatten([dim] * len(leaves), spec), + pytree.tree_unflatten([dim] * len(leaves), spec), + ), + out_dims=dim, + ), + spec=spec, + num_leaves=len(leaves), + ) result_flat = generic_associative_scan(combine_fn, leaves, dim) else: + combine_fn = functools.partial( + wrap_combine_fn_flat, + combine_fn=combine_fn, + spec=spec, + num_leaves=len(leaves), + ) result_flat = associative_scan_op(combine_fn, leaves, dim) if reverse: @@ -189,10 +243,10 @@ def add(x: torch.Tensor, y: torch.Tensor): return pytree.tree_unflatten(result_flat, spec) -def generic_associative_scan(operator, elems_flat, dim=0): +def generic_associative_scan(operator, leaves, dim=0): r""" This function performs the associative_scan operation. - The algorithm works by recursively collecting neighbours of ``elems_flat`` and subsequently + The algorithm works by recursively collecting neighbours of ``leaves`` and subsequently applying the ``operator`` on all pairs in parallel along ``dim``. The results of the recursive calls are later combined. @@ -200,7 +254,7 @@ def generic_associative_scan(operator, elems_flat, dim=0): operator (Callable): A binary callable with type ``(Tensor, Tensor) -> Tensor``, or if input is a pytree ``(pytree, pytree) -> pytree``. This function must be pure, pointwise, and satisfy the associative property. - elems_flat (torch.Tensor): A list of torch.Tensors converted from the pytree of + leaves (torch.Tensor): A list of torch.Tensors converted from the pytree of ``xs`` provided to ``associative_scan``. All inputs are expected to have the same shape. 
dim (int): the dimension to scan over @@ -211,7 +265,7 @@ def add(x: torch.Tensor, y: torch.Tensor): return x + y - elems_flat = torch.tensor([0.0, 1.0, 2.0, 3.0]) + leaves = torch.tensor([0.0, 1.0, 2.0, 3.0]) First iteration of _scan -> # odd_elems -> apply operator on all neighbours @@ -280,7 +334,7 @@ def _scan(elems): safe_map(functools.partial(_interleave, dim=dim), even_elems, odd_elems) ) - scans = _scan(elems_flat) + scans = _scan(leaves) return scans @@ -289,15 +343,7 @@ def trace_associative_scan( proxy_mode, func_overload, combine_fn: Callable, xs: List[torch.Tensor], dim: int ): with disable_proxy_modes_tracing(): - sample_xs = [ - torch.empty_like( - x, - dtype=x.dtype, - device=x.device, - requires_grad=x.requires_grad, - ) - for x in itertools.chain(xs, xs) - ] + sample_xs = [first_slice_copy(x, dim) for x in itertools.chain(xs, xs)] combine_graph = reenter_make_fx(combine_fn)(*sample_xs) outputs = None @@ -342,7 +388,7 @@ def trace_associative_scan( @associative_scan_op.py_impl(DispatchKey.CompositeExplicitAutograd) def associative_scan_op_dense(combine_fn, xs, dim): - raise NotImplementedError("associative_scan is not implemented for eager") + return generic_associative_scan(combine_fn, xs, dim) associative_scan_op.py_impl(DispatchKey.Autograd)( @@ -370,3 +416,31 @@ def associative_scan_functionalize(ctx, combine_fn, xs, dim): ) ret = associative_scan_op(functional_combine_fn, unwrapped_xs, dim) return ctx.wrap_tensors(ret) + + +def _fake_associative_scan(combine_fn, xs, dim, reverse=False): # noqa: F811 + inp_leaves, spec = pytree.tree_flatten(xs) + result_flat: List[Any] = [] + num_leaves = len(inp_leaves) + op = reversed if reverse else lambda x: x + + for ind in op(range(inp_leaves[0].size(dim))): + r = [ + inp_leaves[leave_ind][(slice(None),) * dim + (ind,)] + for leave_ind in range(num_leaves) + ] + if (ind > 0 and not reverse) or ( + ind < (inp_leaves[0].size(dim) - 1) and reverse + ): + r = combine_fn( + pytree.tree_unflatten(result_flat[-1], spec), + pytree.tree_unflatten(r, spec), + ) + r_flat, _ = pytree.tree_flatten(r) + result_flat.append(r_flat) + + results = [ + torch.stack([e[leave_ind] for e in op(result_flat)], dim) + for leave_ind in range(num_leaves) + ] + return pytree.tree_unflatten(results, spec) diff --git a/torch/_higher_order_ops/utils.py b/torch/_higher_order_ops/utils.py index f6a8d29d520c00..105061663dc377 100644 --- a/torch/_higher_order_ops/utils.py +++ b/torch/_higher_order_ops/utils.py @@ -476,3 +476,8 @@ def get_dummy_aot_autograd_config(): aot_id=0, keep_inference_input_mutations=False, ) + + +# Slices off the first element of a given dimension +def first_slice_copy(t: torch.Tensor, dim: int = 0) -> torch.Tensor: + return torch.select_copy(t, dim, 0) From 82e4de4994e4538eca4e7502f1d023f4d49a36c8 Mon Sep 17 00:00:00 2001 From: leslie-fang-intel Date: Mon, 4 Nov 2024 16:26:19 -0800 Subject: [PATCH 111/503] [Inductor][CPU] Enable the oneDNN Linear fusion for special case (#139172) **Summary** In LLaMA2, a linear operation with an activation of size `(4, 1, 4096)` and stride `(4096, 128, 1)` is decomposed into `matmul`, and the decomposition of `matmul` results in `bmm` due to a strict contiguity check. We can align the contiguity check with ATen by skipping dims of size 1, enabling decomposition into `mm` instead.
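For intuition, a minimal eager-mode sketch of the relaxed stride check (illustration only: it builds the quoted layout with `as_strided` and omits the `guard_size_oblivious` symbolic-shape handling used in the real decomposition):

```
import torch

# A tensor with the layout quoted above: shape (4, 1, 4096), stride (4096, 128, 1).
x = torch.randn(16384).as_strided((4, 1, 4096), (4096, 128, 1))

# Strides a contiguous tensor of this shape would have, built as in the new check.
expected_stride = [1]
for size in reversed(x.shape[1:]):
    expected_stride.append(size * expected_stride[-1])
expected_stride = list(reversed(expected_stride))  # [4096, 4096, 1]

# The size-1 dim carries stride 128 rather than the contiguous 4096, so an exact
# stride comparison rejects this layout; skipping dims of size 1, as ATen's
# TensorGeometry does, accepts it and lets matmul decompose to mm instead of bmm.
foldable = all(
    size == 1 or actual == expected
    for actual, expected, size in zip(x.stride(), expected_stride, x.shape)
)
assert foldable
```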
**Test Plan** ``` python -u -m pytest -s -v test/inductor/test_mkldnn_pattern_matcher.py -k test_linear_input_non_contiguous_3D_wo_bias ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/139172 Approved by: https://github.com/jgong5, https://github.com/ezyang --- test/inductor/test_mkldnn_pattern_matcher.py | 46 ++++++++++++++++++++ torch/_decomp/decompositions.py | 13 +++++- 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/test/inductor/test_mkldnn_pattern_matcher.py b/test/inductor/test_mkldnn_pattern_matcher.py index 08f885d130b2e1..772d083b03b361 100644 --- a/test/inductor/test_mkldnn_pattern_matcher.py +++ b/test/inductor/test_mkldnn_pattern_matcher.py @@ -382,6 +382,49 @@ def forward(self, x): matcher_nodes = 1 self._test_common(mod, (v,), matcher_count, matcher_nodes) + @unittest.skipIf(not TEST_MKL, "Test requires MKL") + def test_linear_input_non_contiguous_3D_wo_bias(self): + # Activation is 3D, non-contiguous and without Bias + class M(torch.nn.Module): + def __init__(self): + super().__init__() + self.linear = torch.nn.Linear(4096, 1024, bias=False) + + def forward(self, x): + x = torch.ops.aten.permute.default(x, [0, 2, 1, 3]) + x = torch.ops.aten.reshape.default(x, [4, 1, 4096]) + return self.linear(x) + + mod = M().eval() + v = torch.randn(4, 32, 1, 128) + + dtypes = [torch.float] + if torch.ops.mkldnn._is_mkldnn_bf16_supported(): + dtypes.append(torch.bfloat16) + if torch.ops.mkldnn._is_mkldnn_fp16_supported(): + dtypes.append(torch.float16) + + for dtype in dtypes: + torch._dynamo.reset() + autocast_enabled = ( + True if dtype in [torch.bfloat16, torch.float16] else False + ) + with torch.no_grad(), torch.autocast( + device_type="cpu", enabled=autocast_enabled, dtype=dtype + ): + expected = mod(v) + actual, (source_code,) = run_and_get_code( + torch.compile(mod, fullgraph=True), + v, + ) + self.assertIn( + "torch.ops.mkldnn._linear_pointwise.default" + if autocast_enabled + else "torch.ops.mkl._mkl_linear.default", + source_code, + ) + torch.testing.assert_close(actual, expected, atol=1e-2, rtol=1e-2) + def test_linear_add_bias(self): class M(torch.nn.Module): def __init__(self, dtype, unary_fn, cast_bias): @@ -2791,6 +2834,9 @@ class TestDynamicPatternMatcher(TestPatternMatcherBase): test_conv2d_binary_dynamic_shapes = TestPatternMatcher.test_conv2d_binary test_conv3d_binary_dynamic_shapes = TestPatternMatcher.test_conv3d_binary test_linear_unary_dynamic_shapes = TestPatternMatcher.test_linear_unary + test_linear_input_non_contiguous_3D_wo_bias_dynamic_shapes = ( + TestPatternMatcher.test_linear_input_non_contiguous_3D_wo_bias + ) def test_conv_transpose2d_dynamic_shapes(self): # We don't support conv_transpose2d for now. 
diff --git a/torch/_decomp/decompositions.py b/torch/_decomp/decompositions.py index ffa83a80bcc667..49aceefa2aee9c 100644 --- a/torch/_decomp/decompositions.py +++ b/torch/_decomp/decompositions.py @@ -4410,9 +4410,18 @@ def should_fold(tensor1: torch.Tensor, tensor2: torch.Tensor, is_out: bool) -> b t1_shape = t1.shape t1_stride = t1.stride() + + # Check the contiguous, we can skip the dim with size of 1 + # as aten: https://github.com/pytorch/pytorch/blob/ + # e201460f8aa1510b4c4686627d57b69756c4b916/aten/src/ATen/TensorGeometry.cpp#L17 + expected_stride = [1] + for size in reversed(t1_shape[1:]): + expected_stride.append(size * expected_stride[-1]) return all( - st1 == st2 * s2 - for (st1, st2, s2) in zip(t1_stride[:-2], t1_stride[1:-1], t1_shape[1:-1]) + guard_size_oblivious(size == 1) or left == right + for left, right, size in zip( + t1_stride, list(reversed(expected_stride)), t1_shape + ) ) From c2109ec479638154af82a44486e75c11faba0ce2 Mon Sep 17 00:00:00 2001 From: Aaron Orenstein Date: Tue, 5 Nov 2024 08:32:22 -0800 Subject: [PATCH 112/503] typing ir.py - Disallow untyped defs for ir.py (#139238) - Remove "mypy: allow-untyped-defs" and mark functions individually with "no-untyped-def" - Mark some trivial functions with the proper return types (`None` and `torch.dtype`) - Fixed a type bug in the signature of supported_dtype_of_cpp_wrapper() - `ruff check torch/_inductor/ir.py --select ANN --fix --unsafe-fixes` and then fixed up things that looked incorrectly applied. Pull Request resolved: https://github.com/pytorch/pytorch/pull/139238 Approved by: https://github.com/Skylion007, https://github.com/ezyang --- torch/_inductor/graph.py | 2 +- torch/_inductor/ir.py | 967 ++++++++++++++++++++------------------- 2 files changed, 486 insertions(+), 483 deletions(-) diff --git a/torch/_inductor/graph.py b/torch/_inductor/graph.py index 86b5807392944f..21423d8323b4ee 100644 --- a/torch/_inductor/graph.py +++ b/torch/_inductor/graph.py @@ -131,7 +131,7 @@ def log_module_code(*args: Any, **kwargs: Any) -> None: pass -def supported_dtype_of_cpp_wrapper(dtype: torch.device, device_type: str) -> bool: +def supported_dtype_of_cpp_wrapper(dtype: torch.dtype, device_type: str) -> bool: supported_dtype = { torch.float32, torch.float64, diff --git a/torch/_inductor/ir.py b/torch/_inductor/ir.py index 37f769ae3bddcc..76f05626bde606 100644 --- a/torch/_inductor/ir.py +++ b/torch/_inductor/ir.py @@ -1,4 +1,3 @@ -# mypy: allow-untyped-defs from __future__ import annotations import contextlib @@ -17,6 +16,7 @@ ClassVar, ContextManager, Dict, + Generator, Iterable, List, Literal, @@ -58,6 +58,7 @@ free_unbacked_symbols, rebind_unbacked, resolve_unbacked_bindings, + ShapeEnv, SymTypes, ) from torch.utils._ordered_set import OrderedSet @@ -237,7 +238,7 @@ def reindex(index: Sequence[_T]) -> Sequence[_V]: def get_fill_order( - seq: Sequence[Union[int, torch.SymInt, Expr]], shape_env=None + seq: Sequence[Union[int, torch.SymInt, Expr]], shape_env: Optional[ShapeEnv] = None ) -> Sequence[int]: """ Convert strides to fill order (argsort) @@ -263,7 +264,7 @@ def stride_order2fill_order(order: Sequence[Union[int, Integer]]) -> Sequence[in def get_stride_order( - seq: Sequence[Union[int, torch.SymInt, Expr]], shape_env=None + seq: Sequence[Union[int, torch.SymInt, Expr]], shape_env: Optional[ShapeEnv] = None ) -> Sequence[int]: """ Convert strides to stride order @@ -345,12 +346,14 @@ class IRNode: # NB: These are kinda weird, origins: OrderedSet[Any] = dataclasses.field(init=False) - traceback: str = 
dataclasses.field(init=False) + traceback: Optional[List[str]] = dataclasses.field(init=False) origin_node: Optional[torch.fx.Node] = dataclasses.field(init=False) @staticmethod @contextlib.contextmanager - def current_origins(origins: OrderedSet[torch.fx.Node]): + def current_origins( + origins: OrderedSet[torch.fx.Node], + ) -> Generator[None, None, None]: old = IRNode._current_origins IRNode._current_origins = old | origins try: @@ -358,13 +361,13 @@ def current_origins(origins: OrderedSet[torch.fx.Node]): finally: IRNode._current_origins = old - def _post_init_setattr(self, attr, value): + def _post_init_setattr(self, attr, value) -> None: # type: ignore[no-untyped-def] # Intended for use in __post_init__ for enforcing an invariant on a dataclass # If you must, can also be used for setting provenance info # We would like to try and minimize these usages though object.__setattr__(self, attr, value) - def __post_init__(self): + def __post_init__(self) -> None: self._post_init_setattr("origins", OrderedSet(self._current_origins)) self._post_init_setattr( "traceback", traceback.format_stack() if config.debug_ir_traceback else None @@ -374,23 +377,23 @@ def __post_init__(self): def get_read_names(self) -> OrderedSet[str]: raise NotImplementedError(f"NYI on {type(self)}") - def get_traceback(self): + def get_traceback(self) -> Optional[List[str]]: return self.traceback - def get_origin_node(self): + def get_origin_node(self): # type: ignore[no-untyped-def] return self.origin_node - def get_defining_op(self): + def get_defining_op(self) -> Optional[Operation]: raise NotImplementedError - def common_repr(self, shorten=True): + def common_repr(self, shorten=True): # type: ignore[no-untyped-def] origins = f"origins={getattr(self, 'origins', '')}" if shorten and len(origins) > 64: # this can get *very* long origins = f"{origins[:61]}..." 
return [origins] - def str_helper(self, lines, shorten=True, multiline=True): + def str_helper(self, lines, shorten=True, multiline=True) -> str: # type: ignore[no-untyped-def] lines = lines + self.common_repr(shorten) lines = list(map(str, lines)) if multiline: @@ -399,26 +402,26 @@ def str_helper(self, lines, shorten=True, multiline=True): else: return f"{type(self).__name__}({lines})" - def get_dtype(self): + def get_dtype(self) -> torch.dtype: return self.dtype - def get_layout(self): + def get_layout(self): # type: ignore[no-untyped-def] raise NotImplementedError(f"get_layout() is not implemented by {type(self)}!") - def get_size(self): + def get_size(self): # type: ignore[no-untyped-def] raise NotImplementedError(f"get_size() is not implemented by {type(self)}!") @property - def shape(self): + def shape(self): # type: ignore[no-untyped-def] return self.get_size() - def get_numel(self): + def get_numel(self): # type: ignore[no-untyped-def] return sympy_product(self.get_size()) - def is_zero_elements(self): + def is_zero_elements(self): # type: ignore[no-untyped-def] return V.graph.sizevars.is_expr_static_and_true(sympy.Eq(self.get_numel(), 0)) - def realize(self): + def realize(self): # type: ignore[no-untyped-def] """ If the IRNode refers to data which has not been materialized (e.g., it is a Pointwise/Reduction that could potentially have more @@ -436,7 +439,7 @@ def realize(self): """ raise NotImplementedError(f"realize NYI on {type(self)}") - def codegen_reference(self, writer=None): + def codegen_reference(self, writer=None): # type: ignore[no-untyped-def] raise NotImplementedError(f"codegen_reference NYI on {type(self)}") # The abstract method declarations below serve to convince mypy that all IRNode instances have these functions @@ -459,17 +462,17 @@ def codegen_reference(self, writer=None): @ir_dataclass(frozen=False) class Operation: - def __post_init__(self): + def __post_init__(self) -> None: self.operation_name: Optional[str] = None - def get_device(self): + def get_device(self): # type: ignore[no-untyped-def] raise NotImplementedError - def get_origin_node(self): + def get_origin_node(self): # type: ignore[no-untyped-def] assert hasattr(self, "origin_node") return self.origin_node - def get_origins(self): + def get_origins(self): # type: ignore[no-untyped-def] assert hasattr(self, "origins") return self.origins @@ -477,22 +480,22 @@ def get_operation_name(self) -> str: assert self.operation_name is not None return self.operation_name - def is_extern(self): + def is_extern(self) -> bool: return False - def is_no_op(self): + def is_no_op(self) -> bool: return False - def get_read_writes(self): + def get_read_writes(self): # type: ignore[no-untyped-def] raise NotImplementedError - def is_user_of(self, name): + def is_user_of(self, name): # type: ignore[no-untyped-def] return name in self.get_read_names() def get_read_names(self) -> OrderedSet[str]: return OrderedSet(dep.name for dep in self.get_reads()) - def get_reads(self): + def get_reads(self): # type: ignore[no-untyped-def] return self.get_read_writes().reads def get_outputs(self) -> List[Buffer]: @@ -518,7 +521,7 @@ def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: """ return OrderedSet() - def get_workspace_size(self): + def get_workspace_size(self) -> int: """ Gets extra global memory size needed by this buffer. Some algorithms (e.g. group gemm) may require extra global memory in the generated code. 
@@ -539,7 +542,7 @@ def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: self.inner_fn_free_unbacked_symbols(), ) - def __str__(self, names=("ranges",)): + def __str__(self, names=("ranges",)) -> str: # type: ignore[no-untyped-def] return self.str_helper( [ f"'{self.device.type}'", @@ -550,28 +553,28 @@ def __str__(self, names=("ranges",)): + [f"origin_node={self.origin_node!r}"] ) - def __post_init__(self): + def __post_init__(self) -> None: super().__post_init__() __repr__ = __str__ - def get_device(self): + def get_device(self): # type: ignore[no-untyped-def] return self.device - def get_origin_node(self): + def get_origin_node(self): # type: ignore[no-untyped-def] return self.origin_node - def get_size(self): + def get_size(self): # type: ignore[no-untyped-def] return self.ranges - def get_pointwise_size(self): + def get_pointwise_size(self): # type: ignore[no-untyped-def] return self.ranges - def is_extern(self): + def is_extern(self) -> bool: return False @classmethod - def create(cls, *args, **kwargs): + def create(cls, *args, **kwargs): # type: ignore[no-untyped-def] origin_node = kwargs.pop("origin_node", None) tb = kwargs.pop("traceback", None) # if "origin_node" in kwargs: @@ -585,7 +588,7 @@ def create(cls, *args, **kwargs): return TensorBox.create(r) @staticmethod - def _index(ranges, prefix=SymT.INDEX): + def _index(ranges, prefix=SymT.INDEX): # type: ignore[no-untyped-def] return [ sympy.S.Zero if s == 1 else sympy_index_symbol_with_prefix(prefix, n) for n, s in enumerate(ranges) @@ -600,26 +603,26 @@ def inner_fn_opcount(self) -> OpCountResult: self.inner_fn(*self.inner_fn_args()) return opcounter.getvalue() - def inner_fn_args(self): + def inner_fn_args(self): # type: ignore[no-untyped-def] return (self._index(self.ranges),) @cache_on_self - def inner_fn_str(self): + def inner_fn_str(self): # type: ignore[no-untyped-def] return V.KernelFormatterHandler.ir_to_string( self.inner_fn, *self.inner_fn_args() ) - def has_large_inner_fn(self, threshold=None): + def has_large_inner_fn(self, threshold=None): # type: ignore[no-untyped-def] if threshold is None: threshold = 0 threshold = max(threshold, config.realize_opcount_threshold) return self.inner_fn_opcount().num_ops > threshold - def inner_fn_free_unbacked_symbols(self): + def inner_fn_free_unbacked_symbols(self): # type: ignore[no-untyped-def] index = self._index(self.ranges) return extract_free_unbacked_symbols(self.inner_fn, index) - def get_reads(self): + def get_reads(self): # type: ignore[no-untyped-def] with patch.object(FlexibleLayout, "allow_indexing", True): if self.get_reduction_type(): return extract_read_writes( @@ -636,20 +639,20 @@ def get_reads(self): def get_read_names(self) -> OrderedSet[str]: return OrderedSet(self.inner_fn_opcount().read_buffers) - def num_reads(self): + def num_reads(self): # type: ignore[no-untyped-def] return len(self.inner_fn_opcount().read_buffers) - def get_reduction_size(self): + def get_reduction_size(self): # type: ignore[no-untyped-def] raise NotImplementedError( f"get_reduction_size() is not implemented by {type(self)}!" ) - def get_reduction_type(self): + def get_reduction_type(self): # type: ignore[no-untyped-def] raise NotImplementedError( f"get_reduction_type() is not implemented by {type(self)}!" ) - def constant_to_device(self, device): + def constant_to_device(self, device): # type: ignore[no-untyped-def] raise NotImplementedError( f"constant_to_device() is not implemented by {type(self)}!" 
) @@ -664,24 +667,24 @@ def nop_loader_fn(idx: Union[Expr, Sequence[Expr]], *, dtype: torch.dtype) -> Op @ir_dataclass class Pointwise(Loops): - def make_loader(self): + def make_loader(self): # type: ignore[no-untyped-def] # Make zero-element loops into a no-op if self.is_zero_elements(): return partial(nop_loader_fn, dtype=self.dtype) return self.inner_fn - def get_reduction_size(self): + def get_reduction_size(self): # type: ignore[no-untyped-def] return [] - def get_reduction_type(self): + def get_reduction_type(self): # type: ignore[no-untyped-def] return None - def store_output(self, output_name, indexer, vars): + def store_output(self, output_name, indexer, vars): # type: ignore[no-untyped-def] loader = self.make_loader() return ops.store(output_name, indexer(vars), loader(vars)) - def constant_to_device(self, device): + def constant_to_device(self, device): # type: ignore[no-untyped-def] """Move this to a given device. Requires that all reads are to constants.""" loader = self.make_loader() loader = patch.object(ConstantBuffer, "override_device", device)(loader) @@ -695,7 +698,7 @@ class Scatter(Pointwise): output_indexer: Callable[[List[Expr]], Expr] scatter_mode: Optional[str] = None - def constant_to_device(self, device): + def constant_to_device(self, device): # type: ignore[no-untyped-def] """Move this to a given device. Requires that all reads are to constants.""" loader = self.make_loader() loader = patch.object(ConstantBuffer, "override_device", device)(loader) @@ -708,7 +711,7 @@ def constant_to_device(self, device): scatter_mode=self.scatter_mode, ) - def store_output(self, output_name, indexer, vars): + def store_output(self, output_name, indexer, vars): # type: ignore[no-untyped-def] loader = self.make_loader() return ops.store( output_name, @@ -828,13 +831,13 @@ def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: *(free_unbacked_symbols(e) for e in self.reduction_ranges) ) - def get_reduction_size(self): + def get_reduction_size(self): # type: ignore[no-untyped-def] return self.reduction_ranges - def get_reduction_type(self): + def get_reduction_type(self): # type: ignore[no-untyped-def] return self.reduction_type - def store_reduction(self, output_name, indexer, vars, reduction_vars): + def store_reduction(self, output_name, indexer, vars, reduction_vars): # type: ignore[no-untyped-def] value = ops.reduction( self.dtype, self.src_dtype, @@ -843,20 +846,20 @@ def store_reduction(self, output_name, indexer, vars, reduction_vars): ) return ops.store_reduction(output_name, indexer(vars), value) - def index_length(self): + def index_length(self) -> int: return len(self.ranges) + len(self.reduction_ranges) - def inner_fn_args(self): + def inner_fn_args(self): # type: ignore[no-untyped-def] index = self._index(self.ranges) rindex = self._index(self.reduction_ranges, SymT.RINDEX) return (index, rindex) - def inner_fn_free_unbacked_symbols(self): + def inner_fn_free_unbacked_symbols(self): # type: ignore[no-untyped-def] index = self._index(self.ranges) rindex = self._index(self.reduction_ranges, SymT.RINDEX) return extract_free_unbacked_symbols(self.inner_fn, index, rindex) - def constant_to_device(self, device): + def constant_to_device(self, device): # type: ignore[no-untyped-def] """Move this to a given device. 
Requires that all reads are to constants.""" loader = self.make_loader() loader = patch.object(ConstantBuffer, "override_device", device)(loader) @@ -872,7 +875,7 @@ def constant_to_device(self, device): ) @staticmethod - def num_splits( + def num_splits( # type: ignore[no-untyped-def] # type: ignore[no-untyped-def] device, dst_dtype, src_dtype, @@ -883,7 +886,7 @@ def num_splits( reduction_numel, input_node: Optional[IRNode] = None, ): - def _is_static(x): + def _is_static(x): # type: ignore[no-untyped-def] return isinstance(x, (int, sympy.Integer)) reduction_numel_hint = V.graph.sizevars.symbolic_hint(reduction_numel) @@ -916,7 +919,7 @@ def _is_static(x): min_elements_per_device = min_elements_per_thread * num_sm * threads_per_sm max_elements_per_device = max_elements_per_thread * num_sm * threads_per_sm - def inner_reduction_splits(reduction_numel_hint, numel_hint): + def inner_reduction_splits(reduction_numel_hint, numel_hint): # type: ignore[no-untyped-def] if not should_split: return 1 # do heuristics that's close to eager mode for split inner reduction @@ -954,7 +957,7 @@ def inner_reduction_splits(reduction_numel_hint, numel_hint): split_size * num_threads ) - def outer_reduction_splits(reduction_numel_hint, numel_hint): + def outer_reduction_splits(reduction_numel_hint, numel_hint): # type: ignore[no-untyped-def] if not should_split: return 1 # TODO the best heuristic currently has XBLOCK (corresponding to numel_hint) 128 @@ -1037,7 +1040,7 @@ def outer_reduction_splits(reduction_numel_hint, numel_hint): reduction_hint=ReductionHint.DEFAULT, ) - def get_read_indices(r): + def get_read_indices(r): # type: ignore[no-untyped-def] cb = ComputedBuffer( name=None, layout=FlexibleLayout( @@ -1100,7 +1103,7 @@ def get_read_indices(r): ) @staticmethod - def _unroll_reduction_fn(inner_fn, reduction_ranges, reduction_type, src_dtype): + def _unroll_reduction_fn(inner_fn, reduction_ranges, reduction_type, src_dtype): # type: ignore[no-untyped-def] """Convert inner_fn from a reduction to an pointwise""" reduction_ranges = [ V.graph.sizevars.evaluate_static_shape(x) for x in reduction_ranges @@ -1108,7 +1111,7 @@ def _unroll_reduction_fn(inner_fn, reduction_ranges, reduction_type, src_dtype): combine_fn = get_reduction_combine_fn(reduction_type, src_dtype) - def fn(index): + def fn(index): # type: ignore[no-untyped-def] return functools.reduce( combine_fn, ( @@ -1127,7 +1130,7 @@ def fn(index): FlexibleLayout.contiguous_strides(reduction_ranges), ).make_indexer() - def value_fn(index, rindex): + def value_fn(index, rindex): # type: ignore[no-untyped-def] rindex = [sympy.expand(i) for i in rindex] return ( inner_fn(index, rindex), @@ -1140,7 +1143,7 @@ def value_fn(index, rindex): return fn @classmethod - def create( + def create( # type: ignore[no-untyped-def] cls, device: torch.device, dst_dtype: torch.dtype, @@ -1158,7 +1161,7 @@ def create( # N.B. This is a hack to generate the literal of the given type # Ideally, we should be fixing `def constant` in triton.py # but it breaks due to hardcoded dtypes in other places - def py_cnst(val): + def py_cnst(val): # type: ignore[no-untyped-def] return ( bool(val) if dst_dtype == torch.bool @@ -1179,7 +1182,7 @@ def py_cnst(val): reduction_type in rtypes_to_inits.keys() ), f"{reduction_type} not supported for zero-dimension tensors!" 
- def const_fn(index): + def const_fn(index): # type: ignore[no-untyped-def] return ops.constant(rtypes_to_inits[reduction_type], dst_dtype) return Pointwise.create( @@ -1193,12 +1196,12 @@ def const_fn(index): # this reduction is actually a pointwise op if reduction_type in ("argmin", "argmax"): - def fn(index): + def fn(index): # type: ignore[no-untyped-def] return ops.constant(0, dst_dtype) else: - def fn(index): + def fn(index): # type: ignore[no-untyped-def] reduction_index = [sympy.S.Zero for _ in reduction_ranges] return inner_fn(index, reduction_index) @@ -1285,7 +1288,7 @@ def fn(index): ) @staticmethod - def default_accumulator(reduction_type, dtype): + def default_accumulator(reduction_type, dtype): # type: ignore[no-untyped-def] if reduction_type in ("max", "argmax"): if is_float_dtype(dtype): return float("-inf") @@ -1311,7 +1314,7 @@ def default_accumulator(reduction_type, dtype): }[reduction_type] @staticmethod - def default_value(reduction_type, dtype): + def default_value(reduction_type, dtype): # type: ignore[no-untyped-def] if reduction_type == "welford_reduce": return 0 return Reduction.default_accumulator(reduction_type, dtype) @@ -1334,7 +1337,7 @@ def _multilayer_second_step_hint( return reduction_hint @classmethod - def _multilayer_wrap_loader( + def _multilayer_wrap_loader( # type: ignore[no-untyped-def] cls, loader, reduction_ranges, @@ -1348,12 +1351,12 @@ def _multilayer_wrap_loader( sympy.Eq(reduction_numel % split, 0) ) - def wrapper_fn(index, reduction_index): + def wrapper_fn(index, reduction_index): # type: ignore[no-untyped-def] (reduction_index,) = reduction_index *new_index, reduction_block = index indices = block_size * reduction_block + reduction_index - def body(): + def body(): # type: ignore[no-untyped-def] return loader(new_index, reindex([indices])) if need_mask: @@ -1368,7 +1371,7 @@ def body(): return wrapper_fn @classmethod - def _multilayer_wrap_loader_existing_ranges( + def _multilayer_wrap_loader_existing_ranges( # type: ignore[no-untyped-def] cls, loader, original_ranges, @@ -1384,7 +1387,7 @@ def _multilayer_wrap_loader_existing_ranges( original_reduction_ranges, tuple(new_ranges) + tuple(new_reduction_ranges) ) - def wrapper_fn(merged_index, new_reduction_index): + def wrapper_fn(merged_index, new_reduction_index): # type: ignore[no-untyped-def] original_idx = merged_index[: len(original_ranges)] new_index = merged_index[len(original_ranges) :] return loader( @@ -1395,7 +1398,7 @@ def wrapper_fn(merged_index, new_reduction_index): return wrapper_fn @classmethod - def create_multilayer_helper( + def create_multilayer_helper( # type: ignore[no-untyped-def] cls, device: torch.device, dst_dtype: torch.dtype, @@ -1434,7 +1437,7 @@ def create_multilayer_helper( intermediate.realize() intermediate_loader = intermediate.make_loader() - def intermediate_fn(index, reduction_index): + def intermediate_fn(index, reduction_index): # type: ignore[no-untyped-def] return intermediate_loader([*index, *reduction_index]) numel_hint = V.graph.sizevars.size_hint(sympy_product(original_ranges)) @@ -1457,7 +1460,7 @@ def intermediate_fn(index, reduction_index): ) @classmethod - def create_multilayer( + def create_multilayer( # type: ignore[no-untyped-def] cls, device: torch.device, dst_dtype: torch.dtype, @@ -1496,7 +1499,7 @@ def create_multilayer( ) @classmethod - def create_multilayer_existing_ranges( + def create_multilayer_existing_ranges( # type: ignore[no-untyped-def] cls, device: torch.device, dst_dtype: torch.dtype, @@ -1540,7 +1543,7 @@ def 
create_multilayer_existing_ranges( class WelfordReduction(Reduction): output_index: int - def __init__( + def __init__( # type: ignore[no-untyped-def] self, device, dtype, @@ -1550,12 +1553,12 @@ def __init__( reduction_type, reduction_hint, output_index, - ): + ) -> None: if len(inner_fns) == 1: loader = inner_fns[0] else: - def loader(idx, reduction_idx): + def loader(idx, reduction_idx): # type: ignore[no-untyped-def] return tuple(fn(idx, reduction_idx) for fn in inner_fns) super().__init__( @@ -1570,7 +1573,7 @@ def loader(idx, reduction_idx): ) self.output_index = output_index - def store_reduction(self, output_name, indexer, vars, reduction_vars): + def store_reduction(self, output_name, indexer, vars, reduction_vars): # type: ignore[no-untyped-def] values = ops.reduction( self.dtype, self.src_dtype, @@ -1581,7 +1584,7 @@ def store_reduction(self, output_name, indexer, vars, reduction_vars): return ops.store_reduction(output_name, indexer(vars), value) @classmethod - def create( # type: ignore[override] + def create( # type: ignore[override, no-untyped-def] cls, device: torch.device, dtype: torch.dtype, @@ -1595,8 +1598,8 @@ def create( # type: ignore[override] reduction_numel = V.graph.sizevars.simplify(sympy_product(reduction_ranges)) - def const(val): - def inner_fn(idx): + def const(val): # type: ignore[no-untyped-def] + def inner_fn(idx): # type: ignore[no-untyped-def] return ops.constant( val, dtype, @@ -1617,8 +1620,8 @@ def inner_fn(idx): if reduction_numel == 1: - def copy(loader): - def inner_fn(idx): + def copy(loader): # type: ignore[no-untyped-def] + def inner_fn(idx): # type: ignore[no-untyped-def] reduction_index = [sympy.S.Zero for _ in reduction_ranges] return loader(idx, reduction_index) @@ -1699,11 +1702,11 @@ def inner_fn(idx): return results @staticmethod - def default_value(reduction_type, dtype): + def default_value(reduction_type, dtype): # type: ignore[no-untyped-def] return (0, 0, 0) @classmethod - def create_multilayer( # type: ignore[override] + def create_multilayer( # type: ignore[override, no-untyped-def] cls, device: torch.device, dtype: torch.dtype, @@ -1727,7 +1730,7 @@ def create_multilayer( # type: ignore[override] # If we need mask, then "welford_reduce" doesn't work because # masked inputs shouldn't count towards the welford weight - def constant(idx, reduction_idx, value): + def constant(idx, reduction_idx, value): # type: ignore[no-untyped-def] return ops.constant(value, dtype) return cls.create_multilayer( @@ -1770,7 +1773,7 @@ def constant(idx, reduction_idx, value): i_loaders = [i.make_loader() for i in intermediates] - def intermediate_loader_fn(index, reduction_index, loader): + def intermediate_loader_fn(index, reduction_index, loader): # type: ignore[no-untyped-def] return loader([*index, *reduction_index]) numel_hint = V.graph.sizevars.size_hint(sympy_product(ranges)) @@ -1816,46 +1819,46 @@ def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: | OrderedSet().union(*(free_unbacked_symbols(e) for e in self.size)) ) - def __post_init__(self): + def __post_init__(self) -> None: assert len(self.ranges) + len(self.scan_ranges) == len(self.size) super().__post_init__() - def store_reduction(self, output_name, indexer, vars, scan_vars): + def store_reduction(self, output_name, indexer, vars, scan_vars): # type: ignore[no-untyped-def] idx = self.reindex(vars, scan_vars) values = [inner_fn(idx) for inner_fn in self.inner_fns] result = ops.scan(self.dtypes, self.combine_fn, values) return ops.store(output_name, indexer(idx), 
result[self.output_index]) - def get_reduction_type(self): + def get_reduction_type(self): # type: ignore[no-untyped-def] # return self.scan_op return "custom" - def get_reduction_size(self): + def get_reduction_size(self): # type: ignore[no-untyped-def] return self.scan_ranges - def get_size(self): + def get_size(self): # type: ignore[no-untyped-def] return self.size - def get_pointwise_size(self): + def get_pointwise_size(self): # type: ignore[no-untyped-def] return self.ranges - def index_length(self): + def index_length(self) -> int: return len(self.ranges) + len(self.scan_ranges) - def inner_fn_args(self): + def inner_fn_args(self): # type: ignore[no-untyped-def] index = self._index(self.ranges) rindex = self._index(self.scan_ranges, SymT.RINDEX) idx = self.reindex(index, rindex) return (idx,) - def inner_fn_free_unbacked_symbols(self): + def inner_fn_free_unbacked_symbols(self): # type: ignore[no-untyped-def] index = self._index(self.ranges) rindex = self._index(self.scan_ranges, SymT.RINDEX) idx = self.reindex(index, rindex) return extract_free_unbacked_symbols(self.inner_fn, idx) @classmethod - def create( + def create( # type: ignore[no-untyped-def] cls, device: torch.device, dtypes: Tuple[torch.dtype, ...], @@ -1919,7 +1922,7 @@ def create( else: scan_type = SplitScan - def reindex(index, scan_index): + def reindex(index, scan_index): # type: ignore[no-untyped-def] assert len(scan_index) == len(scan_ranges) assert len(index) == len(pointwise_ranges) return [*index[:axis], *scan_index, *index[axis:]] @@ -1951,7 +1954,7 @@ def reindex(index, scan_index): return results @classmethod - def num_splits( + def num_splits( # type: ignore[no-untyped-def] cls, device: torch.device, dtype: torch.dtype, @@ -1963,7 +1966,7 @@ def num_splits( scan_numel: Expr, ): # TODO: custom splitting heuristic for scan - def wrapper_fn(idx, reduction_idx): + def wrapper_fn(idx, reduction_idx): # type: ignore[no-untyped-def] return inner_fn([*idx[:axis], *reduction_idx, *idx[axis:]]) return Reduction.num_splits( @@ -2008,45 +2011,45 @@ def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: | OrderedSet().union(*(free_unbacked_symbols(e) for e in self.size)) ) - def __post_init__(self): + def __post_init__(self) -> None: assert len(self.ranges) + len(self.sort_ranges) == len(self.size) super().__post_init__() - def store_reduction(self, output_name, indexer, vars, sort_vars): + def store_reduction(self, output_name, indexer, vars, sort_vars): # type: ignore[no-untyped-def] idx = self.reindex(vars, sort_vars) values = [inner_fn(idx) for inner_fn in self.inner_fns] result = ops.sort(self.dtypes, values, self.stable, self.descending) return ops.store(output_name, indexer(idx), result[self.output_index]) - def get_reduction_type(self): + def get_reduction_type(self): # type: ignore[no-untyped-def] return "sort" - def get_reduction_size(self): + def get_reduction_size(self): # type: ignore[no-untyped-def] return self.sort_ranges - def get_size(self): + def get_size(self): # type: ignore[no-untyped-def] return self.size - def get_pointwise_size(self): + def get_pointwise_size(self): # type: ignore[no-untyped-def] return self.ranges - def index_length(self): + def index_length(self) -> int: return len(self.ranges) + len(self.sort_ranges) - def inner_fn_args(self): + def inner_fn_args(self): # type: ignore[no-untyped-def] index = self._index(self.ranges) rindex = self._index(self.sort_ranges, SymT.RINDEX) idx = self.reindex(index, rindex) return (idx,) - def inner_fn_free_unbacked_symbols(self): + def 
inner_fn_free_unbacked_symbols(self): # type: ignore[no-untyped-def] index = self._index(self.ranges) rindex = self._index(self.sort_ranges, SymT.RINDEX) idx = self.reindex(index, rindex) return extract_free_unbacked_symbols(self.inner_fn, idx) @classmethod - def create( + def create( # type: ignore[no-untyped-def] cls, device: torch.device, dtypes: Tuple[torch.dtype, ...], @@ -2092,7 +2095,7 @@ def create( for output_index in range(len(dtypes)) ] - def reindex(index, sort_index): + def reindex(index, sort_index): # type: ignore[no-untyped-def] assert len(sort_index) == len(sort_ranges) assert len(index) == len(pointwise_ranges) return [*index[:axis], *sort_index, *index[axis:]] @@ -2214,87 +2217,87 @@ def is_stride_order_storage_and_layout( class BaseView(IRNode): data: IRNode - def get_unbacked_symbol_uses(self): + def get_unbacked_symbol_uses(self): # type: ignore[no-untyped-def] return self.data.get_unbacked_symbol_uses() - def make_reindexer(self): + def make_reindexer(self): # type: ignore[no-untyped-def] raise NotImplementedError(f"make_reindexer NYI on {self}") - def make_indexer(self): + def make_indexer(self): # type: ignore[no-untyped-def] inner = self.data.make_indexer() reindex = self.make_reindexer() - def indexer(idx): + def indexer(idx): # type: ignore[no-untyped-def] return inner(reindex(idx)) return indexer - def make_loader(self): + def make_loader(self): # type: ignore[no-untyped-def] inner = self.data.make_loader() reindex = self.make_reindexer() - def loader(idx): + def loader(idx): # type: ignore[no-untyped-def] return inner(reindex(idx)) return loader @property - def dtype(self): + def dtype(self): # type: ignore[no-untyped-def] return self.data.dtype - def get_layout(self): + def get_layout(self): # type: ignore[no-untyped-def] return self.data.get_layout() - def get_device(self): + def get_device(self): # type: ignore[no-untyped-def] return self.data.get_device() - def get_origin_node(self): + def get_origin_node(self): # type: ignore[no-untyped-def] return None - def get_name(self): + def get_name(self): # type: ignore[no-untyped-def] return self.data.get_name() - def get_pointwise_size(self): + def get_pointwise_size(self): # type: ignore[no-untyped-def] return self.get_size() - def mark_reuse(self, users): + def mark_reuse(self, users): # type: ignore[no-untyped-def] return self.data.mark_reuse(users) - def has_exceeded_max_reads(self): + def has_exceeded_max_reads(self): # type: ignore[no-untyped-def] return self.data.has_exceeded_max_reads() - def realize(self): + def realize(self): # type: ignore[no-untyped-def] return self.data.realize() - def realize_hint(self): + def realize_hint(self): # type: ignore[no-untyped-def] return self.data.realize_hint() - def get_storage_numel(self): + def get_storage_numel(self): # type: ignore[no-untyped-def] return self.data.get_storage_numel() - def is_extern(self): + def is_extern(self): # type: ignore[no-untyped-def] return self.data.is_extern() # type: ignore[attr-defined] - def is_module_buffer(self): + def is_module_buffer(self): # type: ignore[no-untyped-def] return self.data.is_module_buffer() # type: ignore[attr-defined] def get_read_names(self) -> OrderedSet[str]: return self.data.get_read_names() - def get_reads(self): + def get_reads(self): # type: ignore[no-untyped-def] with patch.object(FlexibleLayout, "allow_indexing", True): return extract_read_writes( self.make_loader(), self.get_size(), ).reads - def unwrap_view(self): + def unwrap_view(self): # type: ignore[no-untyped-def] x: IRNode = self while 
isinstance(x, BaseView): x = x.data return x - def constant_to_device(self, device): + def constant_to_device(self, device): # type: ignore[no-untyped-def] """Move this to a given device. Requires that all reads are to constants.""" loader = self.make_loader() loader = patch.object(ConstantBuffer, "override_device", device)(loader) @@ -2311,7 +2314,7 @@ class ExpandView(BaseView): size: List[Expr] @staticmethod - def _normalize_size(x, new_size): + def _normalize_size(x, new_size): # type: ignore[no-untyped-def] """Replace `-1` with correct sizes""" sizevars = V.graph.sizevars new_size = list(map(sympy.expand, new_size)) @@ -2338,7 +2341,7 @@ def _normalize_size(x, new_size): return new_size @classmethod - def create(cls, x, new_size): + def create(cls, x, new_size): # type: ignore[no-untyped-def] new_size = cls._normalize_size(x, new_size) if is_storage_and_layout(x): @@ -2365,15 +2368,15 @@ def create(cls, x, new_size): return ExpandView(data=x, size=new_size) - def get_size(self): + def get_size(self): # type: ignore[no-untyped-def] return self.size - def make_reindexer(self): + def make_reindexer(self): # type: ignore[no-untyped-def] target = self.get_size() actual = self.data.get_size() skip = len(target) - len(actual) - def reindex(index): + def reindex(index): # type: ignore[no-untyped-def] index = list(index[skip:]) assert len(index) == len(actual) for i in range(len(actual)): @@ -2390,7 +2393,7 @@ class PermuteView(BaseView): dims: List[Expr] @classmethod - def create(cls, x, dims): + def create(cls, x, dims): # type: ignore[no-untyped-def] dims = cls._map_neg_dims(dims) assert OrderedSet(dims) == OrderedSet(range(len(dims))) @@ -2408,22 +2411,22 @@ def create(cls, x, dims): return PermuteView(data=x, dims=dims) @classmethod - def _map_neg_dims(cls, dims): + def _map_neg_dims(cls, dims): # type: ignore[no-untyped-def] return [dim if dim >= 0 else len(dims) + dim for dim in dims] - def get_size(self): + def get_size(self): # type: ignore[no-untyped-def] assert OrderedSet(self._map_neg_dims(self.dims)) == OrderedSet( range(len(self.dims)) ) size = self.data.get_size() return [size[i] for i in self.dims] - def make_reindexer(self): + def make_reindexer(self): # type: ignore[no-untyped-def] inv = {j: i for i, j in enumerate(self.dims)} inv = [inv[i] for i in range(len(self.dims))] assert OrderedSet(inv) == OrderedSet(range(len(self.dims))) - def reindex(index): + def reindex(index): # type: ignore[no-untyped-def] return [index[i] for i in inv] return reindex @@ -2432,7 +2435,7 @@ def reindex(index): @ir_dataclass class SqueezeView(BaseView): @classmethod - def create(cls, x, *, dim=None): + def create(cls, x, *, dim=None): # type: ignore[no-untyped-def] if is_storage_and_layout(x): storage, old_layout = as_storage_and_layout(x) new_size = [] @@ -2470,7 +2473,7 @@ def create(cls, x, *, dim=None): return View.create(x, [s for i, s in enumerate(x.get_size()) if i != dim]) @staticmethod - def squeezer(size: Tuple[sympy.Expr, ...]): + def squeezer(size: Tuple[sympy.Expr, ...]): # type: ignore[no-untyped-def] new_size = [s for s in size if s != 1] not_one = [i for i, s in enumerate(size) if s != 1] length = len(size) @@ -2484,7 +2487,7 @@ def reindex(index: List[sympy.Expr]) -> Tuple[sympy.Expr, ...]: return new_size, reindex - def __init__(self, data): + def __init__(self, data) -> None: # type: ignore[no-untyped-def] raise AssertionError("use SqueezeView.create()") @@ -2493,10 +2496,10 @@ class GenericView(BaseView): size: List[Expr] reindex: Callable[..., Any] - def make_reindexer(self): 
+ def make_reindexer(self): # type: ignore[no-untyped-def] return self.reindex - def reindex_str(self): + def reindex_str(self) -> str: index_old = [ sympy_index_symbol_with_prefix(SymT.INDEX, n) for n in range(len(self.size)) ] @@ -2511,17 +2514,17 @@ def __str__(self) -> str: __repr__ = __str__ @classmethod - def create(cls, x, new_size, reindex): + def create(cls, x, new_size, reindex): # type: ignore[no-untyped-def] return cls(data=x, size=list(new_size), reindex=reindex) - def get_size(self): + def get_size(self): # type: ignore[no-untyped-def] return self.size @ir_dataclass class View(GenericView): @staticmethod - def handle_negative_index(idx, size): + def handle_negative_index(idx, size): # type: ignore[no-untyped-def] idx = sympy.expand(idx) size = sympy.expand(size) evaluate_expr = V.graph.sizevars.shape_env.evaluate_expr @@ -2530,7 +2533,7 @@ def handle_negative_index(idx, size): return idx @classmethod - def create(cls, x, new_size): + def create(cls, x, new_size): # type: ignore[no-untyped-def] assert isinstance(new_size, (tuple, list)) old_size, new_size = cls.resolve_negative_size(x.get_size(), new_size) @@ -2547,7 +2550,7 @@ def create(cls, x, new_size): if 0 in new_size: - def fake_reindex(index): + def fake_reindex(index): # type: ignore[no-untyped-def] return tuple([0] * len(old_size)) return cls(data=x, size=list(new_size), reindex=fake_reindex) @@ -2572,7 +2575,7 @@ def fake_reindex(index): return cls(data=x, size=list(new_size), reindex=reindex) @staticmethod - def resolve_negative_size(old_size, new_size): + def resolve_negative_size(old_size, new_size): # type: ignore[no-untyped-def] new_size = [V.graph.sizevars.simplify(x) for x in new_size] old_size = [V.graph.sizevars.simplify(x) for x in old_size] @@ -2587,7 +2590,7 @@ def resolve_negative_size(old_size, new_size): return old_size, new_size @classmethod - def dynamic_reshape_indexer(cls, old_size, new_size): + def dynamic_reshape_indexer(cls, old_size, new_size): # type: ignore[no-untyped-def] try: reindex = cls._dynamic_reshape_indexer(old_size, new_size) except (AssertionError, IndexError): @@ -2599,7 +2602,7 @@ def dynamic_reshape_indexer(cls, old_size, new_size): return reindex @staticmethod - def _dynamic_reshape_indexer(old_size, new_size): + def _dynamic_reshape_indexer(old_size, new_size): # type: ignore[no-untyped-def] """ Perform a reshape entirely by modifying indexing math """ @@ -2658,7 +2661,7 @@ def _dynamic_reshape_indexer(old_size, new_size): view_expr.reverse() assert len(view_expr) == len(old_size) - def reindex(index): + def reindex(index): # type: ignore[no-untyped-def] assert len(index) == len(vars), (len(index), len(vars)) replacements = dict(zip(vars, index)) return tuple(sympy_subs(x, replacements) for x in view_expr) @@ -2672,7 +2675,7 @@ class ReinterpretView(BaseView): layout: Layout - def __post_init__(self): + def __post_init__(self) -> None: super().__post_init__() if isinstance(self.data, BaseView): object.__setattr__(self, "data", self.data.unwrap_view()) @@ -2687,27 +2690,27 @@ def __str__(self) -> str: __repr__ = __str__ - def get_name(self): + def get_name(self): # type: ignore[no-untyped-def] return self.data.get_name() - def get_device(self): + def get_device(self): # type: ignore[no-untyped-def] return self.layout.device - def get_origin_node(self): + def get_origin_node(self): # type: ignore[no-untyped-def] return None @property - def dtype(self): + def dtype(self): # type: ignore[no-untyped-def] return self.layout.dtype - def get_size(self): + def get_size(self): # type: 
ignore[no-untyped-def] return list(self.layout.size) - def get_stride(self): + def get_stride(self): # type: ignore[no-untyped-def] return list(self.layout.stride) - def make_loader(self): - def loader(index): + def make_loader(self): # type: ignore[no-untyped-def] + def loader(index): # type: ignore[no-untyped-def] indexer = self.layout.make_indexer() tmp_loader = ops.load(self.get_name(), indexer(index)) if self.layout.dtype != self.data.dtype: @@ -2717,13 +2720,13 @@ def loader(index): return loader - def make_indexer(self): + def make_indexer(self): # type: ignore[no-untyped-def] return self.layout.make_indexer() - def get_layout(self): + def get_layout(self): # type: ignore[no-untyped-def] return self.layout - def freeze_layout(self): + def freeze_layout(self) -> None: pass def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: @@ -2733,7 +2736,7 @@ def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: | free_unbacked_symbols(self.layout.offset) ) - def codegen_reference(self, writer=None): + def codegen_reference(self, writer=None): # type: ignore[no-untyped-def] # reinterpret_tensor is similar to as_strided except: # - offset is added to the existing offset (rather than replacing it) # - view tracking is disabled similar to unsafe_view @@ -2746,7 +2749,7 @@ def codegen_reference(self, writer=None): dtype=self.layout.dtype, ) - def num_reads(self): + def num_reads(self) -> int: return 1 @@ -2757,7 +2760,7 @@ class DtypeView(BaseView): target_dtype: torch.dtype @classmethod - def create(cls, x, new_dtype): + def create(cls, x, new_dtype): # type: ignore[no-untyped-def] if is_storage_and_layout(x): storage, old_layout = as_storage_and_layout(x) new_layout = FixedLayout( @@ -2776,16 +2779,16 @@ def __str__(self) -> str: __repr__ = __str__ @property - def dtype(self): + def dtype(self): # type: ignore[no-untyped-def] return self.target_dtype - def get_size(self): + def get_size(self): # type: ignore[no-untyped-def] return self.data.get_size() - def make_loader(self): + def make_loader(self): # type: ignore[no-untyped-def] inner = self.data.make_loader() - def loader(idx): + def loader(idx): # type: ignore[no-untyped-def] return ops.to_dtype_bitcast(inner(idx), self.target_dtype, self.data.dtype) return loader @@ -2793,7 +2796,7 @@ def loader(idx): class SliceView(View): @classmethod - def normalize_start_end(cls, x, dim, start, end): + def normalize_start_end(cls, x, dim, start, end): # type: ignore[no-untyped-def] """ Normalize start and end such that both are in the range [0, x.get_size()[dim]] and start <= end. 
@@ -2803,15 +2806,15 @@ def normalize_start_end(cls, x, dim, start, end): if any(free_unbacked_symbols(x) for x in (start, end, dim_size)): - def clamp(x, lower, upper): + def clamp(x, lower, upper): # type: ignore[no-untyped-def] return sympy.Min(sympy.Max(x, lower), upper) else: - def clamp(x, lower, upper): + def clamp(x, lower, upper): # type: ignore[no-untyped-def] return sizevars.evaluate_min(sizevars.evaluate_max(x, lower), upper) - def clamp_wrap(val, lower, upper, default): + def clamp_wrap(val, lower, upper, default): # type: ignore[no-untyped-def] if val is None: return default val = cls.handle_negative_index(val, dim_size) @@ -2822,7 +2825,7 @@ def clamp_wrap(val, lower, upper, default): return start, end @classmethod - def create(cls, x, dim, start, end, step=1, clamp=True): + def create(cls, x, dim, start, end, step=1, clamp=True): # type: ignore[no-untyped-def] step = sympy.expand(step) assert isinstance(step, sympy.Expr) or step > 0 try: @@ -2856,7 +2859,7 @@ def create(cls, x, dim, start, end, step=1, clamp=True): ) return ReinterpretView(data=storage, layout=new_layout) - def reindex(index): + def reindex(index): # type: ignore[no-untyped-def] assert len(index) == len(new_size), f"wrong ndim {index} {new_size}" index = list(index) index[dim] = index[dim] * step + start @@ -2871,25 +2874,25 @@ class BaseConstant(IRNode): dtype: torch.dtype device: torch.device - def get_size(self): + def get_size(self): # type: ignore[no-untyped-def] return () - def get_device(self): + def get_device(self): # type: ignore[no-untyped-def] return self.device - def get_origin_node(self): + def get_origin_node(self): # type: ignore[no-untyped-def] return None - def mark_reuse(self, users): + def mark_reuse(self, users) -> None: # type: ignore[no-untyped-def] pass - def has_exceeded_max_reads(self): + def has_exceeded_max_reads(self) -> bool: return False - def get_reads(self): + def get_reads(self): # type: ignore[no-untyped-def] return () - def is_extern(self): + def is_extern(self) -> bool: return False @@ -2899,16 +2902,16 @@ class Constant(BaseConstant): dtype: torch.dtype device: torch.device - def make_loader(self): - def loader(index): + def make_loader(self): # type: ignore[no-untyped-def] + def loader(index): # type: ignore[no-untyped-def] return ops.constant(self.value, self.dtype) return loader - def realize(self): + def realize(self): # type: ignore[no-untyped-def] pass - def constant_to_device(self, device): + def constant_to_device(self, device): # type: ignore[no-untyped-def] return Constant(value=self.value, dtype=self.dtype, device=device) @@ -2918,13 +2921,13 @@ class IndexingConstant(BaseConstant): dtype: torch.dtype device: torch.device - def make_loader(self): - def loader(index): + def make_loader(self): # type: ignore[no-untyped-def] + def loader(index): # type: ignore[no-untyped-def] return ops.index_expr(self.index, self.dtype) return loader - def constant_to_device(self, device): + def constant_to_device(self, device): # type: ignore[no-untyped-def] return IndexingConstant(index=self.index, dtype=self.dtype, device=device) @@ -2952,7 +2955,7 @@ def __init__( size: List[Expr], stride: Optional[Sequence[Union[Expr, int]]], offset: Expr = Integer(0), - ): + ) -> None: assert stride is None or len(size) == len( stride ), f"size={size}, stride={stride}" @@ -2964,7 +2967,7 @@ def __init__( self.offset = offset @property - def stride(self): + def stride(self): # type: ignore[no-untyped-def] return self._stride def __str__(self) -> str: @@ -2978,11 +2981,11 @@ def 
__str__(self) -> str: __repr__ = __str__ - def is_contiguous(self): + def is_contiguous(self): # type: ignore[no-untyped-def] return is_contiguous_strides_for_shape(self.stride, self.size) @staticmethod - def is_channels_last_contiguous(shape, strides): + def is_channels_last_contiguous(shape, strides) -> bool: # type: ignore[no-untyped-def] ndim = len(shape) if ndim not in [4, 5] or shape[1] == 1: return False @@ -2993,7 +2996,7 @@ def is_channels_last_contiguous(shape, strides): return False return True - def is_transposed(self): + def is_transposed(self) -> bool: for left, right, size in zip( self.stride, reversed(FlexibleLayout.contiguous_strides(list(reversed(self.size)))), @@ -3003,7 +3006,7 @@ def is_transposed(self): return False return True - def is_stride_ordered(self, order): + def is_stride_ordered(self, order) -> bool: # type: ignore[no-untyped-def] assert len(self.stride) == len(order) # ignore dimensions of size 1, they dont affect layout @@ -3016,7 +3019,7 @@ def is_stride_ordered(self, order): stride = [self.stride[i] for i in non_1_indices] order = [order[i] for i in non_1_indices] - def sorted_indices(arr): + def sorted_indices(arr): # type: ignore[no-untyped-def] sorted_arr = sorted(arr) return [sorted_arr.index(element) for element in arr] @@ -3038,14 +3041,14 @@ def sorted_indices(arr): return False return True - def is_channels_last_stride_ordered(self): + def is_channels_last_stride_ordered(self): # type: ignore[no-untyped-def] # create channels_last order(NCHW, NCDHW, the C is the first order). order = [0] + list(reversed(range(1, len(self.stride) - 1))) order = [len(order)] + order return self.is_stride_ordered(order) @staticmethod - def _pad_strides(in_strides, size, dtype): + def _pad_strides(in_strides, size, dtype): # type: ignore[no-untyped-def] """ The padding does not change stride order but makes sure all strides larger than the threshold are multiple of align. 
@@ -3103,15 +3106,15 @@ def _pad_strides(in_strides, size, dtype): metrics.num_comprehensive_padding += 1 return new_strides - def pad_strides(self): + def pad_strides(self) -> None: assert isinstance(self, FlexibleLayout) assert self._stride is not None self._stride = self._pad_strides(self._stride, self.size, self.dtype) - def should_pad_strides(self): + def should_pad_strides(self): # type: ignore[no-untyped-def] return config.comprehensive_padding and isinstance(self, FlexibleLayout) - def as_fixed(self): + def as_fixed(self): # type: ignore[no-untyped-def] if isinstance(self, FixedLayout): return self @@ -3125,13 +3128,13 @@ def as_fixed(self): self.offset, ) - def make_indexer(self): + def make_indexer(self): # type: ignore[no-untyped-def] assert ( FlexibleLayout.allow_indexing ), f"convert {type(self).__name__} to FixedLayout first" return self.as_fixed().make_indexer() - def __eq__(self, other) -> bool: + def __eq__(self, other) -> bool: # type: ignore[no-untyped-def] return ( self.device == other.device and self.dtype == other.dtype @@ -3154,7 +3157,7 @@ def __init__( size: Union[List[Expr], List[int]], stride: Optional[Sequence[Union[Expr, int]]] = None, offset: Union[Expr, int] = Integer(0), - ): + ) -> None: if stride is None: stride = FlexibleLayout.contiguous_strides(size) super().__init__( @@ -3165,10 +3168,10 @@ def __init__( offset=offset, ) - def make_indexer(self): + def make_indexer(self): # type: ignore[no-untyped-def] """A closure containing math to read a given element""" - def indexer(index): + def indexer(index): # type: ignore[no-untyped-def] assert len(index) == len(self.stride) assert len(index) == len(self.size) result = self.offset @@ -3187,7 +3190,7 @@ class FlexibleLayout(Layout): # WARNING! This doesn't handle zero size tensors correctly @staticmethod - def contiguous_strides(sizes): + def contiguous_strides(sizes): # type: ignore[no-untyped-def] if len(sizes) == 0: return [] reversed_strides = [sympy.S.One] @@ -3196,7 +3199,7 @@ def contiguous_strides(sizes): return list(reversed(reversed_strides)) @staticmethod - def fill_ordered(sizes, order): + def fill_ordered(sizes, order): # type: ignore[no-untyped-def] """ Create a stride based on the order the dimensions should be filled in. @@ -3213,7 +3216,7 @@ def fill_ordered(sizes, order): return strides @staticmethod - def stride_ordered(sizes, order): + def stride_ordered(sizes, order): # type: ignore[no-untyped-def] """ Create a stride based on the sorted order of a permuted range. @@ -3225,7 +3228,7 @@ def stride_ordered(sizes, order): return FlexibleLayout.fill_ordered(sizes, fill_order) @staticmethod - def stride_ordered_for_memory_format(sizes, memory_format): + def stride_ordered_for_memory_format(sizes, memory_format): # type: ignore[no-untyped-def] """ Create a stride based on a memory format. 
@@ -3250,7 +3253,7 @@ def stride_ordered_for_memory_format(sizes, memory_format): raise NotImplementedError @staticmethod - def same_ordered(sizes, stride): + def same_ordered(sizes, stride): # type: ignore[no-untyped-def] """ Create a stride that has the same stride order as given stride @@ -3262,7 +3265,7 @@ def same_ordered(sizes, stride): fill_order = sorted(range(len(stride)), key=stride.__getitem__) return FlexibleLayout.fill_ordered(sizes, fill_order) - def as_stride_order(self, order, allow_padding=False): + def as_stride_order(self, order, allow_padding=False): # type: ignore[no-untyped-def] new_stride = self.stride_ordered(self.size, order) if self.should_pad_strides() and allow_padding: new_stride = self._pad_strides(new_stride, self.size, self.dtype) @@ -3275,7 +3278,7 @@ def as_stride_order(self, order, allow_padding=False): self.offset, ) - def as_exact_strides(self, exact_strides, allow_padding=False): + def as_exact_strides(self, exact_strides, allow_padding=False): # type: ignore[no-untyped-def] new_stride = exact_strides if self.should_pad_strides() and allow_padding: new_stride = self._pad_strides(new_stride, self.size, self.dtype) @@ -3288,7 +3291,7 @@ def as_exact_strides(self, exact_strides, allow_padding=False): self.offset, ) - def as_fill_order(self, order): + def as_fill_order(self, order): # type: ignore[no-untyped-def] new_stride = self.fill_ordered(self.size, order) if self.should_pad_strides(): new_stride = self._pad_strides(new_stride, self.size, self.dtype) @@ -3300,7 +3303,7 @@ def as_fill_order(self, order): self.offset, ) - def as_same_order(self, stride): + def as_same_order(self, stride): # type: ignore[no-untyped-def] new_stride = self.same_ordered(self.size, stride) if self.should_pad_strides(): new_stride = self._pad_strides(new_stride, self.size, self.dtype) @@ -3312,7 +3315,7 @@ def as_same_order(self, stride): self.offset, ) - def __init__(self, device, dtype, size, stride_order=None): + def __init__(self, device, dtype, size, stride_order=None) -> None: # type: ignore[no-untyped-def] if stride_order: strides = FlexibleLayout.fill_ordered(size, stride_order) else: @@ -3323,7 +3326,7 @@ def __init__(self, device, dtype, size, stride_order=None): class NonOwningLayout(Layout): """Is a view into the storage of another tensor""" - def __init__(self, view: Union[BaseView, TensorBox]): + def __init__(self, view: Union[BaseView, TensorBox]) -> None: layout = view.get_layout() super().__init__( layout.device, @@ -3333,10 +3336,10 @@ def __init__(self, view: Union[BaseView, TensorBox]): ) self.view = view - def make_indexer(self): + def make_indexer(self): # type: ignore[no-untyped-def] return self.as_fixed().make_indexer() - def maybe_guard_aligned(self): + def maybe_guard_aligned(self): # type: ignore[no-untyped-def] offset = self.view.get_layout().offset if offset == 0: return True @@ -3402,15 +3405,15 @@ class NoneLayout(IRNode): size: List[int] = dataclasses.field(default_factory=lambda: [0]) stride: List[int] = dataclasses.field(default_factory=lambda: [0]) - def storage_size(self): + def storage_size(self) -> int: return 0 - def as_fixed(self): + def as_fixed(self): # type: ignore[no-untyped-def] return self class MutationLayoutSHOULDREMOVE(Layout): - def __init__(self, target: IRNode): + def __init__(self, target: IRNode) -> None: super().__init__( target.get_device(), target.get_dtype(), @@ -3422,14 +3425,14 @@ def __init__(self, target: IRNode): V.graph.mark_buffer_mutated(name) @Layout.stride.getter # type: ignore[attr-defined] - def 
stride(self): + def stride(self): # type: ignore[no-untyped-def] return self.real_layout().stride def storage_size(self) -> sympy.Expr: return self.real_layout().storage_size() def get_buffer(self) -> Buffer: - def unwrap_views(target): + def unwrap_views(target): # type: ignore[no-untyped-def] if isinstance(target, MutationLayoutSHOULDREMOVE): return unwrap_views(target.target) if isinstance(target, BaseView): @@ -3444,11 +3447,11 @@ def unwrap_views(target): ), "MutationLayoutSHOULDREMOVE must refer to a buffer" return result - def real_layout(self): + def real_layout(self): # type: ignore[no-untyped-def] return self.get_buffer().layout @classmethod - def realize_into(cls, src, dst, unsafe_alias=False): + def realize_into(cls, src, dst, unsafe_alias=False): # type: ignore[no-untyped-def] dst.realize() # NOTE: We must realize users of `dst` before we realize `src`, since # realization order determines scheduling order. Otherwise, src's @@ -3482,10 +3485,10 @@ def realize_into(cls, src, dst, unsafe_alias=False): src.data.layout = MutationLayoutSHOULDREMOVE(dst) return src.data - def as_fixed(self): + def as_fixed(self): # type: ignore[no-untyped-def] return self - def make_indexer(self): + def make_indexer(self): # type: ignore[no-untyped-def] return self.target.make_indexer() @@ -3499,93 +3502,93 @@ class Buffer(IRNode): # Multi-output buffers will define 'outputs: List[Buffer]'. Confusingly, # MultiOutput does NOT define this! - def __post_init__(self): + def __post_init__(self) -> None: super().__post_init__() self._post_init_setattr("origin_node", None) - def make_indexer(self): + def make_indexer(self): # type: ignore[no-untyped-def] return self.layout.make_indexer() def get_name(self) -> str: assert self.name, self return self.name - def get_device(self): + def get_device(self): # type: ignore[no-untyped-def] return self.layout.device def get_defining_op(self) -> Optional[Operation]: return None @property - def dtype(self): + def dtype(self): # type: ignore[no-untyped-def] return getattr(self.layout, "dtype", None) - def get_size(self): + def get_size(self): # type: ignore[no-untyped-def] return list(self.layout.size) - def get_stride(self): + def get_stride(self): # type: ignore[no-untyped-def] return list(self.layout.stride) - def get_offset(self): + def get_offset(self): # type: ignore[no-untyped-def] return self.layout.offset - def get_layout(self): + def get_layout(self): # type: ignore[no-untyped-def] return self.layout - def get_storage_numel(self): + def get_storage_numel(self): # type: ignore[no-untyped-def] return self.get_numel() - def is_extern(self): + def is_extern(self) -> bool: return False - def freeze_layout(self): + def freeze_layout(self) -> None: if not isinstance(self.layout, (MultiOutputLayout, NonOwningLayout)): self.layout = self.layout.as_fixed() - def freeze_layout_with_stride_order(self, order, allow_padding=False): + def freeze_layout_with_stride_order(self, order, allow_padding=False) -> None: # type: ignore[no-untyped-def] assert isinstance(self.layout, FlexibleLayout) self.layout = self.layout.as_stride_order(order, allow_padding=allow_padding) - def freeze_layout_with_fill_order(self, order): + def freeze_layout_with_fill_order(self, order) -> None: # type: ignore[no-untyped-def] assert isinstance(self.layout, FlexibleLayout) self.layout = self.layout.as_fill_order(order) - def freeze_layout_with_same_order(self, stride): + def freeze_layout_with_same_order(self, stride) -> None: # type: ignore[no-untyped-def] assert isinstance(self.layout, 
FlexibleLayout) self.layout = self.layout.as_same_order(stride) - def freeze_layout_with_exact_strides(self, exact_strides, allow_padding=False): + def freeze_layout_with_exact_strides(self, exact_strides, allow_padding=False) -> None: # type: ignore[no-untyped-def] assert isinstance(self.layout, FlexibleLayout) self.layout = self.layout.as_exact_strides( exact_strides, allow_padding=allow_padding ) - def is_zero_elements(self): + def is_zero_elements(self): # type: ignore[no-untyped-def] return V.graph.sizevars.is_expr_static_and_true(sympy.Eq(self.get_numel(), 0)) - def make_loader(self): + def make_loader(self): # type: ignore[no-untyped-def] # Loading from a zero-element buffer is a no-op if self.is_zero_elements(): return partial(nop_loader_fn, dtype=self.get_dtype()) - def loader(index): + def loader(index): # type: ignore[no-untyped-def] indexer = self.layout.make_indexer() return ops.load(self.name, indexer(index)) return loader - def codegen_reference(self, writer=None): + def codegen_reference(self, writer=None): # type: ignore[no-untyped-def] return self.get_name() - def decide_layout(self): + def decide_layout(self) -> None: pass - def get_inputs_that_alias_output(self): + def get_inputs_that_alias_output(self): # type: ignore[no-untyped-def] if isinstance(self.layout, NonOwningLayout): return [self.layout.view.get_name()] return () - def get_mutation_names(self): + def get_mutation_names(self): # type: ignore[no-untyped-def] if isinstance(self.layout, MutationLayoutSHOULDREMOVE): return [self.layout.target.get_name()] return () @@ -3599,10 +3602,10 @@ def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: def get_unbacked_symbol_defs(self) -> OrderedSet[sympy.Symbol]: return OrderedSet() - def realize(self): + def realize(self): # type: ignore[no-untyped-def] pass - def should_allocate(self): + def should_allocate(self) -> bool: # Returns False by default. 
return False @@ -3616,21 +3619,21 @@ def get_outputs(self) -> List[Buffer]: def get_defining_op(self) -> Operation: return self - def __post_init__(self): + def __post_init__(self) -> None: Buffer.__post_init__(self) Operation.__post_init__(self) class InputBuffer(Buffer): - def num_reads(self): + def num_reads(self) -> int: return 1 class ConstantBuffer(InputBuffer): override_device: Optional[torch.device] = None - def make_loader(self): - def loader(index): + def make_loader(self): # type: ignore[no-untyped-def] + def loader(index): # type: ignore[no-untyped-def] indexer = self.layout.make_indexer() return ops.load( V.graph.constant_name(self.get_name(), self.override_device), @@ -3639,7 +3642,7 @@ def loader(index): return loader - def constant_to_device(self, device): + def constant_to_device(self, device): # type: ignore[no-untyped-def] return ConstantBuffer( name=V.graph.constant_name(self.get_name(), device), layout=self.layout ) @@ -3650,7 +3653,7 @@ class NoneAsConstantBuffer(IRNode): def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: return OrderedSet() - def codegen_reference(self, writer=None): + def codegen_reference(self, writer=None): # type: ignore[no-untyped-def] return V.graph.wrapper_code.none_str @@ -3661,7 +3664,7 @@ class ShapeAsConstantBuffer(IRNode): def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: return free_unbacked_symbols(self.expr) - def codegen_reference(self, writer=None): + def codegen_reference(self, writer=None): # type: ignore[no-untyped-def] return V.graph.wrapper_code.expr_printer(V.graph.sizevars.simplify(self.expr)) @@ -3669,7 +3672,7 @@ def codegen_reference(self, writer=None): class ComputedBuffer(OperationBuffer): data: Loops - def get_computed_buffer_name(self): + def get_computed_buffer_name(self): # type: ignore[no-untyped-def] """ Returns self.name if it exists, otherwise returns the name of the data node if that exists. If neither exist, returns None. @@ -3680,13 +3683,13 @@ def get_computed_buffer_name(self): return self.data.name return None - def num_reads(self): + def num_reads(self): # type: ignore[no-untyped-def] return self.data.num_reads() def get_read_names(self) -> OrderedSet[str]: return self.data.get_read_names() - def get_read_writes(self): + def get_read_writes(self): # type: ignore[no-untyped-def] with patch.object(FlexibleLayout, "allow_indexing", True): if self.data.get_reduction_type(): return extract_read_writes( @@ -3725,7 +3728,7 @@ def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: | self.data.get_unbacked_symbol_uses() ) - def make_loader(self): + def make_loader(self): # type: ignore[no-untyped-def] # Inline constants and index_expressions if ( hasattr(self.data, "make_loader") @@ -3736,7 +3739,7 @@ def make_loader(self): return self.data.make_loader() return super().make_loader() - def get_store_function(self): + def get_store_function(self): # type: ignore[no-untyped-def] indexer = self.layout.as_fixed().make_indexer() if isinstance(self.data, (Reduction, Scan, Sort)): return partial(self.data.store_reduction, self.name, indexer) @@ -3744,7 +3747,7 @@ def get_store_function(self): assert isinstance(self.data, Pointwise) return partial(self.data.store_output, self.name, indexer) - def get_fill_order(self): + def get_fill_order(self): # type: ignore[no-untyped-def] """ If our layout is still flexible, try to determine the stride order based on stride orders of reads. 
@@ -3783,7 +3786,7 @@ def get_fill_order(self): return None - def decide_layout(self): + def decide_layout(self) -> None: if isinstance(self.layout, FlexibleLayout): order = self.get_fill_order() if order: @@ -3792,7 +3795,7 @@ def decide_layout(self): self.freeze_layout() @cache_on_self - def get_default_sizes_body(self): + def get_default_sizes_body(self): # type: ignore[no-untyped-def] args, var_ranges = dependencies.index_vars_squeeze( self.data.get_pointwise_size(), self.data.get_reduction_size(), prefix="q" ) @@ -3818,7 +3821,7 @@ def get_default_sizes_body(self): reduce_size.append(s) return (index_size, reduce_size), body, (index_vars, reduce_vars) - def simplify_and_reorder( + def simplify_and_reorder( # type: ignore[no-untyped-def] self, extra_indexing_constraints: Optional[Tuple[Dict[Any, Any], List[Any]]] = None, recompute_sizes_body_func: Optional[Callable[..., Any]] = None, @@ -3881,7 +3884,7 @@ def simplify_and_reorder( if not V.graph.has_feature(self, BackendFeature.PREFER_STORE_LOOP_ORDER): memory_addrs.extend(body.get_read_exprs()) - def simplify_and_reorder(x_vars, support_vars, sizes, simplify_loops): + def simplify_and_reorder(x_vars, support_vars, sizes, simplify_loops): # type: ignore[no-untyped-def] sizes, reindex0, reindex1 = self._apply_loop_reordering( x_vars, support_vars, sizes, memory_addrs ) @@ -3934,7 +3937,7 @@ def simplify_and_reorder(x_vars, support_vars, sizes, simplify_loops): return (iter_ranges, reduce_ranges), body @staticmethod - def _apply_loop_reordering( + def _apply_loop_reordering( # type: ignore[no-untyped-def] index_vars, support_vars, sizes, @@ -3969,19 +3972,19 @@ def _apply_loop_reordering( sizes = [sizes[i] for i in order] return sizes, same_reorder(order), inverse_reorder(order) - def get_reduction_size(self): + def get_reduction_size(self): # type: ignore[no-untyped-def] return self.data.get_reduction_size() - def get_reduction_type(self): + def get_reduction_type(self): # type: ignore[no-untyped-def] return self.data.get_reduction_type() - def is_no_op(self): + def is_no_op(self): # type: ignore[no-untyped-def] return self.data.is_zero_elements() - def should_allocate(self): + def should_allocate(self) -> bool: return True - def constant_to_device(self, device): + def constant_to_device(self, device): # type: ignore[no-untyped-def] """Move this to a given device. Requires that all reads are to constants.""" return self.data.constant_to_device(device) @@ -3992,21 +3995,21 @@ class TemplateBuffer(OperationBuffer): that we can fuse an epilogue onto. 
""" - def __init__(self, layout, inputs, make_kernel_render): + def __init__(self, layout, inputs, make_kernel_render) -> None: # type: ignore[no-untyped-def] super().__init__(name=None, layout=layout) self.inputs = InputsKernel.unwrap_storage(inputs) self.make_kernel_render = make_kernel_render self.name = V.graph.register_buffer(self) V.graph.register_operation(self) - def get_read_writes(self): + def get_read_writes(self): # type: ignore[no-untyped-def] return self.extract_read_writes(normalize=True) - def extract_read_writes(self, normalize): + def extract_read_writes(self, normalize): # type: ignore[no-untyped-def] name = self.get_name() indexer = self.layout.make_indexer() - def dummy(index, rindex): + def dummy(index, rindex): # type: ignore[no-untyped-def] assert len(rindex) == 0 return ops.store(name, indexer(index), "fake") @@ -4016,19 +4019,19 @@ def dummy(index, rindex): deps.reads = OrderedSet(dependencies.StarDep(x.get_name()) for x in self.inputs) return deps - def get_reduction_size(self): + def get_reduction_size(self): # type: ignore[no-untyped-def] return 1 - def get_reduction_type(self): + def get_reduction_type(self): # type: ignore[no-untyped-def] return None - def is_no_op(self): + def is_no_op(self) -> bool: return False - def should_allocate(self): + def should_allocate(self) -> bool: return True - def simplify_and_reorder( + def simplify_and_reorder( # type: ignore[no-untyped-def] self, extra_indexing_constraints: Optional[Tuple[Dict[Any, Any], List[Any]]] = None, recompute_sizes_body_func: Optional[Callable[..., Any]] = None, @@ -4043,13 +4046,13 @@ def simplify_and_reorder( class TritonTemplateBuffer(TemplateBuffer): - def __init__( + def __init__( # type: ignore[no-untyped-def] self, layout, inputs, make_kernel_render, mutated_inputs: Optional[Iterable[IRNode]] = None, - ): + ) -> None: """ NOTE:[TritonTemplates with multiple outputs] We want the ability for TritonTemplates to output multiple tensors. 
Triton @@ -4104,7 +4107,7 @@ def __init__( input_nodes: List[Buffer], layout: Layout, description: str, - ): + ) -> None: super().__init__() self.name = name self.layout = layout @@ -4113,14 +4116,14 @@ def __init__( # knowing what autotuning is choosing) self.description = description - def benchmark(self, *args, out) -> float: + def benchmark(self, *args, out) -> float: # type: ignore[no-untyped-def] algo = self.to_callable() return benchmarker.benchmark(algo, args, {"out": out}) def call_name(self) -> str: raise NotImplementedError - def to_callable(self): + def to_callable(self): # type: ignore[no-untyped-def] raise NotImplementedError def hash_key(self) -> str: @@ -4157,7 +4160,7 @@ def __init__( inputs: List[IRNode], choice_timings: Callable[[], Dict[ChoiceCaller, float]], unfiltered_choices: List[ChoiceCaller], - ): + ) -> None: super().__init__(layout=layout, inputs=inputs, make_kernel_render=None) self._choice_timings_fn = choice_timings self._choice_timings: Optional[Dict[ChoiceCaller, float]] = None @@ -4185,7 +4188,7 @@ def choice_timings(self) -> Dict[ChoiceCaller, float]: return self._choice_timings @contextlib.contextmanager - def swap_as_triton_caller(self, caller: TritonTemplateCallerBase): + def swap_as_triton_caller(self, caller: TritonTemplateCallerBase): # type: ignore[no-untyped-def] assert isinstance(caller, torch._inductor.select_algorithm.TritonTemplateCaller) assert self.layout == caller.layout @@ -4196,7 +4199,7 @@ def swap_as_triton_caller(self, caller: TritonTemplateCallerBase): finally: self.make_kernel_render = render - def finalize_as_triton_caller(self, caller: TritonTemplateCallerBase): + def finalize_as_triton_caller(self, caller: TritonTemplateCallerBase) -> None: assert isinstance(caller, torch._inductor.select_algorithm.TritonTemplateCaller) assert self.layout.size == caller.layout.size assert self.layout.stride == caller.layout.stride @@ -4208,25 +4211,25 @@ def get_min_choice(self) -> Tuple[ChoiceCaller, float]: class CUDATemplateBuffer(TemplateBuffer): - def __init__( + def __init__( # type: ignore[no-untyped-def] self, layout, inputs, make_kernel_render, workspace_size: int, template: CUDATemplate, # type: ignore[name-defined] # noqa: F821 - ): + ) -> None: super().__init__(layout, inputs, make_kernel_render) # Global memory (in bytes) needed for this template. 
self.workspace_size = workspace_size self.template = template - def get_workspace_size(self): + def get_workspace_size(self): # type: ignore[no-untyped-def] return self.workspace_size if self.workspace_size is not None else 0 class CppTemplateBuffer(TemplateBuffer): - def __init__(self, layout, inputs, make_kernel_render, template, choice): + def __init__(self, layout, inputs, make_kernel_render, template, choice) -> None: # type: ignore[no-untyped-def] super().__init__(layout, inputs, make_kernel_render) self.template = template self.choice = choice @@ -4236,7 +4239,7 @@ def __init__(self, layout, inputs, make_kernel_render, template, choice): class InputsKernel(OperationBuffer): inputs: List[Buffer] - def get_read_writes(self): + def get_read_writes(self): # type: ignore[no-untyped-def] reads: OrderedSet[dependencies.Dep] = OrderedSet() StarDep = dependencies.StarDep for input in self.inputs: @@ -4259,7 +4262,7 @@ def get_read_writes(self): ) @classmethod - def unwrap_storage_for_input(cls, x): + def unwrap_storage_for_input(cls, x): # type: ignore[no-untyped-def] if isinstance(x, TensorBox): x = x.data if isinstance(x, StorageBox): @@ -4278,7 +4281,7 @@ def unwrap_storage_for_input(cls, x): return x @staticmethod - def unwrap_storage(inputs): + def unwrap_storage(inputs): # type: ignore[no-untyped-def] inputs_new = [] for x in inputs: if isinstance(x, list): @@ -4288,15 +4291,15 @@ def unwrap_storage(inputs): inputs_new.append(x) return inputs_new - def is_extern(self): + def is_extern(self) -> bool: return True - def num_reads(self): + def num_reads(self) -> int: return 1 class NopKernel(InputsKernel): - def is_no_op(self): + def is_no_op(self) -> bool: return True @@ -4307,7 +4310,7 @@ class ConcatKernel(NopKernel): """ @classmethod - def create(cls, inputs, dim): + def create(cls, inputs, dim): # type: ignore[no-untyped-def] device = inputs[0].get_device() dtype = inputs[0].get_dtype() new_size = list(inputs[0].get_size()) @@ -4398,7 +4401,7 @@ def create(cls, inputs, dim): return kernel @classmethod - def can_realize_into_without_copy(cls, src, dst=None): + def can_realize_into_without_copy(cls, src, dst=None): # type: ignore[no-untyped-def] if isinstance(src, TensorBox): # unwrap a TensorBox return cls.can_realize_into_without_copy(src.data, dst) @@ -4429,7 +4432,7 @@ def can_realize_into_without_copy(cls, src, dst=None): ) @classmethod - def realize_into(cls, src, dst): + def realize_into(cls, src, dst): # type: ignore[no-untyped-def] # Attempt to turn this into a ReinterpretView rather than assert. # This has concessions around layout, as as_storage_and_layout # can cause us to go from flexible to fixed layout. 
@@ -4461,7 +4464,7 @@ def realize_into(cls, src, dst): ) return cls.realize_into(pw, dst) - def should_allocate(self): + def should_allocate(self) -> bool: return True @@ -4487,7 +4490,7 @@ class ExternKernel(InputsKernel): ) mutation_outputs: List[MutationOutput] = dataclasses.field(default_factory=list) - def __init__( + def __init__( # type: ignore[no-untyped-def] self, name, layout, @@ -4499,7 +4502,7 @@ def __init__( cpp_kernel_name=None, ordered_kwargs_for_cpp_kernel=(), op_overload=None, - ): + ) -> None: super().__init__( name=name, layout=layout, @@ -4523,7 +4526,7 @@ def get_outputs(self) -> List[Buffer]: def get_unbacked_symbol_defs(self) -> OrderedSet[sympy.Symbol]: return OrderedSet() - def collect_arg_kwarg_properties(self): + def collect_arg_kwarg_properties(self) -> None: # if self.op_overload is torch._ops.OpOverload, we can use its schema to collect additional # information for args and kwargs, e.g. type and default value, to help with the cpp wrapper codegen self.arg_properties = ( @@ -4558,20 +4561,20 @@ def collect_arg_kwarg_properties(self): x for x in self.op_overload._schema.arguments if x.kwarg_only ] - def decide_layout(self): + def decide_layout(self) -> None: if isinstance(self.layout, FlexibleLayout): self.apply_constraint() self.freeze_layout() - def codegen_comment(self, wrapper): + def codegen_comment(self, wrapper) -> None: # type: ignore[no-untyped-def] origin_str, detailed_origin_str = get_kernel_metadata(self, wrapper) if origin_str: wrapper.writeline(origin_str) - def codegen(self, wrapper): + def codegen(self, wrapper): # type: ignore[no-untyped-def] raise NotImplementedError - def set_cpp_kernel_name(self, cpp_kernel_name: Optional[str] = None): + def set_cpp_kernel_name(self, cpp_kernel_name: Optional[str] = None) -> None: self.cpp_kernel_name = cpp_kernel_name self.cpp_kernel_overload_name = None self.cpp_kernel_key = None @@ -4611,7 +4614,7 @@ def set_cpp_kernel_name(self, cpp_kernel_name: Optional[str] = None): except Exception: self.cpp_op_schema = "" - def set_python_kernel_name(self, python_kernel_name: Optional[str]): + def set_python_kernel_name(self, python_kernel_name: Optional[str]) -> None: self.python_kernel_name = python_kernel_name if python_kernel_name is not None: return @@ -4626,7 +4629,7 @@ def set_python_kernel_name(self, python_kernel_name: Optional[str]): f"{kernel.__module__.replace('._ops.', '.ops.')}.{kernel.__name__}" ) - def get_kernel_name(self): + def get_kernel_name(self): # type: ignore[no-untyped-def] return ( V.graph.wrapper_code.get_c_shim_func_name(self.cpp_kernel_name) # type: ignore[attr-defined] if V.graph.cpp_wrapper @@ -4634,7 +4637,7 @@ def get_kernel_name(self): ) @staticmethod - def copy_input(x): + def copy_input(x): # type: ignore[no-untyped-def] pw = Pointwise.create( device=x.get_device(), dtype=x.get_dtype(), @@ -4647,7 +4650,7 @@ def copy_input(x): return pw @classmethod - def process_kernel( + def process_kernel( # type: ignore[no-untyped-def] cls, kernel, *args, **kwargs ) -> Tuple[ Any, @@ -4672,7 +4675,7 @@ def process_kernel( arg = V.graph.sizevars.shape_env.create_symintnode(arg, hint=None) non_tensor_args.append(arg) - def unflatten_args(new_tensor_args, new_non_tensor_args): + def unflatten_args(new_tensor_args, new_non_tensor_args): # type: ignore[no-untyped-def] result = [] it_tensors = iter(new_tensor_args) it_non_tensors = iter(new_non_tensor_args) @@ -4744,7 +4747,7 @@ def unflatten_args(new_tensor_args, new_non_tensor_args): ) @classmethod - def convert_to_reinterpret_view(cls, x): + def 
convert_to_reinterpret_view(cls, x): # type: ignore[no-untyped-def] """ In order to pass this to an extern kernel we need a ReinterpretView not a View. This allows us to avoid some @@ -4812,7 +4815,7 @@ def convert_to_reinterpret_view(cls, x): ) @classmethod - def realize_input(cls, x): + def realize_input(cls, x): # type: ignore[no-untyped-def] if x is None: return NoneAsConstantBuffer() if isinstance(x, (sympy.Expr, sympy.logic.boolalg.Boolean, int)): @@ -4845,7 +4848,7 @@ def realize_input(cls, x): return cls.copy_input(x) @classmethod - def require_stride1(cls, x): + def require_stride1(cls, x): # type: ignore[no-untyped-def] if is_storage_and_layout(x): if len(x.get_stride()) == 0: return x @@ -4855,7 +4858,7 @@ def require_stride1(cls, x): return cls.copy_input(x) @classmethod - def require_strides( + def require_strides( # type: ignore[no-untyped-def] # type: ignore[no-untyped-def] cls, x, order: Optional[Sequence[int]] = None, @@ -4980,31 +4983,31 @@ def require_strides( return x @classmethod - def require_exact_strides(cls, x, exact_strides, allow_padding=False): + def require_exact_strides(cls, x, exact_strides, allow_padding=False): # type: ignore[no-untyped-def] return cls.require_strides( x, exact_strides=exact_strides, allow_padding=allow_padding ) @classmethod - def require_stride_order(cls, x, order, allow_padding=False): + def require_stride_order(cls, x, order, allow_padding=False): # type: ignore[no-untyped-def] return cls.require_strides(x, order=order, allow_padding=allow_padding) @classmethod - def require_channels_last(cls, x): + def require_channels_last(cls, x): # type: ignore[no-untyped-def] return cls.require_stride_order(x, NHWC_STRIDE_ORDER) @classmethod - def require_channels_last_3d(cls, x): + def require_channels_last_3d(cls, x): # type: ignore[no-untyped-def] return cls.require_stride_order(x, NHWDC_STRIDE_ORDER) @classmethod - def require_contiguous(cls, x): + def require_contiguous(cls, x): # type: ignore[no-untyped-def] return cls.require_stride_order(x, list(reversed(range(len(x.get_size()))))) - def apply_constraint(self): + def apply_constraint(self) -> None: pass - def fill_non_provided_args(self, args, kwargs): + def fill_non_provided_args(self, args, kwargs): # type: ignore[no-untyped-def] # Previously, we want to maintain forward-compatibility by skipping # default args in the serialized artifacts in fbcode. However, # some of our shim interfaces require default values being OrderedSet. 
@@ -5038,7 +5041,7 @@ def fill_non_provided_args(self, args, kwargs): ) return args - def codegen_const_args(self, names: Optional[List[str]] = None): + def codegen_const_args(self, names: Optional[List[str]] = None): # type: ignore[no-untyped-def] if V.graph.cpp_wrapper: result = [] # Aten ops follow the convention that tensor args are before non-tensor args, @@ -5070,7 +5073,7 @@ def codegen_const_args(self, names: Optional[List[str]] = None): else: return map(V.graph.wrapper_code.val_to_arg_str, self.constant_args) - def codegen_args(self): + def codegen_args(self): # type: ignore[no-untyped-def] if V.graph.cpp_wrapper and self.op_overload is not None: # cpp wrapper needs special logic to fill in missing args with default values inputs = self.fill_non_provided_args( @@ -5096,7 +5099,7 @@ def codegen_args(self): args.extend(self.codegen_const_args()) return args - def get_kwargs_value(self, arg_name): + def get_kwargs_value(self, arg_name): # type: ignore[no-untyped-def] if arg_name in self.kwargs: return self.kwargs.get(arg_name) if self.allarg_properties and self.allarg_properties.get(arg_name): @@ -5104,7 +5107,7 @@ def get_kwargs_value(self, arg_name): else: raise AssertionError(f"{arg_name} not in self.allarg_properties") - def codegen_kwargs(self, skip_out=False): + def codegen_kwargs(self, skip_out=False): # type: ignore[no-untyped-def] if V.graph.cpp_wrapper: if self.op_overload is not None and len(self.schema_kwargs) == 0: # All the args should have been generated by fill_non_provided_args in codegen_args @@ -5133,7 +5136,7 @@ def codegen_kwargs(self, skip_out=False): ] return kwargs - def codegen_size_asserts(self, wrapper): + def codegen_size_asserts(self, wrapper) -> None: # type: ignore[no-untyped-def] if config.size_asserts and not V.graph.cpp_wrapper: # comparing strides for 0 size tensor is tricky. Ignore them for now. 
if sympy_product(self.get_size()) == 0: @@ -5144,7 +5147,7 @@ def codegen_size_asserts(self, wrapper): f"assert_size_stride({self.get_name()}, {size}, {stride})" ) - def get_group_stride(self): + def get_group_stride(self): # type: ignore[no-untyped-def] """ get output sizes and strides, for template_codegen """ @@ -5153,7 +5156,7 @@ def get_group_stride(self): # iter_ranges = _size of output tensor, reduce_range = [] because no reduction return [_size, []], _stride - def canonicalize(self): + def canonicalize(self): # type: ignore[no-untyped-def] """ Manually get canonicalization of the output index """ @@ -5211,7 +5214,7 @@ def __str__(self) -> str: @ir_dataclass(frozen=False) class ExternKernelOut(ExternKernel): - def codegen(self, wrapper): + def codegen(self, wrapper) -> None: # type: ignore[no-untyped-def] self.codegen_comment(wrapper) args = [*self.codegen_args(), *self.codegen_kwargs(skip_out=True)] kernel_name = self.get_kernel_name() @@ -5230,7 +5233,7 @@ def codegen(self, wrapper): args, ) - def __init__( + def __init__( # type: ignore[no-untyped-def] self, layout, inputs, @@ -5241,7 +5244,7 @@ def __init__( cpp_kernel_name=None, ordered_kwargs_for_cpp_kernel=(), op_overload=None, - ): + ) -> None: super().__init__( None, layout, @@ -5257,12 +5260,12 @@ def __init__( self.name = V.graph.register_buffer(self) V.graph.register_operation(self) - def should_allocate(self): + def should_allocate(self) -> bool: return True class RandomSeeds(ExternKernelOut): - def __init__(self, count: int, device: torch.device): + def __init__(self, count: int, device: torch.device) -> None: limits = torch.iinfo(torch.int64) super().__init__( layout=FixedLayout( @@ -5282,14 +5285,14 @@ def __init__(self, count: int, device: torch.device): class ExternKernelAlloc(ExternKernel): - def codegen(self, wrapper): + def codegen(self, wrapper) -> None: # type: ignore[no-untyped-def] self.codegen_comment(wrapper) args = [*self.codegen_args(), *self.codegen_kwargs()] V.graph.wrapper_code.generate_extern_kernel_alloc(self, args) if isinstance(self.layout, Layout): self.codegen_size_asserts(wrapper) - def __init__( + def __init__( # type: ignore[no-untyped-def] self, layout, inputs, @@ -5299,7 +5302,7 @@ def __init__( cpp_kernel_name=None, ordered_kwargs_for_cpp_kernel=(), op_overload=None, - ): + ) -> None: super().__init__( None, layout, @@ -5319,10 +5322,10 @@ def __init__( self.name = V.graph.register_buffer(self) V.graph.register_operation(self) - def should_allocate(self): + def should_allocate(self) -> bool: return False - def apply_constraint(self): + def apply_constraint(self): # type: ignore[no-untyped-def] raise NotImplementedError @@ -5331,7 +5334,7 @@ class MutationOutput(Buffer): An output buffer that represents the mutation of a pre-existing buffer """ - def __init__(self, layout, mutated_node, mutating_node: Operation): + def __init__(self, layout, mutated_node, mutating_node: Operation) -> None: # type: ignore[no-untyped-def] super().__init__(name=None, layout=layout) mutated_node_name = mutated_node.get_name() V.graph.mark_buffer_mutated(mutated_node_name) @@ -5342,10 +5345,10 @@ def __init__(self, layout, mutated_node, mutating_node: Operation): def get_defining_op(self) -> Operation: return self.mutating_node - def get_mutation_names(self): + def get_mutation_names(self): # type: ignore[no-untyped-def] return self.mutation_names - def should_allocate(self): + def should_allocate(self) -> bool: return False @@ -5362,7 +5365,7 @@ class TMADescriptor(ExternKernel): _CACHE: Dict[Any, 
TMADescriptor] = {} @classmethod - def create( + def create( # type: ignore[no-untyped-def] cls, tensor: TensorBox, dims: List[Union[int, torch.SymInt]], @@ -5380,7 +5383,7 @@ def __init__( dims: List[Union[int, torch.SymInt]], block_dims: List[Union[int, torch.SymInt]], element_size: Optional[int] = None, - ): + ) -> None: assert len(dims) in (1, 2) assert len(dims) == len(block_dims) @@ -5419,12 +5422,12 @@ def __init__( self.name = V.graph.register_buffer(self) V.graph.register_operation(self) - def codegen(self, wrapper): + def codegen(self, wrapper) -> None: # type: ignore[no-untyped-def] wrapper.generate_tma_descriptor(self) class UserDefinedTritonKernel(ExternKernel): - def get_kernel_and_configs(self): + def get_kernel_and_configs(self): # type: ignore[no-untyped-def] from triton.runtime.autotuner import Autotuner from torch._higher_order_ops.triton_kernel_wrap import kernel_side_table @@ -5436,7 +5439,7 @@ def get_kernel_and_configs(self): kernel = kernel.fn return kernel, configs - def codegen(self, wrapper): + def codegen(self, wrapper) -> None: # type: ignore[no-untyped-def] kernel, configs = self.get_kernel_and_configs() # Definition of kernel @@ -5520,7 +5523,7 @@ def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: def get_unbacked_symbol_defs(self) -> OrderedSet[sympy.Symbol]: return OrderedSet() - def __init__(self, *, kernel_idx, grid, tma_descriptor_metadata, kernel_args): + def __init__(self, *, kernel_idx, grid, tma_descriptor_metadata, kernel_args) -> None: # type: ignore[no-untyped-def] inputs = [] kwargs = {} constant_args = [] @@ -5583,7 +5586,7 @@ class InplaceBernoulliFallback(ExternKernel): This needs to be a custom class to handle mutation properly """ - def codegen(self, wrapper): + def codegen(self, wrapper) -> None: # type: ignore[no-untyped-def] (x,) = (t.codegen_reference() for t in self.inputs) if V.graph.cpp_wrapper: @@ -5597,16 +5600,16 @@ def codegen(self, wrapper): f"{self.get_kernel_name()}({x}, {', '.join(map(repr, self.constant_args))}){wrapper.ending}" ) - def should_allocate(self): + def should_allocate(self) -> bool: return False - def get_mutation_names(self): + def get_mutation_names(self): # type: ignore[no-untyped-def] return [self.inputs[0].get_name()] def get_unbacked_symbol_defs(self) -> OrderedSet[sympy.Symbol]: return OrderedSet() - def __init__(self, op_overload, x, *constant_args): + def __init__(self, op_overload, x, *constant_args) -> None: # type: ignore[no-untyped-def] super().__init__( None, NoneLayout(device=x.get_device()), @@ -5625,25 +5628,25 @@ class InplaceCopyFallback(ExternKernel): This needs to be a custom class to handle mutation properly """ - def codegen(self, wrapper): + def codegen(self, wrapper) -> None: # type: ignore[no-untyped-def] (dst, src, non_blocking) = self.codegen_args() wrapper.codegen_device_copy(src, dst, non_blocking) - def should_allocate(self): + def should_allocate(self) -> bool: return False - def get_mutation_names(self): + def get_mutation_names(self): # type: ignore[no-untyped-def] return [self.inputs[0].get_name()] def get_unbacked_symbol_defs(self) -> OrderedSet[sympy.Symbol]: return OrderedSet() - def __init__( + def __init__( # type: ignore[no-untyped-def] self, layout, inputs, constant_args, - ): + ) -> None: super().__init__( None, layout, @@ -5657,7 +5660,7 @@ def __init__( V.graph.register_operation(self) @classmethod - def create(cls, dst, src, non_blocking: bool = False): + def create(cls, dst, src, non_blocking: bool = False): # type: ignore[no-untyped-def] # type: 
ignore[no-untyped-def] inputs = [cls.realize_input(t) for t in [dst, src]] constant_args = (non_blocking,) result = InplaceCopyFallback( @@ -5673,7 +5676,7 @@ class MutatingFirstArgExternKernel(ExternKernel): This needs to be a custom class to handle mutation properly """ - def codegen(self, wrapper): + def codegen(self, wrapper) -> None: # type: ignore[no-untyped-def] argrefs = [ *(t.codegen_reference() for t in self.inputs), *map(repr, self.constant_args), @@ -5682,21 +5685,21 @@ def codegen(self, wrapper): f"{self.get_kernel_name()}({', '.join(argrefs)}){wrapper.ending}" ) - def should_allocate(self): + def should_allocate(self) -> bool: return False - def get_mutation_names(self): + def get_mutation_names(self): # type: ignore[no-untyped-def] return [self.inputs[0].get_name()] def get_unbacked_symbol_defs(self) -> OrderedSet[sympy.Symbol]: return OrderedSet() - def has_side_effects(self): + def has_side_effects(self) -> bool: return True class ResizeStorageBytes(MutatingFirstArgExternKernel): - def __init__(self, variable, new_size): + def __init__(self, variable, new_size) -> None: # type: ignore[no-untyped-def] assert isinstance(new_size, int), "TODO: dynamic shapes" super().__init__( None, @@ -5713,7 +5716,7 @@ def __init__(self, variable, new_size): class SetSourceTensorKernel(ExternKernelAlloc): - def __init__(self, self_tensor, storage_tensor): + def __init__(self, self_tensor, storage_tensor) -> None: # type: ignore[no-untyped-def] storage_tensor.freeze_layout() super().__init__( storage_tensor.get_layout(), @@ -5730,7 +5733,7 @@ def __init__(self, self_tensor, storage_tensor): MutationOutput(NoneLayout(device=device), storage_tensor, self), ] - def get_inputs_that_alias_output(self): + def get_inputs_that_alias_output(self): # type: ignore[no-untyped-def] return [self.inputs[0].get_name(), self.inputs[1].get_name()] @@ -5741,7 +5744,7 @@ class ScatterFallback(ExternKernel): It also handle the case `src` being a scalar properly. """ - def codegen(self, wrapper): + def codegen(self, wrapper) -> None: # type: ignore[no-untyped-def] reduce = self.kwargs["reduce"] if V.graph.cpp_wrapper: # Follow aten/src/ATen/native/ReductionType.h:get_operator_enum @@ -5764,16 +5767,16 @@ def codegen(self, wrapper): self.codegen_kwargs(), ) - def should_allocate(self): + def should_allocate(self) -> bool: return False - def get_mutation_names(self): + def get_mutation_names(self): # type: ignore[no-untyped-def] return [self.inputs[0].get_name()] def get_unbacked_symbol_defs(self) -> OrderedSet[sympy.Symbol]: return OrderedSet() - def __init__( + def __init__( # type: ignore[no-untyped-def] self, op_overload, x, @@ -5783,7 +5786,7 @@ def __init__( *, reduce: Optional[str] = None, include_self: bool = True, - ): + ) -> None: self.src_is_tensor = isinstance(src, TensorBox) constant_args: Tuple[Any, ...] 
@@ -5814,7 +5817,7 @@ class IndexPutFallback(ExternKernel): This needs to be a custom class to handle mutation and indices properly """ - def codegen(self, wrapper): + def codegen(self, wrapper) -> None: # type: ignore[no-untyped-def] (x, values, *valid_indices) = (t.codegen_reference() for t in self.inputs) indices = [] iter_valid_indices = iter(valid_indices) @@ -5828,16 +5831,16 @@ def codegen(self, wrapper): self.get_kernel_name(), x, indices, values, *self.codegen_const_args() ) - def should_allocate(self): + def should_allocate(self) -> bool: return False - def get_mutation_names(self): + def get_mutation_names(self): # type: ignore[no-untyped-def] return [self.inputs[0].get_name()] def get_unbacked_symbol_defs(self) -> OrderedSet[sympy.Symbol]: return OrderedSet() - def __init__(self, op_overload, x, indices, values, accumulate): + def __init__(self, op_overload, x, indices, values, accumulate) -> None: # type: ignore[no-untyped-def] self.indices = indices valid_indices = [i for i in indices if i is not None] tensors = [self.realize_input(x) for x in [x, values, *valid_indices]] @@ -5858,7 +5861,7 @@ def __init__(self, op_overload, x, indices, values, accumulate): class DeviceCopy(ExternKernelOut): @classmethod - def create(cls, x, device, non_blocking): + def create(cls, x, device, non_blocking): # type: ignore[no-untyped-def] if ( not x.is_extern() and all(r in V.graph.constants for r in x.get_read_names()) @@ -5881,7 +5884,7 @@ def create(cls, x, device, non_blocking): constant_args, ) - def codegen(self, wrapper): + def codegen(self, wrapper) -> None: # type: ignore[no-untyped-def] args = self.codegen_args() assert len(args) == 2 if self.output_view: @@ -5897,13 +5900,13 @@ class DynamicScalar(ExternKernel): The result of a call to aten._local_scalar_dense. 
""" - def get_reads(self): + def get_reads(self): # type: ignore[no-untyped-def] return () - def should_allocate(self): + def should_allocate(self) -> bool: return False - def __init__(self, sym, keypath, data): + def __init__(self, sym, keypath, data) -> None: # type: ignore[no-untyped-def] data.realize() super().__init__( None, NoneLayout(device=torch.device("cpu")), self.unwrap_storage([data]) @@ -5914,7 +5917,7 @@ def __init__(self, sym, keypath, data): def get_unbacked_symbol_defs(self) -> OrderedSet[sympy.Symbol]: return OrderedSet([self.sym]) - def codegen(self, wrapper): + def codegen(self, wrapper) -> None: # type: ignore[no-untyped-def] wrapper.codegen_dynamic_scalar(self) @@ -5923,13 +5926,13 @@ class AssertScalar(ExternKernel): The result of a call to aten._assert_scalar """ - def get_reads(self): + def get_reads(self): # type: ignore[no-untyped-def] return () - def should_allocate(self): + def should_allocate(self) -> bool: return False - def __init__(self, scalar, msg): + def __init__(self, scalar, msg) -> None: # type: ignore[no-untyped-def] super().__init__( # Buffer(name, layotu) None, @@ -5940,13 +5943,13 @@ def __init__(self, scalar, msg): self.scalar = scalar self.msg = msg - def has_side_effects(self): + def has_side_effects(self) -> bool: return True - def get_unbacked_symbol_uses(self): + def get_unbacked_symbol_uses(self): # type: ignore[no-untyped-def] return free_unbacked_symbols(self.scalar) - def codegen(self, wrapper): + def codegen(self, wrapper) -> None: # type: ignore[no-untyped-def] if V.graph.cpp_wrapper: pass else: @@ -5973,7 +5976,7 @@ class ExternKernelNode: class FallbackKernel(ExternKernelAlloc): - def __init__( + def __init__( # type: ignore[no-untyped-def] self, layout, kernel, @@ -5983,7 +5986,7 @@ def __init__( kwargs=None, *, unbacked_bindings=None, - ): + ) -> None: if ( kernel == aten.mul.Tensor and len(tensor_args) == 1 @@ -6060,7 +6063,7 @@ def __init__( schema_args = schema.arguments args, kwargs = self.unflatten_args(self.inputs, self.constant_args) - def handle_aliasing_and_mutation(info, arg): + def handle_aliasing_and_mutation(info, arg) -> None: # type: ignore[no-untyped-def] # Assertions to make sure we didn't mismatch args if isinstance(info.type, torch.ListType): assert isinstance(arg, (list, tuple)) @@ -6080,7 +6083,7 @@ def handle_aliasing_and_mutation(info, arg): if info.alias_info is None: return - def add_alias(t): + def add_alias(t) -> None: # type: ignore[no-untyped-def] self.alias_names.append(t.get_name()) if info.alias_info.is_write: self.mutation_outputs.append( @@ -6097,7 +6100,7 @@ def add_alias(t): for info, arg in torch._library.utils.zip_schema(schema, args, kwargs): handle_aliasing_and_mutation(info, arg) - def codegen_unbacked_symbol_defs(self, wrapper): + def codegen_unbacked_symbol_defs(self, wrapper) -> None: # type: ignore[no-untyped-def] if not hasattr(self, "unbacked_bindings"): return @@ -6110,7 +6113,7 @@ def codegen_unbacked_symbol_defs(self, wrapper): for s, keypath in unbacked_bindings.items(): - def go(expr, keypath): + def go(expr, keypath): # type: ignore[no-untyped-def] if keypath == (): return expr @@ -6137,7 +6140,7 @@ def go(expr, keypath): else: raise AssertionError(f"unrecognized keypath {keypath}") - def go_outer(): + def go_outer(): # type: ignore[no-untyped-def] if V.graph.cpp_wrapper: # Special handling for the top level buffer access, # because self.get_name() is actually never bound; the @@ -6165,7 +6168,7 @@ def get_unbacked_symbol_defs(self) -> OrderedSet[sympy.Symbol]: else: return 
OrderedSet() - def codegen_args(self): + def codegen_args(self): # type: ignore[no-untyped-def] @dataclasses.dataclass class Shim: ref: Any @@ -6189,7 +6192,7 @@ def __repr__(self) -> str: return args @staticmethod - def find_device(tensor_args, example_output): + def find_device(tensor_args, example_output): # type: ignore[no-untyped-def] if tensor_args: devices = [arg.get_device() for arg in tensor_args if arg.get_device()] return devices[0] @@ -6209,15 +6212,15 @@ def find_device(tensor_args, example_output): return devices[0] return None - def has_side_effects(self): + def has_side_effects(self): # type: ignore[no-untyped-def] if isinstance(self.op_overload, torch._ops.HigherOrderOperator): return False return get_schema_info(self.op_overload).is_mutable() - def get_inputs_that_alias_output(self): + def get_inputs_that_alias_output(self): # type: ignore[no-untyped-def] return self.alias_names - def get_mutation_names(self): + def get_mutation_names(self): # type: ignore[no-untyped-def] assert len(self.mutation_names) <= 1 return self.mutation_names @@ -6227,7 +6230,7 @@ def get_mutation_names(self): # This is currently only implemented for fbcode. Eventually, we will also make this work for OSS. # Detailed design doc can be found at # https://docs.google.com/document/d/1wC4DOZFaYym2t1Esz0X5yxlLI3RDnSiyRbUus3bkJ64/edit?usp=sharing - def export_extern_kernel_node(self): + def export_extern_kernel_node(self): # type: ignore[no-untyped-def] assert isinstance(self, FallbackKernel) args, kwargs = self.unflatten_args(self.inputs, self.constant_args) args = self.fill_non_provided_args(args, kwargs) @@ -6242,7 +6245,7 @@ def export_extern_kernel_node(self): named_arguments = serializer.serialize_inputs(self.op_overload, args, kwargs) # serialize_outputs - def handle_single_output(return_type, output): + def handle_single_output(return_type, output): # type: ignore[no-untyped-def] if isinstance(return_type, torch.TensorType): # For single Tensor out = output @@ -6301,7 +6304,7 @@ def handle_single_output(return_type, output): return [*args, *ordered_kwargs] - def codegen(self, wrapper): + def codegen(self, wrapper) -> None: # type: ignore[no-untyped-def] kernel = self.op_overload if kernel.namespace == "aten": # type: ignore[union-attr] # Aten Fallback Ops @@ -6354,7 +6357,7 @@ def codegen(self, wrapper): self.codegen_unbacked_symbol_defs(wrapper) @staticmethod - def tensor_to_layout(output: torch.Tensor): + def tensor_to_layout(output: torch.Tensor): # type: ignore[no-untyped-def] return FixedLayout( output.device, output.dtype, @@ -6363,7 +6366,7 @@ def tensor_to_layout(output: torch.Tensor): ) @classmethod - def create(cls, kernel, *args, **kwargs): + def create(cls, kernel, *args, **kwargs): # type: ignore[no-untyped-def] fake_incorrect_kernels = (aten._fused_moving_avg_obs_fq_helper_functional,) context: ContextManager[None] = ( V.graph.fake_mode if kernel not in fake_incorrect_kernels else nullcontext() # type: ignore[assignment] @@ -6399,7 +6402,7 @@ def create(cls, kernel, *args, **kwargs): unbacked_bindings=unbacked_bindings, ) - def generate_output(output, indices): + def generate_output(output, indices): # type: ignore[no-untyped-def] if isinstance(output, (list, tuple)): return type(output)( generate_output(output[i], indices + [(type(output), i)]) @@ -6433,7 +6436,7 @@ def generate_output(output, indices): packed.outputs = [outputs] return outputs - def apply_constraint(self): + def apply_constraint(self): # type: ignore[no-untyped-def] return super().apply_constraint() @@ -6441,14 
+6444,14 @@ def apply_constraint(self): class ComplexView(FallbackKernel): """View a complex number as two dtyped numbers or vice versa""" - def should_allocate(self): + def should_allocate(self) -> bool: return False - def get_inputs_that_alias_output(self): + def get_inputs_that_alias_output(self): # type: ignore[no-untyped-def] # Signal to codegen that our output buffer isn't safe to reuse return [self.inputs[0].get_name()] - def __init__( + def __init__( # type: ignore[no-untyped-def] self, layout, kernel, @@ -6457,7 +6460,7 @@ def __init__( unflatten_args, *, unbacked_bindings=None, - ): + ) -> None: super().__init__( layout, kernel, @@ -6477,7 +6480,7 @@ class MultiOutput(ExternKernel): # Given an input MultiOutputLayout buffer, indexes out an actual buffer # from that result. This doesn't actually produce multiple outputs, # that's MultiOutputLayout! - def codegen_list_tuple_access(self, basename, indices): + def codegen_list_tuple_access(self, basename, indices): # type: ignore[no-untyped-def] if len(indices) > 0: itype, i = indices[0] if issubclass(itype, list): @@ -6495,13 +6498,13 @@ def codegen_list_tuple_access(self, basename, indices): else: return basename - def codegen(self, wrapper): + def codegen(self, wrapper) -> None: # type: ignore[no-untyped-def] wrapper.codegen_multi_output( self.get_name(), self.codegen_list_tuple_access(self.inputs[0].get_name(), self.indices), ) - def __init__(self, layout, input, indices: List[Tuple[Any, ...]]): + def __init__(self, layout, input, indices: List[Tuple[Any, ...]]) -> None: # type: ignore[no-untyped-def] super().__init__(None, layout, [input], ()) self.name = V.graph.register_buffer(self) V.graph.register_operation(self) @@ -6510,10 +6513,10 @@ def __init__(self, layout, input, indices: List[Tuple[Any, ...]]): def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: return self.inputs[0].get_unbacked_symbol_uses() - def should_allocate(self): + def should_allocate(self) -> bool: return False - def get_inputs_that_alias_output(self): + def get_inputs_that_alias_output(self): # type: ignore[no-untyped-def] return [ inp.get_name() for inp in self.inputs @@ -6532,13 +6535,13 @@ class MutableBox(IRNode): data: IRNode - def __getattr__(self, name): + def __getattr__(self, name): # type: ignore[no-untyped-def] fn = getattr(self.data, name) if callable(fn): return fn raise AttributeError(f"{type(self.data).__name__}.{name} not callable") - def realize(self): + def realize(self): # type: ignore[no-untyped-def] return self.data.realize() def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: @@ -6547,24 +6550,24 @@ def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: def get_read_names(self) -> OrderedSet[str]: return self.data.get_read_names() - def get_defining_op(self): + def get_defining_op(self) -> Optional[Operation]: return self.data.get_defining_op() - def codegen_reference(self, writer=None): + def codegen_reference(self, writer=None): # type: ignore[no-untyped-def] return self.data.codegen_reference(writer) @property - def layout(self): + def layout(self): # type: ignore[no-untyped-def] return self.data.get_layout() - def get_layout(self): + def get_layout(self): # type: ignore[no-untyped-def] return self.layout - def get_size(self): + def get_size(self): # type: ignore[no-untyped-def] return self.data.get_size() @property - def dtype(self): + def dtype(self): # type: ignore[no-untyped-def] return self.data.dtype def __str__(self) -> str: @@ -6589,23 +6592,23 @@ def __str__(self) -> str: class 
TensorBox(MutableBox): @staticmethod - def create(data): + def create(data): # type: ignore[no-untyped-def] return TensorBox(StorageBox(data)) class StorageBox(MutableBox): - def is_input_buffer(self): + def is_input_buffer(self): # type: ignore[no-untyped-def] if isinstance(self.data, (InputBuffer, ReinterpretView)): return self.data.get_name() in V.graph.graph_inputs return False - def is_module_buffer(self): + def is_module_buffer(self): # type: ignore[no-untyped-def] return ( isinstance(self.data, (ConstantBuffer)) and self.data.get_name() in V.graph.constants ) - def realize(self): + def realize(self): # type: ignore[no-untyped-def] if isinstance( self.data, ( @@ -6638,7 +6641,7 @@ def realize(self): self.data.traceback = traceback return self.data.name - def realize_hint(self): + def realize_hint(self) -> None: """ Called on buffers we expect to be forced to realize later. """ @@ -6648,13 +6651,13 @@ def realize_hint(self): ): self.realize() - def has_exceeded_max_reads(self): + def has_exceeded_max_reads(self): # type: ignore[no-untyped-def] return isinstance(self.data, Pointwise) and ( self.num_reads() > config.realize_acc_reads_threshold or self.has_large_inner_fn() ) - def should_realize_on_reuse(self, users): + def should_realize_on_reuse(self, users): # type: ignore[no-untyped-def] """ A heuristic to decide if we should realize a tensor that is used multiple times. @@ -6672,11 +6675,11 @@ def should_realize_on_reuse(self, users): ) return False - def mark_reuse(self, users): + def mark_reuse(self, users) -> None: # type: ignore[no-untyped-def] if self.should_realize_on_reuse(users): self.realize() - def num_reads(self): + def num_reads(self): # type: ignore[no-untyped-def] return self.data.num_reads() @@ -6704,7 +6707,7 @@ class InvokeSubgraph(ExternKernel): def __init__( self, subgraph: Subgraph, operands: List[TensorBox], layout: MultiOutputLayout - ): + ) -> None: super().__init__( name=None, layout=layout, @@ -6715,7 +6718,7 @@ def __init__( V.graph.register_operation(self) @classmethod - def create(cls, subgraph: Subgraph, operands): + def create(cls, subgraph: Subgraph, operands): # type: ignore[no-untyped-def] # TODO(anijain2305) - Support sym expr as operands in future. fx_operands = V.graph.current_node.args[-1] fake_operands = [x.meta["val"] for x in fx_operands] # type: ignore[union-attr] @@ -6725,7 +6728,7 @@ def create(cls, subgraph: Subgraph, operands): # strides as that of subgraph inputs. 
operands = [cls.realize_input(x) for x in operands] - def handle_sym_expr(stride): + def handle_sym_expr(stride): # type: ignore[no-untyped-def] return [s.node.expr if isinstance(s, torch.SymInt) else s for s in stride] fake_strides = [fake_operand.stride() for fake_operand in fake_operands] @@ -6771,7 +6774,7 @@ def handle_sym_expr(stride): invoke_subgraph.outputs = outputs return outputs - def codegen(self, wrapper): + def codegen(self, wrapper) -> None: # type: ignore[no-untyped-def] wrapper.codegen_invoke_subgraph(self) @@ -6790,7 +6793,7 @@ def __init__( true_subgraph: Subgraph, false_subgraph: Subgraph, layout: MultiOutputLayout, - ): + ) -> None: self.predicate = predicate self.operands = operands self.true_subgraph = true_subgraph @@ -6811,7 +6814,7 @@ def __init__( V.graph.register_operation(self) @classmethod - def create( + def create( # type: ignore[no-untyped-def] cls, predicate: TensorBox, true_fn: Subgraph, @@ -6892,7 +6895,7 @@ def create( conditional.outputs = outputs return outputs - def codegen(self, wrapper): + def codegen(self, wrapper) -> None: # type: ignore[no-untyped-def] wrapper.codegen_conditional(self) @@ -6911,7 +6914,7 @@ def __init__( cond_subgraph: Subgraph, body_subgraph: Subgraph, layout: MultiOutputLayout, - ): + ) -> None: self.carried_inputs = carried_inputs self.additional_inputs = additional_inputs self.cond_subgraph = cond_subgraph @@ -6927,7 +6930,7 @@ def __init__( V.graph.register_operation(self) @classmethod - def create( + def create( # type: ignore[no-untyped-def] cls, cond_fn: Subgraph, body_fn: Subgraph, @@ -7019,12 +7022,12 @@ def create( while_loop.outputs = outputs return outputs - def codegen(self, wrapper): + def codegen(self, wrapper) -> None: # type: ignore[no-untyped-def] wrapper.codegen_while_loop(self) class EffectfulKernel(FallbackKernel): - def __init__( + def __init__( # type: ignore[no-untyped-def] self, layout, kernel, @@ -7034,7 +7037,7 @@ def __init__( kwargs=None, *, unbacked_bindings=None, - ): + ) -> None: super().__init__( layout, kernel, @@ -7053,7 +7056,7 @@ def __init__( self.prev_effect_buffer = V.graph.effectful_ops.get(effect_type, None) V.graph.effectful_ops[effect_type] = self - def get_read_writes(self): + def get_read_writes(self): # type: ignore[no-untyped-def] read_writes = super().get_read_writes() if self.prev_effect_buffer is not None: @@ -7063,7 +7066,7 @@ def get_read_writes(self): return read_writes - def has_side_effects(self): + def has_side_effects(self) -> bool: return True @@ -7072,26 +7075,26 @@ class TorchBindObject(IRNode): name: str value: torch._C.ScriptObject - def get_name(self): + def get_name(self): # type: ignore[no-untyped-def] return self.name - def get_device(self): + def get_device(self): # type: ignore[no-untyped-def] return None # is there a device?? - def codegen_reference(self, writer=None): + def codegen_reference(self, writer=None): # type: ignore[no-untyped-def] return self.name class _CollectiveKernel(FallbackKernel): - def should_allocate(self): + def should_allocate(self) -> bool: return False - def has_side_effects(self): + def has_side_effects(self) -> bool: return True # This is identical to FallbackKernel.set_cpp_kernel(), minus the # part that checks against input aliasing and mutation. 
-    def set_cpp_kernel_name(self, cpp_kernel_name: Optional[str] = None):
+    def set_cpp_kernel_name(self, cpp_kernel_name: Optional[str] = None) -> None:
         from .codegen.wrapper import get_cpp_op_schema
 
         assert (
@@ -7114,7 +7117,7 @@ def set_cpp_kernel_name(self, cpp_kernel_name: Optional[str] = None):
     # the constraints, we model collective -> wait_tensor as a two-step
     # mutation of the input buffers.
     @classmethod
-    def create_inplace(
+    def create_inplace(  # type: ignore[no-untyped-def]
         cls, kernel, inputs: Union[TensorBox, List[TensorBox]], *args, **kwargs
     ) -> None:
         with V.graph.fake_mode:
@@ -7175,7 +7178,7 @@ def create_inplace(
     # TODO(yifu): add a pre-grad pass to validate the correctness of collective
     # usage in the user program.
     @classmethod
-    def create_out_of_place(
+    def create_out_of_place(  # type: ignore[no-untyped-def]
         cls, kernel, inputs: Union[TensorBox, List[TensorBox]], *args, **kwargs
     ):
         with V.graph.fake_mode:
@@ -7221,7 +7224,7 @@ def create_out_of_place(
 
 
 class _WaitKernel(_CollectiveKernel):
-    def get_volatile_reads(self):
+    def get_volatile_reads(self):  # type: ignore[no-untyped-def]
         inp = self.inputs[0]
         if isinstance(inp, _CollectiveKernel):
             # Out-of-place single-output
@@ -7243,7 +7246,7 @@ def get_volatile_reads(self):
             return []
 
     @classmethod
-    def create_wait(cls, kernel, inp: TensorBox) -> None:
+    def create_wait(cls, kernel, inp: TensorBox) -> None:  # type: ignore[no-untyped-def]
         with V.graph.fake_mode:
             (
                 example_output,
@@ -7264,7 +7267,7 @@ def create_wait(cls, kernel, inp: TensorBox) -> None:
                 MutationOutput(NoneLayout(device=inp.get_device()), inp, packed)
             )
 
-    def get_read_writes(self):
+    def get_read_writes(self):  # type: ignore[no-untyped-def]
         read_writes = super().get_read_writes()
         # See [Out-of-Place Collective Safety].
volatile_reads = self.get_volatile_reads() From 06f619d999ac396bd83e32d36a64360bd24dacca Mon Sep 17 00:00:00 2001 From: Aaron Orenstein Date: Tue, 5 Nov 2024 08:32:22 -0800 Subject: [PATCH 113/503] typing ir.py - part 2 (#131846) See #131852 Pull Request resolved: https://github.com/pytorch/pytorch/pull/131846 Approved by: https://github.com/eellison ghstack dependencies: #139238 --- torch/_inductor/codegen/cpp.py | 26 +- torch/_inductor/codegen/cuda/cuda_kernel.py | 2 +- .../codegen/cuda/cutlass_epilogue_gen.py | 2 +- torch/_inductor/codegen/cuda/gemm_template.py | 6 +- .../codegen/rocm/ck_conv_template.py | 10 +- torch/_inductor/debug.py | 2 +- torch/_inductor/fx_passes/b2b_gemm.py | 2 +- torch/_inductor/graph.py | 2 +- torch/_inductor/ir.py | 451 ++++++++++-------- torch/_inductor/lowering.py | 12 +- torch/_inductor/select_algorithm.py | 2 +- 11 files changed, 291 insertions(+), 226 deletions(-) diff --git a/torch/_inductor/codegen/cpp.py b/torch/_inductor/codegen/cpp.py index a6e5475082b4fe..84bfdefa40a46b 100644 --- a/torch/_inductor/codegen/cpp.py +++ b/torch/_inductor/codegen/cpp.py @@ -1003,18 +1003,20 @@ def wrapper(*args, **kwargs): if scalars and vectors: assert isinstance(V.kernel, CppVecKernel) new_args = [ - V.kernel.broadcast(new_arg) - if ( - isinstance(new_arg, CppCSEVariable) - and not new_arg.is_vec - and func - not in [ - CppVecOverrides.rand, - CppVecOverrides.randn, - CppVecOverrides.randint64, - ] + ( + V.kernel.broadcast(new_arg) + if ( + isinstance(new_arg, CppCSEVariable) + and not new_arg.is_vec + and func + not in [ + CppVecOverrides.rand, + CppVecOverrides.randn, + CppVecOverrides.randint64, + ] + ) + else new_arg ) - else new_arg for new_arg in new_args ] @@ -4047,7 +4049,7 @@ def _can_fuse_nodes_with_compatible_ranges(self, node1, node2): else: assert isinstance(ref_node, SchedulerNode) assert isinstance(ref_node.node, ir.ComputedBuffer) - ranges1 = ref_node.node.data.get_size() + ranges1 = ref_node.node.data.get_size() # type: ignore[assignment] if ranges1 != ranges2: return False diff --git a/torch/_inductor/codegen/cuda/cuda_kernel.py b/torch/_inductor/codegen/cuda/cuda_kernel.py index ad288244012b1b..91312e013580be 100644 --- a/torch/_inductor/codegen/cuda/cuda_kernel.py +++ b/torch/_inductor/codegen/cuda/cuda_kernel.py @@ -263,7 +263,7 @@ def offset(self, node: IRNode) -> str: if node is None: return "0" - return str(node.get_layout().offset) + return str(node.get_layout().offset) # type: ignore[union-attr] def ptr(self, node: IRNode) -> str: """ diff --git a/torch/_inductor/codegen/cuda/cutlass_epilogue_gen.py b/torch/_inductor/codegen/cuda/cutlass_epilogue_gen.py index a41fa62b5a7b9f..d82208a9af78a2 100644 --- a/torch/_inductor/codegen/cuda/cutlass_epilogue_gen.py +++ b/torch/_inductor/codegen/cuda/cutlass_epilogue_gen.py @@ -265,7 +265,7 @@ def ir_to_evt_argument_string( result = pnode.inner_fn(index) # each epilogue node results in a single "using" statement and may refer to the previous steps by name if node.name is not None: - formatter.aliases[node.name] = result + formatter.aliases[node.name] = result # type: ignore[assignment] res: str = formatter.getvalue(result) # type: ignore[possibly-undefined] if _MAGIC_SYMPY_ERROR_STRING in res: diff --git a/torch/_inductor/codegen/cuda/gemm_template.py b/torch/_inductor/codegen/cuda/gemm_template.py index ee2c51bd779cb8..d324da4c38b932 100644 --- a/torch/_inductor/codegen/cuda/gemm_template.py +++ b/torch/_inductor/codegen/cuda/gemm_template.py @@ -1269,14 +1269,14 @@ def render_gemm_arguments( # Swap def 
clone_with_transposed_stride(node: IRNode) -> IRNode: old_layout = node.get_layout() - new_stride = list(old_layout.stride) + new_stride = list(old_layout.stride) # type: ignore[union-attr] new_stride[-2], new_stride[-1] = new_stride[-1], new_stride[-2] new_layout = FixedLayout( old_layout.device, old_layout.dtype, - list(old_layout.size), + list(old_layout.size), # type: ignore[union-attr] new_stride, - old_layout.offset, + old_layout.offset, # type: ignore[union-attr] ) return Buffer(name=node.get_name(), layout=new_layout) diff --git a/torch/_inductor/codegen/rocm/ck_conv_template.py b/torch/_inductor/codegen/rocm/ck_conv_template.py index 02ad5a404808a2..3fa2a2a7ccc253 100644 --- a/torch/_inductor/codegen/rocm/ck_conv_template.py +++ b/torch/_inductor/codegen/rocm/ck_conv_template.py @@ -540,11 +540,11 @@ def render(self, kernel: ROCmTemplateKernel, op: "CKGroupedConvFwdOp", **kwargs) n_d_tensors=1 if Bias is not None else 0, n_dim_spatial=self.n_spatial_dimensions, group_count=self.groups, - batch_size=X.shape[0], - n_output_channels=Y.shape[1], - n_input_channels=X.shape[1], - filter_size=", ".join(map(str, W.shape[2:])), - input_size=", ".join(map(str, X.shape[2:])), + batch_size=X.shape[0], # type: ignore[index] + n_output_channels=Y.shape[1], # type: ignore[index] + n_input_channels=X.shape[1], # type: ignore[index] + filter_size=", ".join(map(str, W.shape[2:])), # type: ignore[index] + input_size=", ".join(map(str, X.shape[2:])), # type: ignore[index] convolution_strides=", ".join(map(str, self.stride)), dilations=", ".join(map(str, self.dilation)), left_pads=", ".join(map(str, self.padding)), diff --git a/torch/_inductor/debug.py b/torch/_inductor/debug.py index be0a15a981c9d2..092cb09f92cbad 100644 --- a/torch/_inductor/debug.py +++ b/torch/_inductor/debug.py @@ -604,7 +604,7 @@ def build_node_info(node: ir.IRNode) -> Dict[str, str]: except Exception as e: pass try: - node_info["size"] = str(V.graph.sizevars.size_hints(node.get_size())) + node_info["size"] = str(V.graph.sizevars.size_hints(node.get_size())) # type: ignore[arg-type] except Exception as e: pass try: diff --git a/torch/_inductor/fx_passes/b2b_gemm.py b/torch/_inductor/fx_passes/b2b_gemm.py index 64cb597188eed0..aa7e8e56ea8f50 100644 --- a/torch/_inductor/fx_passes/b2b_gemm.py +++ b/torch/_inductor/fx_passes/b2b_gemm.py @@ -531,7 +531,7 @@ def tuned_b2b_gemm( A.realize() B.realize() C.realize() - layout = FixedLayout(A.get_device(), A.get_dtype(), [A.shape[0], C.shape[1]]) + layout = FixedLayout(A.get_device(), A.get_dtype(), [A.shape[0], C.shape[1]]) # type: ignore[index] subgraph_buffer = build_subgraph_buffer( [create_placeholder("inner_mm", A.get_dtype(), A.get_device())], subgraph, diff --git a/torch/_inductor/graph.py b/torch/_inductor/graph.py index 21423d8323b4ee..fb96fb7c69b491 100644 --- a/torch/_inductor/graph.py +++ b/torch/_inductor/graph.py @@ -1745,7 +1745,7 @@ def validate_can_generate_cpp_wrapper(self) -> None: ): dtype = may_get_constant_buffer_dtype(value) - if not supported_dtype_of_cpp_wrapper(dtype, self.device_type): + if not supported_dtype_of_cpp_wrapper(dtype, self.device_type): # type: ignore[arg-type] raise CppWrapperCodegenError(f"Unsupported input dtype {dtype}") def init_wrapper_code( diff --git a/torch/_inductor/ir.py b/torch/_inductor/ir.py index 76f05626bde606..0e1bce54b1a743 100644 --- a/torch/_inductor/ir.py +++ b/torch/_inductor/ir.py @@ -7,6 +7,7 @@ import logging import textwrap import traceback +import typing from contextlib import nullcontext from enum import Enum from 
functools import partial @@ -23,12 +24,13 @@ Optional, overload, Sequence, + Set, Tuple, TYPE_CHECKING, TypeVar, Union, ) -from typing_extensions import TypeAlias +from typing_extensions import Never, TypeAlias from unittest.mock import patch import sympy @@ -68,6 +70,7 @@ from . import config, dependencies from .codegen.common import BackendFeature, index_prevent_reordering from .dependencies import ( + Dep, extract_free_unbacked_symbols, extract_input_node_reduction_ranges, extract_read_writes, @@ -99,13 +102,24 @@ if TYPE_CHECKING: + from torch.fx.node import Node + + from .codegen.cuda.cuda_template import CUDATemplate from .graph import GraphLowering + from .utils import IndentedBuffer + +else: + CUDATemplate: TypeAlias = object + _T = TypeVar("_T") _U = TypeVar("_U") _V = TypeVar("_V") _IntLike: TypeAlias = Union[int, Expr] +_NumLike: TypeAlias = Union[int, float, Expr] + +_AnyLayout: TypeAlias = Union["Layout", "MultiOutputLayout", "NoneLayout"] log = logging.getLogger(__name__) indent = functools.partial(textwrap.indent, prefix=" ") @@ -300,7 +314,7 @@ def ir_node_to_tensor( size = [shape_fn(s) for s in x.get_size()] stride: StrideType if is_storage_and_layout(x): - stride = [shape_fn(s) for s in x.get_layout().stride] + stride = [shape_fn(s) for s in x.get_layout().stride] # type: ignore[union-attr] else: stride = FlexibleLayout.contiguous_strides(size) dtype = x.get_dtype() @@ -351,9 +365,7 @@ class IRNode: @staticmethod @contextlib.contextmanager - def current_origins( - origins: OrderedSet[torch.fx.Node], - ) -> Generator[None, None, None]: + def current_origins(origins: OrderedSet[Node]) -> Generator[None, None, None]: old = IRNode._current_origins IRNode._current_origins = old | origins try: @@ -386,15 +398,17 @@ def get_origin_node(self): # type: ignore[no-untyped-def] def get_defining_op(self) -> Optional[Operation]: raise NotImplementedError - def common_repr(self, shorten=True): # type: ignore[no-untyped-def] + def common_repr(self, shorten: bool = True) -> Sequence[str]: origins = f"origins={getattr(self, 'origins', '')}" if shorten and len(origins) > 64: # this can get *very* long origins = f"{origins[:61]}..." 
return [origins] - def str_helper(self, lines, shorten=True, multiline=True) -> str: # type: ignore[no-untyped-def] - lines = lines + self.common_repr(shorten) + def str_helper( + self, lines: Sequence[object], shorten: bool = True, multiline: bool = True + ) -> str: + lines = list(lines) + list(self.common_repr(shorten)) lines = list(map(str, lines)) if multiline: new_lines = indent(",\n".join(lines)) @@ -405,23 +419,23 @@ def str_helper(self, lines, shorten=True, multiline=True) -> str: # type: ignor def get_dtype(self) -> torch.dtype: return self.dtype - def get_layout(self): # type: ignore[no-untyped-def] + def get_layout(self) -> _AnyLayout: raise NotImplementedError(f"get_layout() is not implemented by {type(self)}!") - def get_size(self): # type: ignore[no-untyped-def] + def get_size(self) -> Sequence[_IntLike]: raise NotImplementedError(f"get_size() is not implemented by {type(self)}!") @property - def shape(self): # type: ignore[no-untyped-def] + def shape(self) -> Union[_IntLike, sympy.Rel, Sequence[_IntLike]]: return self.get_size() - def get_numel(self): # type: ignore[no-untyped-def] + def get_numel(self) -> Expr: return sympy_product(self.get_size()) - def is_zero_elements(self): # type: ignore[no-untyped-def] + def is_zero_elements(self) -> bool: return V.graph.sizevars.is_expr_static_and_true(sympy.Eq(self.get_numel(), 0)) - def realize(self): # type: ignore[no-untyped-def] + def realize(self) -> Optional[str]: """ If the IRNode refers to data which has not been materialized (e.g., it is a Pointwise/Reduction that could potentially have more @@ -439,25 +453,32 @@ def realize(self): # type: ignore[no-untyped-def] """ raise NotImplementedError(f"realize NYI on {type(self)}") - def codegen_reference(self, writer=None): # type: ignore[no-untyped-def] + def codegen_reference(self, writer: Optional[IndentedBuffer] = None) -> str: raise NotImplementedError(f"codegen_reference NYI on {type(self)}") # The abstract method declarations below serve to convince mypy that all IRNode instances have these functions # defined, while having no effect at runtime. We cannot create stub implementations here because other parts of # the code dynamically check for defined attributes. get_device: Callable[[], torch.device] - dtype: torch.dtype get_name: Callable[[], str] get_reads: Callable[[], Any] num_reads: Callable[[], int] get_stride: Callable[[], Any] - get_storage_numel: Callable[[], Any] + get_storage_numel: Callable[[], _IntLike] has_exceeded_max_reads: Callable[[], bool] - make_loader: Callable[[], Callable[[Any], Any]] - make_indexer: Callable[[], Callable[[Any], Any]] - mark_reuse: Callable[[int], None] + make_loader: Callable[[], Callable[[Sequence[_IntLike]], OpsValue]] + make_indexer: Callable[[], Callable[[Sequence[_IntLike]], _IntLike]] realize_hint: Callable[[], None] - get_unbacked_symbol_uses: Callable[[], OrderedSet[sympy.Symbol]] + get_unbacked_symbol_uses: Callable[[], OrderedSet[Symbol]] + + if TYPE_CHECKING: + + @property + def dtype(self) -> torch.dtype: + ... + + def mark_reuse(self, users: int) -> None: + ... 
@ir_dataclass(frozen=False) @@ -533,16 +554,16 @@ def get_workspace_size(self) -> int: class Loops(IRNode): device: torch.device dtype: torch.dtype - inner_fn: Callable[..., Any] - ranges: List[Expr] + inner_fn: Callable[..., OpsValue] + ranges: Sequence[_IntLike] - def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: + def get_unbacked_symbol_uses(self) -> OrderedSet[Symbol]: return OrderedSet().union( *(free_unbacked_symbols(e) for e in self.ranges), self.inner_fn_free_unbacked_symbols(), ) - def __str__(self, names=("ranges",)) -> str: # type: ignore[no-untyped-def] + def __str__(self, names: Tuple[str] = ("ranges",)) -> str: return self.str_helper( [ f"'{self.device.type}'", @@ -558,16 +579,16 @@ def __post_init__(self) -> None: __repr__ = __str__ - def get_device(self): # type: ignore[no-untyped-def] + def get_device(self) -> torch.device: return self.device - def get_origin_node(self): # type: ignore[no-untyped-def] + def get_origin_node(self) -> Optional[Node]: return self.origin_node - def get_size(self): # type: ignore[no-untyped-def] + def get_size(self) -> Sequence[_IntLike]: return self.ranges - def get_pointwise_size(self): # type: ignore[no-untyped-def] + def get_pointwise_size(self) -> Sequence[_IntLike]: return self.ranges def is_extern(self) -> bool: @@ -588,7 +609,7 @@ def create(cls, *args, **kwargs): # type: ignore[no-untyped-def] return TensorBox.create(r) @staticmethod - def _index(ranges, prefix=SymT.INDEX): # type: ignore[no-untyped-def] + def _index(ranges: Sequence[_IntLike], prefix: SymT = SymT.INDEX) -> Sequence[Expr]: return [ sympy.S.Zero if s == 1 else sympy_index_symbol_with_prefix(prefix, n) for n, s in enumerate(ranges) @@ -603,37 +624,37 @@ def inner_fn_opcount(self) -> OpCountResult: self.inner_fn(*self.inner_fn_args()) return opcounter.getvalue() - def inner_fn_args(self): # type: ignore[no-untyped-def] + def inner_fn_args(self) -> Sequence[Sequence[_IntLike]]: return (self._index(self.ranges),) @cache_on_self - def inner_fn_str(self): # type: ignore[no-untyped-def] + def inner_fn_str(self) -> str: return V.KernelFormatterHandler.ir_to_string( self.inner_fn, *self.inner_fn_args() ) - def has_large_inner_fn(self, threshold=None): # type: ignore[no-untyped-def] + def has_large_inner_fn(self, threshold=None) -> bool: # type: ignore[no-untyped-def] if threshold is None: threshold = 0 threshold = max(threshold, config.realize_opcount_threshold) return self.inner_fn_opcount().num_ops > threshold - def inner_fn_free_unbacked_symbols(self): # type: ignore[no-untyped-def] + def inner_fn_free_unbacked_symbols(self) -> Set[Symbol]: index = self._index(self.ranges) return extract_free_unbacked_symbols(self.inner_fn, index) - def get_reads(self): # type: ignore[no-untyped-def] + def get_reads(self) -> Set[Dep]: with patch.object(FlexibleLayout, "allow_indexing", True): if self.get_reduction_type(): return extract_read_writes( self.make_loader(), - self.get_size(), - self.get_reduction_size(), + self.get_size(), # type: ignore[arg-type] + self.get_reduction_size(), # type: ignore[arg-type] ).reads else: return extract_read_writes( self.make_loader(), - self.get_size(), + self.get_size(), # type: ignore[arg-type] ).reads def get_read_names(self) -> OrderedSet[str]: @@ -642,17 +663,17 @@ def get_read_names(self) -> OrderedSet[str]: def num_reads(self): # type: ignore[no-untyped-def] return len(self.inner_fn_opcount().read_buffers) - def get_reduction_size(self): # type: ignore[no-untyped-def] + def get_reduction_size(self) -> Sequence[_IntLike]: raise 
NotImplementedError( f"get_reduction_size() is not implemented by {type(self)}!" ) - def get_reduction_type(self): # type: ignore[no-untyped-def] + def get_reduction_type(self) -> Optional[str]: raise NotImplementedError( f"get_reduction_type() is not implemented by {type(self)}!" ) - def constant_to_device(self, device): # type: ignore[no-untyped-def] + def constant_to_device(self, device: torch.device) -> IRNode: raise NotImplementedError( f"constant_to_device() is not implemented by {type(self)}!" ) @@ -667,24 +688,29 @@ def nop_loader_fn(idx: Union[Expr, Sequence[Expr]], *, dtype: torch.dtype) -> Op @ir_dataclass class Pointwise(Loops): - def make_loader(self): # type: ignore[no-untyped-def] + def make_loader(self) -> Callable[[Sequence[_IntLike]], OpsValue]: # Make zero-element loops into a no-op if self.is_zero_elements(): return partial(nop_loader_fn, dtype=self.dtype) return self.inner_fn - def get_reduction_size(self): # type: ignore[no-untyped-def] + def get_reduction_size(self) -> Sequence[_IntLike]: return [] - def get_reduction_type(self): # type: ignore[no-untyped-def] + def get_reduction_type(self) -> Optional[str]: return None - def store_output(self, output_name, indexer, vars): # type: ignore[no-untyped-def] + def store_output( + self, + output_name: Optional[str], + indexer: Callable[[Sequence[Expr]], Never], + vars: Sequence[Expr], + ) -> OpsValue: loader = self.make_loader() return ops.store(output_name, indexer(vars), loader(vars)) - def constant_to_device(self, device): # type: ignore[no-untyped-def] + def constant_to_device(self, device: torch.device) -> IRNode: """Move this to a given device. Requires that all reads are to constants.""" loader = self.make_loader() loader = patch.object(ConstantBuffer, "override_device", device)(loader) @@ -695,10 +721,10 @@ def constant_to_device(self, device): # type: ignore[no-untyped-def] @ir_dataclass class Scatter(Pointwise): - output_indexer: Callable[[List[Expr]], Expr] + output_indexer: Callable[[Sequence[Expr]], Expr] scatter_mode: Optional[str] = None - def constant_to_device(self, device): # type: ignore[no-untyped-def] + def constant_to_device(self, device: torch.device) -> IRNode: """Move this to a given device. 
Requires that all reads are to constants.""" loader = self.make_loader() loader = patch.object(ConstantBuffer, "override_device", device)(loader) @@ -711,7 +737,12 @@ def constant_to_device(self, device): # type: ignore[no-untyped-def] scatter_mode=self.scatter_mode, ) - def store_output(self, output_name, indexer, vars): # type: ignore[no-untyped-def] + def store_output( + self, + output_name: Optional[str], + indexer: Callable[[Sequence[Expr]], Never], + vars: Sequence[Expr], + ) -> OpsValue: loader = self.make_loader() return ops.store( output_name, @@ -812,7 +843,7 @@ def significant_strides_equal( @ir_dataclass class Reduction(Loops): - reduction_ranges: List[Expr] + reduction_ranges: Sequence[_IntLike] reduction_type: str # self.dtype represents the dst dtype src_dtype: torch.dtype @@ -826,18 +857,24 @@ def __str__(self) -> str: # type: ignore[override] def __repr__(self) -> str: # type: ignore[override] return self.__str__() - def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: + def get_unbacked_symbol_uses(self) -> OrderedSet[Symbol]: return super().get_unbacked_symbol_uses() | OrderedSet().union( *(free_unbacked_symbols(e) for e in self.reduction_ranges) ) - def get_reduction_size(self): # type: ignore[no-untyped-def] + def get_reduction_size(self) -> Sequence[_IntLike]: return self.reduction_ranges - def get_reduction_type(self): # type: ignore[no-untyped-def] + def get_reduction_type(self) -> Optional[str]: return self.reduction_type - def store_reduction(self, output_name, indexer, vars, reduction_vars): # type: ignore[no-untyped-def] + def store_reduction( + self, + output_name: Optional[str], + indexer: Callable[[Sequence[Expr]], Never], + vars: Sequence[Expr], + reduction_vars: Sequence[Symbol], + ) -> OpsValue: value = ops.reduction( self.dtype, self.src_dtype, @@ -849,17 +886,17 @@ def store_reduction(self, output_name, indexer, vars, reduction_vars): # type: def index_length(self) -> int: return len(self.ranges) + len(self.reduction_ranges) - def inner_fn_args(self): # type: ignore[no-untyped-def] + def inner_fn_args(self) -> Sequence[Sequence[Expr]]: index = self._index(self.ranges) rindex = self._index(self.reduction_ranges, SymT.RINDEX) return (index, rindex) - def inner_fn_free_unbacked_symbols(self): # type: ignore[no-untyped-def] + def inner_fn_free_unbacked_symbols(self) -> Set[Symbol]: index = self._index(self.ranges) rindex = self._index(self.reduction_ranges, SymT.RINDEX) return extract_free_unbacked_symbols(self.inner_fn, index, rindex) - def constant_to_device(self, device): # type: ignore[no-untyped-def] + def constant_to_device(self, device: torch.device) -> IRNode: """Move this to a given device. 
Requires that all reads are to constants.""" loader = self.make_loader() loader = patch.object(ConstantBuffer, "override_device", device)(loader) @@ -875,19 +912,19 @@ def constant_to_device(self, device): # type: ignore[no-untyped-def] ) @staticmethod - def num_splits( # type: ignore[no-untyped-def] # type: ignore[no-untyped-def] - device, - dst_dtype, - src_dtype, - inner_fn, - ranges, - reduction_ranges, - reduction_type, - reduction_numel, + def num_splits( + device: torch.device, + dst_dtype: torch.dtype, + src_dtype: torch.dtype, + inner_fn: Callable[..., OpsValue], + ranges: Sequence[_IntLike], + reduction_ranges: Sequence[_IntLike], + reduction_type: str, + reduction_numel: Expr, input_node: Optional[IRNode] = None, - ): - def _is_static(x): # type: ignore[no-untyped-def] - return isinstance(x, (int, sympy.Integer)) + ) -> Tuple[ReductionHint, _IntLike]: + def _is_static(x: object) -> bool: + return isinstance(x, (int, Integer)) reduction_numel_hint = V.graph.sizevars.symbolic_hint(reduction_numel) numel_hint = V.graph.sizevars.symbolic_hint(sympy_product(ranges)) @@ -905,7 +942,9 @@ def _is_static(x): # type: ignore[no-untyped-def] # We don't support unbacked symints return ReductionHint.DEFAULT, 1 - device_interface = get_interface_for_device(get_device_type(device)) # type: ignore[arg-type] + dtype = get_device_type(device) + assert dtype is not None + device_interface = get_interface_for_device(dtype) device_properties = device_interface.Worker.get_device_properties(device) if get_device_type(device) == "xpu": num_sm = device_properties.gpu_subslice_count @@ -919,7 +958,7 @@ def _is_static(x): # type: ignore[no-untyped-def] min_elements_per_device = min_elements_per_thread * num_sm * threads_per_sm max_elements_per_device = max_elements_per_thread * num_sm * threads_per_sm - def inner_reduction_splits(reduction_numel_hint, numel_hint): # type: ignore[no-untyped-def] + def inner_reduction_splits(reduction_numel_hint: _IntLike, numel_hint: _IntLike): # type: ignore[no-untyped-def] if not should_split: return 1 # do heuristics that's close to eager mode for split inner reduction @@ -1040,7 +1079,7 @@ def outer_reduction_splits(reduction_numel_hint, numel_hint): # type: ignore[no reduction_hint=ReductionHint.DEFAULT, ) - def get_read_indices(r): # type: ignore[no-untyped-def] + def get_read_indices(r: Reduction) -> Tuple[Sequence[Expr], bool]: cb = ComputedBuffer( name=None, layout=FlexibleLayout( @@ -1054,10 +1093,11 @@ def get_read_indices(r): # type: ignore[no-untyped-def] # try finding the full size producer # TODO this will fail for something like ((1, N) * (N, 1)).sum() # this would also possibly be wrong for producers with the different contiguity but we hope those cases are rare + assert read_writes.range_vars is not None range_vars = [ r for r in read_writes.range_vars - if isinstance(r, sympy.Expr) and not isinstance(r, sympy.Number) + if isinstance(r, Expr) and not isinstance(r, sympy.Number) ] indices = [] changed = False @@ -1066,9 +1106,9 @@ def get_read_indices(r): # type: ignore[no-untyped-def] indices.append(md.index) if md.name in V.graph.name_to_buffer: buf = V.graph.name_to_buffer[md.name] - original_stride = buf.layout.stride + original_stride = getattr(buf.layout, "stride", None) buf.decide_layout() - if buf.layout.stride != original_stride: + if getattr(buf.layout, "stride", None) != original_stride: changed = True return indices, changed @@ -1080,14 +1120,14 @@ def get_read_indices(r): # type: ignore[no-untyped-def] # TODO determine splits when all inputs 
are broadcast return ReductionHint.DEFAULT, 1 - (_, reduction_vars), ranges = dependencies.index_vars_squeeze( - r.get_size(), r.get_reduction_size() + (_, reduction_vars), ranges1 = dependencies.index_vars_squeeze( + r.get_size(), r.get_reduction_size() # type: ignore[arg-type] ) num_outer = 0 num_inner = 0 for i in indices: - i = V.graph.sizevars.simplify_with_ranges(i, ranges) - strides = V.graph.sizevars.stride_hints(i, reduction_vars, ranges.keys()) + j = V.graph.sizevars.simplify_with_ranges(i, ranges1) + strides = V.graph.sizevars.stride_hints(j, reduction_vars, ranges1.keys()) outer = all(s > 1 for s in strides) if outer: num_outer += 1 @@ -1143,32 +1183,33 @@ def value_fn(index, rindex): # type: ignore[no-untyped-def] return fn @classmethod - def create( # type: ignore[no-untyped-def] + def create( cls, device: torch.device, dst_dtype: torch.dtype, src_dtype: torch.dtype, inner_fn: Callable[..., Any], - ranges: List[Expr], - reduction_ranges: List[Expr], + ranges: Sequence[Expr], + reduction_ranges: Sequence[Expr], reduction_type: str, reduction_hint: ReductionHint = ReductionHint.DEFAULT, input_node: Optional[IRNode] = None, - ): + ) -> TensorBox: reduction_numel = V.graph.sizevars.simplify(sympy_product(reduction_ranges)) if reduction_numel == 0: # N.B. This is a hack to generate the literal of the given type # Ideally, we should be fixing `def constant` in triton.py # but it breaks due to hardcoded dtypes in other places - def py_cnst(val): # type: ignore[no-untyped-def] - return ( - bool(val) - if dst_dtype == torch.bool - else float(val) - if dst_dtype.is_floating_point - else int(val) - ) + def py_cnst(val: object) -> Union[bool, float, int]: + if dst_dtype == torch.bool: + return bool(val) + elif dst_dtype.is_floating_point: + assert isinstance(val, typing.SupportsFloat) + return float(val) + else: + assert isinstance(val, typing.SupportsInt) + return int(val) rtypes_to_inits = { "sum": py_cnst(0), @@ -1182,7 +1223,7 @@ def py_cnst(val): # type: ignore[no-untyped-def] reduction_type in rtypes_to_inits.keys() ), f"{reduction_type} not supported for zero-dimension tensors!" 
- def const_fn(index): # type: ignore[no-untyped-def] + def const_fn(index: int) -> OpsValue: return ops.constant(rtypes_to_inits[reduction_type], dst_dtype) return Pointwise.create( @@ -1196,12 +1237,12 @@ def const_fn(index): # type: ignore[no-untyped-def] # this reduction is actually a pointwise op if reduction_type in ("argmin", "argmax"): - def fn(index): # type: ignore[no-untyped-def] + def fn(index: int) -> OpsValue: return ops.constant(0, dst_dtype) else: - def fn(index): # type: ignore[no-untyped-def] + def fn(index: int) -> OpsValue: reduction_index = [sympy.S.Zero for _ in reduction_ranges] return inner_fn(index, reduction_index) @@ -1210,7 +1251,7 @@ def fn(index): # type: ignore[no-untyped-def] ) if ( - isinstance(reduction_numel, sympy.Integer) + isinstance(reduction_numel, Integer) and V.graph.sizevars.size_hint(reduction_numel) < config.unroll_reductions_threshold and sympy_product(ranges) != 1 @@ -1288,7 +1329,9 @@ def fn(index): # type: ignore[no-untyped-def] ) @staticmethod - def default_accumulator(reduction_type, dtype): # type: ignore[no-untyped-def] + def default_accumulator( + reduction_type: str, dtype: torch.dtype + ) -> Union[_NumLike, Sequence[_NumLike]]: if reduction_type in ("max", "argmax"): if is_float_dtype(dtype): return float("-inf") @@ -1314,14 +1357,16 @@ def default_accumulator(reduction_type, dtype): # type: ignore[no-untyped-def] }[reduction_type] @staticmethod - def default_value(reduction_type, dtype): # type: ignore[no-untyped-def] + def default_value( + reduction_type: str, dtype: torch.dtype + ) -> Union[_NumLike, Sequence[_NumLike]]: if reduction_type == "welford_reduce": return 0 return Reduction.default_accumulator(reduction_type, dtype) @staticmethod def _multilayer_second_step_hint( - split: int, numel_hint: int, reduction_hint: ReductionHint + split: _IntLike, numel_hint: int, reduction_hint: ReductionHint ) -> ReductionHint: if split == -1: return reduction_hint @@ -1337,26 +1382,28 @@ def _multilayer_second_step_hint( return reduction_hint @classmethod - def _multilayer_wrap_loader( # type: ignore[no-untyped-def] + def _multilayer_wrap_loader( cls, - loader, - reduction_ranges, - reduction_numel, - split, - block_size, - default, - ): + loader: Callable[..., OpsValue], + reduction_ranges: Sequence[_IntLike], + reduction_numel: _IntLike, + split: _IntLike, + block_size: _IntLike, + default: Union[_NumLike, Sequence[_NumLike]], + ) -> Callable[..., object]: reindex = View.dynamic_reshape_indexer(reduction_ranges, [reduction_numel]) need_mask = not V.graph.sizevars.is_expr_static_and_true( sympy.Eq(reduction_numel % split, 0) ) - def wrapper_fn(index, reduction_index): # type: ignore[no-untyped-def] + def wrapper_fn( + index: Sequence[Symbol], reduction_index: Sequence[Symbol] + ) -> OpsValue: (reduction_index,) = reduction_index *new_index, reduction_block = index indices = block_size * reduction_block + reduction_index - def body(): # type: ignore[no-untyped-def] + def body() -> OpsValue: return loader(new_index, reindex([indices])) if need_mask: @@ -1398,20 +1445,20 @@ def wrapper_fn(merged_index, new_reduction_index): # type: ignore[no-untyped-de return wrapper_fn @classmethod - def create_multilayer_helper( # type: ignore[no-untyped-def] + def create_multilayer_helper( cls, device: torch.device, dst_dtype: torch.dtype, src_dtype: torch.dtype, wrapper_fn: Callable[..., Any], - original_ranges: List[Expr], - original_reduction_ranges: List[Expr], + original_ranges: Sequence[Expr], + original_reduction_ranges: Sequence[Expr], new_ranges: 
List[Expr], - new_reduction_ranges: List[Expr], + new_reduction_ranges: List[Integer], reduction_type: str, - split: int, + split: _IntLike, reduction_hint: ReductionHint, - ): + ) -> TensorBox: """ Break a large reduction up into multiple smaller reductions recursively @@ -1437,7 +1484,9 @@ def create_multilayer_helper( # type: ignore[no-untyped-def] intermediate.realize() intermediate_loader = intermediate.make_loader() - def intermediate_fn(index, reduction_index): # type: ignore[no-untyped-def] + def intermediate_fn( + index: Sequence[_IntLike], reduction_index: Sequence[_IntLike] + ) -> OpsValue: return intermediate_loader([*index, *reduction_index]) numel_hint = V.graph.sizevars.size_hint(sympy_product(original_ranges)) @@ -1460,18 +1509,18 @@ def intermediate_fn(index, reduction_index): # type: ignore[no-untyped-def] ) @classmethod - def create_multilayer( # type: ignore[no-untyped-def] + def create_multilayer( cls, device: torch.device, dst_dtype: torch.dtype, src_dtype: torch.dtype, inner_fn: Callable[..., Any], - ranges: List[Expr], - reduction_ranges: List[Expr], + ranges: Sequence[Expr], + reduction_ranges: Sequence[Expr], reduction_type: str, - split: int, + split: _IntLike, reduction_hint: ReductionHint, - ): + ) -> TensorBox: """ Break a large reduction up into multiple smaller reductions recursively @@ -1505,10 +1554,10 @@ def create_multilayer_existing_ranges( # type: ignore[no-untyped-def] dst_dtype: torch.dtype, src_dtype: torch.dtype, inner_fn: Callable[..., Any], - original_ranges: List[Expr], - original_reduction_ranges: List[Expr], - new_ranges: List[Expr], - new_reduction_ranges: List[Expr], + original_ranges: Sequence[Expr], + original_reduction_ranges: Sequence[Expr], + new_ranges: List[Integer], + new_reduction_ranges: List[Integer], reduction_type: str, reduction_hint: ReductionHint, ): @@ -1543,16 +1592,16 @@ def create_multilayer_existing_ranges( # type: ignore[no-untyped-def] class WelfordReduction(Reduction): output_index: int - def __init__( # type: ignore[no-untyped-def] + def __init__( self, - device, - dtype, - inner_fns, - ranges, - reduction_ranges, - reduction_type, - reduction_hint, - output_index, + device: torch.device, + dtype: torch.dtype, + inner_fns: Sequence[Callable[..., Any]], + ranges: Sequence[Integer], + reduction_ranges: Sequence[Integer], + reduction_type: str, + reduction_hint: ReductionHint, + output_index: int, ) -> None: if len(inner_fns) == 1: loader = inner_fns[0] @@ -1573,7 +1622,13 @@ def loader(idx, reduction_idx): # type: ignore[no-untyped-def] ) self.output_index = output_index - def store_reduction(self, output_name, indexer, vars, reduction_vars): # type: ignore[no-untyped-def] + def store_reduction( + self, + output_name: Optional[str], + indexer: Callable[[Sequence[Expr]], Never], + vars: Sequence[Expr], + reduction_vars: Sequence[Symbol], + ) -> OpsValue: values = ops.reduction( self.dtype, self.src_dtype, @@ -1584,16 +1639,16 @@ def store_reduction(self, output_name, indexer, vars, reduction_vars): # type: return ops.store_reduction(output_name, indexer(vars), value) @classmethod - def create( # type: ignore[override, no-untyped-def] + def create( # type: ignore[override] cls, device: torch.device, dtype: torch.dtype, inner_fns: Sequence[Callable[..., Any]], - ranges: List[Expr], - reduction_ranges: List[Expr], + ranges: List[Integer], + reduction_ranges: List[Integer], reduction_type: str, reduction_hint: ReductionHint = ReductionHint.DEFAULT, - ): + ) -> Sequence[TensorBox]: assert reduction_type in ("welford_reduce", 
"welford_combine") reduction_numel = V.graph.sizevars.simplify(sympy_product(reduction_ranges)) @@ -1639,7 +1694,7 @@ def inner_fn(idx): # type: ignore[no-untyped-def] # TODO: Unrolled reduction # if ( - # isinstance(reduction_numel, sympy.Integer) + # isinstance(reduction_numel, Integer) # and V.graph.sizevars.size_hint(reduction_numel) # < config.unroll_reductions_threshold # and sympy_product(ranges) != 1 @@ -1702,21 +1757,23 @@ def inner_fn(idx): # type: ignore[no-untyped-def] return results @staticmethod - def default_value(reduction_type, dtype): # type: ignore[no-untyped-def] + def default_value( + reduction_type: str, dtype: torch.dtype + ) -> Union[_NumLike, Sequence[_NumLike]]: return (0, 0, 0) @classmethod - def create_multilayer( # type: ignore[override, no-untyped-def] + def create_multilayer( # type: ignore[override] cls, device: torch.device, dtype: torch.dtype, inner_fns: Sequence[Callable[..., Any]], - ranges: List[Expr], - reduction_ranges: List[Expr], + ranges: List[Integer], + reduction_ranges: List[Integer], reduction_type: str, - split: int, + split: _IntLike, reduction_hint: ReductionHint, - ): + ) -> Sequence[TensorBox]: """ Break a large reduction up into multiple smaller reductions recursively @@ -1797,10 +1854,10 @@ def intermediate_loader_fn(index, reduction_index, loader): # type: ignore[no-u @ir_dataclass class Scan(Loops): - scan_ranges: List[Expr] - size: List[Expr] + scan_ranges: List[Integer] + size: List[Integer] combine_fn: Callable[[Tuple[Any, ...], Tuple[Any, ...]], Tuple[Any, ...]] - reindex: Callable[[List[Expr], List[Expr]], List[Expr]] + reindex: Callable[[Sequence[_IntLike], Sequence[_IntLike]], Sequence[_IntLike]] reduction_hint: ReductionHint output_index: int # output_index indexes the following tuples @@ -1809,7 +1866,7 @@ class Scan(Loops): # HACK we mimick reduction - def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: + def get_unbacked_symbol_uses(self) -> OrderedSet[Symbol]: # TODO: Can combine_fn/reindex close over unbacked symbols? 
If so, we # need to explicitly represent the closure so we can pull out unbacked # symbols here @@ -1823,35 +1880,41 @@ def __post_init__(self) -> None: assert len(self.ranges) + len(self.scan_ranges) == len(self.size) super().__post_init__() - def store_reduction(self, output_name, indexer, vars, scan_vars): # type: ignore[no-untyped-def] + def store_reduction( + self, + output_name: Optional[str], + indexer: Callable[[Sequence[_IntLike]], Never], + vars: Sequence[Expr], + scan_vars: Sequence[Symbol], + ) -> OpsValue: idx = self.reindex(vars, scan_vars) values = [inner_fn(idx) for inner_fn in self.inner_fns] result = ops.scan(self.dtypes, self.combine_fn, values) return ops.store(output_name, indexer(idx), result[self.output_index]) - def get_reduction_type(self): # type: ignore[no-untyped-def] + def get_reduction_type(self) -> Optional[str]: # return self.scan_op return "custom" - def get_reduction_size(self): # type: ignore[no-untyped-def] + def get_reduction_size(self) -> Sequence[_IntLike]: return self.scan_ranges - def get_size(self): # type: ignore[no-untyped-def] + def get_size(self) -> Sequence[_IntLike]: return self.size - def get_pointwise_size(self): # type: ignore[no-untyped-def] + def get_pointwise_size(self) -> Sequence[_IntLike]: return self.ranges def index_length(self) -> int: return len(self.ranges) + len(self.scan_ranges) - def inner_fn_args(self): # type: ignore[no-untyped-def] + def inner_fn_args(self) -> Sequence[Sequence[_IntLike]]: index = self._index(self.ranges) rindex = self._index(self.scan_ranges, SymT.RINDEX) idx = self.reindex(index, rindex) return (idx,) - def inner_fn_free_unbacked_symbols(self): # type: ignore[no-untyped-def] + def inner_fn_free_unbacked_symbols(self) -> Set[Symbol]: index = self._index(self.ranges) rindex = self._index(self.scan_ranges, SymT.RINDEX) idx = self.reindex(index, rindex) @@ -1863,7 +1926,7 @@ def create( # type: ignore[no-untyped-def] device: torch.device, dtypes: Tuple[torch.dtype, ...], inner_fns: Tuple[Callable[[List[Expr]], Any], ...], - size: List[Expr], + size: List[Integer], axis: int, combine_fn: Callable[[Tuple[Any, ...], Tuple[Any, ...]], Tuple[Any, ...]], reduction_hint: ReductionHint = ReductionHint.DEFAULT, @@ -1871,7 +1934,7 @@ def create( # type: ignore[no-untyped-def] # Whether we have the option to fallback to aten can_fallback_to_aten: bool = True, **kwargs, - ) -> List[Optional[TensorBox]]: + ) -> Sequence[Optional[TensorBox]]: pointwise_ranges = [*size[:axis], *size[axis + 1 :]] scan_ranges = [size[axis]] @@ -1960,8 +2023,8 @@ def num_splits( # type: ignore[no-untyped-def] dtype: torch.dtype, inner_fn: Callable[[List[Expr]], Any], axis: int, - pointwise_ranges: List[Expr], - scan_ranges: List[Expr], + pointwise_ranges: List[Integer], + scan_ranges: List[Integer], combine_fn: Callable[[Tuple[Any, ...], Tuple[Any, ...]], Tuple[Any, ...]], scan_numel: Expr, ): @@ -1990,9 +2053,9 @@ class SplitScan(Scan): @ir_dataclass class Sort(Loops): # Sorts a tuple of key, value pairs - sort_ranges: List[Expr] - size: List[Expr] - reindex: Callable[[List[Expr], List[Expr]], List[Expr]] + sort_ranges: List[Integer] + size: List[Integer] + reindex: Callable[[Sequence[Expr], Sequence[Expr]], Sequence[Expr]] reduction_hint: ReductionHint output_index: int # output_index indexes the following tuples @@ -2004,7 +2067,7 @@ class Sort(Loops): # HACK we mimick reduction - def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: + def get_unbacked_symbol_uses(self) -> OrderedSet[Symbol]: return ( 
super().get_unbacked_symbol_uses() | OrderedSet().union(*(free_unbacked_symbols(e) for e in self.sort_ranges)) @@ -2021,28 +2084,28 @@ def store_reduction(self, output_name, indexer, vars, sort_vars): # type: ignor result = ops.sort(self.dtypes, values, self.stable, self.descending) return ops.store(output_name, indexer(idx), result[self.output_index]) - def get_reduction_type(self): # type: ignore[no-untyped-def] + def get_reduction_type(self) -> Optional[str]: return "sort" - def get_reduction_size(self): # type: ignore[no-untyped-def] + def get_reduction_size(self) -> Sequence[_IntLike]: return self.sort_ranges - def get_size(self): # type: ignore[no-untyped-def] + def get_size(self) -> Sequence[_IntLike]: return self.size - def get_pointwise_size(self): # type: ignore[no-untyped-def] + def get_pointwise_size(self) -> Sequence[_IntLike]: return self.ranges def index_length(self) -> int: return len(self.ranges) + len(self.sort_ranges) - def inner_fn_args(self): # type: ignore[no-untyped-def] + def inner_fn_args(self) -> Sequence[Sequence[Expr]]: index = self._index(self.ranges) rindex = self._index(self.sort_ranges, SymT.RINDEX) idx = self.reindex(index, rindex) return (idx,) - def inner_fn_free_unbacked_symbols(self): # type: ignore[no-untyped-def] + def inner_fn_free_unbacked_symbols(self) -> Set[Symbol]: index = self._index(self.ranges) rindex = self._index(self.sort_ranges, SymT.RINDEX) idx = self.reindex(index, rindex) @@ -2054,13 +2117,13 @@ def create( # type: ignore[no-untyped-def] device: torch.device, dtypes: Tuple[torch.dtype, ...], inner_fns: Tuple[Callable[[List[Expr]], Any], ...], - size: List[Expr], + size: List[Integer], axis: int, stable: bool, descending: bool, reduction_hint: ReductionHint = ReductionHint.DEFAULT, **kwargs, - ) -> List[Optional[TensorBox]]: + ) -> Sequence[Optional[TensorBox]]: pointwise_ranges = [*size[:axis], *size[axis + 1 :]] sort_ranges = [size[axis]] @@ -2288,7 +2351,7 @@ def get_reads(self): # type: ignore[no-untyped-def] with patch.object(FlexibleLayout, "allow_indexing", True): return extract_read_writes( self.make_loader(), - self.get_size(), + self.get_size(), # type: ignore[arg-type] ).reads def unwrap_view(self): # type: ignore[no-untyped-def] @@ -2726,7 +2789,7 @@ def make_indexer(self): # type: ignore[no-untyped-def] def get_layout(self): # type: ignore[no-untyped-def] return self.layout - def freeze_layout(self) -> None: + def freeze_layout(self): # type: ignore[no-untyped-def] pass def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: @@ -2960,7 +3023,7 @@ def __init__( stride ), f"size={size}, stride={stride}" self.device = device - self.dtype = dtype + self.dtype = dtype # type: ignore[misc] assert all(isinstance(s, (Expr, int)) for s in size) self.size = size self._stride = stride @@ -3106,7 +3169,7 @@ def _pad_strides(in_strides, size, dtype): # type: ignore[no-untyped-def] metrics.num_comprehensive_padding += 1 return new_strides - def pad_strides(self) -> None: + def pad_strides(self): # type: ignore[no-untyped-def] assert isinstance(self, FlexibleLayout) assert self._stride is not None self._stride = self._pad_strides(self._stride, self.size, self.dtype) @@ -3417,7 +3480,7 @@ def __init__(self, target: IRNode) -> None: super().__init__( target.get_device(), target.get_dtype(), - target.get_size(), + target.get_size(), # type: ignore[arg-type] None, ) self.target = target @@ -3541,7 +3604,7 @@ def get_storage_numel(self): # type: ignore[no-untyped-def] def is_extern(self) -> bool: return False - def freeze_layout(self) 
-> None: + def freeze_layout(self): # type: ignore[no-untyped-def] if not isinstance(self.layout, (MultiOutputLayout, NonOwningLayout)): self.layout = self.layout.as_fixed() @@ -3580,7 +3643,7 @@ def loader(index): # type: ignore[no-untyped-def] def codegen_reference(self, writer=None): # type: ignore[no-untyped-def] return self.get_name() - def decide_layout(self) -> None: + def decide_layout(self): # type: ignore[no-untyped-def] pass def get_inputs_that_alias_output(self): # type: ignore[no-untyped-def] @@ -3694,13 +3757,13 @@ def get_read_writes(self): # type: ignore[no-untyped-def] if self.data.get_reduction_type(): return extract_read_writes( self.get_store_function(), - self.data.get_pointwise_size(), - self.data.get_reduction_size(), + self.data.get_pointwise_size(), # type: ignore[arg-type] + self.data.get_reduction_size(), # type: ignore[arg-type] ) else: return extract_read_writes( self.get_store_function(), - self.data.get_size(), + self.data.get_size(), # type: ignore[arg-type] ) def get_unbacked_symbol_uses(self) -> OrderedSet[sympy.Symbol]: @@ -3757,7 +3820,7 @@ def get_fill_order(self): # type: ignore[no-untyped-def] """ if isinstance(self.layout, FlexibleLayout): (index_vars, reduction_vars), _ = dependencies.index_vars_squeeze( - self.data.get_pointwise_size(), self.data.get_reduction_size() + self.data.get_pointwise_size(), self.data.get_reduction_size() # type: ignore[arg-type] ) reads = self.get_read_writes().reads # only consider reads to buffer of same size @@ -3786,7 +3849,7 @@ def get_fill_order(self): # type: ignore[no-untyped-def] return None - def decide_layout(self) -> None: + def decide_layout(self): # type: ignore[no-untyped-def] if isinstance(self.layout, FlexibleLayout): order = self.get_fill_order() if order: @@ -3797,7 +3860,7 @@ def decide_layout(self) -> None: @cache_on_self def get_default_sizes_body(self): # type: ignore[no-untyped-def] args, var_ranges = dependencies.index_vars_squeeze( - self.data.get_pointwise_size(), self.data.get_reduction_size(), prefix="q" + self.data.get_pointwise_size(), self.data.get_reduction_size(), prefix="q" # type: ignore[arg-type] ) with patch.object(ConstantBuffer, "override_device", self.get_device()): body = LoopBody( @@ -4217,7 +4280,7 @@ def __init__( # type: ignore[no-untyped-def] inputs, make_kernel_render, workspace_size: int, - template: CUDATemplate, # type: ignore[name-defined] # noqa: F821 + template: CUDATemplate, ) -> None: super().__init__(layout, inputs, make_kernel_render) # Global memory (in bytes) needed for this template. @@ -4526,7 +4589,7 @@ def get_outputs(self) -> List[Buffer]: def get_unbacked_symbol_defs(self) -> OrderedSet[sympy.Symbol]: return OrderedSet() - def collect_arg_kwarg_properties(self) -> None: + def collect_arg_kwarg_properties(self): # type: ignore[no-untyped-def] # if self.op_overload is torch._ops.OpOverload, we can use its schema to collect additional # information for args and kwargs, e.g. 
type and default value, to help with the cpp wrapper codegen self.arg_properties = ( @@ -4561,7 +4624,7 @@ def collect_arg_kwarg_properties(self) -> None: x for x in self.op_overload._schema.arguments if x.kwarg_only ] - def decide_layout(self) -> None: + def decide_layout(self): # type: ignore[no-untyped-def] if isinstance(self.layout, FlexibleLayout): self.apply_constraint() self.freeze_layout() @@ -4784,7 +4847,7 @@ def convert_to_reinterpret_view(cls, x): # type: ignore[no-untyped-def] x_unwrap_view.freeze_layout() index_args, var_ranges = dependencies.index_vars_squeeze( - x.get_size(), prefix="r" + x.get_size(), prefix="r" # type: ignore[arg-type] ) range_vars = index_args[0] index = x.make_indexer()(range_vars) @@ -4808,7 +4871,7 @@ def convert_to_reinterpret_view(cls, x): # type: ignore[no-untyped-def] layout=FixedLayout( device=x.get_device(), dtype=x.get_dtype(), - size=x.get_size(), + size=x.get_size(), # type: ignore[arg-type] stride=strides, offset=offset, ), @@ -4858,7 +4921,7 @@ def require_stride1(cls, x): # type: ignore[no-untyped-def] return cls.copy_input(x) @classmethod - def require_strides( # type: ignore[no-untyped-def] # type: ignore[no-untyped-def] + def require_strides( # type: ignore[no-untyped-def] cls, x, order: Optional[Sequence[int]] = None, @@ -5660,7 +5723,7 @@ def __init__( # type: ignore[no-untyped-def] V.graph.register_operation(self) @classmethod - def create(cls, dst, src, non_blocking: bool = False): # type: ignore[no-untyped-def] # type: ignore[no-untyped-def] + def create(cls, dst, src, non_blocking: bool = False): # type: ignore[no-untyped-def] inputs = [cls.realize_input(t) for t in [dst, src]] constant_args = (non_blocking,) result = InplaceCopyFallback( @@ -6761,9 +6824,9 @@ def handle_sym_expr(stride): # type: ignore[no-untyped-def] FixedLayout( device=output.get_device(), dtype=output.get_dtype(), - size=output.get_size(), + size=output.get_size(), # type: ignore[arg-type] stride=output.get_stride(), - offset=output.get_layout().offset, + offset=output.get_layout().offset, # type: ignore[union-attr] ), invoke_subgraph, [(list, i)], diff --git a/torch/_inductor/lowering.py b/torch/_inductor/lowering.py index 88706728345784..292de89a6e4b0a 100644 --- a/torch/_inductor/lowering.py +++ b/torch/_inductor/lowering.py @@ -585,7 +585,7 @@ def inner_fn(index): device = override_device or device return Pointwise.create( - device=device, + device=device, # type: ignore[arg-type] dtype=dtype, inner_fn=inner_fn, ranges=ranges, @@ -792,10 +792,10 @@ def register_frexp(): frexp = ops_wrapper("frexp") def frexp0(*args, **kwargs): - return frexp(*args, **kwargs)[0] # type: ignore[index] # next PR + return frexp(*args, **kwargs)[0] # type: ignore[index] def frexp1(*args, **kwargs): - return frexp(*args, **kwargs)[1] # type: ignore[index] # next PR + return frexp(*args, **kwargs)[1] # type: ignore[index] pw_fns = [ make_pointwise(frexp0), @@ -5288,7 +5288,7 @@ def inner(x, axis=None, keepdims=False, *, dtype=None): ) result = Reduction.create(reduction_type=reduction_type, input_node=x, **kwargs) if isinstance( - result.data.data, Reduction + result.data.data, Reduction # type: ignore[attr-defined] ): # Only realize if reduction isn't unrolled result.realize() return result @@ -5816,7 +5816,7 @@ def cummax(x, axis=None): kwargs = _make_scan_inner(x, axis=axis, dtype=dtype) kwargs["dtypes"] = (dtype, torch.int64) kwargs["inner_fns"] = (x.make_loader(), lambda _: "rindex") - values, indices = ir.Scan.create(**kwargs, combine_fn=combine_fn) # type: ignore[arg-type] 
# next PR + values, indices = ir.Scan.create(**kwargs, combine_fn=combine_fn) # type: ignore[arg-type] if values is None: return fallback_cummax(x, dim=axis) return values, indices @@ -5846,7 +5846,7 @@ def cummin(x, axis=None): kwargs = _make_scan_inner(x, axis=axis, dtype=dtype) kwargs["dtypes"] = (dtype, torch.int64) kwargs["inner_fns"] = (x.make_loader(), lambda _: "rindex") - values, indices = ir.Scan.create(**kwargs, combine_fn=combine_fn) # type: ignore[arg-type] # next PR + values, indices = ir.Scan.create(**kwargs, combine_fn=combine_fn) # type: ignore[arg-type] if values is None: return fallback_cummin(x, dim=axis) return values, indices diff --git a/torch/_inductor/select_algorithm.py b/torch/_inductor/select_algorithm.py index 16e3073d08cf55..ed618673b21038 100644 --- a/torch/_inductor/select_algorithm.py +++ b/torch/_inductor/select_algorithm.py @@ -1666,7 +1666,7 @@ def log_results( map( str, V.graph.sizevars.size_hints( - n.get_size(), fallback=config.unbacked_symint_fallback + n.get_size(), fallback=config.unbacked_symint_fallback # type: ignore[arg-type] ), ) ) From e7cf7d00be1eb561e2e2abbec45d5a99c2bda8ed Mon Sep 17 00:00:00 2001 From: Xiaodong Wang Date: Wed, 6 Nov 2024 00:02:52 +0000 Subject: [PATCH 114/503] Support torch.bool in torch.sort + CUDA (#139409) Summary: This might be out-dated, so I'm adding it back and see if we pass all the tests. I'm pretty sure cuda12 is ok. Test Plan: CI Differential Revision: D65282650 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139409 Approved by: https://github.com/zou3519, https://github.com/ngimel, https://github.com/eqy --- aten/src/ATen/native/cuda/Sort.cpp | 3 --- test/test_sort_and_select.py | 6 ++--- .../_internal/common_methods_invocations.py | 22 +++++++++++++++---- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/aten/src/ATen/native/cuda/Sort.cpp b/aten/src/ATen/native/cuda/Sort.cpp index 4605be8cdf187e..39581cef25c203 100644 --- a/aten/src/ATen/native/cuda/Sort.cpp +++ b/aten/src/ATen/native/cuda/Sort.cpp @@ -63,9 +63,6 @@ void sort_cuda_kernel( "The dimension being sorted can not have more than INT_MAX elements."); const auto self_dtype = self.dtype(); - // FIXME: remove this check once cub sort supports bool - TORCH_CHECK(self_dtype != ScalarType::Bool, - "Sort currently does not support bool dtype on CUDA."); TORCH_CHECK(self_dtype != ScalarType::ComplexFloat && self_dtype != ScalarType::ComplexDouble, "Sort currently does not support complex dtypes on CUDA."); diff --git a/test/test_sort_and_select.py b/test/test_sort_and_select.py index aebfdaec0cb854..6d37607ffbf197 100644 --- a/test/test_sort_and_select.py +++ b/test/test_sort_and_select.py @@ -193,8 +193,7 @@ def test_sort_large_slice(self, device): self.assertEqual(res1val, res1val_cpu.cuda()) self.assertEqual(res1ind, res1ind_cpu.cuda()) - # FIXME: remove torch.bool from unsupported types once support is added for cub sort - @dtypes(*all_types_and(torch.half, torch.bfloat16)) + @dtypes(*all_types_and(torch.bool, torch.half, torch.bfloat16)) def test_stable_sort(self, device, dtype): sizes = (100, 1000, 10000) for ncopies in sizes: @@ -323,8 +322,7 @@ def test_topk_1d_output_discontiguous(self, device, dtype): self.assertEqual(indices, indices_cont) self.assertEqual(values, values_cont) - # FIXME: remove torch.bool from unsupported types once support is added for cub sort - @dtypes(*all_types_and(torch.half, torch.bfloat16)) + @dtypes(*all_types_and(torch.bool, torch.half, torch.bfloat16)) def 
test_stable_sort_against_numpy(self, device, dtype): if dtype in floating_types_and(torch.float16, torch.bfloat16): inf = float("inf") diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index b9ec84b8cd9cf4..d52a647497741f 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -3320,7 +3320,10 @@ def large_1d_unique(): flag = [True, False] for dim, descending, stable in product(dims, flag, flag): # default schema without stable sort - yield SampleInput(small_3d_unique(), dim, descending) + if not (dtype == torch.bool and torch.device(device).type == 'cuda'): + # bool and cuda requires stable sort for stable results, at least + # for the return index + yield SampleInput(small_3d_unique(), dim, descending) # schema with stable sort, no CUDA support yet if torch.device(device).type == 'cpu': yield SampleInput( @@ -18477,11 +18480,13 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): )), OpInfo('sort', dtypes=all_types_and(torch.bool, torch.float16, torch.bfloat16), - dtypesIfCUDA=all_types_and(torch.float16, torch.bfloat16), + dtypesIfCUDA=all_types_and(torch.bool, torch.float16, torch.bfloat16), sample_inputs_func=sample_inputs_sort, supports_forward_ad=True, supports_fwgrad_bwgrad=True, skips=( + DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_non_standard_bool_values', + dtypes=[torch.bool], device_type='cuda'), )), OpInfo('unique', dtypes=all_types_and(torch.bool, torch.float16, torch.bfloat16, torch.uint16, torch.uint32, torch.uint64), @@ -19506,12 +19511,14 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): sample_inputs_func=sample_inputs_unfold), OpInfo('msort', dtypes=all_types_and(torch.bool, torch.float16, torch.bfloat16), - dtypesIfCUDA=all_types_and(torch.float16, torch.bfloat16), + dtypesIfCUDA=all_types_and(torch.bool, torch.float16, torch.bfloat16), check_batched_gradgrad=False, supports_forward_ad=True, supports_fwgrad_bwgrad=True, sample_inputs_func=sample_inputs_msort, skips=( + DecorateInfo(unittest.expectedFailure, 'TestCommon', 'test_non_standard_bool_values', + dtypes=[torch.bool], device_type='cuda'), )), OpInfo('movedim', aliases=('moveaxis',), @@ -21324,7 +21331,7 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): OpInfo( "argsort", dtypes=all_types_and(torch.bool, torch.float16, torch.bfloat16), - dtypesIfCUDA=all_types_and(torch.float16, torch.bfloat16), + dtypesIfCUDA=all_types_and(torch.bool, torch.float16, torch.bfloat16), sample_inputs_func=sample_inputs_sort, supports_out=False, supports_autograd=False, @@ -21335,6 +21342,13 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): "test_variant_consistency_jit", dtypes=(torch.float32,), ), + DecorateInfo( + unittest.expectedFailure, + "TestCommon", + "test_non_standard_bool_values", + dtypes=[torch.bool], + device_type='cuda', + ), ), ), OpInfo( From 6a30c14a0ad8ffd42b54a186b3781840dcf6f2d3 Mon Sep 17 00:00:00 2001 From: Will Feng Date: Mon, 4 Nov 2024 17:04:35 -0800 Subject: [PATCH 115/503] [Traceable FSDP2] Run any unexecuted post_backward at beginning of pre_backward hook (#139671) Assuming the forward pass user code looks like: ``` for _ in range(2): x = layer(x) ``` and we have `fully_shard(layer)`, then: - the forward pass will be like: "unshard layer -> call layer 1st time -> reshard layer -> unshard layer -> call layer 2nd time-> 
reshard layer" (currently same for both eager and compile) - the backward pass will be like: "unshard layer -> call layer 1st time -> reshard layer -> unshard layer -> call layer 2nd time-> reshard layer" in eager, but currently it's "unshard layer -> call layer 1st time -> call layer 2nd time -> reshard layer" in compile The behavior in the backward pass is different between eager and compile, which is not ideal. I am currently trying to look for a way to fix this non-ideal behavior of compile - tried a few things: 1. Tracing the RegisterPostBackwardFunction custom autograd function - this stills seems to be a no-go, due to HOP not supporting side-effects. 2. Instead of custom autograd function, do a "multi-grad hook" to wait for all gradients to be ready before triggering post_backward. However, this approach seems to have bad interaction with register_hook of pre_backward, in the sense that it's unclear which of them will be triggered first in practice. 3. Force execute any pending post_backward before unshard in pre_backward hook, and rely on compiler to move the reshard to the right place to optimize peak memory. -> This PR Pull Request resolved: https://github.com/pytorch/pytorch/pull/139671 Approved by: https://github.com/awgu --- .../fsdp/test_fully_shard_compile.py | 45 +++++++++++++++++-- .../_composable/fsdp/_fsdp_param.py | 4 +- .../_composable/fsdp/_fsdp_param_group.py | 13 +++++- 3 files changed, 55 insertions(+), 7 deletions(-) diff --git a/test/distributed/_composable/fsdp/test_fully_shard_compile.py b/test/distributed/_composable/fsdp/test_fully_shard_compile.py index b1552909d457d0..f01279dc4dc972 100644 --- a/test/distributed/_composable/fsdp/test_fully_shard_compile.py +++ b/test/distributed/_composable/fsdp/test_fully_shard_compile.py @@ -171,6 +171,16 @@ def f(x): torch.compile(f, backend="aot_eager")(x) self.assertEqual(x, ref_x) + def _get_resize_count_in_fx_graph(self, graph: torch.fx.Graph): + resize_count = 0 + for node in graph.nodes: + if ( + node.op == "call_function" + and node.target == torch.ops.inductor.resize_storage_bytes_.default + ): + resize_count += 1 + return resize_count + def _assert_no_aliased_unsharded_params_in_graph_inputs( self, model, graph: torch.fx.Graph ) -> None: @@ -212,9 +222,17 @@ def _assert_no_aliased_unsharded_params_in_graph_inputs( self.assertTrue(no_aliased_unsharded_params_in_graph_inputs, err_msg) def _remove_fsdp2_unsharded_param_graph_input_usage_with_optional_checks( - self, model, fwd_fullgraph + self, model, *, bwd_resize_count_before_pass=None, fwd_fullgraph=False ): def _run_with_checks(graph, orig_fn): + if ( + self._is_bwd_fx_graph(graph) + and bwd_resize_count_before_pass is not None + ): + self.assertEqual( + bwd_resize_count_before_pass, + self._get_resize_count_in_fx_graph(graph), + ) self._assert_no_aliased_unsharded_params_in_graph_inputs(model, graph) orig_fn(graph) @@ -313,6 +331,16 @@ def _is_fwd_graph(self, snodes): else: return False + def _is_bwd_fx_graph(self, graph): + for node in graph.nodes: + if ( + node.op == "call_function" + and node.target + == torch.ops._c10d_functional.reduce_scatter_tensor.default + ): + return True + return False + def _maybe_run_decide_global_ordering_of_comms_with_checks(self, fwd_fullgraph): def _check_fsdp_ops_in_snodes(snodes, is_fwd_graph, expect=True): assert_method = self.assertTrue if expect else self.assertFalse @@ -443,6 +471,8 @@ def _test_traceable_fsdp( input_creation_fn, backend, fwd_fullgraph, + *, + bwd_resize_count_before_inductor=None, ): def fwd_bwd(model, 
inp): out = model(inp) @@ -474,7 +504,9 @@ def test_compiled(): counters.clear() with self._remove_fsdp2_unsharded_param_graph_input_usage_with_optional_checks( - model, fwd_fullgraph + model, + bwd_resize_count_before_pass=bwd_resize_count_before_inductor, + fwd_fullgraph=fwd_fullgraph, ): fwd_bwd_fn_compiled = torch.compile( fwd_bwd_fn, @@ -619,8 +651,11 @@ def __init__(self, n_layers): def forward(self, x): # Intentionally reusing all layers a few times, # to test "multiple all-gathers for the same parameter" case. - for layer in self.layers: - x = layer(x) + # Case 1: rerun the same layer twice + for layer_id in range(len(self.layers)): + for _ in range(2): + x = self.layers[layer_id](x) + # Case 2: iterate through all layers twice for layer in self.layers: x = layer(x) for layer in self.layers: @@ -700,6 +735,7 @@ def test_nested_fully_shard_backend_inductor_fullgraph_True(self): ), "inductor", fwd_fullgraph=fwd_fullgraph, + bwd_resize_count_before_inductor=48 if fwd_fullgraph else None, ) ) if fwd_fullgraph: @@ -925,6 +961,7 @@ def test_transformer_backend_inductor_fullgraph_True(self): ), "inductor", fwd_fullgraph=fwd_fullgraph, + bwd_resize_count_before_inductor=76 if fwd_fullgraph else None, ) ) if fwd_fullgraph: diff --git a/torch/distributed/_composable/fsdp/_fsdp_param.py b/torch/distributed/_composable/fsdp/_fsdp_param.py index ac66b6f3300d57..5cc43ab84a5f8b 100644 --- a/torch/distributed/_composable/fsdp/_fsdp_param.py +++ b/torch/distributed/_composable/fsdp/_fsdp_param.py @@ -518,7 +518,9 @@ def init_unsharded_param(self): # resize_(full) -> copy_ -> resize_(0) pattern, we will remove those # resize_ and copy_ ops in a compiler graph pass # `remove_fsdp2_unsharded_param_graph_input_usage` to recover performance. - alloc_storage(self._unsharded_param) + self._unsharded_param.untyped_storage().resize_( + self._unsharded_param.numel() * self._unsharded_param.itemsize + ) torch.ops.fsdp.copy_(self._unsharded_param, unsharded_param) else: self._unsharded_param = nn.Parameter( diff --git a/torch/distributed/_composable/fsdp/_fsdp_param_group.py b/torch/distributed/_composable/fsdp/_fsdp_param_group.py index e19ac1e814dc40..51d0160d9dc289 100644 --- a/torch/distributed/_composable/fsdp/_fsdp_param_group.py +++ b/torch/distributed/_composable/fsdp/_fsdp_param_group.py @@ -345,6 +345,13 @@ def _record_post_forward(self) -> None: self._post_forward_indices.append(post_forward_index) def pre_backward(self, default_prefetch: bool, *unused: Any): + if ( + compiled_autograd_enabled() + and self._training_state == TrainingState.PRE_BACKWARD + ): + # Traceable FSDP2 cannot trigger the param group's `post_backward` immediately after param usage; + # instead it relies on this to trigger the previously unexecuted `post_backward`. + self.post_backward() if self._training_state == TrainingState.PRE_BACKWARD: return if not compiled_autograd_enabled(): @@ -677,8 +684,10 @@ def _assert_not_tracing_fsdp(): if compiled_autograd_enabled(): # TODO: Find a way to print the offending FSDP2 module. msg = """\ -When Traceable FSDP2 is enabled, we rely on `root_post_backward_callback` to call -each `FSDPParamGroup.post_backward`, and we should not be calling into `RegisterPostBackwardFunction`. +When Traceable FSDP2 is enabled, we should not be calling into `RegisterPostBackwardFunction`. 
+Instead, we rely on the param group's next `pre_backward` hook to trigger its previously unexecuted +`post_backward`, and we rely on FSDPState's `root_post_backward_callback` to trigger the resharding +of any leftover unsharded param groups. If you are here, it means the forward part of this FSDP2 instance is not compiled, and you must also compile the forward part if you want to use Traceable FSDP2.""" torch._dynamo.comptime.comptime.print(msg) From a787320d0ffc80463b5c7dbba60a4c52a20d75cc Mon Sep 17 00:00:00 2001 From: Laith Sakka Date: Tue, 5 Nov 2024 10:44:29 -0800 Subject: [PATCH 116/503] Do not try to optimize new implications in get_implications (#139738) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary: save around 8% on the torchrec model. In most case the new implications are not optimizaiton anyway in some case though they are, but optimizing them is useless. ex: ``` generating implications for Eq(Mod(s0, 3), 0) adding Eq(Mod(s0, 3), 0) adding Eq(0, Mod(s0, 3)) adding Ne(Mod(s0, 3), 0) adding Ne(0, Mod(s0, 3)) adding Mod(s0, 3) <= 0 adding 0 < Mod(s0, 3) adding True adding False ``` VS ``` generating implications for Eq(Mod(s0, 3), 0) adding Eq(Mod(s0, 3), 0) adding Eq(0, Mod(s0, 3)) adding Ne(Mod(s0, 3), 0) adding Ne(0, Mod(s0, 3)) adding Mod(s0, 3) <= 0 adding 0 < Mod(s0, 3) adding 0 <= Mod(s0, 3) adding Mod(s0, 3) < 0 ``` the main difference is that 0 <= Mod(s0, 3) can be simplified to True and Mod(s0, 3) < 0 to False but with this change this wont happen. but True:True and False: False are useless anyway lol. so its ok i think ``` buck2 run fbcode//mode/opt fbcode//torchrec/distributed/tests:pt2_compile_benchmark -- --num-features=1000 ``` Screenshot 2024-11-04 at 9 25 51 PM Pull Request resolved: https://github.com/pytorch/pytorch/pull/139738 Approved by: https://github.com/ezyang ghstack dependencies: #139703 --- test/dynamo/test_misc.py | 4 ++-- torch/fx/experimental/symbolic_shapes.py | 23 +++++++++++++---------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py index 653cc94f3c5367..32f73334dcbe0d 100644 --- a/test/dynamo/test_misc.py +++ b/test/dynamo/test_misc.py @@ -10225,7 +10225,7 @@ def test_shape_env_equal_evaluate_expr_divisible(self): ShapeEnv not equal: field values don't match: ==> axioms: values don't match. - > Left: {0 < Mod(s0, 3): False, Eq(0, Mod(s0, 3)): True, Eq(Mod(s0, 3), 0): True, False: False, Mod(s0, 3) <= 0: True, Ne(0, Mod(s0, 3)): False, Ne(Mod(s0, 3), 0): False, True: True} + > Left: {0 < Mod(s0, 3): False, 0 <= Mod(s0, 3): True, Eq(0, Mod(s0, 3)): True, Eq(Mod(s0, 3), 0): True, Mod(s0, 3) < 0: False, Mod(s0, 3) <= 0: True, Ne(0, Mod(s0, 3)): False, Ne(Mod(s0, 3), 0): False} > Right: {} ==> divisible: values don't match. > Left: {Mod(s0, 3)} @@ -10344,7 +10344,7 @@ def test_shape_env_equal_runtime_assert(self): ShapeEnv not equal: field values don't match: ==> axioms: values don't match. - > Left: {0 < PythonMod(u0, 3): False, Eq(0, PythonMod(u0, 3)): True, Eq(PythonMod(u0, 3), 0): True, False: False, Ne(0, PythonMod(u0, 3)): False, Ne(PythonMod(u0, 3), 0): False, PythonMod(u0, 3) <= 0: True, True: True} + > Left: {0 < PythonMod(u0, 3): False, 0 <= PythonMod(u0, 3): True, Eq(0, PythonMod(u0, 3)): True, Eq(PythonMod(u0, 3), 0): True, Ne(0, PythonMod(u0, 3)): False, Ne(PythonMod(u0, 3), 0): False, PythonMod(u0, 3) < 0: False, PythonMod(u0, 3) <= 0: True} > Right: {} ==> deferred_runtime_asserts: values don't match. 
> Left: {u0: [Eq(PythonMod(u0, 3), 0)]} diff --git a/torch/fx/experimental/symbolic_shapes.py b/torch/fx/experimental/symbolic_shapes.py index fb7028fb715b5f..2cf0e6ecab6455 100644 --- a/torch/fx/experimental/symbolic_shapes.py +++ b/torch/fx/experimental/symbolic_shapes.py @@ -5320,27 +5320,30 @@ def add_expr(expr: SympyBoolean) -> None: # With this, we could remove the need for the commutativity part opposite = sympy.Eq if isinstance(expr, sympy.Ne) else sympy.Ne # Commutativity of == and != - equiv[type(expr)(expr.lhs, expr.rhs)] = sympy.true - equiv[type(expr)(expr.rhs, expr.lhs)] = sympy.true - equiv[opposite(expr.lhs, expr.rhs)] = sympy.false - equiv[opposite(expr.rhs, expr.lhs)] = sympy.false + equiv[type(expr)(expr.lhs, expr.rhs, evaluate=False)] = sympy.true + equiv[type(expr)(expr.rhs, expr.lhs, evaluate=False)] = sympy.true + equiv[opposite(expr.lhs, expr.rhs, evaluate=False)] = sympy.false + equiv[opposite(expr.rhs, expr.lhs, evaluate=False)] = sympy.false else: # Expr and negation equiv[expr] = sympy.true + # we do not pass evaluate=False like others on purpose here! + # we want not(a=b and not ~(a Date: Tue, 5 Nov 2024 09:48:51 -0800 Subject: [PATCH 117/503] Fix docs for logcumsumexp formula (#139768) The previous formula was wrong and reused some indexing variables. Pull Request resolved: https://github.com/pytorch/pytorch/pull/139768 Approved by: https://github.com/janeyx99 --- torch/_torch_docs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torch/_torch_docs.py b/torch/_torch_docs.py index 4991b2da357372..0935aa1eddc191 100644 --- a/torch/_torch_docs.py +++ b/torch/_torch_docs.py @@ -3199,7 +3199,7 @@ def merge_dicts(*dicts): For summation index :math:`j` given by `dim` and other indices :math:`i`, the result is .. 
math:: - \text{{logcumsumexp}}(x)_{{ij}} = \log \sum\limits_{{j=0}}^{{i}} \exp(x_{{ij}}) + \text{{logcumsumexp}}(x)_{{ij}} = \log \sum\limits_{{k=0}}^{{j}} \exp(x_{{ik}}) Args: {input} From bd45c00fdeb5af059c57a005a2634789123a1c33 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 5 Nov 2024 14:31:08 -0800 Subject: [PATCH 118/503] [BE][Attention] Code de-dup (#139784) The only difference between `convert_boolean_attn_mask_cudnn` and `convert_boolean_attn_mask` is the value we initialize boolean tensor to Reduce duplication by introducing `convert_boolean_attn_mask_` that takes `neg_inf` value and make abovementioned implementations are trivial oneline call Also, as suggested by @Skylion007, replace `at::where(foo->logical_not, -inf, 0)` with `at::where(*foo, 0, -inf)` Pull Request resolved: https://github.com/pytorch/pytorch/pull/139784 Approved by: https://github.com/Skylion007, https://github.com/drisspg ghstack dependencies: #139788 --- .../ATen/native/transformers/attention.cpp | 24 +++++++------------ 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/aten/src/ATen/native/transformers/attention.cpp b/aten/src/ATen/native/transformers/attention.cpp index 8deb5cf9799813..a78e4c73f1a9ec 100644 --- a/aten/src/ATen/native/transformers/attention.cpp +++ b/aten/src/ATen/native/transformers/attention.cpp @@ -524,35 +524,29 @@ inline void validate_sdpa_input( // the math and memory efficient attn_mask implementation // Args: // attn_mask: attn_mask of shape (B, L, S) or (L, S) or (B, N_heads, L, S) -std::optional convert_boolean_attn_mask(const std::optional& attn_mask, caffe2::TypeMeta dtype) { +std::optional convert_boolean_attn_mask_(const std::optional& attn_mask, caffe2::TypeMeta dtype, double neg_inf) { // Pass through - if(!attn_mask.has_value()){ + if (!attn_mask.has_value()) { return std::nullopt; } // Convert boolean mask to additive mask; need to invert mask to indicate what // to mask *out*. if (attn_mask->dtype() == at::kBool) { - return at::where(attn_mask->logical_not(), -std::numeric_limits::infinity(), at::scalar_tensor(0.0, at::TensorOptions().dtype(dtype).device(attn_mask->device()))); + return at::where(*attn_mask, 0.0, at::scalar_tensor(neg_inf, at::TensorOptions().dtype(dtype).device(attn_mask->device()))); } // Otherwise, attn_mask represents an additive attention tensor return attn_mask; } +std::optional convert_boolean_attn_mask(const std::optional& attn_mask, caffe2::TypeMeta dtype) { + return convert_boolean_attn_mask_(attn_mask, dtype, -std::numeric_limits::infinity()); +} + // alternate version to workaround -inf issue with cuDNN // TODO(eqy): delete this when cuDNN -inf issue is resolved std::optional convert_boolean_attn_mask_cudnn(const std::optional& attn_mask, caffe2::TypeMeta dtype) { - // Pass through - if(!attn_mask.has_value()){ - return std::nullopt; - } - // Convert boolean mask to additive mask; need to invert mask to indicate what - // to mask *out*. 
- if (attn_mask->dtype() == at::kBool) { - // TODO Use the max type of the input and output - return at::where(attn_mask->logical_not(), -65504.0, at::scalar_tensor(0.0, at::TensorOptions().dtype(dtype).device(attn_mask->device()))); - } - // Otherwise, attn_mask represents an additive attention tensor - return attn_mask; + // TODO Use the max type of the input and output + return convert_boolean_attn_mask_(attn_mask, dtype, -65504.0); } // Memory Efficient Attention requires a padded attn mask bias From 39ede99a33b0631330a3966e567d3c07d93aca17 Mon Sep 17 00:00:00 2001 From: Andrew Gu Date: Tue, 5 Nov 2024 07:13:40 -0800 Subject: [PATCH 119/503] Add current FSDP2 path to old composable FSDP1 warning (#139759) Pull Request resolved: https://github.com/pytorch/pytorch/pull/139759 Approved by: https://github.com/weifengpy, https://github.com/wz337 ghstack dependencies: #139650 --- torch/distributed/_composable/fully_shard.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/torch/distributed/_composable/fully_shard.py b/torch/distributed/_composable/fully_shard.py index 4afa0f431075f7..35a443ec4ecc3d 100644 --- a/torch/distributed/_composable/fully_shard.py +++ b/torch/distributed/_composable/fully_shard.py @@ -42,7 +42,8 @@ "`torch.distributed._composable.fully_shard` is being deprecated. " "You can continue to use the wrapper based FSDP. " "See usage in: https://github.com/pytorch/pytorch/blob/main/torch/distributed/fsdp/fully_sharded_data_parallel.py. " - "`torch.distributed._composable.fully_shard` will be removed after PyTorch 2.5.", + "`torch.distributed._composable.fully_shard` will be removed after PyTorch 2.5. " + "If you are looking for FSDP2, please see `torch.distributed._composable.fsdp.fully_shard.`", category=FutureWarning, ) def fully_shard( From 028c5d3426743673edbbe6e11a491d76f1402f7c Mon Sep 17 00:00:00 2001 From: cyy Date: Wed, 6 Nov 2024 01:50:38 +0000 Subject: [PATCH 120/503] [2/N] Replace c10::sv with std::sv (#139456) Follows #139453 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139456 Approved by: https://github.com/ezyang --- aten/src/ATen/core/dynamic_type.cpp | 16 +++++++++++++--- aten/src/ATen/core/dynamic_type.h | 3 ++- aten/src/ATen/core/jit_type.h | 10 ++++++++-- aten/src/ATen/core/type.cpp | 6 +++--- aten/src/ATen/core/type_factory.cpp | 4 ++-- aten/src/ATen/core/type_factory.h | 4 ++-- test/cpp/jit/source_range_test.cpp | 6 +++--- torch/csrc/jit/frontend/source_range.cpp | 4 ++-- torch/csrc/jit/frontend/source_range.h | 4 ++-- torch/csrc/jit/mobile/type_parser.cpp | 4 ++-- torch/csrc/jit/runtime/static/ops.cpp | 2 +- torch/csrc/jit/serialization/export_bytecode.cpp | 2 +- .../serialization/source_range_serialization.cpp | 4 ++-- 13 files changed, 43 insertions(+), 26 deletions(-) diff --git a/aten/src/ATen/core/dynamic_type.cpp b/aten/src/ATen/core/dynamic_type.cpp index 091d07bdaaaf56..68023b85c6acb3 100644 --- a/aten/src/ATen/core/dynamic_type.cpp +++ b/aten/src/ATen/core/dynamic_type.cpp @@ -59,6 +59,16 @@ DynamicType::Arguments::Arguments(c10::ArrayRef args) { } } +DynamicType::Arguments::Arguments( + const std::vector& names, + c10::ArrayRef args) + : Arguments(args) { + TORCH_INTERNAL_ASSERT(names.size() == args.size()); + for (size_t i = 0; i < args.size(); i++) { + elems[i].label = std::string{names[i]}; + } +} + DynamicType::Arguments::Arguments( const std::vector& names, c10::ArrayRef args) @@ -105,7 +115,7 @@ DynamicTypePtr DynamicType::create(Type& other) { DynamicType::DynamicType(Tag tag, Arguments arguments) : 
SharedType(Kind), tag_(tag), arguments_(std::move(arguments)) {} -DynamicType::DynamicType(Tag tag, c10::string_view name, Arguments arguments) +DynamicType::DynamicType(Tag tag, std::string_view name, Arguments arguments) : SharedType(Kind), tag_(tag), name_(std::string{name}), @@ -258,7 +268,7 @@ TypePtr DynamicType::fallback() const { fallbacks.push_back(elem.ty->fallback()); } if (name_) { - std::vector fields; + std::vector fields; fields.reserve(arguments_.elems.size()); for (const auto& elem : arguments_.elems) { // NOLINTNEXTLINE(bugprone-unchecked-optional-access) @@ -382,7 +392,7 @@ TORCH_API TupleTypePtr ivalue::TupleTypeFactory::fallback( return nullptr; #else const auto& dyn = type.expectRef(); - std::vector fields; + std::vector fields; std::vector types; for (const auto& elem : dyn.arguments().elems) { diff --git a/aten/src/ATen/core/dynamic_type.h b/aten/src/ATen/core/dynamic_type.h index 52c4f029927b1d..4ad833a295b12c 100644 --- a/aten/src/ATen/core/dynamic_type.h +++ b/aten/src/ATen/core/dynamic_type.h @@ -139,6 +139,7 @@ class DynamicType : public SharedType { Arguments() = default; Arguments(c10::ArrayRef); Arguments(const std::vector&, c10::ArrayRef); + Arguments(const std::vector&, c10::ArrayRef); std::vector elems; }; @@ -156,7 +157,7 @@ class DynamicType : public SharedType { static TORCH_API DynamicTypePtr create(Type& ty); explicit DynamicType(Tag, Arguments); - explicit DynamicType(Tag, c10::string_view, Arguments); + explicit DynamicType(Tag, std::string_view, Arguments); TypePtr containedType(size_t) const override; size_t containedTypeSize() const override; diff --git a/aten/src/ATen/core/jit_type.h b/aten/src/ATen/core/jit_type.h index 456b720684036f..5951b4763be3a0 100644 --- a/aten/src/ATen/core/jit_type.h +++ b/aten/src/ATen/core/jit_type.h @@ -1154,7 +1154,7 @@ struct TORCH_API TupleType : public NamedType { const std::vector& field_types); static TupleTypePtr createNamed(const std::optional& name, - const std::vector& field_names, + const std::vector& field_names, const std::vector& field_types); static TupleTypePtr create( @@ -1190,7 +1190,7 @@ struct TORCH_API TupleType : public NamedType { const std::shared_ptr& schema() const { return schema_; } - std::optional> names() const; + std::optional> names() const; static const TypeKind Kind = TypeKind::TupleType; @@ -1961,6 +1961,12 @@ struct getTypePtr_ final { } }; template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return StringType::get(); + } +}; +template <> struct getTypePtr_ final { static decltype(auto) call() { return StringType::get(); diff --git a/aten/src/ATen/core/type.cpp b/aten/src/ATen/core/type.cpp index 88a6cd8ff6f5c7..92c30e6ec8437c 100644 --- a/aten/src/ATen/core/type.cpp +++ b/aten/src/ATen/core/type.cpp @@ -728,7 +728,7 @@ TupleTypePtr TupleType::createNamed( TupleTypePtr TupleType::createNamed( const std::optional& qualName, - const std::vector& field_names, + const std::vector& field_names, const std::vector& field_types) { std::vector empty_defaults; return createWithSpec(qualName, field_names, field_types, empty_defaults); @@ -784,11 +784,11 @@ TupleTypePtr TupleType::createWithSpec(const std::optional& field_types, qualName, std::move(schema))); // NOLINT(modernize-make-shared) } -std::optional> TupleType::names() const { +std::optional> TupleType::names() const { if (!schema_) { return {}; } - std::vector ret; + std::vector ret; for (const auto& arg : schema_->arguments()) { ret.emplace_back(arg.name()); } diff --git a/aten/src/ATen/core/type_factory.cpp 
b/aten/src/ATen/core/type_factory.cpp index b36c25c8c77519..607c91bb96bf33 100644 --- a/aten/src/ATen/core/type_factory.cpp +++ b/aten/src/ATen/core/type_factory.cpp @@ -43,7 +43,7 @@ const std::unordered_map& DynamicTypeFactory:: static const std::unordered_map map = { #define MAP_ITEM(NAME, TYPE) \ {#NAME, c10::DynamicTypeTrait::getBaseType()}, - FORALL_BASE_PYTHON_TYPES(MAP_ITEM) + FORALL_BASE_PYTHON_TYPES(MAP_ITEM) #undef MAP_ITEM }; return map; @@ -61,7 +61,7 @@ const std::unordered_map& DefaultTypeFactory:: c10::TypePtr DefaultTypeFactory::createNamedTuple( const std::string& name, - const std::vector& fields, + const std::vector& fields, const std::vector& types) { return c10::TupleType::createNamed(name, fields, types); } diff --git a/aten/src/ATen/core/type_factory.h b/aten/src/ATen/core/type_factory.h index 8592a8864d64e4..5b573b5c41e90b 100644 --- a/aten/src/ATen/core/type_factory.h +++ b/aten/src/ATen/core/type_factory.h @@ -29,7 +29,7 @@ struct TORCH_API TypeFactoryBase { } static c10::DynamicTypePtr createNamedTuple( const std::string& name, - const std::vector& fields, + const std::vector& fields, const std::vector& types) { return std::make_shared( c10::DynamicType::Tag::Tuple, @@ -80,7 +80,7 @@ struct TORCH_API TypeFactoryBase { } static c10::TypePtr createNamedTuple( const std::string& name, - const std::vector& fields, + const std::vector& fields, const std::vector& types); template C10_ERASE static c10::TypePtr createNamed(const std::string& name) { diff --git a/test/cpp/jit/source_range_test.cpp b/test/cpp/jit/source_range_test.cpp index 16c7f850bf2617..4cbb58ddda638c 100644 --- a/test/cpp/jit/source_range_test.cpp +++ b/test/cpp/jit/source_range_test.cpp @@ -9,7 +9,7 @@ TEST(SourceRangeTest, test_find) { strings.push_back(std::make_shared("hello world")); strings.push_back(std::make_shared("nihaoma")); - std::vector pieces{*strings[0], *strings[1]}; + std::vector pieces{*strings[0], *strings[1]}; StringCordView view(pieces, strings); @@ -22,7 +22,7 @@ TEST(SourceRangeTest, test_substr) { strings.push_back(std::make_shared("hello world")); strings.push_back(std::make_shared("nihaoma")); - std::vector pieces{*strings[0], *strings[1]}; + std::vector pieces{*strings[0], *strings[1]}; StringCordView view(pieces, strings); @@ -36,7 +36,7 @@ TEST(SourceRangeTest, test_iter) { strings.push_back(std::make_shared("hello world")); strings.push_back(std::make_shared("nihaoma")); - std::vector pieces{*strings[0], *strings[1]}; + std::vector pieces{*strings[0], *strings[1]}; StringCordView view(pieces, strings); diff --git a/torch/csrc/jit/frontend/source_range.cpp b/torch/csrc/jit/frontend/source_range.cpp index 5e524aeae878f6..05067ac80f9a37 100644 --- a/torch/csrc/jit/frontend/source_range.cpp +++ b/torch/csrc/jit/frontend/source_range.cpp @@ -14,7 +14,7 @@ StringCordView::StringCordView() { } StringCordView::StringCordView( - std::vector inputs, + std::vector inputs, std::vector> ownerships) : pieces_(std::move(inputs)), owned_strings_(std::move(ownerships)) { accumulated_sizes_.push_back(0); @@ -70,7 +70,7 @@ size_t StringCordView::find_regex(const std::string& tok, size_t start) const { } StringCordView StringCordView::substr(size_t start, size_t size) const { - std::vector pieces; + std::vector pieces; std::vector> ownerships; if (start >= this->size()) { // out of bounds diff --git a/torch/csrc/jit/frontend/source_range.h b/torch/csrc/jit/frontend/source_range.h index bde2f1803ae42f..4e36f31f0e0577 100644 --- a/torch/csrc/jit/frontend/source_range.h +++ 
b/torch/csrc/jit/frontend/source_range.h @@ -22,7 +22,7 @@ struct TORCH_API StringCordView { StringCordView(const StringCordView&) = default; StringCordView(StringCordView&&) noexcept = default; StringCordView( - std::vector inputs, + std::vector inputs, std::vector> ownerships); StringCordView& operator=(const StringCordView&) = default; @@ -171,7 +171,7 @@ struct TORCH_API StringCordView { Iterator iter_for_pos(size_t pos) const; private: - std::vector pieces_; + std::vector pieces_; std::vector accumulated_sizes_; std::vector> owned_strings_; }; diff --git a/torch/csrc/jit/mobile/type_parser.cpp b/torch/csrc/jit/mobile/type_parser.cpp index 091a0dc1a69151..2407211ea05450 100644 --- a/torch/csrc/jit/mobile/type_parser.cpp +++ b/torch/csrc/jit/mobile/type_parser.cpp @@ -182,7 +182,7 @@ TypePtr TypeParser::parse() { // ] // ]" TypePtr TypeParser::parseNamedTuple(const std::string& qualified_name) { - std::vector field_names; + std::vector field_names; std::vector field_types; expect(","); expect("["); @@ -282,7 +282,7 @@ void TypeParser::expect(const char* s) { advance(); } -// c10::string_view::operator== calls memcmp to compare against the target +// std::string_view::operator== may call memcmp to compare against the target // string; we can do better if we specialize for a single character. void TypeParser::expectChar(char c) { std::string_view token = cur(); diff --git a/torch/csrc/jit/runtime/static/ops.cpp b/torch/csrc/jit/runtime/static/ops.cpp index 35a74c0bac089b..2620adff21ec9e 100644 --- a/torch/csrc/jit/runtime/static/ops.cpp +++ b/torch/csrc/jit/runtime/static/ops.cpp @@ -1905,7 +1905,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::div, aten_div, [](Node* n) -> SROperator { return [te = createDiv()](ProcessedNode* p_node) { const auto& in0_t = p_node->Input(0).toTensor(); - std::optional rounding_mode = std::nullopt; + std::optional rounding_mode = std::nullopt; if (p_node->num_inputs() > 2) { rounding_mode = p_node->Input(2).toOptional(); } diff --git a/torch/csrc/jit/serialization/export_bytecode.cpp b/torch/csrc/jit/serialization/export_bytecode.cpp index e5dbae392ccb47..952e0a881dcc78 100644 --- a/torch/csrc/jit/serialization/export_bytecode.cpp +++ b/torch/csrc/jit/serialization/export_bytecode.cpp @@ -67,7 +67,7 @@ static std::vector findAllDependentFunctions( const Module& module, Graph& graph) { std::vector methods; - std::unordered_set called_method_names; + std::unordered_set called_method_names; auto nodes = findAllNodes(graph, c10::prim::CallMethod, true); for (Node* node : nodes) { if (auto iface = node->input(0)->type()->castRaw()) { diff --git a/torch/csrc/jit/serialization/source_range_serialization.cpp b/torch/csrc/jit/serialization/source_range_serialization.cpp index 8c9568c26723ed..11d9664b60f285 100644 --- a/torch/csrc/jit/serialization/source_range_serialization.cpp +++ b/torch/csrc/jit/serialization/source_range_serialization.cpp @@ -43,7 +43,7 @@ class SourceRangeSerializer { int64_t store_text_and_get_index(const std::string& text_view); std::vector texts_; - std::unordered_map text_to_idx_; + std::unordered_map text_to_idx_; }; SourceRange SourceRangeDeserializer::deserialize(const c10::IValue& iv) { @@ -76,7 +76,7 @@ std::shared_ptr SourceRangeDeserializer::deserialize_source( "Text table index is out of range") filename = *text_table_[fnameIndex]; - std::vector pieces; + std::vector pieces; std::vector> strs; for (int64_t i : textIndex) { From 63b01f328ef63a28b6c31e711d094b1a4b2b7d28 Mon Sep 17 00:00:00 2001 From: Colin Peppler Date: Mon, 4 Nov 2024 18:14:16 
+0000 Subject: [PATCH 121/503] [inductor] support masked_scatter w/ unbacked sized source (#138083) Pull Request resolved: https://github.com/pytorch/pytorch/pull/138083 Approved by: https://github.com/jansel --- test/inductor/test_unbacked_symints.py | 15 +++++++++++++++ torch/_decomp/decompositions.py | 4 +++- torch/_inductor/dependencies.py | 5 +++++ torch/_meta_registrations.py | 2 +- 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/test/inductor/test_unbacked_symints.py b/test/inductor/test_unbacked_symints.py index 5c438d6cbc7099..def07dfef825eb 100644 --- a/test/inductor/test_unbacked_symints.py +++ b/test/inductor/test_unbacked_symints.py @@ -279,6 +279,21 @@ def fn(x, num): expected = fn(*example_inputs) torch.testing.assert_close(actual, expected) + @dynamo_config.patch({"capture_scalar_outputs": True}) + def test_unbacked_masked_scatter(self, device): + def fn(value, mask): + u0 = mask.count_nonzero() + source = torch.ones(u0, dtype=torch.float32, device=device) + return torch.masked_scatter(value, mask, source) + + value = make_tensor(10, 10, dtype=torch.float32, device=device) + mask = make_tensor(10, 10, dtype=torch.bool, device=device) + example_inputs = (value, mask) + + actual = torch.compile(fn, fullgraph=True)(*example_inputs) + expected = fn(*example_inputs) + torch.testing.assert_close(actual, expected) + instantiate_device_type_tests(TestUnbackedSymints, globals(), allow_xpu=True) diff --git a/torch/_decomp/decompositions.py b/torch/_decomp/decompositions.py index 49aceefa2aee9c..8822a3840aac75 100644 --- a/torch/_decomp/decompositions.py +++ b/torch/_decomp/decompositions.py @@ -3926,7 +3926,9 @@ def _unsafe_masked_index(x, mask, indices, fill): lambda: "tensors used as masks must be bool tensors", ) - if x.numel() == 0: + from torch.fx.experimental.symbolic_shapes import guard_size_oblivious + + if guard_size_oblivious(x.numel() == 0): meta_result = torch._meta_registrations.meta_index_Tensor(x, indices) return x.new_full(meta_result.shape, fill) diff --git a/torch/_inductor/dependencies.py b/torch/_inductor/dependencies.py index 75f2fdb62ebf4c..bcbeb8dfc735f9 100644 --- a/torch/_inductor/dependencies.py +++ b/torch/_inductor/dependencies.py @@ -734,6 +734,11 @@ def reduction( num_values = reduction_num_outputs(reduction_type) return (None,) * num_values if num_values > 1 else None + def masked(self, mask, body, other) -> None: + assert callable(body), "masked body must always be callable." + # The body can make additional calls, for e.g. ops.indirect_indexing + body() + def _typecheck_FreeUnbackedSymbolsOpsHandler( h: FreeUnbackedSymbolsOpsHandler, diff --git a/torch/_meta_registrations.py b/torch/_meta_registrations.py index 35264096aa4372..083da0857a5c8e 100644 --- a/torch/_meta_registrations.py +++ b/torch/_meta_registrations.py @@ -3709,7 +3709,7 @@ def meta_masked_scatter_(self, mask, source): torch._check( self.dtype == source.dtype, lambda: "masked_scatter: expected self and source to have same " - "dtypes but got {self.dtype} and {source.dtype}", + f"dtypes but got {self.dtype} and {source.dtype}", ) return self From c19c38469030cdf399714eb2051887f4583006a8 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 6 Nov 2024 03:08:29 +0000 Subject: [PATCH 122/503] Fix torch.load (torch.utils.benchmark) after #137602 (#139810) After #137602, the default `weights_only` has been set to True. 
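For context, the behavior change looks roughly like this (a minimal sketch, not part of this
patch; `Foo` is a hypothetical stand-in for the pickled benchmark objects):
```
import torch

class Foo:  # hypothetical stand-in for an arbitrary non-tensor payload
    pass

torch.save(Foo(), "foo.pt")

try:
    torch.load("foo.pt")  # weights_only now defaults to True -> raises an UnpicklingError
except Exception as e:
    print("blocked by the new default:", e)

obj = torch.load("foo.pt", weights_only=False)  # explicit opt-out, as this patch does

# Possible alternative to the opt-out (see the TODO in the diff below), assuming the type
# can be reconstructed by the weights_only unpickler (PyTorch >= 2.4):
torch.serialization.add_safe_globals([Foo])
obj = torch.load("foo.pt")
```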
This test is failing in trunk slow jobs at the moment: benchmark_utils/test_benchmark_utils.py::TestBenchmarkUtils::test_collect_callgrind [GH job link](https://github.com/pytorch/pytorch/actions/runs/11672436111/job/32502454946) [HUD commit link](https://hud.pytorch.org/pytorch/pytorch/commit/1aa71be56c39908893273bd9558b127159e1ef3a)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/139810
Approved by: https://github.com/kit1980
---
 .../benchmark/utils/valgrind_wrapper/timer_interface.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/torch/utils/benchmark/utils/valgrind_wrapper/timer_interface.py b/torch/utils/benchmark/utils/valgrind_wrapper/timer_interface.py
index 199a49bde20ff2..9525fd54aa8e12 100644
--- a/torch/utils/benchmark/utils/valgrind_wrapper/timer_interface.py
+++ b/torch/utils/benchmark/utils/valgrind_wrapper/timer_interface.py
@@ -457,7 +457,10 @@ def construct(self) -> str:

         elif wrapped_value.serialization == Serialization.TORCH:
             path = os.path.join(self._data_dir, f"{name}.pt")
-            load_lines.append(f"{name} = torch.load({repr(path)})")
+            # TODO: Figure out if we can use torch.serialization.add_safe_globals here
+            # Using weights_only=False after the change in
+            # https://dev-discuss.pytorch.org/t/bc-breaking-change-torch-load-is-being-flipped-to-use-weights-only-true-by-default-in-the-nightlies-after-137602/2573
+            load_lines.append(f"{name} = torch.load({repr(path)}, weights_only=False)")
             torch.save(wrapped_value.value, path)

         elif wrapped_value.serialization == Serialization.TORCH_JIT:

From 96ca17fec43581133117a3add2c7f4ea09e8e8fa Mon Sep 17 00:00:00 2001
From: Nikita Shulga
Date: Wed, 6 Nov 2024 04:16:48 +0000
Subject: [PATCH 123/503] [CD] Move linux-aarch64 build scripts (#139815)

All files in the `.ci/aarch64_linux` folder are from
https://github.com/pytorch/builder/tree/88590cd635629a8fb652cd5ba6555cf53dd572fe/aarch64_linux

Companion PR to delete the `aarch64_linux` folder in builder:
https://github.com/pytorch/builder/pull/2030

Pull Request resolved: https://github.com/pytorch/pytorch/pull/139815
Approved by: https://github.com/wdvr, https://github.com/huydhn
---
 .ci/aarch64_linux/README.md                 |   19 +
 .ci/aarch64_linux/aarch64_ci_build.sh       |   39 +
 .ci/aarch64_linux/aarch64_ci_setup.sh       |   37 +
 .ci/aarch64_linux/aarch64_wheel_ci_build.py |  230 ++++
 .ci/aarch64_linux/build_aarch64_wheel.py    | 1041 +++++++++++++++++++
 .ci/aarch64_linux/embed_library.py          |   87 ++
 .github/workflows/_binary-build-linux.yml   |    2 +-
 7 files changed, 1454 insertions(+), 1 deletion(-)
 create mode 100644 .ci/aarch64_linux/README.md
 create mode 100644 .ci/aarch64_linux/aarch64_ci_build.sh
 create mode 100755 .ci/aarch64_linux/aarch64_ci_setup.sh
 create mode 100755 .ci/aarch64_linux/aarch64_wheel_ci_build.py
 create mode 100755 .ci/aarch64_linux/build_aarch64_wheel.py
 create mode 100644 .ci/aarch64_linux/embed_library.py

diff --git a/.ci/aarch64_linux/README.md b/.ci/aarch64_linux/README.md
new file mode 100644
index 00000000000000..583ed4af998444
--- /dev/null
+++ b/.ci/aarch64_linux/README.md
@@ -0,0 +1,19 @@
+# Aarch64 (ARM/Graviton) Support Scripts
+Scripts for building aarch64 PyTorch pip wheels. These scripts build the following wheels:
+* torch
+* torchvision
+* torchaudio
+* torchtext
+* torchdata
+## Aarch64_ci_build.sh
+This script is designed to support CD operations within the PyPI manylinux aarch64 container, and to be executed in the container. It prepares the container and then executes __aarch64_wheel_ci_build.py__ to build the wheels.
The script assumes the PyTorch repo is located at: ```/pytorch``` and will put the wheels into ```/artifacts```.
+### Usage
+```DESIRED_PYTHON=<PythonVersion> aarch64_ci_build.sh```
+
+__NOTE:__ CI build is currently __EXPERIMENTAL__
+
+## Build_aarch64_wheel.py
+This script builds the wheels on AWS EC2 resources and requires the AWS CLI and Boto3 with AWS credentials to launch EC2 instances for the wheel builds. It can be used in a CodeBuild CD pipeline or from a local system.
+
+### Usage
+```build_aarch64_wheel.py --key-name <YourPemKey> --use-docker --python 3.8 --branch <RCtag>```
diff --git a/.ci/aarch64_linux/aarch64_ci_build.sh b/.ci/aarch64_linux/aarch64_ci_build.sh
new file mode 100644
index 00000000000000..4859b01b4efdc7
--- /dev/null
+++ b/.ci/aarch64_linux/aarch64_ci_build.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+set -eux -o pipefail
+
+GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
+
+SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
+source $SCRIPTPATH/aarch64_ci_setup.sh
+
+tagged_version() {
+  GIT_DESCRIBE="git --git-dir /pytorch/.git describe --tags --match v[0-9]*.[0-9]*.[0-9]*"
+  if ${GIT_DESCRIBE} --exact >/dev/null; then
+    ${GIT_DESCRIBE}
+  else
+    return 1
+  fi
+}
+
+if tagged_version >/dev/null; then
+  export OVERRIDE_PACKAGE_VERSION="$(tagged_version | sed -e 's/^v//' -e 's/-.*$//')"
+fi
+
+###############################################################################
+# Run aarch64 builder python
+###############################################################################
+cd /
+# adding safe directory for git as the permissions will be
+# on the mounted pytorch repo
+git config --global --add safe.directory /pytorch
+pip install -r /pytorch/requirements.txt
+pip install auditwheel
+if [ "$DESIRED_CUDA" = "cpu" ]; then
+    echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
+    #USE_PRIORITIZED_TEXT_FOR_LD to enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
+    USE_PRIORITIZED_TEXT_FOR_LD=1 python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
+else
+    echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
+    #USE_PRIORITIZED_TEXT_FOR_LD to enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
+    USE_PRIORITIZED_TEXT_FOR_LD=1 python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
+fi
\ No newline at end of file
diff --git a/.ci/aarch64_linux/aarch64_ci_setup.sh b/.ci/aarch64_linux/aarch64_ci_setup.sh
new file mode 100755
index 00000000000000..d34b9426365202
--- /dev/null
+++ b/.ci/aarch64_linux/aarch64_ci_setup.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+set -eux -o pipefail
+
+# This script is used to prepare the Docker container for the aarch64_wheel_ci_build.py python script,
+# as we need to install conda and set up the python version for the build.
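+# The caller is expected to set DESIRED_PYTHON (e.g. DESIRED_PYTHON=3.11, as in the README above);
+# it selects the Python version of the conda environment created below.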
+ +CONDA_PYTHON_EXE=/opt/conda/bin/python +CONDA_EXE=/opt/conda/bin/conda +CONDA_ENV_NAME=aarch64_env +PATH=/opt/conda/bin:$PATH +LD_LIBRARY_PATH=/opt/conda/envs/${CONDA_ENV_NAME}/lib/:/opt/conda/lib:$LD_LIBRARY_PATH + +############################################################################### +# Install conda +# disable SSL_verify due to getting "Could not find a suitable TLS CA certificate bundle, invalid path" +# when using Python version, less than the conda latest +############################################################################### +echo 'Installing conda-forge' +curl -L -o /mambaforge.sh https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-aarch64.sh +chmod +x /mambaforge.sh +/mambaforge.sh -b -p /opt/conda +rm /mambaforge.sh +source /opt/conda/etc/profile.d/conda.sh +conda config --set ssl_verify False +conda create -y -c conda-forge -n "${CONDA_ENV_NAME}" python=${DESIRED_PYTHON} +conda activate "${CONDA_ENV_NAME}" + +if [[ "$DESIRED_PYTHON" == "3.13" ]]; then + pip install -q --pre numpy==2.1.2 + conda install -y -c conda-forge pyyaml==6.0.2 patchelf==0.17.2 pygit2==1.15.1 ninja==1.11.1 scons==4.7.0 +else + pip install -q --pre numpy==2.0.2 + conda install -y -c conda-forge pyyaml==6.0.1 patchelf==0.17.2 pygit2==1.13.2 ninja==1.11.1 scons==4.5.2 +fi + +python --version +conda --version diff --git a/.ci/aarch64_linux/aarch64_wheel_ci_build.py b/.ci/aarch64_linux/aarch64_wheel_ci_build.py new file mode 100755 index 00000000000000..f0ad414e37bb96 --- /dev/null +++ b/.ci/aarch64_linux/aarch64_wheel_ci_build.py @@ -0,0 +1,230 @@ +#!/usr/bin/env python3 +# encoding: UTF-8 + +import os +import shutil +from subprocess import check_call, check_output +from typing import List + +from pygit2 import Repository + + +def list_dir(path: str) -> List[str]: + """' + Helper for getting paths for Python + """ + return check_output(["ls", "-1", path]).decode().split("\n") + + +def build_ArmComputeLibrary() -> None: + """ + Using ArmComputeLibrary for aarch64 PyTorch + """ + print("Building Arm Compute Library") + acl_build_flags = [ + "debug=0", + "neon=1", + "opencl=0", + "os=linux", + "openmp=1", + "cppthreads=0", + "arch=armv8a", + "multi_isa=1", + "fixed_format_kernels=1", + "build=native", + ] + acl_install_dir = "/acl" + acl_checkout_dir = "ComputeLibrary" + os.makedirs(acl_install_dir) + check_call( + [ + "git", + "clone", + "https://github.com/ARM-software/ComputeLibrary.git", + "-b", + "v24.09", + "--depth", + "1", + "--shallow-submodules", + ] + ) + + check_call( + ["scons", "Werror=1", "-j8", f"build_dir=/{acl_install_dir}/build"] + + acl_build_flags, + cwd=acl_checkout_dir, + ) + for d in ["arm_compute", "include", "utils", "support", "src"]: + shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}") + + +def update_wheel(wheel_path) -> None: + """ + Update the cuda wheel libraries + """ + folder = os.path.dirname(wheel_path) + wheelname = os.path.basename(wheel_path) + os.mkdir(f"{folder}/tmp") + os.system(f"unzip {wheel_path} -d {folder}/tmp") + libs_to_copy = [ + "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12", + "/usr/local/cuda/lib64/libcudnn.so.9", + "/usr/local/cuda/lib64/libcublas.so.12", + "/usr/local/cuda/lib64/libcublasLt.so.12", + "/usr/local/cuda/lib64/libcudart.so.12", + "/usr/local/cuda/lib64/libcufft.so.11", + "/usr/local/cuda/lib64/libcusparse.so.12", + "/usr/local/cuda/lib64/libcusparseLt.so.0", + "/usr/local/cuda/lib64/libcusolver.so.11", + "/usr/local/cuda/lib64/libcurand.so.10", + 
"/usr/local/cuda/lib64/libnvToolsExt.so.1", + "/usr/local/cuda/lib64/libnvJitLink.so.12", + "/usr/local/cuda/lib64/libnvrtc.so.12", + "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.4", + "/usr/local/cuda/lib64/libcudnn_adv.so.9", + "/usr/local/cuda/lib64/libcudnn_cnn.so.9", + "/usr/local/cuda/lib64/libcudnn_graph.so.9", + "/usr/local/cuda/lib64/libcudnn_ops.so.9", + "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9", + "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9", + "/usr/local/cuda/lib64/libcudnn_heuristic.so.9", + "/opt/conda/envs/aarch64_env/lib/libgomp.so.1", + "/usr/lib64/libgfortran.so.5", + "/acl/build/libarm_compute.so", + "/acl/build/libarm_compute_graph.so", + ] + if enable_cuda: + libs_to_copy += [ + "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0", + "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0", + "/usr/local/lib/libnvpl_lapack_core.so.0", + "/usr/local/lib/libnvpl_blas_core.so.0", + ] + else: + libs_to_copy += [ + "/opt/OpenBLAS/lib/libopenblas.so.0", + ] + # Copy libraries to unzipped_folder/a/lib + for lib_path in libs_to_copy: + lib_name = os.path.basename(lib_path) + shutil.copy2(lib_path, f"{folder}/tmp/torch/lib/{lib_name}") + os.system( + f"cd {folder}/tmp/torch/lib/; " + f"patchelf --set-rpath '$ORIGIN' --force-rpath {folder}/tmp/torch/lib/{lib_name}" + ) + os.mkdir(f"{folder}/cuda_wheel") + os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *") + shutil.move( + f"{folder}/cuda_wheel/{wheelname}", + f"{folder}/{wheelname}", + copy_function=shutil.copy2, + ) + os.system(f"rm -rf {folder}/tmp/ {folder}/cuda_wheel/") + + +def complete_wheel(folder: str) -> str: + """ + Complete wheel build and put in artifact location + """ + wheel_name = list_dir(f"/{folder}/dist")[0] + + if "pytorch" in folder and not enable_cuda: + print("Repairing Wheel with AuditWheel") + check_call(["auditwheel", "repair", f"dist/{wheel_name}"], cwd=folder) + repaired_wheel_name = list_dir(f"/{folder}/wheelhouse")[0] + + print(f"Moving {repaired_wheel_name} wheel to /{folder}/dist") + os.rename( + f"/{folder}/wheelhouse/{repaired_wheel_name}", + f"/{folder}/dist/{repaired_wheel_name}", + ) + else: + repaired_wheel_name = wheel_name + + print(f"Copying {repaired_wheel_name} to artifacts") + shutil.copy2( + f"/{folder}/dist/{repaired_wheel_name}", f"/artifacts/{repaired_wheel_name}" + ) + + return repaired_wheel_name + + +def parse_arguments(): + """ + Parse inline arguments + """ + from argparse import ArgumentParser + + parser = ArgumentParser("AARCH64 wheels python CD") + parser.add_argument("--debug", action="store_true") + parser.add_argument("--build-only", action="store_true") + parser.add_argument("--test-only", type=str) + parser.add_argument("--enable-mkldnn", action="store_true") + parser.add_argument("--enable-cuda", action="store_true") + return parser.parse_args() + + +if __name__ == "__main__": + """ + Entry Point + """ + args = parse_arguments() + enable_mkldnn = args.enable_mkldnn + enable_cuda = args.enable_cuda + repo = Repository("/pytorch") + branch = repo.head.name + if branch == "HEAD": + branch = "master" + + print("Building PyTorch wheel") + build_vars = "MAX_JOBS=5 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 " + os.system("cd /pytorch; python setup.py clean") + + override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION") + if override_package_version is not None: + version = override_package_version + build_vars += ( + f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 " + ) + elif branch in 
["nightly", "master"]: + build_date = ( + check_output(["git", "log", "--pretty=format:%cs", "-1"], cwd="/pytorch") + .decode() + .replace("-", "") + ) + version = ( + check_output(["cat", "version.txt"], cwd="/pytorch").decode().strip()[:-2] + ) + if enable_cuda: + desired_cuda = os.getenv("DESIRED_CUDA") + build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date}+{desired_cuda} PYTORCH_BUILD_NUMBER=1 " + else: + build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 " + elif branch.startswith(("v1.", "v2.")): + build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " + + if enable_mkldnn: + build_ArmComputeLibrary() + print("build pytorch with mkldnn+acl backend") + build_vars += ( + "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " + "ACL_ROOT_DIR=/acl " + "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " + "ACL_INCLUDE_DIR=/acl/build " + "ACL_LIBRARY=/acl/build " + ) + if enable_cuda: + build_vars += "BLAS=NVPL " + else: + build_vars += "BLAS=OpenBLAS OpenBLAS_HOME=/OpenBLAS " + else: + print("build pytorch without mkldnn backend") + + os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel") + if enable_cuda: + print("Updating Cuda Dependency") + filename = os.listdir("/pytorch/dist/") + wheel_path = f"/pytorch/dist/{filename[0]}" + update_wheel(wheel_path) + pytorch_wheel_name = complete_wheel("/pytorch/") + print(f"Build Complete. Created {pytorch_wheel_name}..") diff --git a/.ci/aarch64_linux/build_aarch64_wheel.py b/.ci/aarch64_linux/build_aarch64_wheel.py new file mode 100755 index 00000000000000..99a70dd318629b --- /dev/null +++ b/.ci/aarch64_linux/build_aarch64_wheel.py @@ -0,0 +1,1041 @@ +#!/usr/bin/env python3 + +# This script is for building AARCH64 wheels using AWS EC2 instances. +# To generate binaries for the release follow these steps: +# 1. Update mappings for each of the Domain Libraries by adding new row to a table like this: +# "v1.11.0": ("0.11.0", "rc1"), +# 2. 
Run script with following arguments for each of the supported python versions and required tag, for example: +# build_aarch64_wheel.py --key-name --use-docker --python 3.8 --branch v1.11.0-rc3 + + +import os +import subprocess +import sys +import time +from typing import Dict, List, Optional, Tuple, Union + +import boto3 + + +# AMI images for us-east-1, change the following based on your ~/.aws/config +os_amis = { + "ubuntu18_04": "ami-078eece1d8119409f", # login_name: ubuntu + "ubuntu20_04": "ami-052eac90edaa9d08f", # login_name: ubuntu + "ubuntu22_04": "ami-0c6c29c5125214c77", # login_name: ubuntu + "redhat8": "ami-0698b90665a2ddcf1", # login_name: ec2-user +} +ubuntu18_04_ami = os_amis["ubuntu18_04"] + + +def compute_keyfile_path(key_name: Optional[str] = None) -> Tuple[str, str]: + if key_name is None: + key_name = os.getenv("AWS_KEY_NAME") + if key_name is None: + return os.getenv("SSH_KEY_PATH", ""), "" + + homedir_path = os.path.expanduser("~") + default_path = os.path.join(homedir_path, ".ssh", f"{key_name}.pem") + return os.getenv("SSH_KEY_PATH", default_path), key_name + + +ec2 = boto3.resource("ec2") + + +def ec2_get_instances(filter_name, filter_value): + return ec2.instances.filter( + Filters=[{"Name": filter_name, "Values": [filter_value]}] + ) + + +def ec2_instances_of_type(instance_type="t4g.2xlarge"): + return ec2_get_instances("instance-type", instance_type) + + +def ec2_instances_by_id(instance_id): + rc = list(ec2_get_instances("instance-id", instance_id)) + return rc[0] if len(rc) > 0 else None + + +def start_instance( + key_name, ami=ubuntu18_04_ami, instance_type="t4g.2xlarge", ebs_size: int = 50 +): + inst = ec2.create_instances( + ImageId=ami, + InstanceType=instance_type, + SecurityGroups=["ssh-allworld"], + KeyName=key_name, + MinCount=1, + MaxCount=1, + BlockDeviceMappings=[ + { + "DeviceName": "/dev/sda1", + "Ebs": { + "DeleteOnTermination": True, + "VolumeSize": ebs_size, + "VolumeType": "standard", + }, + } + ], + )[0] + print(f"Create instance {inst.id}") + inst.wait_until_running() + running_inst = ec2_instances_by_id(inst.id) + print(f"Instance started at {running_inst.public_dns_name}") + return running_inst + + +class RemoteHost: + addr: str + keyfile_path: str + login_name: str + container_id: Optional[str] = None + ami: Optional[str] = None + + def __init__(self, addr: str, keyfile_path: str, login_name: str = "ubuntu"): + self.addr = addr + self.keyfile_path = keyfile_path + self.login_name = login_name + + def _gen_ssh_prefix(self) -> List[str]: + return [ + "ssh", + "-o", + "StrictHostKeyChecking=no", + "-i", + self.keyfile_path, + f"{self.login_name}@{self.addr}", + "--", + ] + + @staticmethod + def _split_cmd(args: Union[str, List[str]]) -> List[str]: + return args.split() if isinstance(args, str) else args + + def run_ssh_cmd(self, args: Union[str, List[str]]) -> None: + subprocess.check_call(self._gen_ssh_prefix() + self._split_cmd(args)) + + def check_ssh_output(self, args: Union[str, List[str]]) -> str: + return subprocess.check_output( + self._gen_ssh_prefix() + self._split_cmd(args) + ).decode("utf-8") + + def scp_upload_file(self, local_file: str, remote_file: str) -> None: + subprocess.check_call( + [ + "scp", + "-i", + self.keyfile_path, + local_file, + f"{self.login_name}@{self.addr}:{remote_file}", + ] + ) + + def scp_download_file( + self, remote_file: str, local_file: Optional[str] = None + ) -> None: + if local_file is None: + local_file = "." 
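+        # No destination given: default to copying the remote file into the current working directory.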
+ subprocess.check_call( + [ + "scp", + "-i", + self.keyfile_path, + f"{self.login_name}@{self.addr}:{remote_file}", + local_file, + ] + ) + + def start_docker(self, image="quay.io/pypa/manylinux2014_aarch64:latest") -> None: + self.run_ssh_cmd("sudo apt-get install -y docker.io") + self.run_ssh_cmd(f"sudo usermod -a -G docker {self.login_name}") + self.run_ssh_cmd("sudo service docker start") + self.run_ssh_cmd(f"docker pull {image}") + self.container_id = self.check_ssh_output( + f"docker run -t -d -w /root {image}" + ).strip() + + def using_docker(self) -> bool: + return self.container_id is not None + + def run_cmd(self, args: Union[str, List[str]]) -> None: + if not self.using_docker(): + return self.run_ssh_cmd(args) + assert self.container_id is not None + docker_cmd = self._gen_ssh_prefix() + [ + "docker", + "exec", + "-i", + self.container_id, + "bash", + ] + p = subprocess.Popen(docker_cmd, stdin=subprocess.PIPE) + p.communicate( + input=" ".join(["source .bashrc && "] + self._split_cmd(args)).encode( + "utf-8" + ) + ) + rc = p.wait() + if rc != 0: + raise subprocess.CalledProcessError(rc, docker_cmd) + + def check_output(self, args: Union[str, List[str]]) -> str: + if not self.using_docker(): + return self.check_ssh_output(args) + assert self.container_id is not None + docker_cmd = self._gen_ssh_prefix() + [ + "docker", + "exec", + "-i", + self.container_id, + "bash", + ] + p = subprocess.Popen(docker_cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) + (out, err) = p.communicate( + input=" ".join(["source .bashrc && "] + self._split_cmd(args)).encode( + "utf-8" + ) + ) + rc = p.wait() + if rc != 0: + raise subprocess.CalledProcessError(rc, docker_cmd, output=out, stderr=err) + return out.decode("utf-8") + + def upload_file(self, local_file: str, remote_file: str) -> None: + if not self.using_docker(): + return self.scp_upload_file(local_file, remote_file) + tmp_file = os.path.join("/tmp", os.path.basename(local_file)) + self.scp_upload_file(local_file, tmp_file) + self.run_ssh_cmd( + ["docker", "cp", tmp_file, f"{self.container_id}:/root/{remote_file}"] + ) + self.run_ssh_cmd(["rm", tmp_file]) + + def download_file(self, remote_file: str, local_file: Optional[str] = None) -> None: + if not self.using_docker(): + return self.scp_download_file(remote_file, local_file) + tmp_file = os.path.join("/tmp", os.path.basename(remote_file)) + self.run_ssh_cmd( + ["docker", "cp", f"{self.container_id}:/root/{remote_file}", tmp_file] + ) + self.scp_download_file(tmp_file, local_file) + self.run_ssh_cmd(["rm", tmp_file]) + + def download_wheel( + self, remote_file: str, local_file: Optional[str] = None + ) -> None: + if self.using_docker() and local_file is None: + basename = os.path.basename(remote_file) + local_file = basename.replace( + "-linux_aarch64.whl", "-manylinux2014_aarch64.whl" + ) + self.download_file(remote_file, local_file) + + def list_dir(self, path: str) -> List[str]: + return self.check_output(["ls", "-1", path]).split("\n") + + +def wait_for_connection(addr, port, timeout=15, attempt_cnt=5): + import socket + + for i in range(attempt_cnt): + try: + with socket.create_connection((addr, port), timeout=timeout): + return + except (ConnectionRefusedError, socket.timeout): # noqa: PERF203 + if i == attempt_cnt - 1: + raise + time.sleep(timeout) + + +def update_apt_repo(host: RemoteHost) -> None: + time.sleep(5) + host.run_cmd("sudo systemctl stop apt-daily.service || true") + host.run_cmd("sudo systemctl stop unattended-upgrades.service || true") + host.run_cmd( + "while 
systemctl is-active --quiet apt-daily.service; do sleep 1; done" + ) + host.run_cmd( + "while systemctl is-active --quiet unattended-upgrades.service; do sleep 1; done" + ) + host.run_cmd("sudo apt-get update") + time.sleep(3) + host.run_cmd("sudo apt-get update") + + +def install_condaforge( + host: RemoteHost, suffix: str = "latest/download/Miniforge3-Linux-aarch64.sh" +) -> None: + print("Install conda-forge") + host.run_cmd(f"curl -OL https://github.com/conda-forge/miniforge/releases/{suffix}") + host.run_cmd(f"sh -f {os.path.basename(suffix)} -b") + host.run_cmd(f"rm -f {os.path.basename(suffix)}") + if host.using_docker(): + host.run_cmd("echo 'PATH=$HOME/miniforge3/bin:$PATH'>>.bashrc") + else: + host.run_cmd( + [ + "sed", + "-i", + "'/^# If not running interactively.*/i PATH=$HOME/miniforge3/bin:$PATH'", + ".bashrc", + ] + ) + + +def install_condaforge_python(host: RemoteHost, python_version="3.8") -> None: + if python_version == "3.6": + # Python-3.6 EOLed and not compatible with conda-4.11 + install_condaforge( + host, suffix="download/4.10.3-10/Miniforge3-4.10.3-10-Linux-aarch64.sh" + ) + host.run_cmd(f"conda install -y python={python_version} numpy pyyaml") + else: + install_condaforge( + host, suffix="download/4.11.0-4/Miniforge3-4.11.0-4-Linux-aarch64.sh" + ) + # Pytorch-1.10 or older are not compatible with setuptools=59.6 or newer + host.run_cmd( + f"conda install -y python={python_version} numpy pyyaml setuptools>=59.5.0" + ) + + +def build_OpenBLAS(host: RemoteHost, git_clone_flags: str = "") -> None: + print("Building OpenBLAS") + host.run_cmd( + f"git clone https://github.com/xianyi/OpenBLAS -b v0.3.28 {git_clone_flags}" + ) + make_flags = "NUM_THREADS=64 USE_OPENMP=1 NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=ARMV8" + host.run_cmd( + f"pushd OpenBLAS && make {make_flags} -j8 && sudo make {make_flags} install && popd && rm -rf OpenBLAS" + ) + + +def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None: + print("Building Arm Compute Library") + acl_build_flags = " ".join( + [ + "debug=0", + "neon=1", + "opencl=0", + "os=linux", + "openmp=1", + "cppthreads=0", + "arch=armv8a", + "multi_isa=1", + "fixed_format_kernels=1", + "build=native", + ] + ) + host.run_cmd( + f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v24.09 {git_clone_flags}" + ) + + host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}") + + +def embed_libgomp(host: RemoteHost, use_conda, wheel_name) -> None: + host.run_cmd("pip3 install auditwheel") + host.run_cmd( + "conda install -y patchelf" if use_conda else "sudo apt-get install -y patchelf" + ) + from tempfile import NamedTemporaryFile + + with NamedTemporaryFile() as tmp: + tmp.write(embed_library_script.encode("utf-8")) + tmp.flush() + host.upload_file(tmp.name, "embed_library.py") + + print("Embedding libgomp into wheel") + if host.using_docker(): + host.run_cmd(f"python3 embed_library.py {wheel_name} --update-tag") + else: + host.run_cmd(f"python3 embed_library.py {wheel_name}") + + +def checkout_repo( + host: RemoteHost, + *, + branch: str = "main", + url: str, + git_clone_flags: str, + mapping: Dict[str, Tuple[str, str]], +) -> Optional[str]: + for prefix in mapping: + if not branch.startswith(prefix): + continue + tag = f"v{mapping[prefix][0]}-{mapping[prefix][1]}" + host.run_cmd(f"git clone {url} -b {tag} {git_clone_flags}") + return mapping[prefix][0] + + host.run_cmd(f"git clone {url} -b {branch} {git_clone_flags}") + return None + + +def build_torchvision( + host: RemoteHost, + *, + branch: 
str = "main", + use_conda: bool = True, + git_clone_flags: str, + run_smoke_tests: bool = True, +) -> str: + print("Checking out TorchVision repo") + build_version = checkout_repo( + host, + branch=branch, + url="https://github.com/pytorch/vision", + git_clone_flags=git_clone_flags, + mapping={ + "v1.7.1": ("0.8.2", "rc2"), + "v1.8.0": ("0.9.0", "rc3"), + "v1.8.1": ("0.9.1", "rc1"), + "v1.9.0": ("0.10.0", "rc1"), + "v1.10.0": ("0.11.1", "rc1"), + "v1.10.1": ("0.11.2", "rc1"), + "v1.10.2": ("0.11.3", "rc1"), + "v1.11.0": ("0.12.0", "rc1"), + "v1.12.0": ("0.13.0", "rc4"), + "v1.12.1": ("0.13.1", "rc6"), + "v1.13.0": ("0.14.0", "rc4"), + "v1.13.1": ("0.14.1", "rc2"), + "v2.0.0": ("0.15.1", "rc2"), + "v2.0.1": ("0.15.2", "rc2"), + }, + ) + print("Building TorchVision wheel") + + # Please note libnpg and jpeg are required to build image.so extension + if use_conda: + host.run_cmd("conda install -y libpng jpeg") + # Remove .so files to force static linking + host.run_cmd( + "rm miniforge3/lib/libpng.so miniforge3/lib/libpng16.so miniforge3/lib/libjpeg.so" + ) + # And patch setup.py to include libz dependency for libpng + host.run_cmd( + [ + 'sed -i -e \'s/image_link_flags\\.append("png")/image_link_flags += ["png", "z"]/\' vision/setup.py' + ] + ) + + build_vars = "" + if branch == "nightly": + version = host.check_output( + ["if [ -f vision/version.txt ]; then cat vision/version.txt; fi"] + ).strip() + if len(version) == 0: + # In older revisions, version was embedded in setup.py + version = ( + host.check_output(["grep", '"version = \'"', "vision/setup.py"]) + .strip() + .split("'")[1][:-2] + ) + build_date = ( + host.check_output("cd vision && git log --pretty=format:%s -1") + .strip() + .split()[0] + .replace("-", "") + ) + build_vars += f"BUILD_VERSION={version}.dev{build_date}" + elif build_version is not None: + build_vars += ( + f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}" + ) + if host.using_docker(): + build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" + + host.run_cmd(f"cd vision && {build_vars} python3 setup.py bdist_wheel") + vision_wheel_name = host.list_dir("vision/dist")[0] + embed_libgomp(host, use_conda, os.path.join("vision", "dist", vision_wheel_name)) + + print("Copying TorchVision wheel") + host.download_wheel(os.path.join("vision", "dist", vision_wheel_name)) + if run_smoke_tests: + host.run_cmd( + f"pip3 install {os.path.join('vision', 'dist', vision_wheel_name)}" + ) + host.run_cmd("python3 vision/test/smoke_test.py") + print("Delete vision checkout") + host.run_cmd("rm -rf vision") + + return vision_wheel_name + + +def build_torchdata( + host: RemoteHost, + *, + branch: str = "main", + use_conda: bool = True, + git_clone_flags: str = "", +) -> str: + print("Checking out TorchData repo") + git_clone_flags += " --recurse-submodules" + build_version = checkout_repo( + host, + branch=branch, + url="https://github.com/pytorch/data", + git_clone_flags=git_clone_flags, + mapping={ + "v1.13.1": ("0.5.1", ""), + "v2.0.0": ("0.6.0", "rc5"), + "v2.0.1": ("0.6.1", "rc1"), + }, + ) + print("Building TorchData wheel") + build_vars = "" + if branch == "nightly": + version = host.check_output( + ["if [ -f data/version.txt ]; then cat data/version.txt; fi"] + ).strip() + build_date = ( + host.check_output("cd data && git log --pretty=format:%s -1") + .strip() + .split()[0] + .replace("-", "") + ) + build_vars += f"BUILD_VERSION={version}.dev{build_date}" + elif build_version is not None: + build_vars += ( + 
f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}" + ) + if host.using_docker(): + build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" + + host.run_cmd(f"cd data && {build_vars} python3 setup.py bdist_wheel") + wheel_name = host.list_dir("data/dist")[0] + embed_libgomp(host, use_conda, os.path.join("data", "dist", wheel_name)) + + print("Copying TorchData wheel") + host.download_wheel(os.path.join("data", "dist", wheel_name)) + + return wheel_name + + +def build_torchtext( + host: RemoteHost, + *, + branch: str = "main", + use_conda: bool = True, + git_clone_flags: str = "", +) -> str: + print("Checking out TorchText repo") + git_clone_flags += " --recurse-submodules" + build_version = checkout_repo( + host, + branch=branch, + url="https://github.com/pytorch/text", + git_clone_flags=git_clone_flags, + mapping={ + "v1.9.0": ("0.10.0", "rc1"), + "v1.10.0": ("0.11.0", "rc2"), + "v1.10.1": ("0.11.1", "rc1"), + "v1.10.2": ("0.11.2", "rc1"), + "v1.11.0": ("0.12.0", "rc1"), + "v1.12.0": ("0.13.0", "rc2"), + "v1.12.1": ("0.13.1", "rc5"), + "v1.13.0": ("0.14.0", "rc3"), + "v1.13.1": ("0.14.1", "rc1"), + "v2.0.0": ("0.15.1", "rc2"), + "v2.0.1": ("0.15.2", "rc2"), + }, + ) + print("Building TorchText wheel") + build_vars = "" + if branch == "nightly": + version = host.check_output( + ["if [ -f text/version.txt ]; then cat text/version.txt; fi"] + ).strip() + build_date = ( + host.check_output("cd text && git log --pretty=format:%s -1") + .strip() + .split()[0] + .replace("-", "") + ) + build_vars += f"BUILD_VERSION={version}.dev{build_date}" + elif build_version is not None: + build_vars += ( + f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}" + ) + if host.using_docker(): + build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" + + host.run_cmd(f"cd text && {build_vars} python3 setup.py bdist_wheel") + wheel_name = host.list_dir("text/dist")[0] + embed_libgomp(host, use_conda, os.path.join("text", "dist", wheel_name)) + + print("Copying TorchText wheel") + host.download_wheel(os.path.join("text", "dist", wheel_name)) + + return wheel_name + + +def build_torchaudio( + host: RemoteHost, + *, + branch: str = "main", + use_conda: bool = True, + git_clone_flags: str = "", +) -> str: + print("Checking out TorchAudio repo") + git_clone_flags += " --recurse-submodules" + build_version = checkout_repo( + host, + branch=branch, + url="https://github.com/pytorch/audio", + git_clone_flags=git_clone_flags, + mapping={ + "v1.9.0": ("0.9.0", "rc2"), + "v1.10.0": ("0.10.0", "rc5"), + "v1.10.1": ("0.10.1", "rc1"), + "v1.10.2": ("0.10.2", "rc1"), + "v1.11.0": ("0.11.0", "rc1"), + "v1.12.0": ("0.12.0", "rc3"), + "v1.12.1": ("0.12.1", "rc5"), + "v1.13.0": ("0.13.0", "rc4"), + "v1.13.1": ("0.13.1", "rc2"), + "v2.0.0": ("2.0.1", "rc3"), + "v2.0.1": ("2.0.2", "rc2"), + }, + ) + print("Building TorchAudio wheel") + build_vars = "" + if branch == "nightly": + version = ( + host.check_output(["grep", '"version = \'"', "audio/setup.py"]) + .strip() + .split("'")[1][:-2] + ) + build_date = ( + host.check_output("cd audio && git log --pretty=format:%s -1") + .strip() + .split()[0] + .replace("-", "") + ) + build_vars += f"BUILD_VERSION={version}.dev{build_date}" + elif build_version is not None: + build_vars += ( + f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}" + ) + if host.using_docker(): + build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" + + host.run_cmd(f"cd audio && export 
FFMPEG_ROOT=$(pwd)/third_party/ffmpeg && export USE_FFMPEG=1 \ + && ./packaging/ffmpeg/build.sh \ + && {build_vars} python3 setup.py bdist_wheel") + + wheel_name = host.list_dir("audio/dist")[0] + embed_libgomp(host, use_conda, os.path.join("audio", "dist", wheel_name)) + + print("Copying TorchAudio wheel") + host.download_wheel(os.path.join("audio", "dist", wheel_name)) + + return wheel_name + + +def configure_system( + host: RemoteHost, + *, + compiler: str = "gcc-8", + use_conda: bool = True, + python_version: str = "3.8", +) -> None: + if use_conda: + install_condaforge_python(host, python_version) + + print("Configuring the system") + if not host.using_docker(): + update_apt_repo(host) + host.run_cmd("sudo apt-get install -y ninja-build g++ git cmake gfortran unzip") + else: + host.run_cmd("yum install -y sudo") + host.run_cmd("conda install -y ninja scons") + + if not use_conda: + host.run_cmd( + "sudo apt-get install -y python3-dev python3-yaml python3-setuptools python3-wheel python3-pip" + ) + host.run_cmd("pip3 install dataclasses typing-extensions") + # Install and switch to gcc-8 on Ubuntu-18.04 + if not host.using_docker() and host.ami == ubuntu18_04_ami and compiler == "gcc-8": + host.run_cmd("sudo apt-get install -y g++-8 gfortran-8") + host.run_cmd( + "sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 100" + ) + host.run_cmd( + "sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 100" + ) + host.run_cmd( + "sudo update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-8 100" + ) + if not use_conda: + print("Installing Cython + numpy from PyPy") + host.run_cmd("sudo pip3 install Cython") + host.run_cmd("sudo pip3 install numpy") + + +def build_domains( + host: RemoteHost, + *, + branch: str = "main", + use_conda: bool = True, + git_clone_flags: str = "", +) -> Tuple[str, str, str, str]: + vision_wheel_name = build_torchvision( + host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags + ) + audio_wheel_name = build_torchaudio( + host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags + ) + data_wheel_name = build_torchdata( + host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags + ) + text_wheel_name = build_torchtext( + host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags + ) + return (vision_wheel_name, audio_wheel_name, data_wheel_name, text_wheel_name) + + +def start_build( + host: RemoteHost, + *, + branch: str = "main", + compiler: str = "gcc-8", + use_conda: bool = True, + python_version: str = "3.8", + pytorch_only: bool = False, + pytorch_build_number: Optional[str] = None, + shallow_clone: bool = True, + enable_mkldnn: bool = False, +) -> Tuple[str, str, str, str, str]: + git_clone_flags = " --depth 1 --shallow-submodules" if shallow_clone else "" + if host.using_docker() and not use_conda: + print("Auto-selecting conda option for docker images") + use_conda = True + if not host.using_docker(): + print("Disable mkldnn for host builds") + enable_mkldnn = False + + configure_system( + host, compiler=compiler, use_conda=use_conda, python_version=python_version + ) + build_OpenBLAS(host, git_clone_flags) + + if host.using_docker(): + print("Move libgfortant.a into a standard location") + # HACK: pypa gforntran.a is compiled without PIC, which leads to the following error + # libgfortran.a(error.o)(.text._gfortrani_st_printf+0x34): unresolvable R_AARCH64_ADR_PREL_PG_HI21 relocation against symbol `__stack_chk_guard@@GLIBC_2.17' # noqa: E501, B950 
+ # Workaround by copying gfortran library from the host + host.run_ssh_cmd("sudo apt-get install -y gfortran-8") + host.run_cmd("mkdir -p /usr/lib/gcc/aarch64-linux-gnu/8") + host.run_ssh_cmd( + [ + "docker", + "cp", + "/usr/lib/gcc/aarch64-linux-gnu/8/libgfortran.a", + f"{host.container_id}:/opt/rh/devtoolset-10/root/usr/lib/gcc/aarch64-redhat-linux/10/", + ] + ) + + print("Checking out PyTorch repo") + host.run_cmd( + f"git clone --recurse-submodules -b {branch} https://github.com/pytorch/pytorch {git_clone_flags}" + ) + + print("Building PyTorch wheel") + build_opts = "" + if pytorch_build_number is not None: + build_opts += f" --build-number {pytorch_build_number}" + # Breakpad build fails on aarch64 + build_vars = "USE_BREAKPAD=0 " + if branch == "nightly": + build_date = ( + host.check_output("cd pytorch && git log --pretty=format:%s -1") + .strip() + .split()[0] + .replace("-", "") + ) + version = host.check_output("cat pytorch/version.txt").strip()[:-2] + build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1" + if branch.startswith(("v1.", "v2.")): + build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1" + if host.using_docker(): + build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000" + if enable_mkldnn: + build_ArmComputeLibrary(host, git_clone_flags) + print("build pytorch with mkldnn+acl backend") + build_vars += " USE_MKLDNN=ON USE_MKLDNN_ACL=ON" + host.run_cmd( + f"cd $HOME/pytorch && export ACL_ROOT_DIR=$HOME/ComputeLibrary && {build_vars} python3 setup.py bdist_wheel{build_opts}" + ) + print("Repair the wheel") + pytorch_wheel_name = host.list_dir("pytorch/dist")[0] + ld_library_path = "$HOME/acl/build:$HOME/pytorch/build/lib" + host.run_cmd( + f"export LD_LIBRARY_PATH={ld_library_path} && auditwheel repair $HOME/pytorch/dist/{pytorch_wheel_name}" + ) + print("replace the original wheel with the repaired one") + pytorch_repaired_wheel_name = host.list_dir("wheelhouse")[0] + host.run_cmd( + f"cp $HOME/wheelhouse/{pytorch_repaired_wheel_name} $HOME/pytorch/dist/{pytorch_wheel_name}" + ) + else: + print("build pytorch without mkldnn backend") + host.run_cmd( + f"cd pytorch && {build_vars} python3 setup.py bdist_wheel{build_opts}" + ) + + print("Deleting build folder") + host.run_cmd("cd pytorch && rm -rf build") + pytorch_wheel_name = host.list_dir("pytorch/dist")[0] + embed_libgomp(host, use_conda, os.path.join("pytorch", "dist", pytorch_wheel_name)) + print("Copying the wheel") + host.download_wheel(os.path.join("pytorch", "dist", pytorch_wheel_name)) + + print("Installing PyTorch wheel") + host.run_cmd(f"pip3 install pytorch/dist/{pytorch_wheel_name}") + + if pytorch_only: + return (pytorch_wheel_name, None, None, None, None) + domain_wheels = build_domains( + host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags + ) + + return (pytorch_wheel_name, *domain_wheels) + + +embed_library_script = """ +#!/usr/bin/env python3 + +from auditwheel.patcher import Patchelf +from auditwheel.wheeltools import InWheelCtx +from auditwheel.elfutils import elf_file_filter +from auditwheel.repair import copylib +from auditwheel.lddtree import lddtree +from subprocess import check_call +import os +import shutil +import sys +from tempfile import TemporaryDirectory + + +def replace_tag(filename): + with open(filename, 'r') as f: + lines = f.read().split("\\n") + for i,line in enumerate(lines): + if not line.startswith("Tag: "): + continue + lines[i] = 
line.replace("-linux_", "-manylinux2014_") + print(f'Updated tag from {line} to {lines[i]}') + + with open(filename, 'w') as f: + f.write("\\n".join(lines)) + + +class AlignedPatchelf(Patchelf): + def set_soname(self, file_name: str, new_soname: str) -> None: + check_call(['patchelf', '--page-size', '65536', '--set-soname', new_soname, file_name]) + + def replace_needed(self, file_name: str, soname: str, new_soname: str) -> None: + check_call(['patchelf', '--page-size', '65536', '--replace-needed', soname, new_soname, file_name]) + + +def embed_library(whl_path, lib_soname, update_tag=False): + patcher = AlignedPatchelf() + out_dir = TemporaryDirectory() + whl_name = os.path.basename(whl_path) + tmp_whl_name = os.path.join(out_dir.name, whl_name) + with InWheelCtx(whl_path) as ctx: + torchlib_path = os.path.join(ctx._tmpdir.name, 'torch', 'lib') + ctx.out_wheel=tmp_whl_name + new_lib_path, new_lib_soname = None, None + for filename, elf in elf_file_filter(ctx.iter_files()): + if not filename.startswith('torch/lib'): + continue + libtree = lddtree(filename) + if lib_soname not in libtree['needed']: + continue + lib_path = libtree['libs'][lib_soname]['path'] + if lib_path is None: + print(f"Can't embed {lib_soname} as it could not be found") + break + if lib_path.startswith(torchlib_path): + continue + + if new_lib_path is None: + new_lib_soname, new_lib_path = copylib(lib_path, torchlib_path, patcher) + patcher.replace_needed(filename, lib_soname, new_lib_soname) + print(f'Replacing {lib_soname} with {new_lib_soname} for {filename}') + if update_tag: + # Add manylinux2014 tag + for filename in ctx.iter_files(): + if os.path.basename(filename) != 'WHEEL': + continue + replace_tag(filename) + shutil.move(tmp_whl_name, whl_path) + + +if __name__ == '__main__': + embed_library(sys.argv[1], 'libgomp.so.1', len(sys.argv) > 2 and sys.argv[2] == '--update-tag') +""" + + +def run_tests(host: RemoteHost, whl: str, branch="main") -> None: + print("Configuring the system") + update_apt_repo(host) + host.run_cmd("sudo apt-get install -y python3-pip git") + host.run_cmd("sudo pip3 install Cython") + host.run_cmd("sudo pip3 install numpy") + host.upload_file(whl, ".") + host.run_cmd(f"sudo pip3 install {whl}") + host.run_cmd("python3 -c 'import torch;print(torch.rand((3,3))'") + host.run_cmd(f"git clone -b {branch} https://github.com/pytorch/pytorch") + host.run_cmd("cd pytorch/test; python3 test_torch.py -v") + + +def get_instance_name(instance) -> Optional[str]: + if instance.tags is None: + return None + for tag in instance.tags: + if tag["Key"] == "Name": + return tag["Value"] + return None + + +def list_instances(instance_type: str) -> None: + print(f"All instances of type {instance_type}") + for instance in ec2_instances_of_type(instance_type): + ifaces = instance.network_interfaces + az = ifaces[0].subnet.availability_zone if len(ifaces) > 0 else None + print( + f"{instance.id} {get_instance_name(instance)} {instance.public_dns_name} {instance.state['Name']} {az}" + ) + + +def terminate_instances(instance_type: str) -> None: + print(f"Terminating all instances of type {instance_type}") + instances = list(ec2_instances_of_type(instance_type)) + for instance in instances: + print(f"Terminating {instance.id}") + instance.terminate() + print("Waiting for termination to complete") + for instance in instances: + instance.wait_until_terminated() + + +def parse_arguments(): + from argparse import ArgumentParser + + parser = ArgumentParser("Builid and test AARCH64 wheels using EC2") + 
parser.add_argument("--key-name", type=str) + parser.add_argument("--debug", action="store_true") + parser.add_argument("--build-only", action="store_true") + parser.add_argument("--test-only", type=str) + parser.add_argument( + "--os", type=str, choices=list(os_amis.keys()), default="ubuntu20_04" + ) + parser.add_argument( + "--python-version", + type=str, + choices=[f"3.{d}" for d in range(6, 12)], + default=None, + ) + parser.add_argument("--alloc-instance", action="store_true") + parser.add_argument("--list-instances", action="store_true") + parser.add_argument("--pytorch-only", action="store_true") + parser.add_argument("--keep-running", action="store_true") + parser.add_argument("--terminate-instances", action="store_true") + parser.add_argument("--instance-type", type=str, default="t4g.2xlarge") + parser.add_argument("--ebs-size", type=int, default=50) + parser.add_argument("--branch", type=str, default="main") + parser.add_argument("--use-docker", action="store_true") + parser.add_argument( + "--compiler", + type=str, + choices=["gcc-7", "gcc-8", "gcc-9", "clang"], + default="gcc-8", + ) + parser.add_argument("--use-torch-from-pypi", action="store_true") + parser.add_argument("--pytorch-build-number", type=str, default=None) + parser.add_argument("--disable-mkldnn", action="store_true") + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_arguments() + ami = os_amis[args.os] + keyfile_path, key_name = compute_keyfile_path(args.key_name) + + if args.list_instances: + list_instances(args.instance_type) + sys.exit(0) + + if args.terminate_instances: + terminate_instances(args.instance_type) + sys.exit(0) + + if len(key_name) == 0: + raise RuntimeError(""" + Cannot start build without key_name, please specify + --key-name argument or AWS_KEY_NAME environment variable.""") + if len(keyfile_path) == 0 or not os.path.exists(keyfile_path): + raise RuntimeError(f""" + Cannot find keyfile with name: [{key_name}] in path: [{keyfile_path}], please + check `~/.ssh/` folder or manually set SSH_KEY_PATH environment variable.""") + + # Starting the instance + inst = start_instance( + key_name, ami=ami, instance_type=args.instance_type, ebs_size=args.ebs_size + ) + instance_name = f"{args.key_name}-{args.os}" + if args.python_version is not None: + instance_name += f"-py{args.python_version}" + inst.create_tags( + DryRun=False, + Tags=[ + { + "Key": "Name", + "Value": instance_name, + } + ], + ) + addr = inst.public_dns_name + wait_for_connection(addr, 22) + host = RemoteHost(addr, keyfile_path) + host.ami = ami + if args.use_docker: + update_apt_repo(host) + host.start_docker() + + if args.test_only: + run_tests(host, args.test_only) + sys.exit(0) + + if args.alloc_instance: + if args.python_version is None: + sys.exit(0) + install_condaforge_python(host, args.python_version) + sys.exit(0) + + python_version = args.python_version if args.python_version is not None else "3.8" + + if args.use_torch_from_pypi: + configure_system(host, compiler=args.compiler, python_version=python_version) + print("Installing PyTorch wheel") + host.run_cmd("pip3 install torch") + build_domains( + host, branch=args.branch, git_clone_flags=" --depth 1 --shallow-submodules" + ) + else: + start_build( + host, + branch=args.branch, + compiler=args.compiler, + python_version=python_version, + pytorch_only=args.pytorch_only, + pytorch_build_number=args.pytorch_build_number, + enable_mkldnn=not args.disable_mkldnn, + ) + if not args.keep_running: + print(f"Waiting for instance {inst.id} to terminate") + 
inst.terminate() + inst.wait_until_terminated() diff --git a/.ci/aarch64_linux/embed_library.py b/.ci/aarch64_linux/embed_library.py new file mode 100644 index 00000000000000..2834a4632989b7 --- /dev/null +++ b/.ci/aarch64_linux/embed_library.py @@ -0,0 +1,87 @@ +#!/usr/bin/env python3 + +import os +import shutil +import sys +from subprocess import check_call +from tempfile import TemporaryDirectory + +from auditwheel.elfutils import elf_file_filter +from auditwheel.lddtree import lddtree +from auditwheel.patcher import Patchelf +from auditwheel.repair import copylib +from auditwheel.wheeltools import InWheelCtx + + +def replace_tag(filename): + with open(filename) as f: + lines = f.read().split("\\n") + for i, line in enumerate(lines): + if not line.startswith("Tag: "): + continue + lines[i] = line.replace("-linux_", "-manylinux2014_") + print(f"Updated tag from {line} to {lines[i]}") + + with open(filename, "w") as f: + f.write("\\n".join(lines)) + + +class AlignedPatchelf(Patchelf): + def set_soname(self, file_name: str, new_soname: str) -> None: + check_call( + ["patchelf", "--page-size", "65536", "--set-soname", new_soname, file_name] + ) + + def replace_needed(self, file_name: str, soname: str, new_soname: str) -> None: + check_call( + [ + "patchelf", + "--page-size", + "65536", + "--replace-needed", + soname, + new_soname, + file_name, + ] + ) + + +def embed_library(whl_path, lib_soname, update_tag=False): + patcher = AlignedPatchelf() + out_dir = TemporaryDirectory() + whl_name = os.path.basename(whl_path) + tmp_whl_name = os.path.join(out_dir.name, whl_name) + with InWheelCtx(whl_path) as ctx: + torchlib_path = os.path.join(ctx._tmpdir.name, "torch", "lib") + ctx.out_wheel = tmp_whl_name + new_lib_path, new_lib_soname = None, None + for filename, _ in elf_file_filter(ctx.iter_files()): + if not filename.startswith("torch/lib"): + continue + libtree = lddtree(filename) + if lib_soname not in libtree["needed"]: + continue + lib_path = libtree["libs"][lib_soname]["path"] + if lib_path is None: + print(f"Can't embed {lib_soname} as it could not be found") + break + if lib_path.startswith(torchlib_path): + continue + + if new_lib_path is None: + new_lib_soname, new_lib_path = copylib(lib_path, torchlib_path, patcher) + patcher.replace_needed(filename, lib_soname, new_lib_soname) + print(f"Replacing {lib_soname} with {new_lib_soname} for {filename}") + if update_tag: + # Add manylinux2014 tag + for filename in ctx.iter_files(): + if os.path.basename(filename) != "WHEEL": + continue + replace_tag(filename) + shutil.move(tmp_whl_name, whl_path) + + +if __name__ == "__main__": + embed_library( + sys.argv[1], "libgomp.so.1", len(sys.argv) > 2 and sys.argv[2] == "--update-tag" + ) diff --git a/.github/workflows/_binary-build-linux.yml b/.github/workflows/_binary-build-linux.yml index 120439c7c114d9..425b44c751feee 100644 --- a/.github/workflows/_binary-build-linux.yml +++ b/.github/workflows/_binary-build-linux.yml @@ -271,7 +271,7 @@ jobs: ) docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh" if [[ ${BUILD_ENVIRONMENT} == *"aarch64"* ]]; then - docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/aarch64_linux/aarch64_ci_build.sh" + docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /pytorch/.ci/aarch64_linux/aarch64_ci_build.sh" elif [[ ${{ inputs.PACKAGE_TYPE }} == "manywheel" || ${{ inputs.PACKAGE_TYPE }} == "libtorch" ]]; then docker exec -t "${container_name}" bash 
-c "source ${BINARY_ENV_FILE} && bash /pytorch/.ci/${{ inputs.PACKAGE_TYPE }}/build.sh" else From d35a600b74dc8cb4ddcc39e07aa309325affaab9 Mon Sep 17 00:00:00 2001 From: Shuqiang Zhang Date: Tue, 5 Nov 2024 15:16:35 -0800 Subject: [PATCH 124/503] [pgnccl] skip restart test fro rocm (#139809) Summary: PG restart test is flaky in rocm: https://github.com/pytorch/pytorch/pull/139809, skip the AMD/ROCM test for now Test Plan: CI Tags: Pull Request resolved: https://github.com/pytorch/pytorch/pull/139809 Approved by: https://github.com/kwen2501 --- test/distributed/test_c10d_nccl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py index 0dd46749b7c35d..f5f971c6ae7e79 100644 --- a/test/distributed/test_c10d_nccl.py +++ b/test/distributed/test_c10d_nccl.py @@ -348,6 +348,7 @@ def test_close_pg(self, eager_init: bool): dist.all_reduce(t) @requires_nccl() + @skip_if_rocm_multiprocess @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "NCCL test requires 2+ GPUs") def test_restart_pg(self): # Note: restart test passes steadily only for blocking mode for now. From 1c63612567ea7b52e5822c87f03e6b1bca88b27a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Judica=C3=ABl=20Clair?= Date: Wed, 6 Nov 2024 04:23:01 +0000 Subject: [PATCH 125/503] Fix & unit test for `c10::ArrayRef` constructed from user-defined types (#139758) Fixes #139391 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139758 Approved by: https://github.com/ezyang --- c10/test/util/ArrayRef_test.cpp | 45 +++++++++++++++++++++++++++++++++ c10/util/ArrayRef.h | 6 ++--- 2 files changed, 48 insertions(+), 3 deletions(-) create mode 100644 c10/test/util/ArrayRef_test.cpp diff --git a/c10/test/util/ArrayRef_test.cpp b/c10/test/util/ArrayRef_test.cpp new file mode 100644 index 00000000000000..00e5eeab6950c4 --- /dev/null +++ b/c10/test/util/ArrayRef_test.cpp @@ -0,0 +1,45 @@ +#include + +#include +#include + +#include +#include + +namespace { + +template +class ctor_from_container_test_span_ { + T* data_; + std::size_t sz_; + + public: + template >> + constexpr explicit ctor_from_container_test_span_( + std::conditional_t, const V, V>& vec) noexcept + : data_(vec.data()), sz_(vec.size()) {} + + [[nodiscard]] constexpr auto data() const noexcept { + return data_; + } + + [[nodiscard]] constexpr auto size() const noexcept { + return sz_; + } +}; + +TEST(ArrayRefTest, ctor_from_container_test) { + using value_type = int; + std::vector test_vec{1, 6, 32, 4, 68, 3, 7}; + const ctor_from_container_test_span_ test_mspan{test_vec}; + const ctor_from_container_test_span_ test_cspan{ + std::as_const(test_vec)}; + + const auto test_ref_mspan = c10::ArrayRef(test_mspan); + const auto test_ref_cspan = c10::ArrayRef(test_cspan); + + EXPECT_EQ(std::as_const(test_vec), test_ref_mspan); + EXPECT_EQ(std::as_const(test_vec), test_ref_cspan); +} + +} // namespace diff --git a/c10/util/ArrayRef.h b/c10/util/ArrayRef.h index bd1405c1fc652c..10c83998c42022 100644 --- a/c10/util/ArrayRef.h +++ b/c10/util/ArrayRef.h @@ -98,9 +98,9 @@ class ArrayRef final { template < typename Container, - typename = std::enable_if_t().data())>, - T*>>> + typename U = decltype(std::declval().data()), + typename = std::enable_if_t< + (std::is_same_v || std::is_same_v)>> /* implicit */ ArrayRef(const Container& container) : Data(container.data()), Length(container.size()) { debugCheckNullptrInvariant(); From 157c18a180398eddef52da559fe1649e35ce61f1 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Tue, 5 Nov 2024 
Subject: [PATCH 126/503] [BE][Attention] Use `isneginf` (#139763)

Maybe I'm missing some vital piece of information, but it feels like
```c++
  const auto neg_inf = at::scalar_tensor(-std::numeric_limits<double>::infinity(), at::TensorOptions().dtype(out.dtype()).device(out.device()));
  const auto masked = self.eq(neg_inf);
```
should be equivalent to a
[`torch.isneginf`](https://pytorch.org/docs/stable/generated/torch.isneginf.html)
call.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/139763
Approved by: https://github.com/Skylion007
ghstack dependencies: #139788, #139784
---
 aten/src/ATen/native/native_functions.yaml           | 1 +
 aten/src/ATen/native/nested/NestedTensorUnaryOps.cpp | 4 ++++
 aten/src/ATen/native/transformers/attention.cpp      | 3 +--
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/aten/src/ATen/native/native_functions.yaml b/aten/src/ATen/native/native_functions.yaml
index 00b21cc25cc44c..6afbf3c804aaa3 100644
--- a/aten/src/ATen/native/native_functions.yaml
+++ b/aten/src/ATen/native/native_functions.yaml
@@ -13102,6 +13102,7 @@
   variants: function, method
   structured_delegate: isneginf.out
   dispatch:
+    NestedTensorCPU, NestedTensorCUDA: NestedTensor_isneginf
     SparseCPU, SparseCUDA: isneginf_sparse
     SparseCsrCPU, SparseCsrCUDA, SparseCsrMeta: isneginf_sparse_csr
   tags: pointwise
diff --git a/aten/src/ATen/native/nested/NestedTensorUnaryOps.cpp b/aten/src/ATen/native/nested/NestedTensorUnaryOps.cpp
index 43205452441718..db12c744936b8e 100644
--- a/aten/src/ATen/native/nested/NestedTensorUnaryOps.cpp
+++ b/aten/src/ATen/native/nested/NestedTensorUnaryOps.cpp
@@ -89,6 +89,10 @@ Tensor NestedTensor_logical_not(const Tensor& self) {
   return map_nt(self, at::logical_not);
 }

+Tensor NestedTensor_isneginf(const Tensor& self) {
+  return map_nt(self, at::isneginf);
+}
+
 Tensor& NestedTensor_relu_(Tensor& self) {
   auto self_ptr = get_nested_tensor_impl(self);
   check_numel_equals_buffer_size(self_ptr);
diff --git a/aten/src/ATen/native/transformers/attention.cpp b/aten/src/ATen/native/transformers/attention.cpp
index a78e4c73f1a9ec..617b6d99da3419 100644
--- a/aten/src/ATen/native/transformers/attention.cpp
+++ b/aten/src/ATen/native/transformers/attention.cpp
@@ -660,8 +660,7 @@ Tensor _safe_softmax(
     int64_t dim,
     std::optional<ScalarType> dtype) {
   auto out = at::softmax(self, dim, dtype);
-  const auto neg_inf = at::scalar_tensor(-std::numeric_limits<double>::infinity(), at::TensorOptions().dtype(out.dtype()).device(out.device()));
-  const auto masked = self.eq(neg_inf);
+  const auto masked = self.isneginf();
   const auto masked_rows = all(masked, dim, true);
   const auto zero = at::scalar_tensor(0.0, at::TensorOptions().dtype(out.dtype()).device(out.device()));
   return at::where(masked_rows, zero, out);

From d031d1bf4cf00648ce1fba45ae2561a77918f21d Mon Sep 17 00:00:00 2001
From: Thanh Ha 
Date: Wed, 6 Nov 2024 05:57:38 +0000
Subject: [PATCH 127/503] Update upload-artifacts and download-artifacts to v4
 (#139808)

The two actions, actions/download-artifact@v3 and actions/upload-artifact@v3,
will be deprecated on December 5th, 2024. This change updates them to v4.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139808 Approved by: https://github.com/seemethere --- .github/actions/download-build-artifacts/action.yml | 2 +- .github/actions/download-td-artifacts/action.yml | 2 +- .github/actions/upload-test-artifacts/action.yml | 6 +++--- .github/workflows/_linux-build.yml | 4 ++-- .github/workflows/_mac-build.yml | 4 ++-- .github/workflows/_rocm-test.yml | 2 +- .github/workflows/_xpu-test.yml | 2 +- .github/workflows/scorecards.yml | 2 +- .github/workflows/target_determination.yml | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/actions/download-build-artifacts/action.yml b/.github/actions/download-build-artifacts/action.yml index 2deeda72802dda..c44b6a40834488 100644 --- a/.github/actions/download-build-artifacts/action.yml +++ b/.github/actions/download-build-artifacts/action.yml @@ -26,7 +26,7 @@ runs: - name: Download PyTorch Build Artifacts from GHA if: ${{ inputs.use-gha }} - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: ${{ inputs.name }} diff --git a/.github/actions/download-td-artifacts/action.yml b/.github/actions/download-td-artifacts/action.yml index 595093abaead05..ebb5c65353ab93 100644 --- a/.github/actions/download-td-artifacts/action.yml +++ b/.github/actions/download-td-artifacts/action.yml @@ -18,7 +18,7 @@ runs: - name: Download TD Artifacts from GHA if: inputs.use-gha - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 with: name: td_results.json diff --git a/.github/actions/upload-test-artifacts/action.yml b/.github/actions/upload-test-artifacts/action.yml index 796919f88d9ec2..76b0e5533ce6b3 100644 --- a/.github/actions/upload-test-artifacts/action.yml +++ b/.github/actions/upload-test-artifacts/action.yml @@ -147,7 +147,7 @@ runs: # GHA upload - name: Store Test Downloaded JSONs on Github - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: inputs.use-gha continue-on-error: true with: @@ -158,7 +158,7 @@ runs: path: test/**/*.json - name: Store Test Reports on Github - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: inputs.use-gha continue-on-error: true with: @@ -172,7 +172,7 @@ runs: test/**/*.csv - name: Store Usage Logs on Github - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: inputs.use-gha continue-on-error: true with: diff --git a/.github/workflows/_linux-build.yml b/.github/workflows/_linux-build.yml index 0d913076b1b023..3ff752f3f5ca56 100644 --- a/.github/workflows/_linux-build.yml +++ b/.github/workflows/_linux-build.yml @@ -287,7 +287,7 @@ jobs: s3-bucket: ${{ inputs.s3-bucket }} - name: Store PyTorch Build Artifacts for s390x - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment == 'linux-s390x-binary-manywheel' with: name: ${{ inputs.build-environment }} @@ -296,7 +296,7 @@ jobs: path: artifacts.zip - name: Store PyTorch Build Artifacts for s390x for split build - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment == 'linux-s390x-binary-manywheel' with: name: ${{ inputs.build-environment }}-experimental-split-build diff --git a/.github/workflows/_mac-build.yml b/.github/workflows/_mac-build.yml index 1ab35a05def372..01db1c0b14bc16 100644 --- 
a/.github/workflows/_mac-build.yml +++ b/.github/workflows/_mac-build.yml @@ -186,7 +186,7 @@ jobs: zip -1 -r artifacts.zip dist/ build/.ninja_log build/compile_commands.json .additional_ci_files - name: Store PyTorch Build Artifacts on GHA - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' with: name: ${{ env.BUILD_ENVIRONMENT }} @@ -195,7 +195,7 @@ jobs: path: artifacts.zip - name: Upload sccache stats to GHA - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 # Only if sccache is installed, see above if: ${{ (github.event_name == 'push' || github.event.pull_request.head.repo.full_name == github.repository) && steps.build.outcome != 'skipped' }} with: diff --git a/.github/workflows/_rocm-test.yml b/.github/workflows/_rocm-test.yml index 790cd9d403dc25..5c97641b58a1db 100644 --- a/.github/workflows/_rocm-test.yml +++ b/.github/workflows/_rocm-test.yml @@ -269,7 +269,7 @@ jobs: find . -iname "core.[1-9]*" -exec docker exec "${CONTAINER_NAME}" sh -c "gdb python {} -ex 'bt' -ex 'q'" \; - name: Store Core dumps on GitHub - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: failure() with: name: coredumps-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }} diff --git a/.github/workflows/_xpu-test.yml b/.github/workflows/_xpu-test.yml index 6b80b1767e26f0..fe82e132dcbc07 100644 --- a/.github/workflows/_xpu-test.yml +++ b/.github/workflows/_xpu-test.yml @@ -270,7 +270,7 @@ jobs: docker stop "${{ env.CONTAINER_NAME }}" - name: Store Core dumps on GitHub - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: failure() with: name: coredumps-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }} diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index cd4258c478763f..9567e15d2f5d5f 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -42,7 +42,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: SARIF file path: results.sarif diff --git a/.github/workflows/target_determination.yml b/.github/workflows/target_determination.yml index 523f816fa49f56..4fa2278aef4390 100644 --- a/.github/workflows/target_determination.yml +++ b/.github/workflows/target_determination.yml @@ -85,7 +85,7 @@ jobs: path: td_results.json - name: Store TD results on GHA - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 if: steps.td.outcome == 'success' with: name: td_results.json From 22e89ea2aaa3e0ef0ec4504bd2dbf230447a6d2a Mon Sep 17 00:00:00 2001 From: "Xia, Weiwen" Date: Mon, 4 Nov 2024 22:06:17 -0800 Subject: [PATCH 128/503] [Inductor][CPU] Fuse SmoothQuant int8 linear pattern (#139595) **About the PR** In the implementation of SmoothQuant in Torchao, quantized linear is computed by `_int_mm(a, b)` + `mul(b_scale)` + `mul(a_scale)` (+ optional `add` for bias) with `reshape` and `convert_dtype` in between. 
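For reference, a minimal sketch of that unfused computation (not part of this patch; the function name, shapes, and the bfloat16 default are illustrative assumptions, mirroring the test case added below):

```python
import torch

def smooth_quant_int8_linear_unfused(a, b, a_scale, b_scale, bias=None, out_dtype=torch.bfloat16):
    # a: int8 activation [..., K]; b: int8 weight [K, N]
    # a_scale: per-tensor [1] or per-row [M, 1]; b_scale: per-channel [N]
    out_shape = a.shape[:-1] + (b.size(-1),)
    c = torch._int_mm(a.reshape(-1, a.size(-1)), b)  # int32 accumulator
    c = c.to(out_dtype)                              # convert_element_type
    c = c * a_scale.expand(c.shape)                  # expand -> mul (activation scale)
    c = c * b_scale                                  # mul (weight scale)
    if bias is not None:
        c = c + bias                                 # optional add
    return c.reshape(out_shape)
```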
This PR adds a pass to fuse the corresponding patterns: - (no bias) `reshape -> _int_mm -> convert_element_type -> (expand -> mul) -> mul -> reshape` - (with bias) `pattern_no_bias -> add -> reshape -> reshape` The patterns are replaced by `onednn.qlinear_pointwise` and `onednn.qlinear_prepack`, the latter of which is evaluated and frozen during the freezing process of Inductor. The final graph contains `onednn.qlinear_pointwise` only with packed weight constants. Note that `onednn.qlinear_pointwise` does not support per-channel quantization of activation, which is a limitation of oneDNN library, so in that case we set activation scale to 1 and bias to none and apply scales and add bias after `onednn.qlinear_pointwise`. **Validation results** Accuracy/perplexity is not changed with or without this fusion pass. Latency is improved by >10% with the fusion pass. Test method: - Model: EleutherAI/gpt-j-6b - Hardware: Intel(R) Xeon(R) Platinum 8490H, running on 1 socket, 60 cores - Using Intel OMP and Tcmalloc - Running [the example script of SmoothQuant in Torchao](https://github.com/pytorch/ao/blob/main/torchao/prototype/smoothquant/example.py) with `TORCHINDUCTOR_FREEZING=1 numactl -N1 python example.py -m EleutherAI/gpt-j-6b --device=cpu --quant-mode=dynamic --compile` **Test plan** ``` python test/inductor/test_mkldnn_pattern_matcher.py -k test_smooth_quant_with_int_mm ``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/139595 Approved by: https://github.com/leslie-fang-intel, https://github.com/jgong5, https://github.com/jerryzh168 --- .../src/ATen/native/quantized/cpu/qlinear.cpp | 23 ++- test/inductor/test_mkldnn_pattern_matcher.py | 86 ++++++++ torch/_inductor/fx_passes/quantization.py | 184 ++++++++++++++++++ 3 files changed, 284 insertions(+), 9 deletions(-) diff --git a/aten/src/ATen/native/quantized/cpu/qlinear.cpp b/aten/src/ATen/native/quantized/cpu/qlinear.cpp index 1c76f986ee1bd8..ed5badfb076e08 100644 --- a/aten/src/ATen/native/quantized/cpu/qlinear.cpp +++ b/aten/src/ATen/native/quantized/cpu/qlinear.cpp @@ -932,8 +932,8 @@ static at::Tensor linear_int8_with_onednn_weight( c10::string_view& unary_post_op_algorithm) { using ideep::tensor; const int64_t dim = input.dim(); - TORCH_CHECK(input.scalar_type() == c10::ScalarType::Byte, - "qlinear with mkldnn tensor: data type of input should be uint8 (unsigned char)."); + TORCH_CHECK(input.scalar_type() == c10::ScalarType::Byte || input.scalar_type() == c10::ScalarType::Char, + "qlinear with mkldnn tensor: data type of input should be uint8 or int8 (unsigned char or char)."); TORCH_CHECK(onednn_weight.scalar_type() == c10::ScalarType::Char, "qlinear with mkldnn tensor: data type of weight should be int8 (char)."); TORCH_CHECK( @@ -1022,7 +1022,8 @@ static at::Tensor linear_int8_with_onednn_weight( empty_tensor; // Create onednn primitive - auto src_desc = tensor::desc(src_dims, ideep::data_type::u8, ideep::format_tag::any); + auto src_dtype = input.scalar_type() == c10::kByte ? 
ideep::data_type::u8 : ideep::data_type::s8; + auto src_desc = tensor::desc(src_dims, src_dtype, ideep::format_tag::any); auto weights_desc = packed_weight.get_desc(); auto dst_dtype = dst.get_data_type(); auto dst_desc = tensor::desc(dst_dims, dst_dtype, ideep::format_tag::any); @@ -1119,12 +1120,14 @@ namespace at::native { torch::List> post_op_args, c10::string_view post_op_algorithm) { #if AT_MKLDNN_ENABLED() - TORCH_CHECK(act_scale.numel() == 1 && act_zero_point.numel() == 1, - "onednn int8 linear: act scale/zp size should be 1"); + // act_zero_point.numel() == 0 for symmetric quantization + TORCH_CHECK(act_scale.numel() == 1 && act_zero_point.numel() <= 1, + "onednn int8 linear: act scale/zp size should be 1/<=1"); static std::optional other = std::nullopt; static const c10::string_view binary_post_op = "none"; + int64_t act_zp = act_zero_point.numel() == 1 ? act_zero_point.item().toLong() : 0; return linear_int8_with_onednn_weight( - act, act_scale.item().toDouble(), act_zero_point.item().toLong(), + act, act_scale.item().toDouble(), act_zp, onednn_weight, weight_scales, weight_zero_points, bias, output_scale, output_zero_point, output_dtype, other, /*other scale*/1.0, /*other zp*/0, @@ -1155,10 +1158,12 @@ namespace at::native { torch::List> unary_post_op_args, c10::string_view unary_post_op_algorithm) { #if AT_MKLDNN_ENABLED() - TORCH_CHECK(act_scale.numel() == 1 && act_zero_point.numel() == 1, - "onednn int8 linear: act scale/zp size should be 1"); + // act_zero_point.numel() == 0 for symmetric quantization + TORCH_CHECK(act_scale.numel() == 1 && act_zero_point.numel() <= 1, + "onednn int8 linear: act scale/zp size should be 1/<=1"); + int64_t act_zp = act_zero_point.numel() == 1 ? act_zero_point.item().toLong() : 0; return linear_int8_with_onednn_weight( - act, act_scale.item().toDouble(), act_zero_point.item().toLong(), + act, act_scale.item().toDouble(), act_zp, onednn_weight, weight_scales, weight_zero_points, bias, output_scale, output_zero_point, output_dtype, other, other_scale, other_zero_point, diff --git a/test/inductor/test_mkldnn_pattern_matcher.py b/test/inductor/test_mkldnn_pattern_matcher.py index 772d083b03b361..7d211c1b17e78f 100644 --- a/test/inductor/test_mkldnn_pattern_matcher.py +++ b/test/inductor/test_mkldnn_pattern_matcher.py @@ -2824,6 +2824,92 @@ def matcher_check_fn(): rtol=0.07, ) + @skipIfNoDynamoSupport + @skipIfNoONEDNN + def test_smooth_quant_with_int_mm(self): + r""" + This testcase check if we can match the SmoothQuant int8 linear pattern from Torchao. 
+ The pattern is: + (no bias) reshape -> _int_mm -> convert_element_type -> (expand -> mul) -> mul -> reshape + or + (with bias) pattern_no_bias -> add -> reshape -> reshape + """ + M = 16 + in_feature = 64 + out_feature = 128 + q_min, q_max = -32, 31 + + class Mod(torch.nn.Module): + def __init__( + self, dtype: torch.dtype, has_bias: bool, per_channel_quant: bool + ): + super().__init__() + self.dtype = dtype + self.has_bias = has_bias + self.b = torch.randint( + q_min, q_max, [in_feature, out_feature], dtype=torch.int8 + ) + self.per_channel_quant = per_channel_quant + self.b_scale = torch.rand([out_feature]) * 0.01 + 0.01 + self.b_scale = self.b_scale.to(dtype) + self.bias = torch.rand([out_feature], dtype=dtype) if has_bias else None + + def forward(self, a, a_scale_per_tensor, a_scale_per_channel): + out_shape = a.shape[:-1] + (self.b.size(-1),) + a_reshaped = a.reshape(-1, a.size(-1)) + c = torch._int_mm(a_reshaped, self.b) + c = c.to(self.dtype) + c_shape = c.shape + a_scale = ( + a_scale_per_channel + if self.per_channel_quant + else a_scale_per_tensor + ) + a_scale = a_scale.expand(c.shape) + c = c * a_scale + c = c * self.b_scale + if self.has_bias: + c = c.reshape([1, *list(c_shape)]) + c = c + self.bias + c = c.reshape(c_shape) + c = c.reshape(out_shape) + return c + + has_bias_list = [True, False] + dype_list = ( + [torch.float, torch.bfloat16] + if torch.ops.mkldnn._is_mkldnn_bf16_supported() + else [torch.float] + ) + per_channel_list = [True, False] + for has_bias, dtype, per_channel_quant in itertools.product( + has_bias_list, dype_list, per_channel_list + ): + mod = Mod(dtype, has_bias, per_channel_quant).eval() + a = torch.randint(q_min, q_max, [1, M, in_feature], dtype=torch.int8) + a_scale_per_tensor = torch.rand([1], dtype=dtype) * 0.01 + 0.01 + a_scale_per_channel = torch.rand([M, 1], dtype=dtype) * 0.01 + 0.01 + a_scale_per_tensor, a_scale_per_channel = ( + a_scale_per_tensor.to(dtype), + a_scale_per_channel.to(dtype), + ) + + def matcher_check_fn(): + self.assertEqual( + counters["inductor"]["qlinear_weight_prepack_matcher_count"], 1 + ) + self.assertEqual( + counters["inductor"]["qlinear_weight_prepack_matcher_nodes"], + 10 if has_bias else 7, + ) + + self._test_common( + mod, + (a, a_scale_per_tensor, a_scale_per_channel), + matcher_check_fn=matcher_check_fn, + check_autocast=dtype, + ) + @dynamo_config.patch({"dynamic_shapes": True, "assume_static_by_default": False}) class TestDynamicPatternMatcher(TestPatternMatcherBase): diff --git a/torch/_inductor/fx_passes/quantization.py b/torch/_inductor/fx_passes/quantization.py index 0188558ada93a0..5161b7473ac278 100644 --- a/torch/_inductor/fx_passes/quantization.py +++ b/torch/_inductor/fx_passes/quantization.py @@ -2529,6 +2529,187 @@ def _register_qlinear_weight_prepack(): ) +def _register_smooth_quant_int_mm_pattern(): + """ + The pattern is: + (no bias) reshape -> _int_mm -> convert_element_type -> (expand -> mul) -> mul -> reshape + or + (with bias) pattern_no_bias -> add -> reshape -> reshape + """ + pattern_no_bias = CallFunction( + aten.reshape.default, + CallFunction( + aten.mul.Tensor, + CallFunction( + aten.mul.Tensor, + CallFunction( + prims.convert_element_type.default, + CallFunction( + aten._int_mm.default, + CallFunction( + aten.reshape.default, + KeywordArg("a"), + KeywordArg("in_shape"), + ), + KeywordArg("b"), + ), + KeywordArg("dtype"), + ), + CallFunction( + aten.expand.default, + KeywordArg("x_scale"), + Arg(), + ), + ), + KeywordArg("w_scale"), + ), + KeywordArg("out_shape_no_bias"), + ) + 
pattern_with_bias = CallFunction( + aten.reshape.default, + CallFunction( + aten.reshape.default, + CallFunction( + aten.add.Tensor, + pattern_no_bias, + KeywordArg("bias"), + ), + Arg(), + ), + KeywordArg("out_shape_with_bias"), + ) + + def _validate_pattern(match: Match): + return len(match.nodes) in [7, 10] + + for pattern in [pattern_with_bias, pattern_no_bias]: + + @register_freezing_graph_pattern( + pattern, + extra_check=_validate_pattern, + pass_number=0, + ) + def _int_mm_weight_prepack(match: Match, *args, **kwargs): + bias = kwargs.get("bias", None) + if bias is not None: + if len(bias.meta.get("tensor_meta").shape) != 1: + # we expect bias is a vector + return + x = kwargs["a"] + weight = kwargs["b"] + dtype = kwargs["dtype"] + x_scale = kwargs["x_scale"] + w_scale = kwargs["w_scale"] + x_shape = x.meta.get("tensor_meta").shape + if has_free_symbols(x_shape): + # For dynamic shape case, we can't get activation shape ahead of runtime. + x_shape = None + + out_node = match.output_node() + with match.graph.inserting_before(out_node): + transpose_node = match.graph.call_function( + aten.permute.default, args=(weight, [1, 0]) + ) + contig_node = match.graph.call_function( + aten.contiguous.default, args=(transpose_node,) + ) + packed_weight_inputs = ( + contig_node, + x_shape, + ) + packed_weight_op = torch.ops.onednn.qlinear_prepack + prepack_weight_node = match.graph.call_function( + packed_weight_op, args=packed_weight_inputs + ) + + dummy_zp = match.graph.call_function(aten.empty, args=([0],)) + w_scale = match.graph.call_function( + prims.convert_element_type.default, args=(w_scale, torch.float32) + ) + + x_scale_shape = x_scale.meta.get("tensor_meta").shape + x_scale_is_scalar = False + if not has_free_symbols(x_scale_shape): + prod = 1 + for d in x_scale_shape: + prod *= d + x_scale_is_scalar = prod == 1 + + new_args: Tuple[Any, ...] 
+ if x_scale_is_scalar: + # in this case, we can call onednn.qlinear directly + new_args = ( + x, + x_scale, + dummy_zp, # x_zp + prepack_weight_node, + w_scale, + dummy_zp, # w_zp + bias, + 1.0, # output_scale + 0, # output_zero_point + dtype, # output_dtype + "none", # post op name + [], # post op args + "", # post op algorithm + ) + new_linear_node = match.graph.call_function( + torch.ops.onednn.qlinear_pointwise.tensor, args=new_args + ) + out_node.replace_all_uses_with(new_linear_node) + new_linear_node.meta.update(out_node.meta) + else: + # onednn.qlinear does not support per-channel quantization of x + # so in this case, we have to apply x scale and add bias ourselves after qlinear + x_reshaped = match.graph.call_function( + aten.reshape.default, args=(x, kwargs["in_shape"]) + ) + new_args = ( + x_reshaped, + 1.0, # x_scale + 0, # x_zp + prepack_weight_node, + w_scale, + dummy_zp, # w_zp + None, # bias + 1.0, # output_scale + 0, # output_zero_point + dtype, # output_dtype + "none", # post op name + [], # post op args + "", # post op algorithm + ) + new_linear_node = match.graph.call_function( + torch.ops.onednn.qlinear_pointwise, args=new_args + ) + # apply x scale + new_out_node = match.graph.call_function( + aten.mul.Tensor, args=(new_linear_node, x_scale) + ) + # Add bias and reshape + if bias is not None: + new_out_node = match.graph.call_function( + aten.add.Tensor, args=(new_out_node, bias) + ) + new_out_node = match.graph.call_function( + aten.reshape.default, + args=(new_out_node, kwargs["out_shape_with_bias"]), + ) + else: + new_out_node = match.graph.call_function( + aten.reshape.default, + args=(new_out_node, kwargs["out_shape_no_bias"]), + ) + out_node.replace_all_uses_with(new_out_node) + new_out_node.meta.update(out_node.meta) + for node in reversed(match.nodes): + match.graph.erase_node(node) + counters["inductor"]["qlinear_weight_prepack_matcher_count"] += 1 + counters["inductor"]["qlinear_weight_prepack_matcher_nodes"] += len( + match.nodes + ) + + @functools.lru_cache(None) def _register_quantization_weight_pack_pass(): # Step 1: Dequant promotion for int8-mixed-fp32/bf16 @@ -2540,6 +2721,9 @@ def _register_quantization_weight_pack_pass(): # Step 3: QLinear weight prepack _register_qlinear_weight_prepack() + # Step 4: weight prepack for SmoothQuant from Torchao + _register_smooth_quant_int_mm_pattern() + def quant_lift_up(graph_module: torch.fx.GraphModule): """ From a9b4989c726a29b4b89c64282e32b9e4fc0b7d68 Mon Sep 17 00:00:00 2001 From: cyy Date: Wed, 6 Nov 2024 07:59:09 +0000 Subject: [PATCH 129/503] Enable cppcoreguidelines-special-member-functions (#139132) Fixes #ISSUE_NUMBER Pull Request resolved: https://github.com/pytorch/pytorch/pull/139132 Approved by: https://github.com/sraikund16 --- .clang-tidy | 3 ++- aten/src/ATen/Context.h | 8 ++++++++ aten/src/ATen/DynamicLibrary.h | 2 ++ aten/src/ATen/SparseCsrTensorUtils.h | 6 ++++++ aten/src/ATen/TensorIterator.h | 1 + aten/src/ATen/ThreadLocalState.h | 4 ++++ aten/src/ATen/core/Dict.h | 1 + aten/src/ATen/core/PythonFallbackKernel.h | 8 ++++++++ aten/src/ATen/core/QuantizerBase.h | 2 +- aten/src/ATen/core/Tensor.h | 6 ++++++ aten/src/ATen/core/Vitals.h | 3 +++ aten/src/ATen/core/dynamic_type.h | 5 +++++ aten/src/ATen/core/jit_type.h | 2 +- aten/src/ATen/core/jit_type_base.h | 1 + aten/src/ATen/core/rref_interface.h | 1 + aten/src/ATen/core/type.cpp | 2 -- aten/src/ATen/functorch/DynamicLayer.cpp | 6 ++++++ aten/src/ATen/quantized/Quantizer.cpp | 2 -- aten/src/ATen/record_function.h | 6 ++++++ 
c10/core/impl/PythonDispatcherTLS.h | 4 ++++ c10/test/util/Metaprogramming_test.cpp | 4 ++-- c10/test/util/ThreadLocal_test.cpp | 2 ++ c10/test/util/intrusive_ptr_test.cpp | 1 + c10/test/util/logging_test.cpp | 1 + c10/test/util/typeid_test.cpp | 2 ++ c10/util/DynamicCounter.cpp | 5 +++++ c10/util/Exception.h | 4 ++++ c10/util/LeftRight.h | 3 +++ c10/util/order_preserving_flat_hash_map.h | 1 + torch/csrc/distributed/rpc/types.h | 4 ++++ torch/csrc/dynamo/python_compiled_autograd.cpp | 4 ++++ .../inductor/aoti_runner/model_container_runner_cuda.h | 1 + torch/csrc/lazy/core/ir.cpp | 2 -- torch/csrc/lazy/core/ir.h | 2 +- torch/csrc/lazy/core/ir_metadata.h | 4 ++++ torch/csrc/lazy/core/metrics.h | 4 ++++ torch/csrc/lazy/core/tensor.h | 8 ++++++++ torch/csrc/lazy/core/thread_pool.cpp | 4 ++++ torch/csrc/lazy/core/thread_pool.h | 1 + torch/csrc/monitor/counters.h | 4 ++++ torch/csrc/profiler/collection.cpp | 4 ++++ torch/csrc/profiler/orchestration/observer.h | 4 ++++ 42 files changed, 130 insertions(+), 12 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 3b03412a405095..5776dabe00728a 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -29,7 +29,6 @@ cppcoreguidelines-*, -cppcoreguidelines-pro-type-static-cast-downcast, -cppcoreguidelines-pro-type-union-access, -cppcoreguidelines-pro-type-vararg, --cppcoreguidelines-special-member-functions, -cppcoreguidelines-non-private-member-variables-in-classes, -facebook-hte-RelativeInclude, hicpp-exception-baseclass, @@ -64,5 +63,7 @@ readability-string-compare, HeaderFilterRegex: '^(aten/|c10/|torch/).*$' WarningsAsErrors: '*' CheckOptions: + cppcoreguidelines-special-member-functions.AllowSoleDefaultDtor: true + cppcoreguidelines-special-member-functions.AllowImplicitlyDeletedCopyOrMove: true misc-header-include-cycle.IgnoredFilesList: 'format.h;ivalue.h;custom_class.h;Dict.h;List.h;IListRef.h' ... 
diff --git a/aten/src/ATen/Context.h b/aten/src/ATen/Context.h index 40e843cbfa3380..e37fa9ea516c19 100644 --- a/aten/src/ATen/Context.h +++ b/aten/src/ATen/Context.h @@ -604,6 +604,10 @@ inline void manual_seed(uint64_t seed) { // NoTF32Guard disable_tf32; struct TORCH_API NoTF32Guard { NoTF32Guard(); + NoTF32Guard(NoTF32Guard&& other) = delete; + NoTF32Guard(const NoTF32Guard&) = delete; + NoTF32Guard& operator=(const NoTF32Guard&) = delete; + NoTF32Guard& operator=(NoTF32Guard&&) = delete; ~NoTF32Guard(); static bool should_disable_tf32(); @@ -613,6 +617,10 @@ struct TORCH_API NoTF32Guard { struct TORCH_API ROCmBackwardPassGuard { ROCmBackwardPassGuard(); + ROCmBackwardPassGuard(ROCmBackwardPassGuard&& other) = delete; + ROCmBackwardPassGuard(const ROCmBackwardPassGuard&) = delete; + ROCmBackwardPassGuard& operator=(const ROCmBackwardPassGuard&) = delete; + ROCmBackwardPassGuard& operator=(ROCmBackwardPassGuard&&) = delete; ~ROCmBackwardPassGuard(); static bool is_backward_pass(); }; diff --git a/aten/src/ATen/DynamicLibrary.h b/aten/src/ATen/DynamicLibrary.h index 523a21985f225e..061456c081e611 100644 --- a/aten/src/ATen/DynamicLibrary.h +++ b/aten/src/ATen/DynamicLibrary.h @@ -16,6 +16,8 @@ namespace at { struct DynamicLibrary { AT_DISALLOW_COPY_AND_ASSIGN(DynamicLibrary); + DynamicLibrary(DynamicLibrary&& other) = delete; + DynamicLibrary& operator=(DynamicLibrary&&) = delete; TORCH_API DynamicLibrary( const char* name, diff --git a/aten/src/ATen/SparseCsrTensorUtils.h b/aten/src/ATen/SparseCsrTensorUtils.h index 2ec973013c4941..3c6877083aeebf 100644 --- a/aten/src/ATen/SparseCsrTensorUtils.h +++ b/aten/src/ATen/SparseCsrTensorUtils.h @@ -155,6 +155,12 @@ class CheckSparseTensorInvariants { : old_state(at::globalContext().checkSparseTensorInvariants()) { at::globalContext().setCheckSparseTensorInvariants(state); } + CheckSparseTensorInvariants(CheckSparseTensorInvariants&& other) = delete; + CheckSparseTensorInvariants(const CheckSparseTensorInvariants&) = delete; + CheckSparseTensorInvariants& operator=(const CheckSparseTensorInvariants&) = + delete; + CheckSparseTensorInvariants& operator=(CheckSparseTensorInvariants&&) = + delete; ~CheckSparseTensorInvariants() { at::globalContext().setCheckSparseTensorInvariants(old_state); diff --git a/aten/src/ATen/TensorIterator.h b/aten/src/ATen/TensorIterator.h index 471faf664e271a..7bbd68b91ba837 100644 --- a/aten/src/ATen/TensorIterator.h +++ b/aten/src/ATen/TensorIterator.h @@ -995,6 +995,7 @@ class TORCH_API TensorIteratorConfig final { /// TensorIterator that can use 32-bit indexing. Taken together the splits cover /// the original TensorIterator. 
struct TORCH_API SplitUntil32Bit { + // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) struct TORCH_API iterator { iterator() = default; iterator(const TensorIteratorBase& iter); diff --git a/aten/src/ATen/ThreadLocalState.h b/aten/src/ATen/ThreadLocalState.h index 2469cb1c3c47e1..bb28175c5f42e0 100644 --- a/aten/src/ATen/ThreadLocalState.h +++ b/aten/src/ATen/ThreadLocalState.h @@ -96,6 +96,10 @@ class TORCH_API ThreadLocalStateGuard { // set the given state across the thread boundary ThreadLocalState::setThreadLocalState(state); } + ThreadLocalStateGuard(ThreadLocalStateGuard&& other) = delete; + ThreadLocalStateGuard(const ThreadLocalStateGuard&) = delete; + ThreadLocalStateGuard& operator=(const ThreadLocalStateGuard&) = delete; + ThreadLocalStateGuard& operator=(ThreadLocalStateGuard&&) = delete; ~ThreadLocalStateGuard() { // restore previously set variables diff --git a/aten/src/ATen/core/Dict.h b/aten/src/ATen/core/Dict.h index a1d4da07520fa3..d187d7b7c11699 100644 --- a/aten/src/ATen/core/Dict.h +++ b/aten/src/ATen/core/Dict.h @@ -206,6 +206,7 @@ template Dict toGenericDict(Dict +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class Dict final { private: static_assert((std::is_same_v && std::is_same_v) || guts::typelist::contains::value, "Invalid Key type for Dict. We only support int64_t, double, bool, and string."); diff --git a/aten/src/ATen/core/PythonFallbackKernel.h b/aten/src/ATen/core/PythonFallbackKernel.h index 67f24795eeb58d..1d2b613166d3f3 100644 --- a/aten/src/ATen/core/PythonFallbackKernel.h +++ b/aten/src/ATen/core/PythonFallbackKernel.h @@ -6,6 +6,10 @@ namespace at::impl { struct TORCH_API RestorePythonTLSSnapshot { RestorePythonTLSSnapshot(); + RestorePythonTLSSnapshot(RestorePythonTLSSnapshot&& other) = delete; + RestorePythonTLSSnapshot(const RestorePythonTLSSnapshot&) = delete; + RestorePythonTLSSnapshot& operator=(const RestorePythonTLSSnapshot&) = delete; + RestorePythonTLSSnapshot& operator=(RestorePythonTLSSnapshot&&) = delete; ~RestorePythonTLSSnapshot(); private: @@ -18,6 +22,10 @@ struct TORCH_API RestorePythonTLSSnapshot { struct TORCH_API MaybeSetTLSOnEntryGuard { public: MaybeSetTLSOnEntryGuard(); + MaybeSetTLSOnEntryGuard(MaybeSetTLSOnEntryGuard&& other) = delete; + MaybeSetTLSOnEntryGuard(const MaybeSetTLSOnEntryGuard&) = delete; + MaybeSetTLSOnEntryGuard& operator=(const MaybeSetTLSOnEntryGuard&) = delete; + MaybeSetTLSOnEntryGuard& operator=(MaybeSetTLSOnEntryGuard&&) = delete; ~MaybeSetTLSOnEntryGuard(); private: diff --git a/aten/src/ATen/core/QuantizerBase.h b/aten/src/ATen/core/QuantizerBase.h index 0d2eaeece88980..a56ead7a30c696 100644 --- a/aten/src/ATen/core/QuantizerBase.h +++ b/aten/src/ATen/core/QuantizerBase.h @@ -40,7 +40,7 @@ struct TORCH_API Quantizer : public c10::intrusive_ptr_target { // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) const ScalarType scalar_type_; explicit Quantizer(ScalarType scalar_type) : scalar_type_(scalar_type) {} - ~Quantizer() override; + ~Quantizer() override = default; // Copied from torch/csrc/jit/ir/scope.h QuantizerPtr intrusive_from_this() { diff --git a/aten/src/ATen/core/Tensor.h b/aten/src/ATen/core/Tensor.h index de887a024c22fb..63b707767d344d 100644 --- a/aten/src/ATen/core/Tensor.h +++ b/aten/src/ATen/core/Tensor.h @@ -4,6 +4,7 @@ #include namespace at { +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class TORCH_API OptionalTensorRef { public: OptionalTensorRef() = default; @@ -20,6 +21,7 @@ class TORCH_API OptionalTensorRef { 
OptionalTensorRef(const OptionalTensorRef& rhs) : ref_(Tensor::unsafe_borrow_t{}, rhs.ref_) {} + OptionalTensorRef(OptionalTensorRef&& rhs) = default; OptionalTensorRef& operator=(OptionalTensorRef rhs) { std::swap(ref_, rhs.ref_); return *this; @@ -59,6 +61,10 @@ class TORCH_API TensorRef { TensorRef(const TensorBase& src) : ref_(Tensor::unsafe_borrow_t{}, src) {} + TensorRef(TensorRef&& other) = default; + TensorRef(const TensorRef&) = default; + TensorRef& operator=(const TensorRef&) = default; + TensorRef& operator=(TensorRef&&) = default; const Tensor& operator*() const & { return ref_; diff --git a/aten/src/ATen/core/Vitals.h b/aten/src/ATen/core/Vitals.h index 8a7a51e81e1d27..7ec213938d564a 100644 --- a/aten/src/ATen/core/Vitals.h +++ b/aten/src/ATen/core/Vitals.h @@ -39,6 +39,8 @@ struct TORCH_API TorchVital { explicit TorchVital(std::string n) : name(std::move(n)) {} TorchVital(const TorchVital&) = default; TorchVital(TorchVital&&) = default; + TorchVital& operator=(const TorchVital&) = default; + TorchVital& operator=(TorchVital&&) = default; TorchVital() = delete; TorchVitalAttr& create(const std::string& attr); @@ -71,6 +73,7 @@ class TORCH_API APIVitals { APIVitals(APIVitals&& other) = delete; APIVitals& operator=(const APIVitals&) = delete; APIVitals& operator=(APIVitals&&) = delete; + ~APIVitals() = default; private: std::unordered_map name_map_; diff --git a/aten/src/ATen/core/dynamic_type.h b/aten/src/ATen/core/dynamic_type.h index 4ad833a295b12c..697fcec39e34cd 100644 --- a/aten/src/ATen/core/dynamic_type.h +++ b/aten/src/ATen/core/dynamic_type.h @@ -159,6 +159,11 @@ class DynamicType : public SharedType { explicit DynamicType(Tag, Arguments); explicit DynamicType(Tag, std::string_view, Arguments); + DynamicType(DynamicType&& other) = delete; + DynamicType(const DynamicType&) = delete; + DynamicType& operator=(const DynamicType&) = delete; + DynamicType& operator=(DynamicType&&) = delete; + TypePtr containedType(size_t) const override; size_t containedTypeSize() const override; Tag tag() const { diff --git a/aten/src/ATen/core/jit_type.h b/aten/src/ATen/core/jit_type.h index 5951b4763be3a0..58d50de74faed3 100644 --- a/aten/src/ATen/core/jit_type.h +++ b/aten/src/ATen/core/jit_type.h @@ -2204,7 +2204,7 @@ struct TORCH_API InterfaceType : public NamedType { return is_module_; } static const TypeKind Kind = TypeKind::InterfaceType; - ~InterfaceType() override; + ~InterfaceType() override = default; private: InterfaceType(QualifiedName name, bool is_module); static bool isSubTypeImpl( diff --git a/aten/src/ATen/core/jit_type_base.h b/aten/src/ATen/core/jit_type_base.h index 8904cd7bc431e3..de440787ee686f 100644 --- a/aten/src/ATen/core/jit_type_base.h +++ b/aten/src/ATen/core/jit_type_base.h @@ -227,6 +227,7 @@ struct TORCH_API Type { SingletonOrSharedTypePtr(SingletonOrSharedTypePtr&&) noexcept = default; SingletonOrSharedTypePtr& operator=(const SingletonOrSharedTypePtr&) = default; SingletonOrSharedTypePtr& operator=(SingletonOrSharedTypePtr&&) noexcept = default; + ~SingletonOrSharedTypePtr() = default; T* get() const { return repr_.isSharedAndNonNull() ? repr_.shared_.repr_.get() : static_cast(repr_.rawRepr().first); diff --git a/aten/src/ATen/core/rref_interface.h b/aten/src/ATen/core/rref_interface.h index f0749d368792f0..70273f168d9361 100644 --- a/aten/src/ATen/core/rref_interface.h +++ b/aten/src/ATen/core/rref_interface.h @@ -17,6 +17,7 @@ class C10_EXPORT RRefInterface : public c10::intrusive_ptr_target { // counting. 
RRefInterface(const RRefInterface& other) = delete; RRefInterface(RRefInterface&& other) = delete; + RRefInterface& operator=(const RRefInterface& other) = delete; RRefInterface& operator=(RRefInterface&& other) = delete; ~RRefInterface() override = default; diff --git a/aten/src/ATen/core/type.cpp b/aten/src/ATen/core/type.cpp index 92c30e6ec8437c..164ea6d44f5846 100644 --- a/aten/src/ATen/core/type.cpp +++ b/aten/src/ATen/core/type.cpp @@ -1037,8 +1037,6 @@ InterfaceType::InterfaceType(QualifiedName name, bool is_module) methods_(std::make_shared>()), is_module_(is_module) {} -InterfaceType::~InterfaceType() = default; - bool containsAnyType(const TypePtr& type) { std::vector to_scan = { type }; while (!to_scan.empty()) { diff --git a/aten/src/ATen/functorch/DynamicLayer.cpp b/aten/src/ATen/functorch/DynamicLayer.cpp index 81b82f2556c19e..9bdf155affc2b2 100644 --- a/aten/src/ATen/functorch/DynamicLayer.cpp +++ b/aten/src/ATen/functorch/DynamicLayer.cpp @@ -202,6 +202,8 @@ struct SaveLocalDispatchKeySet { } SaveLocalDispatchKeySet(const SaveLocalDispatchKeySet&) = delete; SaveLocalDispatchKeySet& operator=(const SaveLocalDispatchKeySet&) = delete; + SaveLocalDispatchKeySet(SaveLocalDispatchKeySet&&) = delete; + SaveLocalDispatchKeySet& operator=(SaveLocalDispatchKeySet&&) = delete; }; const std::vector& getDynamicLayerStack() { @@ -406,6 +408,10 @@ static void dump_local_tls() { struct WithoutTop { WithoutTop(); + WithoutTop(WithoutTop&& other) = delete; + WithoutTop(const WithoutTop&) = delete; + WithoutTop& operator=(const WithoutTop&) = delete; + WithoutTop& operator=(WithoutTop&&) = delete; ~WithoutTop(); DynamicLayer layer_; }; diff --git a/aten/src/ATen/quantized/Quantizer.cpp b/aten/src/ATen/quantized/Quantizer.cpp index ef8f8deb4973be..fa48b33ce7c0d0 100644 --- a/aten/src/ATen/quantized/Quantizer.cpp +++ b/aten/src/ATen/quantized/Quantizer.cpp @@ -313,8 +313,6 @@ Tensor& PerChannelAffineFloatQParamsQuantizer::dequantize_out( return rtensor; } -Quantizer::~Quantizer() = default; - C10_EXPORT void set_quantizer_(const Tensor& self, ConstQuantizerPtr quantizer) { get_qtensorimpl(self)->set_quantizer_(quantizer); } diff --git a/aten/src/ATen/record_function.h b/aten/src/ATen/record_function.h index 15130c91367522..52115b4a65af66 100644 --- a/aten/src/ATen/record_function.h +++ b/aten/src/ATen/record_function.h @@ -353,6 +353,8 @@ struct TORCH_API RecordFunction { RecordFunction(const RecordFunction&) = delete; RecordFunction& operator=(const RecordFunction&) = delete; + RecordFunction(RecordFunction&&) = delete; + RecordFunction& operator=(RecordFunction&&) = delete; const char* name() const; @@ -764,6 +766,10 @@ class TORCH_API RecordFunctionGuard { enableRecordFunction(is_enabled); } + RecordFunctionGuard(RecordFunctionGuard&& other) = delete; + RecordFunctionGuard(const RecordFunctionGuard&) = delete; + RecordFunctionGuard& operator=(const RecordFunctionGuard&) = delete; + RecordFunctionGuard& operator=(RecordFunctionGuard&&) = delete; virtual ~RecordFunctionGuard() { enableRecordFunction(prev_value_); } diff --git a/c10/core/impl/PythonDispatcherTLS.h b/c10/core/impl/PythonDispatcherTLS.h index 12c0677f36fdb5..7b91aab686eca1 100644 --- a/c10/core/impl/PythonDispatcherTLS.h +++ b/c10/core/impl/PythonDispatcherTLS.h @@ -16,6 +16,10 @@ struct C10_API DisablePythonDispatcher { PythonDispatcherTLS::set_state({}); } + DisablePythonDispatcher(DisablePythonDispatcher&& other) = delete; + DisablePythonDispatcher(const DisablePythonDispatcher&) = delete; + DisablePythonDispatcher& 
operator=(const DisablePythonDispatcher&) = delete; + DisablePythonDispatcher& operator=(DisablePythonDispatcher&&) = delete; ~DisablePythonDispatcher() { PythonDispatcherTLS::set_state(old_); } diff --git a/c10/test/util/Metaprogramming_test.cpp b/c10/test/util/Metaprogramming_test.cpp index ad301462bd5147..a7bca7a5b511f9 100644 --- a/c10/test/util/Metaprogramming_test.cpp +++ b/c10/test/util/Metaprogramming_test.cpp @@ -5,7 +5,7 @@ using namespace c10::guts; -// NOLINTBEGIN(modernize*) +// NOLINTBEGIN(modernize*, cppcoreguidelines-special-member-functions) namespace { namespace test_function_traits { @@ -302,4 +302,4 @@ TEST(MetaprogrammingTest, TupleMap_canBeUsedWithAutoLambdas) { } // namespace test_tuple_map } // namespace -// NOLINTEND(modernize*) +// NOLINTEND(modernize*, cppcoreguidelines-special-member-functions) diff --git a/c10/test/util/ThreadLocal_test.cpp b/c10/test/util/ThreadLocal_test.cpp index bbc10c0c52e443..29e748e14890e7 100644 --- a/c10/test/util/ThreadLocal_test.cpp +++ b/c10/test/util/ThreadLocal_test.cpp @@ -148,6 +148,7 @@ TEST(ThreadLocalTest, TestThreadWithGlobalScopeVar) { TEST(ThreadLocalTest, TestObjectsAreReleased) { static std::atomic ctors{0}; static std::atomic dtors{0}; + // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) struct A { A() { ++ctors; @@ -183,6 +184,7 @@ TEST(ThreadLocalTest, TestObjectsAreReleased) { TEST(ThreadLocalTest, TestObjectsAreReleasedByNonstaticThreadLocal) { static std::atomic ctors(0); static std::atomic dtors(0); + // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) struct A { A() { ++ctors; diff --git a/c10/test/util/intrusive_ptr_test.cpp b/c10/test/util/intrusive_ptr_test.cpp index 14c12f422f2cd8..47e7942950ef79 100644 --- a/c10/test/util/intrusive_ptr_test.cpp +++ b/c10/test/util/intrusive_ptr_test.cpp @@ -45,6 +45,7 @@ struct SomeChildClass : SomeBaseClass { SomeChildClass(int v) : SomeBaseClass(v) {} }; +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class DestructableMock : public intrusive_ptr_target { public: DestructableMock(bool* resourcesReleased, bool* wasDestructed) diff --git a/c10/test/util/logging_test.cpp b/c10/test/util/logging_test.cpp index 5798b37c18e385..c06dfb43d46cb4 100644 --- a/c10/test/util/logging_test.cpp +++ b/c10/test/util/logging_test.cpp @@ -81,6 +81,7 @@ TEST( } namespace { +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) struct Noncopyable { int x; diff --git a/c10/test/util/typeid_test.cpp b/c10/test/util/typeid_test.cpp index 7587d5eefdbd53..8e78ec84e530aa 100644 --- a/c10/test/util/typeid_test.cpp +++ b/c10/test/util/typeid_test.cpp @@ -70,6 +70,7 @@ TEST(TypeMetaTest, TypeMeta) { EXPECT_NE(bar_meta.name().find("TypeMetaTestBar"), c10::string_view::npos); } +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class ClassAllowAssignment { public: ClassAllowAssignment() = default; @@ -78,6 +79,7 @@ class ClassAllowAssignment { int x{42}; }; +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class ClassNoAssignment { public: ClassNoAssignment() = default; diff --git a/c10/util/DynamicCounter.cpp b/c10/util/DynamicCounter.cpp index 0b7906af1b1204..cd9decfc41f3af 100644 --- a/c10/util/DynamicCounter.cpp +++ b/c10/util/DynamicCounter.cpp @@ -52,6 +52,11 @@ struct DynamicCounter::Guard { } } + Guard(Guard&& other) = delete; + Guard(const Guard&) = delete; + Guard& operator=(const Guard&) = delete; + Guard& operator=(Guard&&) = delete; + ~Guard() { for (const auto& backend : backends_) { backend->unregisterCounter(key_); diff 
--git a/c10/util/Exception.h b/c10/util/Exception.h index 275526cf400823..e83b65cc5efc0f 100644 --- a/c10/util/Exception.h +++ b/c10/util/Exception.h @@ -205,6 +205,10 @@ class C10_API WarningHandlerGuard { : prev_handler_(c10::WarningUtils::get_warning_handler()) { c10::WarningUtils::set_warning_handler(new_handler); } + WarningHandlerGuard(WarningHandlerGuard&& other) = delete; + WarningHandlerGuard(const WarningHandlerGuard&) = delete; + WarningHandlerGuard& operator=(const WarningHandlerGuard&) = delete; + WarningHandlerGuard& operator=(WarningHandlerGuard&&) = delete; ~WarningHandlerGuard() { c10::WarningUtils::set_warning_handler(prev_handler_); } diff --git a/c10/util/LeftRight.h b/c10/util/LeftRight.h index 58145b2c779cc3..0ad9a1b346103e 100644 --- a/c10/util/LeftRight.h +++ b/c10/util/LeftRight.h @@ -18,6 +18,8 @@ struct IncrementRAII final { ~IncrementRAII() { _counter->fetch_sub(1); } + IncrementRAII(IncrementRAII&&) = delete; + IncrementRAII& operator=(IncrementRAII&&) = delete; private: std::atomic* _counter; @@ -201,6 +203,7 @@ class RWSafeLeftRightWrapper final { RWSafeLeftRightWrapper(RWSafeLeftRightWrapper&&) noexcept = delete; RWSafeLeftRightWrapper& operator=(const RWSafeLeftRightWrapper&) = delete; RWSafeLeftRightWrapper& operator=(RWSafeLeftRightWrapper&&) noexcept = delete; + ~RWSafeLeftRightWrapper() = default; template // NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) diff --git a/c10/util/order_preserving_flat_hash_map.h b/c10/util/order_preserving_flat_hash_map.h index 021995600344ad..fd8196432c994c 100644 --- a/c10/util/order_preserving_flat_hash_map.h +++ b/c10/util/order_preserving_flat_hash_map.h @@ -139,6 +139,7 @@ struct KeyOrValueEquality : functor_storage { }; static constexpr int8_t min_lookups = 4; template +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) struct sherwood_v3_entry { // NOLINTNEXTLINE(modernize-use-equals-default) sherwood_v3_entry() {} diff --git a/torch/csrc/distributed/rpc/types.h b/torch/csrc/distributed/rpc/types.h index 7844ce270cd471..82cf528bb9bd6b 100644 --- a/torch/csrc/distributed/rpc/types.h +++ b/torch/csrc/distributed/rpc/types.h @@ -13,6 +13,10 @@ TORCH_API void disableJitRRefPickle(); struct TORCH_API JitRRefPickleGuard { JitRRefPickleGuard(); + JitRRefPickleGuard(JitRRefPickleGuard&& other) = delete; + JitRRefPickleGuard(const JitRRefPickleGuard&) = delete; + JitRRefPickleGuard& operator=(const JitRRefPickleGuard&) = delete; + JitRRefPickleGuard& operator=(JitRRefPickleGuard&&) = delete; ~JitRRefPickleGuard(); }; diff --git a/torch/csrc/dynamo/python_compiled_autograd.cpp b/torch/csrc/dynamo/python_compiled_autograd.cpp index 33dac77d74340a..8bd192cde7b1b0 100644 --- a/torch/csrc/dynamo/python_compiled_autograd.cpp +++ b/torch/csrc/dynamo/python_compiled_autograd.cpp @@ -543,6 +543,10 @@ static PyObject* call_end_capture(PyObject* self, const variable_list& inputs) { struct ClosingTHPObjectPtr : public THPObjectPtr { ClosingTHPObjectPtr(PyObject* o) : THPObjectPtr(o) {} + ClosingTHPObjectPtr(ClosingTHPObjectPtr&& other) = default; + ClosingTHPObjectPtr(const ClosingTHPObjectPtr&) = delete; + ClosingTHPObjectPtr& operator=(const ClosingTHPObjectPtr&) = delete; + ClosingTHPObjectPtr& operator=(ClosingTHPObjectPtr&&) = default; ~ClosingTHPObjectPtr() { if (PyErr_Occurred()) { // do nothing, do not attempt to close diff --git a/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h b/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h index 5db82bf413668a..2018cf7573e7a9 100644 --- 
a/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h +++ b/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h @@ -8,6 +8,7 @@ namespace torch::inductor { // NOTICE: Following APIs are subject to change due to active development // We provide NO BC guarantee for these APIs +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class TORCH_API AOTIModelContainerRunnerCuda : public AOTIModelContainerRunner { public: // @param device_str: cuda device string, e.g. "cuda", "cuda:0" diff --git a/torch/csrc/lazy/core/ir.cpp b/torch/csrc/lazy/core/ir.cpp index fe9cfba2556c06..033e4f5cf00b95 100644 --- a/torch/csrc/lazy/core/ir.cpp +++ b/torch/csrc/lazy/core/ir.cpp @@ -98,8 +98,6 @@ Node::Node(OpKind op, Shape shape, size_t num_outputs) : Node(op, num_outputs) { shapes_.push_back(std::move(shape)); } -Node::~Node() = default; - // Retrieves the full shape of the IR Node. c10::ArrayRef Node::shapes() const { return shapes_; diff --git a/torch/csrc/lazy/core/ir.h b/torch/csrc/lazy/core/ir.h index dd244b7442b2d3..8e384e7982bd58 100644 --- a/torch/csrc/lazy/core/ir.h +++ b/torch/csrc/lazy/core/ir.h @@ -101,7 +101,7 @@ class TORCH_API Node { // Construct node with shape and no operands Node(OpKind op, Shape shape, size_t num_outputs = 1); - virtual ~Node(); + virtual ~Node() = default; const OpKind& op() const { return op_; diff --git a/torch/csrc/lazy/core/ir_metadata.h b/torch/csrc/lazy/core/ir_metadata.h index aeaacb596e0a35..7e73a593181997 100644 --- a/torch/csrc/lazy/core/ir_metadata.h +++ b/torch/csrc/lazy/core/ir_metadata.h @@ -38,6 +38,10 @@ struct TORCH_API MetaData { struct TORCH_API ScopePusher { explicit ScopePusher(const std::string& name); ~ScopePusher(); + ScopePusher(ScopePusher&& other) = delete; + ScopePusher(const ScopePusher&) = delete; + ScopePusher& operator=(const ScopePusher&) = delete; + ScopePusher& operator=(ScopePusher&&) = delete; static void ResetScopes(); }; diff --git a/torch/csrc/lazy/core/metrics.h b/torch/csrc/lazy/core/metrics.h index 191baa5eb1768e..05b525778d9a3e 100644 --- a/torch/csrc/lazy/core/metrics.h +++ b/torch/csrc/lazy/core/metrics.h @@ -258,6 +258,10 @@ class TORCH_API TimedSection { public: explicit TimedSection(Metric* metric) : metric_(metric), start_(NowNs()) {} + TimedSection(TimedSection&& other) = delete; + TimedSection(const TimedSection&) = delete; + TimedSection& operator=(const TimedSection&) = delete; + TimedSection& operator=(TimedSection&&) = delete; ~TimedSection() { int64_t now = NowNs(); metric_->AddSample(now, static_cast(now - start_)); diff --git a/torch/csrc/lazy/core/tensor.h b/torch/csrc/lazy/core/tensor.h index dfa317d3b81f90..b739399b6bbdb3 100644 --- a/torch/csrc/lazy/core/tensor.h +++ b/torch/csrc/lazy/core/tensor.h @@ -42,12 +42,18 @@ class TORCH_API LazyTensor : public c10::intrusive_ptr_target { Data(BackendDevice device) : device(std::move(device)), unique_id(GetNextTensorId()) {} + Data(Data&& other) = delete; + Data(const Data&) = delete; + Data& operator=(const Data&) = delete; + Data& operator=(Data&&) = delete; virtual ~Data(); BackendDataPtr handle; Value ir_value; std::optional tensor_data; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) const BackendDevice device; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) const int64_t unique_id = 0; size_t generation = 1; }; @@ -68,6 +74,8 @@ class TORCH_API LazyTensor : public c10::intrusive_ptr_target { LazyTensor() = delete; LazyTensor(const LazyTensor&) = default; LazyTensor(LazyTensor&&) noexcept = 
default; + LazyTensor& operator=(const LazyTensor&) = default; + LazyTensor& operator=(LazyTensor&&) noexcept = default; ~LazyTensor() override = default; diff --git a/torch/csrc/lazy/core/thread_pool.cpp b/torch/csrc/lazy/core/thread_pool.cpp index 3f87aaa96b5191..e61827e5b0fdc1 100644 --- a/torch/csrc/lazy/core/thread_pool.cpp +++ b/torch/csrc/lazy/core/thread_pool.cpp @@ -26,6 +26,10 @@ class ThreadPool { }); } } + ThreadPool(const ThreadPool&) = delete; + ThreadPool(ThreadPool&&) = delete; + ThreadPool& operator=(const ThreadPool&) = delete; + ThreadPool& operator=(ThreadPool&&) = delete; ~ThreadPool() { { diff --git a/torch/csrc/lazy/core/thread_pool.h b/torch/csrc/lazy/core/thread_pool.h index 8caa1e0bbc4537..2e0ae8f89d8e9a 100644 --- a/torch/csrc/lazy/core/thread_pool.h +++ b/torch/csrc/lazy/core/thread_pool.h @@ -13,6 +13,7 @@ namespace torch::lazy { +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class TORCH_API Completion { public: class Data; diff --git a/torch/csrc/monitor/counters.h b/torch/csrc/monitor/counters.h index bfb1a463b713f3..986dfb7b85ca13 100644 --- a/torch/csrc/monitor/counters.h +++ b/torch/csrc/monitor/counters.h @@ -122,6 +122,10 @@ class Stat { maxSamples_(maxSamples) { detail::registerStat(this); } + Stat(const Stat&) = delete; + Stat(Stat&&) = delete; + Stat& operator=(const Stat&) = delete; + Stat& operator=(Stat&&) = delete; virtual ~Stat() { { diff --git a/torch/csrc/profiler/collection.cpp b/torch/csrc/profiler/collection.cpp index eefe5621a293eb..c5b0d9539d4fd5 100644 --- a/torch/csrc/profiler/collection.cpp +++ b/torch/csrc/profiler/collection.cpp @@ -402,6 +402,10 @@ struct StealOrDefault { explicit StealOrDefault(T& container) : container_{container}, it_{container.begin()} {} + StealOrDefault(const StealOrDefault&) = delete; + StealOrDefault(StealOrDefault&&) = delete; + StealOrDefault& operator=(const StealOrDefault&) = delete; + StealOrDefault& operator=(StealOrDefault&&) = delete; ~StealOrDefault() { container_.get().clear(); } diff --git a/torch/csrc/profiler/orchestration/observer.h b/torch/csrc/profiler/orchestration/observer.h index c3beb4cca4d0c7..272e2e4f9d5f93 100644 --- a/torch/csrc/profiler/orchestration/observer.h +++ b/torch/csrc/profiler/orchestration/observer.h @@ -128,6 +128,10 @@ struct TORCH_API ProfilerConfig { // ---------------------------------------------------------------------------- struct TORCH_API ProfilerStateBase : public c10::MemoryReportingInfoBase { explicit ProfilerStateBase(ProfilerConfig config); + ProfilerStateBase(const ProfilerStateBase&) = delete; + ProfilerStateBase(ProfilerStateBase&&) = delete; + ProfilerStateBase& operator=(const ProfilerStateBase&) = delete; + ProfilerStateBase& operator=(ProfilerStateBase&&) = delete; ~ProfilerStateBase() override; static ProfilerStateBase* get(bool global); From d622b490d62e5439295bab2b89986eefae8ee5fc Mon Sep 17 00:00:00 2001 From: Michael Lazos Date: Wed, 6 Nov 2024 08:52:53 +0000 Subject: [PATCH 130/503] [Dynamo] Support tensor mro without source (#139838) Fixes https://github.com/pytorch/pytorch/issues/137743 The issue here is that if `type` was called on a tensor without a source, we wouldn't have a source even for `torch.Tensor`, and the `__mro__` retrieval would fail. Since `torch.Tensor` is an internal torch type, I add handling for it in `call_type` in builtins. 
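For illustration, a minimal repro of the pattern this change enables, adapted from the test added below (assumes a build with `torch.compile` available):

```python
import torch

@torch.compile(fullgraph=True)
def fn(x):
    # The tensor created inside the compiled region has no Dynamo source,
    # so type(...) previously could not resolve torch.Tensor.__mro__ and the
    # fullgraph compile would fail.
    input_type = type(torch.ones(2, 2))
    names = [cls.__name__ for cls in input_type.__mro__]
    return x, input_type, names

fn(torch.ones(2, 2))
```

With this change, `call_type` falls back to installing `torch` as a global and sourcing `torch.Tensor` from it, so the `__mro__` walk can proceed instead of erroring.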
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139838 Approved by: https://github.com/williamwen42 --- test/dynamo/test_misc.py | 13 +++++++++++++ torch/_dynamo/variables/builtin.py | 13 ++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py index 32f73334dcbe0d..88d2c556cd71a3 100644 --- a/test/dynamo/test_misc.py +++ b/test/dynamo/test_misc.py @@ -1188,6 +1188,19 @@ def fn(x): inp.test = None self.assertEqual(torch.ones(2, 2) + 2, fn(inp)) + def test_mro_type_tensor_no_source(self): + @torch.compile(fullgraph=True) + def fn(x): + z = [] + input_type = type(torch.ones(2, 2)) + for cls in input_type.__mro__: + z.append(cls.__name__) + + return x, input_type, z + + inp = torch.ones(2, 2) + fn(inp) + def test_shape_unpack(self): def fn(x): a, b = x.size() diff --git a/torch/_dynamo/variables/builtin.py b/torch/_dynamo/variables/builtin.py index c395bc0c00c00c..5d6b351a8f49cc 100644 --- a/torch/_dynamo/variables/builtin.py +++ b/torch/_dynamo/variables/builtin.py @@ -26,7 +26,13 @@ ) from ..guards import GuardBuilder, install_guard from ..replay_record import DummyModule -from ..source import AttrSource, GetItemSource, is_constant_source, TypeSource +from ..source import ( + AttrSource, + GetItemSource, + GlobalSource, + is_constant_source, + TypeSource, +) from ..utils import ( check_constant_args, check_numpy_ndarray_args, @@ -1879,6 +1885,11 @@ def call_type(self, tx: "InstructionTranslator", obj: VariableTracker): ) from None source = obj.source and TypeSource(obj.source) + if py_type is torch.Tensor: + # In some cases torch isn't available in globals + name = tx.output.install_global_by_id("", torch) + source = AttrSource(GlobalSource(name), "Tensor") + return VariableTracker.build(tx, py_type, source) def call_reversed(self, tx: "InstructionTranslator", obj: VariableTracker): From 5f266b5a023b9a16ecda7b5c5f2e00580d4c65e9 Mon Sep 17 00:00:00 2001 From: Jack Taylor <108682042+jataylo@users.noreply.github.com> Date: Wed, 6 Nov 2024 12:49:44 +0000 Subject: [PATCH 131/503] [ROCm] re-enable flex attention UTs (#139632) https://github.com/pytorch/pytorch/pull/136792 accidentally disabled flex attention UTs on ROCm. Re-enabling. 
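For context, with the `torch.version.hip is None` condition removed below, ROCm builds pass the `flex_attention_supported_platform` gate again. A minimal sketch of how such a gate is typically consumed, assuming the decorator is imported from `torch.testing._internal.common_device_type` as defined in the diff below:

```python
import unittest

import torch
from torch.testing._internal.common_device_type import (
    flex_attention_supported_platform,
)

class FlexAttentionGateSmoke(unittest.TestCase):
    @flex_attention_supported_platform
    def test_runs_on_supported_gpus(self):
        # Skipped unless a CUDA/ROCm device is visible, Triton is available,
        # and the device reports compute capability >= (8, 0), per the gate.
        self.assertTrue(torch.cuda.is_available())

if __name__ == "__main__":
    unittest.main()
```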
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139632 Approved by: https://github.com/drisspg --- test/test_nestedtensor.py | 2 ++ torch/testing/_internal/common_device_type.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_nestedtensor.py b/test/test_nestedtensor.py index ba2af0927c8e12..cf2b01c495afd2 100644 --- a/test/test_nestedtensor.py +++ b/test/test_nestedtensor.py @@ -58,6 +58,7 @@ NestedTensorTestCase, parametrize, run_tests, + skipIfRocm, skipIfSlowGradcheckEnv, skipIfTorchDynamo, subtest, @@ -7040,6 +7041,7 @@ def _rand_qkv(self, device, dtype, noncontig_with_holes=False): # non-contiguous with holes not supported yet @decorateIf(unittest.skip, lambda params: params["noncontig_with_holes"]) @parametrize("noncontig_with_holes", [False, True]) + @skipIfRocm def test_flex_attention(self, device, dtype, noncontig_with_holes): query, key, value = self._rand_qkv(device, dtype, noncontig_with_holes) diff --git a/torch/testing/_internal/common_device_type.py b/torch/testing/_internal/common_device_type.py index 57d9b5bb4e39b4..8ad0f9e0f9f725 100644 --- a/torch/testing/_internal/common_device_type.py +++ b/torch/testing/_internal/common_device_type.py @@ -1954,7 +1954,6 @@ def get_all_device_types() -> List[str]: flex_attention_supported_platform = unittest.skipUnless( torch.cuda.is_available() - and torch.version.hip is None and torch.utils._triton.has_triton() and torch.cuda.get_device_capability() >= (8, 0), "Requires CUDA and Triton", From 53299b8a38b1354f41505d0c1ecb7742f77ed308 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Wed, 6 Nov 2024 13:23:43 +0000 Subject: [PATCH 132/503] Revert "Don't use deprecated type properties in UpsampleKernel (#139399)" This reverts commit 0058f7100222523fa8b9f74af9ea7d341a6458b4. 
Reverted https://github.com/pytorch/pytorch/pull/139399 on behalf of https://github.com/malfet due to And it was backed out again due to the internal usages of deprecated API ([comment](https://github.com/pytorch/pytorch/pull/139358#issuecomment-2459740090)) --- aten/src/ATen/native/cpu/UpSampleKernel.cpp | 18 +++++++++--------- torch/csrc/jit/mobile/flatbuffer_loader.cpp | 4 ++-- .../csrc/jit/runtime/register_special_ops.cpp | 2 +- torch/csrc/jit/serialization/unpickler.cpp | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/aten/src/ATen/native/cpu/UpSampleKernel.cpp b/aten/src/ATen/native/cpu/UpSampleKernel.cpp index 74fb38779ea156..3cc02b5077665e 100644 --- a/aten/src/ATen/native/cpu/UpSampleKernel.cpp +++ b/aten/src/ATen/native/cpu/UpSampleKernel.cpp @@ -735,8 +735,8 @@ struct HelperInterpBase { for ([[maybe_unused]] const auto j : c10::irange(interp_size)) { output.emplace_back( - empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); - output.emplace_back(empty(new_shape, at::device(kCPU).dtype(output_type))); + empty(new_shape, CPU(c10::CppTypeToScalarType()))); + output.emplace_back(empty(new_shape, CPU(output_type))); } } @@ -878,16 +878,16 @@ struct HelperInterpBase { // Bounds approach as in PIL: xmin/xmax output.emplace_back( - empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); + empty(new_shape, CPU(c10::CppTypeToScalarType()))); output.emplace_back( - empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); + empty(new_shape, CPU(c10::CppTypeToScalarType()))); output.emplace_back( - empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); + empty(new_shape, CPU(c10::CppTypeToScalarType()))); { // Weights new_shape[reshape_dim] = output_size * max_interp_size; - auto wts = empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType())); + auto wts = empty(new_shape, CPU(c10::CppTypeToScalarType())); auto strides = wts.strides().vec(); strides[reshape_dim] = 0; new_shape[reshape_dim] = output_size; @@ -895,7 +895,7 @@ struct HelperInterpBase { output.emplace_back(wts); // Weights indices output.emplace_back( - empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); + empty(new_shape, CPU(c10::CppTypeToScalarType()))); } int64_t* idx_ptr_xmin = output[0].data_ptr(); @@ -1050,9 +1050,9 @@ struct HelperInterpNearest : public HelperInterpBase { for ([[maybe_unused]] const auto j : c10::irange(interp_size)) { output.emplace_back( - empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); + empty(new_shape, CPU(c10::CppTypeToScalarType()))); // Defines weights for consistency, but not used - output.emplace_back(at::ones(new_shape, at::device(kCPU).dtype(output_type))); + output.emplace_back(at::ones(new_shape, CPU(output_type))); } } diff --git a/torch/csrc/jit/mobile/flatbuffer_loader.cpp b/torch/csrc/jit/mobile/flatbuffer_loader.cpp index f56b5818ecaccf..246bee03d6fdd6 100644 --- a/torch/csrc/jit/mobile/flatbuffer_loader.cpp +++ b/torch/csrc/jit/mobile/flatbuffer_loader.cpp @@ -469,8 +469,8 @@ IValue parseBasic( at::Tensor parseTensorFromMetadata( FlatbufferLoader* loader, const mobile::serialization::TensorMetadata* tensor_md) { - auto type = static_cast(tensor_md->scalar_type()); - auto options = at::device(at::kCPU).dtype(type); + at::ScalarType type = static_cast(tensor_md->scalar_type()); + auto options = at::CPU(type).options(); at::Tensor tensor; if (tensor_md->quantized_schema() != nullptr) { // is quantized diff --git 
a/torch/csrc/jit/runtime/register_special_ops.cpp b/torch/csrc/jit/runtime/register_special_ops.cpp index 0f2447e05a9f8d..783aaf87ef7d7d 100644 --- a/torch/csrc/jit/runtime/register_special_ops.cpp +++ b/torch/csrc/jit/runtime/register_special_ops.cpp @@ -293,7 +293,7 @@ RegisterOperators reg({ DEFINE_TORCH_TENSOR_OP( bool, bool, - at::empty({}, at::device(at::kCPU).dtype(at::kBool)).fill_(scalar_val)) + at::empty({}, at::CPU(at::kBool).options()).fill_(scalar_val)) DEFINE_TORCH_TENSOR_OP( float, double, diff --git a/torch/csrc/jit/serialization/unpickler.cpp b/torch/csrc/jit/serialization/unpickler.cpp index 5a81a25c358e20..fc95f7fe9a4a65 100644 --- a/torch/csrc/jit/serialization/unpickler.cpp +++ b/torch/csrc/jit/serialization/unpickler.cpp @@ -586,7 +586,7 @@ PickleOpCode Unpickler::readInstruction() { storage = storage_context_->getStorage(key); } else { int64_t numel = args.at(4).toInt(); - auto dtype = scalarTypeToTypeMeta(type); + caffe2::TypeMeta dtype = at::CPU(type).typeMeta(); at::DataPtr storage_ptr; if (numel > 0) { @@ -608,7 +608,7 @@ PickleOpCode Unpickler::readInstruction() { } } - auto options = at::device(at::kCPU).dtype(type); + auto options = at::CPU(type).options(); if (use_storage_device_) { options = options.device(storage.device()); device = storage.device(); From 06ad4044019a10bc8561b4f699d9bf77254bc04f Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Wed, 6 Nov 2024 13:23:43 +0000 Subject: [PATCH 133/503] Revert "[BE] And delete `DeprecatedTypProperties` cast (#139358)" This reverts commit b82a51bc6b1170da3db8f67816799f3a47530ff8. Reverted https://github.com/pytorch/pytorch/pull/139358 on behalf of https://github.com/malfet due to And it was backed out again due to the internal usages of deprecated API ([comment](https://github.com/pytorch/pytorch/pull/139358#issuecomment-2459740090)) --- aten/src/ATen/Dispatch.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/aten/src/ATen/Dispatch.h b/aten/src/ATen/Dispatch.h index 30114e42d3de78..c70d68fb93e77d 100644 --- a/aten/src/ATen/Dispatch.h +++ b/aten/src/ATen/Dispatch.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include @@ -102,6 +102,13 @@ inline at::ScalarType scalar_type(at::ScalarType s) { return s; } +C10_DEPRECATED_MESSAGE( + "passing at::DeprecatedTypeProperties to an AT_DISPATCH macro is deprecated, " + "pass an at::ScalarType instead") +inline at::ScalarType scalar_type(const at::DeprecatedTypeProperties& t) { + return t.scalarType(); +} + } // namespace detail // The AT_DISPATCH_* family of macros provides the ability to From 10d77293335b01ab97ef3271a3a0ea38dbccf843 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Wed, 6 Nov 2024 13:27:41 +0000 Subject: [PATCH 134/503] Revert "Enable cppcoreguidelines-special-member-functions (#139132)" This reverts commit a9b4989c726a29b4b89c64282e32b9e4fc0b7d68. Reverted https://github.com/pytorch/pytorch/pull/139132 on behalf of https://github.com/ZainRizvi due to Sorry but this fails on trunk. 
See inductor/test_mkldnn_pattern_matcher.py::TestPatternMatcher::test_smooth_quant_with_int_mm [GH job link](https://github.com/pytorch/pytorch/actions/runs/11699366379/job/32591132460) [HUD commit link](https://hud.pytorch.org/pytorch/pytorch/commit/22e89ea2aaa3e0ef0ec4504bd2dbf230447a6d2a) ([comment](https://github.com/pytorch/pytorch/pull/139132#issuecomment-2459743145)) --- .clang-tidy | 3 +-- aten/src/ATen/Context.h | 8 -------- aten/src/ATen/DynamicLibrary.h | 2 -- aten/src/ATen/SparseCsrTensorUtils.h | 6 ------ aten/src/ATen/TensorIterator.h | 1 - aten/src/ATen/ThreadLocalState.h | 4 ---- aten/src/ATen/core/Dict.h | 1 - aten/src/ATen/core/PythonFallbackKernel.h | 8 -------- aten/src/ATen/core/QuantizerBase.h | 2 +- aten/src/ATen/core/Tensor.h | 6 ------ aten/src/ATen/core/Vitals.h | 3 --- aten/src/ATen/core/dynamic_type.h | 5 ----- aten/src/ATen/core/jit_type.h | 2 +- aten/src/ATen/core/jit_type_base.h | 1 - aten/src/ATen/core/rref_interface.h | 1 - aten/src/ATen/core/type.cpp | 2 ++ aten/src/ATen/functorch/DynamicLayer.cpp | 6 ------ aten/src/ATen/quantized/Quantizer.cpp | 2 ++ aten/src/ATen/record_function.h | 6 ------ c10/core/impl/PythonDispatcherTLS.h | 4 ---- c10/test/util/Metaprogramming_test.cpp | 4 ++-- c10/test/util/ThreadLocal_test.cpp | 2 -- c10/test/util/intrusive_ptr_test.cpp | 1 - c10/test/util/logging_test.cpp | 1 - c10/test/util/typeid_test.cpp | 2 -- c10/util/DynamicCounter.cpp | 5 ----- c10/util/Exception.h | 4 ---- c10/util/LeftRight.h | 3 --- c10/util/order_preserving_flat_hash_map.h | 1 - torch/csrc/distributed/rpc/types.h | 4 ---- torch/csrc/dynamo/python_compiled_autograd.cpp | 4 ---- .../inductor/aoti_runner/model_container_runner_cuda.h | 1 - torch/csrc/lazy/core/ir.cpp | 2 ++ torch/csrc/lazy/core/ir.h | 2 +- torch/csrc/lazy/core/ir_metadata.h | 4 ---- torch/csrc/lazy/core/metrics.h | 4 ---- torch/csrc/lazy/core/tensor.h | 8 -------- torch/csrc/lazy/core/thread_pool.cpp | 4 ---- torch/csrc/lazy/core/thread_pool.h | 1 - torch/csrc/monitor/counters.h | 4 ---- torch/csrc/profiler/collection.cpp | 4 ---- torch/csrc/profiler/orchestration/observer.h | 4 ---- 42 files changed, 12 insertions(+), 130 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 5776dabe00728a..3b03412a405095 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -29,6 +29,7 @@ cppcoreguidelines-*, -cppcoreguidelines-pro-type-static-cast-downcast, -cppcoreguidelines-pro-type-union-access, -cppcoreguidelines-pro-type-vararg, +-cppcoreguidelines-special-member-functions, -cppcoreguidelines-non-private-member-variables-in-classes, -facebook-hte-RelativeInclude, hicpp-exception-baseclass, @@ -63,7 +64,5 @@ readability-string-compare, HeaderFilterRegex: '^(aten/|c10/|torch/).*$' WarningsAsErrors: '*' CheckOptions: - cppcoreguidelines-special-member-functions.AllowSoleDefaultDtor: true - cppcoreguidelines-special-member-functions.AllowImplicitlyDeletedCopyOrMove: true misc-header-include-cycle.IgnoredFilesList: 'format.h;ivalue.h;custom_class.h;Dict.h;List.h;IListRef.h' ... 
diff --git a/aten/src/ATen/Context.h b/aten/src/ATen/Context.h index e37fa9ea516c19..40e843cbfa3380 100644 --- a/aten/src/ATen/Context.h +++ b/aten/src/ATen/Context.h @@ -604,10 +604,6 @@ inline void manual_seed(uint64_t seed) { // NoTF32Guard disable_tf32; struct TORCH_API NoTF32Guard { NoTF32Guard(); - NoTF32Guard(NoTF32Guard&& other) = delete; - NoTF32Guard(const NoTF32Guard&) = delete; - NoTF32Guard& operator=(const NoTF32Guard&) = delete; - NoTF32Guard& operator=(NoTF32Guard&&) = delete; ~NoTF32Guard(); static bool should_disable_tf32(); @@ -617,10 +613,6 @@ struct TORCH_API NoTF32Guard { struct TORCH_API ROCmBackwardPassGuard { ROCmBackwardPassGuard(); - ROCmBackwardPassGuard(ROCmBackwardPassGuard&& other) = delete; - ROCmBackwardPassGuard(const ROCmBackwardPassGuard&) = delete; - ROCmBackwardPassGuard& operator=(const ROCmBackwardPassGuard&) = delete; - ROCmBackwardPassGuard& operator=(ROCmBackwardPassGuard&&) = delete; ~ROCmBackwardPassGuard(); static bool is_backward_pass(); }; diff --git a/aten/src/ATen/DynamicLibrary.h b/aten/src/ATen/DynamicLibrary.h index 061456c081e611..523a21985f225e 100644 --- a/aten/src/ATen/DynamicLibrary.h +++ b/aten/src/ATen/DynamicLibrary.h @@ -16,8 +16,6 @@ namespace at { struct DynamicLibrary { AT_DISALLOW_COPY_AND_ASSIGN(DynamicLibrary); - DynamicLibrary(DynamicLibrary&& other) = delete; - DynamicLibrary& operator=(DynamicLibrary&&) = delete; TORCH_API DynamicLibrary( const char* name, diff --git a/aten/src/ATen/SparseCsrTensorUtils.h b/aten/src/ATen/SparseCsrTensorUtils.h index 3c6877083aeebf..2ec973013c4941 100644 --- a/aten/src/ATen/SparseCsrTensorUtils.h +++ b/aten/src/ATen/SparseCsrTensorUtils.h @@ -155,12 +155,6 @@ class CheckSparseTensorInvariants { : old_state(at::globalContext().checkSparseTensorInvariants()) { at::globalContext().setCheckSparseTensorInvariants(state); } - CheckSparseTensorInvariants(CheckSparseTensorInvariants&& other) = delete; - CheckSparseTensorInvariants(const CheckSparseTensorInvariants&) = delete; - CheckSparseTensorInvariants& operator=(const CheckSparseTensorInvariants&) = - delete; - CheckSparseTensorInvariants& operator=(CheckSparseTensorInvariants&&) = - delete; ~CheckSparseTensorInvariants() { at::globalContext().setCheckSparseTensorInvariants(old_state); diff --git a/aten/src/ATen/TensorIterator.h b/aten/src/ATen/TensorIterator.h index 7bbd68b91ba837..471faf664e271a 100644 --- a/aten/src/ATen/TensorIterator.h +++ b/aten/src/ATen/TensorIterator.h @@ -995,7 +995,6 @@ class TORCH_API TensorIteratorConfig final { /// TensorIterator that can use 32-bit indexing. Taken together the splits cover /// the original TensorIterator. 
struct TORCH_API SplitUntil32Bit { - // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) struct TORCH_API iterator { iterator() = default; iterator(const TensorIteratorBase& iter); diff --git a/aten/src/ATen/ThreadLocalState.h b/aten/src/ATen/ThreadLocalState.h index bb28175c5f42e0..2469cb1c3c47e1 100644 --- a/aten/src/ATen/ThreadLocalState.h +++ b/aten/src/ATen/ThreadLocalState.h @@ -96,10 +96,6 @@ class TORCH_API ThreadLocalStateGuard { // set the given state across the thread boundary ThreadLocalState::setThreadLocalState(state); } - ThreadLocalStateGuard(ThreadLocalStateGuard&& other) = delete; - ThreadLocalStateGuard(const ThreadLocalStateGuard&) = delete; - ThreadLocalStateGuard& operator=(const ThreadLocalStateGuard&) = delete; - ThreadLocalStateGuard& operator=(ThreadLocalStateGuard&&) = delete; ~ThreadLocalStateGuard() { // restore previously set variables diff --git a/aten/src/ATen/core/Dict.h b/aten/src/ATen/core/Dict.h index d187d7b7c11699..a1d4da07520fa3 100644 --- a/aten/src/ATen/core/Dict.h +++ b/aten/src/ATen/core/Dict.h @@ -206,7 +206,6 @@ template Dict toGenericDict(Dict -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class Dict final { private: static_assert((std::is_same_v && std::is_same_v) || guts::typelist::contains::value, "Invalid Key type for Dict. We only support int64_t, double, bool, and string."); diff --git a/aten/src/ATen/core/PythonFallbackKernel.h b/aten/src/ATen/core/PythonFallbackKernel.h index 1d2b613166d3f3..67f24795eeb58d 100644 --- a/aten/src/ATen/core/PythonFallbackKernel.h +++ b/aten/src/ATen/core/PythonFallbackKernel.h @@ -6,10 +6,6 @@ namespace at::impl { struct TORCH_API RestorePythonTLSSnapshot { RestorePythonTLSSnapshot(); - RestorePythonTLSSnapshot(RestorePythonTLSSnapshot&& other) = delete; - RestorePythonTLSSnapshot(const RestorePythonTLSSnapshot&) = delete; - RestorePythonTLSSnapshot& operator=(const RestorePythonTLSSnapshot&) = delete; - RestorePythonTLSSnapshot& operator=(RestorePythonTLSSnapshot&&) = delete; ~RestorePythonTLSSnapshot(); private: @@ -22,10 +18,6 @@ struct TORCH_API RestorePythonTLSSnapshot { struct TORCH_API MaybeSetTLSOnEntryGuard { public: MaybeSetTLSOnEntryGuard(); - MaybeSetTLSOnEntryGuard(MaybeSetTLSOnEntryGuard&& other) = delete; - MaybeSetTLSOnEntryGuard(const MaybeSetTLSOnEntryGuard&) = delete; - MaybeSetTLSOnEntryGuard& operator=(const MaybeSetTLSOnEntryGuard&) = delete; - MaybeSetTLSOnEntryGuard& operator=(MaybeSetTLSOnEntryGuard&&) = delete; ~MaybeSetTLSOnEntryGuard(); private: diff --git a/aten/src/ATen/core/QuantizerBase.h b/aten/src/ATen/core/QuantizerBase.h index a56ead7a30c696..0d2eaeece88980 100644 --- a/aten/src/ATen/core/QuantizerBase.h +++ b/aten/src/ATen/core/QuantizerBase.h @@ -40,7 +40,7 @@ struct TORCH_API Quantizer : public c10::intrusive_ptr_target { // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) const ScalarType scalar_type_; explicit Quantizer(ScalarType scalar_type) : scalar_type_(scalar_type) {} - ~Quantizer() override = default; + ~Quantizer() override; // Copied from torch/csrc/jit/ir/scope.h QuantizerPtr intrusive_from_this() { diff --git a/aten/src/ATen/core/Tensor.h b/aten/src/ATen/core/Tensor.h index 63b707767d344d..de887a024c22fb 100644 --- a/aten/src/ATen/core/Tensor.h +++ b/aten/src/ATen/core/Tensor.h @@ -4,7 +4,6 @@ #include namespace at { -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class TORCH_API OptionalTensorRef { public: OptionalTensorRef() = default; @@ -21,7 +20,6 @@ class TORCH_API OptionalTensorRef { 
OptionalTensorRef(const OptionalTensorRef& rhs) : ref_(Tensor::unsafe_borrow_t{}, rhs.ref_) {} - OptionalTensorRef(OptionalTensorRef&& rhs) = default; OptionalTensorRef& operator=(OptionalTensorRef rhs) { std::swap(ref_, rhs.ref_); return *this; @@ -61,10 +59,6 @@ class TORCH_API TensorRef { TensorRef(const TensorBase& src) : ref_(Tensor::unsafe_borrow_t{}, src) {} - TensorRef(TensorRef&& other) = default; - TensorRef(const TensorRef&) = default; - TensorRef& operator=(const TensorRef&) = default; - TensorRef& operator=(TensorRef&&) = default; const Tensor& operator*() const & { return ref_; diff --git a/aten/src/ATen/core/Vitals.h b/aten/src/ATen/core/Vitals.h index 7ec213938d564a..8a7a51e81e1d27 100644 --- a/aten/src/ATen/core/Vitals.h +++ b/aten/src/ATen/core/Vitals.h @@ -39,8 +39,6 @@ struct TORCH_API TorchVital { explicit TorchVital(std::string n) : name(std::move(n)) {} TorchVital(const TorchVital&) = default; TorchVital(TorchVital&&) = default; - TorchVital& operator=(const TorchVital&) = default; - TorchVital& operator=(TorchVital&&) = default; TorchVital() = delete; TorchVitalAttr& create(const std::string& attr); @@ -73,7 +71,6 @@ class TORCH_API APIVitals { APIVitals(APIVitals&& other) = delete; APIVitals& operator=(const APIVitals&) = delete; APIVitals& operator=(APIVitals&&) = delete; - ~APIVitals() = default; private: std::unordered_map name_map_; diff --git a/aten/src/ATen/core/dynamic_type.h b/aten/src/ATen/core/dynamic_type.h index 697fcec39e34cd..4ad833a295b12c 100644 --- a/aten/src/ATen/core/dynamic_type.h +++ b/aten/src/ATen/core/dynamic_type.h @@ -159,11 +159,6 @@ class DynamicType : public SharedType { explicit DynamicType(Tag, Arguments); explicit DynamicType(Tag, std::string_view, Arguments); - DynamicType(DynamicType&& other) = delete; - DynamicType(const DynamicType&) = delete; - DynamicType& operator=(const DynamicType&) = delete; - DynamicType& operator=(DynamicType&&) = delete; - TypePtr containedType(size_t) const override; size_t containedTypeSize() const override; Tag tag() const { diff --git a/aten/src/ATen/core/jit_type.h b/aten/src/ATen/core/jit_type.h index 58d50de74faed3..5951b4763be3a0 100644 --- a/aten/src/ATen/core/jit_type.h +++ b/aten/src/ATen/core/jit_type.h @@ -2204,7 +2204,7 @@ struct TORCH_API InterfaceType : public NamedType { return is_module_; } static const TypeKind Kind = TypeKind::InterfaceType; - ~InterfaceType() override = default; + ~InterfaceType() override; private: InterfaceType(QualifiedName name, bool is_module); static bool isSubTypeImpl( diff --git a/aten/src/ATen/core/jit_type_base.h b/aten/src/ATen/core/jit_type_base.h index de440787ee686f..8904cd7bc431e3 100644 --- a/aten/src/ATen/core/jit_type_base.h +++ b/aten/src/ATen/core/jit_type_base.h @@ -227,7 +227,6 @@ struct TORCH_API Type { SingletonOrSharedTypePtr(SingletonOrSharedTypePtr&&) noexcept = default; SingletonOrSharedTypePtr& operator=(const SingletonOrSharedTypePtr&) = default; SingletonOrSharedTypePtr& operator=(SingletonOrSharedTypePtr&&) noexcept = default; - ~SingletonOrSharedTypePtr() = default; T* get() const { return repr_.isSharedAndNonNull() ? repr_.shared_.repr_.get() : static_cast(repr_.rawRepr().first); diff --git a/aten/src/ATen/core/rref_interface.h b/aten/src/ATen/core/rref_interface.h index 70273f168d9361..f0749d368792f0 100644 --- a/aten/src/ATen/core/rref_interface.h +++ b/aten/src/ATen/core/rref_interface.h @@ -17,7 +17,6 @@ class C10_EXPORT RRefInterface : public c10::intrusive_ptr_target { // counting. 
RRefInterface(const RRefInterface& other) = delete; RRefInterface(RRefInterface&& other) = delete; - RRefInterface& operator=(const RRefInterface& other) = delete; RRefInterface& operator=(RRefInterface&& other) = delete; ~RRefInterface() override = default; diff --git a/aten/src/ATen/core/type.cpp b/aten/src/ATen/core/type.cpp index 164ea6d44f5846..92c30e6ec8437c 100644 --- a/aten/src/ATen/core/type.cpp +++ b/aten/src/ATen/core/type.cpp @@ -1037,6 +1037,8 @@ InterfaceType::InterfaceType(QualifiedName name, bool is_module) methods_(std::make_shared>()), is_module_(is_module) {} +InterfaceType::~InterfaceType() = default; + bool containsAnyType(const TypePtr& type) { std::vector to_scan = { type }; while (!to_scan.empty()) { diff --git a/aten/src/ATen/functorch/DynamicLayer.cpp b/aten/src/ATen/functorch/DynamicLayer.cpp index 9bdf155affc2b2..81b82f2556c19e 100644 --- a/aten/src/ATen/functorch/DynamicLayer.cpp +++ b/aten/src/ATen/functorch/DynamicLayer.cpp @@ -202,8 +202,6 @@ struct SaveLocalDispatchKeySet { } SaveLocalDispatchKeySet(const SaveLocalDispatchKeySet&) = delete; SaveLocalDispatchKeySet& operator=(const SaveLocalDispatchKeySet&) = delete; - SaveLocalDispatchKeySet(SaveLocalDispatchKeySet&&) = delete; - SaveLocalDispatchKeySet& operator=(SaveLocalDispatchKeySet&&) = delete; }; const std::vector& getDynamicLayerStack() { @@ -408,10 +406,6 @@ static void dump_local_tls() { struct WithoutTop { WithoutTop(); - WithoutTop(WithoutTop&& other) = delete; - WithoutTop(const WithoutTop&) = delete; - WithoutTop& operator=(const WithoutTop&) = delete; - WithoutTop& operator=(WithoutTop&&) = delete; ~WithoutTop(); DynamicLayer layer_; }; diff --git a/aten/src/ATen/quantized/Quantizer.cpp b/aten/src/ATen/quantized/Quantizer.cpp index fa48b33ce7c0d0..ef8f8deb4973be 100644 --- a/aten/src/ATen/quantized/Quantizer.cpp +++ b/aten/src/ATen/quantized/Quantizer.cpp @@ -313,6 +313,8 @@ Tensor& PerChannelAffineFloatQParamsQuantizer::dequantize_out( return rtensor; } +Quantizer::~Quantizer() = default; + C10_EXPORT void set_quantizer_(const Tensor& self, ConstQuantizerPtr quantizer) { get_qtensorimpl(self)->set_quantizer_(quantizer); } diff --git a/aten/src/ATen/record_function.h b/aten/src/ATen/record_function.h index 52115b4a65af66..15130c91367522 100644 --- a/aten/src/ATen/record_function.h +++ b/aten/src/ATen/record_function.h @@ -353,8 +353,6 @@ struct TORCH_API RecordFunction { RecordFunction(const RecordFunction&) = delete; RecordFunction& operator=(const RecordFunction&) = delete; - RecordFunction(RecordFunction&&) = delete; - RecordFunction& operator=(RecordFunction&&) = delete; const char* name() const; @@ -766,10 +764,6 @@ class TORCH_API RecordFunctionGuard { enableRecordFunction(is_enabled); } - RecordFunctionGuard(RecordFunctionGuard&& other) = delete; - RecordFunctionGuard(const RecordFunctionGuard&) = delete; - RecordFunctionGuard& operator=(const RecordFunctionGuard&) = delete; - RecordFunctionGuard& operator=(RecordFunctionGuard&&) = delete; virtual ~RecordFunctionGuard() { enableRecordFunction(prev_value_); } diff --git a/c10/core/impl/PythonDispatcherTLS.h b/c10/core/impl/PythonDispatcherTLS.h index 7b91aab686eca1..12c0677f36fdb5 100644 --- a/c10/core/impl/PythonDispatcherTLS.h +++ b/c10/core/impl/PythonDispatcherTLS.h @@ -16,10 +16,6 @@ struct C10_API DisablePythonDispatcher { PythonDispatcherTLS::set_state({}); } - DisablePythonDispatcher(DisablePythonDispatcher&& other) = delete; - DisablePythonDispatcher(const DisablePythonDispatcher&) = delete; - DisablePythonDispatcher& 
operator=(const DisablePythonDispatcher&) = delete; - DisablePythonDispatcher& operator=(DisablePythonDispatcher&&) = delete; ~DisablePythonDispatcher() { PythonDispatcherTLS::set_state(old_); } diff --git a/c10/test/util/Metaprogramming_test.cpp b/c10/test/util/Metaprogramming_test.cpp index a7bca7a5b511f9..ad301462bd5147 100644 --- a/c10/test/util/Metaprogramming_test.cpp +++ b/c10/test/util/Metaprogramming_test.cpp @@ -5,7 +5,7 @@ using namespace c10::guts; -// NOLINTBEGIN(modernize*, cppcoreguidelines-special-member-functions) +// NOLINTBEGIN(modernize*) namespace { namespace test_function_traits { @@ -302,4 +302,4 @@ TEST(MetaprogrammingTest, TupleMap_canBeUsedWithAutoLambdas) { } // namespace test_tuple_map } // namespace -// NOLINTEND(modernize*, cppcoreguidelines-special-member-functions) +// NOLINTEND(modernize*) diff --git a/c10/test/util/ThreadLocal_test.cpp b/c10/test/util/ThreadLocal_test.cpp index 29e748e14890e7..bbc10c0c52e443 100644 --- a/c10/test/util/ThreadLocal_test.cpp +++ b/c10/test/util/ThreadLocal_test.cpp @@ -148,7 +148,6 @@ TEST(ThreadLocalTest, TestThreadWithGlobalScopeVar) { TEST(ThreadLocalTest, TestObjectsAreReleased) { static std::atomic ctors{0}; static std::atomic dtors{0}; - // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) struct A { A() { ++ctors; @@ -184,7 +183,6 @@ TEST(ThreadLocalTest, TestObjectsAreReleased) { TEST(ThreadLocalTest, TestObjectsAreReleasedByNonstaticThreadLocal) { static std::atomic ctors(0); static std::atomic dtors(0); - // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) struct A { A() { ++ctors; diff --git a/c10/test/util/intrusive_ptr_test.cpp b/c10/test/util/intrusive_ptr_test.cpp index 47e7942950ef79..14c12f422f2cd8 100644 --- a/c10/test/util/intrusive_ptr_test.cpp +++ b/c10/test/util/intrusive_ptr_test.cpp @@ -45,7 +45,6 @@ struct SomeChildClass : SomeBaseClass { SomeChildClass(int v) : SomeBaseClass(v) {} }; -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class DestructableMock : public intrusive_ptr_target { public: DestructableMock(bool* resourcesReleased, bool* wasDestructed) diff --git a/c10/test/util/logging_test.cpp b/c10/test/util/logging_test.cpp index c06dfb43d46cb4..5798b37c18e385 100644 --- a/c10/test/util/logging_test.cpp +++ b/c10/test/util/logging_test.cpp @@ -81,7 +81,6 @@ TEST( } namespace { -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) struct Noncopyable { int x; diff --git a/c10/test/util/typeid_test.cpp b/c10/test/util/typeid_test.cpp index 8e78ec84e530aa..7587d5eefdbd53 100644 --- a/c10/test/util/typeid_test.cpp +++ b/c10/test/util/typeid_test.cpp @@ -70,7 +70,6 @@ TEST(TypeMetaTest, TypeMeta) { EXPECT_NE(bar_meta.name().find("TypeMetaTestBar"), c10::string_view::npos); } -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class ClassAllowAssignment { public: ClassAllowAssignment() = default; @@ -79,7 +78,6 @@ class ClassAllowAssignment { int x{42}; }; -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class ClassNoAssignment { public: ClassNoAssignment() = default; diff --git a/c10/util/DynamicCounter.cpp b/c10/util/DynamicCounter.cpp index cd9decfc41f3af..0b7906af1b1204 100644 --- a/c10/util/DynamicCounter.cpp +++ b/c10/util/DynamicCounter.cpp @@ -52,11 +52,6 @@ struct DynamicCounter::Guard { } } - Guard(Guard&& other) = delete; - Guard(const Guard&) = delete; - Guard& operator=(const Guard&) = delete; - Guard& operator=(Guard&&) = delete; - ~Guard() { for (const auto& backend : backends_) { backend->unregisterCounter(key_); diff 
--git a/c10/util/Exception.h b/c10/util/Exception.h index e83b65cc5efc0f..275526cf400823 100644 --- a/c10/util/Exception.h +++ b/c10/util/Exception.h @@ -205,10 +205,6 @@ class C10_API WarningHandlerGuard { : prev_handler_(c10::WarningUtils::get_warning_handler()) { c10::WarningUtils::set_warning_handler(new_handler); } - WarningHandlerGuard(WarningHandlerGuard&& other) = delete; - WarningHandlerGuard(const WarningHandlerGuard&) = delete; - WarningHandlerGuard& operator=(const WarningHandlerGuard&) = delete; - WarningHandlerGuard& operator=(WarningHandlerGuard&&) = delete; ~WarningHandlerGuard() { c10::WarningUtils::set_warning_handler(prev_handler_); } diff --git a/c10/util/LeftRight.h b/c10/util/LeftRight.h index 0ad9a1b346103e..58145b2c779cc3 100644 --- a/c10/util/LeftRight.h +++ b/c10/util/LeftRight.h @@ -18,8 +18,6 @@ struct IncrementRAII final { ~IncrementRAII() { _counter->fetch_sub(1); } - IncrementRAII(IncrementRAII&&) = delete; - IncrementRAII& operator=(IncrementRAII&&) = delete; private: std::atomic* _counter; @@ -203,7 +201,6 @@ class RWSafeLeftRightWrapper final { RWSafeLeftRightWrapper(RWSafeLeftRightWrapper&&) noexcept = delete; RWSafeLeftRightWrapper& operator=(const RWSafeLeftRightWrapper&) = delete; RWSafeLeftRightWrapper& operator=(RWSafeLeftRightWrapper&&) noexcept = delete; - ~RWSafeLeftRightWrapper() = default; template // NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) diff --git a/c10/util/order_preserving_flat_hash_map.h b/c10/util/order_preserving_flat_hash_map.h index fd8196432c994c..021995600344ad 100644 --- a/c10/util/order_preserving_flat_hash_map.h +++ b/c10/util/order_preserving_flat_hash_map.h @@ -139,7 +139,6 @@ struct KeyOrValueEquality : functor_storage { }; static constexpr int8_t min_lookups = 4; template -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) struct sherwood_v3_entry { // NOLINTNEXTLINE(modernize-use-equals-default) sherwood_v3_entry() {} diff --git a/torch/csrc/distributed/rpc/types.h b/torch/csrc/distributed/rpc/types.h index 82cf528bb9bd6b..7844ce270cd471 100644 --- a/torch/csrc/distributed/rpc/types.h +++ b/torch/csrc/distributed/rpc/types.h @@ -13,10 +13,6 @@ TORCH_API void disableJitRRefPickle(); struct TORCH_API JitRRefPickleGuard { JitRRefPickleGuard(); - JitRRefPickleGuard(JitRRefPickleGuard&& other) = delete; - JitRRefPickleGuard(const JitRRefPickleGuard&) = delete; - JitRRefPickleGuard& operator=(const JitRRefPickleGuard&) = delete; - JitRRefPickleGuard& operator=(JitRRefPickleGuard&&) = delete; ~JitRRefPickleGuard(); }; diff --git a/torch/csrc/dynamo/python_compiled_autograd.cpp b/torch/csrc/dynamo/python_compiled_autograd.cpp index 8bd192cde7b1b0..33dac77d74340a 100644 --- a/torch/csrc/dynamo/python_compiled_autograd.cpp +++ b/torch/csrc/dynamo/python_compiled_autograd.cpp @@ -543,10 +543,6 @@ static PyObject* call_end_capture(PyObject* self, const variable_list& inputs) { struct ClosingTHPObjectPtr : public THPObjectPtr { ClosingTHPObjectPtr(PyObject* o) : THPObjectPtr(o) {} - ClosingTHPObjectPtr(ClosingTHPObjectPtr&& other) = default; - ClosingTHPObjectPtr(const ClosingTHPObjectPtr&) = delete; - ClosingTHPObjectPtr& operator=(const ClosingTHPObjectPtr&) = delete; - ClosingTHPObjectPtr& operator=(ClosingTHPObjectPtr&&) = default; ~ClosingTHPObjectPtr() { if (PyErr_Occurred()) { // do nothing, do not attempt to close diff --git a/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h b/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h index 2018cf7573e7a9..5db82bf413668a 100644 --- 
a/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h +++ b/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h @@ -8,7 +8,6 @@ namespace torch::inductor { // NOTICE: Following APIs are subject to change due to active development // We provide NO BC guarantee for these APIs -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class TORCH_API AOTIModelContainerRunnerCuda : public AOTIModelContainerRunner { public: // @param device_str: cuda device string, e.g. "cuda", "cuda:0" diff --git a/torch/csrc/lazy/core/ir.cpp b/torch/csrc/lazy/core/ir.cpp index 033e4f5cf00b95..fe9cfba2556c06 100644 --- a/torch/csrc/lazy/core/ir.cpp +++ b/torch/csrc/lazy/core/ir.cpp @@ -98,6 +98,8 @@ Node::Node(OpKind op, Shape shape, size_t num_outputs) : Node(op, num_outputs) { shapes_.push_back(std::move(shape)); } +Node::~Node() = default; + // Retrieves the full shape of the IR Node. c10::ArrayRef Node::shapes() const { return shapes_; diff --git a/torch/csrc/lazy/core/ir.h b/torch/csrc/lazy/core/ir.h index 8e384e7982bd58..dd244b7442b2d3 100644 --- a/torch/csrc/lazy/core/ir.h +++ b/torch/csrc/lazy/core/ir.h @@ -101,7 +101,7 @@ class TORCH_API Node { // Construct node with shape and no operands Node(OpKind op, Shape shape, size_t num_outputs = 1); - virtual ~Node() = default; + virtual ~Node(); const OpKind& op() const { return op_; diff --git a/torch/csrc/lazy/core/ir_metadata.h b/torch/csrc/lazy/core/ir_metadata.h index 7e73a593181997..aeaacb596e0a35 100644 --- a/torch/csrc/lazy/core/ir_metadata.h +++ b/torch/csrc/lazy/core/ir_metadata.h @@ -38,10 +38,6 @@ struct TORCH_API MetaData { struct TORCH_API ScopePusher { explicit ScopePusher(const std::string& name); ~ScopePusher(); - ScopePusher(ScopePusher&& other) = delete; - ScopePusher(const ScopePusher&) = delete; - ScopePusher& operator=(const ScopePusher&) = delete; - ScopePusher& operator=(ScopePusher&&) = delete; static void ResetScopes(); }; diff --git a/torch/csrc/lazy/core/metrics.h b/torch/csrc/lazy/core/metrics.h index 05b525778d9a3e..191baa5eb1768e 100644 --- a/torch/csrc/lazy/core/metrics.h +++ b/torch/csrc/lazy/core/metrics.h @@ -258,10 +258,6 @@ class TORCH_API TimedSection { public: explicit TimedSection(Metric* metric) : metric_(metric), start_(NowNs()) {} - TimedSection(TimedSection&& other) = delete; - TimedSection(const TimedSection&) = delete; - TimedSection& operator=(const TimedSection&) = delete; - TimedSection& operator=(TimedSection&&) = delete; ~TimedSection() { int64_t now = NowNs(); metric_->AddSample(now, static_cast(now - start_)); diff --git a/torch/csrc/lazy/core/tensor.h b/torch/csrc/lazy/core/tensor.h index b739399b6bbdb3..dfa317d3b81f90 100644 --- a/torch/csrc/lazy/core/tensor.h +++ b/torch/csrc/lazy/core/tensor.h @@ -42,18 +42,12 @@ class TORCH_API LazyTensor : public c10::intrusive_ptr_target { Data(BackendDevice device) : device(std::move(device)), unique_id(GetNextTensorId()) {} - Data(Data&& other) = delete; - Data(const Data&) = delete; - Data& operator=(const Data&) = delete; - Data& operator=(Data&&) = delete; virtual ~Data(); BackendDataPtr handle; Value ir_value; std::optional tensor_data; - // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) const BackendDevice device; - // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) const int64_t unique_id = 0; size_t generation = 1; }; @@ -74,8 +68,6 @@ class TORCH_API LazyTensor : public c10::intrusive_ptr_target { LazyTensor() = delete; LazyTensor(const LazyTensor&) = default; LazyTensor(LazyTensor&&) noexcept = 
default; - LazyTensor& operator=(const LazyTensor&) = default; - LazyTensor& operator=(LazyTensor&&) noexcept = default; ~LazyTensor() override = default; diff --git a/torch/csrc/lazy/core/thread_pool.cpp b/torch/csrc/lazy/core/thread_pool.cpp index e61827e5b0fdc1..3f87aaa96b5191 100644 --- a/torch/csrc/lazy/core/thread_pool.cpp +++ b/torch/csrc/lazy/core/thread_pool.cpp @@ -26,10 +26,6 @@ class ThreadPool { }); } } - ThreadPool(const ThreadPool&) = delete; - ThreadPool(ThreadPool&&) = delete; - ThreadPool& operator=(const ThreadPool&) = delete; - ThreadPool& operator=(ThreadPool&&) = delete; ~ThreadPool() { { diff --git a/torch/csrc/lazy/core/thread_pool.h b/torch/csrc/lazy/core/thread_pool.h index 2e0ae8f89d8e9a..8caa1e0bbc4537 100644 --- a/torch/csrc/lazy/core/thread_pool.h +++ b/torch/csrc/lazy/core/thread_pool.h @@ -13,7 +13,6 @@ namespace torch::lazy { -// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class TORCH_API Completion { public: class Data; diff --git a/torch/csrc/monitor/counters.h b/torch/csrc/monitor/counters.h index 986dfb7b85ca13..bfb1a463b713f3 100644 --- a/torch/csrc/monitor/counters.h +++ b/torch/csrc/monitor/counters.h @@ -122,10 +122,6 @@ class Stat { maxSamples_(maxSamples) { detail::registerStat(this); } - Stat(const Stat&) = delete; - Stat(Stat&&) = delete; - Stat& operator=(const Stat&) = delete; - Stat& operator=(Stat&&) = delete; virtual ~Stat() { { diff --git a/torch/csrc/profiler/collection.cpp b/torch/csrc/profiler/collection.cpp index c5b0d9539d4fd5..eefe5621a293eb 100644 --- a/torch/csrc/profiler/collection.cpp +++ b/torch/csrc/profiler/collection.cpp @@ -402,10 +402,6 @@ struct StealOrDefault { explicit StealOrDefault(T& container) : container_{container}, it_{container.begin()} {} - StealOrDefault(const StealOrDefault&) = delete; - StealOrDefault(StealOrDefault&&) = delete; - StealOrDefault& operator=(const StealOrDefault&) = delete; - StealOrDefault& operator=(StealOrDefault&&) = delete; ~StealOrDefault() { container_.get().clear(); } diff --git a/torch/csrc/profiler/orchestration/observer.h b/torch/csrc/profiler/orchestration/observer.h index 272e2e4f9d5f93..c3beb4cca4d0c7 100644 --- a/torch/csrc/profiler/orchestration/observer.h +++ b/torch/csrc/profiler/orchestration/observer.h @@ -128,10 +128,6 @@ struct TORCH_API ProfilerConfig { // ---------------------------------------------------------------------------- struct TORCH_API ProfilerStateBase : public c10::MemoryReportingInfoBase { explicit ProfilerStateBase(ProfilerConfig config); - ProfilerStateBase(const ProfilerStateBase&) = delete; - ProfilerStateBase(ProfilerStateBase&&) = delete; - ProfilerStateBase& operator=(const ProfilerStateBase&) = delete; - ProfilerStateBase& operator=(ProfilerStateBase&&) = delete; ~ProfilerStateBase() override; static ProfilerStateBase* get(bool global); From 44e4949bcfda3bb41f3d70f0e3f4b137a8087d89 Mon Sep 17 00:00:00 2001 From: PyTorch MergeBot Date: Wed, 6 Nov 2024 13:31:26 +0000 Subject: [PATCH 135/503] Revert "[Inductor][CPU] Fuse SmoothQuant int8 linear pattern (#139595)" This reverts commit 22e89ea2aaa3e0ef0ec4504bd2dbf230447a6d2a. 
Reverted https://github.com/pytorch/pytorch/pull/139595 on behalf of https://github.com/malfet due to It broke number of tests, see https://hud.pytorch.org/pytorch/pytorch/commit/22e89ea2aaa3e0ef0ec4504bd2dbf230447a6d2a ([comment](https://github.com/pytorch/pytorch/pull/139595#issuecomment-2459754355)) --- .../src/ATen/native/quantized/cpu/qlinear.cpp | 23 +-- test/inductor/test_mkldnn_pattern_matcher.py | 86 -------- torch/_inductor/fx_passes/quantization.py | 184 ------------------ 3 files changed, 9 insertions(+), 284 deletions(-) diff --git a/aten/src/ATen/native/quantized/cpu/qlinear.cpp b/aten/src/ATen/native/quantized/cpu/qlinear.cpp index ed5badfb076e08..1c76f986ee1bd8 100644 --- a/aten/src/ATen/native/quantized/cpu/qlinear.cpp +++ b/aten/src/ATen/native/quantized/cpu/qlinear.cpp @@ -932,8 +932,8 @@ static at::Tensor linear_int8_with_onednn_weight( c10::string_view& unary_post_op_algorithm) { using ideep::tensor; const int64_t dim = input.dim(); - TORCH_CHECK(input.scalar_type() == c10::ScalarType::Byte || input.scalar_type() == c10::ScalarType::Char, - "qlinear with mkldnn tensor: data type of input should be uint8 or int8 (unsigned char or char)."); + TORCH_CHECK(input.scalar_type() == c10::ScalarType::Byte, + "qlinear with mkldnn tensor: data type of input should be uint8 (unsigned char)."); TORCH_CHECK(onednn_weight.scalar_type() == c10::ScalarType::Char, "qlinear with mkldnn tensor: data type of weight should be int8 (char)."); TORCH_CHECK( @@ -1022,8 +1022,7 @@ static at::Tensor linear_int8_with_onednn_weight( empty_tensor; // Create onednn primitive - auto src_dtype = input.scalar_type() == c10::kByte ? ideep::data_type::u8 : ideep::data_type::s8; - auto src_desc = tensor::desc(src_dims, src_dtype, ideep::format_tag::any); + auto src_desc = tensor::desc(src_dims, ideep::data_type::u8, ideep::format_tag::any); auto weights_desc = packed_weight.get_desc(); auto dst_dtype = dst.get_data_type(); auto dst_desc = tensor::desc(dst_dims, dst_dtype, ideep::format_tag::any); @@ -1120,14 +1119,12 @@ namespace at::native { torch::List> post_op_args, c10::string_view post_op_algorithm) { #if AT_MKLDNN_ENABLED() - // act_zero_point.numel() == 0 for symmetric quantization - TORCH_CHECK(act_scale.numel() == 1 && act_zero_point.numel() <= 1, - "onednn int8 linear: act scale/zp size should be 1/<=1"); + TORCH_CHECK(act_scale.numel() == 1 && act_zero_point.numel() == 1, + "onednn int8 linear: act scale/zp size should be 1"); static std::optional other = std::nullopt; static const c10::string_view binary_post_op = "none"; - int64_t act_zp = act_zero_point.numel() == 1 ? act_zero_point.item().toLong() : 0; return linear_int8_with_onednn_weight( - act, act_scale.item().toDouble(), act_zp, + act, act_scale.item().toDouble(), act_zero_point.item().toLong(), onednn_weight, weight_scales, weight_zero_points, bias, output_scale, output_zero_point, output_dtype, other, /*other scale*/1.0, /*other zp*/0, @@ -1158,12 +1155,10 @@ namespace at::native { torch::List> unary_post_op_args, c10::string_view unary_post_op_algorithm) { #if AT_MKLDNN_ENABLED() - // act_zero_point.numel() == 0 for symmetric quantization - TORCH_CHECK(act_scale.numel() == 1 && act_zero_point.numel() <= 1, - "onednn int8 linear: act scale/zp size should be 1/<=1"); - int64_t act_zp = act_zero_point.numel() == 1 ? 
act_zero_point.item().toLong() : 0; + TORCH_CHECK(act_scale.numel() == 1 && act_zero_point.numel() == 1, + "onednn int8 linear: act scale/zp size should be 1"); return linear_int8_with_onednn_weight( - act, act_scale.item().toDouble(), act_zp, + act, act_scale.item().toDouble(), act_zero_point.item().toLong(), onednn_weight, weight_scales, weight_zero_points, bias, output_scale, output_zero_point, output_dtype, other, other_scale, other_zero_point, diff --git a/test/inductor/test_mkldnn_pattern_matcher.py b/test/inductor/test_mkldnn_pattern_matcher.py index 7d211c1b17e78f..772d083b03b361 100644 --- a/test/inductor/test_mkldnn_pattern_matcher.py +++ b/test/inductor/test_mkldnn_pattern_matcher.py @@ -2824,92 +2824,6 @@ def matcher_check_fn(): rtol=0.07, ) - @skipIfNoDynamoSupport - @skipIfNoONEDNN - def test_smooth_quant_with_int_mm(self): - r""" - This testcase check if we can match the SmoothQuant int8 linear pattern from Torchao. - The pattern is: - (no bias) reshape -> _int_mm -> convert_element_type -> (expand -> mul) -> mul -> reshape - or - (with bias) pattern_no_bias -> add -> reshape -> reshape - """ - M = 16 - in_feature = 64 - out_feature = 128 - q_min, q_max = -32, 31 - - class Mod(torch.nn.Module): - def __init__( - self, dtype: torch.dtype, has_bias: bool, per_channel_quant: bool - ): - super().__init__() - self.dtype = dtype - self.has_bias = has_bias - self.b = torch.randint( - q_min, q_max, [in_feature, out_feature], dtype=torch.int8 - ) - self.per_channel_quant = per_channel_quant - self.b_scale = torch.rand([out_feature]) * 0.01 + 0.01 - self.b_scale = self.b_scale.to(dtype) - self.bias = torch.rand([out_feature], dtype=dtype) if has_bias else None - - def forward(self, a, a_scale_per_tensor, a_scale_per_channel): - out_shape = a.shape[:-1] + (self.b.size(-1),) - a_reshaped = a.reshape(-1, a.size(-1)) - c = torch._int_mm(a_reshaped, self.b) - c = c.to(self.dtype) - c_shape = c.shape - a_scale = ( - a_scale_per_channel - if self.per_channel_quant - else a_scale_per_tensor - ) - a_scale = a_scale.expand(c.shape) - c = c * a_scale - c = c * self.b_scale - if self.has_bias: - c = c.reshape([1, *list(c_shape)]) - c = c + self.bias - c = c.reshape(c_shape) - c = c.reshape(out_shape) - return c - - has_bias_list = [True, False] - dype_list = ( - [torch.float, torch.bfloat16] - if torch.ops.mkldnn._is_mkldnn_bf16_supported() - else [torch.float] - ) - per_channel_list = [True, False] - for has_bias, dtype, per_channel_quant in itertools.product( - has_bias_list, dype_list, per_channel_list - ): - mod = Mod(dtype, has_bias, per_channel_quant).eval() - a = torch.randint(q_min, q_max, [1, M, in_feature], dtype=torch.int8) - a_scale_per_tensor = torch.rand([1], dtype=dtype) * 0.01 + 0.01 - a_scale_per_channel = torch.rand([M, 1], dtype=dtype) * 0.01 + 0.01 - a_scale_per_tensor, a_scale_per_channel = ( - a_scale_per_tensor.to(dtype), - a_scale_per_channel.to(dtype), - ) - - def matcher_check_fn(): - self.assertEqual( - counters["inductor"]["qlinear_weight_prepack_matcher_count"], 1 - ) - self.assertEqual( - counters["inductor"]["qlinear_weight_prepack_matcher_nodes"], - 10 if has_bias else 7, - ) - - self._test_common( - mod, - (a, a_scale_per_tensor, a_scale_per_channel), - matcher_check_fn=matcher_check_fn, - check_autocast=dtype, - ) - @dynamo_config.patch({"dynamic_shapes": True, "assume_static_by_default": False}) class TestDynamicPatternMatcher(TestPatternMatcherBase): diff --git a/torch/_inductor/fx_passes/quantization.py b/torch/_inductor/fx_passes/quantization.py index 
5161b7473ac278..0188558ada93a0 100644 --- a/torch/_inductor/fx_passes/quantization.py +++ b/torch/_inductor/fx_passes/quantization.py @@ -2529,187 +2529,6 @@ def _register_qlinear_weight_prepack(): ) -def _register_smooth_quant_int_mm_pattern(): - """ - The pattern is: - (no bias) reshape -> _int_mm -> convert_element_type -> (expand -> mul) -> mul -> reshape - or - (with bias) pattern_no_bias -> add -> reshape -> reshape - """ - pattern_no_bias = CallFunction( - aten.reshape.default, - CallFunction( - aten.mul.Tensor, - CallFunction( - aten.mul.Tensor, - CallFunction( - prims.convert_element_type.default, - CallFunction( - aten._int_mm.default, - CallFunction( - aten.reshape.default, - KeywordArg("a"), - KeywordArg("in_shape"), - ), - KeywordArg("b"), - ), - KeywordArg("dtype"), - ), - CallFunction( - aten.expand.default, - KeywordArg("x_scale"), - Arg(), - ), - ), - KeywordArg("w_scale"), - ), - KeywordArg("out_shape_no_bias"), - ) - pattern_with_bias = CallFunction( - aten.reshape.default, - CallFunction( - aten.reshape.default, - CallFunction( - aten.add.Tensor, - pattern_no_bias, - KeywordArg("bias"), - ), - Arg(), - ), - KeywordArg("out_shape_with_bias"), - ) - - def _validate_pattern(match: Match): - return len(match.nodes) in [7, 10] - - for pattern in [pattern_with_bias, pattern_no_bias]: - - @register_freezing_graph_pattern( - pattern, - extra_check=_validate_pattern, - pass_number=0, - ) - def _int_mm_weight_prepack(match: Match, *args, **kwargs): - bias = kwargs.get("bias", None) - if bias is not None: - if len(bias.meta.get("tensor_meta").shape) != 1: - # we expect bias is a vector - return - x = kwargs["a"] - weight = kwargs["b"] - dtype = kwargs["dtype"] - x_scale = kwargs["x_scale"] - w_scale = kwargs["w_scale"] - x_shape = x.meta.get("tensor_meta").shape - if has_free_symbols(x_shape): - # For dynamic shape case, we can't get activation shape ahead of runtime. - x_shape = None - - out_node = match.output_node() - with match.graph.inserting_before(out_node): - transpose_node = match.graph.call_function( - aten.permute.default, args=(weight, [1, 0]) - ) - contig_node = match.graph.call_function( - aten.contiguous.default, args=(transpose_node,) - ) - packed_weight_inputs = ( - contig_node, - x_shape, - ) - packed_weight_op = torch.ops.onednn.qlinear_prepack - prepack_weight_node = match.graph.call_function( - packed_weight_op, args=packed_weight_inputs - ) - - dummy_zp = match.graph.call_function(aten.empty, args=([0],)) - w_scale = match.graph.call_function( - prims.convert_element_type.default, args=(w_scale, torch.float32) - ) - - x_scale_shape = x_scale.meta.get("tensor_meta").shape - x_scale_is_scalar = False - if not has_free_symbols(x_scale_shape): - prod = 1 - for d in x_scale_shape: - prod *= d - x_scale_is_scalar = prod == 1 - - new_args: Tuple[Any, ...] 
- if x_scale_is_scalar: - # in this case, we can call onednn.qlinear directly - new_args = ( - x, - x_scale, - dummy_zp, # x_zp - prepack_weight_node, - w_scale, - dummy_zp, # w_zp - bias, - 1.0, # output_scale - 0, # output_zero_point - dtype, # output_dtype - "none", # post op name - [], # post op args - "", # post op algorithm - ) - new_linear_node = match.graph.call_function( - torch.ops.onednn.qlinear_pointwise.tensor, args=new_args - ) - out_node.replace_all_uses_with(new_linear_node) - new_linear_node.meta.update(out_node.meta) - else: - # onednn.qlinear does not support per-channel quantization of x - # so in this case, we have to apply x scale and add bias ourselves after qlinear - x_reshaped = match.graph.call_function( - aten.reshape.default, args=(x, kwargs["in_shape"]) - ) - new_args = ( - x_reshaped, - 1.0, # x_scale - 0, # x_zp - prepack_weight_node, - w_scale, - dummy_zp, # w_zp - None, # bias - 1.0, # output_scale - 0, # output_zero_point - dtype, # output_dtype - "none", # post op name - [], # post op args - "", # post op algorithm - ) - new_linear_node = match.graph.call_function( - torch.ops.onednn.qlinear_pointwise, args=new_args - ) - # apply x scale - new_out_node = match.graph.call_function( - aten.mul.Tensor, args=(new_linear_node, x_scale) - ) - # Add bias and reshape - if bias is not None: - new_out_node = match.graph.call_function( - aten.add.Tensor, args=(new_out_node, bias) - ) - new_out_node = match.graph.call_function( - aten.reshape.default, - args=(new_out_node, kwargs["out_shape_with_bias"]), - ) - else: - new_out_node = match.graph.call_function( - aten.reshape.default, - args=(new_out_node, kwargs["out_shape_no_bias"]), - ) - out_node.replace_all_uses_with(new_out_node) - new_out_node.meta.update(out_node.meta) - for node in reversed(match.nodes): - match.graph.erase_node(node) - counters["inductor"]["qlinear_weight_prepack_matcher_count"] += 1 - counters["inductor"]["qlinear_weight_prepack_matcher_nodes"] += len( - match.nodes - ) - - @functools.lru_cache(None) def _register_quantization_weight_pack_pass(): # Step 1: Dequant promotion for int8-mixed-fp32/bf16 @@ -2721,9 +2540,6 @@ def _register_quantization_weight_pack_pass(): # Step 3: QLinear weight prepack _register_qlinear_weight_prepack() - # Step 4: weight prepack for SmoothQuant from Torchao - _register_smooth_quant_int_mm_pattern() - def quant_lift_up(graph_module: torch.fx.GraphModule): """ From c0c6bf4ef2fd8f1c76cd9b1830af00db37570441 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 6 Nov 2024 05:31:58 -0800 Subject: [PATCH 136/503] Don't use deprecated type properties in UpsampleKernel (#139399) By replacing `at::CPU(dtype)` pattern with `at::device(kCPU).dtype(dtype)` pattern Pull Request resolved: https://github.com/pytorch/pytorch/pull/139399 Approved by: https://github.com/Skylion007 ghstack dependencies: #139353 --- aten/src/ATen/native/cpu/UpSampleKernel.cpp | 18 +++++++++--------- torch/csrc/jit/mobile/flatbuffer_loader.cpp | 4 ++-- .../csrc/jit/runtime/register_special_ops.cpp | 2 +- torch/csrc/jit/serialization/unpickler.cpp | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/aten/src/ATen/native/cpu/UpSampleKernel.cpp b/aten/src/ATen/native/cpu/UpSampleKernel.cpp index 3cc02b5077665e..74fb38779ea156 100644 --- a/aten/src/ATen/native/cpu/UpSampleKernel.cpp +++ b/aten/src/ATen/native/cpu/UpSampleKernel.cpp @@ -735,8 +735,8 @@ struct HelperInterpBase { for ([[maybe_unused]] const auto j : c10::irange(interp_size)) { output.emplace_back( - empty(new_shape, 
CPU(c10::CppTypeToScalarType()))); - output.emplace_back(empty(new_shape, CPU(output_type))); + empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); + output.emplace_back(empty(new_shape, at::device(kCPU).dtype(output_type))); } } @@ -878,16 +878,16 @@ struct HelperInterpBase { // Bounds approach as in PIL: xmin/xmax output.emplace_back( - empty(new_shape, CPU(c10::CppTypeToScalarType()))); + empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); output.emplace_back( - empty(new_shape, CPU(c10::CppTypeToScalarType()))); + empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); output.emplace_back( - empty(new_shape, CPU(c10::CppTypeToScalarType()))); + empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); { // Weights new_shape[reshape_dim] = output_size * max_interp_size; - auto wts = empty(new_shape, CPU(c10::CppTypeToScalarType())); + auto wts = empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType())); auto strides = wts.strides().vec(); strides[reshape_dim] = 0; new_shape[reshape_dim] = output_size; @@ -895,7 +895,7 @@ struct HelperInterpBase { output.emplace_back(wts); // Weights indices output.emplace_back( - empty(new_shape, CPU(c10::CppTypeToScalarType()))); + empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); } int64_t* idx_ptr_xmin = output[0].data_ptr(); @@ -1050,9 +1050,9 @@ struct HelperInterpNearest : public HelperInterpBase { for ([[maybe_unused]] const auto j : c10::irange(interp_size)) { output.emplace_back( - empty(new_shape, CPU(c10::CppTypeToScalarType()))); + empty(new_shape, at::device(kCPU).dtype(c10::CppTypeToScalarType()))); // Defines weights for consistency, but not used - output.emplace_back(at::ones(new_shape, CPU(output_type))); + output.emplace_back(at::ones(new_shape, at::device(kCPU).dtype(output_type))); } } diff --git a/torch/csrc/jit/mobile/flatbuffer_loader.cpp b/torch/csrc/jit/mobile/flatbuffer_loader.cpp index 246bee03d6fdd6..f56b5818ecaccf 100644 --- a/torch/csrc/jit/mobile/flatbuffer_loader.cpp +++ b/torch/csrc/jit/mobile/flatbuffer_loader.cpp @@ -469,8 +469,8 @@ IValue parseBasic( at::Tensor parseTensorFromMetadata( FlatbufferLoader* loader, const mobile::serialization::TensorMetadata* tensor_md) { - at::ScalarType type = static_cast(tensor_md->scalar_type()); - auto options = at::CPU(type).options(); + auto type = static_cast(tensor_md->scalar_type()); + auto options = at::device(at::kCPU).dtype(type); at::Tensor tensor; if (tensor_md->quantized_schema() != nullptr) { // is quantized diff --git a/torch/csrc/jit/runtime/register_special_ops.cpp b/torch/csrc/jit/runtime/register_special_ops.cpp index 783aaf87ef7d7d..0f2447e05a9f8d 100644 --- a/torch/csrc/jit/runtime/register_special_ops.cpp +++ b/torch/csrc/jit/runtime/register_special_ops.cpp @@ -293,7 +293,7 @@ RegisterOperators reg({ DEFINE_TORCH_TENSOR_OP( bool, bool, - at::empty({}, at::CPU(at::kBool).options()).fill_(scalar_val)) + at::empty({}, at::device(at::kCPU).dtype(at::kBool)).fill_(scalar_val)) DEFINE_TORCH_TENSOR_OP( float, double, diff --git a/torch/csrc/jit/serialization/unpickler.cpp b/torch/csrc/jit/serialization/unpickler.cpp index fc95f7fe9a4a65..5a81a25c358e20 100644 --- a/torch/csrc/jit/serialization/unpickler.cpp +++ b/torch/csrc/jit/serialization/unpickler.cpp @@ -586,7 +586,7 @@ PickleOpCode Unpickler::readInstruction() { storage = storage_context_->getStorage(key); } else { int64_t numel = args.at(4).toInt(); - caffe2::TypeMeta dtype = at::CPU(type).typeMeta(); + auto dtype = 
scalarTypeToTypeMeta(type); at::DataPtr storage_ptr; if (numel > 0) { @@ -608,7 +608,7 @@ PickleOpCode Unpickler::readInstruction() { } } - auto options = at::CPU(type).options(); + auto options = at::device(at::kCPU).dtype(type); if (use_storage_device_) { options = options.device(storage.device()); device = storage.device(); From d558c1a04713ebca7f4085145ad8c2a415da144c Mon Sep 17 00:00:00 2001 From: cyy Date: Wed, 6 Nov 2024 13:42:20 +0000 Subject: [PATCH 137/503] Enable cppcoreguidelines-special-member-functions (#139132) Fixes #ISSUE_NUMBER Pull Request resolved: https://github.com/pytorch/pytorch/pull/139132 Approved by: https://github.com/sraikund16 --- .clang-tidy | 3 ++- aten/src/ATen/Context.h | 8 ++++++++ aten/src/ATen/DynamicLibrary.h | 2 ++ aten/src/ATen/SparseCsrTensorUtils.h | 6 ++++++ aten/src/ATen/TensorIterator.h | 1 + aten/src/ATen/ThreadLocalState.h | 4 ++++ aten/src/ATen/core/Dict.h | 1 + aten/src/ATen/core/PythonFallbackKernel.h | 8 ++++++++ aten/src/ATen/core/QuantizerBase.h | 2 +- aten/src/ATen/core/Tensor.h | 6 ++++++ aten/src/ATen/core/Vitals.h | 3 +++ aten/src/ATen/core/dynamic_type.h | 5 +++++ aten/src/ATen/core/jit_type.h | 2 +- aten/src/ATen/core/jit_type_base.h | 1 + aten/src/ATen/core/rref_interface.h | 1 + aten/src/ATen/core/type.cpp | 2 -- aten/src/ATen/functorch/DynamicLayer.cpp | 6 ++++++ aten/src/ATen/quantized/Quantizer.cpp | 2 -- aten/src/ATen/record_function.h | 6 ++++++ c10/core/impl/PythonDispatcherTLS.h | 4 ++++ c10/test/util/Metaprogramming_test.cpp | 4 ++-- c10/test/util/ThreadLocal_test.cpp | 2 ++ c10/test/util/intrusive_ptr_test.cpp | 1 + c10/test/util/logging_test.cpp | 1 + c10/test/util/typeid_test.cpp | 2 ++ c10/util/DynamicCounter.cpp | 5 +++++ c10/util/Exception.h | 4 ++++ c10/util/LeftRight.h | 3 +++ c10/util/order_preserving_flat_hash_map.h | 1 + torch/csrc/distributed/rpc/types.h | 4 ++++ torch/csrc/dynamo/python_compiled_autograd.cpp | 4 ++++ .../inductor/aoti_runner/model_container_runner_cuda.h | 1 + torch/csrc/lazy/core/ir.cpp | 2 -- torch/csrc/lazy/core/ir.h | 2 +- torch/csrc/lazy/core/ir_metadata.h | 4 ++++ torch/csrc/lazy/core/metrics.h | 4 ++++ torch/csrc/lazy/core/tensor.h | 8 ++++++++ torch/csrc/lazy/core/thread_pool.cpp | 4 ++++ torch/csrc/lazy/core/thread_pool.h | 1 + torch/csrc/monitor/counters.h | 4 ++++ torch/csrc/profiler/collection.cpp | 4 ++++ torch/csrc/profiler/orchestration/observer.h | 4 ++++ 42 files changed, 130 insertions(+), 12 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 3b03412a405095..5776dabe00728a 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -29,7 +29,6 @@ cppcoreguidelines-*, -cppcoreguidelines-pro-type-static-cast-downcast, -cppcoreguidelines-pro-type-union-access, -cppcoreguidelines-pro-type-vararg, --cppcoreguidelines-special-member-functions, -cppcoreguidelines-non-private-member-variables-in-classes, -facebook-hte-RelativeInclude, hicpp-exception-baseclass, @@ -64,5 +63,7 @@ readability-string-compare, HeaderFilterRegex: '^(aten/|c10/|torch/).*$' WarningsAsErrors: '*' CheckOptions: + cppcoreguidelines-special-member-functions.AllowSoleDefaultDtor: true + cppcoreguidelines-special-member-functions.AllowImplicitlyDeletedCopyOrMove: true misc-header-include-cycle.IgnoredFilesList: 'format.h;ivalue.h;custom_class.h;Dict.h;List.h;IListRef.h' ... 
diff --git a/aten/src/ATen/Context.h b/aten/src/ATen/Context.h index 40e843cbfa3380..e37fa9ea516c19 100644 --- a/aten/src/ATen/Context.h +++ b/aten/src/ATen/Context.h @@ -604,6 +604,10 @@ inline void manual_seed(uint64_t seed) { // NoTF32Guard disable_tf32; struct TORCH_API NoTF32Guard { NoTF32Guard(); + NoTF32Guard(NoTF32Guard&& other) = delete; + NoTF32Guard(const NoTF32Guard&) = delete; + NoTF32Guard& operator=(const NoTF32Guard&) = delete; + NoTF32Guard& operator=(NoTF32Guard&&) = delete; ~NoTF32Guard(); static bool should_disable_tf32(); @@ -613,6 +617,10 @@ struct TORCH_API NoTF32Guard { struct TORCH_API ROCmBackwardPassGuard { ROCmBackwardPassGuard(); + ROCmBackwardPassGuard(ROCmBackwardPassGuard&& other) = delete; + ROCmBackwardPassGuard(const ROCmBackwardPassGuard&) = delete; + ROCmBackwardPassGuard& operator=(const ROCmBackwardPassGuard&) = delete; + ROCmBackwardPassGuard& operator=(ROCmBackwardPassGuard&&) = delete; ~ROCmBackwardPassGuard(); static bool is_backward_pass(); }; diff --git a/aten/src/ATen/DynamicLibrary.h b/aten/src/ATen/DynamicLibrary.h index 523a21985f225e..061456c081e611 100644 --- a/aten/src/ATen/DynamicLibrary.h +++ b/aten/src/ATen/DynamicLibrary.h @@ -16,6 +16,8 @@ namespace at { struct DynamicLibrary { AT_DISALLOW_COPY_AND_ASSIGN(DynamicLibrary); + DynamicLibrary(DynamicLibrary&& other) = delete; + DynamicLibrary& operator=(DynamicLibrary&&) = delete; TORCH_API DynamicLibrary( const char* name, diff --git a/aten/src/ATen/SparseCsrTensorUtils.h b/aten/src/ATen/SparseCsrTensorUtils.h index 2ec973013c4941..3c6877083aeebf 100644 --- a/aten/src/ATen/SparseCsrTensorUtils.h +++ b/aten/src/ATen/SparseCsrTensorUtils.h @@ -155,6 +155,12 @@ class CheckSparseTensorInvariants { : old_state(at::globalContext().checkSparseTensorInvariants()) { at::globalContext().setCheckSparseTensorInvariants(state); } + CheckSparseTensorInvariants(CheckSparseTensorInvariants&& other) = delete; + CheckSparseTensorInvariants(const CheckSparseTensorInvariants&) = delete; + CheckSparseTensorInvariants& operator=(const CheckSparseTensorInvariants&) = + delete; + CheckSparseTensorInvariants& operator=(CheckSparseTensorInvariants&&) = + delete; ~CheckSparseTensorInvariants() { at::globalContext().setCheckSparseTensorInvariants(old_state); diff --git a/aten/src/ATen/TensorIterator.h b/aten/src/ATen/TensorIterator.h index 471faf664e271a..7bbd68b91ba837 100644 --- a/aten/src/ATen/TensorIterator.h +++ b/aten/src/ATen/TensorIterator.h @@ -995,6 +995,7 @@ class TORCH_API TensorIteratorConfig final { /// TensorIterator that can use 32-bit indexing. Taken together the splits cover /// the original TensorIterator. 
struct TORCH_API SplitUntil32Bit { + // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) struct TORCH_API iterator { iterator() = default; iterator(const TensorIteratorBase& iter); diff --git a/aten/src/ATen/ThreadLocalState.h b/aten/src/ATen/ThreadLocalState.h index 2469cb1c3c47e1..bb28175c5f42e0 100644 --- a/aten/src/ATen/ThreadLocalState.h +++ b/aten/src/ATen/ThreadLocalState.h @@ -96,6 +96,10 @@ class TORCH_API ThreadLocalStateGuard { // set the given state across the thread boundary ThreadLocalState::setThreadLocalState(state); } + ThreadLocalStateGuard(ThreadLocalStateGuard&& other) = delete; + ThreadLocalStateGuard(const ThreadLocalStateGuard&) = delete; + ThreadLocalStateGuard& operator=(const ThreadLocalStateGuard&) = delete; + ThreadLocalStateGuard& operator=(ThreadLocalStateGuard&&) = delete; ~ThreadLocalStateGuard() { // restore previously set variables diff --git a/aten/src/ATen/core/Dict.h b/aten/src/ATen/core/Dict.h index a1d4da07520fa3..d187d7b7c11699 100644 --- a/aten/src/ATen/core/Dict.h +++ b/aten/src/ATen/core/Dict.h @@ -206,6 +206,7 @@ template Dict toGenericDict(Dict +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class Dict final { private: static_assert((std::is_same_v && std::is_same_v) || guts::typelist::contains::value, "Invalid Key type for Dict. We only support int64_t, double, bool, and string."); diff --git a/aten/src/ATen/core/PythonFallbackKernel.h b/aten/src/ATen/core/PythonFallbackKernel.h index 67f24795eeb58d..1d2b613166d3f3 100644 --- a/aten/src/ATen/core/PythonFallbackKernel.h +++ b/aten/src/ATen/core/PythonFallbackKernel.h @@ -6,6 +6,10 @@ namespace at::impl { struct TORCH_API RestorePythonTLSSnapshot { RestorePythonTLSSnapshot(); + RestorePythonTLSSnapshot(RestorePythonTLSSnapshot&& other) = delete; + RestorePythonTLSSnapshot(const RestorePythonTLSSnapshot&) = delete; + RestorePythonTLSSnapshot& operator=(const RestorePythonTLSSnapshot&) = delete; + RestorePythonTLSSnapshot& operator=(RestorePythonTLSSnapshot&&) = delete; ~RestorePythonTLSSnapshot(); private: @@ -18,6 +22,10 @@ struct TORCH_API RestorePythonTLSSnapshot { struct TORCH_API MaybeSetTLSOnEntryGuard { public: MaybeSetTLSOnEntryGuard(); + MaybeSetTLSOnEntryGuard(MaybeSetTLSOnEntryGuard&& other) = delete; + MaybeSetTLSOnEntryGuard(const MaybeSetTLSOnEntryGuard&) = delete; + MaybeSetTLSOnEntryGuard& operator=(const MaybeSetTLSOnEntryGuard&) = delete; + MaybeSetTLSOnEntryGuard& operator=(MaybeSetTLSOnEntryGuard&&) = delete; ~MaybeSetTLSOnEntryGuard(); private: diff --git a/aten/src/ATen/core/QuantizerBase.h b/aten/src/ATen/core/QuantizerBase.h index 0d2eaeece88980..a56ead7a30c696 100644 --- a/aten/src/ATen/core/QuantizerBase.h +++ b/aten/src/ATen/core/QuantizerBase.h @@ -40,7 +40,7 @@ struct TORCH_API Quantizer : public c10::intrusive_ptr_target { // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) const ScalarType scalar_type_; explicit Quantizer(ScalarType scalar_type) : scalar_type_(scalar_type) {} - ~Quantizer() override; + ~Quantizer() override = default; // Copied from torch/csrc/jit/ir/scope.h QuantizerPtr intrusive_from_this() { diff --git a/aten/src/ATen/core/Tensor.h b/aten/src/ATen/core/Tensor.h index de887a024c22fb..63b707767d344d 100644 --- a/aten/src/ATen/core/Tensor.h +++ b/aten/src/ATen/core/Tensor.h @@ -4,6 +4,7 @@ #include namespace at { +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class TORCH_API OptionalTensorRef { public: OptionalTensorRef() = default; @@ -20,6 +21,7 @@ class TORCH_API OptionalTensorRef { 
OptionalTensorRef(const OptionalTensorRef& rhs) : ref_(Tensor::unsafe_borrow_t{}, rhs.ref_) {} + OptionalTensorRef(OptionalTensorRef&& rhs) = default; OptionalTensorRef& operator=(OptionalTensorRef rhs) { std::swap(ref_, rhs.ref_); return *this; @@ -59,6 +61,10 @@ class TORCH_API TensorRef { TensorRef(const TensorBase& src) : ref_(Tensor::unsafe_borrow_t{}, src) {} + TensorRef(TensorRef&& other) = default; + TensorRef(const TensorRef&) = default; + TensorRef& operator=(const TensorRef&) = default; + TensorRef& operator=(TensorRef&&) = default; const Tensor& operator*() const & { return ref_; diff --git a/aten/src/ATen/core/Vitals.h b/aten/src/ATen/core/Vitals.h index 8a7a51e81e1d27..7ec213938d564a 100644 --- a/aten/src/ATen/core/Vitals.h +++ b/aten/src/ATen/core/Vitals.h @@ -39,6 +39,8 @@ struct TORCH_API TorchVital { explicit TorchVital(std::string n) : name(std::move(n)) {} TorchVital(const TorchVital&) = default; TorchVital(TorchVital&&) = default; + TorchVital& operator=(const TorchVital&) = default; + TorchVital& operator=(TorchVital&&) = default; TorchVital() = delete; TorchVitalAttr& create(const std::string& attr); @@ -71,6 +73,7 @@ class TORCH_API APIVitals { APIVitals(APIVitals&& other) = delete; APIVitals& operator=(const APIVitals&) = delete; APIVitals& operator=(APIVitals&&) = delete; + ~APIVitals() = default; private: std::unordered_map name_map_; diff --git a/aten/src/ATen/core/dynamic_type.h b/aten/src/ATen/core/dynamic_type.h index 4ad833a295b12c..697fcec39e34cd 100644 --- a/aten/src/ATen/core/dynamic_type.h +++ b/aten/src/ATen/core/dynamic_type.h @@ -159,6 +159,11 @@ class DynamicType : public SharedType { explicit DynamicType(Tag, Arguments); explicit DynamicType(Tag, std::string_view, Arguments); + DynamicType(DynamicType&& other) = delete; + DynamicType(const DynamicType&) = delete; + DynamicType& operator=(const DynamicType&) = delete; + DynamicType& operator=(DynamicType&&) = delete; + TypePtr containedType(size_t) const override; size_t containedTypeSize() const override; Tag tag() const { diff --git a/aten/src/ATen/core/jit_type.h b/aten/src/ATen/core/jit_type.h index 5951b4763be3a0..58d50de74faed3 100644 --- a/aten/src/ATen/core/jit_type.h +++ b/aten/src/ATen/core/jit_type.h @@ -2204,7 +2204,7 @@ struct TORCH_API InterfaceType : public NamedType { return is_module_; } static const TypeKind Kind = TypeKind::InterfaceType; - ~InterfaceType() override; + ~InterfaceType() override = default; private: InterfaceType(QualifiedName name, bool is_module); static bool isSubTypeImpl( diff --git a/aten/src/ATen/core/jit_type_base.h b/aten/src/ATen/core/jit_type_base.h index 8904cd7bc431e3..de440787ee686f 100644 --- a/aten/src/ATen/core/jit_type_base.h +++ b/aten/src/ATen/core/jit_type_base.h @@ -227,6 +227,7 @@ struct TORCH_API Type { SingletonOrSharedTypePtr(SingletonOrSharedTypePtr&&) noexcept = default; SingletonOrSharedTypePtr& operator=(const SingletonOrSharedTypePtr&) = default; SingletonOrSharedTypePtr& operator=(SingletonOrSharedTypePtr&&) noexcept = default; + ~SingletonOrSharedTypePtr() = default; T* get() const { return repr_.isSharedAndNonNull() ? repr_.shared_.repr_.get() : static_cast(repr_.rawRepr().first); diff --git a/aten/src/ATen/core/rref_interface.h b/aten/src/ATen/core/rref_interface.h index f0749d368792f0..70273f168d9361 100644 --- a/aten/src/ATen/core/rref_interface.h +++ b/aten/src/ATen/core/rref_interface.h @@ -17,6 +17,7 @@ class C10_EXPORT RRefInterface : public c10::intrusive_ptr_target { // counting. 
RRefInterface(const RRefInterface& other) = delete; RRefInterface(RRefInterface&& other) = delete; + RRefInterface& operator=(const RRefInterface& other) = delete; RRefInterface& operator=(RRefInterface&& other) = delete; ~RRefInterface() override = default; diff --git a/aten/src/ATen/core/type.cpp b/aten/src/ATen/core/type.cpp index 92c30e6ec8437c..164ea6d44f5846 100644 --- a/aten/src/ATen/core/type.cpp +++ b/aten/src/ATen/core/type.cpp @@ -1037,8 +1037,6 @@ InterfaceType::InterfaceType(QualifiedName name, bool is_module) methods_(std::make_shared>()), is_module_(is_module) {} -InterfaceType::~InterfaceType() = default; - bool containsAnyType(const TypePtr& type) { std::vector to_scan = { type }; while (!to_scan.empty()) { diff --git a/aten/src/ATen/functorch/DynamicLayer.cpp b/aten/src/ATen/functorch/DynamicLayer.cpp index 81b82f2556c19e..9bdf155affc2b2 100644 --- a/aten/src/ATen/functorch/DynamicLayer.cpp +++ b/aten/src/ATen/functorch/DynamicLayer.cpp @@ -202,6 +202,8 @@ struct SaveLocalDispatchKeySet { } SaveLocalDispatchKeySet(const SaveLocalDispatchKeySet&) = delete; SaveLocalDispatchKeySet& operator=(const SaveLocalDispatchKeySet&) = delete; + SaveLocalDispatchKeySet(SaveLocalDispatchKeySet&&) = delete; + SaveLocalDispatchKeySet& operator=(SaveLocalDispatchKeySet&&) = delete; }; const std::vector& getDynamicLayerStack() { @@ -406,6 +408,10 @@ static void dump_local_tls() { struct WithoutTop { WithoutTop(); + WithoutTop(WithoutTop&& other) = delete; + WithoutTop(const WithoutTop&) = delete; + WithoutTop& operator=(const WithoutTop&) = delete; + WithoutTop& operator=(WithoutTop&&) = delete; ~WithoutTop(); DynamicLayer layer_; }; diff --git a/aten/src/ATen/quantized/Quantizer.cpp b/aten/src/ATen/quantized/Quantizer.cpp index ef8f8deb4973be..fa48b33ce7c0d0 100644 --- a/aten/src/ATen/quantized/Quantizer.cpp +++ b/aten/src/ATen/quantized/Quantizer.cpp @@ -313,8 +313,6 @@ Tensor& PerChannelAffineFloatQParamsQuantizer::dequantize_out( return rtensor; } -Quantizer::~Quantizer() = default; - C10_EXPORT void set_quantizer_(const Tensor& self, ConstQuantizerPtr quantizer) { get_qtensorimpl(self)->set_quantizer_(quantizer); } diff --git a/aten/src/ATen/record_function.h b/aten/src/ATen/record_function.h index 15130c91367522..52115b4a65af66 100644 --- a/aten/src/ATen/record_function.h +++ b/aten/src/ATen/record_function.h @@ -353,6 +353,8 @@ struct TORCH_API RecordFunction { RecordFunction(const RecordFunction&) = delete; RecordFunction& operator=(const RecordFunction&) = delete; + RecordFunction(RecordFunction&&) = delete; + RecordFunction& operator=(RecordFunction&&) = delete; const char* name() const; @@ -764,6 +766,10 @@ class TORCH_API RecordFunctionGuard { enableRecordFunction(is_enabled); } + RecordFunctionGuard(RecordFunctionGuard&& other) = delete; + RecordFunctionGuard(const RecordFunctionGuard&) = delete; + RecordFunctionGuard& operator=(const RecordFunctionGuard&) = delete; + RecordFunctionGuard& operator=(RecordFunctionGuard&&) = delete; virtual ~RecordFunctionGuard() { enableRecordFunction(prev_value_); } diff --git a/c10/core/impl/PythonDispatcherTLS.h b/c10/core/impl/PythonDispatcherTLS.h index 12c0677f36fdb5..7b91aab686eca1 100644 --- a/c10/core/impl/PythonDispatcherTLS.h +++ b/c10/core/impl/PythonDispatcherTLS.h @@ -16,6 +16,10 @@ struct C10_API DisablePythonDispatcher { PythonDispatcherTLS::set_state({}); } + DisablePythonDispatcher(DisablePythonDispatcher&& other) = delete; + DisablePythonDispatcher(const DisablePythonDispatcher&) = delete; + DisablePythonDispatcher& 
operator=(const DisablePythonDispatcher&) = delete; + DisablePythonDispatcher& operator=(DisablePythonDispatcher&&) = delete; ~DisablePythonDispatcher() { PythonDispatcherTLS::set_state(old_); } diff --git a/c10/test/util/Metaprogramming_test.cpp b/c10/test/util/Metaprogramming_test.cpp index ad301462bd5147..a7bca7a5b511f9 100644 --- a/c10/test/util/Metaprogramming_test.cpp +++ b/c10/test/util/Metaprogramming_test.cpp @@ -5,7 +5,7 @@ using namespace c10::guts; -// NOLINTBEGIN(modernize*) +// NOLINTBEGIN(modernize*, cppcoreguidelines-special-member-functions) namespace { namespace test_function_traits { @@ -302,4 +302,4 @@ TEST(MetaprogrammingTest, TupleMap_canBeUsedWithAutoLambdas) { } // namespace test_tuple_map } // namespace -// NOLINTEND(modernize*) +// NOLINTEND(modernize*, cppcoreguidelines-special-member-functions) diff --git a/c10/test/util/ThreadLocal_test.cpp b/c10/test/util/ThreadLocal_test.cpp index bbc10c0c52e443..29e748e14890e7 100644 --- a/c10/test/util/ThreadLocal_test.cpp +++ b/c10/test/util/ThreadLocal_test.cpp @@ -148,6 +148,7 @@ TEST(ThreadLocalTest, TestThreadWithGlobalScopeVar) { TEST(ThreadLocalTest, TestObjectsAreReleased) { static std::atomic ctors{0}; static std::atomic dtors{0}; + // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) struct A { A() { ++ctors; @@ -183,6 +184,7 @@ TEST(ThreadLocalTest, TestObjectsAreReleased) { TEST(ThreadLocalTest, TestObjectsAreReleasedByNonstaticThreadLocal) { static std::atomic ctors(0); static std::atomic dtors(0); + // NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) struct A { A() { ++ctors; diff --git a/c10/test/util/intrusive_ptr_test.cpp b/c10/test/util/intrusive_ptr_test.cpp index 14c12f422f2cd8..47e7942950ef79 100644 --- a/c10/test/util/intrusive_ptr_test.cpp +++ b/c10/test/util/intrusive_ptr_test.cpp @@ -45,6 +45,7 @@ struct SomeChildClass : SomeBaseClass { SomeChildClass(int v) : SomeBaseClass(v) {} }; +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class DestructableMock : public intrusive_ptr_target { public: DestructableMock(bool* resourcesReleased, bool* wasDestructed) diff --git a/c10/test/util/logging_test.cpp b/c10/test/util/logging_test.cpp index 5798b37c18e385..c06dfb43d46cb4 100644 --- a/c10/test/util/logging_test.cpp +++ b/c10/test/util/logging_test.cpp @@ -81,6 +81,7 @@ TEST( } namespace { +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) struct Noncopyable { int x; diff --git a/c10/test/util/typeid_test.cpp b/c10/test/util/typeid_test.cpp index 7587d5eefdbd53..8e78ec84e530aa 100644 --- a/c10/test/util/typeid_test.cpp +++ b/c10/test/util/typeid_test.cpp @@ -70,6 +70,7 @@ TEST(TypeMetaTest, TypeMeta) { EXPECT_NE(bar_meta.name().find("TypeMetaTestBar"), c10::string_view::npos); } +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class ClassAllowAssignment { public: ClassAllowAssignment() = default; @@ -78,6 +79,7 @@ class ClassAllowAssignment { int x{42}; }; +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class ClassNoAssignment { public: ClassNoAssignment() = default; diff --git a/c10/util/DynamicCounter.cpp b/c10/util/DynamicCounter.cpp index 0b7906af1b1204..cd9decfc41f3af 100644 --- a/c10/util/DynamicCounter.cpp +++ b/c10/util/DynamicCounter.cpp @@ -52,6 +52,11 @@ struct DynamicCounter::Guard { } } + Guard(Guard&& other) = delete; + Guard(const Guard&) = delete; + Guard& operator=(const Guard&) = delete; + Guard& operator=(Guard&&) = delete; + ~Guard() { for (const auto& backend : backends_) { backend->unregisterCounter(key_); diff 
--git a/c10/util/Exception.h b/c10/util/Exception.h index 275526cf400823..e83b65cc5efc0f 100644 --- a/c10/util/Exception.h +++ b/c10/util/Exception.h @@ -205,6 +205,10 @@ class C10_API WarningHandlerGuard { : prev_handler_(c10::WarningUtils::get_warning_handler()) { c10::WarningUtils::set_warning_handler(new_handler); } + WarningHandlerGuard(WarningHandlerGuard&& other) = delete; + WarningHandlerGuard(const WarningHandlerGuard&) = delete; + WarningHandlerGuard& operator=(const WarningHandlerGuard&) = delete; + WarningHandlerGuard& operator=(WarningHandlerGuard&&) = delete; ~WarningHandlerGuard() { c10::WarningUtils::set_warning_handler(prev_handler_); } diff --git a/c10/util/LeftRight.h b/c10/util/LeftRight.h index 58145b2c779cc3..0ad9a1b346103e 100644 --- a/c10/util/LeftRight.h +++ b/c10/util/LeftRight.h @@ -18,6 +18,8 @@ struct IncrementRAII final { ~IncrementRAII() { _counter->fetch_sub(1); } + IncrementRAII(IncrementRAII&&) = delete; + IncrementRAII& operator=(IncrementRAII&&) = delete; private: std::atomic* _counter; @@ -201,6 +203,7 @@ class RWSafeLeftRightWrapper final { RWSafeLeftRightWrapper(RWSafeLeftRightWrapper&&) noexcept = delete; RWSafeLeftRightWrapper& operator=(const RWSafeLeftRightWrapper&) = delete; RWSafeLeftRightWrapper& operator=(RWSafeLeftRightWrapper&&) noexcept = delete; + ~RWSafeLeftRightWrapper() = default; template // NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) diff --git a/c10/util/order_preserving_flat_hash_map.h b/c10/util/order_preserving_flat_hash_map.h index 021995600344ad..fd8196432c994c 100644 --- a/c10/util/order_preserving_flat_hash_map.h +++ b/c10/util/order_preserving_flat_hash_map.h @@ -139,6 +139,7 @@ struct KeyOrValueEquality : functor_storage { }; static constexpr int8_t min_lookups = 4; template +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) struct sherwood_v3_entry { // NOLINTNEXTLINE(modernize-use-equals-default) sherwood_v3_entry() {} diff --git a/torch/csrc/distributed/rpc/types.h b/torch/csrc/distributed/rpc/types.h index 7844ce270cd471..82cf528bb9bd6b 100644 --- a/torch/csrc/distributed/rpc/types.h +++ b/torch/csrc/distributed/rpc/types.h @@ -13,6 +13,10 @@ TORCH_API void disableJitRRefPickle(); struct TORCH_API JitRRefPickleGuard { JitRRefPickleGuard(); + JitRRefPickleGuard(JitRRefPickleGuard&& other) = delete; + JitRRefPickleGuard(const JitRRefPickleGuard&) = delete; + JitRRefPickleGuard& operator=(const JitRRefPickleGuard&) = delete; + JitRRefPickleGuard& operator=(JitRRefPickleGuard&&) = delete; ~JitRRefPickleGuard(); }; diff --git a/torch/csrc/dynamo/python_compiled_autograd.cpp b/torch/csrc/dynamo/python_compiled_autograd.cpp index 33dac77d74340a..8bd192cde7b1b0 100644 --- a/torch/csrc/dynamo/python_compiled_autograd.cpp +++ b/torch/csrc/dynamo/python_compiled_autograd.cpp @@ -543,6 +543,10 @@ static PyObject* call_end_capture(PyObject* self, const variable_list& inputs) { struct ClosingTHPObjectPtr : public THPObjectPtr { ClosingTHPObjectPtr(PyObject* o) : THPObjectPtr(o) {} + ClosingTHPObjectPtr(ClosingTHPObjectPtr&& other) = default; + ClosingTHPObjectPtr(const ClosingTHPObjectPtr&) = delete; + ClosingTHPObjectPtr& operator=(const ClosingTHPObjectPtr&) = delete; + ClosingTHPObjectPtr& operator=(ClosingTHPObjectPtr&&) = default; ~ClosingTHPObjectPtr() { if (PyErr_Occurred()) { // do nothing, do not attempt to close diff --git a/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h b/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h index 5db82bf413668a..2018cf7573e7a9 100644 --- 
a/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h +++ b/torch/csrc/inductor/aoti_runner/model_container_runner_cuda.h @@ -8,6 +8,7 @@ namespace torch::inductor { // NOTICE: Following APIs are subject to change due to active development // We provide NO BC guarantee for these APIs +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class TORCH_API AOTIModelContainerRunnerCuda : public AOTIModelContainerRunner { public: // @param device_str: cuda device string, e.g. "cuda", "cuda:0" diff --git a/torch/csrc/lazy/core/ir.cpp b/torch/csrc/lazy/core/ir.cpp index fe9cfba2556c06..033e4f5cf00b95 100644 --- a/torch/csrc/lazy/core/ir.cpp +++ b/torch/csrc/lazy/core/ir.cpp @@ -98,8 +98,6 @@ Node::Node(OpKind op, Shape shape, size_t num_outputs) : Node(op, num_outputs) { shapes_.push_back(std::move(shape)); } -Node::~Node() = default; - // Retrieves the full shape of the IR Node. c10::ArrayRef Node::shapes() const { return shapes_; diff --git a/torch/csrc/lazy/core/ir.h b/torch/csrc/lazy/core/ir.h index dd244b7442b2d3..8e384e7982bd58 100644 --- a/torch/csrc/lazy/core/ir.h +++ b/torch/csrc/lazy/core/ir.h @@ -101,7 +101,7 @@ class TORCH_API Node { // Construct node with shape and no operands Node(OpKind op, Shape shape, size_t num_outputs = 1); - virtual ~Node(); + virtual ~Node() = default; const OpKind& op() const { return op_; diff --git a/torch/csrc/lazy/core/ir_metadata.h b/torch/csrc/lazy/core/ir_metadata.h index aeaacb596e0a35..7e73a593181997 100644 --- a/torch/csrc/lazy/core/ir_metadata.h +++ b/torch/csrc/lazy/core/ir_metadata.h @@ -38,6 +38,10 @@ struct TORCH_API MetaData { struct TORCH_API ScopePusher { explicit ScopePusher(const std::string& name); ~ScopePusher(); + ScopePusher(ScopePusher&& other) = delete; + ScopePusher(const ScopePusher&) = delete; + ScopePusher& operator=(const ScopePusher&) = delete; + ScopePusher& operator=(ScopePusher&&) = delete; static void ResetScopes(); }; diff --git a/torch/csrc/lazy/core/metrics.h b/torch/csrc/lazy/core/metrics.h index 191baa5eb1768e..05b525778d9a3e 100644 --- a/torch/csrc/lazy/core/metrics.h +++ b/torch/csrc/lazy/core/metrics.h @@ -258,6 +258,10 @@ class TORCH_API TimedSection { public: explicit TimedSection(Metric* metric) : metric_(metric), start_(NowNs()) {} + TimedSection(TimedSection&& other) = delete; + TimedSection(const TimedSection&) = delete; + TimedSection& operator=(const TimedSection&) = delete; + TimedSection& operator=(TimedSection&&) = delete; ~TimedSection() { int64_t now = NowNs(); metric_->AddSample(now, static_cast(now - start_)); diff --git a/torch/csrc/lazy/core/tensor.h b/torch/csrc/lazy/core/tensor.h index dfa317d3b81f90..b739399b6bbdb3 100644 --- a/torch/csrc/lazy/core/tensor.h +++ b/torch/csrc/lazy/core/tensor.h @@ -42,12 +42,18 @@ class TORCH_API LazyTensor : public c10::intrusive_ptr_target { Data(BackendDevice device) : device(std::move(device)), unique_id(GetNextTensorId()) {} + Data(Data&& other) = delete; + Data(const Data&) = delete; + Data& operator=(const Data&) = delete; + Data& operator=(Data&&) = delete; virtual ~Data(); BackendDataPtr handle; Value ir_value; std::optional tensor_data; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) const BackendDevice device; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) const int64_t unique_id = 0; size_t generation = 1; }; @@ -68,6 +74,8 @@ class TORCH_API LazyTensor : public c10::intrusive_ptr_target { LazyTensor() = delete; LazyTensor(const LazyTensor&) = default; LazyTensor(LazyTensor&&) noexcept = 
default; + LazyTensor& operator=(const LazyTensor&) = default; + LazyTensor& operator=(LazyTensor&&) noexcept = default; ~LazyTensor() override = default; diff --git a/torch/csrc/lazy/core/thread_pool.cpp b/torch/csrc/lazy/core/thread_pool.cpp index 3f87aaa96b5191..e61827e5b0fdc1 100644 --- a/torch/csrc/lazy/core/thread_pool.cpp +++ b/torch/csrc/lazy/core/thread_pool.cpp @@ -26,6 +26,10 @@ class ThreadPool { }); } } + ThreadPool(const ThreadPool&) = delete; + ThreadPool(ThreadPool&&) = delete; + ThreadPool& operator=(const ThreadPool&) = delete; + ThreadPool& operator=(ThreadPool&&) = delete; ~ThreadPool() { { diff --git a/torch/csrc/lazy/core/thread_pool.h b/torch/csrc/lazy/core/thread_pool.h index 8caa1e0bbc4537..2e0ae8f89d8e9a 100644 --- a/torch/csrc/lazy/core/thread_pool.h +++ b/torch/csrc/lazy/core/thread_pool.h @@ -13,6 +13,7 @@ namespace torch::lazy { +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) class TORCH_API Completion { public: class Data; diff --git a/torch/csrc/monitor/counters.h b/torch/csrc/monitor/counters.h index bfb1a463b713f3..986dfb7b85ca13 100644 --- a/torch/csrc/monitor/counters.h +++ b/torch/csrc/monitor/counters.h @@ -122,6 +122,10 @@ class Stat { maxSamples_(maxSamples) { detail::registerStat(this); } + Stat(const Stat&) = delete; + Stat(Stat&&) = delete; + Stat& operator=(const Stat&) = delete; + Stat& operator=(Stat&&) = delete; virtual ~Stat() { { diff --git a/torch/csrc/profiler/collection.cpp b/torch/csrc/profiler/collection.cpp index eefe5621a293eb..c5b0d9539d4fd5 100644 --- a/torch/csrc/profiler/collection.cpp +++ b/torch/csrc/profiler/collection.cpp @@ -402,6 +402,10 @@ struct StealOrDefault { explicit StealOrDefault(T& container) : container_{container}, it_{container.begin()} {} + StealOrDefault(const StealOrDefault&) = delete; + StealOrDefault(StealOrDefault&&) = delete; + StealOrDefault& operator=(const StealOrDefault&) = delete; + StealOrDefault& operator=(StealOrDefault&&) = delete; ~StealOrDefault() { container_.get().clear(); } diff --git a/torch/csrc/profiler/orchestration/observer.h b/torch/csrc/profiler/orchestration/observer.h index c3beb4cca4d0c7..272e2e4f9d5f93 100644 --- a/torch/csrc/profiler/orchestration/observer.h +++ b/torch/csrc/profiler/orchestration/observer.h @@ -128,6 +128,10 @@ struct TORCH_API ProfilerConfig { // ---------------------------------------------------------------------------- struct TORCH_API ProfilerStateBase : public c10::MemoryReportingInfoBase { explicit ProfilerStateBase(ProfilerConfig config); + ProfilerStateBase(const ProfilerStateBase&) = delete; + ProfilerStateBase(ProfilerStateBase&&) = delete; + ProfilerStateBase& operator=(const ProfilerStateBase&) = delete; + ProfilerStateBase& operator=(ProfilerStateBase&&) = delete; ~ProfilerStateBase() override; static ProfilerStateBase* get(bool global); From 44df6522ee444b8ffb3023165970f6a648ed519d Mon Sep 17 00:00:00 2001 From: "Sun, Jiayi" Date: Wed, 6 Nov 2024 09:40:02 +0000 Subject: [PATCH 138/503] add Half/BFloat16 support for grid_sample on CPU (#134812) Fix https://github.com/pytorch/pytorch/issues/127224. 
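As a quick illustration of the newly supported dtypes (a minimal sketch with arbitrary shapes and random data, not from this PR's tests):

```python
import torch
import torch.nn.functional as F

# Arbitrary example shapes; after this change the CPU kernels also dispatch
# for reduced-precision floating-point inputs.
for dtype in (torch.float16, torch.bfloat16):
    inp = torch.randn(1, 2, 8, 8, dtype=dtype)
    grid = torch.rand(1, 4, 4, 2, dtype=dtype) * 2 - 1  # grid values in [-1, 1]
    out = F.grid_sample(inp, grid, mode="bilinear", align_corners=False)
    assert out.dtype == dtype and out.shape == (1, 2, 4, 4)
```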
Pull Request resolved: https://github.com/pytorch/pytorch/pull/134812 Approved by: https://github.com/Skylion007, https://github.com/mingfeima --- .../src/ATen/cpu/vec/vec512/vec512_bfloat16.h | 68 +------------------ aten/src/ATen/native/GridSampler.cpp | 12 ++-- .../src/ATen/native/cpu/GridSamplerKernel.cpp | 4 +- test/inductor/test_torchinductor_opinfo.py | 2 +- test/test_mps.py | 6 +- .../_internal/common_methods_invocations.py | 6 +- 6 files changed, 13 insertions(+), 85 deletions(-) diff --git a/aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h b/aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h index 843543c2f7cdec..ccfda12bd64cd6 100644 --- a/aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h +++ b/aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h @@ -221,73 +221,7 @@ static_assert( } template static Vectorized blend(const Vectorized& a, const Vectorized& b) { - __at_align__ int16_t tmp_values[size()]; - a.store(tmp_values); - if (mask & 0x01) - tmp_values[0] = b.values[31]; - if (mask & 0x02) - tmp_values[1] = b.values[30]; - if (mask & 0x04) - tmp_values[2] = b.values[29]; - if (mask & 0x08) - tmp_values[3] = b.values[28]; - if (mask & 0x10) - tmp_values[4] = b.values[27]; - if (mask & 0x20) - tmp_values[5] = b.values[26]; - if (mask & 0x40) - tmp_values[6] = b.values[25]; - if (mask & 0x80) - tmp_values[7] = b.values[24]; - if (mask & 0x100) - tmp_values[8] = b.values[23]; - if (mask & 0x200) - tmp_values[9] = b.values[22]; - if (mask & 0x400) - tmp_values[10] = b.values[21]; - if (mask & 0x800) - tmp_values[11] = b.values[20]; - if (mask & 0x1000) - tmp_values[12] = b.values[19]; - if (mask & 0x2000) - tmp_values[13] = b.values[18]; - if (mask & 0x4000) - tmp_values[14] = b.values[17]; - if (mask & 0x8000) - tmp_values[15] = b.values[16]; - if (mask & 0x10000) - tmp_values[16] = b.values[15]; - if (mask & 0x20000) - tmp_values[17] = b.values[14]; - if (mask & 0x40000) - tmp_values[18] = b.values[13]; - if (mask & 0x80000) - tmp_values[19] = b.values[12]; - if (mask & 0x100000) - tmp_values[20] = b.values[11]; - if (mask & 0x200000) - tmp_values[21] = b.values[10]; - if (mask & 0x400000) - tmp_values[22] = b.values[9]; - if (mask & 0x800000) - tmp_values[23] = b.values[8]; - if (mask & 0x1000000) - tmp_values[24] = b.values[7]; - if (mask & 0x2000000) - tmp_values[25] = b.values[6]; - if (mask & 0x4000000) - tmp_values[26] = b.values[5]; - if (mask & 0x8000000) - tmp_values[27] = b.values[4]; - if (mask & 0x10000000) - tmp_values[28] = b.values[3]; - if (mask & 0x20000000) - tmp_values[29] = b.values[2]; - if (mask & 0x40000000) - tmp_values[30] = b.values[1]; - if (mask & 0x80000000) - tmp_values[31] = b.values[0]; - return loadu(tmp_values); + return _mm512_mask_blend_epi16(mask, a.values, b.values); } static Vectorized blendv(const Vectorized& a, const Vectorized& b, const Vectorized& mask) { diff --git a/aten/src/ATen/native/GridSampler.cpp b/aten/src/ATen/native/GridSampler.cpp index 5d0259eeb1ba25..d7fd0541116dce 100644 --- a/aten/src/ATen/native/GridSampler.cpp +++ b/aten/src/ATen/native/GridSampler.cpp @@ -930,9 +930,7 @@ Tensor grid_sampler_2d_cpu(const Tensor& input, const Tensor& grid, } // AVX gather instructions use signed 32-bit offsets to gather float values. 
// Check for possible overflow and fallback to scalar implementation - if (input.scalar_type() != kDouble) { - TORCH_CHECK(input.scalar_type() == kFloat, - "grid_sampler_2d_cpu not implemented for ", input.scalar_type()); + if (input.scalar_type() == kFloat) { auto sizes = input.sizes(); auto strides = input.strides(); const auto grid_sW = grid.strides()[2]; @@ -968,7 +966,7 @@ Tensor grid_sampler_3d_cpu(const Tensor& input, const Tensor& grid, check_grid_sampler_common(input, grid); check_grid_sampler_3d(input, grid, interpolation_mode); - return AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "grid_sampler3d_cpu", [&] { + return AT_DISPATCH_FLOATING_TYPES_AND2(kHalf, kBFloat16, input.scalar_type(), "grid_sampler3d_cpu", [&] { return grid_sampler_3d_cpu_impl( input, grid, static_cast(interpolation_mode), static_cast(padding_mode), align_corners); @@ -986,9 +984,7 @@ grid_sampler_2d_backward_cpu(const Tensor& grad_output, const Tensor& input, con // AVX gather instructions use signed 32-bit offsets to gather float values. // Check for possible overflow and fallback to scalar implementation - if (input.scalar_type() != kDouble) { - TORCH_CHECK(input.scalar_type() == kFloat, - "grid_sampler_2d_backward_cpu not implemented for ", input.scalar_type()); + if (input.scalar_type() == kFloat) { auto isizes = input.sizes(); auto istrides = input.strides(); auto gsizes = grad_output.sizes(); @@ -1033,7 +1029,7 @@ grid_sampler_3d_backward_cpu(const Tensor& grad_output, const Tensor& input, con check_grid_sampler_common(input, grid); check_grid_sampler_3d(input, grid, interpolation_mode); - return AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "grid_sampler_3d_backward_cpu", [&] { + return AT_DISPATCH_FLOATING_TYPES_AND2(kHalf, kBFloat16, input.scalar_type(), "grid_sampler_3d_backward_cpu", [&] { return grid_sampler_3d_backward_cpu_impl( grad_output, input, grid, static_cast(interpolation_mode), diff --git a/aten/src/ATen/native/cpu/GridSamplerKernel.cpp b/aten/src/ATen/native/cpu/GridSamplerKernel.cpp index 9c6e62bc1ce9f0..ec5e9dfb6420bb 100644 --- a/aten/src/ATen/native/cpu/GridSamplerKernel.cpp +++ b/aten/src/ATen/native/cpu/GridSamplerKernel.cpp @@ -1184,7 +1184,7 @@ void grid_sampler_2d_cpu_kernel_impl( return; \ } - AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "grid_sampler_2d_cpu_kernel_impl", [&] { + AT_DISPATCH_FLOATING_TYPES_AND2(kHalf, kBFloat16, input.scalar_type(), "grid_sampler_2d_cpu_kernel_impl", [&] { auto out_acc = output.accessor(); auto inp_acc = input.accessor(); auto grid_acc = grid.accessor(); @@ -1272,7 +1272,7 @@ void grid_sampler_2d_backward_cpu_kernel_impl( return; \ } - AT_DISPATCH_FLOATING_TYPES(input.scalar_type(), "grid_sampler_2d_backward_cpu_kernel_impl", [&] { + AT_DISPATCH_FLOATING_TYPES_AND2(kHalf, kBFloat16, input.scalar_type(), "grid_sampler_2d_backward_cpu_kernel_impl", [&] { auto gGrid_acc = grad_grid.accessor(); auto inp_acc = input.accessor(); auto grid_acc = grid.accessor(); diff --git a/test/inductor/test_torchinductor_opinfo.py b/test/inductor/test_torchinductor_opinfo.py index f7ff0f8c7d6113..7e224ef4520ac0 100644 --- a/test/inductor/test_torchinductor_opinfo.py +++ b/test/inductor/test_torchinductor_opinfo.py @@ -693,7 +693,7 @@ def wrapper_noop_set_seed(op, *args, **kwargs): "nn.functional.cosine_similarity": {f16}, "nn.functional.cross_entropy": {f16, f32, f64}, "nn.functional.gaussian_nll_loss": {f16}, - "nn.functional.grid_sample": {f32, f64}, + "nn.functional.grid_sample": {f32, f64, f16}, "nn.functional.interpolate.area": {f16}, 
"nn.functional.nll_loss": {f16, f32, f64}, "normal": {f16, f32, f64}, diff --git a/test/test_mps.py b/test/test_mps.py index 5a5f7944d486e6..61312aa31a0c52 100644 --- a/test/test_mps.py +++ b/test/test_mps.py @@ -152,7 +152,7 @@ def mps_ops_grad_modifier(ops): MACOS_12_3_XFAILLIST_GRAD = { # Unsupported Border padding mode, forward pass success as fallback to cpu - 'grid_sampler_2d': [torch.float32], + 'grid_sampler_2d': [torch.float32, torch.float16, torch.bfloat16], # Unimplemented 'logaddexp2': [torch.float32], @@ -165,7 +165,7 @@ def mps_ops_grad_modifier(ops): 'masked.log_softmax': [torch.float32, torch.float16], # Unsupported Border padding mode, forward pass success as fallback to cpu - 'grid_sampler_2d': [torch.float32], + 'grid_sampler_2d': [torch.float32, torch.float16, torch.bfloat16], # Same issue as `argsort` and `sort` with duplicate elements (undefined behaviour). # Forward pass is passing since `msort` doesn't return the indices, just the values, which match the CPU. @@ -638,7 +638,7 @@ def mps_ops_modifier(ops): MACOS_AFTER_13_1_XFAILLIST = { # before macOS 13.2 it falls back to cpu and pass the forward pass - 'grid_sampler_2d': [torch.float32], # Unsupported Border padding mode + 'grid_sampler_2d': [torch.float32, torch.float16, torch.bfloat16], # Unsupported Border padding mode # inconsistency errors between cpu and mps, max seen atol is 2 'nn.functional.interpolatebilinear': [torch.uint8], } diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index d52a647497741f..0fd7da050dc2c5 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -20811,8 +20811,7 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): ), OpInfo( "nn.functional.grid_sample", - dtypes=floating_types(), - dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16), + dtypes=floating_types_and(torch.float16, torch.bfloat16), supports_out=False, sample_inputs_func=sample_inputs_grid_sample, reference_inputs_func=reference_inputs_grid_sample, @@ -20821,8 +20820,7 @@ def sample_inputs_alias_copy(op_info, device, dtype, requires_grad, **kwargs): # TODO: delete this OpInfo once we add meta support for grid_sampler_3d OpInfo( "grid_sampler_2d", - dtypes=floating_types(), - dtypesIfCUDA=floating_types_and(torch.float16, torch.bfloat16), + dtypes=floating_types_and(torch.float16, torch.bfloat16), supports_out=False, sample_inputs_func=sample_inputs_grid_sampler_2d, supports_gradgrad=False, From 641ca67d5a3853258d4ca9857aca1d5cecef1097 Mon Sep 17 00:00:00 2001 From: "Nichols A. Romero" Date: Wed, 6 Nov 2024 14:37:45 +0000 Subject: [PATCH 139/503] [ROCM] Fix hipBLASLt version check in TunableOp test (#139811) Allow 3 or more digits for hipBLASLt version check in TunableOp test. Needed due to upcoming ROCm 6.3 release. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139811 Approved by: https://github.com/eqy, https://github.com/malfet --- test/test_linalg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_linalg.py b/test/test_linalg.py index 7b4c1f04253055..8bbd0049855d75 100644 --- a/test/test_linalg.py +++ b/test/test_linalg.py @@ -4544,7 +4544,7 @@ def test_matmul_small_brute_force_tunableop(self, device, dtype): validators[key] = value if torch.version.hip: assert "HIPBLASLT_VERSION" in validators - assert re.match(r'^\d{3}-[a-z0-9]{8}$', validators["HIPBLASLT_VERSION"]) + assert re.match(r'^\d{3,}-[a-z0-9]{8}$', validators["HIPBLASLT_VERSION"]) assert len(torch.cuda.tunable.get_results()) > 0 assert torch.cuda.tunable.write_file() # use default filename From 59cf4bc5ae64aea2c6a9b870243821695adfc30b Mon Sep 17 00:00:00 2001 From: Alan Du Date: Wed, 6 Nov 2024 15:16:12 +0000 Subject: [PATCH 140/503] Fix the use of fsspec transactions (#135541) fsspec transactions do not support concurrency and assumes that there is at most 1 running transaction per filesystem. This is *not* true in our usage, where because of multi-threading we usually have multiple concurrent transactions running at once. Previously, this would just (unsafely) pass but lead to hard-to-debug race conditions (since the commit of one transaction will blow away the state of the other transaction). In fsspec 2024.3.0, trying to commit concurrent transactions will actually crash (see the code at https://github.com/fsspec/filesystem_spec/blob/76ca4a68885d572880ac6800f079738df562f02c/fsspec/transaction.py#L39 -- because each filesystem can have a single transaction, this tear-down logic will error). Instead, let's manually handle committing / discarding changes to the file. I don't have a minimal test-case, but in Meta this solves a broken test on `fsspec >= 2024.3.0`: Before: https://www.internalfb.com/intern/testinfra/testrun/7318349626774607 After: https://www.internalfb.com/intern/testinfra/testrun/2251800062722633 Pull Request resolved: https://github.com/pytorch/pytorch/pull/135541 Approved by: https://github.com/Skylion007 --- .../checkpoint/_fsspec_filesystem.py | 33 +++++++++++++++---- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/torch/distributed/checkpoint/_fsspec_filesystem.py b/torch/distributed/checkpoint/_fsspec_filesystem.py index b57df9c3456ca6..c8230af7d66dd2 100644 --- a/torch/distributed/checkpoint/_fsspec_filesystem.py +++ b/torch/distributed/checkpoint/_fsspec_filesystem.py @@ -5,10 +5,8 @@ import os from contextlib import contextmanager from pathlib import Path -from typing import Generator, Optional, Union +from typing import Generator, Optional, TYPE_CHECKING, Union -import fsspec -from fsspec import AbstractFileSystem from fsspec.core import url_to_fs from torch.distributed.checkpoint.filesystem import ( @@ -18,6 +16,10 @@ ) +if TYPE_CHECKING: + from fsspec import AbstractFileSystem + + __all__ = [ "FsspecWriter", "FsspecReader", @@ -33,9 +35,28 @@ def create_stream( self, path: Union[str, os.PathLike], mode: str ) -> Generator[io.IOBase, None, None]: assert self.fs is not None - with self.fs.transaction: - with fsspec.open(str(path), mode) as stream: + # _open only supports binary mode + if "b" not in mode: + with self.create_stream(path, mode.replace("t", "") + "b") as stream: + yield io.TextIOWrapper(stream) + return + + # fsspec does not support concurrent transactions, so just + # manually handle commit/discard for each file. 
+ # + # This is safe as long as you don't call `create_stream` on + # the same path concurrently + assert self.fs is not None + with self.fs._open(os.fspath(path), mode, autocommit=False) as stream: + try: yield stream + except: # noqa: B001,E722 + if stream.writable(): + stream.discard() + raise + else: + if stream.writable(): + stream.commit() def concat_path( self, path: Union[str, os.PathLike], suffix: str @@ -51,7 +72,7 @@ def rename( ) -> None: self.fs.rename(path, new_path) - def mkdir(self, path: [str, os.PathLike]) -> None: + def mkdir(self, path: Union[str, os.PathLike]) -> None: self.fs.makedirs(path, exist_ok=True) @classmethod From 68ef445c330d465d3d20f253583c2370c5df0139 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Wed, 6 Nov 2024 06:57:43 -0800 Subject: [PATCH 141/503] [MPS][Perf] Dispatch to SDP-math-mps for non-contig Tensors (#139791) As MacOS-15 or newer supports those out of the box. This significantly reduces memory requirements and improves performance for some stable diffision networks. Test plan: Run ```python from diffusers import StableDiffusionXLPipeline, AutoencoderKL, EulerAncestralDiscreteScheduler import torch import time vae = AutoencoderKL.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", subfolder='vae', torch_dtype=torch.bfloat16, force_upcast=False).to('mps') pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", vae=vae, torch_dtype=torch.bfloat16, variant="fp16").to('mps') pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config) start_time = time.time() start_mps_mem = torch.mps.driver_allocated_memory() image = pipe(prompt="Spherical cow in vacuum", num_inference_steps=10, guidance_scale=8, generator=torch.Generator("mps").manual_seed(42), ).images[0] end_mps_mem = torch.mps.driver_allocated_memory() run_time = time.time() - start_time print(f"run time in {run_time:.2f} sec, end_mps_mem {end_mps_mem/1024.0**2:.2f} Mb mem increase {(end_mps_mem-start_time)/1024.0**2:.2f} Mb") image.save(f'bfloat16.png') ``` Before the change total memory use were 16Gb and needed 65 sec to complete, after it drops down to 14Gb and takes 50 sec to finish on M2Pro, though generated image remains the same: ![image](https://github.com/user-attachments/assets/1a35efef-9f80-4cd0-ac9c-30203eab6bb1) Fixes https://github.com/pytorch/pytorch/issues/139389 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139791 Approved by: https://github.com/drisspg, https://github.com/Skylion007 ghstack dependencies: #139788, #139784, #139763 --- aten/src/ATen/native/mps/operations/Attention.mm | 4 ++-- aten/src/ATen/native/transformers/attention.cpp | 5 ++++- torch/testing/_internal/common_modules.py | 3 +++ 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/aten/src/ATen/native/mps/operations/Attention.mm b/aten/src/ATen/native/mps/operations/Attention.mm index d1bbbf4346419a..ddeafa9e848ba5 100644 --- a/aten/src/ATen/native/mps/operations/Attention.mm +++ b/aten/src/ATen/native/mps/operations/Attention.mm @@ -27,13 +27,14 @@ bool is_causal, const std::optional& dropout_mask, std::optional scale) { + const auto macOS15_0_plus = is_macos_13_or_newer(MacOSVersion::MACOS_VER_15_0_PLUS); if (is_causal) { TORCH_CHECK(!attn_mask.has_value(), "_scaled_dot_product_attention: Explicit attn_mask should not be set when is_causal=True"); } TORCH_CHECK(dropout_p == 0.0, "_scaled_dot_product_attention_math_for_mps: dropout_p != 0.0 is not supported"); - TORCH_CHECK(query.is_contiguous() && 
key.is_contiguous() && value.is_contiguous(), + TORCH_CHECK(macOS15_0_plus || (query.is_contiguous() && key.is_contiguous() && value.is_contiguous()), "_scaled_dot_product_attention_math_for_mps: query, key, and value must be contiguous"); TORCH_CHECK(!query.is_nested() && !key.is_nested() && !value.is_nested(), "_scaled_dot_product_attention_math_for_mps: query, key, and value must not be nested"); @@ -68,7 +69,6 @@ auto maskedMM = [mpsGraph matrixMultiplicationWithPrimaryTensor:qTensor secondaryTensor:kT name:nil]; - bool macOS15_0_plus = is_macos_13_or_newer(MacOSVersion::MACOS_VER_15_0_PLUS); if (macOS15_0_plus && [maskedMM dataType] == MPSDataTypeFloat32) { // TODO: In MacOS15 beta, there is a MPSGraph issue when the SDPA sequence gets remapped to use // an improved kernel for the computation, causing NaNs in the result. This identity prevents the remapping. diff --git a/aten/src/ATen/native/transformers/attention.cpp b/aten/src/ATen/native/transformers/attention.cpp index 617b6d99da3419..120f4cba375a36 100644 --- a/aten/src/ATen/native/transformers/attention.cpp +++ b/aten/src/ATen/native/transformers/attention.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -752,12 +753,13 @@ Tensor scaled_dot_product_attention( return std::get<0>(out_lse_softmax); } case SDPBackend::math: { +#ifdef USE_MPS const auto any_nested = query_.is_nested() || key.is_nested() || value.is_nested(); const bool any_inputs_require_grad = query_.requires_grad() || key.requires_grad() || value.requires_grad(); const auto all_contiguous = query_.is_contiguous() && key.is_contiguous() && value.is_contiguous(); if (query_device_type == DeviceType::MPS && dropout_p == 0.0 && !(GradMode::is_enabled() && any_inputs_require_grad) - && all_contiguous + && (all_contiguous || mps::is_macos_13_or_newer(mps::MacOSVersion::MACOS_VER_15_0_PLUS)) && !any_nested) { return std::get<0>(at::_scaled_dot_product_attention_math_for_mps( query_, @@ -769,6 +771,7 @@ Tensor scaled_dot_product_attention( std::nullopt, /*dropout_mask*/ scale)); } +#endif return std::get<0>(at::_scaled_dot_product_attention_math( query_, key, diff --git a/torch/testing/_internal/common_modules.py b/torch/testing/_internal/common_modules.py index 69dabb07c1c29c..3675d3468895c8 100644 --- a/torch/testing/_internal/common_modules.py +++ b/torch/testing/_internal/common_modules.py @@ -4140,6 +4140,9 @@ def module_error_inputs_torch_nn_Pad3d(module_info, device, dtype, requires_grad DecorateInfo(toleranceOverride({torch.float32: tol(atol=1e-4, rtol=1e-4)}), 'TestModule', 'test_non_contiguous_tensors', device_type='cpu', active_if=IS_WINDOWS), + DecorateInfo(toleranceOverride({torch.float16: tol(atol=1e-4, rtol=2e-3)}), + 'TestModule', 'test_forward', + device_type='mps'), # Not implemented for SDPA backward derivative DecorateInfo(unittest.skip("Skipped!"), 'TestModule', 'test_gradgrad', device_type='cpu'), From e05a096c490f5058d2f83b5f3b10e11ab46eb55f Mon Sep 17 00:00:00 2001 From: "Edward Z. Yang" Date: Tue, 5 Nov 2024 20:24:50 -0800 Subject: [PATCH 142/503] Ignore polyfill when reporting user backtraces in summarized form (#139850) Fixes https://github.com/pytorch/pytorch/issues/139316 Signed-off-by: Edward Z. 
Yang Pull Request resolved: https://github.com/pytorch/pytorch/pull/139850 Approved by: https://github.com/bobrenjc93 --- test/dynamo/test_logging.py | 16 ++++++++++++++++ torch/_dynamo/guards.py | 10 +++++++--- torch/fx/experimental/symbolic_shapes.py | 11 +++++++++-- 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/test/dynamo/test_logging.py b/test/dynamo/test_logging.py index 9ef49da2037fdd..35c1f916d2b218 100644 --- a/test/dynamo/test_logging.py +++ b/test/dynamo/test_logging.py @@ -672,6 +672,22 @@ def f(x, y, z): +- LAMBDA_GUARD: 2 <= L['z'].size()[0] # return x + torch.cat([y, z]) # #:# in # (user code shown is first use of this value--the guard itself is not due user code but due to 0/1 specialization in the framework; to avoid specialization try torch._dynamo.mark_unbacked(tensor, dim))""", # noqa: B950 ) + @make_logging_test(guards=True) + def test_guards_polyfill_sloc(self, records): + @torch.compile(dynamic=True, backend="eager") + def f(x, y): + return any([x.size(0) == y.size(0) * 2]) + + f(torch.randn(6), torch.randn(3)) + + record = self.getRecord(records, "TREE_GUARD_MANAGER") + self.assertExpectedInline( + munge_shape_guards(record.getMessage()), + """\ ++- LAMBDA_GUARD: L['x'].size()[0] == 2*L['y'].size()[0] # return any([x.size(0) == y.size(0) * 2]) # #:# in # #:# in # ++- LAMBDA_GUARD: 2 <= L['y'].size()[0] # return any([x.size(0) == y.size(0) * 2]) # #:# in # (user code shown is first use of this value--the guard itself is not due user code but due to 0/1 specialization in the framework; to avoid specialization try torch._dynamo.mark_unbacked(tensor, dim))""", # noqa: B950 + ) + @make_logging_test(guards=True) def test_guards_sloc_vr(self, records): @torch.compile(dynamic=True, backend="eager") diff --git a/torch/_dynamo/guards.py b/torch/_dynamo/guards.py index 26e6d01b5bddf3..f0e623c8c9aafe 100644 --- a/torch/_dynamo/guards.py +++ b/torch/_dynamo/guards.py @@ -305,10 +305,14 @@ def from_numpy(a): @functools.lru_cache(None) def uninteresting_files(): import torch._dynamo.external_utils + import torch._dynamo.polyfills + + mods = [torch._dynamo.external_utils, torch._dynamo.polyfills] + + from torch._dynamo.polyfills.loader import POLYFILLED_MODULES + + mods.extend(POLYFILLED_MODULES) - mods = [ - torch._dynamo.external_utils, - ] return {inspect.getfile(m) for m in mods} diff --git a/torch/fx/experimental/symbolic_shapes.py b/torch/fx/experimental/symbolic_shapes.py index 2cf0e6ecab6455..614ea57e8b6bd8 100644 --- a/torch/fx/experimental/symbolic_shapes.py +++ b/torch/fx/experimental/symbolic_shapes.py @@ -302,7 +302,11 @@ def uninteresting_files() -> Set[str]: torch._subclasses.meta_utils, torch._subclasses.fake_tensor, ] - return {inspect.getfile(m) for m in mods} + import torch._dynamo.guards + + return { + inspect.getfile(m) for m in mods + } | torch._dynamo.guards.uninteresting_files() class ConstraintViolationError(RuntimeError): @@ -6042,7 +6046,10 @@ def _get_stack_summary( maybe_user_loc = None user_tb = TracingContext.extract_stack() if user_tb: - maybe_user_loc = format_frame(user_tb[-1], line=True) + idx = len(user_tb) - 1 + while idx > 0 and user_tb[idx].filename in uninteresting_files(): + idx -= 1 + maybe_user_loc = format_frame(user_tb[idx], line=True) maybe_extra_debug = "" if is_debug and user_tb: From dd6a5de00da19842e792a9f2090cd1f51a34bc3d Mon Sep 17 00:00:00 2001 From: James Wu Date: Wed, 6 Nov 2024 16:34:00 +0000 Subject: [PATCH 143/503] Allow OpOverloadPackets as safe torch functions, sanitize dynamo gm before running aotdispatch 
with cache (#139785) Summary: This diff implements two things to improve cache hit rates after testing AOTAutogradCache with internal cogwheel jobs: - We should allow torch functions that are OpOverloadPackets - When running with cache, there are some fields that dynamo puts into the input graph module to aotdispatch that are not stable between runs. We use a context manager to null these out so that they can't be used to affect the output of AOTAutograd, and then we put the fields back onto the gm before returning from AOTAutogradCache.load(). Test Plan: New unit tests + running nanogpt with AOTAutogradCache. Meta: Run on a long running job Cache miss: {F1953831996} Cache hit: {F1953830872} Servicelabs here: https://www.internalfb.com/servicelab/experiment/4301352991/ Cache hit: https://interncache-all.fbcdn.net/manifold/tlparse_reports/tree/logs/f660597709-TrainingApplication/attempt_0/version_0/rank_0/index.html Cache miss: https://interncache-all.fbcdn.net/manifold/tlparse_reports/tree/logs/f660569960-TrainingApplication/attempt_0/version_0/rank_0/index.html We can see that with these changes, autograd cache hits and saves compile time: https://fburl.com/scuba/pt2_compile_events/ycddxstd Differential Revision: D65436373 Pull Request resolved: https://github.com/pytorch/pytorch/pull/139785 Approved by: https://github.com/bdhirsh --- test/dynamo/test_aot_autograd_cache.py | 26 ++ .../_aot_autograd/autograd_cache.py | 225 ++++++++++-------- 2 files changed, 157 insertions(+), 94 deletions(-) diff --git a/test/dynamo/test_aot_autograd_cache.py b/test/dynamo/test_aot_autograd_cache.py index c8a72c839b9d60..27804f53f905bc 100644 --- a/test/dynamo/test_aot_autograd_cache.py +++ b/test/dynamo/test_aot_autograd_cache.py @@ -14,6 +14,7 @@ AOTAutogradCache, autograd_cache_key, BypassAOTAutogradCache, + sanitize_gm_for_cache, ) from torch._functorch._aot_autograd.schemas import AOTConfig from torch._inductor import config as inductor_config @@ -776,6 +777,31 @@ def fn(x): config = self.default_config() self.gen_cache_key(fn, config) + def test_sanitize_gm_for_cache(self): + def fn(x): + y = torch.sin(x) + z = torch.cos(x) + w = y + z + w.abs() + return w + + _, fx_g, example_inputs = self._get_dynamo_output(fn, torch.ones(3)) + fx_g.meta = {"foo": "bar"} + fx_g.compile_subgraph_reason = "Blah" + config = self.default_config() + with sanitize_gm_for_cache(fx_g): + c1 = autograd_cache_key(fx_g, example_inputs, config, {}) + c3 = autograd_cache_key(fx_g, example_inputs, config, {}) + + fx_g.meta = {"foo": "baz"} + fx_g.compile_subgraph_reason = None + with sanitize_gm_for_cache(fx_g): + c2 = autograd_cache_key(fx_g, example_inputs, config, {}) + c4 = autograd_cache_key(fx_g, example_inputs, config, {}) + + self.assertEqual(c1, c2) + self.assertNotEqual(c3, c4) + if __name__ == "__main__": from torch._dynamo.test_case import run_tests diff --git a/torch/_functorch/_aot_autograd/autograd_cache.py b/torch/_functorch/_aot_autograd/autograd_cache.py index 4a3f7dd578cbcc..bb6740882937a6 100644 --- a/torch/_functorch/_aot_autograd/autograd_cache.py +++ b/torch/_functorch/_aot_autograd/autograd_cache.py @@ -5,6 +5,7 @@ from __future__ import annotations import base64 +import contextlib import functools import json import logging @@ -125,7 +126,7 @@ def is_public_torch_api(target): ) def is_torch_function(target): - if isinstance(target, torch._ops.OpOverload): + if isinstance(target, (torch._ops.OpOverload, torch._ops.OpOverloadPacket)): return True if is_public_torch_api(target): return True @@ -324,7 
+325,7 @@ def load(self, example_inputs, fx_config: _CompileFxKwargs) -> CompiledFxGraph: torch._logging.trace_structured( "artifact", metadata_fn=lambda: { - "name": "fx_graph_cache_hash", + "name": "fx_graph_cache_hit", # always a hit "encoding": "json", }, payload_fn=lambda: json.dumps(cache_info), @@ -508,6 +509,36 @@ def wrap_post_compile( return compiled_function +@contextlib.contextmanager +def sanitize_gm_for_cache(gm: torch.fx.GraphModule): + """ + Clears a few fields in a dynamo supplied Graph Module that are not stable between graph inputs, but don't + affect inductor or aotdispatch correctness. + + These fields **can** be used by code calling into aotdispatch (namely, dynamo), so we can't null them out completely. + + To ensure that these fields are not accessed by inductor or aotdispatch, we clear them during AOTAutogradCache.load, + and then put them back before returning. This way, we generate a cache key based off of a canonical graph + without these fields, and also guarantee they aren't used to affect the cache's output. + """ + IGNORED_FIELDS = ( + "meta", # metadata used by export + "compile_subgraph_reason", # Used by dynamo only for logging, no change in inductor/autograd behavior + "_param_name_to_source", # Encapsulated by aot_config.aot_autograd_arg_pos_to_source + ) + saved_fields = {} + for field in IGNORED_FIELDS: + saved_fields[field] = getattr(gm, field, None) + # Clear the field + setattr(gm, field, None) + try: + yield + finally: + # Put the fields back after dispatch_and_compile is complete + for field, value in saved_fields.items(): + setattr(gm, field, value) + + class AOTAutogradCache: """ Caches the results of running AOTAutograd. This class mostly handles the save and load logic, whereas @@ -566,107 +597,113 @@ def load( Load a result from the cache, and reconstruct a runtime wrapper around the object """ gm = mod.gm if isinstance(mod, torch._dynamo.utils.GmWrapper) else mod - compiled_fn = None - cache_info: Dict[str, Any] = {} - cache_key = None - debug_lines: List[str] = [] - cache_event_time = time.time_ns() - cache_state = None - fx_config: _CompileFxKwargs = {"cudagraphs": cudagraphs} - try: - cache_key, debug_lines = autograd_cache_key(gm, args, aot_config, fx_config) - entry: Optional[AOTAutogradCacheEntry] = AOTAutogradCache._lookup( - cache_key, local, remote - ) - if entry is not None: - compiled_fn = entry.wrap_post_compile(args, aot_config, fx_config) - log.info("AOTAutograd cache hit for key %s", cache_key) - counters["aot_autograd"]["autograd_cache_hit"] += 1 - cache_state = "hit" - cache_event_time = time.time_ns() - forward_time_saved = entry.forward_time_taken_ns // 1e6 - backward_time_saved = entry.backward_time_taken_ns // 1e6 - cache_info.update( - { - "forward_time_saved_ms": forward_time_saved, - "backward_time_saved_ms": backward_time_saved, - "time_saved_ms": forward_time_saved + backward_time_saved, - } + with sanitize_gm_for_cache(gm): + compiled_fn = None + cache_info: Dict[str, Any] = {} + cache_key = None + debug_lines: List[str] = [] + cache_event_time = time.time_ns() + cache_state = None + fx_config: _CompileFxKwargs = {"cudagraphs": cudagraphs} + try: + cache_key, debug_lines = autograd_cache_key( + gm, args, aot_config, fx_config ) - time_saved_ns = ( - entry.forward_time_taken_ns + entry.backward_time_taken_ns + entry: Optional[AOTAutogradCacheEntry] = AOTAutogradCache._lookup( + cache_key, local, remote ) - # TODO: should we use the same field for remote cache time saved for both - # FXGraphCache and 
AOTAutogradCache? - # add_remote_cache_time_saved(time_saved_ns, is_backward=False) - if ( - ephemeral_increase := add_ephemeral_timeout_increase_for_distributed( - time_saved_ns + if entry is not None: + compiled_fn = entry.wrap_post_compile(args, aot_config, fx_config) + log.info("AOTAutograd cache hit for key %s", cache_key) + counters["aot_autograd"]["autograd_cache_hit"] += 1 + cache_state = "hit" + cache_event_time = time.time_ns() + forward_time_saved = entry.forward_time_taken_ns // 1e6 + backward_time_saved = entry.backward_time_taken_ns // 1e6 + cache_info.update( + { + "forward_time_saved_ms": forward_time_saved, + "backward_time_saved_ms": backward_time_saved, + "time_saved_ms": forward_time_saved + backward_time_saved, + } ) - ) != 0: - cache_info["ephemeral_timeout_increase"] = ephemeral_increase - - if compiled_fn is None: - log.info("AOTAutograd cache miss for key %s", cache_key) + time_saved_ns = ( + entry.forward_time_taken_ns + entry.backward_time_taken_ns + ) + # TODO: should we use the same field for remote cache time saved for both + # FXGraphCache and AOTAutogradCache? + # add_remote_cache_time_saved(time_saved_ns, is_backward=False) + if ( + ephemeral_increase := add_ephemeral_timeout_increase_for_distributed( + time_saved_ns + ) + ) != 0: + cache_info["ephemeral_timeout_increase"] = ephemeral_increase + + if compiled_fn is None: + log.info("AOTAutograd cache miss for key %s", cache_key) + counters["aot_autograd"]["autograd_cache_miss"] += 1 + cache_state = "miss" + cache_event_time = time.time_ns() + # Count missing the FXGraphCache as a miss not a bypass + except FXGraphCacheMiss as e: counters["aot_autograd"]["autograd_cache_miss"] += 1 + # Special counter when we pass autograd cache but + # fail when on inductor guards + counters["aot_autograd"]["autograd_cache_guard_miss"] += 1 cache_state = "miss" + if config.strict_autograd_cache: + raise e + except BypassAOTAutogradCache as e: + cache_key = None + counters["aot_autograd"]["autograd_cache_bypass"] += 1 + cache_state = "bypass" cache_event_time = time.time_ns() - # Count missing the FXGraphCache as a miss not a bypass - except FXGraphCacheMiss as e: - counters["aot_autograd"]["autograd_cache_miss"] += 1 - # Special counter when we pass autograd cache but - # fail when on inductor guards - counters["aot_autograd"]["autograd_cache_guard_miss"] += 1 - if config.strict_autograd_cache: - raise e - except BypassAOTAutogradCache as e: - cache_key = None - counters["aot_autograd"]["autograd_cache_bypass"] += 1 - cache_state = "bypass" - cache_event_time = time.time_ns() - cache_info["cache_bypass_reason"] = str(e) - if remote: - log_cache_bypass("bypass_aot_autograd", str(e)) - if config.strict_autograd_cache: - raise e - if compiled_fn is None: - # Set the cache key so we can save a cache result later - if cache_key is not None: - aot_config.cache_info = AOTAutogradCacheInfo(cache_key, time.time_ns()) - compiled_fn = dispatch_and_compile() + cache_info["cache_bypass_reason"] = str(e) + if remote: + log_cache_bypass("bypass_aot_autograd", str(e)) + if config.strict_autograd_cache: + raise e + if compiled_fn is None: + # Set the cache key so we can save a cache result later + if cache_key is not None: + aot_config.cache_info = AOTAutogradCacheInfo( + cache_key, time.time_ns() + ) + compiled_fn = dispatch_and_compile() - cache_info.update( - { - "key": cache_key, - "cache_state": cache_state, - "components": debug_lines, - } - ) - chromium_log = get_chromium_event_logger() - chromium_log.log_instant_event( - 
f"autograd_cache_{cache_state}", cache_event_time, metadata=cache_info - ) + cache_info.update( + { + "key": cache_key, + "cache_state": cache_state, + "components": debug_lines, + } + ) + chromium_log = get_chromium_event_logger() + chromium_log.log_instant_event( + f"autograd_cache_{cache_state}", cache_event_time, metadata=cache_info + ) - chromium_log.add_event_data( - "backend_compile", - cache_state=cache_state, - cache_event_time=cache_event_time, - key=cache_info.get("key"), - components=cache_info.get("components"), - cache_bypass_reason=cache_info.get("cache_bypass_reason"), - remote_cache_enabled=remote, - local_cache_enabled=local, - ) + chromium_log.add_event_data( + "backend_compile", + cache_state=cache_state, + cache_event_time=cache_event_time, + key=cache_info.get("key"), + components=cache_info.get("components"), + cache_bypass_reason=cache_info.get("cache_bypass_reason"), + remote_cache_enabled=remote, + local_cache_enabled=local, + ) - torch._logging.trace_structured( - "artifact", - metadata_fn=lambda: { - "name": "aotautograd_cache_hash", - "encoding": "json", - }, - payload_fn=lambda: json.dumps(cache_info), - ) - return compiled_fn + torch._logging.trace_structured( + "artifact", + metadata_fn=lambda: { + "name": "aotautograd_cache_hash", + "encoding": "json", + }, + payload_fn=lambda: json.dumps(cache_info), + ) + return compiled_fn @staticmethod def _get_tmp_dir() -> str: From 6bdbc8655073840433612edfca04a25553e8f893 Mon Sep 17 00:00:00 2001 From: Bin Bao Date: Tue, 5 Nov 2024 19:54:49 -0800 Subject: [PATCH 144/503] [AOTI] Fix a cubin file path issue (#139848) Summary: When we use aoti_compile_and_package to package the AOTI compiled artifacts, cubin files will be included, and at the deploy time, we should setup the cubin file directory to the right path that contains unziped cubin files. Pull Request resolved: https://github.com/pytorch/pytorch/pull/139848 Approved by: https://github.com/aakhundov --- test/inductor/test_aot_inductor_package.py | 39 +++++++++++++++++++ .../aoti_package/model_package_loader.cpp | 15 +++++-- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/test/inductor/test_aot_inductor_package.py b/test/inductor/test_aot_inductor_package.py index f0953c44440567..4e2e686ecbd478 100644 --- a/test/inductor/test_aot_inductor_package.py +++ b/test/inductor/test_aot_inductor_package.py @@ -9,6 +9,7 @@ import torch from torch._inductor.package import AOTICompiledModel, load_package, package_aoti from torch._inductor.test_case import TestCase +from torch._inductor.utils import fresh_inductor_cache from torch.export import Dim from torch.testing._internal.common_utils import IS_FBCODE from torch.testing._internal.triton_utils import HAS_CUDA @@ -107,6 +108,44 @@ def forward(self, x, y): ) self.check_model(Model(), example_inputs) + def test_remove_intermediate_files(self): + # For CUDA, generated cpp files contain absolute path to the generated cubin files. + # With the package artifact, that cubin path should be overriden at the run time, + # so removing those intermeidate files in this test to verify that. 
+ class Model(torch.nn.Module): + def forward(self, x, y): + return x + y + + example_inputs = ( + torch.randn(10, 10, device=self.device), + torch.randn(10, 10, device=self.device), + ) + model = Model() + with torch.no_grad(): + torch.manual_seed(0) + model = model.to(self.device) + ref_model = copy.deepcopy(model) + ref_inputs = copy.deepcopy(example_inputs) + expected = ref_model(*ref_inputs) + + torch.manual_seed(0) + with tempfile.NamedTemporaryFile(suffix=".pt2") as f: + ep = torch.export.export( + model, + example_inputs, + ) + with fresh_inductor_cache(): + # cubin files are removed when exiting this context + package_path = torch._inductor.aoti_compile_and_package( + ep, + example_inputs, + package_path=f.name, + ) # type: ignore[arg-type] + loaded = torch._inductor.aoti_load_package(package_path) + actual = loaded(*example_inputs) + + self.assertEqual(actual, expected) + def test_linear(self): class Model(torch.nn.Module): def __init__(self) -> None: diff --git a/torch/csrc/inductor/aoti_package/model_package_loader.cpp b/torch/csrc/inductor/aoti_package/model_package_loader.cpp index 5443e729d15204..a8fd4b0da8c19b 100644 --- a/torch/csrc/inductor/aoti_package/model_package_loader.cpp +++ b/torch/csrc/inductor/aoti_package/model_package_loader.cpp @@ -54,6 +54,13 @@ std::string create_temp_dir() { return temp_dir; #endif } + +#ifdef _WIN32 +const std::string k_separator = "\\"; +#else +const std::string k_separator = "/"; +#endif + } // namespace namespace torch::inductor { @@ -286,6 +293,8 @@ AOTIModelPackageLoader::AOTIModelPackageLoader( std::string cpp_filename = ""; std::string consts_filename = ""; std::string found_filenames = ""; // Saving for bookkeeping + std::string model_directory = + "data" + k_separator + "aotinductor" + k_separator + model_name; for (uint32_t i = 0; i < zip_archive.m_total_files; i++) { uint32_t filename_len = @@ -303,11 +312,10 @@ AOTIModelPackageLoader::AOTIModelPackageLoader( found_filenames += " "; // Only compile files in the specified model directory - std::string model_directory = "data/aotinductor/" + model_name; if (filename_str.length() >= model_directory.length() && filename_str.substr(0, model_directory.length()) == model_directory) { std::string output_path_str = temp_dir; - output_path_str += "/"; + output_path_str += k_separator; output_path_str += filename_str; // Create the parent directory if it doesn't exist @@ -378,7 +386,8 @@ AOTIModelPackageLoader::AOTIModelPackageLoader( throw std::runtime_error("Unsupported device found: " + device); } - runner_ = registered_aoti_runner[device](so_path, 1, device, ""); + std::string cubin_dir = temp_dir + k_separator + model_directory; + runner_ = registered_aoti_runner[device](so_path, 1, device, cubin_dir); std::remove(temp_dir.c_str()); } From 8d983aaf6808da26503fa4fa852f6cf18a100e70 Mon Sep 17 00:00:00 2001 From: atalman Date: Wed, 6 Nov 2024 17:14:27 +0000 Subject: [PATCH 145/503] Add conda install to Manylinux 2_28 images (#139894) This way we can use these images instead of conda-build images for all workflows in test-infra. 
Please note: - I am using existing conda install script, thats alredy used in https://github.com/pytorch/pytorch/blob/main/.ci/docker/conda/Dockerfile#L47 - PR with update to miniforge will be posted as followup Pull Request resolved: https://github.com/pytorch/pytorch/pull/139894 Approved by: https://github.com/Skylion007, https://github.com/seemethere --- .ci/docker/manywheel/Dockerfile_2_28 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.ci/docker/manywheel/Dockerfile_2_28 b/.ci/docker/manywheel/Dockerfile_2_28 index 2e2998c6770ed3..fd31f3b43f3f7a 100644 --- a/.ci/docker/manywheel/Dockerfile_2_28 +++ b/.ci/docker/manywheel/Dockerfile_2_28 @@ -117,6 +117,9 @@ COPY --from=jni /usr/local/include/jni.h /usr/local/ FROM common as cpu_final ARG BASE_CUDA_VERSION=11.8 ARG DEVTOOLSET_VERSION=11 +# Install Anaconda +ADD ./common/install_conda_docker.sh install_conda.sh +RUN bash ./install_conda.sh && rm install_conda.sh # Ensure the expected devtoolset is used ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH @@ -138,6 +141,7 @@ COPY --from=magma /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BAS RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda ENV PATH=/usr/local/cuda/bin:$PATH + FROM common as rocm_final ARG ROCM_VERSION=3.7 # Install ROCm From d6034016e2790c397d79b7efbcf45e95d9aeb820 Mon Sep 17 00:00:00 2001 From: Huy Do Date: Wed, 6 Nov 2024 17:21:39 +0000 Subject: [PATCH 146/503] Run slow jobs in trunk commits (#139842) Per our discussion in https://fburl.com/gdoc/voce5o06, we will run slow jobs more frequently on all trunk commits. Note that slowgradcheck jobs are moved to periodic as they are not about running slow tests. There are currently 3 GPU + 2 ROCm + some CPU `linux.4xlarge` runners running slow jobs. So, I don't expect to see a big increase in CI cost after this. Also, these slow jobs will only run in trunk commits, not in PRs, so their duration won't affect PR TTS. 
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139842 Approved by: https://github.com/clee2000 --- .github/workflows/periodic.yml | 33 ++++++++++++++++++++++++++ .github/workflows/slow.yml | 43 ++++------------------------------ 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/.github/workflows/periodic.yml b/.github/workflows/periodic.yml index 1063df69be5d65..31a233aae513f4 100644 --- a/.github/workflows/periodic.yml +++ b/.github/workflows/periodic.yml @@ -290,3 +290,36 @@ jobs: build-environment: linux-focal-cuda11.8-py3.10-gcc9-experimental-split-build docker-image: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.docker-image }} test-matrix: ${{ needs.linux-focal-cuda11_8-py3_10-gcc9-experimental-split-build.outputs.test-matrix }} + + linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build: + name: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck + uses: ./.github/workflows/_linux-build.yml + needs: get-label-type + with: + runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" + build-environment: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck + docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9 + cuda-arch-list: 8.6 + test-matrix: | + { include: [ + { config: "default", shard: 1, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, + { config: "default", shard: 2, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, + { config: "default", shard: 3, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, + { config: "default", shard: 4, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, + { config: "default", shard: 5, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, + { config: "default", shard: 6, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, + { config: "default", shard: 7, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, + { config: "default", shard: 8, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, + ]} + + linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-test: + name: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck + uses: ./.github/workflows/_linux-test.yml + needs: + - linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build + - target-determination + with: + build-environment: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck + docker-image: ${{ needs.linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build.outputs.docker-image }} + test-matrix: ${{ needs.linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build.outputs.test-matrix }} + timeout-minutes: 300 diff --git a/.github/workflows/slow.yml b/.github/workflows/slow.yml index 5aeaf0f13a8c17..cf7599063111e5 100644 --- a/.github/workflows/slow.yml +++ b/.github/workflows/slow.yml @@ -4,14 +4,14 @@ name: slow on: - schedule: - - cron: 45 0,4,8,12,16,20 * * * - - cron: 29 8 * * * # about 1:29am PDT, for mem leak check and rerun disabled tests push: - tags: - - ciflow/slow/* branches: + - main - release/* + tags: + - ciflow/slow/* + schedule: + - cron: 29 8 * * * # about 1:29am PDT, for mem leak check and rerun disabled tests workflow_dispatch: concurrency: @@ -47,39 +47,6 @@ jobs: curr_branch: ${{ github.head_ref || github.ref_name }} curr_ref_type: ${{ github.ref_type }} - linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build: - name: 
linux-focal-cuda12.1-py3-gcc9-slow-gradcheck - uses: ./.github/workflows/_linux-build.yml - needs: get-label-type - with: - runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" - build-environment: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck - docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9 - cuda-arch-list: 8.6 - test-matrix: | - { include: [ - { config: "default", shard: 1, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, - { config: "default", shard: 2, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, - { config: "default", shard: 3, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, - { config: "default", shard: 4, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, - { config: "default", shard: 5, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, - { config: "default", shard: 6, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, - { config: "default", shard: 7, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, - { config: "default", shard: 8, num_shards: 8, runner: "linux.g5.4xlarge.nvidia.gpu", owners: ["module:slowgradcheck"] }, - ]} - - linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-test: - name: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck - uses: ./.github/workflows/_linux-test.yml - needs: - - linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build - - target-determination - with: - build-environment: linux-focal-cuda12.1-py3-gcc9-slow-gradcheck - docker-image: ${{ needs.linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build.outputs.docker-image }} - test-matrix: ${{ needs.linux-focal-cuda12_1-py3-gcc9-slow-gradcheck-build.outputs.test-matrix }} - timeout-minutes: 300 - linux-focal-cuda12_1-py3_10-gcc9-sm86-build: name: linux-focal-cuda12.1-py3.10-gcc9-sm86 uses: ./.github/workflows/_linux-build.yml From 99deedff57feca48af8a364e49325c99acc0a541 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Wed, 6 Nov 2024 17:29:08 +0000 Subject: [PATCH 147/503] [ONNX] Describe memory usage of TorchDynamo-based exporter. (#139388) Add a new documentation to show one memory usage benefit brought by TorchDynamo-based ONNX exporter. Also add a unit test to make sure TorchDynamo-based ONNX exporter works well under FakeTensorMode. 
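A rough sketch of the kind of workflow the new documentation page covers, assuming the `torch.onnx.enable_fake_mode()` / `torch.onnx.dynamo_export()` API of this release; the model, shapes, and the commented-out save call are placeholders rather than code from this patch:

```python
import torch

# Building the model inside fake mode gives it FakeTensor weights,
# so no real parameter memory is allocated while exporting.
with torch.onnx.enable_fake_mode() as fake_context:
    model = torch.nn.Linear(1024, 1024)   # placeholder model
    example_input = torch.randn(4, 1024)

export_options = torch.onnx.ExportOptions(fake_context=fake_context)
onnx_program = torch.onnx.dynamo_export(
    model, example_input, export_options=export_options
)

# Real weights would be attached when serializing the ONNX model, e.g.:
# onnx_program.save("model.onnx", model_state=real_state_dict)
```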
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139388 Approved by: https://github.com/xadupre --- .../torch_dynamo_exporter_memory_usage.png | Bin 0 -> 1263559 bytes .../torch_script_exporter_memory_usage.png | Bin 0 -> 1389450 bytes docs/source/onnx_dynamo.rst | 8 ++ docs/source/onnx_dynamo_memory_usage.rst | 111 ++++++++++++++++++ 4 files changed, 119 insertions(+) create mode 100644 docs/source/_static/img/onnx/torch_dynamo_exporter_memory_usage.png create mode 100644 docs/source/_static/img/onnx/torch_script_exporter_memory_usage.png create mode 100644 docs/source/onnx_dynamo_memory_usage.rst diff --git a/docs/source/_static/img/onnx/torch_dynamo_exporter_memory_usage.png b/docs/source/_static/img/onnx/torch_dynamo_exporter_memory_usage.png new file mode 100644 index 0000000000000000000000000000000000000000..52701155a0c853aad8fdc2e2befb9494cbb68127 GIT binary patch literal 1263559 zcmZU)bx>Px&^8REKq(a3Qe0As6?Y5P7IzKqT2kB{T3Vzy6nBRNch^F25AN>4Ed<{F z-sgS3f4-eLb7s%nGIQ?PYxmmSJ4{ts=G9B`muP5cujFJU)zQ!}@zBt)nqOf3`x0u( zck}Os?xHU91+9FPa{r(5-1@W9XEd~`Xxs-=%zrwLqpYqA8roae{~q)eD}W~&+Eam? zo<<`0Jr56|Mv?MW6#WNa3w!h42T3U@ z)4@MX{ey$&QK&<={Ygx>gXuH>|MtzD6twG9{qajqPOezJ;I{|fn>Q%KGNU$f_ZF%$ zy}I)#ntyB5+b=_&a&p}cXDa7ySE>Tq=H_(#SW7|B#Q(;>d6O`0gAweqJ1X_*6Powc z!8Vl(w=q_)sD>`F+5=&difF^1az%n~iMB(W#zr>22#fUC*ay+8=5*EyQZJ#blA2g+ z(>oXTV_;Hmla6#|_4MLA^l*x0^*ph*T%l&#r3Ll-#)AoFEH(3#Vx9Tf(^D`r;L!^) z=c2GMGC%jM{d!O1fm^<# zP5v}{gY3w}2Kb($M^Os>CNc6vH#Id~IkVQ5t4H3RZBFD$;qp4HEC~nycfRrm=}SG8 z7UEle%Yi=zr?Ec*q3UOrYK<=HiOOL`A}=*%?{<@m99QU11R(%~CFtIp4OKmw?`xg# zZb-j2en{sxcM_+K?!xK*yEJR2-v}4`#fgBa`2`Z+rhDVDGcj!wul5^y=I$nzqKZi3 zQu56phv|KJmxcYix9RM5rre1A8(Laz~_%97nv?%{HPnj zBz@5B>y^dKQ5LGkC=hA31KOlRi^$EdMqDh zHTifZ!Y7;5Re-fRcB0`X9j@i-op+J+1Z&kV7zPDV;9m_C1jjv0C-FR3P&c|vE8>f0 z4I{D|<`iEh9P;7{y9MT1xT!b49~kh;G_d~lj;G{QB(4UJTkI)6_goCVpTtlLz*RRs+Gw`L4Vug zD%thq){mr|FiOSK_hS%OyZIzusj|EeXtQW0KU6Dl+C5N70iPyaITSdyk%(O1zw5Au z)WxGJtZ+ejRn|&B( zvAKfpV~=KjiCW*77QOL=Z$tq}s-|S~gA|8N%(mwFZfw9E{Zm(I^Dl7W!v(SGmMmX$ za`n({cf2yYoZKLTjjtg_h40N{e!PaAxu_idxgGAR$lbt$)Hcl~T^SFp!m+sc24pD2#KX6F#X4Ni%!vA& zdHp(fD(u;Douh=BnSgyKmA1iejt;7zO>mFg$IUIxFKk_T@_+pBI+RJnmHn{IuVyeF zw-4OnwfX3%MIx}vJIIXv1n|$8C8+3_f3U(DN^=jk5vBRtZ1x>&VV9f8(~Y3nT)URh zWDyVHY81Al#N$0n;&WUAMCZmGZY9{`H~ISKovyORoo0&DbD!09Px%!!J%GX%{MOUE z{guDG(CuBgwrY|3M=;-Es9)Hx=)eLC0NtIkFb@ z?R0HfU^Aen{YxC(1Wv-UtG%I)Tgw-Iw&R%RuQ2cW-enB!Ulj4s%Y21nrQ6K4m|~u^ zJ~B_27~;0w|sy;>z}!^p6{HVMJ0IACCHRPSg_~A7-KRInH;r(hIiS5KUgN!aZPS*vv>`V=F!q` z<{O+FHETR_-5=p^Mlc(boBK;iA9M-8-y2W zA3Piy>#(@`EQah$QS7^I?Es%*vR(4u>^Tp_2xP^0fSc_BedoU0NnLAd2*3S&gFa$e z73IIvavf$HTP6Lxx)*lBOdYjSat>RY`sI&l9aIM$SPi_`2;@{erRCxfcfK!XlBxQX z1#-yKrUy&6d?NUKA$pi*v(1aCz125MkwNbGe5cH_p(cXw{5qj088+u^1Wu>pO0lq~ z#lC|MLF%Jh1)$>P!<6|2@|EMcMPzhitxI%mnJT+F@8+>1+^)xzh2zX1ltmNFKvCu6zP@)RNhwSmO*Ewafre?eW%+^!JZTZf z1hIqoeOe*Af~*MDRGl}Z+@1_M#KfG;EKY2WO38a;O;*F5mzWD~lC5mX){%7k7xl6J zMT1}>!Lq_b)ZG@pwW|4*m_unBhm1Ks&0PsMM97k0@`9XD zQwh?nImD_;oslF_#N#|{UZpc$>_jVq9Om)7p0_@md!qg{>pr4J!+JP~Dy_OR7 z?}rD$8)fN=3TX_FC8R4y1W)!JF7LNi`Kv*Z>O-z=OYsXu(?L#nvAo>IMopG_H%f?O$@iz)A1l7b?R3g#R!crT>S< z1p;uh`>DtDRPYb+?ZP2PQ5CvM-WqXei|zi=!&jacJ(Wzb&t++GIE8&HJa4>{R8grCAPEjq>)o!tF4NVPHZJ`1N(#| zTB){t=(%^ywqfBr4f!o6!Rh8={0t8=??@1;m^mmlzdBZg(wm~i0%O~vc%#HyBdEtt5o?;fs!msjK}M^c4ey^x>)IP-cx7nXG5=b*h7fJ# zAJ!-4hz-^!aUXV2C#30d%+EkxMv7dCFoAiX+|W1bAn)JyfPXdvJubR_l_GN;+1fho zJbj&T?>Z#gW&{y~`(%W$4#gH!tJxVv#`ul)>&JFRkaHw0wPXl+uTCs#ZX@)yOU$XN z#}ApzTV2ysX13$5KY9&a^DKN`uj#fBYq(c&djR>k4g!Uyrdv~*m)h%XO9@pwq*x*{ zfK)dtZrHh{)d*&F@q@YVNAL2|!CvYfoC9KzL`_d&&l}tjpiY0i3&izt5;+B#l=m4` 
z0)hVgVy!d3jbgf+nWmt;^O^cA)FwexL1*)u?utGs$O>k<0{FHTYWs_Yy@k?ecC_!& zPuLYjJPh)Sy9E0*)l5ydJ5Z+O8@aKl*{zoFHw(*9F-|+NXwVasz@HN`s`NosR=+c^ zqeIk+@6M?RBLbt3Hs@uNl%Uf4^$ajoYe)kXWV<*TGy3q?Kz%L5kN)L%Fqh~U;6s(N@4ZAGnj6*sz!lI?WCzhTlk*Q#Qc4{Z<9~nkkT)!L9wWZV@gP z6tG5vF?Fr?HXi&fP~$R4^$;;e}T9dm2cDU{GM5hXw98&mpimJCOyMc)QE#9K_UEFOC~u*55cd$zq@E;(CJy(-m4*FU z(ELo#mZaZ~{U2v8UiybdUz3Gw-coz6(Hx1pl8OR7x_HLgpS9c(HkuuROB%GWJ%BmOgCi2wcKdT@`qx*PB% zhPTJxeB+{h{OaKb>nR}bw`F{ecCjxBOxTCoY!Yo)Bb;v6Z$(Z!7{@EsBn%cB2>c&R zpD7pHF)T`qcE`WBIVv3S-2Rm`-t5Q7Yo`tnct!SOm83#CRZmVRq<71`xVfE*V8xF_ zI^q^BGPSN7K5G!3Wb8{uyW&R>&-nD?k;J$hNL^#%T=4$x*2x!jrQ>?&CU!7g@~bO| zy|)%eBR1whb#W>;F+F_M`tb*t?_doU<}zO3LvY!$_Jviy>NB{jVlOI`xkje0C_-^K zjgKJ7(2EZ|TNZSZv0(mJpVsk%ZhZ-e-pOS0)@g5|@RfgtVDH{#@lK`vgc)A@0Xd%E zA+AM!A@n--Cum4}?Ym5-#D&(eYK=7C<-s&Mf&dUkxUX)M2KGrw<~EdNZ%miA z*~+k&ta_Wjv9)OQ@X;6N4K5Min=;S+CTmS0hFq5h>aSC|LYV>AmbUX=I&}cr?a?xQ zr)47TH7G~N47wClZ)*6K_V%)dW4y%Lc)C~(gXHQs9MGpMt(6F7gZNJFC$Sev9mA|V zfY*qHFl>N7vA~Kq3BSV%3x%Q*-{{9U44pttPvw7xI9Gbzfj#}Wg2tA{*BehD{#&#K ze-ACDRzfr>Jb`qH}+xb_~2Y zRW4m^rK%rP;@XSDT^DR8`Zn_r_T26LpZZ*iDqFw0Xhuq*DAS6Lvg^gDV&e*Yd#z*d z(TMp_?PK$WpG=--^GTd;gYOL7?ua}Ok7MVW>{!a1@5x*5c3^@|jwC1TC(a@l4=jSQ zrdtsGKeVqlJD$kPfLC2P=JT9G;xGRKdd9?+)L;$h2ZEDmD2~1GD&v4?a~zwyN1E){ z(NbjroX^S_mRzO8SyP^H@fk>y77s0(l;$XWNY7w|6Hp(IpU*M(gWt7VE6LX8Ooz7-wKc1rKh$#90q(_4%ujpOrd z`=rUJXgkA5B%%o;TI_=Pr1eRy_HdkNOze!)p4#_KFg`_a-oa_ktl=b-qdY0o^O=em zic%ePE$v4xLS3`q_YD3zd!b49n8LmeOM1uy;&k4|h)fvR{51Kvd@sC%B540CEiZz9 zG*{ZLpl>obOz*i3H!WVxM*jy25z#Ucow1D;is`(ByEkoUBdNSyHPIbEse}m*&??ze zXmzOq%;3I=VsmX7H(Q5LeTPOPzogO`bkkx>pKk^gKk&|wH=CovC!H4@e%ViDv{#+@ zB6qj6o~LIyx6Z?Y2HEXTX}qNcQB+ChT^r)Dxx{%;g#~+gaU3C9H5-iM{gQ|3^}}KT33}TH#11onV~K(T?RQuG?GzCAjZVt)d*PF))a#-m`ddmrPLpvMOISTNB|Z!gkP+#vl)5>x-s7bEWb}fo zV6#r!2*x5I{t5A@y^&dX==VdWzKm zn?+$$YcVMz>_>~0SRO0h*uz|Irz_g4o5%{89G9U&Qq+LV>v#C3S*~T>4 zt)C$)wf7|#p5MMxsj&$5nE2)UOy6}oJaT77hNn}eT#Ra`#pS1-?}T7!IFo{q|MRT& zn>S)eGhX(_a|U1J@owWotMu(d-p%Qb&+0~hSm}nae5&GAp=!n&3ep<=h;XtMOb*LI zM0Q&PiksD!j|7Z8rBJ7jRP%*sc ztPQD1XUjVhpR`}tD`wPD3MnUnb(McY+|L#{G^)h-(GiiEzmWiM;)Ydk4*S*}EQ9eH z66r`!3=~j6;`)0XDZlUY_RJ>Y*<}=GABq0Se(!{(uDz*AtnSl~;nb zDL7b4cuBx&wAcnsf@Sp{vt(QsN9#9h+9SV zDm5&H(++@fzN`I`eUR~qf-WSQmv~+ZBX_g7&A_)MUi-3S1Q6#UrWdQBb(%+jr z&#D+tFBeyzttXs~A|3K%VEnfyZLvDlU&F-0;rR=RECR18I&R2jj>BQQtGTgV{&EncrRzm_6hh zJ8E8SxH2vQ{={hu9Gy_=s7IO2RA_Lz_q`OkUK%33e`uSlHl;W6@)E4K7cUN?WN26@ zSN0l@!BGb7&xbjI4XHa}<_w#0`cEOo{wbljK5jEl#ivs$gckU90T6CTuMWIDsVP8I zI{FS>pYW*hqKk1n$IGyvtS?Tv-*U#vyHU~6ow0NT!R;69I+qP4aMI!o@W z8jdo2nyVq=X45_nYjcSSjr+e9D$Op?w^EN@oaV33tqVQ?U8o>4;NVz9bf=+!QLxbn zv)SrO+Z=Jxf%@-UIzSj~0iHI2Y`rjT@R9K*YPZ!8yTiVcQGo|gf>AYV>PigSeiTF7 zo%|}q&Q$ZBS7mQPHQe~Tj|4^BH>Gav|9qQ>xf&kQX=1@0Yrp4jzdNAiEqxaXL$qS?5VLB*_(7^Qsj2L9|ot(jN-pIDy?xVPNCAYC-~WSd8W-AiICIv5Ws8twJxO#ZiLZu88mYCoRTL zI*MZc&vkA3jwe_&ArWczEf@r1o-FK*mEzQ`yIQ;zgki^v{QJnSmPK18-M0gKlbgQq z?_NYCmxn$FoWh}vs$tnhVD!)fJu~18GkrCWMT^zw{gY>Tauw}08apNDYX>~PS0w4P zR3w)jug63lUo_X*M6#vv(=IdJSgVH_k`)_vh@JRt+2FO`lY`3BI33r}Y?m6*Rm5*Z z*jo;`U0Y6lTTh1xWkucboP6B7j%2mO|IBVCVSYR7B&=AAl|g{q773!ok(AW!yUKsd zjl*6Y&JsBZnVPE483;~laC-D9&M6YNiayw-2Rz;hR@``bD#N|f3hTJ6SDqayIno%| zZR#VMadAT2&CV{fP6uXJ&F6Q5JogKdRQcrjsQkv5;gK5Rx5BRbzh*<)Lfawb0N+cR zCFiLtou*rlqm^p8tF%qdv)TRrN>Eo#MeDC`6>XHL`x{tsw0IxpGSGZ7*&!x9^;yvX zv!V9$5NXpr{hkS^wKb@I6lQ>$kf+{1uyM8P~M)^8mB$h z0Mmood7T=8H`+!XxT&)h*+W%N{WzBg%M>c&XD_e}mayCIFaNSg7=Nl@FRKC{O9y;H zgS1|+eOz)q6m3L~E^03BMf+5=OWNr$LiiA&|G*g!g*5(webS7vY@XAt?eII>xhepG=s9Pzhh6zXDgK+gAsPe7!#{vV3u141$I*WNcA?~W zx<Twm;VmWF02(86DsI&5FkD8U1lYZur@c1tV=EC(I}+kt78z@R>p7p;l%3 
zxyoe+FRJ&1$sl$WQN@X9t_NOX-0&)&*+RFzvq=jr*dNbQgBo;gqZwoD7Bdv zjeuy;C}GXTe;k4Kq(Fp4{BE8xv0NjLPQQN+s(;V%=d5jR)WBG=ojVS7Gm4?M%n&`~=2!v( zB(?rqRD$SH6MWi0_w@U#>K1Vv_85ek2(zq)DHDx_0o#dBl|AFpn@KLMG`KVoJ!boAdRi>&w%L+mVk z0nM>_QGnwX_a6NoN*O;x`(Iq4uAbdP5uN23Ehc+Vj@+kTeq2_=lKJ}qZ1H8bF?RIG#y=8K1kMIA zNWvCT2433*YL8`P4A?kneexT74^Q+;O;eANvid(-0_%_E*((b*ol$TP=bMvdxI=6t@MELbg%v6zt}JTcJo+6+>*+K9 z`+Y`~sOPVRC#ZJCNyde%=Am;5f-n3Phy^shXdHiRudzyCg*&M+70Y&w0 zc){I5YlrwXwxc0Gci7Ts7UF+5mIXjI5M3vE^J8DbwcP_gBL+6KlEqemH8<$>oB&2 zdA#hJDQl~i*LImG90S|D%6Z=Qfpdq#kAF3}YP0g&su-(&Grr7C(SeW7j$5}SOR;<5 zHHP(9%i8@JLbkNo8MklMKXaUxQ@+f@rDy(Ja;|hisFg0q2H#(i z=a|;Luj6SPFQ2`G_>braIg~?KY$CyV!rWCyH<(|S+60(+Lnnv0N{*?EqZXq@u6Q$B zT`IwgLclp%>wu=|Dx-e!YT3p3_wMyVcnJq}pF$E}Rr#OM3D)p{cpA)R&h1`64FDPX zyn#Z(M8X~==8{Lswu`Nl<{eL1=6w;p$$&mHcyrxcrBf2Hy?3DO>NlN20&N=0ds0!K zo1UElXpHxJLMsn)d}^^k4A|sE8}jaj+Ny?@pNlHt5rq1>TsLRt>iLYra^RVGWbaKL zZ0!)hU~)29*YDh1>3XN0Frz~N1xh#At&aK}4gOl*`QlnPSUj1__rq+f@!8*>3#uHG zFRzHQ5uclV0=V%r`n!5DkhFAvZ~R_7*L>O6iSAeV_0gFQW?dqc{D$&P6nP~1hxdO4 zrMt}3V4o8j;87S90^PCc&s4OY`KPp@DFH9FkO5Lx%VxfWNvSIVc=wrw_aHm_8lnuJ zOUdl}6aJEWt`w>lVK1eQ`R!h-X|Ty9u*gzvQb;boHcbl1uH2CPK`C*)RtZc*i3P;n zka81?M^J?HIKj@4Kg46R+x505&onMNZSke+QrXnj@6*!B4{a6CSvq{3W%ON0io zQGl`OzP8up7h0ri|ChLY ztCcYGG5kIUT$Ag6tk+z6Z=29b%4@fxXZ)qXZ{&*wBURr{hVXg>1cIUeHvJEui$cse zQbJp{>599}TCU&6TI(%86o0Qmqi|!9OMIiop>KUtMGRIgPJB?*ZnXK8p(BkL%*$ym& z#<aH)uN#?RXuEY2Qnx3eC@J=jUq8 zqw$p`7xHN{p@%bHze%@k1Afyd?74rSUl)8kj9&F}!UD})r;Kr*e&`i6D)<{)BV z(94O2py{jE#G&}di`mYK5exNNwTMXtH{0zanuHV)K^iu6{eL}Xr71OMsK&6{heO2K zs1IT@dRrkjCgy88oL;jhrpCXtd5V1rl4!uG;M1^ZDVJ)5?EqWN6W4)@y$O^jVmWyK zgR3Id-Yw6nPJYf~Eyf{XNb6j0cMbbYR=C3Q3{pJQSj zIFm=%(RP`5ZK@;n4GdmtJ)B|x$dH4q%&e_BHI+7T((c~=w6G8k1X1vto=50{^_6R(^%qN0>qqPq*ho)Zx`b3Uh>l&Zpz#h)4a z4LN#cdMkcfvbdTV0ZqgPfU*1x@>hT7X3nJpH0id?LxkQ_&KgZ(FD8yiZ-fQOj<~HC zkqu2Cb7_iSQ$N?RuV29a*L1V6@lc=uLZ+2R%XKJb{Bm0~yRwXMV@{Qww)5Ca3!OYA9gpX}p@no#gEZdhtYY)0_Uw6CQ>JeZN)8Pe-wv$u(wWdONIj z!~5Wr8m@r#x32cfI(2^Yrdy3{$HV#8azgrAu{9hZ(SFYcpfYkO|KP8cx(pZMMzwG$ ze#%zD!d0{Vy0EaAC@d=KrvzNWs?gwrUH<)^Q8efQTmXI0-~eJx0?5zF64 zd~Whgi!MxX6BWV~rlL?nhsQfv91h>TW88TAfz8uE2*TCvXaYu>Gc#|c4x<%1E#WAe zu4Q#QayLn0p1t(CfVVR)Do4?{IEru)EerEX1hS}&=sbv9-u0uxw*5oSK@PqI+koYh>)wp+07Y!2RJMZ^{@JKz34ETMAu-qs8w{u@)@NIcR&{^;$vgcchEH_zEXai zPw86%$}u%C5ZhoIK%I0 z%~*uiA(~IM%oqcY(iwVSJM!0+Z#bQcP{V4pH-A2y>tNYbutN>++4h&wbXHcR3)^eb z@~j`tu3ZN3ff3{Yhp1fBFefZj+RN)u*R8atyW4f1ek~Q+SCUKoUZp>=^$6YB+!xgT z%xyid52>XS>uy!EIFaWN;Cl-fR7~MV=gEJl@z>~~WK9;sNBQt$u>K<6v7GMSpFjE@ zTCN4Ch<=cVPmOt#@mLM_rSDIjB5M&`M=3t9>D<^eb|c=|E;p0O?dwA1ZEwv7xbp`i zFD&GmnC2QyxL3T7LB5xFH@ALw41OwInSLCfTB45OVmSvgagdI;K+eaDgbAda8zlcC zQ$TryhRW9XSIP03T9AmGPQY{k5qyV-hx;`ZRRP9i+$hYfzQm9Ic^e0L=vV-0#eq|t zS~Slt)Y~9*UZX>5RT*>%B(BrliqM_+Z?_zP9_`{6VSoNiQ;9t|6;W^msZ}?)5_``n z%U5FXu;o{<2`BShq`u$?h-d3-HTt?lw%J=T2FQ@HCBLM$fhc*H7 z%yc};aKjLKs1|J(Q=NszBG-%k{git_$bh&>@pY>M!v3MjGRywCwEplSyO)e$iWO+6 zF597HC)82))Pv9cSU8j@4Ik?6k?|;lb0__x5K+ zAsE$vo3;ifAMXjl7Rt)?k1}X2?QcxiiP@$Hv9zS3?Wxw4SANm@2$;ZsOT8%5!r;?u z;9(wtP7|~ZFmiO}9!OelvNb39#xxJ!ltBpxpo(hx+I~fgr8JEX3u$K~of%@Gi?PoS7M9ut7jC3S>lZ9TPJ!o1Acs=9d8}FBSjs~CUYA`Y1hDS zWD3w6{oG=4$cRwh+h2>!%6^tEQ-U% znm2t_X^MyyY~$FYoW8Nrvf@s+$Ns;wPx+dP@YMiiMDWNW?ZvGrT_5#jls4tOPPGYorvIH1rPAp15cBWRi8Pz- zmtCP@oTg)=hW&~+QT$CY{(|`58cwt$2MS$*JzcCH*`k=L$@7+IqQRP#y3jU_i zpSI{^3qMk2g40DQg>i9oss;}PBNX1{_f6`HXy}(-%VWzkJ~I;%_$eCVccjL?#P7n)CUIa)ZP; z=RraSju!N$vr$UT26|R6|2e^EO_kRW0dRO{@j)>uao}(pG{#wEz^6Pc{rJxhkiI)j zwzH__?~?ojgHeN6{08N0vg7`OM0#uJK*1KnbF7eTt1_^xoMPv?AGWFbcG75rR9&xm zl`yBziW%ip5{MY^S=4PAT9Rm@Su#sWCI@qJT$FE2CvKw*??tzEf61Vn5>nO@4XK6v 
zR2FczTL{PL7?qesPugsTa`Zm-5>`2{BV^T&`2=WjH3CvzD44u3^4m4{y@O+uUN2mb zjz7H>q=FD_AG1n#`35Bht@uin$iyjsb8I`nWFSlbJ(s=lxb!osr{`~Z{~*J~P?uAlYu~Y5 zD;>wPKOYDZ9w@2FvkP29uB%KJ?Jt|}rSYC_HGF{w<`LvqOk&i0FSDY2F-7&XVGcD` z@7X`w*|h-qGgnEf4z-6Qirowqj<^59){W*tOZQdtX&-)pbjKYqBl}=I(rB$JCGVWp3shP={#Zeb&h))EBTDy&<3n%QIA( zZsJcD8)Cjub2hCS?D2WcDbJ{h!9PFVu4|}?C);!HCd_H~sO-8oj>!ilZ2+CJG<=d< z=Zp0XA6(SdBFv@jnf4B*ara=|*qDqwW`Mn}tE;at{?!zoNUM`?EOxU zYQ_$H$hC@9Kl9a1)zy8duUwEe1u-8kHXkijjtYL(7A(8ZjBmmxAfGOK;YKQmNxOP_ z>WD`L)J#$stxiP#!!50{=iC4Hx?#1TP#oxOGhqF@=%=gChC*|7)*Kre@oxI*qMHo`%)vtMBK6&*;>i*>rz~ZT98Z*Ib8|>gjt(t4I}Um5~0#^={~%lnUqiwg9;Adfn1s+u5(u z@we-*L4SNAs?^@~>+7)7Y)-}W8iJhly764)h^OaK8f;1)aZ32h^9pi&v@DEh*wKI2 zb(Tzz7Ajxx_}}3i3fOAzYuexWOJp&ttnzv#bd>-jUz7$Wj-=&US_&`(Am1cDHen!d zj(dZpG`LQZN=`NHqWahtB)VS|5s+fhi13nXU)lxdVK=NzaS>L!!_fVj7JsZ-t;OHS zm|eGG$`131jpWZz&uJG29JUIw=cIMVTKZJDeh` z*&qhU)7{pTWML0>2=8$y`AEzA>@RldvsFZtRSI0@8#=a2RRkes4W^@is&1Y73|y%^ zX_+NKk})v!nggZ1^6z?HUI{=>eUZuqN?Cb1$)oR^j>WuGEwxfH@SjYZeP+D?iYkXE zu_L`IQsYR{U=Eq5h#;Hla_5V<~lkX1(i-%T~U#g>5XuH$cK zQ#Zz&14%1g*BB)bOwSls5%kS~^dMVFlYUh6+VAgv<7K%RJOR{^e8|bz_4@8@cN%)w zFVq=RsIWIBvc62~_+G|#zE&gCT0PtEo$8h+PE^0%UMhEA^7g-QGMU%ut$$h@aR~ki ztbRoQbY_?Ejb0NM``!Y27bK5iaY#T#^6Tqnx3B2o%!{{9@m1g}{G)jHE0}@r5lZjnt#^9Lz2S;t6abNb?d)@XS>({XyIq;`Dp{D{+lfk zEmoYJTE1+To`V@}Sf|8t?*@+gsPMM^$woxWPBbttG|U#qy`{zhe;?loP@eyVw}NDPAzY260mf~S3`#nIb0`@nM+7@- zIyb(Au|M^NmYqa1^G~jDM1{vrhmi5O>L~9YABw?UZiTV#M80!023(*KkW^`5diF-{ zPmbJF-`9yM^kBTbKs$jBe&AJ!@ygGPc@NHU_g!?o9T0e$ADbwQ>{(9A1d9zB7I z@V+bXCaj9^6#2nCV5QxPto`ZsmG9NaS6i`E`dwj?HpJi~mZYkami3>PeJ#~?Ub2s&F;=>-hUtPeu5Y2MF~ z7_g;>`|_o|A*KqXX8UFWPhHk&S&Sqe(WhbPC-J`kIjwjnBpVq#S`H$+Ek8c!7F+I5 zy^gf(ll+zK5q&NA#^as%(w8hpeFITa34uy zV$I8Knh$Sd;)epa6*sHLn+8gbC%E<&YOrV6?x;GHbVEfC1go9ru^xctqPe-uc}n4_ zcPgoDogb5Kf<9OeDKwpQJUv!FZQ}fQH8Vjr>{)Ox>d@{=FJU5Ej?MReLWcCF$D-2+ zM_|QE94G+&J+SH@=wTFq;n0dC+i?I!w4<;$1}_%hcOI!_@Oi!aiS4e^JaHELp!7Wy zVnh%v;z~kk6LLDjNXzBA$BFmk{p=a@OgF8EakOlUz8h9PhV|z_VHkii-Y0Xr_^tq8 z9)D;aq$zW}usBiTCNZ3LOSVGIkB9Jkc!f`;A|dNpe9*FU04VYuK1PuD$Nv8 zCCTeB@fOs*^@i!37Bre1i=S~eTa0FKsV~K0X-)p6r5g{ z+JQVW@U}q}wmI-WVvZ2&6?SL~8tdcu^nciUtEjq~rdv2T1c%`63GObzgS)$vjT7A6 zCAdRy*toj~cXxMpx3lxSPu~ChW1OpVd%ktaSc}!WS9f*QteUgFIk2rbS$Qi*!ubs1 z`nF@~M(|_Zb@$CmF#;YK5j>xTJ=m==H5+Wg(`*_M47wB<`j&Ws(+Rmz|b$S}Iw0r2))nwZkE{Lxi%2nfWXbV8zmJ}gH4dyHd z4aSw#o7IoC?0|y|A^lnNHIi~vwb2Sk*4F5J@K&93d*{Ppxa~9De6~#kC8J6H#(iKo zllvm2w(IGrxoQRX(F`!mE9s^uGypxB&k+|N;GW5GfJ;n;Xe59ihT0Rnea#eC`Td@x z_GkjNsrd4#l+385;#T+Lm;0~7mC(hPlmXl&6+(8SjG>4&UoI&Rf*<$R@(|k ztuR_Rw%N>3$c?d#n%O>9i>(R758<)@6wQD28I$2nm_ZMGk?Za=WMHt-^3dXg%ML68 z9v3c;Vf4n8;Oj(_C(SBhel7#D($81%vq1F*38@vYcQQ`G7M%6A!lkZC=gUhqft>F( zrj3_<$DmHSJ;kQ}Kn^?{Qp~lV^3GwDiyRQUPNtQSLrb(?tmqH~8u_L=0R-(h* zYWqf*&bL>yE4J7Y&i4gIh5Qo<>|S@VNVQc8s$O?1oZnxAe-ayf{p!Piy)S*DX0;hT z3Nuk8q2!=MaPzg!>Sl3`Ihl@ev3qiRiv|?~Eu^=mfS7MNpunnmYf${W`zfF0@-l?u zvQO)LGkAUOm*f7mtxCrk+Oe>48zw9~V)u!SnEu(wA!lOoxc3}##iUv7k^6MuFNtMc z0H2VZ!@92aLT(v)`&qu62x9;ch)s!4YanRR(N-m9Ek0SX)C2e5M5*RqnNoS~HbA)5P?I``_0D(GC# z^%5qRiC)fHCrq)nS6p3hPfjrpe$2UprK1HeHtGS1c;Fa$1+iKA*swvYTdd<)fT*X* zKJZn$UdD$?CP9$l8zLC+rR#~4xL+iR+Gur;0K^nM6I{u3d~kW7Iclu%BJ*G-Y`LS_ zsz0U|kK-okoq^7XI#=Hy6YTlgG+*jQm(i{D)#`Esc;NX4bue9yL>c*}?wQo?i`LUx0o63#ct9`peqY&F{bZ_@EJA7+Cw)&`es8|O}6q# zeR1Zf%mK02z5UIXH9mPRp#&)tLZy66!)}^ie$*brn02-f3rrqtc1S_(FKA1%Q9jE? 
zqHVGewvQVnD&kEaPXL5*xLDYj5vITswh~q4RhU_oh*IAuPx@ZOdA-P|Izwe{SKCNU zh8hf+R5nrg*~C(SuUiH!y>c;%Pvu7y1NdUU=3CrffVLoW%?@a8)-VM}Tu802kbvSb z<9CvVt=4iY6cS|GpDdUvRQ%P7bT>zn7zRK&fHtdZvu3Q2R4~^EHh`+&a=|SsL3X|G z(T&)gc@RHDdH1avvWWCg0DRMzZy|ER`C>W~?^A*y9~G_oV#6gddQdg1FiB}<1h<=K zC)LZ`AzYIqbR5HasDG6MAp9$uKUpm|h${!;9?aBZ2%XTkMI-|qC+n2(0-z-&R-V2O zq77Irm9qf)1B#*&P^A%-GYZ6jnShz1kK$7#(%3w>^Xqm3+|I|@=NuX(vcd@MyG{;A zOCJTPk<W6SStVnh?Ynyozk%a zxtHz<0xYZ>p%8(A$7AN3WnRsy7jkSiYrF*d@l?TkNeQQl!HeGN1XJivmqY(@O48Lh9*`at%%;k6g|KhAnGp>?!n-eZh}RfvFjn8_4eMwSM;0m&MM z6SV|#C(HAq;s9yOJqo#Y7U=>HnCa^!G{B*0f6rZ7kxY47WYfc(x#5B<){}k%l1@t@_1h;Pos*EGBX3Qb|EygD?ov^6i1MlQ+h(Cq2K#~gAtE+! z5kq(WY00B;Rzyf~n6(ZM88xjuk#H#hHlo}&h;BL83&Rm1!QOSHav4It<~yEpiI!AZ z1C#-i%0!(CpE6QI_a-|@jy*xTckgYZ{lIKdL0`)1;*rZi5L+uVF1t-|5E~7qWl1Lf zt9(%?JbSKVh1A4*>s`nl;c;LdPm>q|^O|W*j@xRV`5KU*8^RBkH$o_IKCrYm$63(EnobAnA)J)uZs?@ zl4d^Jh?J^^tn}PK0G5q0S-b6u?Qy+o4$29##6fBj965_H9EyA<3q z(&oy-h-af{yN(v78!1xRO2Kd04jIh81}ur4a_30q%h&Nc)talS?OUkftpSjQM@{({ zFNQ2YqTX5f3<4UpauH3Vi%OixD#y#A5_cQ`Vz zB71n1JQMBAHc)dBafvzWU44jtfWP*=8Tr@v)Xy4Rp;Ow$?PqLuR1wq<;uEYUTh+J7qc70afJ2dn4B<*!M(ss#7b z9_}DeI&-gxJ8eeV6c{L(nzUXIy%yn+563dQG6Y^0$u;Bmx!~-8p{L?92ok)C=zeL} zwBdRd8f&d7{z5ss7&lfe>}c$;zd(oF3p+ENeK#dHg>AI8($N4W1M?U zeM}}A*E~4uiY-ukn_r;ltf&Nd6jk^kNu80AF8C#bGg|9Nb}Xmr(CYJUY;)pQpEa*o zUFYtd3?M`Aw(8^XB5%RAdINxuTl)^BKqe*XEd%$6+t%fHUR{8p*kaL3(TQzfk)auB zhWNV&!Hl2drjBEq?v$zD&)lUz>L`xn&nC*Z$1(Ox4qnwrXqN2t9nvhtb@BSO0s1Ol zK&zumZsLPuaQCEbimcr$@nRaT^IpSmtaVs!`{ zLJs_B7cB1g7#adxMuVRVE!{cn`Bc?sT2-)N?(ENfm!(sxRIto0}|y zuBnncurJb#e^sw`N&~;C&Wms({1Qx^Ks#K7J}5j}2#GbC{nQ@pTMZ82n>owgXIn~9 z4yuSU@m4!7-@C79!SyiKd>enKwCD8ROEJ4?Yu{ns1JAQIelBr~v)Wu2OPvX`UZqAZ zDHk{D;J72(uKvn(ytok6{;Cx<*yk{-IKv+2y7Rq(o~;Al#&T^JJ4;#d_2(zHNyC%N zzPGLXELetR`9!0W@hg3TL9%n6En~tYUT>AF_;bi4z&?-{P_kd-VJr}24l4#YE=G|KZXfSw^M*8EjM!RU-OVQ9 zRo{DwlAA6+Jgwd1mY>ZdT)j+o*aQo+Cm}$!h~k8bUrZyf>!0rN0U;>fn=7V1X~Mys zK#R`)R+8fl!o^TFTjFWoxnjeZO$p!u!&9{qSso_piZ5odwcCe&^kWfCy4L<#-Zber zA~&?9cC2aZ?^_q7UId0e#=GzP1x7kms{xvcQMq}14JGss=d9x&+nw(Xq!*xxu<%UEc- zlN)((WM&3BcmC|x6>FiU^pv;~kO^&Yw6iJ+xeUDK?bizV$Ys@e!O-nP0>XL}78CeQ z9mIC1+Rn{92Qw06^ZNrM;?c2Rr^g?>&a%vxZ+UOdSCrRvsyvdMo^BJO3mlczQk?f-qe&3A!dc zvk(a>*0VO(E6sFX8$!LO9KS%(x=^J$^!3`U>4Ar>{lcx?3Z?g!I1Y`_Piy>r20Go( zPcNOM8Gx27M=HTkkD@n?6H1Q{t!l4s013Tr;A#d`CsG!Z)O(+e@Tg z3K~O?P2Sz<;zS{{xT@C|tt+Ahu|G-_2gdss2*T@fgB^q8Zd}6jEQA*9XMId&kg2TJ zKAH(TYk`II8Omy#QIZPV2<J>94r`7av|+?%l$V7H(2JWJ%L8Qx7*&rh8^ zt7}I;9gqB|73{ZPJX*tPlS6ItJ|{mP&b2&Le^TA? 
z3+B*dyrLBRBX@GdITOHERJFlq_C!K#9NZZ!j0`@;scEFgM!>)%psukAWw-CZ)5jm7 z)XV9ydCW|in7qvqj0GV`XCuICWZ!S2v%eE39zBVgnpO~RU~=wx*ie`j7ExVs z2Y2f0;1#4paCkTd2WBxeVSv$9k8t+U!)iorO)W}_N~td^D9DLJYfmRbCs5tpkJz+i z1bD4rY^VptMdj%0?#7PP2)OgQk?BPY%o#A}szaDO%59oPeN8p%x`nt9f24+Zv(KaJ zljIK~u{HAoCMTxJ?LdSDM`54xc!I1Bdq_KH8u9#sI@A z*4khhJp=QuJ^N@^CvoD~A$WVaAUQ1wFMR3+1o(L|kg?~>v$%5pBP`h5apt+_aEdng zUBdzR4jH;hx4+t)Qd_s(*4GKH@Q=f;}g>n70bkv;6m;D zw|3n+hcxga9|>)%SXeZprnLhRvZu=Z_u%~)5(yDdo0sv&b zIV*^Wjzkm#c4u8R+Sry`Kn$X3N2}Dl6()tN3^?)0>GVO5;P8R{@bhE}!oG`4Hp`aw zL!f%QqMWiTA-FC=1#KZNE)khYp$=cYe$D1sG?fFVeHkEhrL{LO3 z5)u+2FHM$J22w^?dapF>7?%BTZ(c*m^%7jZR)UnQY$T`4suYr3q$X%rDQS+%>Vo0U zF-(k1AtWpmo{qh2G!lNwOqmr|FP36-(uilDdx|=to%JF>iItZ~1V_M=@xWdAEB>cC z&wu33@4_^ON2d`UodIuGH?-8;LFKJlbPY`4%*kVDrv5EkDI3bCx2YASrMFRV@D$Rr za^dMhpeVYdNbb({VAaC|-jRtg^KjMeS`^ZxPMi1EX_@14i=5^uB#+t27QA*ogptJ@N|= z;K+$bR7^`rPR7AwM{(+rqd5J@BS?;qh0fIm`{ERS`lIh5it+8yr=R5cGkBcwa!*b= zuAco6V>8R-mt7{B<4xhs5XP#o& zV>rcxE`z?!lYv+K#w!1&WoP5?ah_vckDWP*lY8>fTwjH@fpO@Fsq(h%#Zk&8J2f6j z1cwKXuFFOq!;($y#V1F_r!uY|#fhW)*cT=sasyz);LQekgK`Oadi!zU)Z)Eb7$!Q@4NQwyERLOi9B0_?(~lg+i;o>cRe1$!8@lKQ+_3%7VH`Sjj9@nhKK^0k z+aAW1<2cP@1^H=?$CTt|bpTRnp9h%?KJv&ZJoUs04B$?Sy&wI2!_@X& zwA;+kF(B+EPV@HkLRV`m_4Ob!Gc(lVrVBGG7-BMctE>#!h5K;&@yC>Go_gdstW(3N zy;F@jGa)_PT5!W^uZk{B~BhA zo@Tm;mFaPGcXl8+E){9HS@*2!+4x-};h46Uu`6}bb=Bzv*ns1PvJGC!Q%gzN%wHZ0 zl~rat_8+3IpTt4pQ*EzUxy*>>s#25_%N{->c18Q5jGue#AQtAvajCRg$!~G66Xj*M zpo>b!$)}#89-PLp<42JyZz`7X!7>k{^_!`bA>C9|bPRUxI*e1N9>bYOS=WI>SYYlT zPbf*+96=nrZF?cM?%k^%IggKzXVShE`}XX>ZenRqZ(n%(`s2{?&a%vJ1lM^YQ zM{wrU37kB%mkDr$ij5;(ov6K4frO-V>iZMq0qykpC*kewjq?{TGj<5K$WK|>&w)fT zh8;S7O634gJa!s8w`F75Ft2XTYIILps%V`DE(iz;L;kiM^h0M@&v86+az7%8)z4pH z5=w^3^=oyg3$?57SFe-qz9Fs1wu1ToHe!w&@JYzVBa%~bd`~@j0=u)q851kf*fpb8 z{ZIC{;4I^R;+BJW?CEE4hPrTuw!DpcCod+G+Qm=%`TL=uaGTN<$Z7I{E* zeWW%F_xGZtq!fW+u{e7A8OrZ*JWigU+FOWQmp`C?9Z)gM&F;pW#Db)p0{SG{)}tq= z4+WT#7qtzKI_lKOOYf`p@IA19AEH8nB=d2QLL-jv_b2f%F0P)4psl86rxLu{iDM-9 z(k6h`i8&eLZBBj;%r+y6Zd}K;E0^)k8$ZF>k3PiU#N1u6*GYHl&fWc?E?Uj%{RQ=m zSx?E#MHDTY$ zC)J8Unz$ogN=aAvheRMMEgLR+y#pJntj56$vA-3nvH@k~b@1>ErB82B^A*>xoW+}O zy^eENZen&xrtIC|&wM|bxx~!)1g>1Yic8F8-hJye+$^uegxoA*?DEw6BcE|guQt!N zI^=h+G(-o@OS5RIzk?gJp^wgA#W3SX-nN|x@{`REfS~Y1Bob@uPS*-JZgj$ivr zUC;#}BYP`C0_Dk&b;)lkC-@9<-i7iLe`AJmZVCG#6-=vuA{ZB3vR^Y&zw1ip2iBacMocDcx6F7{yJ}Urwu=*_>_@B zzwyWk^>}w|R2=s2-LHJeqdc}XI}Li~kg}Mj^jZ-{`bO}`<4+M6K7x~H4&%8O9!GO~ z7b>b-RDVn+Bl>%~uw*bIhkA5^e&OViLr70aR*zc?t&K~8LNEH0T;?*7Xl<%kJ15+` zpR^uE>*Y6I>5A~EcpQ4+wU|nJc-XV|Wsa#ue0+7h!_>x%c>^$KOj7s8k49P$w$uL1@N!$lzPtjYMMxd)2oDzL z?2So|eQMFBIH6r@Wa2xB*Dn>rBXJk@Y~O`&N>|j&!zUPrjvT|`!$*)$L-3-%0g*R9nXGi2_GKpf6$D)6STF^t@$Svf zfO%m-<`b~ILer9Q&W|v#-9%Zj2(xcAm4M~;bwu^V&X?8bX^rVuf2T@ zmrHBl6|)_CPn;E5;p!Pk2ccgio~AO6?Z@XlNBp`mj?-QnNp zwmzO%n*HzT=)(C6SFn4>4kp8q8gP;KWFNvG{`T+Uv#0kVM0M=NOKwM@547T~Z?t+V@nK0JT zkMlo12REC%q+lCC%*F{r#+%h8%uEjBpWnQSU5~tsota6Hx!iS7PTI(@jYR)rw^(4Y zT4{@R)Ze;|qO+G^TBZLu^)z-cXs*srz}0F{ZwfzG+)khwgOmGn;p-`PkDU=w{uMhl zFtDx=q)gGr)Zx-UqwW^%zafkwrh z1_b%K;!fEOWrK3J+-jO2&=|xxZPZ3^=@FcOq{L{9wq3(VZ~qL}u3b@ktk}I6W8Lny zMYcq~`iBJ}$lD9Uqm%dOucCYO#z}T~xLe@kV!)lsQe3;veic<&vguP)JswC-jze~o9{q!T7&S2dxwx|}JC5$z zg-j-T(p7mWK@@exXr6|RetNTSceHa?zkXR-@4W%DqxBe?8bMTEAwr`Z3AMWpxqJ2^ zgnBbrQ$zb#g*VW~@yqx1kL-W8I5~mx;+y3Ab=+X$wrH_qX3-#9IBvRfo$fw*|5BTY zOj#8?}bXxW&gIDIo>{E*2~=8?mKuJ9*(we{RK+aRJO7QPS0gnDkWSr#SWo zXHuOOABioI?wB0z!?<8WCWDfY=4Yg%AS)5x>KRZsY~Q;Rzx|(n2YPoe=v`fK=13m> z(ip1Sm^7?6P{{dP{^3WkVsx-KvSM{;tgFBqA6&#) z+H>#pGIXBA9m)nxxOM3qJiM$Jp6)$>b~qfCU3{?x?P6i%U9@U%F*9D0rR5VB$THc+z}d^f-U=x 
zVaK5Zh!6HdQgRx0AK8P$M|a}rp`8d|a;9uatMvgSD8_)=)&}&?&8n4Wt)%r}XNRQFB9|#H!!I@_sV{&<%NX_{m-R5a0=NL8OFWP&E zzdw4Ln^1J+Dz0BIMrn<#`t+imEU9kE^=ox(s9@tw3GIaAz=qs!lPj{VVq$QVpJD9E z&qs)_R?bE5gPbi}v23JV`^VwRe6XXb6h=>XE#cz^Bf zyz)fKk)7G_HjSgZrw_~2LyIjMdhcKyKeiX%p2PvvA0NFQep2bO++15HT~HKGzWkf` zgWvrv>`3%dHv4drtF;=OecU*tjdg1zXIZP8Kc4;K7xDaWd;v#KpHvfc2eX3k;cNej z_ls_0fu%OOQB7;Kn(wVsh)1b}#)*0S?8k3m_rZP0ElgEqZ7xx-XGzK;Yi5H zrl0LVfNuzrf<2+L8JSj^VB2VpY|7crW{p+NXcLCjw}=Z%alN<%o!tWn@`<7@1gh(_ zkF$mckWBnhj`DWo!3h&{|1IktVMCLfTi0m)n6zOf99Uv(n#1_00bz-G*njeI96PoT zDG6bC>zy}I*VM+6?)3FZc=0n|#!H|60*)U)sqmrqTpiwc-+>nKW|-{ zKvPX6ZV>O>uA`mmg5Vtx0Ej?$zfOE9nD{=#s_%jW_mCPJ|G9rLIcmr9+z4)5{29vI zy0GQZ=iw6|FRT$H^D(3STPSH`UtB$)=ioMvjTaEZ>3jo`oSlHK z!IriDI9u09v{1Bab8qjabsb;7auYq^^84P1anR;}DIQh^}<*w2AIKKarcW|Yu8^<4e7W=3m z>Jb6fA;2^uA`-b<3YqBa!>)b%@aUr_5gQ#r08tAw9j!*v;DZ`vgs{w@xuY3nZJp>E z7^IS4prfh6Je|b#qH=T(&^21!Fh4mBAAdhJljjt{DiaO$;dhzGw3DIB1l!&Le(NgW zA`^DvTGzduf+F&WrH8gxq3WHiMkA2envu*@%AtG$Ks*%fHLYh( zN2mcAey2m0{RPAPoC!L?0YS8E7Ah||urold_0DENYh^j=yZW&8#1n{#(UxGVJ~Ba2 z-)MNQ{J5;j&u+Y-B@7|C6ZQIobzJ8uAOjw;ONQI<- z@>l**Uo?`j%k5hEONXmWUG65S|1|!n-e^N533@l7v$+9(`?bHuqTLg}`$zv3k->WQ z)dY8T(70?sX;~RsDC?B;JnTKN7l~2w2MeCXOpN@(n_H8IlwgH)LWvn!c;tyQ`0s!IU-0q^XOKiEfAOQY@UP!} z9dk6(wM*`zdwm@bFljPQkK)aD&Ow&IA3S&nKB6X$(;O7Whet5LWc*r5HHygB;^IM)Ozfy8?irkY4ZKDb2-@Al={P%ZIxc?Mh zeC7n?CfDYsh0_+reF=o}>t5nusn74d{T6KA0XX&SQ(FJo2J=JM%(OFtqxhIe6cVr| zCqy7MHJUbKhk3!sOP4pv(@C2_{W{5=rV}Sl;E5+5!_#bQ$F2f2);HmHc^9l3u!nQB z4Ewso8Qz6UVQ(33@lmj=%-$WFV)wB?QR;=0|)3v z*XIQf+G=gpEhh0-k({2X_EpWv&1Th$2=j4+Zh0D2x605oG>ykzc$rDB?BPi;?8-Pn zZn&tKP9``EswSfmRb@9(eY*^? zH(1`3r(;B=5G+ilWUry;Pwiup{Wc6sljt5BL+!`{!R=!RptIe`E|HtGzfmemPK*aL z?B9~nf{u<>+$g?i7sr{cbMQa~GI+e&eZ)v{u~lSMy^wOpT7HiOCP%{{UBuZld^R2`Z|q z;cmC6M}ao;(^()U`8i)!y2*-%>N z5lp`5RUFl9!eJSL9T=Zx#|H7^pT3T_-~W*OyovG~MQE<6A=tIDZj}zNk(*>pY;0rC zH=h4ETCv)RjNB0#9>$n0S}DXqxHEn;)));2b#qNtBg@;cLu4N@me9Dtf9Zf$)n{W$ zCgYG8txeyr;f+s9pJ)I~%$DVa4@{)PA{1OvTwp)lm~{AhdSh{FT6+*d^;YY#I6Z+% z>%LFEu&>rdbhUTj%{R~EgAdN*Mo|%N-Mogb?lvY|+9Ou?BN4xlR{MiW-Aybl_=cSh z@b{scq@QNIcGLUvl(rH`)_z;8Hq6c&3EXEfAc|ex+rtZpP z?P^?PSU_+fw(r^j9WkHqFDEw{1zU4abhU)`Z%1d}1l8CL`}XFF_WW|h_EozM5?RG3 zCgGW9o<(v}5}NDl@cNHm#Si}dUvQ_P2@4W9?KX@I_F&OsL{UX8uH7ufP5Ppe+x74b z2tjsIJUneS*cl6D1-2LQ{kmR?e*c`BO1_$Br*Y-%S-kS<>y%5W`qWg_V2KHpOytXr zU#oChR^-qJ>nK;TFssI5HAb;F`Ka%Vmu@z9xZ1_W9SQFm*)cLVjbXz)T3Q=XLH)cz zyi;_eoGN3*maJ50FXmxg{NqZd`FO}a&syE?{4Q@+O-V^o_N&>soYAgvwxwCI@IipZ z3T_ow;yeHRJ>vTtD5f8kefI_jM^qnW?{_Dy*R=lk=OXfFMe;bxD2%$iRtE>8_mSQ9 zTwx%dTD6%mJ2ebX{{XlXqf2kqSOh@xqxdPl$25oL+BWQ%1&~2 zm6+7c557Jgs$Jo?NLrqZauZx4m#KeAZk~urPDf5wI{cZKX)=2_ZS=zT$l<;R#Bq8u z&&WjqbEMtOt@rHRiKm}9ruJcb|3VS-D`%gie}X4?i98$5zk<6qr~yezGM*E=mt3nr z@9+{{eBrsZx6<3`-%P6pVsYknKSzwaT%{XLB9*F2vhR&a>iitc*!|zd(3VR^l&RSf{7(n?6XbJY|#1%d424cfrue z7%|69+_=ejL;TUy+Jp?o8`&dT7MH=r7yFJqhA)2ld6=gAP<-_q{^ei)2|s@Oef8qF zPvW;oGDycj9kad85U9SZ!I5or5VMmLc;$_caPCGm_8)svJuVgMD{t8$J9NJA4-G<4 zgdRS0V3Kf)aNKx?}42`vGCDHkt&m&vWHerGr|+1 zm|*y+0IC60A;m%d`!I8P0IBYY&KS_$d;->&j}npycf}d9DO`v~p5>bVxvFCuinbrjhvvU|*vLP-m7J82JK}M)dL1g1i*+hg<=Z6@0 zOelElairyBAb_&z?r%k4OejK`1jq!jCL1e;dK%~$-$eJOM*trnf1yq4;wnP%o{hIFbQm3z559*Zj${cQA{RnN{Q#kPGuOT8L zTqW76sQlyYiIC_hq%kNz^U`y8@#UBB{L8nI6`V+qP^-oi$+G#`VH7h+ zwDgcC)`yIXBxmne!r4F3VCjM+(VIzrn_*uz;N}Nc@vVRQ5p7}ze*G(7LRy5svIWOb zNvp8Ri&Pq`aQS)>I>(&*F>NH)M~5@gK8*K%b^)LN@@EklEnFZmVyv#%VVF0l-gx`! 
zu>bT^c;d+?5v}KMf)f{$U9FH^^JR{|;=|O(@!Q#}RVJ}IpD<+Q6{4VE8wz*iBARob``)^31H?%dvWcpAEJmhoc!2xc;wkLAa2?)0mioY|HAMTUj5#War&t< zNXd#)J=h#@wLVGfLJHg~e(@g{4|p*NO^OafL&Y^@?<6RE>|deWRFb;sfPKF z)K9%N+pOA44V`_jwo*9&;=sYl`ONz7n(N>#?< z;FCl4_xM!~SsS&KQ=C=5)=8YP4q@R@SemAT6Xq%+pkwl}v}lBfPY`^9!r|gY-|p+B zZh{yWH$N7wK%rm&+YD}9{|MiG>oRtq`4s)xr||;$`qYU-2=bCG&Np1eK6_F&HmjBL zamrich1Sc>eO13U>Nu zM8i~n2d-Saj62Q!>M_=zi|(m#oz_q3;H8%r95BwB7Stnb8p-~EMl3Obk4a9IoL%cT z{hFt@p5@G1GO8+a$de%6vM_-6&R<4Z*AjLgdjc=N{32d>=}8>fmk+;9tCKfMO{>~T zQkV950Nr&$`zie6H7bs1A9bAnxVyO_$UhL#G4VL@=+o@aOL&>O{QPG>kFWlh|Bffl zJc3A{RkhMwlfs71IO>u^GT9<~=!dbc#o2k9l}uDJW-1V}#E%@)i~%dXEF ziQjegi9l-ME}%~!IQ2LpF%}^)33%#-XYk@nFVf~e zhvz@T^1t(Y_`(whp>tV=8xsgO+L6($t>%)?!omz@7N%gm3%5JZ3vJ}AA|{@r4b^z< zwb!A~+KER#{TaOU^2>Pg(UVAtl|<_99AGm`@2QU@FV4}sdSiKUnSI$98RZEkFZhMi z7L%f}|M*e7z_z5lmpRVg|LSkxKYitk@bi!;M_%%xhl=kWJJ$N6edJ%Zyt)h{dGeQk z^-b9HF~nD&#*3eN0nb1GD7J5t7Y}S~K>NOq-L$%dv?KSnw~B?Nm7%1n0AYj^SkqiB8w4S^|w)ey9y&q^zn|v4ZU0%uyZn)BgI3WnW%pn%$7S;V#jo5eQUL$=2xOVO)TARDn-l!XSrWH0|VX7bB zedjW=4?TzNx!EA>P&iZuKq~N%*GeK@GO$kV%aKt!0W|FYi+LgUHdK{ zt(WRwb=pO}955v_A6vHN5wq^X!QI=D8s>&Ow~H~pWN}RHNbTz*bke#l@GtGp!_yOb zH{#{x#SNvMZ41d?=^VMIu66#FZ=wr2=$IFW#)QBx$OC%yVjP}#(ZR9E5PNoK;=ndP%*^zobx^jZ z5@Mf~E^ED(sy2>l!x{B$oz~lQj#PBPYMepkjf?oV*RQ}DwFjU7^k=a(#vd|&cXti< z(Auz^?P)rwk*nG!Xu;%(;NT#2;~P3Z96j|k7RHCr)!KuK&S{uDg4DjM>p7M1-yI%$ z57<1+ShgBA)UKU#_4I)+^T?RYR6PB{^U8<5M2z+MPkjo1@w@*4PaQp|<-_j22u<3? znEN^W@qhd>K6|4lh(g$QhG2s>(e;1-dX1t>MYyz^7JD%j8!ji zeDXQLLobn6FX1y^_%#08pZ{m<$j^Yb5Zn`iVaa%cx#xfT&wqfIpF4xBlxUp0cn+nN zwTkl}PXd1FxTSF$0k!Pe;zqTVp$MZiw@K$QJ%ZO?{UP3b{{l{&c@|&%+-DFTtlj-n z55T$lAT>1+Ub+Qzbv9zzBzNiDF+4Jfw)S46retzpp8%q;!rA^|j17(9(Pv-6Z+-Q* z@Wn5F37`G)uj8wK_y4wM1rgbPOM!y9_qX;cz@;l4nRb z!lNS4*IY$>yuu)7Kut{@n%O6-HnqJ*BJcYJ>CI$;nNIfg*M5Zm`A^?NW9Q&T4Y-S( zb=|K{+2cSsr|xa4O=4TR`cd0IjkMgY2+_My-$XVB)Kxd2p=%oZ_a8)3jBKpDPEN=2 zKoSS#6@a{)bZkipWFq7a->7IhZ+G-{cc8qc8rgexGm#2qdp5Yr^SvTeUS}oXl$9RR zo+jr9(cR;dew)zMQit#U^EcHS$bRb&{uoD2?x&-7n2D<=`;mZsM~}hN#g2sVAOz`k zn4dAC=vF8E83?y;Nkm|v7k2O8!{2_0q5}!$Z@JT7-_VWXnl`-j!sBW+TUb;u4jws# zRhtcQF`*0qpc7ue_1b>5=j4tpF-V9GLT*+D{CF}dG6F$9)OJ~wc=Z-6UXj?hW0wk; z$9ESX)KA{_ti4{}-D*a6Zx^n&4k9&w2NJ^rm|%w>KP46cdOEDoX#RD_ieVlfUA+mn zs8nQUXDK69PVGLV)J$aK5un~YT$mBY_FUl?70bNV9Rq!xD8EyU6b6;Z=xFghr{{uKFXYkydVfVH=y4HvIn$AKfqk(ZYN**d~;ma<|J#{j%z{{c)4^&`yB z135Wa$j;42dTs&IQ&JE?IVhc$?$}M}ZMcQM`uf-L%DY!EyQodbf2^KwB!V3m`-)t| zDs}uwkkj_JlWJ7InlIVKN~NY9GOu3lVgG5k<`H`6-52r zyx?wh!-`SvhzVy|pChxDlu`Dv5NEFSThj#jrX_HW9~fKa3|o`x0W~LLD!FaQYdMpBZOAxT01TvXRu^ruz+L^;^waL*1rOg4^|pS0k7Y#0MZq&b9{sYTy81RMo11a!ndg{bHmWkJTBa%+i;dmL0E?SZi21?t;m&Nlf!x zd~C#;?c9&Gg7*o`z>E1yl8zHGvKFVe!?ouzr+8u*dzDkM;Op&)4rY6T@Y*^l=*}?k7gIUo` zP~JLYM`lhgV*Pb$BK&@&`a?gov0tlZ0~$*&6RPYeCX*1g*ufs+5YdLt|%l6{*(PkWafROK~}(nl^4Uv_uH zU9U&}wrz|f+0c1=Q9ozMcP%QZs?-Vra!)Q^rLW>g&Ivz-LyGUD^?q)kjZ9MCHJ6CkhW=#Qm%QxFn?6>pOwuk{qqUkHF;-Ui?9NL^ zXJak8I(oD|+stU~>O=dKMeW@f6`&(7lU1zR_0Brhl1#^6bwkoK5Zkx!pwE&wRsImQ zgA3&MamE};%RUK-pO=!Z2n*6ZDCb8;r1is&>G2s%Pg;n<5>b$y zfoC5*P0XH%smW;=EiC1($F{u(VOd;&i(wjDvePNUJY*3Ih@VN2=cQ=0KGcDLAb$+C zRl`cT&KVZf(;OYWL$KY2+nsdZqx2Kgqk|Y7A4SU6LZp+o(eaTmnwjV>F0C`qNq7Ig z4|(a_5bp1d!R{`MjZZ6G5^WPSzsjU6EiDCs6y!2<4cSvTo4SxAZ=>q`Kty__rogCCKS>7=d@+PLOdvARC17iwRol3yTn{~WTqq{z|DfDnmd@CmB}V~BR*sJ z&D+pNrXnkoc`D0m_?LAmpstbAd#cIL4SidlnZQS{{yYBiYk!N{Zu%j|@duOC@F;-DOJZbk*ijj@~N`oRBmQmPHK5oP--Kc8n!xN{EtG8WiwOV1LZQZGvl(nsE*ZXZw-*YqB~0Cv^J$vTF3ZDZ68Y&4Aa&OVHePBTxD{k&PTQ#Ok4AYYbov$`80p<&2RkA}Azwo^9p z-JSSY@SU0&Q9Mw5kbjlk%LQ`A1;!OK#u>ZhZNC0Kj>q=dLl^ZF1FsMa$1zKfg){4Q 
ziEhhBEL6keQnSPu>4K*D#88F_l;6C8fBV5}@D7f`mwxlBD9A}yxv7*}xBHJHmXiIE zIZoLRQu=A44WGMF0q^Kk#Ky-lkLN)bPsFEX;j#T&m@j<~?_I0G{xdHkg#OT(xoBhe zLS$F~Q;#w9^|fM|m`Wrjao$cXbYYy>yMG5JX9l4oe#*|yMXtQno0v2&BOQ?;!Kx;C z>B&gX2topav6Zpp=z+aBvU4Xa<`qoQ=WHIE{4&uOO`@r}6N#yLif8M&rTn#GtiK!o z{JnL6*o{-d5g}c9!0mB;N=@YXF7q2$w|basPGA#1~4BRY|Wo8>5}>%!^J{RRrN;}I1c%w#SQVc{X@@9sxy zO*MAyAqY*1!;Run80Ho*IMj#kj&_)4=h4}zkr&m+YD+8=!`uM= z{YUR%W;GbkJ^w6{38*9$au;S!+SWY?j!3{jPaB$BTG2(& z7Z;ldPa2fo+Yh}wHa5_U;gKPfmeioGbrO3HAH@M#HK4O2Ffb6^1H+VkCuYWm(AL_9 zpPem6Y-S;T?K3YSfWmTT0Mq;Uqpq%&Ny#Xaz#fz_(XSs^!hxfwuz!0t0=+G?VK?>2 z^2F#ECWm@ZS6hv9#WgtcW7aJY&wuJg zBt-_n*TW9IhYj6*V;CMB!^~h8?o^cG`kgi$efl%lwUd4KV9+2(G$(Mk9teqS+~LQF zvuYeieOni0vyA?pb`)K^%5bO0si&VsN_;eQ1b`}d*;mkf>l$8r;|e;)T=Cg2eo8&I ztht=~(|YTgY$p0U(c03a_KIn0YC&^T8ycz_(A+(a6VJSa!~1jLWm}>>wc_fvo8&_+ zDsJD#{Ol6JyB$-r3-AsMQTzHV&J5tYKl%ynwH?thahRDHKznmDnkZ9v=n)zc%3$aU zf4@LfS5;%UuOCxm!*tlK^dFrBjs^sUheO9Q@^}vH^Xe^u-~FH%t|3V{wr>Z5<*|?r zz-=SR(RuS10xe6nN$|PFW1e{-(CLT6ND{@oOP*I6H zyA6`XRFq5ht%CMn<{<{fweJ~CXJf_EK ze&Qb9+>fXe0hmTfIDnc?3;(5KPF~oF|)Xg zhzKUdtmpcLk5FA(uXL)lz71uJDN_u{k3U1dnivB&Gn1RkZ{ctL=?56M1tT>x7iRj3 zPV&9Ar4yche(?44h0eu*$&ms4$N%;X96bFTUU=?NwPIJGmUB!i%Zt>tDwN!~j)uBM z6jd}~dP!FCSjewY>Rf=DJdI$Y(bry&j)5^ID>GTu;+ zDW3WCuVD}Um8=k2=bN*qcS&K@1iN9DdfAAX83UG=#?aQ*j1qaPqumFe`{L*6|AUl{ z$O;%2cN2_DR($;*--U;F2)_E&&mn?FzcCKLm33N|aHFINQ^SjxogP7RYZKhO^zf&? zdwSd9=cB{*8|9c|qBPjsg~sMKGy0AuDx&vxy#LV!RJZry*MIwW$RlqRch-6EV3Jk8wy8PXxmgU8yBm5bvyP57 zl-{aAWoDo}hgN z6Qk%5#5gfO){4r;7W7RmGhTM1Rr#wG#3#hULubYE{5*Y36?{E?Fx1_P_FBf9JLKKO z0=Aue23g6W%8pz_Hns(hs}&=Cvsj!PSF(|$+Ako8ad!pf*Ds=~x*B!$t+;Tp2%%vy z7@wHH(AWfG65`-ZVf%SnFgQrx+1ici$x*a4*W!9*6Lg`ec;xgkBt|e+FHYj!x86YF zj-yD+&w-w*CvQhAy>$bF!z08NpMpDMj)FHU^jVFic;$mDnDPk6-W>%BU;IME|w{grKM+=(GP>DzniOEE_M>KzzDYeNf)Zr(!fo&z{~`l#A!K;8x^ zrXnD@p`nhpT}xNKgvM6Jed>Q)WDH!%tN!6pbQ8armZmX8a4&whvZ@Y)20xs7?0KXH zx?pCcn|!>2#-?WVgvvnwFnoy*dimFrcI@xxy#B`PjOmXeA}#?x{MM^T zkB`A4r*^Q99k^EAkI#MPS;R*$M$FGC%rHG6FBWM>T}wN<21eoQ>%(~Cg|ge#n3|fv zBl`+5H$972Ke&ZYz4TcmF!%YW`yo=D3ZP>DFF9K+EI!BCLigoY6_f?GcRN}ioc^#qI$METw zpFx1a3?Dy!jYKH)s&iAgdGiWvbaKt~q4mTM-J@p4oCLh^%zpT}TbOM1p_y2zvAqrT z!~x>d`$lGvRUmJB&PAYyJ3@j&Fxc6Tn&MlGxdztNOkHj!mKs1nXe2S3w`_5V(dilW z6v-0#+D?BpJ3NHpo*#NNY43rIagumX40PbfZ@!DiKm8w& z5X)SJbwAK=O7qS^ZXB#*VxhIY6CItc#OkYPqdj!A)}gqn117gveEP*>#40BAc6FlU z+6^?;)T$>qW|xgH+3Xl2wg_Mh^z+uK{k?Bqy^h*y;_y2)xOAfk(TRz)xgq*KGvcD@ zi@ZG5lOM7z#|&}wAaknQ6^&@=oW+gmoeUt1?S+q;pJmZDyWWTaoca{eqD>l;u;JX_z=4gUZ?42|?FKNKQ)qz`eXVH{Pp zEf^k~qyOkYZA}%*m~S39{S5Z*-Uc5gseSbIXD^(`7k~G+;6;7cF+z+Ej-%vyHFoaX zi9~sco5+E@H%<-U+&e$U&6WvlKlUQB6Z{=d*nJX`7%DGU97Nfb>u9d6L519~?4Q7@ zw?F1b2k56<5EAIioXd!kq8rR*tI5Yk`nGmlE~-UPOget!w|)&F$mAsKJbs9HUFXnc5_NrPtQX(>tN(@YynQ(H^0NqHXm{eP6{{JgW!LG`7Vzj} zPpMV4n!k`qSre+6=NM+qIJP|fzjv02)~Fuwo8 zAHzL37GvWh^zAjQqX`4!792To0)<=C?z0Wh{fLDqOAqE2-5nj|Lp@GB_K4cvL2@AH zgrmgR+NvV;lBheh)8gu0*xh`wxIBd^#_jNsFy?RWh$1$;Q&EDB!AT5_)7P@TGUAW0 zm@M_8A|E=2snKDSUcQ2c`g$}FrwRsc8<@u4BPX$cZvlcmWwrGp%8N@;!#t^_z7}V% z+<=Q;D9mOHdb$SG6K8@^rp88a{SxC6aYs{23+fpM8>#<^soR)$?Lm~UD-6?vC@sB3 zTvCfWRaK1L74Qp>zzF*{J~fL(;sJO299o-d)m8^}^^GVmE5+!{G(3E%@1x^z)B6)6 zx?-%k6o2^--$!xn0Jd)30)L$u1MSQg*!DE-IV>jX;eN+SwnYpyRie6c7;`H+jCQx8 zj`>0rao^#mp2MCUTbPH+)295-jrzK}fWP_L*Dz)e#_#>nA0RrwYXf$de`!-T0|pw2 z*Sh*Jx@biUeb}POf?(#BvQ?Vk_V&g*Xl&_GK0{(oBlF76zG0PXdU?2Fd1eZ2%#~Uw zms*agnenEheFPpM3D|vL9}N8dIyKB-=27)V>4pXfwI zWfdxjT}mkLzTQ6e)fN4WgPy+rj1>XYfiiToHR0D@ei?m3Q~2oz*YVlsA47!uBF>f6 z!8I}uj~~fJsD}X^J@kj2J(wOJXWVVY9paBsV*T)tVCZ$^zI6rFrN!tZ{;h9pR2a0S zeGmq-Cr%wd_Q@s;7YG6>OH1fvo0<5T5FVXKdvw8sJj!XcqF`Gd0S8cDbsIKUKZM06 
zW7)8T$)SD>kBq4mK(SGgNQjPv7YioFM=_Z)(XhHPM#c+BfBDm2Kz3FVJXz1jDdy^m zMXgwHrLpZfdISNUu4_)tg#pFK%LmJg^T^qngOsc+1P278xv?IDLqmv-iN%Z0zla5+ z4TXCTB0Sg^9$p>@4~tZLPqk8^toBtL*u592$r%WWOF>FT2HRLAFkQxs(G9zH??QZ( zB&?1`@1|8Nbh4L?i!TDBQ*i3YK7{#uI}G7&dSE4RV^A`#5JV48W1I#)IXZ;Us2Du= zxz8XuJ_72gV}c#y+%!7+XOKg{wW~0fPG5|Yr$6XPP)UJ(rphT{b8vj4!7&%cDstW#Ng+Fd*#hp=HXQNtej?IP1tGvl+a$**3O^p~I8G@I*vGh~FhHL@_k&TOs zUG3ND?GvOPBOG8|lamuTcXuYoq|6CL5in8{%UkRDbIlJ1{}n zczb%_v8SIyPF{w33%NL3?e3ISO%}ZNleb|HPQ~ZH@-l;fxZnGevvncuv0OU>DZ?H* zrAh*i-o8N$4~>!!Gx*FGzJlYYjuY&WR~94r-G(dIi<#t1!`oX=pr|KLrdW{<2~5D{ zuF=Bu7#iAosdwHm)5a!7MleD-4zbSYxFjSb#1O=~A&h)aNlGFJt!C0YsLqcJPtrcU zk(xoUK*Xo{1`BnU$+}HXhkp#)G85HX&wn0TCD`d~Y(PobE%c8}z>C2%ATSV%^c!Pi zQ*1AtJ|PH0T`d?RppVJShlQe;9O+Z~IYyA{>*Is$j8y83W1ew2I6fdl45Q`)X41j_X$$OpJ@m zNXbq^L`)y*oU*u$Fu}vG!zyGd!i`mfd7)JT=^wHz<|KZEXOiNM|elRbgzq1oNRu4_eK|A_K^F>!JD@^5{Ww%`SwyC;(yCVKvU z>Jz{~?c=G3)nY|LW)imVDIgf~q%U$ubVM-12;Li*sC4xWVPbR=$4{J6d%}3r$K1>F z`$@n?!0QR4iGYs$8y_EHQZoP#eIP#fCE*o8HZRls_X__pz-o5GYwz5~k>jWE)Z@F= zn*p`+4x&Gjm03io~rF+zsv;A5(jr>S+5f(|(rCHXJ*8NZp0DnT<^FZ=)wYAQwFK+|yX4zscRP6Y=5x%FboAfV`>ST^ERv!HG55M8?D`%;L%OMHkL7 zF1Jx0vrI0MU}bzA9i3Gh{}<98J>6Fk80g0s7lYn`QFL{%EOjO-J_C%d=KD`ncVQ3-!~+E-krwA}%_Di8#>O*^TbrA^QGN?A*N%Pdv{=HNaEzhoTcq z2K7FEN2F*7}j`FR7A*fd1Z|364iohuH+*T_cwQ`GUD2M-`LI1J;1 zqbS&tgDqP!m=s%K)A`}pp&g7TdL$;LV8vuZLtPzZGt9Op)C6;KLNvldgE3DG733Fy zg4}G@?tz(QSDZL{5TX8V^nGJU+rA&$vr`e_?}?=NL^QN?W1zp6V;n$4Y!V)Q_C@Nt z$eHnk;D2^*9w|9Hkd~IFcq`-CL<(OBhFZbQSUWD)gyfScBRAH-9~(_$LQb)%Bw2TJ4iW?PGIlBqd0NuG(v*B z=^tI;t@p*wZH4G;X{IjpFjfd&5bU>z%$$7cgCE?zJrNcb0mJ+P+S^;zD(R#94m zpCS6&ainKtGKPeb3zBmfF}+|SUVKzNttGMJzA_PYyq}^v$b&vGGcy}a#HxZpXQn5y zbJs3p=Iw@mU?dLi$>I1`(B0mFlA>Y^6Whz)keEJVKkdCJ~7Sw&L4R>X==q!RCKI*6u-Wvnz}xUnOOt&9X?4pA3_kZuS{~e zF|KaQ%7LDEx`Ft(k2Wz(d(6wrLq>Y4O4Qw%pT$HcGbT?{S4J?mIE&mI+Vj>e2#ZZY zc5W^L84OkIr;|uYPDIzpwBqH&&?KfOCb9qE9`&~QWsc?CM<1!T^_y%i2#X}9UYMh7 z78plDn41!dx~(ELF^;*HD_S~w*`FS@`t;c8r*Z7mNiF9fzfA@smMvx+J##|6g<0)0 zP1`o{vxOKeG%7@6KII~dQdZ2%h)gTMologI#JjLZ22xoS^T(J_ryu{b_L{6B<)kH3JF^aS{DETZe8YO?x&fxMHOsfF8iQdacN z~;%}{4%uzTMQ`UGZGD`pH5-_4SL zl3R`q_cNdOz~fK7hyw?AL3Wc+edp)nN%oZm{9R0V<<%b`ea{i%n((#p3oXn{ss$Vk z%t2?D?CQX>dNCdN-Q8Z2jtzxcFO<8)grVsH4~RlAe{bMM?W_~d^I<-fSrZR zcM00YiMy_xzl7fYJ|)9NqZvE*9>JM2k0OHpQLw2tDkt-%Eepn&o}GokWI;@P3gSb3Fy7yZzDW~;!V=-(ZpYlj z5Ju^XB>&Rs188fRDvxpU?SVvWA#)QMPikbeO4z(s{=5>qm$(~N%$v&7_H>giIZ ze2ghpoI1RZK2}Ftaz$KPJ_<8q5lyTa8=IiwZAV)>`7}cRz+9a+mYtfSbX^QMG&mR+ zE?q`fx8!IeSTb7i+0T6mgG0pVBc)jRz$hEx)S91%GMX3%putA zQ~(pqtAv_zr;&%`I<4=k1aNjb=M`<^M+OOTswR_Jse=a{l{bUgve5)Lwkgll%hK{y z)@d?xoK_1RD96NdE;f#ZGc2}vWRT9ZdxI4 z{Pe1f^P@=ho98JTxjQI(XUWPz4UTwa`I+M%GOO19O*Z8&$w(-GL)Wp z=)B=U=PJwL)k165Czl$G%Ty1_lcih zp2zX8ejWei&%TOKp1dDGktT%JTbCpJupSHbMC}E`pK20@3|6n%r_o4xd26GE=S{RV zb=6uW!g~*R(usL+fU>!@@X5l86qZn=NyNij4^K4_=#aC`4uia5PU(Sah2X-Sw(OK6 zN%kjVGPEc&csjj$gWfvG*+|aD);hLM{JvXix* zi9J~9GpHwaZ$EW6T+=m;R&A0zme)VJBR0CY zEd7;{X%B$7xziUhVH0LLb>@5B9j?!@>_0@t?|=IqQ_>ViU&e=7oGAoLN0Vjdd4Mp%2~Zog!U;f z{jiA+2+yeJUV6<>O5fz~CXJ##TR2{MbjwX*4G(F)h&-n(4V0yImGWZWJ?Ud~j<>bQ zwfFS%9%6skrxp5j>AKolnZ_dAT!m5*xBAeLQkEMi9y-oK=^5mVAm64qNbe6YL zQH_lELNZz8E^qxM))HSN@ne~Cwi3@tapAf6Jd>69iGD~Xt?go?^zAGv6+3FB+^7?t zO0P5@`LV>9B{xXi88cjHpUX?j@NlD_@}gAf3=I}{;v#vIvep-~nX;t)$PbAra*ZT4 z@-itiv4r3kbpwq0KF{I#2*!1>QvL?4c*=GluF#3@vE8L*VicvLHkIt^=x5gA3UQ%X z_V0FMtYQ6%e@aK?tc+G0v6MC&rt$iZ{|!HR?<)TKJMSYiAy_3q8oyO-4qCrvGx4}E zl6?_ON64dW#gpyIY9f<`nA)52X0j_q?bJ^rW1)Ijj@Nm55jV*u@W-83$a6zvSg01pLILjDe0Zf2<6Jf3#!S4|0LNQDVH~( zg;!Ha-N|z&|HMx1l=q5`_T#{W>uorCs_K;eNGX5iYelbIRURaGN5xB)F_RBw35NVG 
zS4ymu8?PQ5pt3*e=+d$o9zKD@!iqg>pZk&2vH6^;ReiJ5FDxx-TMEdN8eU#*v@>_M zFZ;7;6JFxiXQii^QdTo}M#fWaP0}|h?j9qe-7h|tXLJLop4VoVpJTCr4=fV7jG7*xpS}US| z)MH{Sc_D%;9_@|M&R&Z5vPjHecf1(O*6Y@Iwz5Kh zZYN%$?Z}OEGx4f$Rz|M9`Ib6P;rLKCa&t{?WXa1%SJeE1)WzPbxdZwyt^10c?LVAE zK;+eAFLCN0U@R~lU;o z6->JuB%!2fvdBOE(Y;Mf~eI;yp#zDb={d5IS>mcl&Dd8zB(^z-Uz1Et5r zI|j4TLnRBjn0ljE{FFL)gtjBLDi8Mxxsylkl%F^U8JqNz=#XVCsJ(Fw|Lsrz0>Ae+ z|Bjvex2wJL*VT`Wu-xLvDBk$TZ{qFhHvHN*{uyTqQxK#TxQn#*q~AiDc_?*>ZpmDO z%oB)CP#fF@qtO4U+*#zO$yuw3ymQD~=FB7yvREV+Bo-#_lyXbVhkU&y*D^D|rH#7# zsT*Lj6^-PP%k;^DorGlmMsjtznJzw5IU>qU;;voduJD*X#!D|Rc;eU16NXwxG}=70 zhmrHDPkx2aFV@!oizF;u_x1mG$MTEG;#W=o2YMk*{7S3;Kdp~+77SPth{U-`Qhq;@ z1X+TRoOL>_wcg#sgHJ8raYvF&Fr2e;Sz$cdf zg9<*phrdk!*RR#S-9K3T-=SZ8Tw0)9UYNnVKY0ajymt+s`i(!qQ%@cy$kyun#m4u` zaQgnF`Q!W4Crv%D8D~F)9^98-eQ$pmqD)q0HqnHMkv{z8*ZvJlZb|t1U;ip z>i;+iq>#xGn+266xA13w@|QUM<^PD!ef}8*3N_gK^V9!ZyuTl9)FGk&H+#C?%ZGU; z3kV%WxkJ2@%z@eFa6)=K_-*M2h6>eT+#>ikvJ{Qn|t z=I3U5*tw0gIX?dHkjeiUx_7*vlmmB{)RJ-S`>%=zckz`xS8d#bOYSQ7^U`|9|BsTh z*AHs?XW*Ab}KY1@Ekr5;E>cl*CmS=_(% zUv{oRL9yu?^rPXGpWhMv`0FmS5oO8}O^vrbb>7MUBb-O13 z0}zb6|MU2ejp@F(Zq=z%C%mU_-MaXj|L1?8f82=w{qvt7$lp_qiFSiP<6|KSEG(6W zrTKlGUzg;Hv$$`60_w||Gg0J&4GIlOO1mfwUX&XTnD zb+mHtq|>ikBfnX>-b>OxKhq{m4m9D1|MV^@nx^o7{_(F6mk^*%-Gha{3X*2OvF?7; zGIqCjZ(H|2Gnb~&*V%}hH?E?sqYt$$efSUm_3yAGBi8l!rkm#W%T2R2KU`b)(*3U! zaA?Nl*ciV5Z~ilut=Wt}`wzdbR^^MA`@9pGbGiS#Jl>zaXs^EnNt0~u%d|XK7DDsu z_<_Ltba zDplR|`Z{l@e)V$vYHO5vl-ZiTiP2`h>ojAqw*z->XQ8!Y05=Mo@IU_Ue`3wjB!w%4 zi?524&n+$H++x=kSu*i7TdxpN5@u4G~J-lKT> z=_6{@wZu#Jq~e3=ZqDSleB8euhU2-{JWTnoj{fh2;a5!adpdjPYF6bQp6->}{pEQ7 zGhZEj6K%NZn{4YFCFz)SN`jf)jGzAV4^h|NjVW1!zX16LhGN^J2XOSrKDB4ry`|<# zN)mNhIpC=#ASGOCs|3DD`esMYEzYl>X8ZMZfwc#dwD0ciH{JFenwIA!4d`ke#NX4y4C04Bc^AL`$A5~*FnQcXEBlqT^QGwP zsr6y#%e3si)=l@;@$j`ju%v6{s=Sfd$|xfGq;6^b`o#A6NqD^ITzd=Z!`1X<82{3p{=Y=} zyd(d`vC9|HvAfA#$6We)bU^E;eEU50KU^Q@_To!PW8PSN74p5i&d*O@4Hv&j#2yi| zPL1~9{Pk?auHK3DtCnal+tLbrE%epP@vG7g)}X3z*C+J#Nn%zXB{#LAwY~WXteb~viho{dit!W3GhGD$*`s>(m@Dx&)rMvcamOAFtCi%7GK%Rau z^Q@d#Ps}{D-QCUE+;I^4B6WOobpJ<^wM_)R2Kpx2m{0c?uXct*{Sa)(Qd)s}@*KU? 
zHG9pyPFfNs`N&iI+I@_#4>|Gxt0GRTJCMOpnb-I}5iZCFFDe5_4&mcNt^(`A|OWH;UYS z_$nA*sIFNVE!4*STAw{GRv~8>xb_}DK#~jZE&6aY?^^lco;G`UOZ$AbIor#5G>;FL zZ=0SoCG~)+?^EOFBzG-BQq#S)&SUW|(n`y}!n@ml^YfsK?n_#2GZ(sl3Vmi>a|`pk z^1TunG=D4~#TP%?FV_)QoRp^rMZL81Lhdp`vwmI9&zAY7$jyhFwBHgv=F&IM*Zs*Y zs|V4nbiXJ`4Wf6HgL}frRoQ3O`7jKAZN$c9;+OiV?b|em6}M1??&0stOKcWvX*azz z3;DX~tKeJ6?Qx~1xpj#=`BC!JTDgBqQiGHdn)O!;(gROwv~XM!UR-U+o056n-Qs+U zb(1vYZcNH*{nWT_oJdXvkk}FEBT`ziA`NGfFqK6#kp-!`-djzjgN+q4{U;pQn9!n$OSuxqA@F zRq}E9frV`IbJ8py_qSlNJ)tk#))yl;heFc+EXVWd?&EW>6PnFig%;|`JvA_fsh6;* zo0zg^r6!yC5|XP#XJ#LWU8wG_lkVnwK9P47i{*8jZqGVP3tub!(ay+Dyz)(H2lYVq zY1Zhod--6#J`YJv{3tzSZPY`DtJb^w^Uc#--Y7NqzNW6vd~%gj@$xQQ^czRg3!$%$ z!-sn5$yZD6`h~t3QQyBu+8GI+59UP58R4)RF>VWhPhjv}Z!ymzc z{s>e%O8&ZtZf44gNxdF+I!(KW4jz7i2np2QS~TN4omN3((Hzl{f7p#d_x zdLfRy0WK&g2;QMemm^JfUxwU#2}w=bZ->=^NrM4_;i1gK zwatqcYDcy2w83GTz_^88J1`6(vH|}WV$99U7wM7vTI!L*4)f$FtTqBC4}W<0>fj%$ zy}(P|v80~K-As!?HXWaWRbDLRqeEB_eKI?`kk&0YG4Qp}LW27;a{0|9caMa=c&{{{ zF>Uw2#3*rrY^ENmZ4iHd{P6$J^kwy68V;KtCW9W+elhB$UV>4y>5H$EzB>I~(C5m_ z(}*99R-O$CQ;+9scD@i1uT5KEGZJrFy%7)+27k3_{I8LS4Na5d>Ln2BrH8rz_yzl` zl{&Niu)}U1g;8EqB4@n(;2#{OUP?5-ulKX8xyOY*&sf~A&ToPi)}4iq{<>(&H8uv0 zO#qypeh8#q$=GOqNVs*Yw;YnGOLaMvMp~wvrdh zXnZX!!F$ih5$Yc8Y}DjK^$TX4*Iv}|)zNIL@=fS-jj!zE=CEPRI1O+AAOz64eOce8 z@gZ5PwkP`dgcW|w?{p%+-w5TqTT%;2&V5Yi?s9I@j?2;ebxZTTJUfGh7HaD*qHi-B zCSY{>!aFEby-oi9zWM8+2XZMZ+N_h9aOhwcou2^x)dfEZi(^L`-xHqQkz-R^fAd zb3d9L4NYQmN>4j6Wg5q)XWm0uc`@?Js?ak5EOspima!2j*eym>mE`00jq4~aE<#=J zC<2ofAtFG6F6(y5_iira+J}+i=c74&mi_+7+xVmr4sSp8rpX7jCg8ncCtJp{Qg6Z*_c-8Q3*>a14Gy>x*J-Gx3k$%U@^+$*89A9JeXH+6@P zAEiU{`I=AK`LBlVSJ(a9c+eHvV_X&t_xIz>nJY+0NJdDg>y4pbSbh&n8fUYX=9ZxR z;SpEcn(XDLJen>g1RS`pagUWIe|1du?oeOcHP4QXkgHmf9N`(4?{=RTnk^%x#G{M1 z=24b)&N(@^+~*^?(*2P(G+ujKyYb%J?_u$})$q~zs-7;C)Tpa$CNvi1;GMJAVDgGV zYI20yd*t4JeiqI0@n90@s*Q%*4&4$KIZn}^6=vQ*ZdNXeiW|_{-G`)Qi{a3Ex-t>W8{K`mpX1m(GWHvgB-dsl>f{!oC=`=bU z%W(dqUt!qng{UOj1P$WrnJ|>zxcaJyJkw`s{!pVYvI<$V^Sdr`PzTy-art^CrX$l4 z9_FW#K6lGP+7)tydg?Tw@XmF-{qZ$KCoV#4WXOWPZ4T!Trh6NjN7~H>k%h}XNOOzX zj1wr#y@Tw`J18nDMAMKFk!ed1;_ERdd3B(vx)9edUqEGf6^hGh5Sy_A!NJ6AJge$c z@|17x&s>^&{O)s~MGv};jXLbmj|^d&Nwd8D{=O|J{EQd^8`m(HQGq8j-{Wr)jIg^&>XEuPiT;xvZ)x=?ug8uIeWkXKlZDNk?OU;<>0 zVa5hO2mcSpy_Di-S{5~o;+?lXLSkY%fCGIA-=bgnu^RHcqtOh;qT>BJP zvP%&dn~s?9fNy-lP_sYn+dbqoVx+kYH%e<^3Qj~!hz`C=uG;AZdnoyq(~5zfZk+k} z6BJd~qp-FGI^yKCxEPjSpq&M1!MY!s?l-O)-COsJg1G+PhvxKMBRMcTlcaevcQNhQ z{FpX-RwMcQ8Ev{~n)#B4(~L=@3C1aTW3%>V@%xdqB6PpnA9UV*PUzvdyPLFreHIUH z(j;=f(%SJkeVxnREX_UYv>Gtl*M^?XHgt4$qrH0=X1kmr{CC*U*WIsHLue#dI$^b2 zF*r7gNz*tFSzu&980hFkPkRSCy1LN9M5$wB66PtEpT{GIczOyGL*3}e*JN@%W9|kxkU0w8@omw4TgJUqV zlKEpJNvsK`2|YSHs8d4|s@}WDPi~etEkhXW??h*J7h1de(J`opg%0WNvR{hWq3)Ir z{Odpd2>JCI z=l84Xp(wfshKBlZ{q{vv)m5sg=^5F({(c#9o6^E2b)ObK)BbF9Q{(6#976j5$Afa! 
zCWfYnt*z+q9e~*^8vS`lkI{}?oWFVrRsH7s#3kW(nzlHO58nF_pI*2GJMB(8I-kA@ z*?oD^bk}J$VTAsmubsYuf7^&FTKh(!H(FJRuanm<`lC-iK7-=oVpt@Yd|~>mYuOX} zbDMFDZPNdB^rF3kK67Av*0+78UOgl^Z5VH<#hG`0g}ROb<*$V9tLsiD%1VpztGC`F zrtSXn^8So|cud=29v?tA<SOM9v#frX5*%$ik`4JJQ#1LF-$*s`ON#s%F2d;x!JsM`8oN2xZ>2M@nKy1rko7M=NWicopUew%*#Kkz*j#ia-+mzHX}DD3un)M zjIN%c`}>Ap7coZlG#1n6o zK9Al!1l?1e1Lny==9cx0Uu|j*q-WHi;*R7jljC~y51Z!1G|8RJ7}t+MKW1Pq@bG%& z;bFqGp%FPbS8%bQ9tIwJn5(6X`zHCrlDnHY zL1A`2E`59%_03JFYidMgeG{5Hy5Q^Whp^BHczes-x0oyezTJe6-gyhz`K9QY@WArr z8So{0+7tOqK&Q+Y8R$kqP62YW@=@E+fSUSx*qmO7h>n53&Nch{nX8-Ro5rt3S{b)5 z_YlB=A#~Lh;ZEHIwjVl;)G(-7en~oJ>yoRc_}vaKk16>0MEcb>~qmjr4|a zueBv4{A<0I+5FjG?(;%!-d!J6R{hcbs;}}*dLbVI@>+ zAHbrNaQLy1`)V0hRr-PBJX4zgx@wnkaP281XN9z~4@<&?>uS~`b+|vO1(lJ?IVmIV zOMYwZX`E;ujaZI#iBplu*?F>#JLqLJRV(%80zkt&*v`b|E=FJN`o2aVa6>u2JO0V@iib;${r>jxc!V;PEG( zLSkYhi_WelbL*|Umb+_HN2D%Z<6f^V=j1cLtem-PY@~qFB?oL41G?HCwW4=m6frST2o4N@Co9u>?HU_b|KyjD>ZAI?kJy&-mGXP;_=$CR zptq?D=RdiC7k=;xfJnJRLZb% z#hV%*sX#b%eIaRlOFyOdSzj!xC};Rr|Jqp0f;-o);Kq$xsO=oX;+1REqb|zVvM=5~ zK3KA16}D~M$}ur9d8o(1Q>PFXpluT&{qywlMOwxpY}>g59$pqSw^U;HBYP>!1lBXd z0g3wOlqq`EVg7r!uDgE;|MC&idLu`LW_u||X2(YAn(dqG3jW{H+JN`Y{0d$^eptP3 z^PKB7nr&CEmd|{?)V{bTBF%ALYmHN-`#x4zQ7uHs)oCW7pSrHR3jy%%1oPJ z>}$c>A6-FaNh4wt5)l?5I%9*)IF29x_zlEFCm|*_oaHr*)c(rVcgvIYE4>^+Yw0!o z{9y`Q~?>dG3_x3;3Xp$WB( zEif2N2oH;-@Ah@svXF}tWj9in#+#6)Gjf*4c>bOnkLnt&l%{D@3OhwcM`6W=JxEMV zg+B!~v${!<)XtBLv(OyhAT-+#w+3jkojuRKbNnQNjt=&ttgr}IZf2scu>-Yros4HA zNK8pk@lunkT&=YwBXMtALUP32ij=vl9PzKMxdK-UDv|N%F)U4uP(E2`eqF3XHdYro zc={kAVXt_&mUuT5vA(kzBQ-GP7ZhssS7LHZxH(}kfm1wG{fKOC3*6iAWAjYnlRidhP@*#fBRkxd4H@VAc zXM`4#r;uDlIWd>ZyLKI~zx5uT_|^}wW!EO9SEBP;y{t?5QPO?wU8Xj9ZGzW++!T|I(AW=j9_FR6E0?@^vA;lzco2XKV5aXXwKI;q}8U;T_yPQ-ZQRmE`RQ1f1T{N zr}TTKu!gWzN%cie%xaU1J9S>GO@6VAl0T2Qs+y@z{VTGSF?K*lU!Ssc4OXt-fap+d z_4h2f>T+K-XH4eRr+$dcrg&c4LsJpIXvO8oET?YSy30usZ488`+1Lf=Bt{yFaJzLJ zQJaonZ9))&wB{c`EI=N@BEqqA*ES@jCo>nUMoMBlc5K+BTA5GeN#w6iX!4xpi{Iwx zibivJ*SJi_#-$p z82&mYHa_0!agns-U>L{8@VmeF7ubLBaipck^I~ai3N0P&c6tiN_G= zCt;19nYLoErwi8zO7co;G36hD;E(_&u-?ki#KpuSB+#GVDK5JSy&WwW7#zmn=r{)T zCIkhCsHN&!ALU;OS|*H*j-sop7rkQ>7&1>m=i>{1l@X7kqqY>;^<&t+?-;x%`=IaZ zNB`J3d;@|g3jtSlcsRx}GCYXh;YkF9CL%RsG2){cl;94O z)WI<`0?8NiT5<6#NFXs~#c*#M2Kst1G%}9ikr9mWGa;)X{3t6X>|U^-w6qAHUc5+9 z-vn<~uxiCJCMdGQjC&mUFJ%pTKXM6LTPDMC^vOL4;hA~yN(wvm=&8&>LGKjOcRYdR z5!!^IMjY2^i(1Cr*4}~s(Fyd98`N8VgZ*`L+=D2b4JN%FgDf{RG)dW;;X$48rg2I$ z%A}lT=m)#e)7y^$_D|~b@X;ZF`X&308Akij*WQe_>KfcCYe4j}wFqHGJvlN2gN?@K zAE@M|+%rMPV0Rn(28J-mx@4S#LqiC>rCS_dhZz$Sljs>T5Dfcbw6~dMhtWStSrTNZ z2{rPi$?G9V?MuY^+BrT$qwL=#3|3DB2Kqs5S?O?4hlgRYjAP1aL|@+^`Ug3FMjLzr zf~kKZ1NOye#Lz$=LYNt;x}?m65!1fZXM%PqGs6@0!7$W=e)i4k7ozk^3r(WGR^(j2 zg>&aG0fFHN4WV8d2at2;HYR98i&tjAkKoBYMoznt2~#J=4HkI%yXE9=%T?#J9fKWR z)WJav4I415Hy}8iWeLPR9AoI|?!xulDzr7Vq2T5yf9Fy=y^t0+z9Mynj9u z*GfB801FB5!eC1|s>*T^y>vZ7qQc?BMkO^?I%Be;y?2=3JrKY#xrb=`4$8E%kFs^r z2kCstkL%nFi98G#>gz^tAN7HHHDPwbKOjJR1VmOmj`XR%!#{`#ylX5BVq=5w)%g)@ z$rKdpaoW+_(uR@#KHA`j7=D*<6&xSluwWZ$Z0`Mc{x7$~!4}SIP&(PRfkD87SEZVvs zG2#BMWN;D=Q#bBhDaE;uD`BBM1w;hGpGpwu>p|P{M-=%KIg6j180kb$SBvs#gX2b) z*QuZ_b~6*}oK{$dd+8f`=^N-rMvd^)1=A;M6U@C$ow)GHCwS$1&tqIajA7dSh<*Z2 zAAgRYx6%m@hYi;8A$0e2W6CE0Da%$NIyDi&exCC{q24$K{lqW=SaE+R{o3FVhDOI} zr}W3-+eP#qvMN)L;h_Q6H#Fmm`(|Q=5Bn?38)4P=;D`VEI{n!+uHPuZV+Wo>RJf0t zWSJ!fV%roNE2~i=6NV>GBRs%+X2MXk!5yLMn<`ORU54Fz4j?uragNTAWb%Wy+|$;o z#S!ZIxXFQlK=i=&?rg>|&kv3nVDezU z{Cz1eD@^0#=xy&pb8{mK%Zk}X7^0)%DXR&L8LZSp9raL~XplH<(7WmymbxY|t;U9O zkn;{F&+9Qd(hn~m8)L+_BG$9u`hOGMsHU;My($32@0Gq12;MBuyGu_ 
z>DP@MZ=J8VHqO%%7-_G-Km72QD6MKmjKt-|83?7mTgC_RZ~x@~!R9Tyv2eM6S9umKYdePtAg8G^opLwtNx7LIY-br+$u8$H&I$FZ_dDen@<;36{wrboEaFzM<4DIqsz2 zGoY=dk-9ggdl^S9PD~8Slu%eYM$aLTlFm`~)ZQn41&3#e$K6ewb}Gc+)T2O(PiUX@uGCL`>3Bq@=GP zh9U2~K>DYFTDg+Ah+{C!_6B+f=tB%JSnSYI-{lcJO(#Uxi6{C8HC?Blu{gyKi7eQ6 zVj=6~INpEz9ByZqQ^XMnAb#}obu#99ASf^tvC%SlOk!Le<9PQHrwxf88Z!bu!7R&I zIwMw&wM+&W#}zIT{4qd1_9B)MtnA_7csAGIYC$rSSmMiVhZ54?Tt)CIAd0Sr$N6L|TvJ@>W-)MJN* zIJu9Xe&w%*iEqUgM`*A9At8)$f*%;Wh*@8M^IbGFwKIuo$L0+i5W~2vNcGXxQO5j+ z!c0_+`C#qdCy>HyNkf@#VmabAYi|qQKX(N?pZyLJ=hgAWt@FQ<~Vlc{6_UQJb%)L0y zrilRzbT?wKs}t{kbRH>dwjn4qOxdZP*j@G`7d=(HQC8!_-RNdr!yLa6;t;!zcC2oS z$=H|`9vwEAh|T;lB;}}I;{T_;iCG!HX8TARWULq*rkzgE2Hm)09gPc=#wd&R1+aCk%Jupm+!q?wHy&6Zy0OK5G5FoOU31+#eK0sX;Jy7_=$h?Hu z!Zqou#SwleQztRso9};wJtux^&V-?xv~pJBj;!l9)r8^NT{{p$g%UriNnQ9e(RcKt zley9$$FF~E5}t0{K2uKkGSk0nxJYuUVGH9VvA4f~c14mW=rKOff&cN3Zy;&ec6|Q_ z-$MXpKHDDu5^IZJmzBVStZ$G$#yU;gBEaxcxuzMmNv;1Drw$3pOOeJLGl*j{iw(tB zF}arn zDNl=&<3hacE#QheB^b1$cZ3S5qi-HUAMx7Ypc%vTe<92PWa3czIG00><{r8CtefVY zkqJ;6^YqRx*t}^gV!v|O?x0OlKPC#n zf|CWex6($36+W3TTA&LKo*5g7A$o~h9P5CvD5NZ1g|OHt1hS4YwlL54BkRQL91m{~@;lbWoL+npa|`OZ53!T4q#csd9ZeQPW23l} zmxq*uBy8U>dxLi-rqU)x`?`pC`pLIoH51dKP_@UYr%aU5j{DnN;LUL{n3%s%Z-!|1 z24b;5<}ltfZPDjWN*ovcq}(Ydu`P)^R%)oXzn`*4WgB;qTPxS0YI3~BtANS8yB)V@$}Qog9xtWDJYmn@aC^Rq5qn|hD~eL zwi$AKieuB&#r%3`4Bhl;7OG_k^LbC|r_(fn?)G}(zE<2StjDM=5Gyj0SQT>t#$Fro zW}x_6n*rVZW3WwoQ*XvF(9=x2G0|>J2n-ET^ZdfU!dfnz*`spQN&28^O`dZEBz8E| z*Qv%v}jb>hUQ&^tVYtfC?;PL9Q<<>|=Jucd*+;dg%bx3T}= zZUU2~i1wMr2j{Xdptoc5rd0?EAgFYXqa-^EnPpx0-9P!yxPI|EHm~1+gv2m{1qNfw zFmB(@#m5&jv2Mp+oOt4 zQ)L+nN-B7@C(_bW$(IAuRy`_8OK|g6KJtpH(9|-3rHh#)P$6f?W<=q~@1v}dK!}NE zVSWLs$y-iN6{2I}5fc+m1El;2GP{@@T>A7Ps+pM7*3}>@Cl{GHMPTe=l9GY_YkIs`HvW+RHPT|yz9eGki)l$4>Wq87y!%?JvQQ~MGL_-L%F#CyMbojMXi{%@eV zt`-$_O&GIzBAJPq|C}Uiiq1Ia!Us5Ws|bE6YmpG?2VZ4t1aJhi|MPGE1>tdv@clph zeeBz_9ov@1qpY|LZ{4cE-a`ivOOL7bZ<+zF6IVWZ14T702+!Dnupl2-q9Ici)37ms zx3)Ck(%B1WsBK0~V;7nTT$9t25gMeU2u4v=U5}0-D>9Ok;hY%6AOq*lqt7Bt=SjZk z+MT0lEVzzWFBD_d-qZN*i_hTE9c!>WE*P)=^5^JvhGOfUZ7S*T76&>t$vPYH<5xdX zcC~D2+APR%)3gEoP38FUTW2wB^+9?{5&|UKRV1r*2#RfH)YsPH#+BEhTReAGM6xXyNJe>iTvnv^Js2pKx0!K z%Be4PwCiV{If~@O2sSwd!`L7S3yYDNorlJTR+Q8B$1PUsXCeaq=xANs2{1AZcHsZ~ z@4rX&fEDYutW$tp13B&qL)rxsm+K$Dh3wKAgfH5Nh+qOaIVnl$_yB(T^S98}Gmd9p z{5GC==1FYdx=Hyy4+6c&PzJ53UgQ*1pw%3WCm-LfRuX#>xOy;AyPn^KVMi=ZY+r)5 z+)rQ#UxZ`7{~z(>-i_F~aSf6~d~o6IU!ii)f=zq(fj{9bz__^q=RWxmFMa1E`VDrB zO-Eq;s#FBX_)JfruQm%m{pC$KbSv))L;SrXB^u!P)cOkiV zT~2>@>*giug%d@2MX0QHgzq%r4P~;FF{f;C1@JOFMs(F z4jy_6OO__HtR18MZ79geL0(}wY8q(o{Ma2%M8zfYN(WjiijaGUawsoBN#iig&Hzjd z_Mo-C2K{;q(iblV-6`G2B+h;K3CFGuH;cG1letSBPcoNXkf4GB3Py6{Uq`xN##Jx{x@;rKG48I7;8u z7&|dVtX5C^ym|F1{dEI%ubTPLrAOdZb7%~yI*TvP{( z*&DHO;m|3&q3>onxj9vwm#_A&u5ax`V_O&PDiE>J5tNe+X8MkEnT7C5T2A>6p`sug zMWyvHS#^vfk;JRsY|nyb`i{)ZJQS7J;oQ|itl6+rJ;fvXqQ-Y>3L{PRc>BFy!OFN- zTi=MHf+CbLMvq#E8R8?E7*UIu1opKx;p*iZ#3|KiYOF<8W+txOEJWXk2`ksEMiAqk zr<2&JsfD_hkLRBM7R0Wt5ajs)6=gel!L>GeU*=7IVQ>#rA7GU!c{bO^rC_C zbWVF9DK^G+gI;2Z9u3qx!Nu~lQh9kT^kYUuMu#)u^(UrpK<*9V=duc9<`!d`7@66JR;;Zk-P zCLDe!%gsR@{b?EXtAA(&8N~T=(@x~&K^wFUw&0x)&LO*UfPQ5O!UAWfA(YP&d7Ab3 z^sU!%E~^6Z%Qqv6!B+f>OsbkECh(8{=jVtDjzr|HAi=aWnrX5KYn zsPLTdM%|Fdev9++ki%T5jyaW#;rN6RVUf{{J7R;9QyI|L-bh}vaVNhNb+q?h$*Dqu z5h0V+o-#Qi6FYijUAc&h`5pNEKmSXl#%MPoHJ+G*l4m2wxs>*I{l;xHFb0Xu8|{8b zhz;e~kU84Ta90a%-N{8!NewPv%Vr*xf|P`qImx%+3L`p*V=iC3jB4fywRP1>ZjyhP zraYM2r6Y_OCGY%4sA{Q4KXH3eQ3)zJ#)TCPh=@s`pA4n{7F)0@J-T`Q8Y(KKt`_D2 zo$w8fL?p||%0{Qdg8rUXWZ%wKeyy#c235qRS>>JBwg2e-Ck#c@vxcV;G1&O%07?od zcjm^`^tBDltKjX8$e1Vu_)KXNhVNWJd_pRW=6>YU$H>!wRkaORyka%{T$9?eCuwt8 
zDf8WI#`s25&<+Rs#}E+}jzIOa2L07YKYsM;2N*Fihwg4bNk08r4Rh)262xaPr;Q9& zdUdz%Xr!z_`+65Gc!pyP<~Be3=O4itnT~{{Xfas%IxpfmIg@vf>WIOKviEt|2C>sjbK8=s3qq zdjd@6BJ8xAjwbQdIVdWtMhCH9Ep5Na;DaT*pTLICoG^5gNLjO4*%t0LW7FLehJp*H z&6DULp2)m?2gUT04eU#4NiB4~eh8PvPE1lgD1)E<{oi5IQ-{vM5w(|XWmPj}JB;LH zV#AOC@@}W!7(j7BKC-gt8|&*Bcgk^vex#Z{DIh!s@rm(ZYIq-=(dZuCm_yPVjfj0| z58@Yp@~^+ZjwfHh%Rl&S?Af*%TQ+V$T5I}dq*Ea=o^D0WS)z>vX8?9eaOy|y@N}MqnnYRUj-lUV8q5pz>8y( zm46Kdc{kxbWy6I_S5eh8fAO*<@2I?0vcbunke${)Ph?>b1woT(&&WJkd_z)4A}8+Tx&LzpE>V9JbID$$SL_yFfi8DJvfkizyxI-BwC zFJFb(CZL_4@h+StD9mr2KuF3G4p;!4dKa$VDTUSQU?T5;_w*zzgN>+f>qkK9ZtUN= zAL#8yE1l#xMI|*YMDxbz9&(MBi?nYtd2A)?2Ap~CI?i9F^2br0bnHumY$(5e9>4tH z5{As$((&G5FSI}q=WMY$wl%yDh=cqTyQ}fqPk)RzKE4WbKr|9r#%Z6x2OqtUCK~J9p|TrBP?U8E z@4ovXYFa2eb(9jP9(1=b*=`xYqo;nGfiaxU#)38`Yf+IA80=|A4MB}YQlk@NOmH(V zouQMth(;z5HrF}#WPclzjW^!<70RjezNt+1(xRx>LFj4kVljquk8nDz3>-bYA3HWM zm{X~=6(@6udRld-fS@-NyLWG4;_AhqGJ=tDD+Bxzoc-ViOh)AiNYqyLySc6gZ~p3I zWYWP}oOkV?BSeSa(_De~KmHh>G6@);xA&On));y_>hZ%@FQB5!hUlbJq$bBBD$tWj z$bhzFn>N@|o{vHX?=jPC8y#J3xP9jaM)YIs$ux#~I`H~0eu93x2SU>qBR++RG9A7j zfr7Vdj~Iv36N5}bj5epz%e2G*1lk+$(TDFLr=$yV<9e2ynCNfE`@eb(@13~>BdZW; ziv--O$z%-lblRC$&f@&17tk;`LCbTI(}@12GGrCkW5uJVv2N1_^|+AR120*!gw9<; zgYd>4FmTL{h;~f-=C(k$Pj;PQ`NdmS;qhk<;PU0m=<7AAk`AX6xs4t0iipL=&6}`l z(PC6)6~oRBsG6)(sL3fo|HLS^o;re*rLpve(jzbU>H?9HoCuw_fPdLWq8EC*KcbRX zA}J{mf!_4>XFo#bjVuMgWb)fKRF6+@+`unNnHU7cD!Iz!ta+UA!9rB|3MPmLYBJGVI#79=9{k!aQYH{#ePGH7c3Ahun44O*4ldKq}dCjH14}5dZM@ zdGvZm&<~{{g$dVGPY%w$`*W1Gj>!E~@-&X}qDs7bCJTL|vx*hW_MVba24X`Z@z_hZZ%+ZDu8lN zP?PytpPoTIlThJZvrQ)qgIzfH!CQFu!wVSa0BUwaIjD&w7ZI>GROaKIx86Z^QPZs5 zhO1HZrk1jnT#p7Kf&J&g`UAbp`s2~M+YG!usC#AtMNlp{a)GMr9% ztWX8L?r6&m%BMd~!XaHZU@QJNb2Ll|VdkjC0C} z?3?G%)ZD5fmy>e#4UJ=B6@>^tCqDe>9QsEr^T%)6!tt%gTW`OG*DsYZ+Jzv2@y)`v zZh!g_nj4xZGinz7)#dlkqO!RYK1@iHk|SUmYQZ~aK7rFOLfr)MW?iz{qM@!D(^2VI zvvns{QeR8*i!o$z%=43O5{;ORRSVHPZs!pDVeLl&KPo$7 zqGucI#6{*5A6>qQ-XX(0sm%B+<`=gvpTk@4e1!HsBV;RgP2-t+m?rVT`{!`u`Zf6Y zj#6*3@alzfboFukTPk_o1q>MJ7pJ|@H8QDIIf{O!B{BX_&_3MF4?OiCDxO zCpBOSHpUy-zgMv}%`uvs#?`Yoh%2@tE;U@SIICTPwMWs^*odE>$%e_3n4P{aAt4eD z+Yt1lqb$SHv>Q)i=a{%C`b8abZ(T}BqE zEvp<>ixa`##1+;tROes8Pu_SRH|q4zF&|8!|E?{|$7RMstKbh}Kil9Ce*VLspl;ZP z&@}p?cs~pewc)*w&!dxhjf~^y$RKglMU>VyKo^@#eMv@YT!dPw{y%;wP`2!GyS4 z7X5s59|e>ZhGspkeR38JtuhCom9&|ys4A{NZf*h0YNgvG8j7yrXK#Lht5u`$VSbp* zys@de7*{?%Lo6e5U|q&By!KE32W5Q|@K0yFWLyd(Q0{B(WGvOhJMHOS1J)JH133C94kB+*RjFd=UR2SUB^@1Yk!owIb>=@~yUcK=)&Yru0 zZiiqZ#z9LpE?>NW;>HP8=d{CuyxaLWckT}SgP98wV=jttB9C_S$(2GFM3}@x@-B!s zKgvZj2O^%jo=9w!6sbdi&*zu+XVR1bqqVvCkUCpBME#E^ew3Wp-iMEW_O~diVhm>N zg58R9XU^b%zi|%LeI|uJDqvbH5oB8ggtGCfLoq(9+bVLR_h<$TV z*W3?(Z(=1jspTpQ>%jtQKab?RU~vzaF=ZLW>y&r!;>~#Bg;R*oX?2Th4GLmT&e$UD zsIi_~v-&FSyLr%R?QHG9Tf{PE=5vDOmL>#4XC6?iNvAx#;qVJVBKs66_NVyShnetBT7}qT zV!GHU7>2uGH5+I0%=?g?F>)ML4XyA9rlU(uSIc$X7ikR4XJgK=%psOp@B|NfP=$1% zSiK|x_TeUsjPy|mo){ULKtXX89zC$10B#za*Ty3^`x1tXS_)to=s;&rFG4dmAT%aa z-Hn*-%q&lmFvlh(aQH$2$iD#a{^Upnprw;DJ%}>w4Xfe|y95D8` z;nQ;;W7V!Bc;@*R@xqJG;)x@N5zXWzJxzeMtbn59udRZYcK{NXY#<0qL_(-et=5ng z(QX$rca+@REI1@H1r?*(S4FI8+KB%4TDnPp6OL#gmU?$C_pM`0Pb0 z@eqqz(cadE%z`m)t{>bV#2*dx1;OwhMy|6@2s**^dLX*{}nJEB9VKMXWCE}SLc z8o`byj^p?X9OvhblQb<%EM*!eX#5ak1S%)K`#qd|?lhjGEDr76ittePJ$D<&c>-2~&G;qT@x;^5 zlJ8S^=FkBY-MoRGzCK!*2fUrK2jv7p64LPG=@)RC{2zPz37F}4ii&e7d2Lch$XmyR zcTom5tY3%NaQBQq&(cx#cGl5}MIb045hm(ZNl_W@+{%G2C=`p*V$jxDF*D{;UtnM$ zmMvX@#*Y5hF2sk?*#`TduYUk#Wo5XPU4Sjy_oAn55EH~!>KbX< z(-W~VG3t@1wA3{IzFSK*F2f_DuzJl}WMpIzOpEoqhbk~o_I)D9PUsC^)<~n_QFdz_WTRf;}bY? 
zh(5{38(z{^#WN7h{%zm69nZ0?lg}T;D=!?v_K0)m3Ee&a^0TfJKO zE$N|D~%xuvd!$#O7a zmc^?)K+{bD#;#Qt%DU|ATL3m}+J#f6zeSyT9;c2yi*O%*T)%V$CI@RHuPaKb&Odo1uq}O zr!T>iCywIS;f+ijBVhIPpKs?JN6}xy7y&kI@X#~IaDx8h<>!u}ys#MkJp&x)DGUq@ z;pVMkj>#t4=!haNgZ=rp{j;CIFMfsr@pFf6Y53EB+b2Y9q5t4+_ zHrkHcSvlyp1mN(~C-D4>&*Me*@zEU{5f&OmS+Z8@y^PJe4LfksH8#&3KZ)4DP+a=t z63h-S#HVK9=*g2fbmTECUYw3C+qQ5V*(QD8(?|9z`x4t~uByW2OP8^7!%jT&+{^U+ zlo$Q`Dgx<`-+dFkBL+A;WOC62Cv845Wjzi&{uJ%!IlOS{82!2q6Z(;PZ9(@1T;w3yfV)@85`_fo^11w!*?nm#o=HzjzFX9^HbJ^i=FR@Hk~Aar`u%IQS?+ z{YB?J;OifRC!alz6O_-Im8rA|GwYvYm%iRuyLu@?f`d?AO>k~ykEzr8z9BT%w`1$p zEr^Q^hQl<558iwgo&>F@zV$7IWnMgW0y|bM!TI;!!2kiKHi;tSLBJe>Wy_cIvy8r7 z^B?oIO#17M#fz3=^QH}mlp9Rii5b#F;=W35&4(Zkb7 z?of{pg)%AJv3oaVep=yxRr|=wo z&f1kr)T%SJw2aZ8%1s^gLzo?H8U!D8|(bbNY>7!jq22j-JLd#Mn{vbGvpwf|IA7 zBvw0#6UUDsj{eGNF`%j_4|(~-BzvEvPQ9S~(DO$R!*8SuxwmhjZ<2P&0T9eEIbmT= zu>wyVdxo)|ZN790(ejA-$mkq@}sgbWQ7A;?a4I9=YEF{o1 z#+t6n%~#1c>^3W&czg%^f^{fs8N-e}hp};SJcj!^F*s>c{xc#b8hiKeBj2Yv&QB44 zJdV2BI@Hv*%YsymG();Seb#x2^iH`UQ-HIVv$1^JVVo3M(f(dKeFPZ^;dt-Ea~PD# zOWPPKE6R{l+JLRQ58>sPUsgHQGl%x56+52Zu3O6l3X{DCRF}75%hs(Tj`<{$KMvW7 zqLcMCV%>U<^@)?p=6CH}uO21TEW;Pk35#&}3FZgS9zjA>6l1z4<0TQEOlAntZj8OH zxb*3HtlslD@f>5)i^uW!V~3biWgvq%AwD9AG099!E-&H~+`bi0GEaEnxl`D>aRthY zZo_Pm1t3$X&dWu9RxS=Qo<4h;apJ`jI7C0x*dmKris5jYQCnAslG0i%TfGq{UVH)1 za(s?G^%$bvkNZD}gl)yTAS~kOo^Isb$w5$PG@d&BGJP#E&*@{>LYoNolQo&M1L*1P zi!HnNGbX*nT=XE8r-tF;t?ZeJ19@D1#k!4nhO!ht^z9dpBPDp6`9=Y{^fJBWN%`5? z2kN2D9~*W&if2yJhtO7^XTI0g%G`NwaK4*nS(!ub4lQs}CIguh3l4KKX1HUSjGyGs z^4GzDB&*nbIW`s+h*`85M~)uj*qp}pO{-B;mJ5SH&kK2_*?=qO&LJu#6$hR=qH@cl zlzVbw?2I4vVZTtb7Kez@LPG+G4Rxw*jaU!!fs*1P)YjMGxtG7KaOkPiM{$(7 z1c5yL=r*-t&Wn2Lv`xW3FakS?1x^!#J^$2ZVuk_K(`Kg~(=d&8OIBtiFHT}uAg&{%Jy1ywKYgzwgOL|VEmSR{?XkC4Dh2) z0zr{6ICA`19Dn8z`?Lg0R&T)Z(~Kq5iDOSajuol#La&mr-(^zYRb z`RE>=BsSWo@`;ltj;nbaZ`tn8RrEf@3K+b{$0wMWh9Dq7+jv=oe0SqY@NUOEm(-!T z!=lmMNBsQg_$eG4gQI|y%$g>cn5e8=l8P`M*_IXt%V`=hNU?Dg$o8jl$i%=379hERpo+tN}KuNNAqPnmJQGr2N zoWk)5v|-K8RVc11M|aP}%ni1?<$Yh8DX7VCwhO}1I2!A6&}*E)E;?ato`&UuL$GuI zet1vlF;G(mPwNO1i8c(51>oS`qgWah;F=imMQm&|!IO-O)a*n;bOid^no*dSgZ$h) zl-4&QI4TxB1H*H`*DPE2iqr!Zf~LHRW@Ic`%Ail6pp?{s$fdE^@yJHpzL^KRR5mS% zp^k>X2zwqmgcOb;2+$)}K1#6p5)LemnAyPefy8<&LndV3$iZ`uAHjOIsieh!Wa2G0 zHbx5ZfB69GmoCL)TQ?#`KqK2*w|Y5wmi;uxm?U^Jp^srw5rC6)z)|7;N;jipVzDGS z44rLN)P8OCi16hT7LP|xd>?=Ir+w89Fghbg3B8etOL6qOe}?aU`?s+n)^EB0wR5IzV>_J3bw%Tn7@^+xVsUBA{^HJI{f(@J2FtOgsI&83N_W+b^;1h$j z8@J*A`HLUm_!GO~&o(q`oI{?WNM5rKfBO4>fbTr9hq^xpWlC0Nu;c2PbJ)9gH#Tq1 zP>H9os-qpn#>Mitn{kood{RfKn;sq(4xbg5F6D8&_F_d^Dio9ur^-@ur{cUlZHVxf z0M?7#++5@pl%c+FQo+0_;}CI>Og_5F8zCvH@F)NAukdHT^8zBtihI&fBl+fqu-J4w z^{qd__kZhmuqi%hc5E29$H#`@t@CEG#00~2;q3K7@=K;G9=r}jBdzQ#X}^8_sD})E z7V{XnF<_{x1Epm>*s%2pqRl8IB_|>_)QaN58!%2y6I?qnLXhU~?}sq=-g9$EYk~g~ z3`WJqW3;^)1VjC@&lqE=)dO|aUGyVPJhgiZ62k=ybJ*G0goJpu zA=2=oYDLobN8wo}ZgEjTYD{FSiC`~+vk{r(Qxkweeo22-J33o1E$H85@krC$tl;kOW=`tiI zXP~OA5z}Tn#=0xeG2Da1UHg!}Dns>4lqWDE8sQ-!3T%m`MbPi)J6lm9W0O;enwnOE zya)^q$k;5f+V>uk5)Ry|s)Qpl5_@)RL@0R|ed9N8idWmU&XZe=qT*p zz7^@QAuQ*Gw8iN-`SMAup`Vl}r|FUxg6OY)`%nK9{_JiAvcABL5Cj^fY$ z{LgXXk>%=9{8}-dgy_ZQtH_1JUgvc;7R8=4^hjFQ>7@b2S zY|tMgV@74yZW5`f&qCvK54lUvAuoSKEZd9~tCqvqR*Z@M0kUaDNlh&f8j01*7bAlB zW2~SXoknAipW}_O_vCcw#2Wf=3X7Qa9r}ulyGN-3w1(Nt~9P z&S#Ox-6t9cPyQDE+kgJ|*t$H95*C`vqg*lnDjy$DczHWvvlFLGS&&y=f?<;hK1}Lc z3+elEOVH5TkI>Li6cy&d#-v^LS$p>2TA0SWQQj;o8GT{sYQaEX2U0h#MM%uT4_LW< zo5qdkl@U`G1I~S#k4N`Bjos^4s`w`xcF0SbVq@h6RA$EJI#kz|W7z4Z_QxwCu8^D0 zM$aI$cJ!z>z)qQL7#bX5T#R@1dm-W(Pk*(yuw}RgCAl}%N|)|lf_{%FOq$G~YbO~{ zS#}!+KVR%Q^$an9>uDEHU)oa~ucxv)W%~diIw}H>KDqE;|7BI 
zxl!e_Xd2Psp*XyIClVw4S+@^@LxT{=q~FT1aS(f$=#TAFMr|Ktaefr2jy4z&91Jh| zE7g|M7lWNv)K+$4&&HJq_L)L@nm_#5hwS18nC>xnxyt?doa7`b}x(@Y~T>j=TZfL`T8tmw%AbC*b2W8f9%}0SiM+LuxfgGGB&4$ zqNk?=?c)|6^iek!LK0%J^T+{2NBgOmC zDyOnoXa_Fj%oy(H?S~+rz}cKiE@IhDYgS|1>J{qoeh-eltZKA5MwovR+Yujrc==NV zgmA?5gD4Z+S5%9EVFRY=7h9St=_5zb)X+k;kHm)6#Lx=H%uYvr7D-Ie;)Br zgcjJp>o8U)g{sH4MLy9n(FmNGkd_O)nRgw)u5~L|hI;E`M@({@+S^?-XHNwBd&9?j z9EJH}H}ZHa^KIhrNgBLSHg2PZiTE6>f8*wD*s*>!f@Huv!?69pQT)mGpF_9|ja0V~ zxkNsnL?h+I{F8GvO$`~xOZ`wPcWCVDNF%f zAl7f*h}Pm;$olj$3K*B_Te=XNo&bmF>^yR}HrtLyG#rJuC*i|5;N#=kXOQ&=5NCS( z>KF^<0C5qIg~X>|*N*L2A@PjZIXFZfEA@thj>ykp$3SZt%4%!i6P|$T>N;ZId=!+| zLl+*;cvT0Z_!vt!8k@_Zi%GMRY zRxXAYAc2yp(eOhfV#F9!jObAhH*VR4{#j7%Eg;U7W( z>`hxU;r!K18g?)$ONvmCd7Z(qm5$erPcPl3Lz||d`lvW%G)}5JXbTPkjG@mVYLf)>bhy`1jfdV@C;3dPmue~cqFxXq_|fO9UX_T>le!8 zO7>`y^V4j2+Q#$Z(C7AD<7PpgTqG;8A_=PHCa-Fb*UOz0`A9uZ8fZv(EFzdRTsij< zGBa~=`$j&hY8tV6^)f8VNKvH>Lu06}YQ|7^JDMx9k(qr96RcbI$tcP%gpJAk+yv+0 zNI1WDEI~*+t{*@+LAFnr`#j51KLUb72>85V9A?5pkUBm#PB7z-=&%?S*fbw4|H@>H zTuH}nGorMp9IwCr9xh$JhU?d^AX}Ei_xGqkIk$Z$otVt>3t)2}cmK68z{!fbFoH?B zL#gC0ZP2I^lj70eH>llZRk9@y)GJ?t1H}^^x@TB2R?3~=_+TvvE0Ws)0^v0coy&{- zuuxz1OgPdLv^SQjHfKmm^W!=g5fq4U*~3D9OnbpkmdAKn)k|;0DLAG>D8f+nk`lQ@ z_U`kdOoQljr%XorJx%u%4_%^(i$*Xg0g?eo@k7IB^Pq3Z_e>!6Ua++xpfzHl0T)%#uz|fAho3`Q61AE}>X{HT3WKU(+B_bCc!lRRVzv`bkIzI*$2tW1fW|CTwa$ZSPVTL)57Q`Mn)tn!EYEx|;}&IA=9 z2kno&bjsup^^J1q>K#EuRI;o8aw|jDggTTSI#d$Iz-2O^iplYZ@4bt27cb%3wVSw+ zna9MmAG&F)y8Ado0$z1N*hxUQcBS^V#(DgyUmP31Af&G**xs-Sk(7@b8yBy>emYpK z7MRUOc{1h!#1UbC=F880f!5B<}1RouI2;R&D;}+WDLbWP#I9OK!e)g|F#nme}aP{g<+_{;BzV>eA`sdpQk>Ah6E!xB9w9D3U1%NjaxUfQC!=F)U$^o zfPfH;v%x9qifO!$;9^PvQI&&f(qdV$YKesGvttU?#l`s9kAI3QSFUqxZsFFAJG7-v z%0U~OncS1|_j1yM%1s#8F^zv211I|Q69`XAL4bN$itr_zc_KPUN1K>pyp^O&OGZraG)!w~C-#!zzmC9}yUdb!*mP+e!jQo#wgck;s#>icZbI z;e(H=g#Ml~^H`;9IBq85CWGBuz2V;1%YhEY>H*6n%1R2RF(CfgmMQOZ- z^?G|Uz9u6hEfJXn;#R8%nwz^YLBPLl_eS~+7MtgXMI*J(EK_n`ChcWo?_s2*PzQKO zV)#rE^~Da`!DG+#v3|ek~J4)lFEsGy@TVGU@37ANscu+Rj}gqDWQ6 z{yGh#YR|B@KlljeFJ8vAtJjg0c?)AhJuKt8k{GbBw*|qmaqtS&bXr|6y1|cNV8M?v z^nSEU*&9)`MI`P`P~%H%EGuZlYpNPu)!v_2j%DN}OEJWdqT@=hRXieQk=6119v%}3 z-ynbFX5Yab+VPz`1-Qh|qkDHFJt;`}00Bk?#kJ__=tXaH4f3;YpsBMNQ#fnS8Y zRLt!Iq^DXuQ<&R2j{4e4y!X*ZID7sa$L1>W;|=t-cgp;%;(NArYLn~7-JDCJJ5Ifh zX5uOL=B0jS7*sBAg!}stUz#z&^UB_N&R3qs(B6co@$c=AZI2#6Mv`p9;pUa+146NE z&01_79$5CV_H^Kvh0@Myr9voxAA;OVy z(tyI-H*w~J4{+h~6eTv$9c%Hvom}cd`cAXZCF{YZW zIz&ej^YW<3YNixpgvK|2)39Qqw7g5+2rK-`1cOY(NnbqeMzl56UV# z#aPnO)WYGFJWDv$J)9k5A2t3F7>3{f_kWJ1$+4)+xry_%jeq?6e?oRa z6=mqcK0^0?agqGida@{O5&AlNarxYN+|JCxovdP1G!dfvfb#5=(Ae%R&P&^{%b!m}gN}rp93~j>9jExeo_W zla|B=*O-v4yE*+bWVJbAAYKm&4pqKyM%G?V#M4L13tpI}vQhSf`Xe`Y-L@<-%gYtJ zM7O+kfryGuQLE@~Ub&1rl-1>ncW8D#*t&TG0%#W+X}uC#5T7Da*8IrlA-3l&KEns& zqoZnJlsoH~k0snR_n0O}$+P7N2zT2VOPArrmtG=UQ^=-2ef{-c;zvLI5o%jIB-*%Y z5RIdL5ktz@Dml15Zc=WNWA96hq~i&dyLtE_Bsv*~_w9tg7d_V0BwCy6p|?!n^6h+F zapAe5>PGs}a3qI^5#Kl&4+qp9o9}*f7N1@wj-jpQWZi<`@mCp?%5}Z2TF0{(#QDGkA8^vKDms(5xon` zXvG&GcGWkG&Myk)zGifHwZljb=MS!n6$z$PG!uiXMp7#iYdf8zX&Qk(a_`N}va3Z= zOoA4|>iMW03Hg3|mGOJqQ9ZcgdEWtoeF9O0N)Jq9^ znj5Gv<^}p7jOtNebR9o^_dKGOKZ?DNJ&8ky4`Kf!yAZDPV&}EJggm|Ft4T66Bg9B)T4W1jNo7x4HSYDc=K>&3nwbVn#%a1al_Yw~0`UH8Cb*P^|9Clek z@480NsKW>SJ;PYMI90G8N$S)B5PjT&B!`u3$lv&2Gjj6tP~Fl63j^saIoLlh$ijin zzW#eB6kWBrzskAU_NFmv)Wa*#N1L4BN%a;gcX^U}k%85Q$x$03sqV9h6ZDKz!l~@;r%_s%kv`*e)i5TM(+FeT@!duwxvHQ`V@`ZnB#v&{A83 znwl!;8AO%Dg;9>9cR(Nl*}<_sf^W4va}Lev%V(0#n|#GENH)U}7^Q4)%Zf$xxel~a zcViM_5X_uFtBLJQTQEw%nN?7Rj_!duEzExNn6@%k=p#5QLStt?tc(`3L4tp&D`Amg zXzS{kKQ^->Cb^Gl2i3m}VEUm+IE)0fP8~(zg@CXCCKGzs_;H*ZRtz%mn;cVcsAE&; 
z?d%~~zs}^!irxDU;qYUJap=HqwGXO4n-Qn2k^E&JX%~e#chr^$GkYH0MN?J-+Uko@ zRawTMB0F8WhzUkqavIEr33NBr&9Os`G&u-ko@z0qRRHTu0}Rd#hVm$zH-orLO3%u{ zP5f=Mpp>$Wj!i~NS{kN}W@KKwfaIhA6clFTI+L=p;sWNTddS<&iyQh07J`u)6M>gU0rXu$A< zcE|F5r1jVRaTEJLuUpy=ndFsoYS>8gijdsL-G3#f$q(fZm?&AzlZ~-2{&*x zvjoQA<#^)B<5&~xOADkA9BD@llZI)tAF9e~=#OtPc`HRgTsle$O3~RksH}CSjTw?7 zcOSi*!@G&+Mc1Q32}ET-R2NZMr!00@IF4bl;qWAQ@C^zTRfTzI%%yNDrBZ*@N5*D+ z2tWE?{}Wv{Z;s7T?B&=TKCl;Sm!@)Tv=u9E(s}yAXUZEkv!qU1)X+SuJaFQLsL%*Z znXH(S1Vo(^^1|pOe|!3=7e1*8_!--nPxl_>5!ys~VGhdc8fR8&-b3=%;r@0M6&9d* zaNOk|HIld`914)P__b3gl;S&t$%voMU#-60e~3Qj@L?Q0@&ukd^%DN*PyZB45<^uIskLH*!!(52 z+G>>5wZb5i9rut(&!9H5mlp+$nY3}5Pr{{}f02y|&7H&OH-;f4bvYs#ze1uz5D*`Q zO}lnbPKOm=j}v44?ss3t?|<(Vb+btjtWR`2Hf`RHrrZMb&_5J++u;`;jddB}T4Jkg zb5<&|@^XI@U*!#NA3uae1f#3FP2DVUb0?&}b4+~wBA{ceNlss_wzN5X_%R$H_Iv8s zN&J_;_&sb`7B2S;k)Dx)Lh@r0C%h0jjhJZ4#hH)Jq1lpvEeD=a{X4X84;IBoz=z4H z*1i`aB4RKxFbey)n!1}$ssnR5brVm}R^58(I`R-S{{pFb_B#0``sKt3{gf?0hcU*h z;ybr+>1G**9TE8bAAB3hp>pe#anwA5>zP*(!~T@zWU7}++$ryZ$?gGtXDwq;qw?jN zZn#?#ET|4VG^yf@r@sEd@Z}gfjaDuBkTb+T7Nddw%88Ju1P(J3@30sc$Hs`s>e3 z6X&Umwt*Uadhr7C+pSo?=Sgjx5A4I5)Oh%L&1I0^y{H$Ac_y=EM#J3X#d9ISA#e~! z$Rofr!~j2H_2RiuL@^oDo-TM;vM~k>p`or4MOE}ycD6XTBZ_!z%7V7)QWTYxW0;Bi zy>WByMe^PCn}4(kLn~@(8}ZRcHz>Ct>^XQyRPT6Z&635vd&+Yk~x@Qj0 zX+zO$yi~_IhG7xWbj6GZbgr7N^J1q1h6w|6PJe_%CK0BDs1-<)qy5b97M7c`dH$~> zt{0GEoA)G!o7e)AZGZZYJtvRj2mki>aP07IEQyOk-qmZkb|VL4GH$Buy;slWUiRI# zxO1w}PTY~4DyspW0g*WN{A0vBu9ePns+q;@@IWDemMLZziSyOIxc;3hRr&6~@W3Eq!^6>7QA~Zig-;7x zkh*aXzIW;%LX{25Jc8?bUUGMTRISv#K<3V#zF}CiX(tXII!HTxoN?(WPMkb}zxs>c zL0q`#I(;N<$Y!A5bDIr)($Lr>CWym79|`|+`*Sb(dix;UFM#7GdrG@TOk_=LH_|(X zl$1nC=Sg>u=Dp;w5m!wxS2a=>>T-yEGm%@|P5XEZTX(LYozIbs%)OZP$YWfRCvVK` z>w`I+OCl#9_=iSfaY7QRvvXh})^yv1=%}hoX3o@T5ZgE`vyaeIzoz*^a(X*`)U>xu zhkL*`ED<~QKZ`&8qd&q6Pwv8_OLeHv{|LW0a{;nbs)nMZZ5@1QkG4s(tVx<7H~*T6 z$glhKNR)(z7?qrWh{O!+J+xn8r-O%&;_+jz;CH|GJv_5*5kegX^p;%4nNO}jAGA(A zqON@V-d%`StL$eRcQ>i^Tx(Z+%IvTEax;g-_=6CYxE!aCKaW5Cz2C)={o4>7>VuEY ze}tUk3hDiWh>n~=XxqjOhzxKb`}SpYbapbZYLjX*htknC3@4LoA76iDBt@dM@D^+i zyXuO?YE^G*G#RInmbygMD8}U_cUjc%vW(e^tUEc#Dy_!mM-Cz(Dg=#1ne415e(#U} z2+tgQ22VeG5=Wo=HeP)BJMfjao0gS97oCB%YgfbCQ;L%8+e}1e6U(8IF-#g|^HVFD z>&wyK*N+{$_F>14?O4BI3yc;k^_3iQWaS#qz!;<^C&JQOiK@b4Sm@lv;Ti2Vwc)i{ z_7zg15|Oj#^CZt9l0#GiDWN8R6tG}mQ3|VFEAc>NBA@%wtS^>f_drKSH%tZt{@@4S z!O3Ty!n3Dd!12@1Vb!|D1YNAxI)vitYIwvh#q;0#E?zwTG@d?o8Yf@)9)9PgljtbV zLT6XEwt1$stoZWbcLMKeEy0l^^3h04LP`_3c*x$1bSNB4DJOgH$n5jdm7AfPHlQLq z4<1hGjTh{+_N?4O1jj^T$+|Ui3l_;qvGAWBK|#*-nY{s}_`r}J26!EvT{Wxjf4fO$&4Gh`AA&00*MKU z>JEhLyCli8H~`U@DTh_PRkXEz1RFQ)hBx&{j!TWUiDBIQ@J;;f-~DeC)epK7VmTX# z)WsXHJS74C_EBuwxCGwb1mY$mZePEN@Q`S%U%PHjhn*PiZO4bd_-Fj|=RZY5pFwko z(g=0LFDL@*R;Hmm|1z4I8st3=>QXVAHYV9xHFAX;s>%Lf!Ur`%f&u?3&G8My`kjv; zBrFWcF;U2fk5(&|dzpA%yqb*@$B!a{I88Mxb=fCTR$7ez^4I@_k3YT5cHQaqoDV@$ zM@<3#*Z=-|ynX%#^a5Vp#v?rV2VuwFy|{VxQ}t-PoRKSy)YE~XG3_yZ0y$4#9sLiJ zD#Ijs1{!N?aqCtdM)U;x)AIU%Z*1JK6gAa(m^2y4ofo<|)>rRjF|n{y>XI-LQ1lO@ zxvCjSamm>A=p$IaakE-+Yqbdg7Jl9RBhf#M_q5_~|MEZKAO7_Xwb8NY&3u~1SbrDJ zzW%TH*B}22YP-i>W5ZfJ{Ef za7y+vIr##fVO(0bUVHJ3hto{|REe>%2|RK9MVxx>ImWN!c;dth_`x6iDH0+=kbCVC zs}sG^>XlYB9<;s*mP`u7kXrMp*oKxs-mp932u~!>!@<$uozHu!^d)iT5UaowG+#|Oz?tYq8 zj)X<4u{2%YuseSe(oOCo^UhEj|m;d$G`1^nPrF!9pTQYOWX)zK&Ud7-1^S^!0(= z#zton3?UtY!;@h*8Zg*i4LkKx&War5qj4ys=^A18U1_%OE~n%LU&JTGV8gOF3=aqk03Y3UN}{X~lR~ zJ$~`(&+x-H{c?o;_H*Dva*{MXXf3SWO6 zo;!IAFP=P39DNGUfBUzvNgm~98IS2n^mH|&ps^L-`{O^u=@ZZ5*=J7Dr+pVMKmP*K zi1}+v^VKHp$}>%C3F{QcLE@#4!hf2UD}N^694ySQ4*Di}Oi%hF{gW~T+6YfxiWuUx z-jWEJviD9XCHd12Yq4xn86)Mqf2Dp}o4nsP9En 
zL%rJ5ZTBPlv32WCghoWG8^<1ZFTD@&M^s!4DhdkZ;SSMAH^=Jn)MZODFf!DS#+o{H zMsC{58!IaY%*a@>9O;ovs@1rV<__5Xh%v_8-;7`U_@D8QKYI_I{r7HJB+BMAjN|Mt ze~iERAOC>5&PmrxPaZ(-^Xf09PQTdQ+JV029xPk596P9^Yd379y3t>$3|R_l7NqP; z{Je~f=oYVakBvONqrQo~yJV*wlT|A6ubkEL!S!4BV63AB?Tq_sUY0)1Vz47KCtvNa zos^LdkHA=@CMCeqQ^B071SVnt(O=opV~9DGi8kT@jX-k0Y6Y>efd~qiA~xw&5l=hr zCiaoOd1QD1R^r(;TQ=g+M|R^l^P%|IDCVCX3L~nu@3J|V;??CeE7mQN1=n+k-p2(lG=Ai}_L*jG;YF|Y$ex|BT8#(~kHmKIdpmbv zCv%u>n`JNgGq@q#c!*>aSkV z7#zerJm)5Q2S-((q;G>Gl2getFe%Lg{Nt+j9+L27F%F@=t_ohdVC;G10Jd-4f#|pd zs<+7Repfz-9E`hqA` z=I}E~zud{(aP6w)_}n0Ft;b1#1vk3Dq+kun*t%DOLbz&zN6zxq%A5&z}C{}Yv73X%aJP>y;!m(a}lxMj#|O41;Yo zm>ANl9!Z<3b;0B_eUvL)0-r}buM5QH9lPOSHX$iI0K2wt#dhk#)}4%Fn>Ha`lz&-mZqF@v5ah+{$PP;6#5Eu876!bz%eJjEPL$Gt( zHtgKJ124XK8di%9U0pr!`b(0#d-sx@@kFF-if$f9Np&TLnedGc52J;KSKr)?(QyaT zQxny4a$oxx3i58iNntd#wWGGS3MJ)@h)Z3Eryk#n053C&^2$+P-3%X{7drd8QC?n+ zth`eAN5rcaLZpXz;TJ#pAwpN|#Ph%XoO+>xyx}aKAK7cEzPuQN!-Lp<_-RCk1;8;> zg_@dLbdK6F(AR~Q#zu7Z4id;mB0Sg=6JtZjFQ`PQe=w8q0kjYhRLN$O)eYG2_$jPe znFfDv9RfWa(6^PLsj(U(RJN9mcC>WM8&0PY8XArOoeAxArO2wAz^;SGkrKu+C!nUr z;3q%(CH9~GJ~HIZdgDxpvu>cKwhje_Rj8_ILNFD3U`Y1lV_gxU@Fq|%yK)hOKG9gU z=O7p_A=@N&HFn_RPd>pbfA|9iPzHQ`H}2$SBWBrJOc98gCPy(aGKh};L4^1+d>h*E z{*@B=r0&7VlTR}VOIC79h>t;1h!@Vk`#M6>R$=*uH82fyBj?tqXs)lvot$#C^_vhu zLmHq=0t12&!UWJb-jAA!EL4@1sJ#oyT87}E3xQ#<7dpx)EJPmv3x{=l2o-tPl@hiz zx1h1Q9@&Kz*mCG`?B25pe#~+Le5Nrq-j0&0YIN$SFgP%P*2Xqv#Gz5qbV52ftQHiP zmcwKsaI~Az)>Mo3sxsWntAs6hF-{&otoEStl3SSstBq|vsI9E0{2Vm+76L~jbWE6K zADhr1FO*i*Vq|axV?zUIZfQkkb~*GmAN=6=et=kc)Z9kC%bSr|Ie_P%JC4{86^a)i z_Kdomci}_)@{?Rl1tj3{gL@DW?58Y3pu9Y4D@%^^ic2xf@i6H}(a_Y6wk`vFe0&fW z&B_@>t1_>mv~3tRzeo%dG}M%qA-kd;yAB`4RwmWK-k#`bsKWanyp2U`SEH??4UP5H z$SbNuP-F%H%`=qM?BizE(Jov*^Exi%H)F+~<5<5W68;Ko$jXW!1bf<0U3>=)jzKHI zQB_%$dW2Lqh~K(xr7TEdT@qBC=x?vXg?E3Bh9L`iX0qI@+3%lUs<|ni>YyJakdd10y0a#J?W8AVh?Qs%84} z5~D#TFs2DT+86*YU(7+j-ihD+!+)b*-Xr&TJY{0XHi`n;>rY?1g!sfn>_50u?R}w* zlS>MYaWt`yKl=0ACGsiBfubFe!{;kS%7C+s-=8wCh&1cFNLq55&Yp zaQN)74)>s_z6-4nFiJfim33}tq4`|3G@Re!W{oU5sKo!z6* zcaP)fk;jmb6bRk3Eg3r~Tc&QwOU@H2%#WejiD3VYqnV3W}>5@#lZ}mspgRLVp~GxcCUfN9a)B zGKjk7F+8$uEmBis;51L8DK8)Ti4pYahtb&7jP|a6m~BqP6Fhr4>HEsc&@yI$&d&#( zEw!kts6%c}EgU*u9DU)Kntb#3qg{+nqKf>R7;^@Dd)Q7F{hwVuq7|U?Mo)7mJ~?+D z&k_8G5NP@`tXn3{c>k^Q>IF2**DZlvR%$j@q9{Kb)fH99Dru^mzbEZNk*!YRRR(d642GufSfy5@eFO+X0ssY=4~`o*Dy{L zp|Y_7;c>AT9AupE(lNFL!Zy*%_*kjl+%GHUs_09Dg9ucHm@LpfMaO-}dt^iuuHU$$ zwn-Qr=to_33w6H<{^9X>`q&Y~1QT5O>o7fTM{9O2hQ|ibZ_uNOW7E?!2*2PEB|lI5 zBq}Q_=nsr6?@!yQWvs44PF9tI)z7|iioRWT4kK)uHetA<0;P4W7`6DIr?(G1{X?uL z1VL;oI@A{ft#v4^Y-H^6KznmF$|?W6s&+ha;xsmGSPmcCBpS-|QC!o8y-$A&36VPG zf5blt&08J9=sDU^iy!{eKcacu7bjkN3E^}abK#kgld+`u_659p<|@LMJ%YWvRw9rE zJf;n5!ttY<)ks>t9oyEWBEZ)Z;eH-?>&@3t+R%+BpFM%)OOs(TOd>ZwAHMXZRwndq z^%bZnEkaS_C}NgBf_+;TL9In0$doCOs)iP{wbWDhC(+f>K_94BdjbV`*)cNEiCed` zP~XtVL@yJ)J^f5DqtV+p1mD0=1O~eHWAipa=N!cC@*dnM8N}}0d+^Bi6$qoS<)Ilb zodZ5hn6vYWneY+o^mL-0<5$<(sglLmm}t4LrpbafHr7&#pHl{PU*-nDYDM3N?!X4Rv+I1chj!58Zj-DE95$ zs`ifx_xHqDR}0EY8_0(fos3@%^h32xtq7xD1_j9edUSA}I(h#P+PX_|^Y$GC1}|cK z7Yygmvyz_KDCY)LrrpFfDeCbfFApc?+9_l&<)J>U| z#ul`9_G5|}e;j>-m(HJfY6`toIq0Kb?`Lw}Oj$KHHDQ4MAvih?{*2MdNr@P3X+m9b zE=*1jboBJ1xuKPH4X6Y!DV|uzI7q#1$5@{Qz8+RIx3RBQr_3TfYwGAjn-#2EA=188b#VO$PDu)h_T=-acJ&G@%W z+?J*%B2i)OM8t;%qOY3cO?_Rm^B}xzdeoNOBt9%dZf+53yT+jl2xpvXM{qzO6DW?O z(}vs|H&I_%iJGbkT+J>-XnYz*M)jDOup&M-k+I7k4&v+f;v$R>_hEoozJa(czn}t( z)@;G<1A7ogoE1pD^DqpdIP(%cu{YX!d(qG;_;C#W^nnq~VyA3lsBi2M3 z4w#J-)TwG@7S&+H8H$5P4q-)l4COwC>by)8c3H9e$VsGzd$|170^~Ho+*yks{qlV@ zjQZffzDJP~8+=z;MtXfe-u(HGakIJ?k39K279|Birduqz=Q-Q#$NRD-IPgjZUO2TYwl!aVM1<_`1n%)Mi3Mh 
zY@JohYeXd_wV{(X8u~HVM_`qlhpz5nELuDd&aMthW+UWJ3yRv4RTy0@fu&^dNiRf`i>!8AMIuc`e76FX7*fU z&r?#&z;QF0bg*SV2lcc;>XVF~vA>^=o0k*p*bAc+^vH;ge2B~q(9;IRcZ^7qCwgua zIkFwf^6!6rAM@ugM8dpywrRv5`^!9(^3r2Seh*WxdB9HbDV0-i$eZYfD3y&Zlh$i= zcH&{tDS1SgevkbVdl(q8RU50TB#$~iqU*u1z5|7M*+?qvz{(BV;p;e}9uw9}S;|7` z0{x4DV! z^~lM{M|o2V*57>(yj^USUdUZ=@ojqgBOUv%87^v=G1Tu4iY z(QJ>+o3<()9Mdlgja_UZk+H~%wmv``WP3y8OX@RF?}tnb=rUQx>jcKD6+M)jUN1b6 z0|A=sq*N;(gSwNZV<6|aNgwG$gntGLg-Nw?v_z5&Jx85>!)FHi4iu)z$2 zk$e~l6v&@ZM_E|vGTJ(@E(>@yGZs;|ZZt(_4HF8 zWWP}d@_v?o9={`$}0;^dBP8&_GbCs9sa^%eMJ?_umem4^EIdQ_DbASJVie%lLCk->0g z5VXom;<#y8k3JoL)9%DRM(eVYEb(C+l{lfJj6_!^_KgEZc^%xdA8)_&AwJx-4;wb! zhVT%7RhI&9bS5T_z0shfy3sH2uadh|O&M!-Qg(CzGCq!scSd3vy~skwMv`suk*rUp zGV}|>I^jw4qf{G?72}r_5Ps>8bsT^FD8JHIk-w3B9acJFhkm4=zkvIn`8J$Aeb7RD zR8f?Vb4QOd=DHv>eij@hc@`zL)G1g%`83f{wkLH;Wct+gGSW zL-a+;$Y;E4#kS4*Ze*qxp{ZpEK0coG`79+0E#qw-KwoH1ZZf;Kpq%hiBZPOQ}B~2Cw0+orAt_h~E%x-oUPHX<+mB6?ZBi#L;Lc1-G;NSb~c z=@)2wF??v8-3(DuVmEdQ4>7*ex12n68D(XS`0MZf zi1*)n56^x16*w}+s^}y7)Lnzqr_bYq0~b+LR*uHHa$>GrbaV|PhWOaS!&S8@zT1Xz zWn@@~em#Acy~9LII7*^#1LTYN%H*gp0RJCHnoeeCrQ+AW`3(*pKaJ4v7|e{HMtNvs zC~7dm@fxD8+uJc-D7#@lS+zW3Xe)h`a*TJwR7xB6TwYF3$u~Bx;a~yY_o!E`lhp`X?&;VsC6NC2XLtgSFJo4fX7^^+d+FXsA zieen!w~uA*5HV{m_1H2-)GNsa3}QFJ4{cTXG>a_@X}j#UbI@G)D1a9H}dl;P*vRrUtf7~jnrn*MjO$YKD_(xL9AG@0W)XEf~Go=Bu*HJ z`6yR|$XB{(!R=CuJeedmU@vwt&TdTP-E84q^Pi%7v^~Z+(LaOOjp!f8TvjKUX^Zwu z0?RW_qqwde@Ck*B$VR&Ixsu3~Sc0*}NM#X!Wnj#;rP0ccsy0m0+lU)4rWN4Q*^7Al zFMmQiW9GcI8{keKq#h2W?hZ1!{^;N-e85jrQxlWfYMk7)pJU^I*u*&sONwYUSx8(Z z9x|}w)Tyg6xr`whA^z*9!!}WW#eZ7MSpA2I{y#4x9bf^l(Vtb;qvKA_~nm&h5D9OwDxwQvLuJc zj-XfXfRO2l@E``$9=fCq7;^`iONg%$tSWp<>>88TAWWTA*moia1I`I}`tj}XcGk{Y zKVu|y3P#kSxT+q>>A9FTE!N_TWek)s(8wJ5Qd$aPBOpoYnS? zy*RdSH`+$r;O6HCXOXpB`ni$Fz=|__70wi!Fv;MNAGzrv{S$pVcji1wGm`Lw@Ba{< zv58o)WWJVriJYkqeVsixvi~eT*>@Bb4fUw0EJ6`uS%G1tjCc^GfK^A6tMwHq;sOAs#@k^5)`#{Xl z*3gDShc06K?K=<}=4TmPA%2nmGAB0owBY08*$9YTjAe-taM$h-kVH-v-$C4O6q_LL z6Y%;d?QCIaZMuquILthfeq`T0xJ9~exJTWM?a+_-szK=$1^H-w#eE{yki)YrEtO; z@u9`P%S5`CQ<>r9>_GRSM|x^98k=g+(c6vef-0ag!{HkK|~3V(kBBGxI@jtq7o^W1SH71h8wB!0YUI|b+qtDkVA*E>w|+RFRR0}nbXICSu2S#=m_YmaOPMFlOH?yg!owkybV0P zeX(-QI&9yu1v|EF#>s;haQBV}@uQ!7A3N^cjx8HyvXDwoP^2fAC@L%Y%!CUkeu z8J{|d^vrDJQC3+w<#cGFaF-c$%Ca=~GLlXmM^<_kF6Wd0?g40OXhbP>re{D8??5KI zYLZUs8$ee>1$G@hhXJ!Ig8Y4yA+=?E`eS|=wzNQo@ zm(L<2BU|b6$&^eO?VPb=(?ddM1N*(q0d$it};u^hM+pmLi*S@9OPCNN^|| z1f-BHLw^SW(|IIayo8+0Vzl%as2Aj20l6!PqUddH!$047PfZvGUNvDz=g-8nzNQJs zX%l)T$ts__$VaE@}4$&=1@0?~^X zkVc))FDgMzYcD+g{Rx;HS>BAS{8SVdXTioWh+`*Cp;>0^>F6&V+>7#>1~>;t!_&bY z)fvessVYGigJoK31{3QX6je6TZv5cq;ehUz8eV^#4zm!)kDP_Giz^27eH_DLl+`vf zv2=x}(=f7VyUE$*2n+~-yQ>p<5Sa`y;V3~yRsp=cz2Q!Ky|S;9_MO*BaLY(f$C*oM z$SGmc(%iv%UE#|*%mhmL$>))I`8+OCk1|UMc%8k`R8^{cPIH$Y(Xrw54Mq%fcQeU4 zNu3x*OG_QjFd5G;%tKRKFWh{Bsau38!+pp}%|r^@O3tOPs%u6>L@@POWGVcKEzlnm zfHnx2KYpBkqZs-8$fNOn)bWjX-3ecrFeKP94D_QgH4PUTl(GwoS+EM&4yCyzk*Fkw`8b?Ms;ecdR`FTjbj=jqeRQCinXKji=)A8#ht!lMy+d3hZB zT-wP9GA<_}Ju3^@`9&BoIv^-Gh{?04aX&IHokjAcRO(k1s+-yo5*wvfBxyD%J*GV~ z!R~6U$Jz52k(`-}tddG}^y?5BNS{p|)?^}M#5kwGW+Tqgx4*allmdy17ifu)^0m~R zwuTzUl2jy%FDkB3z{1bZi|sN|Wm3BTLJb+V2k}!MP3~b%JQne!qmrRUY zI4V&al1JwY(vz|CP%XLVj5dbT&>1qWFL4vtRBmK}J$kv`@`QVxb(0}$XNv6y_0=rGXU zjQ97PfUBo30=>+rDXqkNN6N7K&@Mc3-|g74Y6;p(^6=p&dogd*-3ShFha+uXCaFua zv+399lf`ZdD%GT-w~r@mnLrIOsWkL69%Iv=FOc>A7-^I4e#`{b>eq#AH|c;y*zHhP;^g{FvjM}3M3sr zL!B;04&~6GTZV9i-51U7aVy`dZj+2{kt)> zH&x=358h+~TL_RTSJ3f$OX=zW%L6TD) zI-2W{l9Hi(uGmd4lLkMIjjI!F(AXZ2 zt3nhpsqm%UcrxyZUCK?_;@m8pI&)EBoxJi&CUHi_#$Y(vn9x?1ixcP5&|~Mrx-E82 zyU{V`?It!5TomSG!+2JScMq4~on1Te#6!1Z2l4e_eHH%v#@{f1>wO5NzQ|fM@oDvy 
zf-{qm#yFf;Sgw*rZ;rRz5H*`j7$nce!A8dSG~~C~V*A~9!;xcZqV1UFMn3zeAx-h2 zOiBm3QIwT}zr3*%&Fy9^SU3xAZdPO@g4NSr2k_3j2eEv``iT>Uk^!>KuJ$HmWnMy3 z(q+cnQdHNs0WKc!a(7oZ&AU3=kd~6ft`DHRknt~-^32Oew@eBUM>{d#s3euXvAM36 zG443yPCnyqHho(TbA>83!59!40B7o7+K~^i^VlVH!ZPge&{?LUNqZ2GIBT2$4yAUrx0F3f|52bz#fK9jPl5fKysTjqNs?jBgU@ius> z$y)YpxCHicOfXuPfe!S3Z^m=Ta;Ac z$mvXFSHS^3ikq=suwNsCXs;`0d^(L0M{fjE7BW$$dMpKLTRNQ{M^0T*H!iksTuC2P zg!!vC!iBb@a!}@Hm6y*Uk2cWHD4v>_%<0pVXE~Z$4fJgmd6E~i zp%0}+`M7-P0>>m5xw*vO^jWgd#m314LG)Sn)U7xF@H-sIs(_Q1KO%|Eo#YObg{-{Y z3QcM~661)~YAZ{T$~-PJvj_zoFQbheyr_c`cJ+N-IB=9>l3R$o*DuBK<3}-l^$sjp zGMD2f6LUkvs$I-a3-GHyAHd3uTk-Ahe-n3ZSq_JuYV10a4EvzPNQh(nmA8$^yoPaz zdDl7Q=awNnHWCgJk9bDoP0CQ0n2T2tzw9}h#29K%-|085jF6dOq6JPi5gI;z!q7r2Gt%3Joz%;q*o9c07@?lL z)cCRbj&__ra0CT81;}NtURqfTPmYlbb1ZFQn=z2Sue-CIwv&pBNl7RWo$2a>tFIqC zU7W}l>l+zDXInE4A3la$;)bI93YAj?N3l(rxFa6BbnGB=f^?iuD?weGp7G3#isDQZ zln~RocoQ4CQn+0U3Ek#5?6^?dW0d9H?|pZiMu9bm64<7Q+ylTWWEZ zIaNUcbE*#VB{`Lgv%G4z&RK-Hvl9_Xf962r5=~TNaHZ^Snk-{X zImrCSK<-xP`}*+u_kM|oAA1RR-mw{WOkRvbZD?y7z=;b*2ng}VoCVVrAQfQ^Hdo^J z|M?^Q<3JJ;R^EfSP&YLZAtXSprJ;^VTMj#3j!q7GTSX?4FDJt-bS}ce1JPVlgUczW z&{|u7y!w778j*0f>BB%r3yel1rpL{Izn2@^5@$vr-_e059(|PwKsN4w@=*e30tMBU zm4=vfbYcIV!?ZEkSCvo`1In*@w zpr@}D6$EAZr4{h>4WV<8JM|-I;IaE3c^tz&p(t-?M@M4=lYttfC8Z%C&=--@LQz&* zhb)4Zxe2o|Gj2Nj*#}Pskxwp`AbRlzBu@8(EBo5kSkEMviFZL6Iw|k=DkeHiXx)Nm zBRncVIhfwgRvbQX07p+>K$p(Q-|ZM;;PmtNV~}7O@+9&q%*es+kN2UXp&8vho#?5f zp)%=k51vV|K*!IzG?;It$t=tW@>K$II=x)L_Nqpe-vN&w_l^r&Ks*1FMx-$ z6*O9iKo{*5_|d=r9REC&jltnuevhKDz2KfRs_6#8|E(Z1{1G00o2{v+2TGoJ`9y8pS zK=d6-qb9$_VLU=;^@g-}n#g z-Fq4vZr{oz##{LU`AkMvjkMg6t}n*k4J^U2P? zV(-p(2%0@{@_afDpUG5@!Q`Ibhf}FpaE)7v86mzn^X_l)mpzA2YH&eoLnFGI>v7?1 zs%%w?g!owWGQgj^a1Jd5FIg!C@bdJ7oik-jpe(EP;-bUg>*<6O9N%}}-i2w?;tyVl8Ui#!8`;7`l$X)roH27h(Rm$!Wn$<$$pMM{Z?-m^xX|`cXfk* za0JK4MDUu1qLMWFloAv*o8aOf#UW^>9=9{VY8`d$1u&?Atz4wl`P*+igKYhgH zpsI=L+KukkHU`%!n3;HPU;-BCLP)BU$*64XK9-KU=4Se;PE=&2qouMM;qf!!=Iss# z69ExH<6FB!a_%4^@$WFN{qUdf;Am0?22I20 zX(_|L-FxuC!DNE(1T0xF6P~n*mco2|wD$lmS2duU*EClVq?A>{&NB!R!2~JxW?Ve6 z8-M%buhHIPi`uq+w6|6wCo7Hqun9|-uOL9NQ+K~JFQnkW;p3=mZo@!#8=CS{m?+VI zM9xNlj|&_a_YUsdi4Wd*Q%&AhG+iS=UT)e1g91$EA=H)?;rOBbID0t@tvv<|^t7uRPC#MJS4ZgkscxP+D>qG;Mf7U`Gw-i=bwj<6YJKuGoCYf zJa8DkQ89>4oXa?20l>yi{PAZ$$M=5mPpsem5T-}@UOl$DUR>JyKEC;r-(fgl4puHs zfct16_HCpWO{KZ`;Ak3lJp4T33E+1A^G)3T&||PsKu%n77n6;D;Aj8+YYe%Ept80B z9j#iNs^Yj$pB4!}%KgONy?F1>e`T`Li>l@>#tFHDUy9?WQ!s!2a)kK1GcFm`gtkm1 zR#eralYXHh=K{)Er|+~y2=;PBdB$=4_uoE-(J2xOX3GZgL2r-f%F~BO^T@JCCH2?|St0cB3*c zhkY(ZP;?AjnV^>DrX%yranv#n96Wgu2KPV==xtF^P^ebsghoX%iMCg=urbk3wpZgU z?cy&VpN84q6?fl#J7Qu3RU)X-wI&QTP(Mr^D=5gqCwmSex0E~$_Mow@m`PbNbR&)k z3k`;wivy~P(($|h{2?kE`%&AWM_X$Rb-ftJPbFiqMb~Xc45%*2!C(LJPZZa-Q&oFW zoqif`e{>MVoi<3EAB*^yPU;wCl5| zV@{hTfrdh-BJ%okp?!~6WSvdIKKQV9hCU^ug zabYoAVwAH-K4N0}BbdFzsN;>q5cMc5V!}&&H!U^>!NiA6l?C|S&%cMH{8BUy+MZcoDyO;{&wl zys&J+EV#*xp{WOzsb}!*pTB{y_$630%?C}@m9(2YJp0Ns@N;9*N#RQ z4>O{I;6!^XV>>(dTtZP<6>(7~8tG?ii?ZP069|v6a13-Yi8`?l#o5VfvNyZ32Ob{b zs4dPVPA*ZaPknsoM>*m)vU0Mk4wqAN@aLVU&^=(n-SnXe3E^rLibf*b>r5Dqn4oKJ z#D^d5#QFSkG^2%VK&NWwL;Genea9|hh(V~Ms~rRV-3SZ}p}zP*Px*9q z>#=y*D$I%wN5;7mFu8eQ(Wb5NqT)G9KH1-exBmDi1k75A`yPJ;!A>T+q&DJMGd?(Q z1YP!lSif;8XcE+YJ z7~i$rO=7AIu~|oJC*J$uIPTuQ33DPHkerhb-}u!C3vg2tH!$?#=-+-%-+2zz_mrtOex)l*K<|D+*4#^kK;I(i6CpK)k7g1r65>C@V^@o-h>bQI?U6U5CyftBg4b@pD%P zar?!~(DmstW9BTl%8lKw2E6j(%P8u1K&N2@jf~6Hw43a-Y;~hGG$xFGU;xc|NqCoe zdRk>C27B94S(1gn{Np3GuR|iS$Mon3wW2&WHWtY#Nw|DA39e2-%3nLVj;)vbv^3s& zOhf%B%S*+FyZ0ldxE?(N18PEDR%nMX@Aq_b!61*-HMHRIM;}Hc$Nisg{R7jOcgcjI zJZEMv`xf?AWB;+UDDHE`H@^K9__-2S5B0$$lVWFcXlp)Lxo9Rl)Qvwqbj+!~`L%z; 
z{sX76nZC-)pMHa9#`;GLX6V526BqD}@BJ3e%p=yUT>@F5rYBI@;@y zUyzHsmTrVcM5-76*>ao*8uM}Z_&MyokcXz`R>%`}I)i~Y&L2U(UT|c*8RGA>j4Z~O zY-09ibhTFDyF&Mp<-IuXzot7cRpkBbg-a?onKH>Ew5|koo^p#wmd=Z=Q2}HUt&0ks zSJ;4`F4_P);|SUavgNHhr372vp3Xd%gF*8)rKqCNBW4raC^$Da`K7c;ktRf2^(g3q zC984YBQLOte)N_nkz9z^p~fF?&%cS~EXE+grH^TKhD<`XG}wUM+0N zN2`n97rv}p3lY@l zn=0|~`|rUJy$;{~_O}u0K7^dKT>SAjf5(|)#}FR-0s_g8v&n$0q6YYH_Q&gAc^Fad zj`+tP{((RK`A>NC`3LAgY0!pNoIG}r0L>Ra`R-2;=V6b&Zh|5L5*q?NIF6`EuuneO zk68=W;gKhwMYxYGnrn*jweS2AAAj&6X2pLS&WzA9i7_;6LM9#di_brWht@1cCjrKv zfA=>eoj-+DTNbL`nwd0qb+pqE8euowj_i~y`1t!EB-~pAO=cS^-&hG|9t{{O|(~zz!=FG$X$zJIJmgtr7wOJ zKAui=I72w_*4udHOJBonk3Ed|gh;I0at}7HS&hn*yHM8lCcf~MuVe8X0!yu;iNrCI zm4Dv8zLc}0dTdHNuKiK$$aj96s53q^OzD>#vM#BXJ4&aI>_$=JFdq2wcd#-(gn(oK zPO^t#RV@Pqjh$^Y6cph1zx*}6^oPIUt{t1*#*Otojh7JM&y#tLiUvM zaQB3Vmt3X63@iWEzt&M69c?*b{V_a%f^$c3`b;_2KlN?gyFMPCc7yomUq8Z!Z~YyM z7AIontVjawAykzW&{2K?4?p%a9(e9MxO?4lyz<-w__x0wL}o<;?p->a$$J;ap#je8 zg7Nb6k0UYI1ONQ<$N1#qgIK$M5$4RFgKvNL`wRd^9N(Rcd+)vvPk-sr(LD^+qz*ND z@xp}!VCRvMnv6y3SHa)jgucddl$BQ@F7ZwTF>x6$jcbu~wXd%amy?pvuQMSvt?U`@*P)`k z4vX%(2an(P8e>ZVe*Kq&D99;9|Hj4e3iQOC_dm$b1Ndn7yXwUwGD#>^sp@2vn4uGg z_J53X`JMRPZ~q%}gS}wx>BmQJ`~!QFQdCKMi2+ra_~gJDc*bqPm!I6uq}UlXnaTLp zYp)@IK<Z4{AAYp&uFgjM*KdDD-PN;gx%na_tqWf%BC;O{4JfM9NV_-X>95hk)Gv@mg~oz{(4U;kGu zj`LQJNN(8v97+pHFe@&Az|9K}KJfzXzgvfW@9o52KiRA9MvvLx5KOvu96Np#sj1nx z_sLhVa{XrbIFF#Ky#;Md3PtfaE@-Z}K@S9UWZh z%eA9&xmx9A97axZG9stVgl~8xf{1Tq1(K`)nY&cHe`p-fpxRT=4u;4J`wJ)VM2f5^fAmPP>oBN z#TeH`zStSYqSoe4ILVeF1hwh;wTOhrPx@JaN!F2L7c`=YY>hWt$Y z^jqJ=>2uMzBW@ZKzdqDA)nlZ;S3Mrx*VBoX`ZDxUSZ1RihS@JAvEf0qHB}+){4wmk zTnYE+C3xgawr?n1=OYcWcN)G14ZN@M$#rX?oQBYijU;Sf0 zW(K*VC5gDrxC@R8*I@;Hv$Hrz`qhOOcKmHT< zY+na=CS8}$C*zO5e+ws^Y!DjvDm8^zt)&)K-Ol*hQ{Tkag@LrMT5Q<*0#oZ@kI7P9zscR9X73xR*#7Hb~K}@G!qvuT|~m7M4Y>jsUGW8 zcc|rBViCdG-k$!jvt=H^V^_bBOT+Dc0Rb@EUx{fYR`#HwstlFY?O3{WB|HqNtj`Il znI-g*ffhj-;X*u6Q`-icSvL6ISD(g0+C*z%Ccf~}%c!lbLwIzEvI#x0VRl6aUVQEe z+`oDuI%@0j*WbN~))Ypm3^kl@%TY%{JcpmAb#6}nj2hqv+-_lTz{*Eej113Hp z9Q|A#-k@gaWw-`|05Z~%40GRD3R z4D|OyZ|p#8Qw4PP{BG}pe%g$@eP15W>~3$xrNc+@$&qsijGBQLUw9rf;vA}1CKmh!7A#u{pTy~~WBzS24C8!GB{n_qFdlsA zN$Nute)F^6;lz;xSg>IcoDIE5J+TW_^=91j^`B$=93OPmq@$qv-?4DzMyy`3T-|hn z*^Rzt0e+^PgyM?ZEa2*Q;3KO6G6X3MO7BI&DG^<#giUAym|K;NdTQ z3u_lK52hYg%M(p<0h0zVzaTvLz`ayaDs_Y46>Y1uF`{h^w4k!45TWz7!`>6<>Fz>p zb~f_!%jvIepl_<>xc7hwC(AR>qAhxQcrd53Ra+E@zK_<(_@REW9k~hb>qWbJDuoCh=g$JKql4oQ=iuVW-fv?o!I9zMog z;{bmCtG{3w?c9O+?xl+<`2J7-3U7B0M85bqLx2v2DW`Gx=w-~g^GkSwzAV(!nfQ(v zue%2}vWJp`9dk`H9NpaU-S2;&KiT`?R=oMAckr*j{xi1kxEl*&V(>-cJac0X48J{r zf`PgC;lDkGpwT@HDGA2E!M0lb?Vme|0mAV5_rHnQa4*;x+VQ)e{R;o@r@zGfaQ2!{HYQ!$5Je9wJ&^xoGn(w&e0`qO?C zOZPm7z57q8IVS5E$!DTmZ-t08MtYHa?gaj^>o{Kd@|W=BgY?I&c!1cZskuqDtB5Z3)Qk%%Q-sOK{}p+VH2DPUK&d(aq{pfR5TA`=A30%lQ;_lbaF!k zH&zGk5fp{n9(o>M|MII?5-X1vXe290WU?kMCK{ejj;LoMRA1MKUc(5Y<7T0v=&B*{i|Uzl981`Lzs=;t|rviHo?Q$0im*_ysQLmz0#gN70-+X zOe}8ScAJ_|75T=;Ohb2DhuRZOJtfbe=<4AP7rQ|Mj*G0TmVnHNP=Y2m8i_1l&d$i= zrGZ$tW+MW9c`$H*p?&_%Uo=vskg~N{ZL5sZ+qC<04?EWK!{&cV6M$yl^@D|!b8 z*}r-;b@gG%>h(;NlIR$;x~@+$26k|CcE?->GGkvI8fq)ZS3P10ENyH@P~Y4pZqcH9 zY@2LB1^LdNk)$w*}h z;YwzWzL7qi%lV z*P(tLE~aF_&D9AZ{;mW$^{B6J!lHT8n81~xtg0G1wlzesV`Jxx4p_T<3G}^fXplQv?1hw)e#jna@)44M z$G$lsCL$0EW_Y5cG!GSB!x$NHQg>t|(@0ppfMY2=os6!QxC23-kDCn)x_(?t&O&j0 zJK9Iw5fl*(Uk~jOZmnIJa+UonId1$ZH3@c>GuEkCldUaAJbbZ=fb=m25!p}6$txV2 zwrs(c^@*SuRcY-b|B6mZvMc&v;ZDZCrw_*u9K!t!km12za+Kw)8ly_V&SXSITBh3i zAUrA*1B2bDqb@smI3pk;2p4nn(Lp1utmwzN^Lcpr3onB1-9l2Av~7(JQz0`N>uc4E z1|p+lG%oCH2xh0lWbeROBmvQcwzdYO=jI?RB$yzcE|0%sdr z=n15}Lc;OF%g?}HX1i=g5FQo=4<;zaZYB;x-KeT4Mru(7W-VHV-p)4E)z`z$+ZP_5 
zo~SD>z+jI~@ilDg0MFnsJoEh1h@Z|Zoqob6APld4)IYc+oV#x8ttBn9ht+<{lV_!1(6 zwL8tKE!NfFkcXn`a!lX21B=%zQ#Yi90>i@*5*Wa`)K8Lp5q%e)tiOe1Y?$O4^ynLs zI1hBx7vRjfOGql}K_Zi{M9(d+Uc_4q1s@x-h( z+wqmJy@q?%&rvJ!gtV3kgnMj|e6bXXbLPU^1Mu^=#XPpP@7O5}kS$GXv}*e9c{J=_CB@c2_tVD_wN)z)I;A|jQsKf1TR^O zRa-Z}gP2f!qF-gscPMkZ3?g2|FvrKZjz}LqIPd)V%{Z%CSx5u=&X?XO( z`!FLeM%j&xnf7Jlj*T0)V(Zp*@N{>FtxT%lcQ3YW-ULV4e~P$$Mr<71?d*vS>09fX z*>5|9g-t_EWwV<6arBPBoY}Ks+gne(n2$l~P-m|JNyRl-v1U19BRJ?b#3Z7J!`(P} z=ok)t@)7>_kGF7~`0}n7UqDol4;P(VXdOO8{d>84m5+jlb@`h@hxh z_~sX%!pa5OWTDc{QJVN`j6|BU&!-N@Qd1BR6^R{N)~Uy7?dW3`5(BQB=B>7iY3O0n zIU*~qgR$d|yD?`z{URIkTh7vxp0q9+=g~!_kZ$V!0#7E+V_20l}d`c;wz4 zXm4thxk*cLN7%RpWA)ng_}8yKg>9RbGyl{c?HeQhvh&2emD};+jF;rzR^D^_CiuEZFU<<8541Pn z^4X)<_wl>d@RXfQ8G~;LQ2U z=%d}wnzs=D`hy?fU%&Q6tX#Pg^Ovm0>)-x5e!ycdJ@Yu?Lup^~)L>5ycGLI%`rYFQ zn>im}d;MQ9i#gWyal4H5L`|xzZ$NHI8CI=bj)eFib>m8w4Ly3_?Qqt2F~`Y)P7RX{ zT$~(m+vcrUyJD`Ar$%Vi5&97nG+yK1u`=8*=500X?!*Y{=G{)&fo~KR%cKT66e^*l4i95F4hc%04 zDH(~rM@L5@Fu=#6`syE&CL{5y6WZl(v+NJv-@$y)gyxQ3oV{3pbD4z*oSlFbt5?8f zSdWoG1~65Z6Wp0oJ@MS*#7@tvJVgA>=-8;{$cd9b7B63kZ+`6=+;``C>g0I4(ZMv( ztRA8E30es6pm^39j5!IB$V@(r{sFzpoySY(hLU2tn&potaY6>Q@8q66$B8TM#nMHI z78YHJr50e#+&Q>(`XuAv2;-9xsd@GA2@J(ucWi=>%#o0Mh_waF$xSk;)6K<(K0F4k z&Go2b&R^fuisei7$Yq$0Px92@R`5&$y7eqb}>wga3Q)3$8A4V%`?g_DyV;jBMaFI$F4 zKM%}|n}w(-#ye+M-2dcb2n`Mx^_QOsjgWt0sNHYBkJalo;9ll=)|lnt>8|o2**Zq` zNBt{NQbNszv)VCrpCk3AzP9ny3$CD4;1s`|qX`YERzMcuh#_V0N zYTMm->aqI~;X7`>GX{M-TUN#-tF)vLCyyRQf4>1sSKo%U>$kv_0a+epouEX{9#~1^ zdg%VU5a}z>O^9V=FCaR`rrLV!`saJtw`VW*@7s^VM^2)$riF>AJsqdkb?sk%*uF~W z)v+r{x=1r(LipsfkKvI=?}Y4MEbXbQM}ruT*tlZ)teJS?nftLIaR!{p&!j+YT$`i6 zg@|!#8rsp)){eaF%Q&$A5ccjpfMZ9GpsS-^ZE9^CVdD%wGGGy*!5jqbF(RqY*76Za z8HFEbe{9&Y4Uzs1II`yz~ACNX{!mzd`nd>ql#A2kh+qX-K}Bld|+9Har6Q z&UWFXZUr!ZBKgyY9M!PQ*^FzHpETO>k>88tF<1_GZsh`zN)O zRS+u>W{()rQrCdp?|z7VyE!&{IW`B6p`x-8wj&PwXmtnEFg7rV4i0qV6PA1DgPqvL zu`I8uC5Y63M@wB9I{I`-Wst1sgnYs$0i1aZd@lz*bt0|&A^LKKaDks)8Q%#qa6G=l7!gB4=Xp` zhUcHS2Y&7oAD-^$??*oqR6hm`4_+@D!^`L@la=I%x^6ZP<5G4eY+apj`p{k+*}I22 zb6Bm6kr(I4J_zCyG@i8D<=^q`kTL!inGU11z6tw3_!#>>*+aY8hok$CpuD1i<1*fE zEHtr_i8QXOSM=IK)}s=bT?o=+XkQOv`LcQHhOnk%)-r5JWM$`q+wZv@k3Mm?S_LZl zs&t=pb)PIW?qGlKAnIFtQD0w!3#SiKXAaOm?8iBdv$u}~6zzt@F!>1#jbs}x>@%S? zW$!6(B&A=GJKc7)F{|wcGl31IUCQuE^5o{`!~}SVKvt)cN|CJ42=z!lLSr`>gnN6n z$IP-tQ7|zfl+9rW2+9ib%Ml$Hg}8*^Yjn&?W73skZXRw}xnd~-$k)C-`|!cXd+^Vl zJCT-~r(Q5Lu{1w&fS<{lkQHkNXE(U0y;Ch^9DK2O=`!rtx@WMO?7q1$V$VWeMhLrdvWOCLG-h@iT*|OxV}j?Id-Fe zkVj>uE^A9N0^_wtTA8d@7Gj%TeqJ)afYjuRsIIC;Wn}}h@=LIA(L96%%QE{RwDmGU zsI8_?Zo*mebC5n}@4gc#DK3SBOz1E(6RXryH$8%>Gd^yN7NSLzwV5$)#H1c0#ZaG` zV65rvMM`!a{oX!J|4v>+S63H;DN1{jBpf#CB^yqDflQ!e>vpM$ePjL4KKnQpEu48( zuf~2`WkI6e_P17}S8oHy;017Su};FvKx#2V5*C>ZEGsL-?!%|)io&pZ(|UE2M9ypS z6&gZMYX$TpjKk40txcNf%Jr*nx%XcVNMq zjc|2g3}Hp1duOQ$AXm&>vYKG^RfKqJ+ki|U=`tA#ow)!{J@X`1&I?tbS0nLhBWSE@ zMIjSONx<6cD^cH8iTJrQ=k_}L9tk%waqsU|lh=ALZ^~?? z+SiB~5V~rcP*+)nyrNv%%~7q*0|(GXA7pkAZ4|CZ;zMP%00X0J?k+%HNYe$9aS&~_ z#W-=|1UiSDFnj(Y_%raxIWbY?zQ(DTMpuqBdq>^tV_!QuhY%hbtHwj2E8+@&AAfkz zX1Y5&pfj0aGJC?#E(ihsOrjZJc!cF_)XF!(6XGXi?^1uC0Mt}eBR4CLc3O?Rk{Y^| zA`w(VvU+51lasi|5ZkOGP#g zA3lucPV2;+J$eRhpflLP+ucpsgm5q_5#rXtu3Gfydf^cp4?FTHI-^QZ{!&h^5%~+L z>&9UOYd!MUmNsc;r$>8h1ClNzAvL=c;fc$z@%DQW;^oD9s99n->H^Jf;u4qRwJ*Mi z$N!hUPOlv&_5nLb{4gDtqQkGgJ#B`1#@O2(!+p~!c27Nhmls+41-I! 
zT6yT}B3#MKRm9Thca41*F$^LtGXp)2PFQx^2DM^N62d|H^xEoD)U?-P@8RS02eh00 z2asP;Ls>eg6`E?CZA60^`)!!~a@=G!wX`DRCRN#t44|*65+{xwLmhQ~-lAm)^p>|X zYyH&vdLxn=o~$N!y_M7L=Z13eumEo;Kegr#Z~x!^r5`x4vb|C0(# zYkfRvFM_j0LtPDyGiNNX>A6uU(XztmNaBwoZ8Lp3UfMEM^Q~ZfCYfW@$4VoISCY zG092wOzeTNM3N$tfq7LAGK;Fw2|pyvnu|cS4Ot(W>q^l%z?`a}9Q%m<1s@;We;m1` z)d&v>l+;e$WEO7i>E|WyG{Sw2RLhw5O6TWR6oZ#sb zgyzOZ6y_G7f;N$xRY>e@#=`lDaCWxdR2LF`bX0r!-@a}EOamP_bo>lTs+!f9%121c z8ch7FtZ?-B)JP@_8Fy)uo!vHwVce2sJDO!$lur})7>$b&6G-|=8OT+wjWtM0%0^LH zBW5mGf^~OnhAVRzL4kHM8LBFzl!zn<&LP>>3HGU`>|M7 za`NzKnGpke>q}5lP{N$Y8I5g&sB7-Pw3q-~KDQSYwKbrYjQRhkUu(Gn$+AzN52V;ow65gD13=w##+>LLHj)dU0(;1&z`_1 zyFMm%I)DTFkK%lCs(ROgJ^MEPI_=N+o(KtFdUO&WJ1`G)3J6wn&8obu9XtX85KR4P zB|Z}yH1e2^IhFGjIn~nT%W?abrSS4%-odym{y?TQ=ggRc1q)WwEzv+Nx^Epl%@yGZYwdQ!|M>=PTEx)ouMp8*9tgaddSnQbzVrAM9g}lX4knE?tJLXAmBJ z;mZgKp2K(2d{_f4DLLiXasc#B?>+p^k>%O`B6OjSg77aZcZhzrXo04xi8CV7S7; z$pKCbs?K&YGfD<6f)&x*2xuB1=wULgg6vf!2L-sAP*+uthMFcV?rA5-k+eOG{+>S6 zS2d!iyPsESLwYmhU?*>uV~~;8xjQm|J2}D0!%HPpw{6{oPw#I3%g-QnvFbEB!Wib33w)VG z+_rWFKdVL)Xjw&TE8fh+`ef2|l%)1S;)ou6_`yExJDtt3bWwI=Pc-LbFG(xQjAu^h z%HQM3+Q%CP2jN3D3TL39-E?~~@Z&o&F9F_omb0jw<9L2V59bu-7?43ks zT;RfF^pU5ZMPfn>+*qsjU?=%jx^8Vn{#tZjoUEmTR(nvt*w?|~ehO?v-6_(78EYW0 zwR3ksziChvoG23haCXA11q%@3$D}wlhrqrG*KP&{L2Ew##oh>aKA7}Q^T_8tV-^3w`@WfiH>wu^U0Ij* z@7OVor->wb;{adA8-HUzvaPKqEC1saIS*bX_Zx0R}I1S=jv{Y9h<6?$I z8uG8SDhT0f?o|m9nVePQto2tbDc^+Hp1B8Y#3jG@`yQm0chbkWP-kr6=BS6Goh)~; z*6x7#X<=9x<%YV-Lew^Q;q3VWEJ)mjn6PO2YzYP&1Nv)wXFojm%FFn|SN|1{J^TPV zE3=Vu_PlyCpy-u7W3<`P34@OA#G3ZhEhc8J#0<+f;{MI+5lc1DG>d$SPVuAqp^j;! zdO+QD@J}Gl*osBrwm5y_5DukRqh(-N+1xm{H%enSKddAL==*5LPHyBw-RKpPa%K$5 z8^EbcPSgX5OR^HmEP6=NIFgSx2I{xiDJ$@FaYuJ+D^BkJL_Ny*Np=$=X3oLAYi7W6 zlqb;+BL+LGk$rw2PA2DL*1Ct(DkgtBbqn<-NOW%q{Vfg1ID8n%nRS@6@e!BwzJz zw2NBerm!XP$FRN+fBDP%IG$9*R&V8H@4Yj;)v%>|}XzN$Z>RRBT`Dz}iQxD=g(5gAJRuVBIV~oIiCC z`;&{%*hjz1v(~cLqOtSz*|c|eR}a`T4-k@b((;JW7KS0>F&>vq_l13JIfBNU6(1;h z(k2yVrC!?lcrtdGaqg(x=sJXt&lJ zpT_hs`UI_$78*uRM-3{=^O=-$%y>jSEm5xulO7y~jp1y8W)Hb23Z*Y=! 
zrQ{HXbu3d|jm}Xo(4>g6(h4J9~A%&_msdB&Xv7}{!(bm=@gL?FbN(kr>t ztQ6NuY;n}S)mh6|tB%@v>m^&Slsi_Q~2W7UdOAC+yRH0B&4UD zLz4A_8dw_;^M_vx?(ncfD*oiOfU|#BR?Yx zZyw4*_?*>v`l0Oza<}>|3mFY)Z>m9gbsceqN%e2)B+|4ezOonh2%Ua#QW;Dj(V=mL zR6pg5%#J%d6VLe*Usb8?7&fh2gw5-g!f~h{eX=~%GXM_mo>H15qhLaFgO*d7j9Q*K zkwm7$7#QqVFMRCik=yrHVrk zxRjyhBf5!k>{L!@2Nya#M`B8c5vyJDU*?YT7G~MH?5dq{IB-tPSzJ9RS1O{$x%^AM zn41hyA)MXh$ttT);NMZ+g+?vPLSyb3Zo zjP(6DyZb}@>97Apc{78Jx|YJCqw)?2#QG(3P*%|2**7mM z8-M-%J4jB>hMo#H`QVW_>RMnJM+BGQOhBV!Vz6SxN<9DUGkE#sm+|~7U%*pe_zE^` zT!#QRx$ycf$!csylRY{+29TIA2d}*JBEISu!F$3^@(jVsm!`~6oth# z*tp{v%KH^O_3X1)xoWxEFkS15g=}q=Q?ljX=9;?E8xC4;ttI)dS&zo@9Q^UGf5owj znHZ2)(yMbCsaPxH*x!u9dv@Ya|M(aUtrLTVw=10xEA4FUz@UMS%4CDe(kc{}R;tI3 z1ONy)S+aa7y!FFo2dg)>d;G--YT+|Z%;hqk3wYMo97oN2) z7@1(S%KdYFe8TN$8*Ljl-HD%j@pCL)=5?EUm<>zV{*5f zzm$Wdu;jaxk#Aa!QbvdJ;$qs(TR3|r89LMW$qFSXeuf#B`>~6<^X8x5LrYuFDF4@` zacsMLcpxG&95ZIbKD{OdcoENVwHL*3dClo%UN^ zp{t(=LCO0hNmu+-q?BKLrnM(h$dVM26t@EGXfLwKcvV@c zI%p)wsI9F=aZw2YoZS866^>2_j)^14pH85=8qYlQ1YYLYJpam-!YyC<|jUA9L@)$!Vg&hk!y_mL zx8409wr^Of5=D8m&fC`?v*=%@&5Xyxk36PqOFmzqeLeo*eTWJT;RsVZEZ})O2`4h4 zB9lIFa6?#lqZZ@-1F4Em%l)tCI0F~Y#^xfaF=__HzJL9Z6&>?m0HVUgW7i$ z#bkxEf3(`pqPE}?I-1)@_o&ek!N}fQBg%F?Fnj(o#)rr7@ExmBRhWgH@4tuS%tCYx z=(UlhetHK-!<7k3eMTy}Te{V8X=_ktfR3O@R(~pcpI|$}pQ^#~&7N}h!^~h`#?xU$ z&0T}((_`U4z+GHYipC*F#7vt8UkYC|P-!E5^TiYU@zekO9V+U3EVW%h!+O+aCgCrC z{1Z;4mD7L7L19Rbq24B3%t*#8#?u)yrX!f`d-z7M&nvNV$vm7pxRdf2Q8&Bf3dQy< zlk{JYs;V;VI&ubE?|YQ~CmB|LijD)e_Y;e$_h=F|2T*jDbV!{$PeFi+_W~8zk@c`O?8fgt_ zzs%6<29cGSiS?^iU=CxEJlY+#XdUi;@IlOt*7hb;G-5(mV*}0}-j6+dKf$n*FCKdC zEBM0mPa-zJWvtv(Xk-ZO%{4f_dk^;R+7Bo17~K2Jzu?8^pFwnhwh}}1)6>%d{=TlL zZ)mw%H)&sP{^9Upve1x~j&=fiZT}}p42Lktk2WD8k`ozOZAJT7r6Kn0=IV;sQQ>sC zX81)+!-|y);bu0XxxN|2wK_zH#~{kf$`|XQ-K3v8ieLQqA5c`$aW!v3<{{`?%dzXj zck%8gC(+uat#B3$Z65B%rHpjAM@&az;ygt7`@qXL3=whjaNlhkapCaC=;$4Ufr*q_ zxiiY`IMRlZ#B&4y<4dUTqvu7Rh}*l`@Ta%l$HG+`>GSFPUwjTb?%0YLwLQRSVc0s` zVfm7oOz8Bubm}CE3ab$jGatTQ-kRO8tmvhQb|b3+N9-N3nRc^n>2#df{WdBZJ7BVN zf_rc<66Vaq?D&~@?4f({3T^d8+RXE>d<~C3a1WCo=LvQrJgXGrO0si?Z`3?IMZ05A%^Mo8gaW?K7y{&wqt|ez>(AHwHdhvYfsV{=2C1U&C_h8k+ICT?2$(6D< z4z!^tI}JNO*@er+by#-S<9LPn!n(!N!4P60mSw`6eDVN(^UI&()CI|LH2#G&4z5K~ ziX~_h?)1U3xqr&}Bk1VtQo1Q;WzM~8bV6bbSE$&bI>fQJ*pEoj(c49>xNm6bz^V@N{S z`X)F;_SPL{zU{zRvVGkmtdN(sTs)0}I^tY=9aMi5rbV$7rBymIX#v2>7byoR;yPFvqSt;o3Wvt~H8CrRySI79N zX}$2pzv^OJx>Uwq{uWQ74$17ehaWLSB8DjAc=mD0!g$6E0~*S!)RT)rad9kSoiH>q zCih|IdmrP?zr2sOjy^RK6Q#(`82jmC^YEKLzKwI2GKnLF8;zf#0UdI(3lJF@g!v0& z5fI=BU+U&;*+OC20&Q!s0gL$6_$corKP=z1Z(5uDX7P{KUa3Vg{3e^{$TXm%iwA9f z7BbS$qi?{HFZ1Ho_AZn#2G3iv6h5v_@O5`akgo@7D=Sc4RnIh3Yr$YNp?6S^;Su(f zc;O^*T76Y3o_O*(V)0k;NWvV)ww}4&?|<0={T310pIC!Fqa*YM;olo>3iC6;lu%a@X;P75(5^OBWE2{haw{V z(bwCELMDZIIXOs4&p=jDk=FT9o}JPZOk-sOUp|C}dmjzxxSN)AP{JU~>cCF_G+S5X_*rc*$G_K;T#zd+%Lpl_ur=plV6=dh3xTXl&4<39qnRTE{(qk7f&Z6ue<>p?|K+s1Yv50;l$1=7UYn2So(pLj_ikRfMg!m&gEClI?4x=PJU-hA) zre3X@wYocb(^G9p7CzX$8%Hl^pkF`Hj5R<cM)Z>08 z=31RZPIjJ1h>n1BZ#|OF9Y-4VB8_8I-`Wl1hywwI1N-I21g#Clh1%F;a%@s4Q!8KM z8g#v#sFb%Zv#pBe9<|SyG=N&_K~Z)#iV1#)j6LwJ!m9=mYbsEUPhL(_q5z1>U%8))g3mek^fqnp1gjojF9MQML`bjJ|7vEQ*bsl zAG20%LsV3ZdbGfna=M9OJ-!&@5b5Z{)IT(;NHtEWEOjq071uC7_E#(*Kl z(h~Z+0`gMbP=~($9+c(hBEPsCwVeZM<+W4bG;CNJ4^*E-(y4<;W(-Kp%SUEW32JHc z1~#U^8}$>eKDgtdhw;qQ_bB`Sg!0YID^Yf6wsC`N@LVjNJ{==jCy;#jC{mRS==%yv zR9uqHqvcVj+G6@0`fJ(1*`V)JdqBz?)r(5X(5pAX&Bq_hR;?hI4MQ7!MtL^%F8Lhx zoK1q;oRyfpU;%u{j_ADtn8rbzKXMT7y!`=+EROA`hjl1DJRBi+Jd{7xDDd&+zjco_*>Gyma>_6eXQNRbxB!)IYi5b!{3qFq02A zkBiN?z=82D)W;S*EhUWKS@bj6xR{)enz}Zg*XmGy#@-2WOIE?t))pt<{1cNvH-yfP 
zykd4wYgd|-q&bjS<)JN!Xss*7;fp!whdTli7GrvBEE+B!Mb@QcrJq*2X(PB5KRKm1 zC~dI~dv|}#Uws#zd*oisajeJj_uoOvrE-~k0o0cjD*sZmJc=TCHO=`h)yXrYbRV zvOz#-pn7a(N#ZQbiwc0AzOJ&Kab5#|80qW7r9J!b@xfF$M6Jh@Pd<&c^JCR^AeNDv zGU<9cae>MA?o$`wHhT>oV8rfzH24=+p4hVGJrk*p1~Q`&L^P zQ%`}UBR0(Yy@_+T;`zrO#xmalod5WJoI8~VgId;4PE2~VR}&L4hU6BM5}OU6y0(h8 zT8iAfO6r2Fa=D?8WhYhv?rK12a=_in&0ey+0OmoI_k~;5a&AI9|vaAM#Ot zo&Md~WJHL!Clcq)#e(^BFh3y@j&>$gR1xne!(lmjkw?yPe6;@>~y^I_Yd&Sd&j^qd4?plS$swC{*e}MR_1ew{z z$R>U)t7*~d8}37EV>QPjn=!2f&D}kWC9No->tnWl#RuhxQ&=zIFrr2Ws*DfBgV{ zF_8#Kh?}6lT8m?7#V0HPYqzXIQ$rK6YaV(A?cok9Q&qNh zP0XoAnH|%RF_L49r}nN`ym=d5{^GL;4s^uc{h#32nIwzchnd23s+S0ft(Ob#YVSm2 zO`Wnq^~Qgyx#U{Q=FdQF88J3<^t>Dy=WKj%G6fM!w_w4X1hpc}I-z0ZL~%v=s1T!o z6}7;I3hQD-aCk80%t^$2=D%}^ySw_j8B4_PiiB9tP$yDPAH^ScpF_7xFkb!AW4LR_ zQph`kG<_N)d6RD8#bbEy?|;LEq;&LCH>XZgn+qby&yoe9>~|6lpE$?-t%%=?kXu-e zwjS9tk^0U!TT@;_Jggx($TN@Q(TDCu1!GuiSvwq!MpRLM^NCA}%PTRU>qTi< zIr%F`Wlb}&DBEQ`>ZZ+?NPH}+W-dY9s4qn}F=lRgJ^N?Re8C-0+_wewB`G*@@(ePW z1LU%Q=gy|0+vJ2zJMMxL@sw;0HGiSJnOKjc!@H1^&Ui_kN-r!!aZ49-Jz?J#A?_Y< z8#17Vd4F~;&y&AHr!S$;BsY;|QkLc9v23>hM8|}}ZlDC0s25U~;N!-217+chpt!|Y zH!lXYmp?>q%4OEYK4g|)&w=Aux?wfqX2-|~Dyb=&DCAot8Hi!DHCJHo?my$BJ?|m6 zT3Zw{v3k`B%jXIb-{ItiJ0E@=J+#9kyY>>l=P)lWKvrHAs)$+DJriR4-sWl)(--9A zm!ZAA4PBj0C@z$_o)WanQS59>fGN*eWK>aCCHno4dPuj#~EMtEw!AyN3%*c3!Y^ z4Z|I`Eme;_sg@mFVNZbH+)#lUmXpm#8#?+B9625Gk^osRr&fXun~;=Lz~eEv{DRFE-T5=xZXU@j#`3Ykl$i}E%+wSil zs5X2qE})aHtVCg1HL9Ds;N#_m&|qKK%0}l!MKJkCW5(j8@OK6rSZ7yj2b0}m-1+GJ zYR?ndJFBgs0hcaZLQx?NwX_u7{U$71u?b5T%!MbDWp6iocsUWARx~27te#G}g5bXl zGncHy`mLMP9!B~b;udbmP z^A@ebs`YCS=u431Xb)F6cVuRyQQ64-Y`1^;qE(D!QEj5Bl%Jo|iKRp`G_5@8%PKXWiMt4&M@+&G(TuCJ@DN&EF!IcSml(wJM zm1&?=SA`Vswnq4P`=GY69qkS6sIIF~ZygK{4n#v+4>sNTAi_fZRG;OJhdq-Q8zuvJ zndRv0=s-zDA!KoCNQCUCBx}l8r+Gl_k&{)|i-g5%urhHbJS=a0kyS@(9|tqtv#uM( zl)KI~9NTW&j8I=~A0YWS*qabQ5M0tgpi|a_rs`${1%@FwE(q>k4p_TzDLfsW5#a3# ze@|Zqt86qjwWGA8h+|z&XV#0@=oompOP$hFJNP)eK-b)WyxK~Xx3!_Pyc!<7Hay55 zt}f0T8xLem*!F_kHS7X!3Aa&;8K?0ZpR5!Pv*U02}K##iG zHu(8RAT)?DZKx0D2*NW{(oo8=>D3W6+u5O-LAjra=d4+Ama(DWXg9J+b7fTr<}F!( z#AS(UuM*iqNC`s8bBN|K%m7oV9^>44=WX!z)%K0LUYb9F?1sso2WxXAI4NiUO4MrG z!m>JabPpn7Mx1){VP#c0^n(M~zV$Zhfe!g)jktT;diXgGp`f-CJ~0ciIz9-MNe6K@ zs|=kc7c^H@p|ZFDUg0t7G0T_`f4Gn_rV-@bM9}Vl^3r0IH!)B$K-M(2AtX2$L4IB= z!_L}>FYG5E+J)@80c?5nU$AJVkD7GU^g#(iiNgBTO4?63!OzezI_SeLUA~OW;v$rl z5J(K#Vm<@hvwvjwNmU80?4qNcVMdYyp@62U=r9Xxyk5f&CqJsu`l3_<~cO>=z%{bLP+ z0z%mT7}&bIVZo{u@Z^BY3Q$jXA2d`|prW)8DyVCk0;Ki znX#U}Q9Xz6(|^UBCoVN%R^dauT0zL{C#QPMw^R{gW2@8T7(WXo!)z;T@Y-;G+t2jQzsBP+myMH7?xk z<>Ke(r}`petPQ>VJB&X1orAmgqM)FNy40v1_0$pgG!fK0dj%q#KHAa28R7nvnXUx| zRdpz=X;QYRGZ+vXHv?XtuBvWZnJAMW7+qnzMMK*l!qZ_Kdiay0nD8d0e3Q`Y+CL%j|`xWzQ4ZT33opDFrtG+ zMpux$jj@5UY#YMNmA7Nb+%UM=QBJ0Qg4Ar(>bjMb)v(Z)}h&0V3OuTJznRXJt zd?P|bf@n7t=ZLfrj8QG^-AJX~)X^VQ7nQ=vo8x@@b|gdxYj5J~ z>ZN^`W5F8AlSz|ov>}fvCSAS^QeBRsrh1eUV;Ol(SX30p&7EVTM{Q{y zjvPCOLIUuHhI*JOPZRxEWqlLu-Te?79s)1oQ5P3?RPy?&vLf1F0oug3ue$@Qmd=3> z!KTI^?Zz~KW+rCUz4o~6&igSv!utvwN_{r#VcXk={Hjhkh0eytwaXFcNhKq8>8>wB zcC7&ktM0+{73F0J zji1diovX%Wj33RmWD5#cS2v`mr>M7k))Aw)5xW>|83%o0aofg3wf~@v4fTz2td>4B z>DU=8+j1M0-@YDRY}i5hb_05-cNrxexc|YsXv-WX8^#THGwO*=tNWd?V#QMUFmv|z z_Qf!PY+;7#gl{rnc+oAR4LP4gIdzdfP_z82m(elEiN8ToCB* ziCN*EXlF7#Wb($WnbY7Rf!t_7?!|Msn3Sw0z2vRSvZYL8Q#a#f0~Riu4;KP?!Q3vy zb(hnNs0;M7B{_`COv+bp!^-6g;Ok;f*w}~4@(N6s8~)Q{WDT;Kd{0l#$BcxTm^FVo zY=~=Ht7>ri^m+7~Y>A5n+n1m)FJEmtAurqV4+^5+?ndF|W2oqL#EQF~$JUh#5KLva z>haiEip~td*jA5E_Me5peFh$V{0>kpr(k2OMd3NE)|fCa$XiKIF%N2J8({3MKyyng z;%Co70P_Usx>Qnr?Bsb=R+OWIxxz57?dfC8Z|~>WL?Ot>1HQgK>S>?+%v7~6uEd?n 
zHUl1d;u*$)IQ52E)d#_kj0g3N{cI0dv1&2q&WTkU{EzX@{*CmbueF)F--iV&H)7e` z>2R@=oK1XjCyt!R#*S;k}Da}HQqYq}U+K5i$@wBvGGE^fQ`abjj~3m(?^)yM|kPRv_7IX=|QDkkYIEiJ@JKIqe#v0%|s z%u1jSm$&-TSI8s8jhV?fu>Tld`2N2mA%XhKTI7w|;;Y)4dYGFHV8N1kj0?UzF$7;P zR}|F_K=zWAx0(A=_OT&8=xnS&9`MPl8G zSWqn$jq)!!j*K_Q$3MsmGvcDr+-*PubHnwkSEvuL)2U5 z6$X=y%Bj5REJ6a@(8_#9p027SW+ds zlYR*G#8Wm*_!qBR0jJ?U)R*NVr>Y6n)T0{eRyi?PPC+4Z^9vD09O>ib20LRfc7OOW zh5}~c@n@gKs>En5hO*9yV?>{0>Opf+3QlF$B5dw@tXVP#-fDy4$w{sqyI#IgKZt(= z{hZNCpO8~ji^jSpR1#k~xVRu9I-0pU5dr0W_UK7ur)1EU%h2A(oQfE$ z(io7JUy1q5spiqQOi-koC6P1jaCop685v1v>KnqM%@1IDq_6Tf6RW;PLvon-KQJU* zZ4;B3oP_pPnY*b*duKo5;$q=XY$Y$(tIE&9sZ(c=S1i7|hZx?5IU;R{aWObN48H!} zaHhkJ3JYUiQi@v2yNVd4q^cg#3)kSzyYHZGST?bde2T+&?E)q`4hbs5bjsaS&=!P5g&{a1K=D21I;1WsXGUgf9{l}-5g8+I z!L*(eevJHo1XWB-WCmW2%cEnC1d7vXFcG0aur(Rb$iDa5dm=DyIy`BmvH@s!TNjlv zAB)$lV6rGLsW34p_0frDU}#wOQ!&HS$DcuYAro`UCis-OnT|HMxQepr(l#5nwa45g z%i!gsz05%#(bV^KA}Ki)vzM-A^5da72F0EH)p+kB@G2va@9vQ$)goZCnz&7vJs;k@ zM(UEgH!?QKtjlOL${Z2_RV=S|Mz!qCvsms5aGdExTInfogkoBJ0^FHs%Dq7W=Ri>JNL=8+ zWO0^qj*^(><>aBWty5W~+zFKx1oBvfUqBH1Iver=I?V=$>4*tFiY37fMnr6=8egTK ziiCIC4aYh!r;IW;V>YjMV$vXGCL3RkEPYlar6?zU1{e$qi_6hT5GokfME*U!ys>Eh zTn27?RM%9ZqrC--IUeoZ{m3q?!J>Hy@UR&`A%OeqT^@5m3kodpiOA2Q9)-{H)K`rkU>_| z*)s7C#-c^@2#$v77xl=_DMd(d2x4NiH=Rx(`YK&tHwq}rc6lW^YY=^~F{|yhF+F}J zVxlA1CczwSJvYA`%>;r@%=%_dpGp73WT~SQQ3*5Do6qGjs}ZAFB{mJTX`|X_(~f#& zkHm$E3|z9>nL0xNW`NKN}Yo+(Qg0a zJO9PRsS1z2_yineLvH#GvtIV++Jp9iA^h7P{*EwTH#nJf=xFXheo-ZXmn^fAz2)o? z6%&g%S+VPAkEZ4Zej1sq&4HV<3!9Qh=!cMWIR|s*GCncrDtl(!s;X&3QB{NDN!oOD zB}k_{!b9ZEmnJ64^=NPF;IZjU>O3gP@j5OYF=CMYO;5|DozgByVq0=YRP-u3ZW^M) zgXk|b+v;ZmBr9k;*(bR^C^(otm-a(@x7MxQ#Gq_g-pDbiY3`y<=+#?J<;73x@dH-Z z+t7@H>MC^U>1Sv>QSvzKv^dtqaWV}nz|q+!`}c|$w0d6?0jVcu&r5`_2cZn*+1=So z&|ibTo=%oDAtWY&c0Wzs$eUm{EZb6Bhph58EL*u2%x@IW6NtXMr@0nI)$QJwoH+)!UQBhbt2O=U6ADq#<<1Tmr)YbkOb_TH6r6a4|u(HnBcY z?jzL8CMIodh@o6Oy}hp<8^yD{S-%DOOw8q+x4R2tcLbVyhGB9ERDe#~h<=db?!zB{ z^%l;Z$-tMs_iZMr3)MuLtlN?})5Rn;t)LOB*Di%K8B&RwzK0;J39Um;STJh_+#QG+ zh)*i3>rln9F&cTT#cpQKnT4Q`K$Yw$AWy%XnUo6exLF7b4_18~^Rriw#1*zzT`6~& zoe>d1zeX9FX#+V~ne-j>Wt70Mjcnw=TG&QVpg$HQ&SoMmZ4ELxY-0jcMZUGwZ(*@< zh+tf{`p1E;KJ@~j2->ZeAAvXRMjo?DP0CU)bqk5~KsA%qSHArV#7&!yJ2tLlGoq&^ zR2Ed>;?YZ3|KJYX{}OGOpueHG7~bKr@Cgl2V|~rOSxJUf2I&gw?P^1FeGR+0J0Xs`B&YN|%*3cs2fy%m z1p8?F=qeH&BK}NINrjznIA+eAhHkb~SyhEOOIN7)Ini!3(#wP)v7w895Q3wkCdNnA zThUp{y_YtbbXis4OpqQwEfTt62ekFr63b9;c#*93GBZhk|IIzvzvm2|_|g}!_V$fx zLf1jXH641HK&BQn(Pu1$8x_x1beXnRLBCr|thI3d9C$g3oXluzY^Cgrp<}$%h<=Se zG%7k4&NLi#Q`0<1ePBP^jEpz)l#O0fx0KxsGwJU{?;z`n3Wkd-wTC#iocWJTt`4b{ zr82##S)iT09oDX2rS^-Kanbh;A~my6?P(^M*~dQw(`U_x2kVh`2Pvlt#$7+!us?GG zA~4#`2(r`i87KYJN_->ZTE0F!cB1 zOmYs*gT9IvySuqz>58SYog_wv`cYk7t8JacE97Q}iP%v-F2uF*2{RGm>#k%osM9mY z$y8ex2=4GAhFQEQarAAUiXRyhS4)n4-9+xH5 z#M$DH@=7bHry_gSZ)cD2=s4P=C8rYKVGvn%kuNXInh~hRZH)i%B>ZW%)!ZQq1M0AJ z(R>vn%)^7IpkJ)4Y9ywT{6MQiL>?3xhG{cq!b4_#DdUE^7L--hGVh=dWvS?CvsGUt zud*c;l?lVl^lbRW%|i&Wl=v)@ei-#^Z@{QW?A+<7%}d97zyELO^y{pF?VBKSNnep>8f%3?O)=1Z>3}F^FPBE zA6t)*sRMV7gn#O1T?2g)^SzN_6YSWaY|#=KC98tXJjSug%0pLYmnttg)sT&&!lF)& z&RD*5G2A(Rf<5(pJt!zDLmT6(P;hXNlBMibC;d=4s)}DDtg}o_D9%|hhxk$RFJt4V z@g-ZE<(D=iA}S0K;r^C7MidqjtQry?3a?Q-B=#~y8y>=?v|`GS7%|)rDVNTnPuZws zATpaH3S*C8=Ek$<%!EDrq!tTQR$`dCE_?Kw*gwh91P3peH;ej7OH%%^A7?LT(tQg~ zGpl!CBraP*ThM%wjmVQV4D|M^6^-qT2ZFdf{fNJ2&VsKSw^U5u(HBa-p23iExEJw$E5bnmxT}&6&3u=Htd|4<4^}>o=0J4BWlabP+r@hws#Y| zNnl$c96up@%6p)nd2wF3dI?B$^dvdebx6!mOM)qYF)LOsq-dK$0y|WL`%0JeP@nFK>UfSj2^Swyb1qB0esqtq!Rt@np75Lk05k zsX23Js~flW%p0Y?!oqUY5f_Q?v2%36oJA|)<0V_|SSAc7qwy!E#{Bh2YSYB@S=9D_ 
zkF3%f-?!`c*?NW0)FpDQIwv`9{W*c;pwLw(Zkay&8viemwRb|Fu7@|QY0~lfEJ+#| z<3b#;wZUsOt4%~)efG-ZwwRkHG^TIi|8o?m_! z7usF%@DKiiIU%ym^UabL)Fku{>y5Z{`2u!*@*x(i*o-?Ld>r14jS6V0;K$Y(T(2b^ zEu?8@rH_-B`h01Ed?wHpXXNMS!k|^2A^}T^Yi-&}4D3wbp=Yvk-?ry4EoKgW`p2KZ z#X~0gN0=L>3iNEeYu2$%2pqq3He2?|~3K@JVno3<##>()mw)t7qF-BA2 z`c}wVuh31^ZS9(sJ}(BZBn8kkP+eJ8g#Z5SI|zxGg=e0A2q8>RrTNdBh5xzEehOle zh>W@M7Y1C`N)x5>-Sje*1EKsKMjr5`FRtQHuOQ) zHGpM_ci{eqpTKv1_D%HyBAHmJDx%N%@lRlI^TEIV>h}n9x1Yj|Ni%cBZJhfq-- z@2|?dglC?47F%BW0iOEevk0U@%L+y7B+aCG3~i~ixfx~o7jf>?Zqym0aL3c%!OGb_ z@VJ({u!?y+U3qM>GS{TZq@XrDAx(1QoHW@LQ>6*lUWd=AwIQV}mB_5%^$Y+1H7Pxm zFBTbzHp{jn6(zETMm`RlxPbXfR^f?fUVxXY?IbrSEMyV){}zotl6)0O2Q7?EC0V-G z#KIU^&yAItTHkNn_NS%KZVblPGM;bi@rix8vdt@hPfpfS*SdP#@#|Koo%}4ME7wkb z{q<|VCAnKJx}|HcpsCeiE%9ku)8ufa`@gEjPt(LGX>Cnjne~|b6>hZO*K2lin-*J_ zzhmdD->xpL1;1;Dz99_j2a$d`6_pj0sH&+^Z>FxUu10m0_P4aQ9<`mlaCUNnr;EUV zTc=Sj48172d%G1lblJ~}>$#tqUS!p>Yq#d`Oy>^|J zGglOtoR5ihPmK&19&ANiD?-zpxfsBM9Zs~=iR%h6U_0k0rm)HT(j zwz>ql`K36MUWLSEYq4eXD!AHRamV#$*da;r$uH54PHhaXC5!7_IsV*@TbR5J>-VW# zKc22Vb~jbiq{6wnZtJ(3s$1yF3*`4rS8@wPpx0Pcs`kS?a^fO-jCR<3?|qma6AIb2 z;`3#}DnBdzPjuKylj+5E{JRa$@M9RzAulr;AE`M)S|9BEx9_npIZE*^Ehi!=#-O*B4jk>Bz zoIIC?PQ4v=+`H!?1}4p8Oa?R+X5-+A3$XE>h8=g_j|DTM)e7Bf%@y(~4V{cW%erojCewYT zexnDa#0b@I0GCc4Q~QV~uG)&*Z{Lm}AGeA5%9S+9iO;9Ulh=CP6^3IZMdTwFe>QB5 zZ}j>#PR{seNjGf4+Vv^ZwR$JVt(A=(zuuI}XHQn%wALp6nw-?^_$@Ja?b@f(?xZ9a z@}gV5ejI%kbxe(1Dom2WybXLv`%;+`$*u_#{N`1~2rKpQUZ`Ump59kBk%uOZr3wl22Ncv)Px ztAo&Y*5dflGe}O(hgZ~GthwhA%#HCLcZYHW=Kd~x{Qf)0Ew6<^UaTN@|t~;!_F*kuy(=p#LZa0ekEeV zwaM2}Sz5b#%XD?0Chz-|bc0e?@-q4KFgQ?baD?Pqe-BYgXRvMiO4GkbS`8@U>IfWrRS1eq&8B5l# zMSzpG8t7{JycoF=NxSlqcK-+4wl=A7k&)(OLUn2`{`}X!!Q|@+v)u@6M+~rY3Bv3p zJFt4i9E1fps|52#ctItox*il3SHk2Jh#Bz_YLfQa*ZnWaV2UK1S?TlV?sIBQ9={*% z$A^FYGxDn&Fy!Qkhn{-{GvdP3(=IZu)|Nj_tJkcr)$eHl* zbW)2}ZkAoG=CO``Hr*Z1?-)(k!%%4Q#^kI7F%8sRS=MS)W0M2gnd{d%RgzjhT@C*` zbS=KGK0fh?_Dhqg{J!$cR7pz9zuFbot3@k0*->qB%X)mQob{VV(#FJVtu+4X>(lre zC%fXxnyy!~wXUhP{69rgm&K>2skSBMmraH9!70d zEryLIwFinQ{`f#`qTjK1aYeAdFTC8Y-hTR)$TWn(&KC5W?BNy=qQJA}-ALCJ#&{VR zF*0mWdw}%z48X-ZK<$?$d$NqyCFM=SXl`gg@1UN&9jkbJO*VG+@bU{lfUlRj_L|L) zqwBJH6 zqPoR=EHqZ1_DwrEp~&Y%S8&G9|3s@}G?f;u(iw|w*SW%EM0aN!+B>`9;Oq*&pinqE zYy0w!*}c%@bzOb*#%)?lSm{Rn`Fu#YxS5uvJaHrc7<2~I$-br|;sHk>dzv|V_`o|j zOuf0-vH`(O(wN-F%jfzWY9+_)L;f-G_pr*}o7?-;i#xnLTveeflSR3DY7)3HM^mrs z=IQgS`&LNBa|*b>yHmYD$`-8p9ZJ)LNId zoHa67Y2vS2qlvs+n=Jf@k{U-)S6zbv<1jCi_#owH)VtLpwg zMON z&`ZZLsL<9b$q-o+&pRAj?tC;TF+0VA2(C>|G!B_S2i{GA%p7+c*?5|cZCLpMO)sY8;H2<`!QcV4uOGFE!CFp_1$d379H_6V`({Zhuo5e} z;^);Q1++q%`@eqUSNB7%6^ju$Y7@0%azIhOD4UBaMsC zo)@hR?cZC5Z({=g%t=sFr@8=|@u z`B#0t=?;kxmDnZPFRzTyaH;6De6CH>nta592A7SOw$=BM3(5J6WkrI;j z^}00Ga{r5Dm5k7MR;L>Cwd*DNTIqV%h*roIb{z4s+(jm+<>fqDdveskk7$m($VBt6 znr2wJy)j?cr)%|kYNW}=@&e&++NA?=iA~GNEn)oiEADIA0n%qMp?M(fROkr zLDBN+1WW9d!{cZ&{hzd||1%_ZBR<%H|H>ZH*YdsAUI^W|CsXb7)cnQGNbu(PycN3U zgrOXsDvO^FYm<|uJ*#B?C!4-830In3S^fLi43y@-KOIf6*A^LFkESfYDb%HCv_knJ zcF5k!(`!Q4z217E&{P_lNY{8I`HD)-1`O$XkeOA8hSo0l1czgG;sSWPYHw82Ko*%E z89+l_J<>7@VejIJne!H^w?K`RAxYI!;?v55R(RsC&x6>4y4xU%r?oSATc0H-)TF2MAN{x!DA> zz6quiE{zJ-acD^?f2;>$uQly}J6?^=gbL2hCyVcrJ<>2RzUQykEK^q5n1O;Q+?3oDibhi|`RX%S-pHHn*C2Q+Kx5k0gpxq`Dn&_kCoOWJ5 zx87509aAB#73*}#t@Y`DnyhjXx>|y_CNBeFX(x5M0;QEL(Azk{*4-2H6J{VjJVYyY zO_I@+y>oRqcj_YgbtV`b91s*1iWQ3%sJFA;ygweJ&vS&VQvBZ=_c0D8AG?Vh-VDDt z-#6U#~66sq&dqHTMED7BHvsbHDlpPuCz-@02FT zZiZN^%6DaQ{ATf@m2~V_8;UEN(KhUb`E%n{PB7--tr8iNjC|8tvmO(&9v>_B=}B8@ zX2oDu`DyK3kFG9p%}Kuhb-G$!llO08xhu;|e$9>O>bA#7&RJ{yylhZt1cQAYxOm|l z^m-G9h->|)&7>aAgrM3_e>40_&ad4Q>lvX>-A_^6Df6vinIXoui|5m@Y{g2rdN?a4 
zu3dw0duv=?`=uunXaxHHJ{&!E43{olLE-J_TO>G-S@%4-f@b3)$Z~X=9Zwjz6~B8o^Ws)-wiLCA+Nw)0ueeeHXNHyJEm6aJ8 z8E_)2D(l@3FlXh}2nZUQ82(?O%hvH9-s6j%(bm|4g!A#Rw6}wev(w;MTv&Gao}bih zj3NUDjGf5KEkt=)9b8>qVQur>8akRZaA^8L{wfMH@#f3F!_jjo7&CJf0)1RAt@AQr zXl%m2{_+x{OFOV+`69K}-o+BD@9nwVz1mobhwi-_A=8#1I3nbKS9eCx#kjkiu^L`R zz7PG@MqN|T@2{yJ)y28E;ij9BS=xeYZ@fk=&@6p7U%R>dAV<&G--DjEX1w;^e~^%# zhgB<={D=ud@wY~Pa5Tw+62A3(|odA+Y|b6<8~R`F`g*B)5#TV?TnvR zM=pbh%PM0gk($VfGLlVxaV&=-*$=BtE8R;Gv@z}`;;nZ-M|Q5f45kc4#Z{O!a}GS+ z9KqrK9x_7T-HEg3&f)OUqu77)G?M8@W>1kdusltEVW^TH#^dGE|4ywxgnk$YGJZ0a z)I4=pwRX1pJ18&FO1U#hzH4{d%O=0H(w9q@TblfcJy;A-pN8)2|6y(Y-yq?%hnw&( z5yL{6huO~vU49>AT#PX4dyttBL;Ud}5;F2pSYCs=rgjAT`y({a_j~Ho#NDXIDx}TjE_Ul|D%=X1=soA&)^fW6RdBk(8c} z&v)!WV@n&ZTDxi(c44yF*^211XL0n%LHfwE^t08NJ9`#9-JQRip&umQ(FMQnL`|MO zT|GE=G6ptQws3YMKcw%}$sa&Nbx2!ARF_qvp`r%;);8F6>?o}DeekfdLU~;ybdJvI zrA4D-@K3H{7bW43<4nC`&pFmZ7jcH#mLuY+CE^N3(e60OZ z9+IDC8f=d{TN`WeufP2XAAfxS6K1bMM6idt%k^?d>KPm-)t>hCzanEFIx32goR$TH zlP4S;Y`!Poy%==?9i{mAi>)}ASc?@a7r>spjn)rMRvZNjYsyGtUX7l_GIprnbd%1| z1C>J^yr8?6)qkmtBf>ao!LJr{3?e^S?nsUZUAl_VN3_`oLr)hDeR~ovt{!l5`iYiB z`magz#b(@Y?w|Bvuop^4N8OV7Kbg{zD8+}Emo{rqHb`q8uV>HhRl)?Vm&gsE`2h|adb~ars8L0?*;n* zLy3G#>lfwzq6hlX)7go?{O0#C7zPkAZ7K%H#D!d%>Ee4YhXyAM&EnCj5{BiQ3G}c7 zHg=AfK5GWnuUU!i%5r@5>Z|Cq_eR9b>2Rc{v66B+EBFNjU^UB3iip6GFX>25oI~is zrSS1^9=roh>$Ciox%<&G(DF+hOb==*O0jqUVWehcAvG@#MjF1akC!T^l{J%Am-(A^ z%v3@Ru=aI$3qnJ`xam7kd~7#f`)CWCCapkdfOf~6>8PT?YDk)VM(!J;p|eJp8)Rqj zL@6gf1`wNYo(?<}*WYy8un9wvkye+Ch4#&Sx~M4=%7HF?_~vIwiOIp_sgvO1F?83+ zU^7w+8`pmGujz5A3;Qvkz<7Oq4QyO}Fl*r=1bMp-3vLz&3JSyORjaXT#sqx&(Z^VR z-Mt75@wvoZVnZCOZ^FX}LpC2blU8nc2~|e%s~s7B#t`?z&oI}al{EKJlb3cx$b8n& z9#hTyC`m8COTYOue51nQ9~GhcZ(iwdxF4;8jUPHa_{h8+9v--o<01R zkhX`3YEK7B2+YS!orOh<7Yt4qimXP;iyx-n=6cLW70I2N-Hq6@^-KI`-)TJX#8Zd} z^ipFe3@zhbL@Zv^&={ou@zUH=wn-0S6B3M@nis z(lc_<)ZB@Hz(6M0EJD(1AL3MNV>HPg*`!@5t(7pB8<7YZdr_R7isPqaQPtK3cXv0~ z+gM+uPHAbVHL)A%_{ehRGC~?}+BY+e-XovG`*1OSN1r1=sTZkniHJRUj&Um;shOEb z$;d=gXBUD*R;Hp_%_Gkr`AhzC8)!#IbsnDo%LlOan~wkapQmv1^_#JN-D-Gyx`6o` z$;{34x!;?ghn1wzf>Y2$=1-Zp>G-uwQ_khy!`NOh%V%5a1Vi@BpUL&x3dlL=Z+yQ zJqJ1$A2>T}{!tSbOAwpJ!O(@wv~xId{0!1EbC8~%g{-t3wQiS3fWNW{&G(ONPkxPTPe^oAkB*iWy!@Ac zqpqn3FTMI2uDj+MtX;YQVS#??g2@n#tYPSv4%RmIShRE*RWl2ve95pQ!vLQ^V81L*zR9L}Uh`%y zDW5}UT?SotVRK7DCfS?(=-?jy?(J;CxicqlJ|P=Up22W-Bu-@!?F`Y+9+a09VgI*B z6>d*2C}dZx;O*tXsxIx5wH`}7!%Jw#WbY+OlDM8`q^4%!*r{mxHVg9Q4Qp0rDd#b{ zvv+jDq9qII`&J`4BMnAFFRtCVX>g3CWh+}JOq@6#t5?iLzuppM#m!j0bP+tgvaXT)d@%8Z|8o#7v?wDxzfk8)hdBx51d9@7~z-PMDpx&}P+@c$qSnOqM6dG~XdP zqXA6~H8^?l7!ndvkVgHh@9c$7V1T-7Sko_|q4tK`?1dyFE9EpUhR&s2jD0w~=Kzuu z6Y0}3sq2|Y&&@+uPaph!{Y*AtD$g&*zHZdk*1*BtAF~%OM1U7#_XxR-Y)dvF^HK9L zMT4<-umwd!zs#GE)Hn29`^9$kov1wZE&loLr*NFO3}HcTD&IA8AvC&nbFD*rI;oyz zIB_lkMV(&Qux_Es35Iw!X?kA=(&Nvep|uybZr-q?!?$4UOixL~=~JiS; zI9QJUr?0IMhraz53CU^5NY6wjxAb(%l$*r5>gG1MySl*9PMhhHQDQ}=zM3t|EZ-r1 zR0}M}f@-h%h?E*UCcou}sf>_`Gj2NAP^azt+wt!EZ^L8k3&#|+p z?DfL3g_GeZ+F~LLCZJ&8)>~MkmqHj)a!I-jD9Fmfr|*A+gD0a=R#uA4rp7`$1G{L6YfT9CBp@1l+N!oc8l@^B2g zJB*k$e+C@f9N9;a1>5DOshF%>MZWrs>Vl84U+z#~?Iy<5hdR*9F}B9E*>kaU?Iw8m zxT-)he8SLz0MLR?->S0?AAayLmfiRufYUOMf(};6@ul9Xx2Blrv z1-E{7Xn?k0p>M~Y14q#65{b2|7D1r2tR5vvg2;`mtFaMH6MjZ4JSzgD9t@D7{(g>Q zTLV7+ft3ITNbYA3Y zA@b^HT^8t5wyO0@`3@c*U_JIuPS|+W^_a7Cv6^ffIa$hbvMK*4b&0>C7LX^=Vd;U^ z3*p^>!i-e>`7i&#y^lYKl`9vFkfRnPSx)ZM>tox(J1gQ4sk$f;9_v6sVX0b1V)s{H zM2%FB=uPe6Vy3H}6N9-%>MytQ^UzC_ z_*&=;{K|3W7%JZ&?Xr-nQ;C4ElfKqMC-%Uxp-eSBRZVO1l(D1D7-j5uTy4UpuqXV| z{=}6je{QZ@^@BRbx}?00e1JarwU=JOH~-mzin96cd zEx$6*i!zq^=Bqt8aQHZ~^7DvM;*pk|q%OdC1^B8-i$To5kA6MIW?8XWsYt6!#!u=a?|P%=hlqL1en{@~k(e##^sydQxA15t%249@ 
zfTbn6d)o2gd#~V)H$O#7XEzF&Sf`vgjvVW_viJYiA z65}`&ViSTHq&0P43hg*QEs^?t2LJiuL;UNV-I%lZMnr}>Q_sa-#U}K~W%Bgt2cO`` z=@^uklpsAZ4mtTn2#A~ncTX2(--A6c>0v(|leXwPj|}!v^npAnb^Q*zp)7mLGx6R> zALEq|K1YwGBO<0QfV+c*$(OJ+>$9@rcu`08A33b>!ZjOdH|n^Mlo3oTE6jBvE29X- z#m!i@d@($j$0#2prObTM_C+65yx|!3kaw{OHO}Vo5iF>DjM$dQ(^C0&(HX5>(Im=@ zdN9BOVqX%Ql^;@dQ8t1lSx0+IC$@dD8?)xj#h6KvtV>Xf=&xBMN-J5N)J050{Z!Cq zurA?W>|1ozNFP8>1Q&_Tvw-+w`DaPz?rd+1O2(}7X=x~_Z$f2mA$nW8uz2HogkA!t zNuN!<{sCk@L*yxO&>X+DSTZC}iOZd6YG}ZT(@8jeJQ{Dj{5o2@JF#%xD%k3@wYL>U zDbL2XZ}wy7{*%bh%SU!*8jhbyLQrrh!kIuzeVXh=R~gU6Pcm2F@R|0HCJq1UjYPRJ zZscju-t~RB?W$Xmo|cX3#(L!E=O8C5RXtf}XYU1HUq9F|SEhxtIp$Ohyffy^XHK;d zZr<9>DAs0OQFV%6&}1N3pvTmb_&)6$>yZy}63S0@!)UPLIBPye9O;7n{9 zD*L>#am^yQP~ZDSuf(2J8R~R<9p3%R?~qs9fryz);9+Y(V`Tw;_s4(YYGD^4Yq#%6+Zp) z8QP<~ILTVp$_E6VX9G0OHm2T)#Cg4CEKH28W?zR*k-OIRd!9pYYPC1olrQq`|+p+&t9P$f_P(WPNP5j{N=MPU;XU1pI3#plX zlvqzYl5v(c`&gfNDlPVk4CtN2XHoI`*?9vMQy_}SL26K1!2!@&@hpoL96$Ii>g+?Y zZtZfFo7$M$ds@!Z?mjQ@Q0A0(w^ z(9Sb)V8=dUzawa*FDhhT4fMO?Bg5eCY)#H-o2m_wk@Q8zj9AQQW6~M1E8-HiCN?5- z5z^IOIjHzD@h*Q%x!`~)-U#LrdzbDFmSVme6%~ZWhGu-RZ9nGEpAS!W7j=BJe*QN! zV!}{0F0wN*qJEDcO|Z~)!H$FU!LBn{y82p-3361)rAAVQWYG<0T|MHn%aC1ZgR8Gt z1Sgrt;c*%0fu1&`$DBq|K{c$r1K`3WNM4m9g6->S!%M&W2Xc$5@%jfJ;O;wa#`0-l z`1ZgdoX&5>}tL+|g23i~P(KGJsrr;$-=3VCy|+zi{kPcSUR}C-N^yHoi#{F zO2oOCIGi~Vi@Mq-^jjKHM8}YpQ>YZ(+rynk%6935<eAP@EN*LT$qJ3XJe6-o`DhqqW#n}tB|SKuxcS(Sk8)L)66lBKOf7np&oN=TG~1Y zv^^C_Qe!QEgEo^Ka~^S&Yi4F9^(-0R9zBEQ*WQ5$I=az7R5%bCDK`m%y{IZLLfrW? zND{lrWO9{~M!U{MbFUFT1Ri#jgN}iys2vzQIanMA(r}=nZ+PjQUN!PjU8=Tc<$@$ zf?Sy|E6Ya@Wfm8gz;Q@JZfOmyon7GOovf|(;YTUibba4C-uGUc{ zq?CzGT~#H{9yx)G^n9e}mN0oy^HY`D6u z-%`MQUp;ni-+_OAa|*wG=`VQhu?Mkw!7Nm##^ccGbBJ291|H6~%KpXo8T;C4J88%) zsG_{x;XpZ!@Q*4P>Og5hA@-h#hr740(pM>=mDB9LtgH&h&ctKdv?z8VhX-%5Nt z<6K@&4vwEXhm7nzbp_nc-T}_ePD&PXN5w#IGn%OHyN~B%`ogO)KHM3Wa&d(s(y3cJ z6>Or&L}q>T74h zIL5>}lXjDtUyL>aHcu}v%ALMjZ$MRkE&+HZdUSeadm>NKbt@-lxH&sA5t6zHboE_G zNuXbdp?+m$DPNnCl8$`ZkFARi{c#^N=M(c(`9 z)9BS(f)DLEf%=9P1?q=)Mf=Jzr7p#%(4S?N!j<6K*=!T)xX2VJ%_+c_pM8bD|Ni&* z?6c3X{OX(F;%2YxUf17)&iW#Sb831l(NJBDq*JGml9`LT)>hW(rr9EWR3hyfXe!2 zc+#)h(%&nfPtcu1jB%22G-CXC_TIpld>*b|?s)ryPjSn&>#-or3ETD`M_P?FmM@t@ zjKR(^;p?OgMxT#C41IBGZUJhWnwjMJz?S$+{9ZqGO0RE6Yg0S+Y(Iu&%T^+4d>H+l zQB5l86u1|8iQKvM%Y;iKjvS80t8adau<_#%5K0`vGGaYK1LXJY`ID%wt;5yV-o#_( z2}3zw4}EtB^0G1zeJ&blSviaW#jv!tg^Me7l?8PJZ745HM@oJrY#cpshH*EOekOx) z*ICI^CMa1U?Y6DC7D@5Zh>wd`crGPPyE~z(r2~%c?(ncPpeQvOfBWYbXlk`aSd=eu zs{J6w)O2Aajl3A6)I^cmBR?*7(jdvjKqLCP+le#Narz|3ni#pfrV+NZMQ3FbM${GO z;l$1@sB11ka8w{t3;M8h&21PLM!&BUSyPvYVSe|^Kcl9p3$MQOF7CSh2FxGhgM9~x zy{r4NVE#4)jp2m2vq`ly2j?AvjavAiBZ5y2|ik`jYm zVp3e1a2y$h1(>{OKH3a+aPgUd2!99I4i=KSm~`^r-a|)JVs_2OE7bUpkd-9Eh7P2q z7NST^7%oy1hDz?j7xhFgq|{cG;oPYc9J@^7jy%-0cfs4&pE1FdcZlyZ^rE)93a5{; z9LF~!w+tP<1Mu;tA612zJodCBH9irEj4Og;#J=p9YdBJ0teI!jRTSZ%U^x1plgFZA z$FXSXXd$-GM`>*%TwR>tN}M8k)z{UE4C1di<`5Yfg~((XN9w18(mnQ)e(!wjc~mer zaHbCsY?zvnf!yK>z@C0vl6eZZpV&ht>8zMzZr!46VdqgY&IeSgW#lZ zQMi2mrVX%XopOu8Ku<09fA$vkoXx?VPy80oJbFJiZ(5~fyOW9i`ps9s)`ZtABtPq_ zRd_Ekw+_Dk{;(%j7}*bzSvzV=GI8Wg8v1SB;qFQtCl{hk7dzj5<10M$&{O#BUw@11 zuG@sQOQ#?=EeD@{eh86K;}8_?r*eW^;v{+8J)L?`+|UktFF&~3YB`lgM(SNB%8Ltd zkT@-k`DuDVG4!;{0P2g*LN1c@p}MpZdC7US_daB2Cn6?B@NW@xl$E2q$VzaL=x7JB z+2*rwae7C(v<+~0lCuCm@ z-Dqj7#nGcDk(7~(ccQ|4CxN%56E_dW-(g(DvoXVcO z>AIT`b7&{}Jwh>I{!&;{_FF%G7a8R}`0YPl#q>#$#Ez0DFy|m%D=RIbzdlUdo`c+y zO5zM_c(Q-i)XNg)V9Do`VL|>JJ?MWaJ@L~LH;x|M4+}eII8pD9@7aYHUidYB|N1+4 z{He!q<8_-cZ}v=76qg`0B!sz%oGBAlohYDBk7NFrK`fBY_-bu$&pKRSO?+HcoJCtd zO+254Zx5Y@m$wfZ8taJ3vr$0I<3-%yY%i!_q-I|}UFh0`Y^F8VZ9Uwag_N={$*%Ger_HP 
zMW?}ovQYlVM14(l*z?Z6aV)tAx4-abJo)$oxcR#4R9(j2PI$-|QV+!!iGQrGt;VJQj1BW0Rh({G&`nJGgnmhKa0lql)BWNi&XodkFQm zAy{voFw_pRy&mRXRY*-v#<`dzq_WOJ`eu7Od$QN&}w%-XhmIRHFlp!hI{yQtXnx9 zPUKuEAOTKSO)*~k(=YMv&J!5B;3`B0IKq~S1hk?&FAFdH`F;HRy-(DvsWrifWlt-r z>sqkwWIUEEUW#xZ7tNtjSrSej#ou0h9u;kRjG4b0?)EbIXbMKGGc7&^$B!|Y%*{nf zNfDxt96)zhH^L@OQv%OQOTn=N`%s(1!Ki9LleH&W>q=40#L3Fq1`!csnb6YlsbJBG z>f%D&{oFG20AREQ`NooSB z*TMr)k>PM;Qry?mjW{ah$rIlq=IAb*&MZO5oOOusc3`5Uxs0JcY7>SZeE2Dr-}nGR zE|@SBSs9Q@NA~v@pF@6K8^$hN0}pmgo79j}QYXt%uipLpOZeSy{)oBju0|jMy-I+@ z5e#%tkLs}Po8vfnCJrT~#dPp#D9uiTjf)qYgF?{31nAHZZOy1GE<;Tflk7!v7)&kk)=PiJw(Z-o`i5JX69za1Q0dllt(H2{QMCfB7k)O@4xaYzW(fU^!8ih43m z5!@MMWMX|A6E}HmRE-VCr6e;MmQL<)2@a>t6FhelIG;(v)}6m}$}Qcw zSasuFi1>~>3_1x1*zEt)qw!5q9_wvx9`Bg z)A1;&tU^V3Imf#nfq@}#b9T`DP=6b03$yTt*T2He4?K^_V?$s|=%?ZLctS0IEu3lxxvEhCfM zKm74;SabWMxMtHbI8wek`ndKcg7oNAIE2o@)bW0*AL{Xq+G70j53gg(&eK@1U=I8P zMlB;DlaTr*{O6NT@c8dv#l#u&F>PGvutbFV*WXDUO+`t07p~c`5Iqf)rx&vKgM5Qe;f61K_(Nk zBI?H3(=lkMX-DMPNGA18FzWk|LOVHpl)*kPm%5&fy*qcH*Tw^rCr^Q{s|Fu^@&!Kl z@-Wt|+W=2jTL$w^==(cxpA~&Kd_V7afiqh$h%{!<>H1j_=7MC-I*xdtl+tgM2(j`D7y!sGq|p3^k%J zCU}lLeF|UhIE=)M98{KMWQ?D7$fy*(75dGNMo7^-sP*PFXFg%JS6W)c=0-Qzk-Im_BVPLj1jG7d9~JEOF?_ zL6jF%!PheqZXPc36yr#pQD26|pC3v3?qUpE-OOf>0_v^s>Y<* z3t($&gYM>5oF-m4cRmIMMe-Q&StO_D!`3|#p#k1hxfZrvOJE1V#{}P?jmNC&|xQHHmWScr0VDJz6`Pn7|dFv8@lje!j33 zqmZQD*iIQ#;qc+3wA-DCm@*R^H?D-Ii+SB`jRYgggkg1V8C3)%C`ps_aJ9-Wd&MufTWiotS?BV0-!Sh=1)u$gro{$I% z4&ia8KxEjgGs7ndhffv`C$;zDhlZ%X9|K+F|IiU^-E)k6%tduo6&jkF;O^mr0AIQL z(*h-h1@Q9n#QLk2qqCKX-iZ<{Q4@wtIOWdjffkfxW#ZXCy^q&E_*6Z%XFbpbD`K1C zihAsi%fgZc3lZui7eh3Rapv2-`0FoUMC*VZqUJ1vi><{lyd-whgN}wKTz$nINKDPf zl{Z}rTRZJ>LdhYd-@S~_$B!LFWos)o-FOW$)5>Uvu9!YP2zE*jRAFxB2}8NVaA?BN zOj^l{PZ$moV{?9TJigv@1g8_Sh_QHfA|A-eEkjmtpm=)#7d@DHV*VNxn`qxea^m~vJmxk@z_v2J-A}Xq@QNbL|(Z&`b%w244 ztkK&+9!jfm<+ZmUD!>=urHm ze;?j?|6NR7vK}GSNp*3yzYBd8nRxNHf5lga)39X8N(6Z``R1`9B7a8O#g>opi{HNq zd%r1|J}C+g*21^=mI1u?#uu2sUeI%BQkR~73z&%7}@E*ZteMaW4$M+}jIfblco z;^IVIO2yIrdr%`2H5GMQzFS|W@?D*!HS^suaC5RDuINQ+ZUzqQ-i2LJP{wIV=z9g*CnGwg5ppMRdh&VXWo95dF#+~YZU`6~1-qU$#2nlSox3l_%$S4I z)YStAPvYlKzCawn;}pE5M?d$}bC^7P4rLaGFWz~Jal8rlzxZn^h>d!>(TTAlGBONS za%F;T_V;yA7O}LkeK?<-hl;8~Bqy9kc~vuWiHXW?3iC2?@bE$8(^gZ{iWqO4(L_8f zHrUwMh^dn&!^6$RG>$(7^+T`kLT*kvzTEN+4xNfcRYM&rh*9LKQ4sT87e`w)@o(qo zjVrIZ8W!y>*uQ5dR$X@!0zv|{T&iD>p6U|3{Mx%%d;4R!V$D3__IAd#UNqKJ;81)P zT%s0YQj{lcNOZaj%~eHs`o(`>&%szMU$Kb(-j!k+!dx=Zj*e=)%N+Wb|NID3X3WK8 z=5FTW9Tf#QvEwsX`A)$6D{g|DgC+3~;7MHI$2e(iC6+2TeY7AqB^IB5a|j1cM5B>; za|QDpos}K)-2gb-QQzqw8;M0uF^4*zkch(KOybZJ$SA0UXTSvbd%K!0ya-9YYML;# z4^ej*x^l0yCe+v0g<|GoXU@hUJ}Dh#l6%G+r5*LbD`Fb;nE5;Vw&kOb@#TjfqoqTS zxYSJAP7cyiWRfupbLP%hep@G3tk~ne_8z20C!(&a4a=^fAEGm~p%~@j##itE3p-Dw zB5c|k1bW+YzvTFxcd(T00Ri ziMGmwPS;zB`1lOw&L@zXkxk!ThLm$>(N2sSGJZU)Omhsvl{jPv8ygHTFWb89Yb?F` zHrl&(BgaVJ_Tx#lBQ#jyC2Jl)TYVjl(0T0mYA^O5OT@-2HXuC^G(Gyj9v-~1Qee)}yv^s~DV zq9!Lzb+SG~PbUF_0~K)qdM5JHp0qYZ>=y%}kpN$3?}W}iD)b=nSXT><@A{I4kcpcg zdJeC?^)}vq>rLEs+qH1CmWKow>}UktwFUU#o!6na@gW#}7jL}v1|GQUPUOTV;n2Yo zu<;7Poj-dJZ?aDt*Kfe8Woz-LKmHl-y!|HL=e}zlOd@POiB6#@q4goBzZUcWos2c1JgjSfJZ*$8jl=<3nZG zPZ=9#kv0^NjuMphokUh6YvLZtw70#ThU<2EY6J-|*@yf5Wn|J~(_}KXU5zShRj4{_xtXc>cxz!Nf5W@biZs!7FdP zhPU3P++O(;eB`c1oeq!w>_+Nk0TKwX=tkMsUM87U$SJ7B-1&>)M~60OTqcq}jnos8 zIFpWM{N*>l!i<$`@Ta$5$Lnvi{bwJ?K(E|JZ2>o*2;A|rhw%EV|G?Foufp#t{E^qpPVBZ+x%?^*yfm%^&}UH>guDzxfX=oi~|vh<kk2M_<^Iok7UxaNv=9E_wozGZ#izSyzvs&E}p7xSr^^WY(i=>qN|l4 zb^kv0F#%WJ@c{ny&f84NU&aOoK^xkWTu4%5Wb8&3lW=<%U-*xkqGnr%WF|H0(cUH# 
zlzVWNLGqqwpToc2eGPBC@h4odVHHpBVKsJ$7(WjyR;+@*r=yx2lt&iTAmjn>~$5-~gKIE0LOX9w`js1_r29CTV?|@{qJEgVs80#sVw!8T!>D2g7Sr{1ec0 zGud-;bw^uw@5tk{rGd_BpZyoX=~dEZ~ERYw6!#&wYi00r4(hg4OqQ$30xSx1)MZiSK{6Gzl4pw7hZewExbdY z`^Zgem^{T`*RgE0_4K2pI3F=d$;ikq0@^#LF-J36(8I{O!&6={w%V%YXVE zyp5fB@1K7|d3}d^B*cc`EVrN-`Hfb%RcgHh-_#0lMe|+P$zcbk)7?fvFEUXdA`0?QVcQJV~ zaIEy&ge+;W+JPQ4l%+#&)eE-?Q5YaXZ)u`0Jb8{Fvl9078|9fP3LC4l_*+kmV#6d@ z9$D?BkCwVdj0MZc-E(RhvEK%L`hIn~)~2-I&BVC@-<~*wl~-;iJ~CqY%Gua=;4r%A zM6_}S*wQ9i8k(3OmLhEI1pM*e|HkX|kz?F+Oorl6-$8xhSlb#6h)c{xVRZ}s{P(|e z3}46ZpLqm_c5X#^b%ScVug?O0fuVSbb-ecOzwpn$J_$SO?>9RRqq@Ew&K};lV)Zg~ zROYeY#pqH@^q{4-4Bs-*x#ee1!hy-IJbJF9Z_zWJwX`&$xv_-_Kn)sNIuJP~Ox>9a z>R9yYXzbg2lKOBx{nQ(H|Mi!!dU_<|$_FT@=~Q>Zl@v4PCM2tiG96v*tfvqOOimMM zCwYa%=%w65Uh-D)s)8)++_8haoWtUEYw;NS7aZU@cwuvhWQ$CS! zRLNwT4YU>{U^nB2-=y{U=bInk_1FG}n{K&TtslI3(2J5nwM}#@3Dy)=J%FuUR7drz$d{uCWtY^*Z>3Pr-yS zW8iCNjppn$bcnBFiHi;E@ST@0s{wSk)ZvQ{-$6=N2_AUv_w*g_;+;4Cjd|hz`0KO3 zKuJY|iVKF;YV6qc4fZA%;qIqjQ1X21^?$01ZN|PftvWi=j*dQDbK}j_g;(+VJAcD( zfB7JGe6s`5Oi&H|)|fbb4&G#}`U`z>L}(=LC)R(9IP6{8>TjQalK3=~^>iXRJ|16v zwG$B&rs7Yu)i>V#JO1$7hj9AjQS9Dz2%U_tf_DTPrpG5EyP^p%{`ntxgY7S6_qs?tfg}RkL>OB0TiqO>lK{qHP&9(eclBsNZFhjIRl2zWW(6 zOWX0nZ~wslzRuj`-%|>EhQjo-hzNGUiP!{sP|+A6>YBa}hTdLS+c}_zIzGBaMT`f=t}f-bdx<6F zamGt)3%!bkt9o*6(KK(A6{VoExf|AwPPpc#JMjwR`(wAShmQ|)DoY0~G$_(qPKb-c zj-9*lKY#c$^SRgY*2{mws)ciy4|S4fX;W~F5miNH>gk9TSKmsE@(O%GU)5i{@rjv0^9`~ouGBWQ?i#?6~ClYYQT~A@&?CEf#BUZV! z=w5rB@^v{CtqQll$^7QUr=C#nOxU$`J6d~nm@{h{rbf9VCodai&3err7`t$Su{Uze zOiY|G5t8ywn=}>8jDuf%@fFTQ$DyUU747YmB}FOeu*BP@nra-_{tf-A5l_AJ5?+7z zpZN11e~zq-G#ok-kM>SW%v`Vo}ROh3E*3izK(zY>tWjklkO zM&hRLp~ku{2FNYgdm;wjQBnBWT{pttQF9wY5=8V=w3FwP@!AKUqlw(!a_dc4HFul> zX~W6YFBGe>zL6^qub!`R3Gv^G|o z22UP8u~t{aZ+-L^*mT7vgt*z6_>mHpu&}hks+Eh-)7yZChFUb%*RzgD*fI#nRVxv; zQe-w05EWt^HWNuH4H85f#?)%woc;@lD)a>*HLbR}fz3*6Dcf+lC=+>(- zds39?fRGMtwcUt5n}>yqmLY1KuW9j7?Url_>{>vyKtXoCy6fed&1=!o-iW5AR(N@N zBf!TOF%0}oOcV?ZsOAMd)!U%dH;vyRzG>)8el{?vYoYkdy;qC|TfUAo=9GJmRo9GWc`N>RFWitM;;E>o@iW9ByUMcOfg$+K?|uzmZ#N#aL1;u2 z?z`s}czGyKMba@qu}+?1O<9Vwl>TZp$lneo*0IPdE60+nZibV)i)ve(z38Wu5sr+_ zb%;BC44y&02%{|O>Kh1JdJ#1?R6R1DMccJxgid3UYj5X_nM;Johyd(&FI zEMncICAn~A;Pwd$S7LE-b;j6<6VTGxPIci}v(Ap%dTiUa1J_=&kt}zhfqcpOR`aJt zAv+@l74G9N!aoDFpl9JjKkeK>sdFs$wEuyWA?v{3gN zn%e1`LJ$%bgwtnFp^xm#%10Ih_Lwn!3|!^1)l9O$dPU_?YlkdqM;kI54!!o^cY zL)B~|*2+YpzN#8$&gbEl+aAL51vAx{$>XV^A%TbpGu>H6K9$!{;~|^2dx$L2(oRV* z@_E|QSCm)az@a!M=hc`pAq?|p%|KIKJB)+z+ZOJAQMms4tMT+>cVK*EFl4PBWzT~| zFJSF~jhk=AgLmJGwTowsJWf#3Ri20X#u~Vc9RpiWAB04XM{QLDinH=%={b@}Qzowa zap!G!;>N4jt5fYbRvWKehXo5~Q+BeJC(BqEFplx+&fD%G&ykd;B_>UofOYhT0e)UA zCu=w9=zAtHL2ajhtS8WE#>~0%C`tpWDyuPI<%Fp4aLi!x5r66++VleiRaPpIw`VL_ zJa-zLOm8i>re0MPWa7yF9r)(!ukrVHcA(dNB5u8LGu+9urLkUJ^h(Ss#F{IwM|*24 z{c;Pud;{R->5I6;bQQO*yY)`I^sj&7xu+gO)R^(O`@To;kC*?9fB)-c+;hwIaIq52 zYeyU7<%gf_#Cu;K#{5-l@azjuBRtSUjrWjFXe7fsdWbc!3Z$pWk{#K2?!~8IFJ{WK z-CUjU;C1WJP*#AHf(rENu=I3DY zbdIg6YXDIZ;j{%WG&QuS@mBW0bFJ)MuyXZA{NjJ~M|D^0f<5nZ??`~^l zzk6V1*Nc*>MtsWz<7`?s6S*rWn{f0t)^x> zc1})My6GxhdF^U-A<53w1FJXRfVs10DxDGglCqK0r{JkS{DN^%?qIRNm?_h7+g*3V z#a_G9+|%0;GiObvEi}?5nqjnYf}ejdI$GM$!c83TtSRC23!Q4=|0W5Z)cv&BldyAf z#+s`)NN7_wXldv`WpNJn?%aYcU+uxC+Y{mF?2oHfPluO16Th}o}iKs1g8K5Bhs;;IfGL zju`6{F=nr&1s;3sL3QV}klcwB86FB}Cjxwth~PqnLl`*)2haxOyXHFNdmrt(0s+B3 z#AiTuP8D|TJA6ejNf_tSwEX+gG6}fnP|83|db~~SxrY5DDtDDse#O}EDb}U^mO`R(@RfGil zA%Zg1b{H^y#&|fp+M}+4n4CUQFpaCLJ4%X*#Let`hBQRs5Q*hf& zn>p^(ZQ9qwX;ZLh%{=m9kEu}+@b~t^m^pY ze81)|nujHGCZV#XTG@^|n*qtre?0De=yBY8^A(sF>92KZgj#Tql?&#tzZnldbU!vN zn@+zdrG_+W6}a%&}Byu^sV&m&~aK*zZ2d+Krep8fEpZsBZ5^ 
[GIT binary patch payload (base85-encoded) omitted]
z3ZoTe#6djz6<}Rk#@LJo<`XTA?TDr?FsQO;4&V+KH)r&8HKN=4bePoW*Bv3iz#$;N z22N`k$tzvG5EvMY`r=~7A96dJz#^?pq_eFPAyF~#rl0#M5TB%4O7+K-D%;%gueUzY zdH>6=yoBqnSVFy#&&SwgL1;Co5iouYkh3!RZ|@s}r(Sv+PyOw8jBVoZ`uiVY`@tkM zwRHByZT9SYSg;RyQ;U+ay1pARa=A-#Dt#JOV}{!n&$S}1Sdet~TV;`K5i)}{yOs6uCJJ&x_$j2+vz zqO!$0=QIcm+Kh?ee$X_^xb-w4@puA0U%Lyp-1ir}|KSIC>+RR^w?Ey8aie0bWd)ou zdUz~i{ay79-F*k+$-i)fjvfz-CYW@)yCWhxig|FZ)&&g=gc_SV$g6I|42#qsu2FU_ z+Ai4lb{L%M?PaW7iN;puXf~&)LBZRb`KQFDEr!20&e~{VpJ*4l+gp$xcLX1Qv=Rwf zt+?sdJMh-4|G)! z!A6t;LY5g0y(XCJYZ=HmGTAT>a3X{OqtMB~>f2A>#dFWUjMRK4+4`(FH*X9XJ`u~# zn}$%=R@Bv$qpG%6zdEpY{}D`NpgO`jI0gkh$lxHp^~|$a`TdWmqSfnDRSHsLb3KB+ z-QmriHdZ&Fnw>weV;`EB02o23734M6h%oq)>7@yK(I8HY?KZTu_azj@Zx`w+3h?Z+ zFXF{lK0qrYUW1tffB}4WT`|7;>N7m^`g=&tDeAw@LG};!Kx~vJigGhhT2PJ#CX;)1 z9Av=CKuumRqPok(*07OL7!nL^!TGDYlz@YqT4u<{g0v6Q`psHx*e&lTok7rjA>qzB8c>p`#0iPMt|(U5m=} zW2j}jxrIg8w&w`iy5z2*1h}VyJ|XD`M_0s7odbJ2GZJ>KMR9&H$D|I`HI2y0uVm81 z2IbvE29ffWtE!v~v{u!jqO2Hu4je*UayEMGWrXbz;^&EIKUb6$<|8-16y+6d*taiT zf6>uoX=UrgMH1Q?suAYzp}#aPa#vbZi9Ndyqlx+;PibhdbAp3UIHDqhVc(pCChDJb zy}t*B{EZtm0zRZ~XqhGR(diLnNT&h%F~4+qe8zM z{6Zpe`?3XC^ZC2@e*GR?e&c=ca57$p*4-l`lz!67sAK2kff)-fKtyl=lJ;yuMFoRJ zCN=Zt&!!#D#j#`YsG!Vx`mT`Pp^Y zdAI;qTy`J`<(I*kw%646T4g7CyPC1~ z+pSDS*1{`r1RiN_DY-mJzeJkqQB{B9u zXlMwoyX;(q_&d=?QFl0YIoa7bay%KHLxy40gb8{YMJMyB7pVyc@%&T&z{+(SP|rab zJj90Hw9~JB{?40t{l%A&TUOi8X?8R(xd4v#?)bxB|BlyQdkL?<@djRe^DR90(hC?D z8-e7#TbS#1Fa|U@*2g!(iIQ&((+n6`un)+C4tXQOjF9jU)HgQLZd4;F^*9nzGI&;` z-FN~?{8DUGAf^p*#P&Tqur{?CA?*JUKjZ7%21p}Du&&o-b#TLFS6+=Nk$zbD(L0PA zy7h$Nw6P;lo}Y(fN0Vr`>-E@F>oMrkw%x#TCa{%YjN;&q$dRM)*C!svj7gE$@y$!v zv3UauntSx*l*ARLHja4}b)v2jRCBbqQ5R|&P(#1eZK2(^3&8Z*3lQ$oj=0Sm85>rU zF42i{97{>Zkntmob=nB0w=VtHg^^g4|GEn1}R?JjRACsAy(F=Xs;M30EkSY&d< z+{v+(IFq_sR?>*g`_eFO+&GLI8VDB&Y=x^#YNyYgh17#PP+LrXNPJP>h&Fl3My{I~ zo3}MmE^E-(NM19+ZewgLaYz%B>n_HoE+Ii!u;hFs#qCE)WG+@s00KBpxx9&JhjNkNX4P}W8{N)DtQBh{iAJfuB}FW zJ?&O|D=b}&^xyS*Z2FgVphb1?z>;gPrw#cNObxl%_~mOj5O*AnEMV9g3x+ZWh+z(} zdDS|4-Qw_I{jZKczkHs;kdI~?RT!pIE{z)GLL^4D{Ojx=6V?4&V zJvF;P)2wBk32tW-bBv61{ZwO}wjGR}N-9x8#f}*^6cK^)RJkjnMomYUg9S%dET^q0 zrVJalyRf88=V-EKfILYrH^{?XJ)jt;R8-dLr4bUdnJZY8%UMLYiQ`^F9+26(& zX+}*A$BA*fpoCSpy~D=SA)V!H|-H5@Q2bcm$8Y z>}jJ>o3|A!*2>r4&qZ)V6k8M9A&O>!6a8s-YrPReQ*TO`mmG*o#FW{SnVSR?O&~^( zh()MN9TJahLm}f9;gjf(OxALktrD!z*<6cFtAD@??|p{w@iXzSf4_*)vC-Ok7&)U+ z4$K3N9NbFV_d9&_*(WHh?-?9T^mRm-cPb$Bdpl6ee7LMUkF->yv8{vg=P=BeI~}g> z&dj&0%l6RN1O>VxEW{Bhsfj3J9zHOqlCc>!JP1MbQ*j3lP}dt#oLh$7%o9qho9U;- zdk+A!35k2Q;F+hM!Mg2-(As$_8qMgcE}>0(1h2oloPN#L6XsIxftWgRH2K+pgu~lV zR7rj^7SG7cMqE-hMo*oOz@Sj_OvZ**+|)|_V$Mr`msi(f5A{YdmYg}~T#Zq306Wh> zgoOp5vo-}escG6@O1?yQY1IqnS&emQWbRzw(7>F{g68@<+KDFAiBFZub>_RXrq4oK z#$n_fNz(alO;rQp6LQ(dZj7Bg0&YIC7NF4=iStj@bA63*PHR&mBHX=TN4=?}E~cer z;Ye~Ox+Ewhh&-t7dNeoE7E_)&TH9zxJ87Fm59?^>#D0iBA^#aCjGs9VPcVLraB9J~ z@4ikSegvjYjSBLig{TP=VeakX*lc31B)JLmFX~`UVJ$kfveNdGmyI>$Xsm70OIO*O z*so@-q8aP#-Xq6_BZ__1t!_w`m>xaa~f zL@eW9bJOvw1}6(@I zY}q0w!en_Z0@w`bQiOvhz7-$7^E{k` zhGE!*nIO-!5i%MQR+-U52f6&sx3O#QLCm}SYIu9PS_AdL(A8LhghRVf+~$heORmGn za96nK;Mz$qni>=0fjwkqeTNA-c|}M`&d?6zraNwDV(w#YSMD|TqAW8L=||$=KYRkl z&OILi?hbI0Xvn@tFFcx>oWsPwnFCUd-1Kx*SC%4|N#V4`motf%0bx-x+`-X;Dmvoq z)GV|%&?z%mmc<}_gTmm!D8@ldvTA2CUxoGCj>DNo=ep}Ift$1O)KXs`<&~u-8oQ$z zC^dQDym@mlnnqt==Z~XPFI-hC;?o%9aZsxYi&0rtg|K0xsW9!h>T=>FE#96Cc!>Mp zp*SjZGxBotkeXA7yy_-i^F*k;`>L@z!o$9k0Z}}1%j;0Y06R0c7>>?v2n+JppN98z z>_I|&Jeo}QC@!v`GtWUuc|97~zqt#}#n9L=@~8sIaXaadjv_fN9|bk-R1|yElvlya z-2*`Zo@}WHhqi7*dVB)Xva)cDcwBsgs4#6*pbm@}W?L&pc-4!_yd0z+I0Ww@qcLvY zVgyoX^po>_z;bk!2RCm-#vM~}vSI*=eAA1u9KIx3lfY}%iR-`;jTg57HMD}o1^M7h$~ 
z7iAttVpcv{oddCW@dcO|8)8k26?sMj-dTq|$1_pgVy0oJLLqrNbmUO|Q3M%rdBxJ# zT#GNhUxm0DJ3RRJzhP9kvChSyE(#cy!~p9>BAY>NPI>{-3(7J6 zoLQK2&J=i&VR~+#fw>!vL^%`1GzL2545Bg_^f|f6*mb&noBjusmIOD?|*u8fSt zr1aAwD&K>qswy1XvI7|@DM-%DLPb*-x-1STFRg>8R}ex1eBj~XjxGk>=?QUYs%t_n zlbEi~1_TX>f!TKm7S5dkFDF;{d;6i5Ml3Bco=&hHnR;xp(9S5pFE|{YE>8N%fda~S zX;B`^E6N#Ml)=x{6XlgPn0?{Zh=~c)#$V?N8U`>+JmLUmdie)%-0k$*3-y&{3_?qh zm0gN%Ch^fj!i@Di>`l~r7X};g`Vs!z+!T~k)_au~4Y@ZKUAhoKt_q$#jYwutl2uA) zTEygxy68r`K-^JmPWta`!2(S)R9@u;IyEGebIPR}C!bb^iTn0ww_jH3O}lW*ij zXKO18Q;xvL&jV2-M(I~d`%SuW2S}zX_w9{GC+%8cVLmc)a?wHB!^1`ngyg-bqAX>l7Gc`#X^5cR91LvLP7I)f zUc7wHl*w>x>Bib`S7PRpCDaKY($<3x1|{3qZ$=se%hW9TiJBVy5rc}FMmW0qAtJ~R zzP>(aZs|m3Y6_}LOHi0!ghO$eaPbVq)z@Ev@Bj}usuq+LR$zBRDQ>@={+0tPAJQ;Y z6ys245k|~iiqVl?ux~3tTxL14%RA6OdC1F5Lq2VHT4D;bEf4qy1;Nds6ZK3=A9>_W z%sKB8{PB^$AS_hYfU^Y@HoasY%?vC{v#|MSAyVrcF?aSPj2#iF<-K3#&kT0-OMX61 z=rWm*oRW$<2GRxj^lwatdR@H`>hA+j`oF4@Y#iCQmG(ak$I}WJ+_%BoFATZVeTe{K zBdGTzGSJHoDQW4*Dr6v6R)*wb31~ArVBxZ>FqVn8$h@HwW)x){L*lVSgp4}}ljoib zZ+6g;F^+LZ+R&S>=0@$8y6jys^P;OLb3TUNw09zI`(Cs-`RTQHgFPIKRFg@MtD7rr zNeS}Pk{IyLMQe9Alf+%P=K32joH{j-H`0>mp1HLKhf>O6=N*gl&KaY}a4=Y?6Y|PU zJ9g6_byH)CvX7&N_A@`XhHZ2^QU`M+{J64y0&o&)ImG?N@di~WH6WSj$ zF{8w8+Jo5X=OcE^D7e`>YkTkB-H8qBR$5f1b3y$pHj=H8+7@mt%NzFdTcEk(5%vfZ821XH7O<#YQVC3i6O#MmuocQbc=EN1AG|Zc{v* zoP4S88N`)}yo?lN=M=!jCmjB?g-f{gb_78#>V4 zQje{xH=wwr3Kv{+6QZI5wf-2>xwf)a(5Gztk&P$Jy(pmFPa!Wd(ld~qorwg-qZWH_ zTzT1L7!q#Ckd$_`qa@IWr87RxEo))y+Jxe=8hDaNLDU^T59&@qDzZu{88bGbpt6Ft z!w-$64Olu~ZY)LilhoUeGRoBM_;iF#Scrx5C&81~^kq$_{3vwRm+KFmRrdH`!Nu2M zNRT7PP6|4~&BF^JUXD1-M82W94LR9)$l}PyR^;jzWW5Y3i`Xu-;CO5aY;ZQ~a8rm7#7G8n@zaO6llllWTdLNT(7O3}i2EhLaW!p#NEb!A9A zyaP!`6Lj33%VgNW#S_&`wA)NwhzXB`8|{+l*`94iPL-WYyG?uA+R=*}e|sBqDqGwt zx2SuVOQa(+E*^oSCS&wD=fU6Io^nhX(DEv>AaQ#S_N`mTel=qLRW~4t@t2q`c^XUn zTO#SnNM~-Akx8CsAt5Q9bUWjMOE1IJ$z$}5Juh!xcvMq}E!3lZUFhrGDGICSI)k}`6UnwHCY zu4riNL|$PTMvouCn9u@e#)f&c&v~WwXsvHVX>KY?DyvaZ--)r~#$(##NpNPeTzD)F zxh7{!IDaWdFdlT%I>7BjP~A*s8*qT~>=-ctbEl5gF-^anCs*xUnA^5s^O_HFyrdn= zZvQ(bjSw3men#$A^U=N#&NVmPpkEnubZ~*Mr!yQoIo4@u z$g6CkugpUZbKj!sX7~h0z}G`=3R#%jCF8>nKg7fZSK!Z&KaL<@V_jS!zpo3-&E&0= z(xP12?Y-!6_hat73^Bo^)mp>=>9`F}V1PV{M7vR0R)pl#WF*mMWo0oJpnmn(J7C`Y z85lES2=hI8ti_sBb<}AeR>L^6j5$>SbE@HEhQZBA(Z2Vhy_qp&LOMFzx>1^wk50zM z-eHjlWF9+fhVfw?ZU1^(P?UHC2Quj!rYykBNh9DTUo9O7mR@w$l;VfATi_No8JAqj z_=}3Ir}gce;V(~zcUI$gMh5v=Pkv_8?j|w*kHCeOT#m?a^4-pi)Wp5mzi$UzJmkYT z#YiL1a|_F{cDV3EnbtlSF5o8budccrMb;Lz@U)YD@~ zqfVsdWHX=fLkWFnb6XE$Vi}__kMQ&IK?m(hD*IQ{*no_*OytoI51%j%^A^rWfR_cW z)Om^drp;M`QA1ogz8s654s2hy5z#XiVbbK$aCPW{#biO;-el^ev(}rU!aNif6~M_q z0yECP2$2kY+N-i~;9xxBXzNl_GiW~>DU*zm3QOP{6ajBvFP)3qi_M}e2L$+I_PA)2 z(6%KdmEqjEbE(HP+KhjE0(_ZEQlFEP8M89yD=uMY4_8@A!mQ^pvhyoZP1_%T@F3dl+;Gt~*J8wwApN0S zJ>rzZ)`~p(@?CHW9gC~3y%b)&AhKept^6wHL8VAOvI{K^{up=um52>C@-YJ}jQ<%k zAJ~l^M>pmGO~|M3&CFsBP*9Cwk^_)<(SSnou%axV<6Dg)%1n7#DN0*iFnsFym@z5{ zdOFvxo%ylCd&{@6emfrg)9(-)>8o{1bV1fA4Q76vO8-@y%bcU40x2oXahOX@nK>H^ z<}vSQeMgp)w{Oae^BC{ZkLTpjFEydAu@%$Lzf9|nJBgAtuH8A_l{MAaotOoa$%%19 zFhSJgTa3@mfG=6pc*;>g(&#E_dm9;ob-CgNv^}#!sBY#LT+DKan}QGf4@H z;ekDxni|mA*-1h6#SqfsDUQpE)6>Hb&P-?)E}Vx6<6;bD(pT7h%7Vt_>TNG7R@f+&nrp74QyT++A5TmiH;Rsp#T=b5YScI?Y#@q? 
ziZnf)@)au9k6}O`%Ann774W3t2?>orW2XfztsO9Rwj(k$7(=Lhj`sGNpML&fsH|(& zYn6or2I4n2+yMVTUyL6&0s($r`t{rF+yazVG{DU#7!lFK;UeqB(UHeShB0{zqye=@ z!m%Wj(-?MgtjA83FVDHb)YZ-e!VZ(hjnQA2)>M%Z9vox;kO+*OJ`1Bp57)tqxc0AoC78Ya{Sg=(gr?pe{c4e^vjb7# zVS20`NwYk08OcCv-1zZu;z0EErw{Dt5G0{WLjwSzKwiJ=dy`*bltsBqBTpX24hcsX z6F&L!;E3TP(byzU3(D&w3itf(UXEoX!el&SB57IUYht)~+0eL2q&Mt0Vm^gVnX38Wr?UkXYO7myt z?AeruVSVx>xaDi|ZXW)$kvyZYFf-7yo^3T7*g*V-e3zyE+ ziyrqkEza2N1qx9PjTT^t*cV|jgk z%ESp;j*W4ocGD(!czIzIlMrv(&6C04kZcZ)Ogg4cr>wgmYIrOH{EaWgw{y(mk0zpp z`s?ZEhhd{e!XoQnnR}_rL5Sk!;Nps5)WL4rgxZ=KEq`uq9=M$e-jL`(eN#+Yk@bCK z&6{%=yt=aEa?Z?F?7Rapa_l&Sy7izmEe?B9%hBi(1~&&2EFDcu!po4fZv&2H)nVN9 zB?$6%(ogP5viXNU-v=)rqtB7Q%>d;{P9ti&{cz=lb1{lE_1mg5fXzt~oV~oM zt7GBh%pk4G=%a_HCnk*_hsXefS2YZRGBR^f+tkT^hB6qQL33q7H~SDyofsKAgy`(y z7ZgOj7>axby4~cfueT=_E?Ug^VvIFV6JWb++nb51_6v(HMl^k?ta+(zM;}lpoEaPk zhaqa@7|cBPTzEQBc6deBHfrxSBYN~$+9KKsGRVL!s2vd)Kt52%CNTMQpkCQg|K^`F z69L}tgJj*-n!JXioP98qHh)xXi2l;;KuYux?eUm#lThE%!DOlh7RC(c%|8$G=FOvS zdt>tCv2bBvpy^PpaIDM6-t;iVY1BF)YrgsW`5-n*?2uS(11$6(u3m1KFl~aiSE7Fd z+Y{@p_t=grb$8T=G4Kf*0$;yCLHhet#rc331_grVfTg?7i&FA5V! z4u!93!l5nO5noI>i43QWX<@!q$J{>$`wzyUxY31nc`|LKg+8Q}autIm7hle?H6~Vs z&;9)qkRJBJldT8f==jIA0#&nZr&c$@foV?;_ z$Ub|>u^BOHlwNbKxU7<6VRWJ2tSHpZb66V9G76BjIALO5vPe;lZJdZL)Ztm=6ycR?q zilssB;@ET}Iy{K+Rgl&<;itYq!T)|CA&g5VQi9wunl@h^#t^#XmDW*X#=$=*6s4u* zIv29%SdFE>7#$-YF@pYTc~(vy%Bq`~Qw3n;m`U_!uITA%h97lu6zz%&X_vJ>Bco!F zmB*ZlW8>xFrE{t=qnX1HkI1O@>-^{G>7(QJiL=h74`PhXi-YYWfwnG~@nax;^#vDQ z0#EYVCNr|8ti6*nQZus{pR_ZsW!^#>7B5|@V>Nd@&eDI%%-9JC3uFIi=i4Nvta!V@E(QRSuKL*}D zUg$D)=ry-SDAFPMocK@w=wX`A zA+q$-30)ryjJq8?X}?Ed1a(h*O@DFOvq!I~2?r1CMCgcVxcKtx5$-KFcf@AMgrNmr ze)1z@ZcliH`k~#_1y}a@rdw|%&PeOnQ1@t8hcXXRPVOkHsG|R49>mRoc3@a+OrL+L ztEJr!kH)Qc+{66K@L9gk?R2Mg_MuO1^8-!NQo;W9+{8Dn@doQ44oj^wj1s%o2% zpI?lQ&K|uq&%%WZFn{iJ`j!Ft3&(%xu#xCA%d%hnIn~su6R7tTcv)+A>_k-4mbLSG z5aaNxuDu#zxPswFvYy_9+jkiw_e3-AF%eX!^!cRXiF$h@3#htm8mp{&#zW`Il zN9#!@ktz9glqce76J@EI^2%zx)_!nc5Ov~O3?CMvV|RHP`q=R}{b8IT!zRFFvcS?~ z#`*NU7cV(qml0c`aWbw9X1w4>+tc*_QTG=Bc2!sVIQ)#eyJaTso&YgOf_rc+P)ZF- zp;Vx>KyfH;#Vv#gNr=04*z|@&DsCI2s4Cm?Qf=S~>tN7Z@(-R{{j6hj6V=DO? zFzw;QNt1Ea)z=`<&rQ)lo|>_D^~RJ*al~ww5;f@}o!-U&a4wjvSwC z?)3R5Pnu3!?vAM8A@tewDQSD^l+m>95{uSWRw{p?lkx1u7tc{QGRV1$s_JUSx(Vv5 z@~&RKn8uh$M&8}g!dTFQygW?tv_IoB5APt9l$E1}dfS!!f5Xi;Ve-UC?XSs{6?)-L zKhl=I@nu(CkBOt@?p@9E0!14BI;(d>QXfy8#W-&Uyje)rPJz54&+^%`jc!{fv{OGS z`BLL=$27(tZmury@%5uE2v;YA+h|8ZLc(zA71z;cn#>p|5>Wvjs&AkP_Fe5*yX`ba zPhNmqmRt%yDs`nFMUT?9d3yWNe;kLbj7-&!yoY(}v}ss!(^YUIqssgoNY7BNRV(#= zZFMc<7C+p6=bZ?n-c!DPfUox7q+QD0m~oR(LHX&VFCcM~2iIlX*pZYC8#Fh!DLr-B zl~+-}j^LVF$fku)E*BHzgr^(hfUh>5R3C4={xXqeCetX2);bHMh_T79I!CHHGIe2s zsVS9dbw9!&U@p7vCG!nz%cv6-GkMbP5aV=Y_g|wjJ}Li4f6;HSv-NMz5!FqeJu&B>J5Oo zwq+fGI+A|Wj%A*%qaFt$v_h6N>-q-FIYwVHW<5sD`I4jm8Xu65_3z)B+(o3N$>>wh z-Ljl&g+J;_pQ%mtZl(T(c&z{MSItrO%zf(d=o03tay+=KT9s)p#px|y`}NUEHDazMFt=seGZF+ttg|(&M=BVQV$vZ4^92^EvTL2$2dgYaJDsBG zC;P(l^7S>1fqfX8l-3jx4D8NaPU}zUK~~17BmL-JyJ*;>w{6GcPyG|Gty+)i(->?q zx6cs}=%&;kfgY>gGh?u{{;51t38pgEMf<^2(`bM&bep+XQdqx?7QEp&I=90RM zP(#p{X2IL2&Bs^&ehTZ78*%?D%W%acZ+MZ0z3q8e`tHB+;?h)n^!))`JkzlaGSa2l4(7F|ZH47=QlF^%xWGZZ3N^s6J$j>c|9rVCr7MoMNO( z%3u^UrSOlX4jEWZU`{pYCa}&o+G8r)(;kCGUy^Mhn|&)@Ae<`&OoxbEf}e_(zhj{+c^iKgOXz=QG7l1hcgyMx)UV*Qti&gM)Zm{e{b!?H+KhMaw9+5sW<}EWY=c_$k%>qoY)ER_T zwgJUEQqHj8MmbY@UoCU00UI;|t;P7@{k2HT=)m9p@dreXFnyrQRMp&qWle$6?qFq% zJadj2{mW=rwytQZ=EXhEU#AhKLIzK3KHxTB?i%JaDQ6t0qdxSt+gDKz%q7Ie8QFd9 z^p({+=F*lahK3TD$7ew0^vRA-pEL2i0Ha@_07w<>5+Qp3#S);aQ5eb9t^gU6=*q034M>sU|5tYd)gj&7Wc-iNQh z`VVF=x)s;m_6ztqfqFsmDHEL?xcs6!aL4Tr;Gw_XZ{@a1D}p}%(bSjzWhw3$c3Pl! 
zC0Bm+kwvx%z3A#_M0G_zPMnOx+KoFgdFBG#^}ufs;9;Zmm2SaQ*EkRiXv{KGU^xN} za}`OIZ7}{Eq=qDMcaR){sU{n8R(++~=vPKHs`K=oP*W1cN@;3q_qH3Kef2I@|F{|} zS8s=hi~jnrR8vh5X5DV2^!3s&WnjL*NEMu^oIr9^s+yOXYHF|Vz~B!P_wi5^nfnzjK;OEv63NlW@Ryg@;@12Aira6Hn=njms)FWL zt)Lq*{+JR3mgRJsK{xAi0?V>STk}B*(CIp>q6!fYnW0kjE8lZenX|OOTG-kx|~tdI#n=KswMHv`I2r{C}y0k ztU1T}NWeWcKg}Y>CNR%V^7XmffpZf`8=CZ%d9rV88m1$2LSMc`k_mbw^4G{Sm*JlD zXRM1ZX5RW>9Ow*l5o3;~6luWNRGR+PdxYhVsZW7bnLe0OjrORBOesUcIPoS>dlnT8 zDro2ob*!8YJqPoO871^)4PDMUUz7oZjm~BiW~U%MB^zJtNwZ4eRtXLDRsXmbtIKda94U zBNKF*aIWTFLH{ylm~+f+2}O-P-LKHI1qGx@PI@}BGX~NtB`X&xMHTAZT9&Pv<;~QC z%2IoJQ^yuC9qT`muccETOKS!lj8Pgi8d_G(;E|&z#?HA0^QTQg;m%b^rZZMgwv&4( zG+4%hcMTYA^{s-j#-_5SRJCd7BlD_Rz$8Crn97(8)F3qoi&XhzNmvV5)l*}$q@PsN zC&%U3s%#%5Ri8e!6J%YDp8n|jQH47_ zw}7b=qpg8-lL_}v)38c4oje+%Xg`<=S(G>ZG5wm44CoYtCdMLImoZ>Y)wT4m0q$Ay zwyiBZeSG2WWRKc{EaYX1UdlpJVm?yKIxue9BzT)|R@Z-}3Da&LG8|oFU!f$T;qwY7NO5TyCtzQ0Mk9qYuP_bWn}fqc|fK@u$+TC8ZUkr(cN6 zW=HCiz?M)vsT0jv)@g(783WQO{a`L?xl2eTUC}&xZZ`e>Bg;Hv-zwDt*7*jEwKP(7 zuktI$(x3RQa!jRcXNS;X5pZa2Kv{Y=a>VXrPRyEs*x) zRSO-6IdB-8*KS4Bi1B#%kw@U~>CzWmo(X!K8nSP#pY&ftUs{)zJVVaUBsW%+*XY|Z z+GB!+&gjeT55+rDPM5JRXQaqx9;;3}acm2y~ z7^ymkzb60KoJN=q%xQ8k)Z8u8q?~EfM;3WR-;M!Ekal%EWG>6!K04_X!C)!cM=>^? zV%|*XM|0y|5$jZeDW%`|tzl7y+rTmdQv~W_IsY0tiVutgBau+MrtukTW0|j^6Z(?X zcn1_xFqhPuxn%zw`Qb17lzpM5N*fa9Qr2bL(bHam%Hm8U#YSV#p+xwOUW^OpUIO2~ zyK>D?E2YM74XgE2d%BmtH15d0v01LD)HPDfkmDY-G}Iyb;6cR3CSuYBi*W7r*CNov z{N#zW&OcBVDjNc-!&tElC^pS#TcaR){N6LoCo+XX?!Nd~->Z=4J z)q~}cN?KpKKxoZPA6US$jDUL< zXPqsJ%f98Y8B86JVU)I{r#aPt9!_MQFFl8bxnlld0&gL5Cok3vAGZ&FP%L+0s zoo1b59GO!L=oF!tp8L=x=@&7!sxB+Ry3I#$G^H4i{O%rHI&ZA0CiT|82-E&qVBP${ z&dvszsIds=U;Atrszpg3nAbnmQb1)P2G$q|%Qo~Pnq>9=4hHWi$P5P3S3A(MWBPkW zNa4XM$o@GrU^Sk8=$AfJi^j=7oj=rghQgW3cc|y`(=yA@>=24e=TBEB_zXc%Fscez zePEALpc~z-P1vw%C1R2?)jKNh{Kb8k6s6x)q&cYrtyM)heB=ZU9*se2YN~p#pZrx@ zS}Ib~(vg~3%-~`id^{Z0t=eYD!NA_n1%0*7LbkphL>d0C7+1zHNYmydIf>he&gLep zU$q8F2}wvwPbY0mbTVxT8M(->ZbqQ5FM@qM`Y+QNa6a>rvX+~M_0d^C2gbqO4^i@! 
zvy{{5a&<%4#3^v?{|InDSl)dqIM@CLD>wLB3dPRC5SBRC0a!q5A6;?q>XK@i)BkvI z=n|G?g=gAPUs{0eTejfXsnf9WkHr1I{Vjq$9r`^EG!O%MXK32=UzamBd`3vcbE(|F zoW;0g_gtD71U9xk@Nlt5QDuvIgs(I|4;iWQKEMpvIQ!trB}*`A;xM?;|2`9+YIMP- zvk4UqUFdNS!xfh=!qjnL7LVTl1XwPL9H@rQp|!!6?ffxleM!_Amc#pRyHYsITK3Nx zw5U}Za?p*Ml49)JxEUvsa?s^B3fEqH6~+!T-x2Xsq3WI^ttET#BBq=n=5CcVcmo<| z)03t|W4Mwq(4UhNhQ09i^k%@Eg8G_fWTYq4E~nE!D1uK=B(7U>Jw^>PzsL2==wgT6 zC@Uy|qmw(9-17_C^>9^p@W8|j^Ej=`4PM7`5iCncx@De${^j~(usS~#y088%YT6s) zBb~b2keOA6&TePSzF;amyyOzknK3{QpAXKrnhbN0I_KKV`Hxk~n#=#x6|v@VgWlF{ z_s37B>3;#sKF>DSA;&kw8fh4F7o7k6&cDV`XhT~~A&wtDfdl)Gq1Db4bFaPyQ^wNH zQYo0@4$&RJfjuWMdGa(&m_Cm4Hf8nAL&nvAWCKS!aMpwAK24v__EywYRU#m46y{xV z1ERwG)t!ZwrS-mf41@Lg-(t`;9ypORKnnd_$BWSEC z!?6RqvG;H^qZAk1c!)Cl8T23akg%N3 z|K(f;4$xppYdJmhnDopo+R_OYXoq|jJvE~Ml^q_KNy9AfIy-Z#{UMEZqqD6QySDFu zd^ta2+%!y?G^)?o>c_6S)E746lhROFQYjU!p*p4duyevC7cWGhzh}P_hG&NE!(3s& zRR3!j*K~BNKeg?%k4w{2-r=!l#~!q`b*Qg)$|)gBblW<>&BqVZCXQF{8`H3+ff;(_ z8b5X9KNZq9N8i-#r!E#r(xKrvFg3Ea$-r|C3oflcI) z%a{V|cBH)Er_Jd6PKsFcHOPeel77OF&QxXSJm$#L2J*3jo?h(UeOP^Dpzv_=2|LcdYr2e@~M_RBf*Z){SgxV^xe+{G7fSPAP ze{Vv!Bskxe1m?Q^TajZqVc3ey%wjY&_F&xjQE+pYOFZX`a~z=*JAZQgw2A*eg>$4& zKm30&^}+i*^Z5Jf3_0L)nMwaX^s%mawz}tnPBhn5ASp2oB_&me8b1Xi>AyO|*Q!lJ(9$Uwd`$gks*m+tEOX2$rak#!iP&-*$38o*blm_9 zZ0r9`oZC1p&vu;Mgh}CEBoSSH;#W=zyGJ`qbGONqq(-|G0j=8pA;|({a??=prhcY^*v|{ z#&mvWoIAgqBhGW2`c$5-B>x7T`#D2`e>rg=Z&R^+q(b+hm7AfenWmf!%D?GR0CV|) zFpqMuGSZgT4(Ils7!4_>AtO82TIUMg`p?p`nwvOdTIm`%?>3DITUC<8|6+tn-Lu{~ zRULEy=LDmsz_RvFZ&4t%P2>NGEu0am>2n#Otg0ARkpuG^OXbLH4+p~BvS}Vlo}`AI zI*+Tmm(AEY1I%q1=$A6Ez`23-RQKmwPt{U2P20~5qv?M5x%9C#FZdsf{C|rf=yGNl zvlaYYGZraeyXwlf$*bV5F}%Tgv7)&;5L+benB_`vPW8_R#-M&i{Oo?gc|j&*OzKQy zFeqxRr7Y=a>|RIiPG=l7ZX#{Ujy zn}bM-F#_|s9kIZceyGvP&p?I$KY+z-Eod+#&Q?a{>o$dl1!pU6;A~x=JET=(pg+A; zixY;%F#9*g^xh8j1)JQw64W%b!pX-EVUfe(=>UsIHsnf4D{8B&k)2lx8%H;UM2>*J zxBEFh*6~we5aH*5Zfht?SfhWbpK~1Ni^2LHWEQ<$=xWw?*>$`4VC)$ATC7pxe>eU! zw>-!=e*(^L^gmCZXT(_sbf#>5QgATisMGsjaWPUg2B|efFw@>zjk3ZtwA*;YJA5pB zT=Z9*G|a8)nw0PEW)$QXq1n*~;o+g`4oxvELt)4fiLvZxLRwk|0;9&k&C6rRyTi== zYnYGw=M2P<{SOK;oINP6s6tC?8^S|E;o_*jcV!LTs_uQTmYLJtj_RU3bl7{sBPdjT zxmFqA^Bj@Ui~hBQ(T@B*Pgi4LV`qTT&rlW4oPS( zx>~DHR8oV=ns&6>I>64|3!}q=5#a7>QFE~VB~8Ax*M@@Jbkx?i&`EQLtB)5VMnsZV zY%OH)%+TXBVDxpic@B-iMtgp+9+BYOM>SZ<^PAJa-Un;p+}ix_NAvUJ=RGN9rj~hL zKXryuubn*IfvoggG&OaquNe2(+hN?K321V=Esq_CWWW<7@MlJd6=t5 z*xNfIFj#*vdhlk3==Zd9viomPcKmo9`0ofw^ap5rx*%R+RCUUCnFoSP7bhlcGf(uv(6nuR2Xu6gyhL4>cTpo z>)g|hl9F=NaGhOUUHjJAD5vWQtZNRnKJs$b6<@xKgQrpuI&Llkee_G(6-;Z=S5rOK zSc|Ve{}iW+nlXLWRQ0~NGs%e59lU4Gh0fjcPyZbOqoyG!JXp2U|J0X3CxOw9xxK+# z9t;CYnh9kA`4)3GHt*blgU60x{FpKDbklFWADYMeuaRh}cGOl>VcVJwD4_uk8981B z3f6QQeC*c!Xt|ab)pnTrv8E5#Lv5lT^%ZqvT7$Q4!WlFB!?HDLo%53~t}#v6L;BPu z25)rm)PXpo4l$DZ)@)$8!Rs5`Bcqgc5$L;F<;NJmz_KP=;;;3L%dzOfnGe~+Ku%Fs2>z0Lk5jdhfCkqJpLKfPcst=C35rLwqsPwiT^pfNCI zC=69Xr|Uk|U}Uq^ev0Isz2)A6Tw%|=>A4yMkU2{#u;rQXB*tB62T>T;t zK5CS@s5zLOv8=0G(>Pl?Nu{T*_Dys%3R+es`W>Df($0BT`f2FsY(r6T0U8<`;O^ zuyuyB=|YUEV%2UJ1KvjL+_WDbzr7qTKJFMXX%rk-M)C0=eJGr5oz8h3Eepwh7kZm& zv3KWo?AyB!dk)1SA-f(kXU>MF@s-SUA3fVcEx-`?G>l9x@C5RXl7_&w^)U_e^s=8_rFaOWUT!vyvy>YkwKksb*O2|2Hu6 zrHsWO!ob!grpnI9f}N8SV=D8z0MCSq%2KS~xEjS}FRs3cKBaDkl0cmilq&m-x?zA7SkDc?b@(j(?<7A5`CJ z$CR!cJ@aV#=gKiRX<$IbE_624W7nQN*mK|zwj4Nutl|pHm^2aIuFj@1XNKi?`!_E% zG!o5uvd0W{mnHvjf+8!{euq?Ksx;8UO?~)C%hX_UWvr}a+R%`;W$Ra34VHgo%}sem z3G@2vGUmT$?osias%va#f_YqL<^a6 z9HuSU{KIzCRyOD?jzyQ#3eRXm1fhq=y&{BG(8v8#hlFXVWUfZ*gF%WJ;x}a5s;z;! 
z*7UXRWyrP&-KQDWHI039i2<$hPeon{zIgXjBqnE|g-P1&v20~1oV`r{@hQlfn05!M z3(4U+cT{2@9nrP2&NaR(%6(7=)wx}3Rf^PUN+ePecJ zY6uK|Qs)z_AnE#qVL@>*QnRzr+HPn7_CKJ!ruBeiQl+gSAG`PL!-3RlIC}-a%|Y|N zhV03v$7aCTQ?g_lV@FFJvQv)ZRNN`#)wT5LBMqT^8)_@^5qhNwT32T)igHqL^w3_?=@1SbI)WpIk0ZCB7_DS!^Vsy-7%OB6E4xH) zEiW#`iWS>XQr?6sZ&-pmZn_aSTzmn-{Jd2ubA8jEFq~;T`j0K_9o;cw-etJ%hMO@e z)E}vdacJ)BR(i%%bg&%_ivjAPkqX9y&j=|qG>6D47%OpZ?P;8;qzunuxP2JBeO*#= zbgpIXZY2|^P9DXW0coI-T4vHt6!k&E==A`4)p~J|^DnJvHgiADZUGrS!Z~s2!*x+RYdav);+#w;U)+J># z^Qa}Xe3(m_wvJ|`$Dg1~9Ka#!jw2!${9907J%lbEm|_KHw=kgl?7a_A)8mLax7>;w zZeN01Zn_FSu8!ur3$2=w1AXE`Ye*aQv61KU7cWwV+J zmMH?=gOO@UDnouu6|G=y%h>vlWe^O~&QF86&woh^tv6^EMYhCzm^YZ^5MhdI}>jI^cNQ@k!Z*^+ujZeJ<>G1}3s=wBwQdOVT~ zl&#hH^u=ef_b7EEQw_N?${G@*wU*c!{u?9?7!;;qm?3Q$Efc1Ei##wZCpsKV0CypLs9CsK8 zkDNeeei_LgSVtB znK$cFMtVP(cw2b0t*MrCI!Ia`!jVJ#OC5Uf2z{|b$LM36Kw>K6hR!Z)d)^O&+rXhp z8!+d~UY~4PEQ`!W&4~0IOd3ozeirD_NNdKpbnC{ErmQ|H%PYp`Z-0X1H2T*(wqS#P zFxP2oYejZ$Ch`mO$-$QEWGH57 zShsdPmTgE-et-e%9^^=;^z_x$BXbab}&k1zYf+={8$zP~z8YcWkaqpTdL1peo0wl(0?@uN5zoq)!+ z{&9kEM?0#@iV%B*W%eJTtQ|*waXDI9(69xTeVbZ715E{zsr`=rXs)kAe9S3S*Vf4= zQU_gWb1z1Z#!M}tk5nns`spLR>3^1`^q#cWC(jB-o>4=?+=2x-h?M9=y!QO-sPE`j z@uGQ-QdUN3J)?dtoD<9^buEG#D@eMH9qgQ3F?!5cTyWuRJaqqU7#ZM#@4j1sf(kkz zs-To}gtvb%W?gU*uDE;wZom2>WTz%z!?tL&_ou#`UGC{pQ;@~cZO8y*uM1^V5I?S1 zfu&2A;fFPAu=2ZQ_~zU1ky%tZ$YRPH*t%HT>@*4b1YPwd5-cYLbPuxW=}KMZ^|aGJ zDMwCbCTg47RlY#l64o{Mr)$fxya&)sGGlQBQbvzb_N>QB^f0xD`RWY2%9w?X zdA%Y6dW@29)s`_Zs+~|NquUe6X`Xfjhlb$6-~JH~Kk^tNBm6Z7OLu)cLNB4axu#rw z>9Do82rQY`w>G+eJsxR9O6XETQycQ2q9BuWs(v4$lsAN+*v zUh2-vY)x9Kj7K1gFqACU&a&lYsj<>Xh^RqATKH2BmgXALPK z^j9SnEZdZ}>SzOx9z2fjPTiil`FbhVu33wvUw(sczFLlqtWtDaO@w_*f8qO?}?r7oju8CTHzX2mQy*5%|^ zOSe8I`m)bG;9Kzedd{37KWNnxETZh^ESoA6a>Q64hW6427xs@hO< zH#hS0LfTbxlv#;g5<8~HEt~3H=EO4H?d>Qm%12>Ip&FCqThGyyD(f!-pxR|BgT!um zcb3&0m}jDc6;BB)%PC&ce!G-YE%xP$-j$k~N|I-+jShsQpd87f$|fbO*G7X!V&8k( zkd}BFWtBB5Um!KiW#zXAb`H*1eC6eM@vRrI_=-hvu(iHdMfGd3&eEUMluZp?#v$^d zWJI?qkYDqW0jXigp!PLIr<*(*PR)*CkUJtrkHw4`lQ4W(I7>M(&%|7VLTa|6v9<)K zlT*;ZVM@OSTB zZSRk(H{#H~Q>dWw?qXftrnDjbXl_z1QeR42*5i@VQcm|`^w&2o8wa?1hhY5VNtiuj z0({-wVc%=3d_HTKYRX!x`N=r+-IjghH*79bbo&|-@9B5H+n}kw9&1;x;ha`t?f0MI z4h?|$4u+HS`2BPY?(-l`~O9;ZNNWg4|=OZK@L$;vM2V>z_0D|_aV3P;G( zCw=2q=tX5|0g@A9(M|@iu794eUI1%Y}?=l-z4N2)zX6Hl#1pxJ`bOq5D;v zQiKpqrdyD*vYF2#D5yT@3$!<)FuxEvh1Gp0>y5VLuWpfL`S*rf?!w#ey^rw|$Eh4k zXjyOLCUh|PQ`1$?Kw9kRYg@@7$8Tr}-IoD5l0NlljXLHscCd4F#mG^kalr*M5IHOa zPUsGA67O0<_>gte`kcp_M@qiNa)%4c~|Ha7ma2H$?O3_q@2jjzA>49i!oMOJa8Nw;YTS4eCr{YgJv9L(UO zmt2u+SE`7n`@u6!EyFP2QvO!00n8melM5T8ieXxl4!nkNKZXl*P-W_kvF zyhg>JQbxA(f$5QCLgrw#>L&E1nRXb=IeFT_$*u#Ru04SHm)?Srp)O!tYz7&`)~O4@ z?#(!!n1z&Ldt86j9C+AUG}qaToV0jk(?Nx&KU^JbVXubYPDiK(Uw-&G4jhTagMWA& zmo1!!@j=emwS6a2Ywa*&)>Mm6J@=AvtB{c#ho-hJIC%QPp8i6g%Ru4oX=_5viDO91 z$U;_bKJp4n;U5?RM+ZBuR~w4*(hwhg9Lc9*aWo|tcK#t$ZdEAC$wGZ|8yr2nDMa#! 
zvMd3eUDi1klZcx7W(4~C4gA8b1R@=E$ffc~$uEPKmp5E2-v`juib6V|jV&zfz$)jMA!7r03+5$?V|c=?QzTSvQs0@goP3nOlH5xy0wtrcHD0bq zUJ-IAj9#oGw9)Fh7ggmIR18UM+XIfybmYnSt&Md!aOfy9a`KQ~P^1Ld&&ONQy^kF1 zx4EGPnaQUW9UZ*=)!GbvLZF8MM-Jj*PQfQAoWZmXGK~I(o-OtA#jzwLr)F>s3((rx zOS*c&j*WG+b|RJI>t(r&%v7XixGJ6z+jOXz8S5o>Qpa-xm-P2zy2{(R2uE zD4>2JL2AM>-X4}KDa=7uP63>}{Mp{w9%~l}&r@kIC_Z`k7&6kbkj?sKRn_qF4}_C_ zFFIOEnV!tKB;oMUIFyvu!Gr5uR#Bqp-(zbJ4|g}%+4dqcD~o}1DZJerapK4!2FuyV zEi8eJvj^PVUDbFxT5FM&ehSHSPAh8KVCUowPai+H*z2$P%DSm>^|qj*G!HSc3CPOK zLSa@qw(dOwgigX$i|4|NrR>QIZIqRy#6%=f(aPLK4!ym7xHiVgGODcZc4U%=6WM2W zP7d-opLLseVZlxJV#KJ3zQ>!TXG>U$mMB=QWZO|zltaExLRw}nvdFit9zJk$k;fd^ z7kT;Av190PaEFTr>B71_9bG8OEnM5W7z}$dv(hs(HgZzFxuybJ*Z+uC8+Y7z z*S&}crqim%ZOlVv*Ns}r=ZRy7*Z z=Ujv7!~Gzm5d+fIQircU{}G2y=HlkN@5B|CUxLZQeX(xEGUQY_VD`M3rqB3mYId|z zw$l@7D9YjD7exNG?Z=ne0H!8op&+jWUf$kxBJ3^3P0+A)loevd(l0S`OgM7MpFX2! zAu_}p^5BBp-PPGtfs~9wG<4XarLGz&@yC&xnUB)yT6p{VQ-{)l?`fjb7?0xGRyezP z;M8H#fxMN+c{{th!Ohi0_iNKl`6@wj{7LG#bS3kRcvnz?()?AOn(WljgC?Dzp zThd?Z^>lS3FPkz*zV{0WMtf5Sj&47S>edeY=BX!f=|vL}=1_-~+YY0qHvm&7jDnZ5 z)D#-DBY`#`Eh(Kmny=(0Bq9Q~RL15PW3(YJvk=jTQ*rr~S7FqcVVVbsK3XZd%hpM; zZbkgzSp56BS21e#9QeumwOCN~NC^xz3)MJUs z$fRz{BEPu0d&AY$8FnJ4l+o(SG9+ddDm!zUe43GwiGqqMIFX+2CVC3rb&`+M(~=M$ zpQQLWCp#B8(q;kmg+&h{3qn0S)u~%#bu=J0HmGdgSKi!7U(!`b^|Tk6e}PX+V8`JeF_X2al1{kd|MK3FBu_?p^5* ziJs~}OJgJU9gBrOb$MxF3T<30>0XMOh9=s&Ksee^zqHVQ$tp%gqwF^!CE*xS)ALYN zS*sp3b#gMj_C;t>n2uA?N4ReJhpp_q3s`q!`Cfo>G2JPDvST zXajxdpV(4Hd*xlbd1a`rZl?d#h}4v0NKVT`G3n{au{%1N{0j*%+M99g$YI(|>UPRP zE^U+OnzGtv`1{e9L@Q1l-h<6+Rshak^asM>Ox>vcv!5F>f7!ZFUYd(I@@Hli<&$;^ zjxMzM@~*2+G|{IzdGrL*l9O=sglaU5r@zk`1=f7v+M4$oGlIGo9n-&xmR?tUjpssF4YU(j0rRJlkqLzJ< zH=P{NP*Z}~lZWUdq+`Q|op1;m27BtE;+$+07MH-zmG;uj1$I3xDCcopbOH*>s?b2) z?B(kYPw|B~8l{)j$rr`|;uDZoX$+XMIIg^+a)gFQ&}U*fv2C?w)Qzctle6*>PRGTO z4~j``CwP0goAiU8V`X_R;$!KnWoA<+7oxVk8$JPks?R>VO1h)H9~lZH(Yrs=FFM}=k6}pI@-W9z?U@DKA70udiqW$qGOOweovA-dH_H@^4$no@ab)d93A168Rm^f)P zc}HasLeCy_wlyOykv_hPsR|e`)vB1vl`)mlGu^Ey%}hpFWi`6(UFat#(LYS#TIa#Z z#ht!3<0Fo5?6?V-bHOCc9_@!?aj|e5zW~4a{co{w_Eh>6QSf%^r2OWiu$+2_@r9Fm zHd^M}%{YXy7T40li?)I?6n7$yF`_e?sr!!8cg@KuRwv5+LqgR26z$0ey_COf`ZqC@ z=QQy}sp~!HN4e5Q2*f{5jyZ^rKX?x|F2M+o8V-t%qK!50odeEPLFH-tjqXm=SC=9B z>)YV1XTmknhj3z@Xhc1|Af6n#Z)Qjn8|#M5yo zE^9_Z8xR-~1_#;`;X7?lW$Qp^OAYW*2IL$TndeDXs^+%;IXB;nCD$)R zWS}j!A2^Qsb|;J*Jwm;{M<9MmFLmv4@>XR#7&nXEmUOn&XW;0Kw>HpskEcIa0|&1l z%A+aXHK8Ok1Ixbo1~V65h44{PeKDyOD1#l%mDC-H>gmHw%3yhIJ=cb@rn3`uOC!>g zVo+Ao0tZjVJgN^lW!KD@HIKZUMSf?zD>6wN(aAXGV{_c_>xb#I5yN} z&2`j=d6j6kb)_#a`ibUH5n|e8TXYz=^9ft5k#($O+j#8B7chU}e1y=_M!4G`hr#Ufy|K7t;UYx%xF`i9{Usbdi1%N7 z4uuRtqh`#5r?Z__{LJsAa?T{DaN3o`andoik zD9^+PpDo1~>knem#L0-FlI#N)e8Y!tVf*oTV3Vuz8#LTDT7uzSyL?AW!7$~Xg6<@Km*tVTM8H8CX< zUQ}qKM?}JrbnkAe$2XsTgxHwF*sxo;QMj~kGdm4g%--c<5K_=qvufOF{T#17Xc z<!UhC73<#)@jZ^Wuvrud6^zd;-4td?khjMBwD1E%ij<)=aUEa;hp!sPzEMBH4S-Gvi^ag2oLs0OKl1EbDekY+KGgOER>ZsGDxUq zkeQ}V()suYBZ~asXy1dcKl>CL*Z!z3?rhz<4T*_yIDR6A4s|moPnw|a_-L-L#-Tmi zv3v72?B22sImI;?GjkFAJWO(9!^%{QISigQZ`gv}`wk%`DFOLOClQ^P3ZDt{aP{K3 z@S&WyHq>F?uEW^4k+dZ3<748GpOH<6Fc2QW!Elm0Cp+s9AD@aJmaoGxDm4joG8u#> zB&Fl>J07q)Vb~vLnG(>#cGfeP&cd2CTd;TkVajtFQW*?-c?TjQJcztb71vga#~*wM zb|K?1X5wh&WVF{cWAmyF*tC5I=8_lWfcnJ3fIEsZvhe;_n{n&icO$?5pJyZcbt)QyaiPAr%`0j|pNpaYbjgnz&PHI66J*)7V&i6i@vz`*Ok-hJ>4 zjY7z)MN5^z#i(h=#$aZ zTc;3{k^@)YP(%cIqlH29Kc0CBrDZj^XwF5->9&=V*Uim%>#dJ*@bGb5aov^ZY3@S& zk<)18I^Ou;y@>L6LX=x4wj7}xb@0E5 zL{udCt{F)QC&|Y#)ITZQGT`asLFcPhIYk}aHtGU~Tue~*k6TMiGY;)Nh!5WU5A5kQ z-uR0<;pXn7WK8KuIy1c;ok*wtIhB%z%Wk{fe8Q0Y)Jr~&PfEjT^7`SU$5qgBl8#>! 
zgM`3{D7bqtknL?m^uhgj_RY`W?COIi`F?B2Z}nVFd!PYpc$LJ{KUNtv)gB7=v}KyS=lG!x|&<=A$x5WjrrLHK)0 zP%ZYO1?45B`1Grlm_27PZocDogax`I%)bX2+0}G{3o&lONQ6iDDSno#v$2PE;Q6PY zM6;tWMopciPC}dcUF4!0O;u(1=;i0}BX!$^1y{h2>n(Dl%ka0W1qpk1A*a3v^OxL> z^we~Y$qnO1g{co6$+(JgvT^G#{s0f^=(tlyag=sEJ~|F3W0DXuZYo0kJMp|Jd#!Q|GpPqVbT=gBsFT5GE7S2PY zp96+dcYgHQkLqrN8?IZdPI$J~RAJA~U0ArVVK~D$VQ92LeH@pJ^*?UH?gNLY`_qt|KtGBOvVT}4{JBOp?NwN{ zVgv2pQ8YAG;`s4{ICbI#4(>Y$7xGn5R21Coy6JW_VfVKE`0|VIkeQQ(cn1AzzyBUv zx9@|kM<}LEod{1CTiDVMON~E;y~ktG=HP?SkU;v6PKpnHKAF(ET3rAr&B4;;tFd$c z2_(}VCDX=r^w?s|sNrySuw{NJzW-(^ZB`7D(vwkDU5o0P2Bf5Bp@9BtXmAjQg$2Qe z{F$0~igtG`gXUPIrNtv5IT@!Db8+L6+c17yxQUmE_9i49+=eHfegV}EVYqPiMCBXy zc96Nd>?5SH7-}SUHpfAON14prP&3Yut z$gP)OfDiE?~o{~r3>F{mhO!$|rk9$s=1vKx)` zF=L}oV(YeTh>MF?CvFa%NWjREBdDWHPiYXM+s=H#P-w=jy9cG&Ib5T!v4b?5GWQxx zn=u*g;yZ{F*WJh%AQfwU7q_`{91vXC)CPg^rCS~vr?wk_Cm ztQ527-;PP6UFd_b4aQUz`8izMH}R*ZU&6dg<}#*oQ!$nJo2w4Q|j_<%80Nn z!e--v2W~+q{Rw|x9}Ei$fLDMI>>MNx=wdVUGqbX>b^T^+*s>GJ^zD*U(&6gjhRDcp zI66?{QeW5AR%8F(1K2`;HIa7k*x_j8XORNK7sM$$HC8EZm5t~3pL@> z&z58N{*#zVIrC&p^yKgVjE2g3u5BK+)8~;B{Cf``Lnv)TaIoCi(Sy#`W@Mg@$7;s1 zyAPkBy-B5S8iO7?7X*e=H&EY89OBNkI(6bWeVH`~3LHjx2_JX?_-BFkd+VyIvFXQc zShIEm(zCLVKtHXa*%nbzA&i^t5qsn~_H5dMINGZ;#s&>?5tK1iI%BG=jC{RI9S^fatkwF&zU9HVYeMSMy!atjMEZtNJ$n=^-YNly4l*L66#?FT&l>SyS39gc}( zqm;d~0(q;oC=0)O@OL}^e8ly=40JT>f@MWh)pd;VV#}2 zi76{J1}W*(R}o_|EL<)WcOm-lA?zRzA3GF-&%auZOK)5PUti;+k0Pt>)G-Y>wEHOS z*B%@@c}ktw&Y zy&lU}ZN}K~GjZu<^Wdb;7q&~Fg@n{he7lly_rXIPPX^<)B6y469_Ggw)dop%$KdAb zfNQS124z)E*sw1J_uYRVg1tqCH4U=U(y?RHE{vNw4_DrP8=?X{G2Gu7F^PrLw`rI@ zZ4!08oTCuBQ9tCQ;K5%$gqVURTygCcpxIV7z>FSL6r|&;_g=#0eJ2q<{WAJ6E~Yq0 zqcSfSTQ_gQMOWXbP8iBXPtj*8PB*pH+1ZM`lmzVDxf=&4^9l5S5>GRhVM+gC)HNOT zSozr-*vT?0)4oGJ&9i;SKH8-X%8yB)txQi! zf|IKkyo1Bx=FpC!_+9wV%I)a(o`5ltzHpM0qqU`Y;crjC-Y*TKFOFDs?9v6(Ce~Efwty~d`Jra$g;$k{MwIpgB<_=%X z3KV=A=BmraO8&{%k(+Mp6{o}Z7Mijhzni%N0z|~Eza~D5>m0 zOM4fUo#{?8gR$6WS91+=am1ybO;<_&vNAzmALZedvO;7k?xintlM!2OV_T!FiO{rm(IbhcioHYZ@L<9 zzwixO>m2alA1HVu0y$D!b;pj2y1dk@&UXLhpMPfnoryO-qXSvjj1C#98jua_ST>Z- zR(<|nu5mm@jvEd)(yFVel7{UF!s)yR1qH!@&hwVFo3Li}Mm)zLBrMbqot3$G?v?lO z&KGO((zAa=h`a_?gJ4Ufb_Pkn>B`YsxFD!bAmY*?EPch@jH^3+ps>uA8{FFwE~($stM zJj`5tB`%tEAyQAJpz7IIaQpqg#6?#ugnO?Yh1G^mraKi;CvLlD0e@F>rA^t8rGmBDWJaR^$1#ATeFW}wxSAONq@@+2cgk3J z`j5b^cixAqueucf`FI10N^J1(qrXHr6-W=)Sw4W`Dh8JdyQ``S2RCm+3q0}Hr~gJJ zYfnX#iiiLF1{Tekhv17R!OJHYH{SLTu3mfhYz8hCuIs;7=dDyh^^f7n*=sQLKQ! z6&9hrxfwPtJ{UQ6Je}Y%aJ6fND;2yr)yf;zjqraHa*~h0#?t|zGpEAQmFBXe4JQvB zM@d0By6PFMCuCw`=rmoP^;;Thk&~2wM)wd@*0*4ExLmwYR576&ZOskH&&#LbDn=FQ zB)6m@r25y^B`I`sf6fAdm zP`)dQvH3s@e*K$YQ};PQztf5v0~{4V_S&MXqLEHnGr}h_;BpgBryHd?S$OZwe`C&7 zcjM~oZiKgE10BU(c=qWRF?RTOhzjf?jrZY1d_2mE8c|eQLZ@~w<^MFg?cA~G%Igpw z9swt1dfGVta%|gu3}3HUkH|?=@Yo;!fZ#yOui**(jdf*k%Qf!f6#;cXP38%F^yOCA zM$E=_XzW_#TV~ zp2C-(e}bi7ypHO+VqAUGJ%|ViP~+BP|8JoAPR+Kv1=ShH@Z}dP(HS@skN@L&jGz

`N|ZpdkBYaHp)y0dlw9;HF33?=@0-XL{%p!je;K2^GqDj_2?;2s4^v*1VmHG`sCgU>K20NPDvPlD_$h>i+=e@U^Cyh-wZRu}zle3~*I}4k%n7we zby*R@Mohu8FW*33r!!exfye*+62AUyDPDf%pYRLtz~s@s_-tD=5^`$bH#vaA>_A3t z9x_X+uyDpD41&n-bs`5c6Y&x>O3F*n#$bBnm=Vh7>TPXDCWE4Hzgdo}9)28G&YOXb zvJ8B>Vw>WlNZ;EL0GLVp*f%@Va)Kbsv+OZujf&SzrXDq&W0VYov%RZ#PHq=$t zVcVK*SiWv6rd@J9?)&Ba2n}(yI9W*Asz(UTD`AGT(L%Xu!b>lG0?&XTJoCa!@N()# zS;{HA^x-mm(CUg8DJQ|?mAbkL>V_R~_PrH%KJXYK-8=E|YtP|_b!!kgn~uDHI|GOe zJpS|>ckTNKm&Et zP1j$8a0W5PXl2u!J^1J~&Q!wGLQgGD?OB8MyQA^fmp;VwC?5tcT{s?nl04lAAGZJo zv%~SXXP-k+Ssp(5>LdE7?s)u(XW?k;Ksm8jI!*3At|%_RHy?ijzmYTWyWc#95U(yY z$qDHbe}JRdT^WvPD82MGE6WOK>oVb&RSJ3Ko<+6^`iYU^f-EGJx>Gl_lRBi4oY+Du zTBuWBeEtJCQ*S@>^2^lqJ*du%#|v+Njkg;e@%ppBLx_Dl8fc?-?V^ujyAb#M`Zt&n z;)PWoy^S9zBO#M7!K{&!@z;O60GIj_oH!PPIZJNC9lyK}Ufm6xV=r9Y<&IkFB0D!s zn>`0(#!th^_>=f{)wgJCtujBl$T@L{H{`I}L;nd++tom*}jvV&K4Ry0CfZXwT$?5{P@N9h^Z*T4G;Vkmo2;k zzLY-|Z%}r-8w&9yX_M06i9h`9pO_dGj*gBFR5vuDjhl^|D@IP7PJewO?3~L`l6}Hj zKZ<|d+dy4-0@b~)7&mW$dMc;Ar3EoZbxb7}VbfFMFlYE8mKVRB?Uyj7qEF>AVm9j9 zy2v|fL?#XvpH1{`2htcYm(p)9Dy_t%spEA%>2K@gOdrOM^NYrbqen4$;uz|7e>i!# zD8EAURx3K1Yw`KVOOaVng-4!v6l2DYpr751{M=kLHn*xL4&;fZW{%f8bU2=W^*W_j z(_`YPA3wm4KiJ^@C;y5d`oD5<fViVMaML&522{O!+w#;l7MV)p#m$_F0O9@^N!)i)5M zhmXL?gX@r*l!)PDqZm`Qp`{`hhmXZ#+WhO`;o+_3S6^C&&;Ro!uKoRQ@XRZ(!=XMG z?~pFY!n{kb#H@=hMoM%t%F14%-v1S5UAjQUR1O@657)tty1l2q2w#2t z0dm^H@XU*E!^gP?1%;XTV(F*kX?g3R0j?w0s3KQCNs?Pqj#hcZO-JDt=tWC?H8L`@ z;O<#~MmEu_u6>G3bRaJy1K)n}9b848|Nb<>{A`hya2PMWvlg8;-uTV^H>f)W(NKwf zn>V4R#S_mw_Z*^pyRmQYQM|!;{laN8F?HNHJkI(J1*g&Z$$nHi&BJp~-Hrg~21=71 z+^DZ43>T@Mf9W;2V9p}sC1>OMn{QVqbVOP;=XR)gH~v%-UjJYd?zrnl{O(WlVc$`M z<6D;Ft5wTU>pUF4zj8PQxE?8|lMtQSh!_9yM_e>13JpamxZ~D4FniH8xPtMiJ$>Ki z))v^h`r==&zOCe@AT5J(^FEe+^(7vC6&rijQ z)myRla2Bq<*!4pKQgcc%epnRgYlDv37HnO= z12e9Zel&d$05>|OgMmVW*YT0BSKrrC1wvmDj*H)=X` zd@f_Op`TV?UWlgVE_Bja`{iSg!GXT=j@8?d&e-*z78 zbWF1z`Ku9eDch&*4yP2Clu~M%;MCEPVpV0^UJkn0Ljsc;FXzW72SWG>3^? zJaH_`O37|f5pWeDDNj&YzD7V~3%F`Fe5^XxU-H)EQWE*DtVe;Ua`E zsIr`t4UN{-*IkX^h#>d}_`%oT57%G0NIhnln4Tl-$)MXE&i;Y$^KgZ;gCpF%{o(8D z41a%rczd`}2^)lyUk5})jmAw&uEnn&xKlgeCYTwIPMd!OuDbd<+%lTU;Q^xBM`I0ZM}eIFJrT118HF+imoW4rP8yK%!!H(>Vksj7?` zHjP4WbqP)#I}GozNL+HoBF@!Moy@#s-V~HoRw6#LinOAuKp_}0dNjgDj^I>W&`JSX zym&tRyxh><*unrptVJj6NRZ*w*LLF48*Wu6keOU(NS15e;R7) z>($jZxxjsT-yXDe^kUZK*CK3q1bjGdUtceiT7PMQz<@(m5d;!78ywWW>DN-oO?I?-KUsveWE^%{-f@JK};>-FzN zMn(#b#h$_+>2yw>Dp#C+5X9hm*0jk8qD=Sp%EvIAxHg;z10yk{a=X3Kiw5KogR*?M zc-q6!#|afRjo7+nAJ%W!g1aC5C5F){uP(?l**uxAEdoP_;ih{Yz~xt6jZqQ$i)_}g zho?^*wN#p4~)e=C|B8?fhS3g%pT8ywsN5EdLnN2V6pshNt#(v>`N z(MyKCj~AZCC_2&dNXpFFv#?

nk1yWp4MjUGQ-7!4+3r zjUcYEH+f;ntvA8J(;eIP9e^wK!9&0PE#7llKT7p`|snuci+O28?T@o z3S}kO%A*tUoqFHOr8nG$C!hNV!fE4#rc&ZeJl{w5j#ivLyjMBj3$Epwj2TXT@x}BD zCgWNLH5=A%MU8ylN*o?G=fJW5_V<6laF&xhJ*GznG4Sd{X-$)o=<LQ=q(JrS{%T!)dk5m^^j1~Z;l77{h4HjMqSJ;)gk#E>5%BerE8I44aHS*QBDd%CD!VCuIsD!<5-`aPK& zFPiXyx|({F`tg=~AHY@DUWf4`hFQpsP=NtUSp^E~+7U5qID%;#XN?L+R?Hy=&t;%H z%qFOHgy(JTaM!)}8;)F=YkzF{ji(vf}dy)Q6%<_uhM^+MHGTSGODA3aR{ z6iAu40{*1MNV&^n{ybDN;7+0c;p*dy-#_vP%D@MB{ISOo8XAhf{q0H0!@Kz4y|;1K z?YFQkQoN^~j(!f7fA=kxZrzKk@46R%`14bU2oHf9Sx%pd7zDCdeM>E#S_2Ye4y(tM z9{wu>yzo$^vnNrPUPYap6qCT9I-h;Y#CtG)+yq>8)%EJ#+ulL$m^XVQ>Kf`%*Cvg2 zA}Qe%iYn@G@#U8**@}pWz@>{XMMp;)y1E$1u#`HbYVU%uQ4{dn-~0-f&Y4Kr{vIp7 zSc{^PD!J!f>pu$&E*FEP^@m7*H)?At;OOFnx~6(~d3$hd{oT#5@ zziBgEX#2(Y^A|hl=R@9*#}mccwc*&|6KHR?#U%?br_2viuVs-tQ61<5iG8z-i)@@+ zFmA?03{vl=zdm0*4KxsxFWQoYSKWYL(r+Ifu20-)Mr%a!=_A;B;3OWU?;jqa*JkV_ z`pVZ|g=G2?`B}6}lpB$W(bVU6+;IjwauVa_s2rrfK0#AKpRagf>X>6nYuZHDcxRmAGL3LR`3L0cq$* z|HBJ6-nzvA zrYgxsNECG#RcTdq3uCIC*nTud#Z=>{w<=2wUxqD`t`li{Z)8xsaN#8g@|L@mEFpsv z!F9l`)Q{KSw1j$TqT&TLZ5u~;gpS3MB}?GiUWR`^^G|&K@n_hyX)97`M>=FkQjX&) zN{_+exJ2Cb$dj~fvk)XaNXPStKqk>UXRDi06 z9^KwqL7ry-ebL-Gh>99vj;ZK-czJsvEi)aRBD`X2W`_GippFk9)t>C}&l z7cWw;ry6K4`h$Kl{q|{-#v;hA1M!KmI;NtJmz5Zg*3J&hxOf3vJ!sby>@aKQMR@G7 zKO&euTyUg6E?+bc83{?KEn!^JQHS``amX!i#AR3CK)wruA9;DnwO25#=|W;c3R>G_ z-MIobPU?>K=U#aUci+7P%RYMz%f9^rIYpJ^W3By-SoOcP;XH$5ukegA6-8&EB!QVf^b@5g@ zSsrT^eI|F`^@Z0mM_U)nn12OszwI_GnmYwvZgQozAIQgkk)v?OJ$K=@C08O)L_}rK zmUPtPuyzksGJ*^yI?M3&`F3rwjULh6-u|uJ2l-o=D$DtEJlwX4L{oE zIrA=pGnHfvZHSGN6P*1RkNJAS)y0+m3g_qVKw0%8uQFzGmhlUnZQ<AiVcwkrnF28gx0=+$vKpWRm-#{5>*!OPop9Esmm`H?qI3lk=UZue1IRC()09j84MifBp-8^V|F2!x%)0sHN%ZLi~x7Xld)jEx-5`7cfgyhFhD-pcwks0E~PJh&9&EKd{mGs zGw`IjZqX8!Wn5j|aN&g)AtEx8>+7$cz?nCDGVB~3k(^(F4(h&1(`Ug}UJsTWtKvkN zQ%+hIYU^8Y=WRE^nSJy$RASw#6_~r|D%zZR+W&|O#Wjm&sv8~l9z2aMu}flk*nOAy z>1qTrX^C&;j!?$8%yXxYAg|eya~1Y>^7=K#^^_YqU*V-r#>Z=ZYy7c5#t2ZF&ogJumGz9S+hOv9ruzmB`^x&uM%sIM*crx(?A zrP#WC9X4!Shs_%{;mEPm=;^ViqoBpuK;KIwhXnhxafP#YDBgPWMf~;gd(~rveTLEi z86DokanC)!#5>PCjrK zw?o45BM2uW51%xGhEndtNkwK+3Cf$>v3b*GY}l|78+YtOd`1==I-1Z{Q*NS#ZZmA` zMEw3=ui(C4-iKfpy*6fA$(}H>GcLaIHhi{X6-Gq_$hmNe0zskZtSo0Ba16HgZP>GK zo9b`VmhFhk$WtE?kUNEXy6s_S=Y}y1#M~J)wbCfLhK5tPozxW!QFy(g^hh6DXD5U) z2=fnf@=gMoZ#tu4)EzKQN0OiQY$%Or| zF&H*+f?Csa5-#~%yq6!YxZy@@T>1%Cef9;muHVSH9zZjLNqK>59}LB18p8l%&-_+9 zM+A--NrUXJXw8wrJ0KMQe*JmOn>($K_NpciEQ1CONoYbrP7GYAtOJIR6}!u1rt`mq zBJwLqT{YE+JAMF8E?wBWe>>$%)@B=`Q?j@=4QQ-s7V;wdR1$-KANYpoM@JNtQ_HRY z984HAl@56p+wydA!?2OzeeY(GA9;U{ZrfaQpfNDU50uo@qP(*c!=l2#xPiPNr%R-+ z{L0EnIT>Rss%nd|k-B5UMh4eg4&r!B5_J#_U6;Ian&4czo9ocj#QC}h(P5B8OK`17 z=)>_z@EbXj!BUtClr?{lmUc3B+d;=W0CcQGSJ*+`>63|dTMnpqQvLCnXEAlbMd|{9 zUAH>*!(Qdr9-cwrxc5&_;xB)H8WEnx>EWXLv31AH%WuWM{`MzC z$@w4Zb}@3n40zo9O>%BykAU&B@btf4#Qe#_xfZmPq;F1ECU!E&*tlT}Hf-60+=2ph zbW+~QGcxLN<0CL*awK*!ppqe=y$uI;@4(_K79$`;%(Bc*Y(+EjvQx2k`S^yRm845!jJl=_NT-h0av{UJPDc z=oC6rS3AQ^-aSbJ?B(tN`f0Fd;4EJu-Me8e-g@;7%$dIkOYXP}0p4^dISNfrfkAf- zePljXXzM^;dN$ms%R=c?o2bima=?rm4Th^)5=&Wl(du@Sw zuT;>;SqO_5##w2dD}A0kdKhb#|A6=2{1o3T-GHZ`e-RTVk5KcgZmhzgggA z3ky_fk&EF`qcJ=zg#Lu+C+cc0pm;s%-9}8a6<^ZM2BoF?qd2V{T_~t&pl=X?AU`h) zc_nAsx*>>8`5zy=6ElZL;?tL2$L5XuP)$eM2)cx(#+Zc9tT|P3jBO||kNu3DF&}OY zo%rZ%1=~?xR81Qqr!@3Kg$jK%lO6{Stgbi*pa1(`IFgit+n;(K z(=MC}Po*DBP*ogpBB21bF8-A15lUWEYn-T^tsOTrhYk%H2sheYdx@!vs_^~yTd{Tb zDg5Ry|HK6g%`uhc3Te|3QIluju~*)}?YG~GKyeSGDMHUie#LKh!ezJJgU?s2#K=%T z#WQL|692eI;Ho7z;j{Oj#ntmCATQT&Jfno^3r$hS1ek+D5^`Qm}cAI4qR&&He2 z{S_a7y%xlNt?uuTNe@5=Pf&I{K4Ude%%wgX6$qw0RJdzJ zWDVgXpHRH<;m3ITv0o_rt6GtZjLlWZD=I^+V+g!MB27NAFnR!HPoId+`W)2Nm!pj% 
zX|i)g2<_^~;UT7S4sf8&3y$(eVOi#Y`B`4jRL2&317bEyP)GJQRCArv5Hx8tZ0Ikm zx}1kgu)Lls2)z}hXs?$i7Hwef$T3ubI4^md}RwHzP6^(x-`dL15m?4P*mlIf6}j`cce z^e_%iPtAvyqYEZS2AXI^uz)?~)XJYT(ZM`Y11uv+azfOm6~%d(*u8EIVw2Nw#l81q z5&bn!hxWd6K&m$Bo)5C4eAl~Q;E4~q3<9*L>tHA6Ezvrr;J5u>LJuu7O++aHtau) z=@(y$u@NC~rYc23Jx}m{I~*DQ8|l6V#Y!h$$pyr6QAZU{K~uLRVwmsvU@rkH!Zdyn~Ps zfAVc7+8esz<>F(SK7S>ECgfd(J={!WA=qk93+l>p@Wt25@b3@4#CPAXz{+LIux{lV z3SFHlYdP{BTfIp@Ye^(Cqy{%GKo|S!=4wgIWJ)zd9gr&W$)#>x#tb>oVkvipSqhvu zfBH+V`iewrZ4K5eTY-PQ{x-f@z7os6U!_i56z3(eo<8%e=q*kS9s5q(F7(K|Ma~92 zguZ_D9@}wtz{F`|(aFFvJ0Tkl_1)MToq}mIrek=7Xs=ENn=NwQ23xl8q2sZV+j1;l zu};0@deNMz2=vnL_K`N_YoO9kF9k%_W^kDLX2C8|dZr`YMvr=Qw79$&(Q$EDwqgxw zupBGCUxPz2xfm82f^mT^a8mX32^SmAUyObi9V%&4-$!ZUFs_#baNT4|Niq5mMIclc zr=SmNR|LkAy9GixW;(gy##?VgRelx2;zNP$qzkD?|uHT3* zI-QO*9;ySjDR*oLZ*@vg*Ec9LL>8JFo8aQ?0y}$=?z2P2CQx0dJ%UEX#obeV;Arcn z9VjX+#A~mEq857S zg^Qlmeeu_pYr%*?9xJlxY(-&mB0l~2Gra!s7xR#- z|9%|Yw-2>-t*EJL!QLa$xaY1VaO!KDHi&I6Tzo0MCV##|rzbMdS&citqD@=?vQ;Nq zI(rb8oQWS+ucm(ap8frR6)RRF%+DRu=)7sF$c(f`uo1f>6CvbR+c(;d&TcewlgH$W ziVG1Fa|(^EmXDnbgrsXuwFG&JMD+6VfU~?z%#Pj>T-5=M%&K^!K z&ggEb#iosGkyTQHhaPzZQznh$_*%IBo$zpF0LtYGy^Bt_O*XiIxy zvR&qKt>|qj!O7SYh|4a-@t8Qu;zruQW0WBqtX#DL71gAV5y+YbY}oo<38<-~g+~<~ zi9WgEzLpy~NhKdokuZj1q#Zo5XBVD%?j?M^jJ^P6LhhW3Jua_xkgLtIYL0LZ8HEcj zn#X{s8b#^JD9+4a$F%FSFNCK&VylRzMup#zMoU+GgLmJ49btif`236S&?dv?DB8Na z8Jt$&SX>NNe!r|wCSp#U#PDEW+TLJJ%QPeT&v_fZi$HR;<d6ns zDUX(Q@vr6F3|I!=qKeH&#=h{REgbD#;Nz_?R;soIQkOhlS6{CJJ%QvIJgYV~s-8+v ze9f`amr0?#y-t7ptvkd*>^B^7SgxeFavnT)}|*G~}a|vSQ~# zb9wzwYjqV?v;On1zta~}ZQisVB?ZZ_lZy({tBFy>kLzY!C2h-C)Cnki`EY~QZz`=H z%;S(;TR3^SWBw%chhg|rM&7kg`Q+eHYWTl zJlIPgrIo&KPJSs?uU)V7@p6uR#rhraWgnA91i(cpEMaD;OJ-_sMPhU;QnE`i{i22R zohs!46;mHljy^d3#crz1o-XmKBz3S^4F&RvoePFfoW!+x9h0VwQ%@wk@~_wN;y*q> z-03W|a1I7{s0)m8Ly+m%Oe=wCENAi?Ct-?R)I0P=*L2A#m40UgO>Kz{w@?GtkVcr< zLpfKbUZ*O^s@l@ecXJJZn~U*r9Rd5N!fI)(Lub3#5{aqgtMU?4_2V>=4H<_CN^?k` zazSDU=$LJY0duTkujHd#-u{8ADFxLBH~#OW57tGQ)gz&{qJOQ*iISE2iWsJtrn3P_ zr;g*5w?6=gI~>1}PH&teOE3k_jU0095=NklmEn~VvH4ugSXq5)AKzpBc^hRu}C#tz(k z$6c5*VI(@}&-GAdT%6<;1QyM5-ogu_%Z0P8A*Hy)rJl4 zL<9$6#^fnHHqS;+#k9%)_GTL2gV=N6Fp9_!N^Ocl?sdbMF|$xvSc=k|bbRv3tMG8+ zQjqzp3W`uy){60?r%P#q4UtH76(u;nXB!d{6VS-nDKg8LdKz$Q-zI#%Y&9Nz<{d0s zwHnK4Xg>Pj6@&-)D&guIq3|iAB3mbWG&ME!jaHaXHrdyEo#<#~u)bpt4jwygMa(`h z2f;@tSxOd)GX2$-K3kGJ1?3%CE_34HOx3M}W?-+uEI zEVyVA8IFcj^(-qvhrrVnZ9Vm9?GSnR{ZciKKsvWm@+O3@4fd4N=n7C60jVd6&FV?L|Y3DM~h-{nVm611N~1VS|95g z>ckoUi=(1p1OQUj5w5Pj=%j<+%^*omP|8W^`l>qQ7FMu~P>@_lo|`du4i?^Y8$SE? 
zORQRDUYk$ee;3!y8OdR{!G&ws*42uR4tcPLJ<-q>7UZF+twT9wlGYzG3hP-A&Q9dN z5C-E_q)V-G0t8aOi;L99Rx%}9ak4})GoKkvO^v81uTt|6xH>q)sml(n4XyO$q@oR) z7zpN-l%bw{+NEgLje|S31yEKHd9%ZW;UiI=n}^ck@;<#Gn*`Kq2u6x&A`%43o-K`>^rP0s))A3oCZUK< zXIE1<9{ufu2n}#ytktI8$r*65-gZO5sj z(b&8FFzV`BvSo z)}Q~*4>s(t9=q4BMruwT{`e{#d&<0gtmuh9J*>XUEbn5GQ`&MD*3^p@!HHu}j6RJ$ zu{m%G8l~PfCo!v{S+4^(cb0ciuW*<(c_i+=Y8rAgl5sR91I=9wC|$h}92JR$S6_yY zKl@baGSO>mR<6Vv^7XIpyhX8;hEP_o&p_C-EgwAa=wI;qhi{{^qX{3r^%}Np+lOk> z)X*b+ZJ5^CXiZjI_7&|N#A)=SK5s)K>95OhMs)0pODYf^83zAg{g|$XHO(c}$PrZJ z<)gc$QN083@V?zxzkUlH{js?FxA(&@z?*U++$A=mi*nz9n8Szg_WNHTr?d*Mz40R6 zc;PPy@o_ghTBLbxVJe-aEjSpRg8F7Tc_|s^4*9BiptlF|va>1E^;*YrlaoL4N+S8I zAvrQOIWfeQFzCkq2mkU{^-2>vMnVNenP?|#ju<@xuH;J@ zzqD0bUVuHD*B~u38!gh8O1DHW{iXy~xRp<^eo z=EongcJn5<(f3J7N=IQ@J{Qttj|fYdkiGt-QfWiU4duV9rUK9Y^$A2wxd89e7g+Yg zDt!0tS9s{YyI@CuL)K1C81^~@V*G?j2=VI1$zul*9hV6=4=)TKMZ0X#ysWl7<|22n zPo6ObkN)l+tYHA1l2SzebAXelFQ(1A2shqwGnRh8T*<@_+}5t7eR}r|Tzuhdvz;?0 z$-jL%xDPgP^6Y2mHgUZ5ItzR2CZ#v7 zb8w{k@I+I+>D4F9ly@W5);7S^#}C6sj$(C1O`UH6vo9bAKzwW*ynS5I&{T#We_VrI z`wn9I{Q0==q5IXzT+Qp!0cEgSK9F?~Z+x}_o!)+U_Jh~((68@Agr~FV#G=eXzq@qR zs&#DZ7}_Yv%HL7>=K>j-6S_N{FmcReeEH!=^s&CiKcD;~_U+k)wd=Q=+u~dtZSCP^ z>qNWNq=E|>fmjOhPsFEi_j038*nn0#{!&jq@RwJZi$?kmJz^g%Nz~nex{@4h-?AC8 znMLZ+MPV3go@DUd)`QIxf)(|k-8?MPYX+BDQwa(#@~(y-6YF0M_z`qFOH zn6bJ|wj-~lQ2NHS7!_3&aB*{1Uybc^Nw@&sLB6P}sZ`@PVC?H)-9mDuqG`O6BRuZn z>IOegPX>Aw=r#nnm*HBg9ZHI;Fn```=x;h%_Fw@ftMPK{qP_e4&DYT7=!s`O{tDl& zmJ5jA;xCUq3`h0a04b-->-Jb8LDK{dWG!D}ofgZRGYR7r1x@;+OZ`fk$tekNnJ3q!-taZCnvGd@O2;OHq?wrO&sU z+K@Zo<%SHoGqQ_;_tG!FK)qcUK3ck#KJpLv>Z^}&*OH6js$Lqb$jq{u7xXIduSwpG zHs#jU{P;Nf)$ifXKl}w>N-RMe|K`76#=O~#sgxf5Y3S>t57H7h(m!ItRQQq)%i?2k zbY@FQTi?Bv8SsImF3m+cN)2t960;Y)%YtPhbl_O z_8mKroKvnYT3SMt6NYN8#qP};uAXYiuYr$EwlPX;Vz z!~@27PHyn=2|!&@vA7u;%xr0FLKEo{AQygp^#@d?PG3tVUN;~%skfmJUw`ru@@gFL z!MAIa+ITC-BkoE%?=+o>4NmY~`nT{l)t*W6C&lS=!!-^f$xZ zZIGS9IOAk2I$Zq`G+~;0-G;!%)foXH!6+=uS9H;R*r1`Y9qnCRh!`FQ7k5|sxAw4M zperBvl3Da{zq+al4UEg>Nh>pY;pWcxlk(Hl+}1Y>^K7_B{np*yg3P$n`0_u?vFlhN z?z!&)eDnG1m@#n#M=fh16-d4A7Q`OihgEAfA-{~Vwj^^i4gXv~!S5)>B4Y?NHCk>$%EIsKknM z@VCGI6VE>X78=`3C&5&{flhQ(p36+wJOnpO#+BzxJBE)h>m8+X8 z9=dfQ_U>GV6?>u)M!p>x;;Gvh1Y&B;BQroAZ>&XjR+^eyBiGr*GZ44k@e4S)d!miX zL)W3=aPY*ipg?qyw=$AXqnhPwSiYIY$vAbTF|Zd6p(@+!Y(+<19a`#}81Rx6=$v#k z)Ut0H>rM%iybuyK8Fn2FC`ven#;SVMR@P(Zrfta1FQ;D9j)jd!2qsRNj_S12$Vo_3 zMzo=UZIZTfH}ef6=nt72NGlA*wJukYsJ>pYf(c-s7Z~z5qWe_8rYs%A390x zT$}d#M$~jLNMRsoLu1&~)NXpuggYW9T!^_-Ct{Kx1E1z9@?`@`i>tBxhmE-C;<*?( zHb`ubZjb9-QJ9DKUwQ#=y!tw_OY4*jnjxd4zG|w*yKldTKmYj|q-Pf@eJr)*B2G?L zInoQN@a8+OQpf%W-+ucIzU21yo6j?#iNxs>ai+UREMUowLtrQ_?P#NHHdd16^2o27 zda-;agN%5tO>I-BI!UD58U`cs_03-{S%h`p%2y5dV#zQ50Cy+(GMO<1StGe`k+dS; z%8A{%3ok{8pBH&(E6U2MxQ=dEaPfsGD`6iDC~9goAJ+>nlkRfWa;ULdLI)~h!(0%? 
zfd605`~w$`AAyw&P&R(M7LCmvO1CMU$8~P0uSc_N?XBuANBQD-LvuSCxj?-xKA1dr zG1^<2kbd$A>Z_}jTovXuV(b27+<5&p2=!FeN%oDA~8H zbWGBZ@24Gn3eWxfV?1=6n&-7%+PYvR9%7py#6X)_~$doC@5EyR^#uY zBN(3rJFfFzo_zsdee*fKUAh#Xf42N@%;Eeu}m(9U_b zkmu{^v&a|K!4Uhxd5huMU5QgWcdPdpiA)qRV2qASLg?5i3>y{23CReoN1-7>i`fPz zB*x={$rJDi_1Z@tyifi22JXJ?ddeK-u}|h{_p3^=Z_8#C01q268Baa+S4^E0ZhBB!0{q|eue8tNGNnx@>vI(6bGv^V4;CpQ%pwDozkU2Aq9qQB7$ zhaPK>fsG^i*d9Fg=pXRS${l@okMu`NWidW{>m@w>&*za=*rsiX+zo5vjoH*!zNo{A zZQJ|SrZm4w1xz8MhaqZ=Jnk->N@){+lsw!(J6BoJOr3UGz4L6+s315QE&+q@ne#40 z{9&%O(1W(JzKQkNsiG-^nojz1@`!13Q!6^@*UBBH;)gW1(WbE{m*61GUwko69^Qwt zoMJT9OZj#rC6wWKLIG~Nb}?LRtsW*;=$pHVNb;wxxskllh_J|T`Vr)d#s*|$q~Z9f zSS^DL_(ezBb+yvJtS6nRZeqh*}nq(~nYW`m>{>1+`U`i1ZDH1MOyY zZ9S5b({MU13mvA|MeKb~OD%0Gd4zQDBn{f=Yt^&Adak$T({B2CrLgCE%PUnHIi~W` zBE-fqrYfoCc*w_tG5vo7%AtGkSj?F<0acm1vGT_?@EJP~L17W(0IjiwY7TN@w1s>^ z{%ovgtXEir1BXvy;`B)fi3|Z;{4jcCB>e0ukq}K@uC7xyPV{medER`{s|W4PHQ2p% zBc6Wg4S0l)#d{xof{9~BD*bQPSJZ=T&Dg(t6CU~9J^1kB{~)iHdRFC~D}+(o)H}5+ zDhf!?I_jPtOqnqa3n+88?69Zr{Q|v=spzly+9EkQ9;KBH9KXa=?JA~{wHYxg0AB9B zh&vHYS#LoJefmAS4kIJKg6n5-Vpx8=+Yq~JGoJYKU-84{BlMf}?5#lmy{$AKk3aGV zUi$ZcP(v40Eie6b8`l6#pE4ePw&jRDu?J-}q$l|7pSgOtm+g#-sA_a_BA0rg zrH*Ud+)N45?|*Ef?y7HN>}=}_8{bG=IO_sb#2r9hEagXRZf&#rm}WKCdi>SDIv(b_?Otb=w( z{NF~kp7K`l9(k0NHhk9nMR@i3=iuB@j89&F8~gXfpu;$^>FkW~3FE1Y>kxZjCn`(J zQ7>^!eJkU}YJJ{;bMEORE$ewq*%Uo3C&OBr8Ou^%(cNZW^kGJg#0}RhLV%yQ(xDQo z7G-8&|KZ~pH~V6Y95t%%rT{s?Ra20O$A0%ayzuI~5LU1jV=ZG9#ku(Wy;txGeY%8v zP^i~)Mde?7LSrCxJ#>Op9zFX>Z{@}gquk7Sg|%_xNG zm;CJJ=ZUMXxCC)AX;}W4l(Z}z=$&=C4f%<$! zGmWM88mLP<%8(kLgm*t&i@1zR{PK|}aN`Zv!PiZncokY|E+Q>xpFe!_-+1t$M<`$W z(awnph3x)Cs4@mL9}R*YB%auUw_kq>Ha?LUIrSpAvlr<|SI~>nNhjc=S6;*B?YnUC zlH1_vuCKsKIHFJ9aO3}$szMyyzYXctHn`}@J1{ofl?*@v(D|@)azkqDo{ozx zGP3dz8xya-y!62DevRRgzNR**jCwnflMqh^Jpixa6EI=^V)(l;V3ezmHtmp)T%025Ci=n03`nm^5yfVmw>wA3mHTNoAbFC z$%hZ1!94`y=UxKFIAjW-G_M$Y_U*$RzrJ5NgU&WAmzIINvPL@2bokO@k(io+?2=kc z96uSeXU!xXgb8io)GMFdKZNYkS`^l{BQ2+Zq;yj6;&GsXtSZjIzU`Zk!S+tZXHr47 z!_mP5t>eB-7B?aBV7)n^~aWo#$wxIs;c91}|SNTZN+~J$U@7 zr!YRk+u~$Jf9O=}ylxtVa-2MI6e%egT%RmtBxNHuEe|86PQk)UE`X<#=}`sBh9e!M z+*2ozT$G3Mwl<_uHtZZ+;P2}}N2nLakDR1osK<@BETQs{uYz0lC&s%A^<`x^xoaO% z6BBTXTYeRdm#q^@ifiEF>V-fbFSxq9qrSQpDRC#&dm=N_(oj;EiNN7e=<FVT`S3nTln~nlosTmys{kGxjA(FdQntV zin-U^im2gXita|7U+QFIZj@5Bu!5vH6@3JnKI??h0AZe90AVVaBi(cTv`FLC|_l{70At^GwJID7as>) zdf^=SyL(VKG~>k4!)WSpL}76yGN^D*C#0jWstFfgdI^J^VcJ$ZIw@x_C*cJA1H2JI zgQnkoW1t7#UewH7vmHAQaAJ1I%g;e-CLIAf{z1V3@Nlt16CLB@42I>!*fCt2B z_QB1!T&v!JYj$Ea?74oe4dqD4DyE)d&{JA~oYZ*4C1-)Q8<$;rDMEvSSWo1l2SIi% zc;@MUqP)ik|9t5M_`8@5py>VN(@AKcj#+TkWpK8&gNLUVgN|k-oQlTCk;4%YH5|i& zJW-ULgqYJAsIF^8R+c=Dn~4naMP$@4xTqKHYxyZDtV9wWxY=`NA$&xr3|T?XfxmLs z8#i}1>i!EaKu38Ue)#Ge^7?h~4)COY=%j9`#$MXU!N$=8 zp?*H__VJ{=)FGLAucovN`MLSjUx{ezal@VW-cCL24*T9l6l4`)$FXc0_uCcC<n!ys0_`N+aPZbxYM>)7SU!M*-g8gglMi$T3!wOgCdblXH6a#jtmZ# zw`nNH=Xi89a!Tq@MtYq(eiY>lNEY9CGp0@$sd!!mXtYZiamNsS>^OtBS(tJ0CF*@V z^2K(6mWeK|Q5ANrUCUt10kf{Y8R0=bN>BCD!9TWbH)?D=G55M92%;mTSly-vE-o&} zXRwf&7>nz#p3gv_4d1QVhTHDCM}3`q0Bw5P(A7|Z1F`vldn6XmnF#f9UMnzYp>I-6 zS>Ja$mC>;q1Clu8WMt7H%|b8r{LS~?3>VsHxo^P1wh7J*T0Y;nAI0wDasQpSVM2(v zNhUZq2G%>bZo}}|i!oxtIJl~hr@6zSr4j4bt|Ff$UE6n{!D>NWl=c}9z2B0Zn}i_8>5wb3aPSBN$|M{=9gj8}S2VQrQcqSODk=i5PA>59aEF&&8xHN+issf% zWO6)7@yV!Uq;d6aH)A~I(Oy3E5}S$6E@xaYXFBCwR;>X=S*eK0?ZCXn*C5ifopi6l zhAqdrX8y>|NTVN|iIoSnp_6~-Zr`);Sf;ifmjqBHA{PelF=fQ_DCc@w3CkQ>b4@w6uGxfwf)Xsg`EKE9Q z8!;jxu&*t9>TR3W3dE&kBD=g971d41r~m5e>Z0Dq=i_RF{FG>97F8g>x&f&Lxzs!k zsABMT&Em^2Vsr#Ml@YMx&L-p}p2YUUaR?ZHF)m*?1Mcj|as|a+G*#r`;GS*BYou%{ap`pH+zM&1gXe-_5JB5V@B04&b!E*_N#$xrw&OOHxF?_-_+;PVe 
zxZ1R#qa;`PdZXuGgE5hQDkf+q(PK}UlP{P~zUyIH6%Hq7qmeRDT9Bsh{);`8 zj&l0K4?g;kdY4*9Rf&2sAu*Z!mYhi+xq>|AjM9o~^{AcrY7(leaNX7cVWUT2{74_f zAK!!Il2%NecL_X2ACafr*`OmG>ivvwB#196C`4Lr0jk=1;7|MDLmSgrU5xmXdr8w6 z#3f`Pk-?yot2e5sR~y<}skcHDPe@F)W&Lur_Shkveq-FJcoddZ-Nk44VuGI*W8ztOGcs^0CIO`kWEWq111_2^@13)Qr;iUDDaX;byIwUlMyXnYzspnYkHAN{UA!{Ufho6EJK3We8&ol^b&iCr+G1d}=15PbI?M z(N&$0%3)kJYFre37y9*#lagrn^2-|3$(Y>KI38ypx3nIUCQinznbYCeMf-pJFw$D> zF?sQ|80}*Zbrb4f=b8ozfKnkT1lk~|mZJ>N4E{{6# z$dLo+B)^QCdo7}ZOz)mP1LUHM()+3nc|UYI;#26WCZ!-PJ(v5LjGH?!cfm|d8b6YA zXJkn|<4FIa*%VXNwb`LWd~U{6QDf*|ae(eFj8n@>aVRE5?jb==QX0yrJ3K=p=u`XR zBFdnAI8I&k>uNy8vHjS4x)_mD7vQ4lTJM?gGcFi#vy^W zE;>FAKJ;CR%BoP;)P;zMP&hJ<@Sz^9&8FPMGnQ+j-cCwIB5mccF_W8{qg z8#Q^UShH>$x_w9BwmWaKejlvz`Fc@XmWSy5+fdW%j`0g`#>ikf=VZ#K+*HfB_U%;h zR%H$CA#HCObwzqk`Tvi+zkrkDxYowuXPKEni&|;L%(5lfl1XMecFayMP;-^^O=ja~bB23x%iPb7)sz9{%^c+mM=3k01Qt zIofXe*lbDBMOg_Ajlc=|pjFiAwT#JAsk<*^RAJndX;`{+9{gPTIBy&|4i%^<%12Xe zE&YNFloc1EigSMU(mOC_d=l-K;55;X>!5us!ohRd=remVW-38zTN@%`!pN%*bk^o@ zF6={gg)5$V=}BC>bUx+6`c;296j1-m*UwKKIEuq*wJ>`H;2U=?RqHxK7Ywbncnn-9 z=rwi0(ajCxr!7K^d@hasGC7cOx=B0n_Ksp~Ek)!q#Am&E4G<4$kgKo;DYA z8B~RM>31S(j1Gv5jlrnsQ8;#*0$kgmg!RD(AHtN$W3}+AQu4o&3cN7S-gRUr-=ILc>s2+e!mog*x^%A|x0Iv7;DlIG}=tc;_BYI8Fpl-%x~x zMWV5`3cX!z7#$mp5C*6^jB})OIwL4J0%H>s)k0GV`%%k@BZrTVulm}V2z^g)kGfMD5fMon)`_vB<4NNf zP9sio7Y{7GmJW(z6%HIt!GVlcI%4i#kD_8}MN@q(;$lXrVA0vl70F|hQCeDp(y}tN zceLUAKl&jJLb!S<>tym*ZUqDc24g~UGV+T!Hl<~pGaMTl91kyV&YJ`(e$E3wfAy|} za^{zomLe)58V@pnHq!~3G;M|oKJ|Ey47v|=vE98eapGh+Qy@!=IH#+tD0_{#?)pWD z2oF$mQRFFk!c_xab=A_`72=b%>`E1Bu8;^f-7RFTV)w%}u!V z)|=Fq2xY-^Vtf+kn>)g&gZu+Rkdarye$^@b&p!V=+&$efWy(191u!)>4$d?Xy{OJS ziu|f(gpQl196Xr|8hApQC(oS2@e8BVk-@R9;#k*X=FFKGlMuuH(`eP!W8Z;8C}eQc zM;#KCForez(OgZVO2aMV<3YM&;^N3#4?36mG_=*~tKcub_%gtCB zn-7YLO3_$TfhiLwV##&O=sZVbV)9rzRr1AknS-1^rfz)r@hU92?_0R#w)y&nqT%>7{-ksOT$Tb(b*LN{vjNWe)VP4Na_qiD>ydeCMUx`$WNu(gOt>zoynt_bLOkd=%jdr1XD&ylPr?$+_eYg zoWoAEsiWwSw6-*%oepATXei>M!#M}#1>obUcVq{0uA)cYPx8#O&mbWo%%W$_iXJD< zi$(Kiz@0poR|u&01JdysJ83eayv?Z1JC6ewOHtV2j-J+9v{aX&fsW#d-5Zcu(uQ%< z7bDo)2{l#K=RvV{%=x%4mqYDQ(JFYdVU8YIV&r_32z))sWG zQO4cZ7s=x$K^`^E$zdOB>o`8nm_B6+V#3HPmMNvPc;e(~6cpFc|M0`OiBr_B?0V|& z5PxsAd&0rd5y4?0l$!*cIhTq$2CVLsqZ@9z8S~~&Rl413cN}Ote2B{(OK!Xaaib#O zL4uSmRdYfguJTwo=YC{D5*94JjsdRL#r@qq4CdMq&9NCbajaV4&~t^gsK*0o$7DC) z^jS01j@AxSKd!rWAwo&3(QhmCv+oY*GBcp33`|UlQj4EMz||EKCrzhbs76g?g}Pa? 
zbjkIc6W77Z&mFU7vyW6xYOMNO;o4A$Lz#65nYvDYIxV z#hDb<M-_P_1?J{bWHEl53jjJ`k(%+z0vbrsYzMW`>~JUMg})g6A+@A6TkKIP+tMU2Aj zcYhP1A$s9;=$z0u6a4%FaQMh6R5Y~Uk?%c)h)`SGCf3)!mT{$iC7U)PDk2J}&YnY7 zULLAA=Np^b@!Yd7s2jJo?K&{%2&2#E&7e-ksDV1GtGxv=kzqRM^7Te^bhO%yP+VG0 zJr{+C?th2@yfdb;Z$S*g2avH{PugsE|4>}F>?Vw5u&n%Im1&`y|2^CpWYca>o_!5{ zp{WR91f=JcP{y&DH~}X4YHLO|Wrg#U|0;4n!__QCm}^0-!kh-UQ=~?8}1$6iH zAS{GBk+jvZT_0~B+DfsJF7OEq!q~ABaN+y~R8*9srcNF?9E%rTct$O<%K}7ipBddQ zUPzui6%m2nYRu%_XYzn>9Bpb0;}JLdS4m0ZP~X;#O8TLV)fJdGdk${C;k~~bFNEo2$0({DR-CIuPU#cC&RT;h15G}A9CSG(=U zCyk~~aX}RIOLWvI9A`{bN7|gJs~&jhAxxf>q=5|k1io(!9*ljX^da{8H~H#(_XZo2h0EW44qRQbuo=jen9 zQ>IgngOHY=iwfGGI*#*f+T5{87GK80lVhHYCeDL$2I^hi?U*-j4*irx2#t=Vj!J?r zjp6xoXK{u)q`JPDaABim#c@RH4AS4`5zxYFVd25?xjO_r(djeV5+}nq=tV}rjhTyuJ z?m#pHm`nDRV3h&VPaWj%7l^`=3Y<;3Kwa6OdbR9^>v6;N3-n?kuT>4kR7pu=QPgrn1+|-2YXnXIu;}%s$=_|qMkO3DsguXUnRNO1`C$ylGneshF=MZ~ zh6aqAFbQ|ueK+Tc58LaP*RR6l=!wZw#v?M=O~*ZynHJi`TM!T$L4TNb(A^1j)s@r- zBKXpulP5u!Ex(KQJzm)^J8@vUZj5O?y!;TKJRP&=%mdw9tE@9$d>Gla*V1X?IM<;; z%`e*Drbfm9)77{J1qCCVabp4Pk>mvhh2Z8}??i0kBqYSgAu-&KI7x^@JM};>)^9)0 z_U7Zhdv8FHx3k5z2@g4M0z)vCe$bgyrzivT$y-`6Y4Q~6(7*NUl9t5^Y1}aID zJ0kL~GsO~>W7m*TPobn4wq)9uv4A`i&hql1}Dqy)%Km5ai z5L}X{^LZt-$N(7_2FgmVPSfQj!>BFgHK^hi(50n}q+81f=sbdfU&^sv{iv5MMuMg9 zA~Z&es=lQ$%{S?_&NbT7H0qqepvqeMIym2`ryG!CxH7gNWn`R1WHnewvVinWE;MbD zH;_fpxGX8^hkq0=tmAA2qnwnHUg%u;Yo(hYr_8qnBUh#J!cN{)rqC&;LL-$^^I81i z)zvuEkc_wf@ngu|AnBn(Z8@wUyqEnj%5AgBlXSUes;L!aXZGO7e|R0!?*DH*{?IbW ztE2U($|1eAj=Unt0)kIt(j%eqTUwHi$d5`h@&uS-gG9fpNrOJK)VG`o7$vONma~Bb z8D}jIMp^xVP&?`BzXhkXAvwBFw$x}!q?DF6 z0T~x5q01Q9jWPp$9N@jqksqs!$OSQ5=8Fv(Mrmsq0de|Nng&a8%eCdIy86=163?oZSoXKeKr6V6+0{#Wz{Zw(roZaK!4ar`or-iV;!1G@WG#cg7p`h z@!0QI;l@b;@Drz?rvxAW^Dp@22Wfb1)jlkqDmy83J%Qo7$Z_a1(fKGt<;A1;(~9FT z1R;PbYl_e%Al$P`ptYt^##UyA!({=B4Ho}H@&)Y6 zSW*=)MPBtBQ~Aa`ww+I^n5B-S*j(Ffd84uNpQW9F zG@kTzR3L5J$N2lEbWFYP7kKoxF$mNw(_qtg0~G%h5}sKI&~w6`A}J!r5IR*|{@5hi z<=X-As`)J+deKg+wS60~CWh-~dAOB9r02Yc z@sIcZf?(#l_BW%qCJnFr=_Bk&cgE_EUPp46ppd!(2JdZK7Lfj!&?JkFRn>UqfB%U- z7f<}@AOBjK=tDDQoYu zO1@E8>RI?`(erxj3>Z|_26Wk0praiB{?~eB=5*sHKYJcA2_d=wL23%3@NEfTguw$ccXy5vIA?1rGmPa z_T|i$uY!k%1&;<@TMkLlsn+@$tUAqr!MKL!r+_8JqHiwkGsutp5}%C$*KMg23)s}O zq+7EsWd(@CXv+ZITc#@C#8QJlI$f&jGbO7^r&wwatB3@pxF1C#c9WZr0;no?;?4xaUzaX#fR-gx70n0oD9xbuPUAlO~@9ZG%+-I>K|wak!k_ z)cg|rEnr{Hl5e9Q2g`#zN%PhRuVU?nby)M+4)}N&JN2z#hfl#sDV8hwu@o`dvX--? z3?vwZt>v`L4c0?CO`WO!s9y6g!Q$6jQ1F_$Hgj#jURnwmI0fWfz_`{RsXWvi<1vtF zNoRueVptq1+bAb6&|^Co=nwm1si~?+lM039&|oRc6p1SY?DGw<;4Ns;}2^_q{yx?CLCeGET;Z7+WL+uz{+o2DqW zWRax7FtkCPCROyyGVPgdfTZw{$*KPjQ^o=^uoQU7AB;1NT0RbKw0C;W^8ng!Jx}& zC8z1K@IMDsg`2e^bXV;0HMcTK4tQX0deT- zKy_IGit`z8@5z8q>{Q&bRPSukJ?%z+kKCcqJL-%>zG!Rw%5J>EvTFU#24b}vO`9Lf zsRekY_^A&gUk(lSXZlKdYV=mXl4HSP>p0ohv0hnE^@)t590On}W(xyV)L$zH^*;gr zrQr~M%6m|1%1TgCT!M1NBYVCTlvN*S%8(svngMcV&lh6>V)!-$n5y`x z9}1DuQckhO4lL#PtMV;r`kb?+zYFHh2Go=nASWXYA8kK?1$RA&+0(`k>{uKCD*~I- zkE)T&pyNSNN=LoM=6F zPj7g;xuUDC0#&6IC@m{Ter_cSYI~44ehmC%MTymv99s}seg=UFy&ZK(y^w`Z_GQ99 zGy;#_d#!rhV)*ix0x50q+*;E%RfC*KKjpC6L;IRW8C#jrzxKE+AoGDdlCd4h#ZreV zH^{kE{FM|N+yj3srEIQ^R5`3O$^gNjm$&WOumh$lI&|N4IbBl5R$CAg`Z`-tS5k=V z)Iw}cYeD>k>9}F8-W_BI1AP}f`cQu@@@P#nD6&#*q@s3ihcMtOG^lMADpfk_zx05EK#$ANmE2#TAy_?^VdmuS0FK31h~@!J7u% z+H1kC&oo+tCvp&9zX@j#9>u25zJPB)7+!e!WrX>As*$zUyb@r!f&SYb2H3VM>Gm>j zUt0J)IA8vY;oay@du$DFs_16ZMPP674*8^8-oTcpGwgiN7+j4?WjjE-b zr0e{_V{BYnQ!QnA*kI*hJDTj1Dx)v0>AD}9N`j)-rjT(cFt7`M>|-k9d>~D=rTM5! 
zZE6htt@X73Vn4M^l~+}vwxe6cy?58-xq!SM??d6`<9PMm6DaTZgpI2=TwR>uYCi=> zp1yPPtS3?a>b?7gNepy1pt`CEjotFGy<7BedR%=tw3D`@isLJ;szq5#C;CT4V;(FB ztCjrG|F<_(GFB_2j#i_zx&edo4H;Ey_UP_^zUKhB;KjJHxw?!pu46UX$|PQ^rG01a zFT9wWpHgaU0#egNl*4%K#n(`hQ+}t0bHYcW|E};MI#)1=>E1%-B*$^=Mg<1UrFVxZ zOb>SBotIxkO6JGh{D=5q=y<^8xJUuy1d8ojdGiOj! zTZgGx3h{e{+k{zm>@X!?({}QLot-193bJwX*irP1OelWe4!;{mXJA_ARLzY#ry3Y$ zJSoNctQjU@>TkE?ROD4t|L_C%hj)F#aE2gxGRn~faq+QOv1}p29qe)S;6Y?I_aS-1 z7WlcyYmsEAjXiw){IP8LA}olRhmN8m965IZcJmfter$;4OQHfQsilw;w-|FNmu;E? zCHG$YyIPQOXD1^Td^B4VeW92rsf(_*-~HrT6&N@x$$MWo*;FE65XGGrS^Tm ztjfeAuY;;PYje?W2J%v0Q)Vd%kv&U2V~@Xz7t)fY{F#$JGcIFp1ZJIAAb6ygJK#{~ zb!$m7Yby1a3>>v3q9nF0PUD9pyeBkv(1Dh^RG z39|wX)t5BlKt%Qhq)gvoR3~&D+2_32v7V==J7N|r$NWW0;P2sR^@O3+H}@;8n2ckx z4Qc5)DDLsZqmOP=UnkS6K^T2$O7%?XzBL%Qgc1SrccL37kDox7XB>8H-=J_gD7v_b z@SD38u;4L3PRfs^f~C6bNne_Ws?0nmQmR{8&x@tLY(l$%>-IYOOy5vHu4i7SiEzN? zEjyVEa##9baO^^R7CbKr$xBxuA|wD#_QnipNl~+^CLET zX(gbr&VcSi+A~<_sli(Zui$#=1orG&26s-d)HUjBTVpO^e?||MhXUl{$spR8ke)c1 zfqi@TA|%XP)se3p8$(fKDj**ij8^7k;O#fwL|D>#Y~S}Nyxgo8r*^^dsW^P(62fDn;1}ep^iZw3$vXY4bN$LCoH>0OC1vHvE~-F7<i(1L5J~ zr0{9XGJoP4wZsa{kURr|Q=e61Q+pySLhC3R%PaBDFJD4IK_QB&Dv?)Qfr8R%jE;=M z*T)ym&Q4k%)%u)M>UD>Jpb#uxvk9?rQI;oiRmXzeLW%52F9O;Q$seJ|kX6=@G9DXq zBcM@9UAE?Vsjc~`VWzE&eWVxXe)9`twV5z)?REsX*{G)j<*cQ?>=~QkB( zSva590r%LgSefV#SF@bXj7D^U0mpMc`T=~RQs5ip2Z!k~bhp&w-8bJrMNK2ZVi&^Q zP4jEk=w*EJBm3-79nA=%F4sphwfL8{L>9VQk&${G=T2WlQE3$lOKLDQI!>7hf}4|r z`C}($?ihkAM`rZ;oX|mAH#LB~tRlSj+Hq{yumM3KKB_lMD{GLW*(D0*sB9Do%x-Zf zx^yi~*94kdo6uO>fP-%yL|R4;l2)x|3}a8-GSfaW!o<7@S1(+^A`GEZ`ZMUYYyv87nVdvlPb=yw zb8zfZ22NftLugdOC39dKwNMJB`w^dK4Eopojbn3Zo9PKU)=F-P(uIll&TEpXA;x zycXN6?6MrO&ZG%t`AmjSoWaeTv`ObLBCoIzrPVd)GEF0NUJN{G;Izf3ypeA+{qeu% zu7J7Eu>n+{J%aZxWFlbILx>J^hJ0dA&Kh;6Cdbg!PzPIE8`wKJt8xL2=l2B4Cde4r zj~f4Mn#3*)_I9GPsT!A0pTwJoFC%c?VuS>`Lx&$yYZ8+jUqw*?4!yG^e*_CkBwlc{%#tA=Gvb!z(-yzMf*Q z1gu)gVC0YTar|%AX8}V3q>3(;+f;U%Ek;xBXg!y@x2#tz_ue+;?K?F2T;gEoJ4|>zU(!plz|J6@W zJL-o|{`Gh9@UG=pyL=f!g99udxtjrz?Gf6cesm9x!P!;f7)`0JCq2yWQ#NC7d~`zH zEUGWQffrwT8Ql&6n4d@+#Is^`rQXEYAX+MlaPIsiTuIMD4&|Y+q7E+3F7Wpcq%FFI ze)Bf>JXmSm{qZWnj6d0@zO%T zdTypCCh)g^_6@|xC1LT31v7jwP^dkQPkQihaYkg)QluSE8}koKrE!ktvzMsr>mpawQw`H0A|aU$l@qws2x%6B9UzOIi7->~P1P9n0ZvrI2#xqO+wA zZG9$8xOl*cnSl&J1%(WtPwqd0BS+6*Uh)z|hH=m%?YMIJ3VQ5=kdPP+7b{#NL;XyS zYcMh~1zT5l_1f0VFvUZS523cY3@t6~Xzl1mTh9R8J-yXy$MneP?QB3pRXNIY3y@k; z3;)<8xY@G2qYZ;&6d)IO8e=0|9#a8GMQsxXOrz?xY~{0O1}4BnZ@eEJT|H>*9H8@X zh2vdck21q;ct}12tgiMd)Yny`y`zuLXc*H@Zg8Y`5P=yT8%1YlCv3^jfzCEGHa4P# zV;mv1_AYdi#HZ<|qLS{~vI?{}b)c=Y7k$GfIC*$-y7X%tGhMUK?$&y=^bTW!hSY)H zQTm!q+9ZaVU{*5;w|Db_y@L+aXX;RhMMmTcWOWVAXzuLB!04nh{EkA@)G+y5hvAVS z433PVvc8#w%Gz4vs3rB!-H*T zZYV)pZ4HhbKMRl0czF2uW3a0OogJNUaAROa`UZQN(a=z@=(V%A)AUY{Fy>_Y_CEB} zJ-awNSY0H!8<3~dqv-0WMtyA+=cWhU@|1&_!>vLC>HG?y_DU7IOOfJ>@4AEG6ieqM16Qk(vXhcI(Gwe7g z0g=)03h;$Hqf3kKiSaN$El^d1_V!kEwKd{WS`nH?eeuMjTj9xCb|RmYmoCy*!@e3R zr|m{3U$9m+#rZy#z~dYGiRqP?*kO-*e|2E{74 zxH_BVYnoy_jONB_G?Eu>l&fCKg1x(k3gTvDkPdXPx0Uiqd8a;9H?|-(tB`nnuyykm zxY&$gq`!sw(!%z(@D7MXP-rN;+?>p7WQfiP52!bkp{>0aU9?3WzTR|#QLM3WgtiIF?l@X&>(E-)g0_xcw0HHv z%a5{6Ts9b{P3*vllWEw!b0g)w7xi>*%{_G1wzhC3KLxa#8pPO82VVHiAqUF_*X6=k}mvj?4=a|cK2Am!bbI7a(waph_bGK<^MOuKOL_y<_N zVHG0cW0j2B2`|UmaPot<@y?kX1_8?u73R-Ccnqmm(qL!jMB5%mei^To4bybDqM^AL zgA?}DO-H2*vp|9-%5rUGB|3&D;Os>?V6hHWO(-p>z-uoZL{wY?;!+Y2=u3(1twLIA zE{09+h=>d&FYG8&L#V4R=X~_TH#hVl9y>ZX zz|D~(wBdQyX>V@F#0Ud!@~fe)ihie$bJI_o^HM zPuS{U0pwpZlk~h+ct)>6thXJyX)9YgyD(u#oo5|#0O)SQH@^B0Sg_?$M8`#9y1x-8 zj-SAROC?ymdIORZV)O+B%8bZyM{5llDC=$PZ)ki<$-4MdTLw8zb*<>3Ji5__3z5P? 
zxp*)_8LwgAcCMbB_iqm^&@N0&!izS}k9uKiE1{d%j&Zb@WxbuU z-Zmra^v_fM^pTCItEodlVI?we79lY?9+R}stsLXf*c4pIE3vhA2GIk8B)zxDm~m+R zt2dFcF?2PzprwXz!ziLl5>M2GodsG$x;S1!VbcHSc*ROzA$@up1}8t6r7brbC@<0i_A zL6-&ODK^R$ZA>SVR~PzRW9%|YUEhX&s~+F~(N7Q>8jkdgbnJTgVc622NKMH{{~@UW&N5Ky|^$hI2nT(nBB8LZ8`2o#?^{+2_LK!$AdX z1j-cX^$W9#@WJUaeCykP4~m}}zrm$hFlEAUUo$S>xPkQIGK2&L!CTgO)hE>$O#?l& z?=?!FTgWRr2S>QdMZPJ@emi5>E0=Ihp4O@9LdKnVCf;3WYUx6+X+p`8z2cv= zpGF6hpsvn##%l7F_%^h&4-*rzlG;X|W-P#1vbwq!U4x7-Tp7>V*)e$0MXf1?sFkH82xy1Hl?=l1&n@|>~G#9%jS zYiR$;qgKWZrb&CayE!Yp85wLr{*6<}&B;Oo=kJw+SFvQ>ZY0I}GZ!+W;rz5z72;>V zd;wjyUPxS=0#D}w6qOdEjJgs}pXTe~Y>`y@!H#Cy$bJ*1XkQ(P#~9;G9onqHzE;MV zwTz2>U`zX9%?BI$@rkZ-ymja#(yBT@*TuMKK4PQ1;AoCJB(|#O{7#9@x1B(FSrPS_ zF?mlPak5|LCbpBq=yOr9LeCpaLTdgVr5*=*^rGhms3&HVT-tg2yKVV+xSQyS{tj-#2BoNeTprzr+hk+H}VvJ z!Oa4keE$$4z1;AVpZ^BqVJX1028Z4|jJmcSgvKW!%*z3tMc0vA z(G0J+6$tckR7b51P8YjPddRxf$ z0kly*yO?X(Fvf4IqpUK9w`a~SZ!b#Q&YljqIZZKE?Zp0bh1jxV7vjVH7$*iGi2mM| zdfnUF2GeLS#wG`7-X zq*_nCuBsRfHPyJ3S%HB`H^g!LgY9kTVBRs|ApVj0rQn(zMMs0=L5ztQ6LvJz;;sEh zu<7y7bIi;&*_(>%I3EK;%rP5VSz-$}>fWuOVl4XFdd9BZv>&dFm+Xz0fx@Bg1yJt? zm{ZkIS4D5+sm~F{k8;ypq7a)&${gt!HuZ4MhZx&ZKZ&o4F|y3DtNkQK2HKfFF#n-H zdHc-|5EB;*pCE6>qMc~(9fZBI?+&ydV;JgcK^^1E5j$tbS0Lc7?(IAaPu-Sv!7n8(o$O))-pFz1yL)7o%uhB!Bp z?>4tHk7HbG;+VV`qgd>MhRKmJ`kic)QT{eP_5^LitrrX}^F2VncriT(7t^w_XwgDA zQBRbNnd2f^AGMNkk>nH2{X@(v=_e#+m>y(YREx{Z0Fpp$zp1iHI?*IK)v@=nYU?_L z#YXGggt0b8TXFooxAFe@0(D_El6)E)G2!~v3^-6;;;5@Cr&3}#%v?|BR6~;vbLUhd z3#7ZJvjbHPU2ygErQenjP0w+`P%Ou72#g{!KnBS~tcynDy>t24{m3T}8^QoXITNEH z>(~r2_-VxX^b&MWhU3wPRxuE;)EJ}S9e(}SICv!o0f}o6Bczy1Z2Bol%Y7Q@xicIIxF z(A!FgtwP3GOwIMTKWxSi;9q6-T-7tcsee*5g@wUhlavT{N~+M?E2VO zuxfcUojaYajuI3frYFYHP)&z*{vuAEzl;LPOI>3Je7t?&=O-6DN6}W9hmyK}I`_p0 zaEKAZ7+Ar&A2<|LvRK#?`VmtlG2z?kZ6dr#Fm- z+BW25*5G5GdIozQ--VRqC?qlYDa^0MwbUZohIt5%2*gN#Hx9q|7BVx^ae_9hesCHq zRxHwVtByzx1C!4B3LHOil8HeY@=B{v+0Y7iS2qOv`!m3rB;QRq@cMq#6jq|My9MVj zokvbq2`*erM{rCm{F#6&yEDavx1oakJdcZ4(~+HDq@FC3r}P6t!Z146gX`yy;l{ah zxOpQR)xGR{cmlO$^Z^BVv@-z+j|@{uoLr#mY^_In>QyHGCs0sP!-TyFj*gB9V>0e& z&mgtA0B3L1VbCT7gN^mbxpEHIGqTa!Ka7Z|c?=L4^7XgkjobmZk1psKE&fkiOoOosok)^7hL0o7AmfgarA)Zn7WQ>8Ut&{3PvW2};XqF)=xg znwkpQ+A<7}+aW4~vO`;YZd*6oH}?M=~u{yq8b#I)gd4_2!8$^ z)Ke1*bFxv}HiAV9mcWg%!?nx|Jo>pmRu>FKwrnQ)(U^4+Z=KIV;Mynf`Hw$>^^0SW z7~q35j7gg5%jT_E2X9u^wsrtFu3pB!{OCo5$1Ol~c!1*VET9YyG?w7EZy&_9;ugfm zM8lW;X=^NjG3^@t#i3*8m`IG_FTeB|y!FP*Sh;f_e0=G^b-?!>Baqc;;Cow^k<&J!sJLqIP_xwscUGqn}_XN7Q&N>!RSygD)VmQ zB>mvE%pBxUMw^;j;qMm=PcOMxJ&KC5Jajf!(RY>LbZR}e?^uW7o)&a0Z~eW}3C_zPzW2}HW#-|Azxmd8v3&UgEKZ3=ct|Lm?OfsM>4=fuM!fa@QRJ63 zBQ`o39`15sfr;9<32(lA05>zUux{x*TunWTwV$d{(=$IIq{ZSNTUd6>Dhw1;aP*C52&IvpE-)hF~IT)R?r(KVM2ld8|wzeQU z9T#Y8+lK+akPvu#x+|SFwg(3iGmN6Yw;MMZkM2Kk7zMQJ`Ng#u9Cbltxc(4idqXK| zEAtT$6rf^>)7LApZtJ5+iSr@fwa=9p`-9hCqYbOXAAkLyv31irEKBgiAa(Y@shbE0 z@I!K9w6?9>#+d9LeB}ikPR&PP(mDir=?_i{=rM|~A4S@kgLv-ezeG^NN(3_pQvOJt z=kL@Y+6yyqwXhEJw?4*LVTw2$kdhDz7v@#!M%UCBe)=EJtD84N!+kh%_yCHSr)1pB z1)KvgkN(_YtQ!~TBi}f9RmCEC8E25ryes{B0Tb0}bpgOl@G&NmxZ=nG#_!p*?aa^4 zGw%ow4?`gJ#%98V(v0)??W=FXH)a)r0zEao6T_%TyM*^nU4e~nEaH6~k$?PsOoT7O z!(aLecC1LoqWKXxb0HI{*E8|i&wh$=9TQ&K?6$^QoI86K7cN|4jLkSq{DrM2Wz!qZ zq)BcFHZZ^FsH?!`Gp8`=9RZJ^FbsEh(FSzF%ST>Gl%!u|l{_7vKz(^3jvjm&S98nA z`z=V04}sh^m$z7knbQ>HX5sXyQ%Jv&#hA5B<&}ZVy*yoPF=-ma!S@bhesl~rEK9*V z2advR(I&>JE7V&&_Htujq=UBg8ZI(sdHCs1V)vu_uqr7MJ|mrY?PMCJ0~4@_eR`o>dIrz;HzPyP=&i+MDmbgIwkg z8I(EErnatD>QO1Gn>rCn{SWfI{kV25jq!9g(y~gG9ghffLq}63{lWXpe+ra65U#X!OHN#Z z%KA102ZtiWpD_dNdv!rJj??yEV{CUhrvh%l$%qMMj;Cl6j!vnEV@mUKafJRRKZm(s zK{?WH=FrB6!pFxOPIhC|%__YAn|JB+hv}DdaQWhO+@KwjVjbo%9>iR`LW00<7{iWZXh7nvdH_c_6A%y 
zbrSC$zXTVbV60!gTs<78B!afMqp23DmoMYM(NoN0Yf#R7LY6%U31Hk#+FL7fP|BPt zFnS3BIH9FkSFz@yod}K9PaoScE*Wet!#Dr&pRj!IAK}^0eFm!+#UmypP+iZ;xS5UR zTXw?3O{_TSpXx__&Lx#ob&UGbpNAiQPduB)X2 z6;yl;Ok}6&a4Jf1>F^OHoE<~PI!hu@#L5IGpUHO8;tkleWe0X_UW+jM9=!O&Z=t-Y z3nPwhOxTmLVe>YuNs2{?pC6J|Y`})CTd;M@7A#ImP*;z27$yT8MNeBj&Yrx4!$&SM z2$&*!wLzExPU2W!GZp4Cj$BGZMPsYwycK^Hn2!w}>y0oqiNU%$JolrYp`^nWOBqaT z+O!@4UM@KK{#&?uIUU1vyrWE(b1r^>U%v7>%7>k?WX(FPS)NSiavE2zUB~FS@RWQQ z?8bBd`F%7^yI{etUD&W~4HDzR(Nb9lTl$9?vbZzS--+zB3pjG*1S;!#_1w&WnMNk5 zd6}s=dF%`_=>SK}XH=lWgr%vb4)48p7OkDbShINxcC1|h$F^KL)x#)k92H$hZFvFq zAG?m4P8$SMAP>FuJQe2*7A)C}tZO$=l6ix6tRGdyd3f`sS82#{5Vmv;)~;Owo2E3J zK6w}=-IH+ikH(@^JF#)yMud2~AR#dcYqo60rmdT?ZObMEdAq@pfop)v6obh;9AgkM zEJF}#hsRNPvzQLO7EenNzm`$%mguoLBvYLc@3kgst9!hqmXrm^hm}qMJIBOPWr%s z3s}5jEjDl6gw30mA%318ha!(!%aEqf+uniW2hO6HLF{t!Y|n<}h@EJ{v4e+jrL>d5 z_B0J*AA{ZVIFnY77n252TT#Kl@f0#L^U*&xrF7780bJ-C zR~Kb2UbuooN6)Iek2FgbnVFJ1ouxRi|82Z~>^N)#*(&0b-0LetPMxsc?Ac~7hu80N662e*tBjr=7q=y zVur{AdHCEBu9TZ)E0$s7l6h#XuSaPO$1WyA(L@k~Mw|EI55A9D(=<|cJw&}(hvkbC z&>$CBNw$n|lF4%M#iMwII+)WAEMB((s}@Hg>-u$EyO={qq_~06v1wE^v|$Bxbo-Vy z*u6QHxls+i_wUc4rI#%53dEB2>q+ZUL`5)>4oSj#>WH*rh2nbT#|*J2|92WZ!BBA z7TY&2qOYt(PH~Z1`$#0+z%BF}Y-e<;h( z!yCVT6(=v}B76~bXUjskqZL2>`ESrg(H00SX_N)dV?rbo651s@>b<5ooe{=#M0 zzIhXNa2~yxAa(YWo8-H|Kvy4boKIErwP@2OEKQ8Xwf%3RtW@kb>)1#VGy$inUewgo zk+OC#o_=^UXr7g<%Q~jr<*C^J!2whb`eWthhp=hG3JkQi;_Ul}(A?CazA_u+ zKAM7|pwhLG&OR=u_p+uw25HB-$s~Q!R@u zEqj@0y!p}#Xrpo_(WY}Q zUI>Xu!lQe3Vf~7Q>f)~4M6t$CaY1oW1BGi6+g2?`kSk*&`hym3qu!3Va3c>bZQ7oy zr)g;?bF;3ii$@s+WkSjAK;Jw-UAlPXB92|lL_b~ITNtqWQSIw+GYa%fm5739j3w4Sc;@{lJTc#b?O%P+?M z_b*_Iet5%{jo7s&5j{nhv48(DH1t7zRO;}%Z>t+Fa>0INxPwXiF}(Tadra65BdfeY z`BpVslf&q0E~n2tg?A6-Au@as9^J8Os7J_&QRSt*6dY2iGo! z6O;1(;So$RA$N-i$Ig8ZVaMuaw0#l~n+KzS8e%hr8|4-BZ8q4xV--R@SvJv!`kG?+ zV*pJ|8cWL?<)u++$v+bqYGo{X;snm7W}>B&!L}LHc?BH7U@xv*JV(EK8jbBHwKnSv z(AN+54>D#MLtSGHy5-4a`bIA%TOmhU@ymRORYU_J2-`5rIy!0!a%`1aDVYF%Mwr#}XUSs6KS)zF-}>H zP+vRrc6G6m4cvVquxuS;lZ{&t?j>I|m1szvB8avRF$NpL!9%Iovhh(YkBfrygj}>9 z#h`5T9qDN|(LFJ3ZpnG1ufLJXoa)Fa)VB=I#O+$dWRzk*MyYp~aq8qT+$^h{@wMiu zAnP4amfOMD#uaPVtj2uWg-*uWU7f9pSIQ$2NAA_Dc;kiF7{8}s`G(!tvS}p(J?!z@ z7hYkK%wT3h-s&o0to9}jp1Xz#|ApAZ*lB)1FV37kftw{gY6&1o;;R}vk+^&l`LqMu zH?4(>sSQ8+=WnBpc3|4h0X~kCD9+8n>1%m2ar*>gf$X$Al$9{vk%U(dk|*V?9MWbo z4|8&LfVZ!wdeT{z*qH9G#-Vp#$GNN)EZ+1mbE*vpaCgNe+J*e=Lh6=&NaN(&Z{xtb z`%zriitMZ`wmFEm-Z+N0-#vkzL0Oksq?WuGH=!cqCSLpXyYvHN*tqLy>|K+jF7ydW z(~OUbQqSPEw-2G+CIrhi>{R|c$lD#`%*m#wrbxdl)^FR4Jx}d}z1(Oboi?-uGeG+? 
z)h9QE%P|(Ph;qIT?sk*t?CfB?Gfv$dKt=vF3@Y7~@lzsbzthj2!RbRMQC8Vt6>AHN z6bBji4WP2Ll=5;KH*3!L|;z=7z~)uV#f*n{9J~;@Y%iXCWDh(#v+?$uFXM1pnR?smLE8oGcWK|R z=isgVClQyjnErGl9@?}VQvS9aE-hh&~T1u)S8Sl3WM zyEWL=NI!9v@^Ie#CV}RGq?Re>zzr^ut<%}KosW^0DOc-*HAHa$CkDzCSzG`w5 zjiqV$^;-w0R#(hGMog;iB<7wZQVe!0aG}M=&x@~C2)~Ruk=B%sNXmehMSI9i9*}fTDx5dND zxgS6N!E-3DY-CMY-mVR2m`A_!{z>$?hGXNlwOAO^hf5dEAT_rOqiUjUXyXPkGUbYu z^dmbN+wWN(j=Zz);^(g%L^tbb`MekCc~MJw6oMr>O&4{nStyO`TgF{knfO6J^e#rE|n%&GKSVY6T+r|QE~ zPwa*-bE>J4estHDz-6)(O)ZtktkOA^g2{0-m*lCX#jerb^%;xg6J~Ngi8IHCarxwN z9650o<#o+!R06kOFuVl}J1o%E+=>@}@^ifK{3|G`ZO7+7|7k?fi5p0y?$r1gE}uJt z@Bg3wz=?|mShRdC9^1Fh>QQM8t{$G4pRy3^*RDgbzmLQv1QI$ujE1^mc!fs5HP97< zG>Dn$SvYp~G6ETJ#YFm{v94r>E?LVfJS-fmR;@+i{3N)mwIdARFd2L$FISNJ9c2nf+Tpm%zkX& z*Vh*-mMzBy((YtVL=1*%u!W182T~R+#FC|p5gZ(BF24f^!7~ZE0F`fQ0_j(;prEi6 zdmjHZw(fWcJ9q8EW4pE?z+oI$&YwZsfCwHrH$05MFe=R6$FS|8eR%BAy-0|QK-#qn z7&bBZ7UG5n8BAP7VB|b3S-q42sU)5ok&+azcyHaii@OIBl9D+$YY`SS`wrv`Fi4-z zIxAK#M|@nIxln-NTz#tc+gCat)cI_IRyL=Vhboycld%Aj1 zTGfWA$XGg-D6<^S1y(aG2APR-aZxVD?AFiE9VzVM08vd@omD|1>taabHs?9li7%g9E@V1vk7m!bBqbg4(!_Vu5vlD#GZ`Pp;rQOEX9${f&ShH?55)u>OXqB8>EVP2T8euSI!Pg1Hq10tvktFU9kG6V$# zV#3B*ty`giI}%t84Tlei!c5MgeI+=-*_4GJ6 z*dvYs(~i9#Bc82zWJ8>~z?)UksuE9u$-xoi-Yh^I=guqGY4+^O#o?&|lol~yPtCzY zPd$r$k35Q9+sTKGa?>Fi&Q6YspWcBnh=_=U8xuACgx{@ThvM2+_=QF>Xmi8pKp(Qw zGI8PjImCszA~MVerHvhygxJQ8auq|{zM8f@4sLF9I81`@Z53_t+EptVgqRD>qtH_{ zGpPjXoQ)^KLVV$2--WieM)Xr899&$ma@__drfU%w?#)ESRwc%wSq6-aj;Q4aOk-n6 zUb>RO$yV%=i`psFdG(4nYtz|_o;gte*KTA2M0>n-*E)RWnY}oF<~*9@%OwPP?LgK- zS+#ZpZO2aRerO9G+OPn9eM|&PXdhi1ki2*`R&L#ngzykVMMYsX1Iew_L9uD_w56lG z0zFM#ZK}mipfdgjmxO7tHZ|Kk7N6;o!CQM zyBX9JXXT-&xPk$g9oDU*PWd_FYDOl8RdhOr0gmnD#dJLS^yd&jpYQMMqY~!6u1-An zqyNIe{fCfsGY5^WZ5U%`Q<5}MUvFHyj{3@4Y~J}0w(QwOyRaUA^!cYzSzCv5SF4;F8U3xFZva-WUZ?bC7iC^<EK!{qC7ZpelhKc@W=#2g@r0#XD1i8WaB?W$C!JgwRMPdX^Tj2dvvxmGZDUt z8#$RsUY>;Rz9IB6 z@pF0zE!9QXf8spt|I^s__>zaEQ_p+~)IOYp=a0`yas?9spy1)%} zln2V7VNO5uYi7hKI*jmT=YgHF6q8JaY-nJ9VlM)b0 z-Y!{9`?z(j()&fp3Gi?dd&WU9mh`31d6KsAv(G$%qU?11^v5qCH@5_%B*wze2LMx( z80&1t$x|oc9k&Qie(sM{-m#N5dEKf72=bTrG5{~%5Clgi)6To9o7rkT<(XJXI5Y;+ zP#>ykUy|Y%!D)09wM_aiUCu^xb2rv8x9DtXMeiu(T{luWRU*qJZV&SHx-G6(z_>j* zc>$KMSb>=6Xmg=}a9|Rq-eFYK_Q1z z`El&pyN5P;J09M<8_v{;k--6?qxYnYxO%uVpIVP?dl}d6+<_+^-K662LB`+GqbwDa zn39Z5+ZfYJ3{89e$Im>9^h>8uRnJ6~wtXJyT*Msi;qEhfR6PH)7tl-_ z{i#3s2ila!5b5KrpC0E32f90P?b=1yIyz&|Q=h`ly$>n*lqZekNh&*AJN12lWvf?0 zzuGcEKOxJ_SWDz6CWczjJJE}N<0BzY#`pKMBha6D+KQDJVn8rG z?SO5YSJMahp`cu3%RGbD!zQTv-U`g)qFgyJ=5ccMLj2OL*tB(*$^pcGRhH$Vf5-$6 z-(cE-t=PVGGd$fqu^?#?8ooS+TrXIf{mLvH;kR#SuZ6iWoB-+ zk`EyPK6-BCZMTRRtlqi~n^s0KrgLXZ=whA|EeCRw?)A4mfQwfM9(w#KY}vJ4$J(EI z49)a4=gypAeYue@H`eVD5;~9h^FD0fxe1SMPr(RtyOOFtjFIsYm3fi(DGQfl3uA7X zm%V#7V$-q|yz%xC^vX4O)%U%Cc5k8|wN;gP>*cp_=~^~*{$u#`v(F-tdAMHgildks zWDH%2500M4#vPC0$)}$-$L*`Jg#Jv{I-Dfc-a&CXr*f1xiL5XC8^D~Zt`+{_%olxK zVCwI~O~ynQFPv97RcMGe%37>*Dtp?4SgfYbCNfu?>-BUcGd7N_vlc5?EJs*qkokmI z{TUE+w-RFoL}D}PiOTXmB&=A6FaP7;WBbOH@FNC!enADT>_@`P-Z5ty05^Q%kN*f? 
z`244k5a2Sio`V67K4JLS7ykj@CY}{hj>^EQ{^fO&(E+%-O{1T|@`+P7@c!YeSibWS ztYP5i?d6Q|5#vs(knf1t#VhgU@BAB{{=`!VchmG};CUJ~F9zbdxv5A`Pen#rx>`eG zng+3FU}TmMC=OT?c^gQ7?(hnV#dp8^5BSHw`z$<|hlrUqnrS{b!^S@W&pi7DeE;vh zghf=UyP}-|sU=j%*U~0%B0CGg3GrAwKV0+A#tGrUepnOhjlQ;eH1&>RY&wwTF<81} zHRi_$!q#5a2l2r|8v2gLQkX^u)tY6XfnChNJLAN=xOn6QGMSW=$>Pa@9+;v6&&O`v;d z3|VPuNYBheWB&x&yZh16+M<#H;dKxb-;kIfIM}$+0N7yZszsP~n!q^aU}!`?12xTa z0pZKAV%16-L>4$sATr9IG)$v|4pUQR&}V^JJeQ|zJc6)>#%j2`7Ih`1RKOurHa4Qh zWQS#I*CWFGK>=$JL!vQTVWZN)&I_>#NsJk4@aAt`qFh}^L2)@I?8ylR0MgC^61L8w zW;XDYFov2cVLRN6y2=cUkf^w2Yd9Rrj}@NT zK)%{}V<7|3fBnyY#8Xf1f)@p0HoY<;I%l^4{Q2K}8UOOHe}kw{AEoo6Q5qx!bA+>3 zIF_y4fDr1e%$ZmGI(+^MU&7-nV=&s?j;!JujE%~&1?9-j$e>Q&q~Y(xP=6B>;8u*v z`blL&Oss}z^jE7J71Fz;V``y&b#!;I)F5>(9$3751#X<$j|&Hn;O6xkC@3m}SFn%L z8{x{Bts|y=!?1AKGAx}R2uC|R*trKGJT4qPraJTw%TooDaI_zTtDOlsH#5}h)ajXp z=r&El$&N|JVE1j5-Oh0-8Nqme8?If?$NUA`uq{7=xXp6eGNd))>!i$rl^mqn{YqAG7Zd`zCOe|$<1?1X` zqSetA9$sE>jW6jb<@N*<` z`ki z8*<00-A}`lgeZFLUEsz9QWj&DITZWi?(dJ}b?Xon;|o_7xH(V5#es>b$%F~vf%y1o zJ6t@IhN~BEV8!ZH@by=#l$v20Lw&=@E2_spe-B3b%W*R!9cef6(Az6(_O_$BqYI`< z8@LDgV)M53xN;>O1AUANXlu($b5S>LgBAN8;rQ+0=IM!@Pd|mf`>Q`eq)i_(PaVPc zzx$u~#fz_@u%;0s<0ec^^y2EJG`d74nM@M%)9Fjn3Q*SA1IJ)5WRzx_FY*h=jLi(U z9L@&INy}f$oZlVBFg?_c+?-On)fCS2S_E_cCGN0cpcTZi(+%GNkrZ0I!vu*Ro?zw4 zG4wPP;pmC8$S)seaKA}iBoeKi8e@GSPh{QBA8VN;JpW%m#Eun7I`&|KXYtPj`b)RS z<@mEN|0TZl#itQVTcwI65tMq9CS;|aLwIsDTw;PT?cj-38+M~Cy8zXN&1MQU_*1{v zEnb8$ACW6tghxl=vw!+IY}~Mxw8_&9LfbT!CC|s=_#m}roXGpStsC*3fBOytU3=R3 zad^<~O`5tW>(`K;k&VoXW{gZs!+vrAV`G$KrvUYYrAx1TExQPP6L#oj5}uV?g+1Fg zA;eSkS}r3_!FGHInOD!^?U!G`kALwpUcJ(YolkxVJC?^WIva+my#&Q&h3J@cLStJO z1L$<>S0Q@F9Mnb9uFejsha;A(-GF85S7L#zo9g6)TyHp&NqP>c9Sr{SvM$gE_+aTn+u=f;Q#Oe6<>BL}cyOyY z+%9dpWMpJqy_(n8-GVEZ&)|*2SFrY>XRvwiZg?;d8(~Z|Gt;(iaEe-nKl|%{z}LV0 z2}JtKjNA&G5cfcQ_OJgD-}{$;KvJj?<48>h<^?Wz_>&*QSHJpK@EIFG`td{fzd!mR ze*WUi$g60eY|4Z>GSLi2Y?uv7tI8S2O;IKtaN}w^eO?M8k|UJE7rFw2f)PJIl(IGk zM<(DgQPFU9bcU_Gbuc=nEaVLRgjsScU=F}lis$5y?Yka=y95eT6Xc^KeT@V21L~iu zbtky>EO9bkb@K_tqr11@pa0^Ika^@NzW3FCM@>twC7DuxEqfmcrfkvPAg{@l;W5hX zJU>~fl-=`>tXb_zo2S9jm*}M$^{nJ$8j};Fv=8*zwqqzvOU21kXVB;3jeXC25-BkO zu$vfS9pgzX&Z}=IKK`d)V@~zASQ;sMXFU$V&AGI3#ri$Z;6H!-b8J~W-x9a0nHgo` zUx%8}VZ5&Y_k;K`Q~3E zB1|qeNSba7Z~rhXr~Qbc&zBf5AYl`}^5rjL`;uVl5H*0Z?n2v8ntPcs^i{@l^cTZ@ zuybUL!rVdL-hxjgmeOwxc9f%`qLQcwP%P`&+IV5fiZuuoBS~Kf)xl<)trJVG&Hut&0&!7BV%9s@ann-m_~-t(A18;QG57?Mg}7ayYAlh@2|ANthi@gKkZ1s26eskIPi$3?UP_C8p#VFSMVA78`6 z`!>@?8*w{va6Utm*5F&;_*?8-6NRC=Y`p*C&+&JE^9`K2l!f+w#%Z?1>KTSr8#iO9 zt6Ht|Ez2a7GsdYOcfzvO>zI#AOko2j&rrn3(irX>&GaZD=7q!C-35*ezQ%_o6S*Cv zO!++p>PYX>W-LyekL8I8=4H+t;OFPToOzJ5!g$WQuJBIH#ht;%)erH@ci^u-^D#sX zmE*s@_wP8BT7g0O-P!g@@5|r2W1I^#J&E&t;Yrzh>6Mpoj=9UFj696ej>}DVnID5{ z4YQACV00QKsoC)N_C-`e6y)BdHO!)o@dz9Oux!;j{P5rZ0Z%==N!f+jvLN6F7vE_7 z6=Sh)fBlaTDIZ&7sg_P#q~#Pa&Yi~kbqhf8CosnI^uWf2Q5YNQMOkwv$DuvT!njy$ zS+W3r&Y}y9h2x@W`xzhhh^-Y5ZuBmQj4|F}tSr}OI4br(Gq&k5#)HkcmS2r|D|W#* zK)-TNV?o(U!un+~D9Anw6ZK})&H--0VOX?p+xzkUtxU#Y;(k3EZr9@~wu zAWy4&SLalPg?VTm55S5YkHO2$Y&Y!P;Lf<+i@yH$Ig%Q;P(k7c$NCsc+q2(R=2U0# z&V@Xz-1#`RZ(OT#s^Kxw2{YJvAad1SeB;~S#%CT`M}52{b`+2}lQQ$f=l%>o|M90)@&I2ueao+O%PBH%!$<{c1<1xuG8A~Z-{ za1!C01$Isdj7&g$+&sO`h`CtyoSa>mm~`Xx@$0yF;S!$x%;&Le&1!ghd1GRDf&qwJ zR1un`PkT6ecp)q?k;yUxRu<_&@%+@7dN2R*k;C}O&woKBeHCxL@-jYn=N1GNr+t~s$_F%zvDm@Yfq}}XiGnGw z3-W}k7rynq|G?@+$+&!cKYsO-pWt7<{ja!oqlihmwIXuk-1tYta&DrP;2QI55iA*x z9b6cIEn2n`KIS#Y^qfp$a-<)fUEL_CX~&!I9l~p`yu$4*yno^%gCvNjp;oD3m^4byPDhS2{_x+kW5Wuf|;(SO8%M?EA!DT zkc)gw3KlJ1hP4Ty$i8rdGSG>Vx;l({g<#2=H7Yr~#rB#(k8B16m%SGred<&A!Y3a` 
zWquld^UMFjzx~_4VgJcXYRx@su${Jr?UYQ8mAt7P3sxDabz)&-Z-;=80K~<|z=3?w zGA&>jP-`F_8o{yQ0UAhRU?4Kn2Lo6e_A8*;jSZu_w+9t8@cWOPP;>L@E3e~~{l_pq zK8n~tSt#6u(cx~j0Ie-`(V9yAoT5gpfyd;BD-+9v z>h9OIQ%CUI=YNiW{Mt8hDK!T}@^}Jaf^<(hdZ@+Jon;M0u^udQaHd0IkT=D|Q=-+* zwibNw#_RayFMml}^g3RDe3<@Y1P-)Snim4bY|jzuOiyCO)Q>LtsDM`}ZG(RL zORu_OZYLg4wIq-29O;bQ;iwQXVY8jUq&Qx)EJ~g6Nn4CfQ1*z+kUxvA6W7!*nw#t3 z9TuVD4>_-B6}uvR7{~VnI%V3Ep}sC0J8>2lu4Un4pZx-stXj|D(VNbD1g>%+&s}M=ov+NPnf$(Mmy@MtUjoHK5yfD5sYW3ug zxuNyAoL$|qYwyE&`st@pR#b==fAs>BlJDZsvEyh_cceA5((1F5yQ%d=4+uFTKeC^6=3EXlj<%D6H-g%UsR)C36`;k6q{-S04m$ z_KAXnr#U%Q6*MhMkL2-Xf~}JiY$ywI|41!t&Y(+PX%;-vpV(XJ`=NbLVC%MBII{m; z)YUd9j_EtQfXOt4_O1z(m)GF^cVDFrzKT~U$45^dgO{(XdU8<7B8~OFC!Rzz?Q4B; zEoFQX7tWl=hFyCQ8so2U$&<<6K_Pf(??d>vfBFaf)8G9imMlranG>h+(hKjPsi_HL z<0iCqF(9pM#)0=gz#Ftzue|a$UV8mqcz8M?Onv>^0P#^AC$||^&Fol%j8-nH>sOCt zY&yxfGt7#qsDl&Z1L*A@fTNQiW!g>Yptf0NNxB0%RcwVzOpK$$6j~Ul9zJ>sLnAiW zv~3?!7A=7n%U$W~?d7(#YA6%uNS_&iRSd{|j3ARRqYDC}mo8q2j*7WXCZ5;iS?pkuL>|+k>Lw8MjBN2N`C{XF%h1^9#lT z#=$Ijj2L)vXNUx|x6JFpZhF6{p>SvX8a{@XgQg`G7rY!O)W#{>>gnsZ{yY1UT2?7?CDp!`#KOqx@4s< zHSe^4A_4MxuyAOS2xMwRj1(^RSyNTTAI5&{d&ty*arr=|oKv9ZRldM$qT}}V@ltVy z<-C9#X`eL%vnyyY^HJiGsne!-spR3nfXv4>}H2vU|U zqYtrsD8f32lX%=EY`8hf zoNzqy`mE@*=EN;9XMrwsb8&`e$NX}dQN59e{?rY-o>Klu{+db;t}Yj5H^-~J|EdFdrI zws$L8*EnSsxRE9s$=_6&70||v_V&X;3@xW=h7K9Ktdlg$auJWMle)2=oDzk=0FPU2 zlF^!__AZEyjY51(kjl@53RRCh=w`ee9yJf{Zsrf$n8)Km*_tBHy7~u{{T1+Foat$A zjv1yYd&Fg9JC5;*am6=jEwQG|jpQO92_y#N1dUGG!z*YWeT~A(6VB|*34UJA=xVK3 zK66s&a&%D_=v|}=+lp+6TuQ8{mW*YNR#{qs7k~NyoUGw! zgB@9V+dRw#98Y&=2L`D3lf*wFG>(~+j?So___C={#uXiK_NC47&~cpRwbo(ztGRhE zn1nlXs=?ka96xy$m(z3bvCn*7Jss@r=8dV5QFUEm2A9gI{E)z$DxA5#ru?}EGajSCU0A|X@}$v%C2{bVH6e77S=qq{sc=FMZ(S2gyHs1bT^G4HevzmN$;8h zxwu%8djsP5lzQ~1em{y*ye0>F;rN*h4GVP@8l2E@#gWmz)IAltHK zik&!d7&pmoHpwQto88ccHw=#1vSf>y8HbsfnR)KqJMTMHx98p=X>4cz_ui3uZbQ|n zQ>PAAcUSMk9ot^UH@^Mv`1Zek8~^;3FCyH}m~^y)2(G-sKm`~WmV1sG#Oxa`UH5Sq z7E>RR(=zE?%PAE4$~a)4IXcvd%8F8?Wap!+--hpYpoL0;Oh<(J1+s5l6nfjF*vNx_Uxj7aetSz71tydMg)3{3ug;!`T(|z%c zA$;BvCa(60rw#0SJcdu4F&w-R6c&%?pMD;{{rOMvPoIAgVZokwZ}0m^%_^{#)8OIc z4o4<}rXeloGFs9%V?xxPuXeq(fx0YLR87Sz{m&>Z1sGtui+g4pCS%0tNC!vJP}jsKrV z*D#Okn>rLO0o}S_9cJ>}j`ZX#R9Drjv`)@ykW#Yo?C1~*atn}^o~xdu7%O9RL&5aC z@MeG-K-xBJ+>9@N{ag4p+Od2`bx^w_@^!OzVwHnD%o8XxltR>jp@@U zT%F8JHisE2aZId(Vv|Xs#|;q?(P*xy!H9J+3|npMzG_D*8&~9%-q+mKf0o8XqOQ0U z$%Qre+&9046$_)`Mp{~0M$pmHhlNpLI$0N>6PK@qWo2ceq`Fy6WK05azDzDMlCC2y zrx0=*$bRxikFK0C=#s_0`_(c8-Ig#UB+Nq~MPh9AQubR)>i|T9Q4>xc@R_v`Pe1ob z{P7n*#J4~57#8`q;PByHIF;AP09vO-S_TY!@@Eviy*(Ia04Pt@$rBRAg}L~zKfH@Y z8y}|r(KdejoA~l)pHs^s7!s*kcC^6R+ZDqW+F@gG2pYSDWNru#=h)O%F;N{DN3*dh zFfL33x|~joLK0@|Lb?~>aTu|&p*3E z#UffxnE*4flBeEA2DPA$mRmrUpXRl^q;fgH?=lIJ)38I-2%b2K#!BGjAvuq8X zcyJ3|Tt5rRr{70rO$W8x2kxGsOpqe5{>}&Rb*qei>$~5_zy0Tb;gvuB6n89}rJsbf z2d%VY{ZhXUo(POvjc7ILsNbxs+#(6-*b$8S1eFZ>UA=qY#%fuF+R#F z?*cN;O|&IB$S5dBpGo5q)W)Xpn+H&qn~wC%9CQrXuV1BUk=ywWZr%u+zYvS4gP(rs zDLnc3LrBlcK{oxh0#|n|UAGbF8$e55K9VyFFzg?WMROAoq@q8WcqKNX8>cH)FnVcd zngPlNSS#oz3XcWv8x3I@L+r zRTZyO;yrx>5FZ(c0|(y2oliW2JMMcxt#>%s(}w!)ZiGaHB7l7#lLyA_6@`*h<)eAf zqT;ae{33pb5hl`=xN+?w`={3g)6EaT++K_2B%~B)s<8t9WPkyJ%+8o0D6N z%KS>0%`z#b{nMne5SZ#D$5;2v068LO%pJIKlR7u*Rn6O#6FEJYBH?OGII&0fHD6dQ48!9vO1~m@w2@FNd`~}#!X$8LW`Dc-N z{R%Q~7V{cgWn2{+mWY6$NPO{Y-^91R{T-E?eCPY$#!r9s-}u63UPQQulQsSrPq6`V zR&|j&zjV_h_}w3WgQebPyz-wv#Mz72&?;-usd5ryIw8>87oH<#v@v&;HRq?os>R=W zg+IEk=COiNSGai1Vy+W{O2#;`vx4yCGxs4T!UrycU1%t&Lx^u2d_Bi6^V3vQf$Nvg zqkCX*Y@BUzos8oU{a`a=r!3UhHYvId?NsH|<^j*pc*HJPi90vlg*Siu1GsjUGtW7o z)*ZCaMn(jOA11T}_oT|w` 
z!aX(J<>&|xe>Ze97nceK9$P&rY{pPuEAo@i;1{pHi%PG#__rVbH$JglZenXXG+310 zUA}k@X?cZetzV_vR{K3OaE3Eur;YbNk6-`vfAEuk`8=LjI~&J#{R#WeT!TDjqI*nV zK$&-TaKlLNkeCz&W3;nt&W!(u%)=OAOlVNh+SZ7HVZFMh4J2k5KwEnouBS3TYiPE% zF@{Y8?SjSJr#@zqkynBN#timYa--h4+;Z>BijpE~<`zm9Ue`Rj@ zbv(1(x`Ts#(I7Vn7~AB&>i7Sg?Ql3wa86PLJ1RWH9AZh z<+1scvR1?-E<|jI7nO8?$?YI(n5=?pL1R+`&Rdq{#PiR;fabPN^~rU680u`n-oO5aO7rhXDr|){4z&ZG7#+bCCKsRi#EUq0J>8}Z z40;{BBGhC2p5}JOs;!8g9gEobL~e5s8y5*bUz!c2;gb-LIW$ojCB!_GCkvctqNR#l z?di7QVp=`iJbbV?K1km^ld|5P2#yFtK~@&box`fUv>+oXCEXsQ)CZ3Gva`9DyhG_7yCLJ%U3VQbI&{jH#bkTFu}HmP=8-o`Z`fn zF3Fo-b4E5TQ<`-3JGmh{kQ(BqMq#2e5y$9iVIO%$J>ty-J#nl$6AS-9I(cU=ghkE6P)8fu%ZuSa z0T^MTarNXmWYL+(ons+YlS_wX?nnBK8w~Wmhj-sSgkD-9qi-6dgLF219r(pheu-~= z;|Hj!Z63!14K}P(frpPbW+%pCgn_Z#2ab&jSL@qEQnqGs^N}xtlbLaG!EkjNLQ!s> z;!l51KQ5lXj?A12*3za^bVXis658Y=4T_6QwB;!-gD#=EzhCsG9)V0+W5}oFt5@L* zU-*L3n=axNaAaWP!hqYEk|ym*y_&ju~$e^}|A||+OKwOUAYSG@7oOEQIO=eg0_zn(_ zpuMx3K?(^_cX5Ru+ODdnv>CAJHQTff%AG!W0$~1}SUSrf)ux-=cxZ3O>7)__hR0#` zoETdIXBkAZ#tgJvigZ1^BHqkG|z&|($ zy}iA((|t;}$H3VMVF|O*+FXm?`UbLD2NTMsiQGDPBQbG4{LzWaM~-1|P_Ml+#6Ys6 zrx#sf+a0Xaq2onC!Nl`&6T`%D3y<)_H@^513e&IQt+(GnV@r$bpNFp>^&p0cBmFGf znjIZQeT!31orT9o!NXN zqYx~Qt}>zXBrlzvoEU^MNpkgKTJfaq(*Xt;E=9ga-Io$*1Pg zE(S2k_OuM3G(U@e%l0&YjD+SKH=a+vc!o0bIsE*Wf8^NN#`t!iZM8F;+ygKteimA* z(vg{wgl_8%5Si1hYiMC0CP&h;aYIk;sI%83R%BAp$Q3#<%v(HPt#uq08j6rW+AtS0 zYU^5YHmL~9S1x6;Y&@qyNNDe*8c9@lSq?-~RYp81AaY&5S|}Gg;9S7gjrR3$Su^ z3eZ=E`UjAel!4gr7%W-36pw$|4)@<2=k)9S@KDZN^xdljAeJ5E& zo+#<#2vR=fiS7|4`%VaDQu^e5D`4tr!m(4A(IZa{2gcAhM8UDA7Osv1j6Y^mW*L9T zHhUI4^*y7>FtHPC!@(mEQ8Bae^3#vuChQiXb+`V@==G@_D=K-GCC4FOnjo5EN+xNih6ejkT~)1k3F@7^OYcDjIXgSTmE++--mBu2hgpeBv7Gj+Jnb}g zA3TDX6?Y?)zRgw1mF|aFdn2A12a4a)OPNq*S&qBJs`6HkKCiXKte&vY{1I#dQ=uCJ z>XbJVBH?9AX+D{5@KgJ7t@9={)m1TZdmDfJ+ncy|+f(@NfBhTc5<}Eey3(Q|k9N7M zrV#)8i(ld!|NL`YzL7ijVU($0O1emoKl6w%j^VlEhtb)lml}}=3*_yH4*4pV0```= zD1Eii_6biUh8O!FYgb8};=ow)W6wSTZ_+qxb^_$9@uP$N$j#0{O=mwgZn+n}F2>Wp zqbjF*^M8Me?|kpSkX_lMJem!JTItLX&L7-`FMZ}yxR8=#i`)69y|NlDjjed$(_h5@ z{Nfk*@sEFkAN=&E_|dO_g$0RmC`(VpKu?$Q?Y9K`%hMLsv}C8>gbQQWWsBCR{46ju z9F;XSOuw{^luabX)5i;&R?dNm{eOGk1$6X|sLAxP<=Cu3C-P$=eX{2F)`uRz(wJcE z{rhhj*9^ntpD%=m-jy=+l5vNA3+@1ot~710UB2Prl>h~dr}+_-cB zzkO{NhFrYyg>U>b?!9L{BI$?K)A0t7uKL1keCqk<@a^yaRDY0%r)?9kgj;rz7m1^` zvVjD6QTAey7#EFieChL;6&ix7nmRkWBz6jAI_ram(oFTSIsv(^UeR~)K+M8LXlt!Q zPZQ&Pj;XvgcI4Pebab=&IQ?X=hr89s#J>F1f5&@!Pr;;acG;JaR_RMxYw*+m`8B@# z@4rDcV+P^5?$RhWY*>w+juxaa&(rH`TF^;bb}pw1{t+=)79TP8mXwOQxykSk^$+1J zWj}1rJj|YL#P&`!lg?PRECCs*7uC{Z#@I-k8rWOrR01RLRO;v|ujQ*_$>rS<5FVvI z9-4aoAiCLceJf)W#bp(^nwp3EwmzXAn9@(MN=f~hMdKbX z$E0MW2XFu7clgxjzK)z?J;^L`=JW%CE_FK?Mdr2r`00Or6=v^Qh+n!M-n1cN)HLVy zP!4r>;@AK2pV)Wc2sScZX>Fzz7$8^e-oGUo3ZAB=dd8wmz^RTXz*P< zyb$O)ihb|C3ct_@R8W~xZ>HhG)f9aEbDzV~B{6g|f=w1rH6iQjWgOqV8!o}ISa{d{ z2=!sIA%U2KJmu4cix+Od&(jyZ?QJMy5PI~zoyef^Uj6XX4AA1#`VNi`!?29@Bd4GY z`B{bVpwX_YtzyD805=~Ff(w%bHGqAnEiK0H|8@{ft%G>!lh43Iy{m86I}Y+-V=>;^ ze*($X{n)T>wR$z#)_XgcbW)#?Y0Ag|59DX2BR!`SF7%9L#W}cqGadDPPI&6s9Y}}@ zLBG7mfA%OkhAD7#Ox5M3I8P@~*E)aOAAGj0#p0Yr+hTg_Qdncav83`IfA4i1Ka+&n>+Z&y zwab)HI*H)9xWkLVW_{ibb#5~MKDNhxf=+D%(<=lZ4a zqS&aE*ipv26Ysr$8bRI>Xm6`UX?YpE0)kZFD@x|z&<{_iPNWvs;iqq2#loel@u_DY zv|GPpYLM=av4Dy*I@FJ2`}d+tK6zhOigNNjyFk{fbHRi6+=bYP;IZ;9=tV_-GBS&6 z(NAa5&`?L&X+uy@Alzw`x*BWn$G^UX?VtJ_X2(WY6YBBM*V~4o{3PUM=OR5h2dO0u zbkzPB?r2sEC;R*Ost+90Fp$;a~IBAh>S7~Y{_Fa^frp?fz% z?(I6$(Tomv(UD|h=Q}4578`{Pyyjtjz1qgLK{}*nyzu90WH;9p1DZ zMY$PB$}NG9^dqwn7q2A2Ehrqm40P_paiEbr%;Go;VfUDYi~th z0E0*Np|zz3*DhZ`eM2+KORLluUXwD4)T^@Fw%vz72J_vNvy^McnE>~}(ZvU4l)3Yl 
zFQaSF5f41_80O55Ruh~9^-P3MoPZw#)w-5u6cy#;wKsR8qOKE9Kl3zVV?rpOBFB#C z?CL{G@-=j}(8(}BE3K%3g=6RM>(4;QNqtp3H|+w_3o9@H4+bI?s3@;NMQt+{&QGL+ z?MD;aJ9gq49(wp8%!&*m1AEceRE+Z(4e*$?78{pFK)t-~)Q|qwYMf55!Q3_XAu3S6 zUMQga2%URqKp3i0(sBLVNq7VYpn}0+pg#Bn2EoT|lmSI0a%o?4$@h%Z6v|gB zWiK4PL&MaI0R#kvz>Bt7CU$qfzYk$NmM>#prA<4NR)&v%<}+BaFjlp##@^hG!b`{S z`k`xZ315s&>y{$eS3ku!226Sf>*Y(+zd^0Z6VJZzNrX^FoV5EzLwPMO9XkULUr%%n z^`bbx5NA%FLV0Z$p8D9wX`5-coJN@_l_E3kDoTsWagP3?q05BO;1Ke)8bSU6YH{Fx z`j6DpXVBc#MCV_Q@``F?XO^MM?1_z=H)CG7FKO?=rK1v4jny* z=Rf^Ds`(5=HUU9WT)cXY5I%e4$NNuAnsZf18?acZJNn6fO8j< z&@Ypc{k>|k(L_J#O}bpCv?~4(w;XX zAnp#VT@cK1C-k@D*nxcrS+E`pm#=_#e;eL-?eAFs=!-~5jK%CQe+2ls;*Gao!=Pyt zvlHeaD8Lt9ZcJM9bCF+I0*4VZb+H0@flcTKU%gCXu z51E`$U0s1v28G3yH3$ixrM^6;(+5Yg%ZzIJ%}% zC`!AA6NirBQf3<-efrCY454`$Gw{=cj;kb2aYK-=3mVGOa3d=Zox`q7LhDdbT20w% zLTqddZIKCWE%L?GnsKLxzEdXjhx$73oBw?WiqR1o#Nu z4)o>WXeqA5>GyXbAR?4Dy++x`n))XA2L;2A_Cr2OmP)xSDk&u|GHEA^5D@5vL5}(0 zhy|Xs?Exgfk-q8LjdUg=PUxsF$Ia{4X#>yUdq4T9T6V!^AN$Zzl7>IMxgSlI5Zt|K zJz~R*HS!Fwz(gPN<|}`}&GIhXzx@ToMR>wh#m6S}Fws19`Yf88Tgaa>wPoezAtkE> z3zw|IlaJiT*qMW>CKNs3O8I;DjbGz>Q8S+Z^7pYYKA3VL6QP}qOVV+(Yycm7{wZ{q z=i%Uyqj>oFPpgl>-MnxPA)$fTc;_10{Ay$-ry(yZ7gugvhl7W+dOfX^iIR_hAiSK1 zQD0htw~wUaYkYnQU!u!0v-5khu9VESevsUUzhKl&5U^k)BnI@}f$(GEuFq zsYLhSFx<&c4`*fxrf$6Xmp{=Lp2l-8e+8dpTpY}x(U^E0lT^w=UqdO59#6rkb5)3l z3CFrEtH>+;ikJZt&BXX)CX?<=h}}GJ_QEA}wzgxquZQ-n8YPuI>UGEP5I^+wv@*{~ zMqX|{va6fBHd~x%}O;k5_V}yxyep)(?p1z3Cge7?H z*(cRUtI|fz4r#L6t-f+L(gMgw6`r$R6yG|(vKs%U&WbA$%x>PB)VtU!yJH9Kbp?L6Cmn0n-GS#H-vn=prL*c+FH%k(N4sYj7Hzr*!5%J* z1HBML|M>=O$(;4~A|@^hA@qf9)Q3}N&NB}2Mm7CpA$?al^CEeDKET_clQ{c3aQ^sV zxJNC*yfy0}Z{FL>fs!8jVK@1})be>iPZi%v`gPic++0-BHXJy1TE&+E^y@8+%?RYarzdkJPZzXT&~6mw zFsE{Xyh3^8@CnRHSct8UKZ4LeSqD%1H+dE0t>6AHbERCY-TDOP%umpEL)m31!+dwJ z6IU;$V(;Gbh?$#!Rd=m}Cw-~1EhEE7y>bbavB70IsP7)e*Z$?3h-3V& z{F3BU&YifKUyWbvyn$sa*5VU8?o(qUxGJhjk(HH-l}i>et`5T6`_5wd%5}JNQ8+H2 zKZ}A67ut*+@TZ+lrfn@MEJkvAo_bSDKK9nyroYk}=;urS-i`E&XVC5#fW>#-k6=&n z4Wr1(%S0Yyk_WeLqpuI*$jHADm|81w^zc5o1V*5S`HgIzo}T2T2V<8`lw3QE-@SDZ zE^!<1h0lEk8y3mijGAu6DZ`hI4x_C+8?U{(6Zs8YxbLASkr*kBR3e|Y;2EJ$F?z>?}=2L5eo?P3gljb@&4JM-Eq`Vxz~r_xJz zHz!=Wo`jzE7W6YF%+Jk6J^fn=-1E|(%`_8Q!= zDjcp(CKNG7D=f%|mzOJJuqtGwT*jV*r%>JIim!e3W%x74>F#JkGUK^Y#xpk=&lFWw zAt*cyZH(t+LS14xS;j@h475K>79}D$#0PthoQh)YKgJd_jC7;m!ZEyg_y*i&F{fI)6hShPN1DfE zUD@w;{P|bE#f`!?^-;gYiQ%*zx0^5&xwZaCBHP(st0p_pt6!r8@{MKJ(?TW6^?GwO(USQ!_4|I*aRcHhGnmOnQ^4kdFB0Z+#DQ=FUugy=y1 zD6tL52Nw3E(W-0dVj}!S%nCI=VxgUAU~mwY&5cD-MFkErP!nO7t(APAGjAcn{XMCa zjd1Xph1Dxp!?~}EvQOES*nB z%(2+R06dLGC_N>a$#p$uC(K6_6CPJLcPu6^uU@)9XLFTFPBDJ^(;qSj4}yz#FgC29 z{*x}5L>FGb;z~mf+nnKMWs?a8TGXl!n$uB=wCsKeQ!zVvnM z>?Pc|kwT-}fvj(%BWsvBGl}i^gi5)@``}Xe6T}+M&;o#z~?w)67Wl|2CuwvN~ z1o+8ohfL@O2AO34fx%ZgGSag-pziq5&whf?06i(%*m1P8!3);xH#M2F{L-2xi%}tClm-NWqnB$sE6Q9wQ`^*p^o$JJ zQ3tGCxf0&4PME)NA%iv_?B4k{bs-gv42)jit z^ z>V$mJ*UL8)FMay6SiU5l34*K%*pCZGFC+D4HVUgtk(Qm#U^y3Grd_y?en6gxkf1%7 zIx9~QojiVwJV?XP&>*%y`80x~7GPFPJeJSVHx9(61xG}}!^00LH1Fr=ROpsry`}!gm3@&M~Lu|^_0X%TGW%7Ljyy2 zh5Woqok&YbLrGBu?tAoMJonOb42<>7MzL3S-mxAxQnGQ5Nq9<15_Mj5qKN+YUIt$7 z#3$0~$OP91G4yLmxmmcBlA?HFHalV=>GJWCDF%`EQAGH;5Ks2TLZGcSK0GM_$^ z2}2FyDV!+ZUi4A(kzZbqvlp%@znVurZGY+sj%^@Im{F3ShhP2X_ef=;FYEdShJ>K7 zxCr_ACA5crShkpB$mC_toOx(#YQ(W4hp6wVusD0*lb`t#mM=Pg0l9-NV4I{fsSgo!47(zF?h-t>X)OZOSE5JI%LF|$w*vP*@Cf2(N5})L z8aP&PGQ-8i1Eme!xNq}j40ku7XV8p0Hs8k>K@wXvs&+666Xmv|x(4%?tz*0~4_?Hl zlTrg{JzQMq$LAs=zW|q#lTl0=TfBHN?t9>2CJ5eGy=pPX&^k6uWK85!^2IB7=!H*X z$$EJLSZ=xrFut@npdhaVtJl%)%!ws>((LX^9)_Tfx0eB-U zZUMI5vk^Y-6Z|H@*D6P7HY-=&f7>Fz^S zQxCRn+iIP}WgC>W!kT8r`FA2dTApkax<=s{7=R$g*~R%qxO!RQcIuCVi~1;?Kl6{_ 
z{%)LL4El+eKZ^$+-p;sHCK;@J97~b}2IvQe(b7E1WV{yZHZI4VTb47<9zS6yFp^i59i@bSco6>G3(|53)YH&K|Mg9O^%FMa(h@b}XF&~|}2m8&D#I@%FE ze+}dI2We~d^>M(#baQk-YtJB_e(4JcXI!9s0Ta;jvMRvKADbR{P<{MN>dMnKV*foU zCw0}8ShjL8f|#U=olc00!-fqj<^JY4Q1P6_Or7eZ#c{!wJ60jcSE5CNkZLfCzlrQi zN814Vc8z`{8N<|{fB(<_RIg5Mxo<1;3jOf~iQAnVI#E#7g`~<+eCp%RV$Hm8bxl2V zV;LcRb?}Ug$Gj!WncRzvJ7IQ2FiOiyVet&Wn$;`d?e0XMxB`6x!`QoLpZe1MP3D*7 zwT+00oJF~c6v{NvKUOlX2&dj8%$*M}Ha2E1$8>@l`Qzs8h7}9q;T;fx);=)CWNg85 za(Q;5eCaeEDID9&j5$R3`!GVD)hno`e;fKztI#FKVi;MZa_|zv}z=nl-J!u)5o}Nzh_4cW^IRXMg8FMsa6~|yv{49=h zBb>Y=@W9=h;4;vQKm7U^xNtd1O&|sO`Jt#VhxV@w0lq$1Fna-3Jd zj{&1h7`L^xV0g%c_3JhO+1` z8k_@S@$}38gat7{*4Rb2HXHh`=@Zu*wi}(n=9^fs(%1ap&2aZW} zL=<*1=Qw%xjEZ40a|-ai|M+jl=(Gzgr}@bp<|7d-JIel^W4xWtvc(m3h=@+0kBfj` za3DfM!g1~LWn8&>1^&K%_{x{Qg8Jr8tlGF4k(7I-8}djikvcedqHov<4?lb_W1-oq z9h)o&jKGU|O+)Jd7B5|id+*)A+*F=?mb%12yRmxJYLr*j;qc+Zj8T%&*42*}Uj72^ zy=Se8+Z_fwP;m7e4qVAZUR@`0(vwkIkca0#^)l{z@P7DH_q0q1LoAGW;?$duN6%hS zV^d1|lNc9|*vN47c6H#~$>Vr`_hI(CRlQZv!Q3g6@tmy7K7UREeWeG&B7!hCaW>w5 ze?P9@OkpgYjL4{1{MQeE1V3*V%7z7fJ@QI+7nUa zV~kf?UXAdGSR};iFCmZF1}bGjVgl{l5;fs2ZzZ{_8|vhRs|)T}xrn)u8GDW}r%KO2 zO-&WYX91Qik(gl=4HZRrZ_h!sKB27X?MFKxANNdS*;v>0wKp4j#7 zPQ` zgS*UUst)N1Icr-~Pz7}Sl3rNPN>|jr!l}+kS)P?$CUXWZJxec7naDMpDb*lY9ObQ2 z1v(gzb3zl#NqOrvqfOmc1E;uAea~Bp8QU0G`zEwY3MqAU8&cnRz*YB2K+Xz`KbFA} zJUlCrssedTU#G$bJ=NJI_}TYDv9VF@Dr^dDulZ`49Y#Ab(ouxJymJn3 zTxrE$fBhpYkM?GJGAfhDW+Dg{jYsx$S#_31RbB;^){V0Q>Q{{;X>)MpU*n41*jUF| zUZ)VOHa;34X9ctLPmWoOvL$0Dpz+AYGUMKBHtNZ18tQ}^DOu!Q%cqR09y_@#CAEZ! zZc7!csWg!2R@2MvegtJC%DWj5l)qeu7>(>V4*xCaB$SHj!l#J(~oK@urx{uNu zsiYU{x0Tc&cLnv>S^1<22w#R--jjPmql|`N?PrGu@WcQ9C6d$fRdTvy>4LG3Eva@C z{oEa+-3D@6TP4A-vD)`lt{61xC;#L~%Yp2R-q`a~K((O8QDjc&)tHUzW1!lR+G5Z3 zRVkK-a_#&!{b+@d@oxU;3#> z)VMAnciQD?j!}3Kw=wPnB|~C!wG26_W1c1sb*%nss*N#Hyj6BzXptigMoE!--5-KQ zzUWaU;XhX2Wcg*-154w zeq%z(gYxgXZxj2ckSVznebWBO+EY7_GS-bUW3wH)cS477MEO$(SwnRQ@4vDW`*t0~ zH-G*!ELtl!xy08s~D?a|p7)cE&qg%JNYQw6}iTp_|tt!>38N79~d%K-Z_x}5aK+GC!OILvyaK*my805x_jzrzf?>2dM*&73Rvri#xKvIod~kGAV^lr$ts(XRZ69gmc($#`pgdc0*%`YE<_j2N|NJ0d7v zNShMNjK8kw5!)+g3@H+BYq^pm#m~-a?ERnbAgRX>PyffCusF;Co~)`~rRC*G@~7J5 z58ISJ$To&cT^D-v6`5}_a%ZEArpqcXR{w2avC$>hSz6^n`bXzR(jO1yJ+%f{%c)ud zf=B3+xK~po^lN=K$_TJ69%?Ez23=Y}&S?4!`)*vb(XP)*Od+KWkQQuwmzKFH{os#P zFAPd_oF-?qO_0_mjg0|{=eE8J=5f~*u%^;YiHo$}NEvB$a@i?Csz}pvFG>u7zT+lm zY@oZSgjjAlao5^SeMJeI9f$#^qRLtAkTo+j2p+1UY}_|$%J29N${A^kxEwXJq=vpO z*W{dFx7LtTYSZ+|RaHwhEoG!E&qx_7$ga@`{a0$(o`sZJW5ullYV=J?STS0GgJtPg zub>SL4q?!`4Gj*eP1c_p7#dQGl$$8oV&oLAG0=gpjUE9x5{1(o)FY*XS>x+dy0o0J z)|5l*k6^cQ$6B9%$Hqo|E6a-69;Bx5P_F8ytYnk38U#7pQsZZvEEJHobdM;AY(w_7 zyC85}H0?%^DheeON$NV@s7&1SU!j}W>@L`L$$J|8!QOrp6}BS3xE4=8_5c#pWTExC zjEzagMvukVHZ*>0hG`+x7vMyu6>|Aw3ykG=94LMYuk^UsjfDPd9cMW=o{#n*R7&4u z)AA=}Wz+Nvh~W|1H1{-p(zezM*;F|-Hf|Kx@vqYBFhuiS2fI>V|L9FHI4PNsG1K)} zOHHs#S=Qx`t#9@qZE9L%lOKD!b-(nJuCmGb3HLGxf>~OYUmFM>IU~QSrj?(9L*dtr zjFnNgRgIs}BXrru&e${u?avad2#vD}7dMdwNjN1jH7GW8Q2L_i62^#KRJKvuH2EvP z3Xkj%w8m-*>_O-e>|=DPJ_;{vbl7jIW!1N_69h5-+J0-z6_ATOa`M89AA17+ZoPPa z_dBR5#QkLZ7syuTZyB_O|YU4GSOjEVfp6VLpWGg8JVConwNhs=~D z^$`yE$KlIg{w$&bC-!EBp!g*mwM`z`Z@OjaiM4fWKjp8Kw@HXmTR?s_?dsKD>lqE) zQ}U_b+Rz-C0u)_(p}xtoB-i*~Hhoe9^<-4~!n#sQ@k8*il>F6WF%blb;IsN83g|eT z3Zx8cI})RHOjH@hH}b2-M%I!Y?t^KdAEl+0IG2)(joUwl#JO|SWTB!(m6ko$7(*gz zEEpu#()zFI^ZJtWrw#bz=N0d&H&eyFM#_$F?!W zl@Yi0K@P2(fNh_Dt$Y_7rOVoUiBVpG;ypRUzKSeJnaN5)A(&5`+972 zdrYX!J=EtObTl{P^yLhM&Ak&FH*P`@*)C-i4i)cexzy+6ulDCQx&*JrKPG^(5rhgU zr^^bTipgJ_pP0|x7FKtY) z9flq#kUG+T(Ld{<6^&u_6fY#f9qB@UPakq}n^4oxiS3VW!R!c6h1XhzJH+KAZOSb- zgE#cHSX*~XpXMg*=|d@F 
zY|^Gp-{sd@g*!sK#%_m7{f;YVoKd*6{WIvd#nhr*HoD|VT_f*OWz=~cv5e7f1=-b` z&Z!1^dr?r(jN?{$zQoj4+%UHhzpZuMS&$`)Q@)-lBzw!Y=F?J%h>9FfT|6p*N@)MdFKT%<{ zH-k21FmDBPeuF)&$Vj_raq`584dgLrc_qiiMH}LA z9Grq4!8H+|eeblo*>3{&`xl+#(Ux_i!niuS!qZJVz7GqdW)rH4 zvT*9~F}!>52E6^E@cd)<>ev1RI5tDQZOBeb8aFP-t($FZ&Z}1_XVQDsznQb?7GUth z27-1f)JDm%1o{>*ow(X+PTk7HvA4BlkI<+)xxR*5UDGg?{LTzCEjEgDTgI^st*yjF z(2mFc+IV{HYs%}ZuCDHIkw?vMNAnbVW1O+aIT`fT@ib|exi0E}P)}OfgSZ73#LY{< zH@^8Lrx*&_K>M;dNldn%ZSMT4|69dB&1Wlyse zkNv3`gH#X;C0~uBJ$7RdFi93Z6k-L_*POcV6JatfGlB8A4H~A#Wt1I(+0>8Bn^$q> z_(}Zg&7*kalV8WmB?)Q?16%Mha~MyB4Gel_4x%qkfXHnGHg)jccFR*6iV#reQ z574N^^6 z>sd@_Dk;F_Q>XFfTkpa%G#ncreF#4OUdl4rsFJ<$Gz!fhwDsx0)=vZD%&^|Bq&-^= z9^1DtHGg?-d{?Icdn`7Xd}ch{dORnB34NW_$V$45y}M-jj&f{z=wnETPaHRy_Q7yY z4fdm}`DA})GTg48_An8XQCGDiPmP!cP*PfpvzLpoa{XQS{Fk1EpP!y!oSp&=)0HvC z+PMBfd1Ta|8hFjDu~WjZ6l6 z-Wo6-n}J`)a}&#t_0;@+0$onA*l$z_`~aXcZ6Y3g4&X zy*f8_oevB%Kw)c1XrHEIGCA$;0arIS##2+v_pM;6ehO$jhRjS|X8J2O5E+>|gR;RG zTmJztSth4~TQz808-Lw~*LH0>-c1GeRL!KGl#x1WK@oOa zCTntamD`oIgNaSg7<7BLs%+4GTl!_;)cOkS+H)A$`6bqq0!ABhuTvhPb-_2k`AsaI zAAeht+v-|y{J>E}EuM?jTQ;gY@{*K&5YURKTbe10bYEp}rZwzghJztr2Br$Kb28S;9|SB+Vox63tJYVW6}tqV`_h--7pM~w*<@@w8msXB{-a3CDv&b@ z>_=Z7>6R}Ue(h`jgyrP-t>_r*;WS;HnXKO$X2``cn7Yl0SnQ8wU*N;gY19>%s{Tj8 z&DtM`e;=(a4fcE)4~;E#IDhesTKIm;Jr7cQ^_>`d8gE7KjH&x5-C+CU>rF%=vxEIC zXTtu&SK#IqjQcj<0k@ei>~>3-ju(@DOMF$QQHicQnDa|=*(53&9b_U=3m zH)mhm{orPJ`+E_eF7n}F8m1`tNA!I<_#n8AvL^NQ<5&LtXA~A!V&VFG@v)blpZrm& zTXkt1hefM7iN!plgQ8>c3uY)=484>M_PBHX@?E-@2(F!T1M zq^;EWBMnMU2(~6~hjr@Kr@}oEW~NJns4vUKzWv8hTw03I0g1$X5IHMf|{lC&Z zfsx@poI17#$4{I`)a>Q>%oo1MoN6XxbL(DCzA9%vXrGL%_Cs@$8|%u=1dDvJmifJt zel=bCHWk)e_v&`=VQPQK7VWFe5XFp&4Rqk<$%CjKbi=wwoJ&Bve<*ZTb*6mCL10QRtN9}E(@mQ~afU%ho z*b{6Ax8nZ?ZsSA4|4mEN^=t4#3O9|VH_fl?R9g?NwPw!8p z(Pqu$SFYG!wSj?E_HB`$*AA>1RYPcou#_Gw=#4>Wuw&_<+HB3*J^uTsipVKf6 z1v3X>wqTT38@MOv^f!zao&KbTQW-V zS9TRW)8m{8e3ZJ=!WdFxM-j`v)98fa*X<}Ua2ds?E^m~YS$}kCq04~r^*$K3so(?2 z%}0g_^v;N{Gl%K=GCh~%%EYU;p+{&|by$vV*&W@k<|Hs$7O-Iwu=)I%;QU}?XtZOK zcL74nI^=~{59;vmKbX+9Moy;ss(w2L2+-}9L z)~8apYRB3Wm>%a$!48{>^^D_NVb*2r$Ibww)`y{MS{_Ryw?#M&-7~5?9wuU%imoYX zy=7Y(>nO)jZjY*EaEQZkB3|pS0n?(J0yIssBl1UCkv7y74GNci=U9HMg%8ru|3k1R zcRH9>Zmckg2Lg6op9xIFF(uAhz}VQ>S%dL7jH8(mCHMp;_uCHZ4Cf{kE+s2seY9NO zil37~w<&*BEh}wC-uOW%KN8@Vh>{2$H{7tAm1|O*32C@$TlP3&6D26FudZO*y&}_Js0Lbf2L;bDj?H`7# zPXyfD^w)YnDuwp+eIN$^KYMt4UFzoj5;rnikqreCc`_VsO zMxVt4KHeVc$$+V_>Ymxlz;s}g5d$=Wk)aXvbjlakoG|R-3U5z0c)MG_;H1iZSeOYd zw*gTf#Y5e@e*_p*3H)O)+QURT|8aDGFfe-gk867(#<8Kf6^stTJlKV<0Vg=P`%tdj z;6lOru+&Y@P1?JWL3DL?z%(jP5j(@d(;Gfs?n+;#(%EToP2b3e1f#bG>gkZ{t48UG zrT$@LnN6?^bfeGgj8Shtc#=7{dzwICI=Uvpt?0bHO}t$%M`^D*(bvauHoI}0UE$^H z4_Epw>4Q$6H5eXHV0w&FhMRd9{oS1yFv%j}E^5t8KYxFC5a~FacJxb4fvIrct{4Bm z0k`Ub4Q5tqCVduvL)l|~+S%C;A0J=1xX#p@{=WxF*82x~Fk&7ht@;b=|2Jc*IX97i zQ}fdp4J#OjA1coHaA2d>0u%Yu-`5Rik6^|}p6UyR|JV_gc1*PGCbW0;!DMm75ECX3 z4-W*zy*wEc`4pIWxE}-E9T*n-=->)>A1}Ch=(Wr~`q&8dvI+IFeXu@H1yk2F z+L#I0m7N(mgqaq+JsmS{a$1Q^K$0koR z>E%S)Cm#R*6HrerCJYV(t$joA@^FVQ^XIYLS=I%Q@3i~I&*9-W6wf?iM4FBXAE%KuC!Gbo}jV8K67K9cBPstu17`Bb;4bRL94tuvdnuM{7o)`^fgh z2+ENiUO8jq-gKZbXqY(>!@a$@cJ>0AdioI%83|V^`BXudf!!YS2Q>sX<6*xIQJ!0o zoScf}vM$6Y##ko|jb01rI~SUU+l{+DrL1KcMoWDiE}lG#)a(MJ7FEHC$`Umzl%sij z2^t52dRzrL7>~hd-8?*ku8uA^I61-5S#xyCwiG|4mOb4c4Qv{waeZ`*1{iqd*E%+% za(8&LeE#Ddg_TFrnCua^sLJ>@W&It`w_#>_zRSg@D}(3+Eis~P!l3{8Z;mtN@pb|CG~goay{x%C(uTqR5l zUgeS;vk%R+B}l$}1{vv@xOP1i-oa7u7vII}_LF;3TyIuH299wcHgE*(&5gKx`V7*u zvXNh0j)Ky9M8+h*$4jqWYk<&iN97nyL_4V&1E!=wgMn8rn@q##?dyicG74uGk>JVI zKDrH&x-+kTtCBKRVc3&N<;MCfN$?PQIygaYmSIFv5_#4_&EYgDbrD0qf!P0%A#`+fz~V%|>pabxxue4vX70H6*d^S^E=O#1 
zBz(PZt=Be?Xl(jYlY7WB%B~TtV<<~4;nL-6 z=$46S>-i52x1eKcP{k7I}5KyuPWm{@c=QVsr08mn7%&I-~)**Y5AaP0lVD66Q!0FNj0 zRd5;j#4+faliQpQZe4B$qBsqPTj4dZ%Gb?$(B09D+PYel*EOKLwG%zVvOy6yNf78zN=xWqmTm>kjCHH7x+ za_s)o&ykarjUg7lt)HBd?E{SW>T&S+dAxBf86~xCXm4*tcUwIi=$Ewvx6z{a#?v+> z+Ua15!uGOlTyAM=#Q94nkW-Yc{MuA~n~4pX2}tYXdvt46fJ2llTIWmZj zraIc~M)VEKw2(b$zRJ^nO|>^d-h$;SuGwq@)P%zqbo% zX|(M$?YG7+Rc=>eA_zyEF-+Uu-qDVB>gC?|-@=Jw2R^_EPiXVDB-mq?!s2bsjE_pN z@5mt(mD;b5eLJ9a9qmzZ+MeA9(cIW!JujdMnHfiHrAEzWq^Blh=gxOfURj~87*}Qb ziD?6xdxlY4U4w%=-@uLQH_*p(Q`wLY0=A(O{>Xs^Jw4rMZE2;hjM(Alo&yu*;XyPt zv|xB>yoWKkd;9S-O4#>sB20tDJcM4#N<)q03f4S* zdMmo7fzfUZG^XI}#nZT6-bwE>4gCUS_?63-@Y-u{prN7RgSIq7kgs%iF;*4ZV#}#& zYs7{$p{=VIgKT(`>`X@tyE8O2fE!7daQ@0!3^Qq;Xn%}tI?%B#GlG$RG}l!yFSg6| zYU@$k(oPxwsM2hMTl&$lO9sUv%PlC$y@*{$PN9vbB}bnMZh>_i+=>@t@PX~-t>Lz@ z!{{J}dKytrxohg|!?4-fzN#ksl5Y&5t*#o4l^mby2GlokT+EVl%(Um@L26B`Y4aCi zQ-(3r*NN8ZDl}ASnP_V7L@z}}=a2tb9?cBcrg?;Zuo!Q?{WenQ2QA~q%89wOGrD_w z7+2Qg{JCQ|a&R|#y4v;S8IY_wI*j37_PweS4b^sfI;I!3XzA{=)74R!`ugzh&YgIF z&mP#No&??K44R&izqXhM7%x|$zNUn<)YxchY(e{wWm^5P!T3?1ag1QCHf?zeZCgtv zj_rO2*%i%do?&XMF^ayv9vnY;0{iwKL`!SijCdn(yExOhs(s?!xRuzn2Ybs+$uill5 z!WKEQyGT&g6o|8vGA1}Xj9^xD9G0%zg5V&3eW#KGpbd(Y8L`&W?{^zGG-h6xK^bVn zkH7JCn8N2{@w(+>uK=oXWWlkrG8?ZBrn)mAZBa4C)-li}lv}pqaA2a_Q(cVL|Nbhn zN-9xMRE&b$Y-E>JqsP$`5kbE2bk?8H)*xm?a0&u_XI@lKa#2-A{gDYmX`6?toV2Gg zNV(zeUR*hI9xh?wShR6HeAzJexNuZgtaPc$s((7< z5O6SrN6sjL?!Q^k02mJ^<5JQ;I8BuHn*+ zG-PpX^2=+~(*?ogwX>7a8v}%PNdk@bggT=H_k~7vsI^w4TCVF7ax{YO9y*hzW?VRN z0jH0hL*$%!@TNY99;$nU@&)mMZWLx^sxMOK6%->gTMT?JA|is|;q9UDX`BW>wJr)6 z)Yw3-NEuD7!1yv&N=HqQ?|S%$#bM!!wTO!jhnu{UY>(O46>yNmoP1DYWB~VA%orOj zXA{T90q!1NSh;2e)~`=QQAQFPU88XS)_dU1>-q@{8)yj;8i~g$BZ74@NEs=uuNq~P z4dH=7!}z+!S^1UCzAo`ivK<1ttS+tlCYyl1F5_(CIlY#t(n~Aq@#Q9fwahp$F?dP4 zf|RNb%-*sCaiOkoS9Hlm79NL_drH5?j>hnfp;c{2nepW`4mFzUpRuoWPF1$zQ+on( zWZ*E4^p&x;xEBxI+U{GW>*;CybTmPg!{(cJF-)jxHWpymYl1Z(T}*-k0(Q-=+5W_7up8X~)K{9Kn9y z9^|~OJ^3p~Mr-i_Xm5p&0u9X_5_A=Xqp*UdF@j{CHgU`%*@i8y*?P)tgt)-qnpq9(|IrlH;Utm;{2z zT0sBE2`0^EGwK>zX`{U1NTY1SVdJCxGg=qW=gsJ4a?#OPjq|kI`%k4HcHU|P`niB^ zgupVi?F0QC$ji*cnd4_rkYA3B>@w6fwIC#N7Q8&%R9V5MLAWig*zCIop+%6&CRa2c z#*ef?ms~R|yrH2HSiJfUghxlgjeQfJql)N1te|V?UEMpIUh7Q8Sd+=n8ygQ;j(m6b z^upS8t8vGj>v1zJ8LrNbczo-_y3RN-XcHW|A9k(V{fJ!2c^d}nRV(PaMn6S%i{DoMMkSK)+QihwzmNj-YK+F3eul1meF~CoM`WI~4v<0&wgDJCyMU-+=(`k=O zK%W&_7`HoG5E(lcOIB|}aDeYvtR`Ia4-CSR6>G3;-U8&Mrot~e1`EkMZw`#JZqN$W z7HkSS1xOiPNnPe2EhR#~1vhUdG0wS)*n|X*k*Adg-S5tpcD(iG0mRKtgs-1FFX$LY zp_w$!21r|SR*#{yC3^-1Qf|~@<&{;Iv_IA5> zGwuuxk3vX*k8R{x!PW-2+{tD94GI0IMO%xSXproW07AI$fMuw|5s~lC|RV9Mn zvEj0|&i{RVo!GVaZRF(Vve-rX-^Tuk~TudO6&66l&@f?c|mKc z?86}FVNeQF2ZVyf#@TXl_yLttA6|VfE!oxm0yLn*#f_d0@=Q`B4G@`4!6ZdY~ zgg{?Eg-4ZCg=OE^Y}9Iwm{40(g6k(vAv-4n>CF8vUb#UXD^rt|`Lu6v^+D*YF!idl zBW;ZnHy00IELyh_OP9}syNWwBR{5(M<&M!mHLlh^8=(2mGwP%|n4~v-T}`-jdLK@o zIg6sQI@;NMwUsrr!z(x%A^z^nzXXA9PgPQ5tq(Lm^`87ymP$@J=$P8j5dqttl$DKO z9cN?g^bt48D5?wbA3yvVd>3!V+<8%BUl^AX8kZhFHC`V6n6rpBnYo0om-Pw5v4Tbm z0=D)9#xBbrRl#;e_T-GF%YvTfdi>=Nzej#)1@bwb1$nv1&MQRsup|8B3G!P_7#h$> zCcpCgSJ5#tiqM4F@Nw=(ar$MPPc1^+q79f8;z`HBrpdrj^N^a+Z*T5GTW25ohD~ty z_EZ!}2h}_CBYkLTX+_Y&6R^imh!r3u$w_{C!Ab8(cjUG z=EfQ{k>;Ku6MTHU;A9>^V{Qt5^s`^XZ|+(I1qV<$22omGMuSy|+0l`R3JW1CwBs1; z6C9;zZS6oyM=yGq%{hCq|H3PyixH`y-My0}5)^CsN^?^++iXI+&^<5&7cVb0c_%y=7#=`d zPa7OZ1~EwE*T8aZ9bFi4AdlR1drHLQ?oMA9s;leJ(b>oius6RYhM&aeq4>xx&cm;&2udgcAC$8cCF4WZ3vwz)azSIU8#^9Xt<8*R1qsHAf_cj*#h7bL>V&lf!$pHA}3lR=S#n(S0&!}|9& zU>QPRO9L7k8%b9;>FQU8(p4tfS-r2npAN7IKD3ziHB}rp>O@Z;EUq4kE+@+SfJu*y z!^nVoJh_o$(<)=bk#lr&S7S5A3mTcB-VW5%RKnTAk4e3LZBYfvl;ie>1{~Xe1nGGt 
za1RWlKAX^-dj)yrP4J$*5}|=^>h&NE7V09~tFMKtmoFU;+q3Jn!b!RB>Fz>p9s5Xr zw)Ru5oT)o(R}%2yzDC?k%|l9d1u6;)@b+7;V*P_#F>7{|nuv9hAji~(?5r&O=KV9Q z;f}Cie~h%Zq4e4%46=#P#q!9y=)cy*ks;B=4z;F(8}(iKKOR`gGJxKm9@Ma$o4W^T z(vN&Apxl}Inv|>zj=GbU1x#>aXoy{{j@?9CHcUNghP#UgnycwZ+gjn_c>b=r@GrNdSjyJa-=h+`suMzc8l^y{WwR*+UOF6ty}K_xjK2m zP}Vzat3g-K06K#0c676UJ?Jwzz*E-gQiumI zGT4mvo__Q>xGEdlN&9!K1sHYF9de4YLKkq@la38IDea>{}a}o7<`P)GsGb z(&eP^bkUx5^|ZsoX%tPhRV+vP*-vLrI!Vfn=AX#=5M{mFD(kc{{ljo3-#wMA8z7zH zAoR50!0ug0SicqVOBTSBr;So_*Kx|53*&&-<>{aD0_;AXgS#L3G}bH)vIbUGdML*O zef_9v=v1~&Y=t9z#%Ny$n(0@DX+vb4L(5MbL?qp(W52| zk7L^-55mW52=%nv&D|z7+3M-xX6>6mKTQ2Cq?ubw7n;b=VJ1{6W{~lx4mVWOAF)4f z-hL{sagebY=t5m>6FLV+;pRcRB+Q^L4s?i*&~{yXgoBHhx_KdbquZno_O+q5n*3y$ zM$&olW;*(YETr`@j*s@^x8jriMZIgLNOz!^@yYA&oyEfCYrqghkBuYh z?>iVX3^V@Gv6uyey-mnUIfI;>Oym|<;SX;g$Gs0fgZLD>puTt0J`VTwp|!S-deenL2PdUB&dR76)XCFI792cwoHoH_>Ip;A zEl>BiGS+EjGAXY_x{SjiK-rqR$ZPR6#htQKd_-Nf>h%i-b%#-m;8 zwQ6^F(x+ZgrrgrMHIv^>&GMA9j@MniJk^pdIv$d>rfcbUs(C(;ar<&6h*6~7QTaFf zs6n>Du3lU?c^bYkF-#iugrV)D{4z-Nm9&$!RSg(mOyS8GTarP0PKo@AukN9~v^93n z?n?qp|DZ48?1hs!d~`qV+;R`&#vrSVNTSzI-E6{t|KKlp^r43k6fjD?YvwpR!aO{T z7RrYBu1=FhjkBvbUGGrFlmu^Z!UAJpa4JBhyLz9C669{^l3cd z$i5B|e;e~1XD4Tszw{5W?~DzdX+!$jBp1>)WK>>rqo0&~NxK@#1M{!;CfdD*M%wm1 zboUQ(th~tgQ4CSf8|%waSyF^cnKf_^n2lJ*f_>~mJNfV6?oB>W4k^Q(t#!1Y4e<1z zh4}d^5fT~BSi(W^z~H?DZHI~RNkct#Mr;~wv57gA@W4q;ruJi?m$7=!AV!&swNeh- zm={VOCo<}4l`AuctC4Y9duJ1>N@{TVbP~cNA`mt!7_(-DVpe!4+?jiJH`id;BzD@& z+=a0{Y&p819lq@~Yl+Zd~~(muAd zNF3IyCQW6Woz0AIIDW0n$5K)<{{*%msK9sCZx?6IPSE6&CKoJe)|{#gW~BA zB+is;(mxVu>P1UagR(P9zlNF9Tm7VkeXOl0#-+=bP+s4HJqONXXwVTG*R4=DP&Js* z*-(x@{rS(xrTq+#i=|w)AvZr8C3RhxGiM%xSaytV`ls$z#;TNoVK*(~!lQ8@lbE!B zHPzMh4Ni==_4>4W`bvCJCoY}2fJ3L4^E!E}C1~8Jn__1JnLh?`6aOzV-{0LtzEel( z10-f}Ab(Z9E55o?O&ef)ZX$au)IlAuqnUuP?`!O~~r*61VmBG7e+hWM-_^#vIJdIG52P>L~Lq zmqpv(m@#$#S2HzK7IY|lnmxqVrQE>lwD_Vs^%h`=v65~ws zttSkP&Ue;#;o8|G-1+#Uc;@AgW7DcQM0t$j$A3A1*hP0>)uLz$wOKK!EI$v&_Z&oC zP7%^`%1}~KgWyOCDV@Dcgjvk}XsRs4u~TPp`9=zg^0IO9>=~T9oQu+$Zp>Ra2R`1; zD9OEvlLz-BE299X&Rs|1qBRH(w7-Kj3e!**D)U(G^ab2V%|mtp2cUZpp~1oG9q$er zRaw8LkVz2fW_8iUXf2J+&B`?^t-k%sf`31H; zycru;El_JbUAle~!T!Ejx_A+lnY2@p%L_Bv_v1)S%SIacm0Qt(u&77`3a^xNH&I}_ zP?VX0{Ra*rC%+ik`Bm(LGomAd;pOgvL3zSZCJaMEv24RSc#iZUFXbBEKX3^-MU7an zcs{&5=}?%c)RYzB?8)P5y}|74QskFb!814%fuu{~^}2=%oV|EdDPL_#1+HDYhNRRK zI^AIe&x(bwhZq_&1JGuqUAw|DJcq*kTomP|;bwLjECKWI)WaL$-dl-72hQTcje5NN z`7hwHC-2AlRf{nvGJuZF4=zle+?brnBxY}C3$l`~<2c7KBd-t{+4<<{9YkbgqzWt~ z=P+!Ro-n+DfWWW`6NXe+I+FU@YFs>WjIxl1%)DYGCFLQ6vJes)q;N=kmSL8wEX9?} zS8(>yRb&^Iqn5!}kiQQC{M=BSo{8%xP9r%jgX5ZuVH%D)25f~nRVXg1#=@lw8AR&| zIIGG|4|2y=l12thS5KYBrK>lPm0yC~!g>S*%|=L|o=hst&%v&@UPoA5w{+ax)eC4_(0B4?m8lUwjdpSI$NF2m{ztDd_c$$MOY{@ReCs zafXze>!*+6?blyJG>w`+^+ESt-@%fJtyVh!Ygeyv3@;+Pq!Q%~ZSbWr4h!&S5JJ7EZ;-L?4JYj2})$RA(*<~Q-!=CxRp5RI1n zOq@oICU`{uHI3IjS8WmmLOnIJth0cKfVrs&tTjdXx~{T9l@A_j!bxNz|ZYD!CJBWiK^ve=MRG`995AS@PsUe+K?0qHV# z;@t65XmX9lLtF2jFkvWdg8@t}P9J<5S5tEln6MfFKHBixfaNG(H_si%PL54z;zIb- z2B;v$Y(`UQ5ng%qbtJA_2QM!=bgLc-_Nux{oV##}Hem?<)af^NA7k*#6Vz45!S8!Ho{Qp}Gv`&m2e6^(5rvmLemkkcm#PvfHjqU@U|ENWF52vUwDPPG%&h zq#^l6Dz2v#VaV)_DB2_m>@u=%;KGFyaHUhfdj1kp)6;N`c5>7u0JCV5m3WN|p{uDL z*D32K&R#-x5p|jJF6;P(g@(gLCU~)gh;Ki7+FLL`41l+*Y1_qWl8UO*b#x^gW8 z!IbqN#sV%5L+D|D-r0E+Yu2nlRYeBv_I0FZSD?AQ3lXug41OiJA4X?8{Q~)L;o>zU zXXGN2zN>*rj-P)Z17P}a^78Pmcd_%}c?POW5gO!8^&digUM607{arK-BLQu1M+;J~U&E=Brx*(q(Z>}cIV%s|-u`%O z+aqwLOo{y6R=)^(VolVHJXdeXWyH@`Qy2^_1c3r%&Q4?bw-%sqpX*!mJo$jb{r+ zJG=1r|NRa9SpbeR(K~Z71>sEca#By@__<3cYjec1g|QfE&c^Wz*N{`&i@LIMq+UFa zq_iyh-CpW^7`)_$thHUydHJ_rO&GdPo-mXm7{E{ylc_@|aXF_Jaj`Ls_dL{P8!(=+ 
zbm9i>+8^IKPQ71)@L-+jD;Q-TO@pW^FT$k@CvpA8P1@ulT+b>%P3HhMtzL=6@w1ih zZf~r|jVl*$o%)`YU5J$QJiwj4If8!9X%GdBr;gGWWiZ~&7D@)_G-zn%eCA0K6l)kG5MF3g~O9zKMO^jyZgX^2~}nz5Cq zvU?Nda*#>X`BP`$8xxDg>sG^u9klsDN!W%kJTQo#{qT1vDXzxyb*t3+)rJ&kpv^M( z;rhj7?Avt`?mpfKkDa9^$)sc@ld=rP^>;t;FaiUEwSN@;GM?{h>A}H6Y53T)Pr=hv zg*}H3BD<~|9nGysVos8pnSwz_CfA&P&{$SJ8-Q#vlRS zo^Yb??W!!pn{WIbBaFLK=?AYf?n}#IZqnz77{(V~^fwN)KS?JKAY|!gJo$yM;oc1^ zuwm6ItXZ`}PZnx1M}Ar^-hA_ItlhGieV2(0+UKG&9NTx6G9QAVumDun6w_`TMQ1}Z z%4rj>U%#&6_KyBhghtM$y^$CGJX|r1q78wnHDp^BeHqIV9 zpl$>u7t||ydzu?jOnX(;*oCmzxd`-PEWM2Pirf( z(yud5IE}RQY{mx#sB7(mPhcSZn>YJ80*8(|yz$1{`1JSx7u&Yq&+%J{wcHlXpQ9!V z1!@>;oo0+CHfBnD9ROm)!%;o7mN;>$e%hKKYkL~S>^OO zg_t#G4&_fzM$1!;-OQIVl5gPhxiggOLgW-wQ)yfgER&}6HIxn~<=LwN{8 z^xZ`FD02w8ICbI_bFge=aco*TMi^&?shjzte_3fa(a_e8c}rFzKffCOK4I9jaTWO_ zj~@_^Wf&(9?7_jKC-9kn`w_Nndl+{tkD}Rhz}x#TA}A;rix(!0-H00LX~pHk?_t0A ziP&X`2y|BuGK_~23^Z2Zr~mUmDDQE?;^m9r&dSP`SbC95Km6Kj?;(gf^7I$Ki2Jr| zV9vXWF-!z}7?gQR&PW|>tgl4I%^TF$>&VR2`EFNtKVy#&#zf5e+bePW*kRPP_o_F) zu2L7wjBU#EGO+*1d5(n#=EwM>x2YJrkDWospd<2_r{_@5Ze-@7p`!~43GwO%lRO|K zuLtjb{{U`g<>M;H*xSPc^XLclM5pxL0As>170qoOxOdZL>l20oG8ZwTK5&$poP;CH zu|=M;m^(N1j$&4L5WHm_TH2$yfdl#v+%ja$+qqKPlD_1RoctMGCVD3d`S`IFpPsLDsC$1ekf|jmkG!1r16zooRx}b#u8utr*C21>ebZK zRamxoAwvAT+2)XvnLRrXptYeJF>%pwaU*}2XO`yX;pcxkfX(-8hNsgo+p5IdZyko4 z%P0zSuHvHDcE*N%%yGgPL&^xhmM{^a%LVf3Mg! z?b$`S$;MUE+vntsmX>yu1(46yTSP68qVHKSGn~<`t|@%yJr!Z z2evfSA?ey>oIQIEx%o0SWvK5MWK0tPZ@Eb@G=!Xs7jP}*D%$BUlWx++C#52Vv9OzO z7~=~cxH&qZhwBu;6r!gj#W!BKQ0!@jMLAYyTG{D+U%`C1dAILPKlVs zkwK-rX5dT#us+p7p=zuw!C(IRXSBO`W7fhsOytd|E=l1)oJ4mQ1%c4h*@h!@N?8=R z(3nKbPl&`obv6az0!-dPa8`Hc9pUX4h4?v35Fg-%iwECDQ**nBl8(95UnV5=t=N0` zGL3yBV&~4mqB+se-iKL~Iq04+IT$IS)sT`@4|jL<#YIQ*N{$^UpndJNXzek>H)9aG=OH)WaXf+WicHKx$ub;nxJ$p_w@QK0#Isb}pZy56EuHe>4U3X*Ov2xI>lm&i z6;r|Iuv|Rie4Q}Z(}kX4%0qAj!spLtkP#1GH*Y$#Xw06Mh`IBlFmFz{dRj*ZMKb_8 zlEDTh?njQ}jjTo>EDm$$CSqPf1Ui~ZF*0l-9uqpNbMX4#{)j*BJP4CtB<3VCuoxY{ zySv{(aYd~{HPqgSv}3#Q_rJe^o3$edkBdiSu#A@+4nx4FjY+ba=*N^*s-AEG+2m?!eFpM}+cIl5-uMZQWK}bcU>R_0&nc z|K>Z$p@WbY9z;6TU-DDdQaikV4=!H52LJF_EMfw@AlO+gq}x6q>jwG2**gYztlx;o z?%#w^^?H)n5jzf!G7N6%$Mp+0@ysJzF^|ckgQ)|Z1I_67bwpV!9aVa@TDMY{A4E-Y zI^H|DA4jg-WMHgDcMSBJ5$y4BC$3&Phd1^fMruKs>d_cDxWUmU3eim1=7hOZ&oYo% zP)k0@MJdVPWv~~=kDSKA1Bc)rXhvGzP5kk_ODL@Dhbs;Fu|032v6H&o-Hr4Ud+_&H zUdOen0m@%IVnUp88A8&zv)C(RlUJt3S+&GK!Ku3zmyYd4t$Gp?l>0W+*Vbc*xa(URk)E4@ zj1tx}F!ig4oKQM~oaAvCnpfr|a`b-~$p{)}uo-cBM??41E6C{Y2~d5TDTl@<}pzm?WBEv6J<5pK3Yt4 z`uSJ!7Y6okoGw6*LjZL!0WLg$@xU&WF!>rLuI92F9C-DQ`1`J-a15P8T(R(V9>O2~ z_fKeQlGQ~`@0U}ZJs-SGlT7khcTg!3F>?MPJ;1>u1)>#tFOQ% z`jg70PM))%x2=?k;wilG%1Jb~n-DQ?4kCP=abfpgkei#UY%Prg+UiU3>)-Fj^+Ep^-wd1+yF#9U|=tlj%a^!vF z=M^zMw%V@S!0_-8gYOh3$~R~ygp;f_(uV>jA2)BLp|7`xdNqh#CZzBD)2VO7ZxIbI^^!$ z&-l0u7cX7J{*-#ShQyJkM1;Ez;_|NFA(Kf!x5(8<2d;pa*v%67M2Jqz zLlgs0$DU>;y<%+~;qDWGRd+m!`|rP>@+2Q8qF*QfmDDrI5Fgmq*o4d1OY!g{FRGh{ z@}y69-w>SKnfy0Y(ynHzan`nL1m#R{55NBo&Rn{R7B(~9W>8Qio*C>R?I*Ek?{1`) zR4Ci0HAh-@B3*$9jbDgaVd2QTd4+mj2rb54{sbl4X5BxlyZ!Yy!{Go zToYPI%$@_M)LRIHUDddB`ZQku>wXl`&IHEKL3E%OvXAa$(0v2VBu?9AJJ5to1|f~M zxx1+h$4?!@u2UCK(a@r=2^jUnKbQv8gyCB|cd5iocr&&Z5@oZyrWo(--iP#hh86Qx z(gjYljK z84q`F#Kchk=FG)H#zpb7f|%Us=>U`20p;>!g_j@Zv;3ktJ`B5Z@W(&?1w~bT)^&!R z;T;%@c*ez19uByCbRUzSPBnhFV#(ANQfdsNx4jwfytxbS?LC5yzTqiwF~(pbnNGca z`N7(=ng{I7;nswi5g0A8yttHxp0>o=RNeK)D6 z=`WwXf@Z7i^fgxD+P>HE>YKY!-tVt+oN(V!?A`S~@^b6tL2iN>t#zgN-EV)1PO>X!Gkx3-g44aN7KM%!%=VufGrcqUJL8orAf=H#a5>L6j{uY3<+w|FBrL zvxGU67xwRblSw&cfS1(RFo_@SrvJHg9)G@&i=n{T%624%FqXUeI*uGUh_X(OklgUA z$i>dr_u!qkPQyJg5^?hr5$rU8*M9XQ)HHY7Vj2U4ZwABc8D|`iS?caCfKvFq; 
zBVs7WapDi@qb-?fmrQkXnx#6hw2_TmP8 z`hlaD(dCthn0ZT)7#o4Ks}~su?_ys`l><33?Z6QfBS3rKf}2TKQC`7#U!9b-Gkb9P z!X*?@uSZ5?vSJ7qjvm7wfAbgIC~8LJg2iAei_)A+*tzclddUE(DL1fFFP^}A2acj1 zp@?LjGH-4K%!A#O-#(`0ZU~A=K-}CIM2308)jNQ+&r?s-El7+dmz=W-Ds?)q_j#?Kz7ro3>-i%BAqI zm}y7Mw1-ZxFkZiK?h4u|50c9W40X5CmmJ5tJKsZoSu00q3ZTyr{n<&|`TqV>=x39< zRmwtNE&X&lW!e=FZrg!H5x)At210;;0D^*o;LKQZxVMS*PvFFbIt0&}JC^UB-~T#Z z-+PjF%avo?&UpL^_Ma_8YsUbZiYRmMzKX({UNkoJAnEJ{bTw3?x3wBa_wL1Oue^cA zelx;o|Kmd)apcWk;aXN9<6P!oj*Kt7{W-n~2oCZ>E_q*CoPA5Nof0hMhPX2KJd?x;s5xRV3WTB3a3h06mp%7SRC9D9zF{X-m#i-OSO&-#~Pm&BnTX0!dXEFl!F`!~9N48wY%U0rv?f2oSryj@l z$G0INDx5xx2BxD1*REbgZKoq1e)2gy{qzoe;$u%>Wnuu5(~^;2*T)te8Hf$RVje+2 zSOo5R@DV)m^mBM($D?>~^BR;_R;z%-LOO?s22oy8M4`?@R%#j(mK1ond1K-HMUeF} zL@B(eZ1WZ@!`4Th!IMuthmSq?EY>Yvjsr|KdKpYh$YZ9#K19dW?-PorKldd(x#KB3 z^VDOwck^llGg%XmJKO4Yk)d7`XQyG;-h&wN2*C3%eH=TU-U@%2o#WUPT)%-U7cOJ# z_UEwesi*Pmvrnj}o64)IaU&^J@m41B(ANP^8kyDivVUyvg=Ze25$i=|DFe)*KJ>IU z;I+5*G5EX#Pe1=6dAS{rZr_SU3_fJiSk%mej%w-B1@Lz5!>h0S4sX7F80i_gXsBz) zFcU>h^C0@$>#*y+0|N8xj_GjZ-l9yUGr1fG52MLbLTx81)P z@v+fxa`8aIf`xeS$)|AleGed<^0V>IP1yeAcJ}j8Jh}Zr_e;8U{jrBIG}wuh z_?`pb?SJXV@S*IvR_YEJS2{r3eiTv%^JM8R=#s zmX8zXZ(z;Fd+@?1K7r@R&nF+;f`trFT-;nqwKIalnQSJ`fscnwCYTlftX4_Gs1ur- z`_ab16}$Wu_csEQX;dv=@W|s^5Ed1P z(>Drm$K4O%&K2{~&SbZN*~6%W(F5IZb@#x^yC25m&pd~ZzxXtQ1O1SZk%`WZ9))!r zO!b7BG%KgMd3t)$pfgZsz~|%dXVn8KpeaLm#?5+J&C7iA76Zi_Hj-au3tKX+IH=jok#o7&-VNngx<6DA#8v48NB@Q zC*eNOiW_NZXk#a)492Dakr8>ts)tVM$l;SnN-w3(K8|NU^(h9)v2b-7L3w%_E}uD% z%?~_Io3;Z_J@Yu8e{MS(o11aw{AHy#!V9O-UgkJ%h@5vP1G*<^7kA*f9go4s#f(k{ z^s+LFvrM3LbRd#Cc<;9Dv`0_iMcT-$v`jSBHmEz9bv0EueDpknl9hPwg%@bA9>XUd z-HZ|1$1~Tm(K#@kwVGnACx9_oXpFABZZSJ98p~I&!Gc9g=@-3K@Vt8UQmkIHM#;T; zPN3az$iJJRYGxz2|}ZkdR1v)~u#X zJViVD7#_X-GXZ6k85l7M)d{6uS4Thj<*bH4^jQ+Z;eHc? zwI$g4_zrA;=0QC1;2NAbc>x_HMLpw0T=I&ulbZ*&Jp2^p{TY1xvCRnZaK@3-*U;V5 zi|*Q7oIZaIBi?iI(3A9CLN|To0~;3ObY-6&#y7tDDO@>o2p3MB#VIB$ z=Q9iN+{>TEefMmHr`rfRDl&2P#&vkbEWs1ci{E|8oIP>~@9jH8y;zB_eCyw^dE+v; zvG2k@d4jDxKOI*tU&gAL06EXs!At6|>U=e1=$HSSX zUH8nce{8_n$pvw$H=*Q5&Al%%;X!q~MlvjPX>me5@lc zD;Y`2DY)ao9e8}lbJVXLxO>wb2=nt$woLq2V@U~ap1*+Ea~9#@r$q;!#HU_*7AD59 z`wyR@<#%Dcvm8%6^(19+GlBww`TKy1Q6v_-`_6UnV|*dt;N(o6M&h3P=`Sgtk3an& zKKZe&$jZz_eql96ICwJYlrWF-$$0khyYGarv&6$Hs+`9S>5 zxOfp<+}$SQ;R$&vQf@>qS+-OqSMn61aV^j@2qRsxnEMd>J&1SS*@-hJ&M49B>gz{0 zg9^0_66L{SqOWic#2s7i#Ws%f^UpnryOt%=Ui6^bdPCNS@!x_)EAa5PXS7XY+`f9j zV(fhNP4o_l?J>iJe$Y%FI|sz$o=2#sJD$PQ+a86NX&A|j!+R}koOyr@XKQmKit-AO z!(1UdCs#cwIw%RA?g5;b6g!Zwl2?eI(s2p15ynh%UO*=4ExjGgp+d0s{zsWpJxTxk zD7so&QC(icesORZW1c;B9Hqq-c=BVPVvIt6N!$L&L-!$eRv7#3jEMNTxQ{kv>%Hp` zmoOX4Hf+K6ryf$a?cw|HMmTd0Jz?mKh0E69QI7LN_uWh0$yAtHtVq+%zpFa2; z9B0kPV~l&nc6{Q6ZCE$gkGa-mWRdCTHH!fd=f6y#E_QK2L6@3%)&vmPo!%tp`XZ*RAviktzk~OOq zF)xm!ufB)5%VT(kIm}&amn(ZVSYM1&r%%BglE|_76mjgp$Dex)cP$D+(#@+#EnrM! 
[Unreadable base85-encoded binary patch payload omitted.]