diff --git a/extra/ops.py b/extra/ops.py
index f246fae614c47..f612c7c423593 100644
--- a/extra/ops.py
+++ b/extra/ops.py
@@ -3,8 +3,8 @@
 import functools, hashlib
 from enum import Enum, auto
 from dataclasses import dataclass
-from tinygrad.helpers import dedup, pretty_print, prod
-from tinygrad.ops import ReduceOps, UnaryOps, BinaryOps, TernaryOps, UOp, UOps
+from tinygrad.helpers import dedup, prod
+from tinygrad.ops import ReduceOps, UnaryOps, BinaryOps, TernaryOps, UOp, UOps, pretty_print
 from tinygrad.dtype import ImageDType, PtrDType, dtypes, DType, ConstType
 from tinygrad.shape.symbolic import Variable, sint
 from tinygrad.shape.shapetracker import ShapeTracker
diff --git a/test/test_profiler.py b/test/test_profiler.py
index bae5351bcbc39..503303e0ddca5 100644
--- a/test/test_profiler.py
+++ b/test/test_profiler.py
@@ -1,7 +1,7 @@
 import unittest, struct, contextlib, tempfile, pathlib, json, time, atexit, random
 from tinygrad import Device, Tensor, dtypes, TinyJit
-from tinygrad.helpers import CI, getenv, Context, ProfileLogger
-from tinygrad.device import Buffer, BufferOptions, HCQCompiled
+from tinygrad.helpers import CI, getenv, Context
+from tinygrad.device import Buffer, BufferOptions, ProfileLogger, HCQCompiled
 from tinygrad.engine.schedule import create_schedule
 from tinygrad.engine.realize import get_runner
 
diff --git a/test/unit/test_helpers.py b/test/unit/test_helpers.py
index eead144baf85c..77580491fc0df 100644
--- a/test/unit/test_helpers.py
+++ b/test/unit/test_helpers.py
@@ -1,7 +1,9 @@
 import gzip, unittest
 from PIL import Image
 from tinygrad.helpers import Context, ContextVar
-from tinygrad.helpers import merge_dicts, strip_parens, prod, round_up, fetch, fully_flatten, from_mv, to_mv, get_contraction, get_shape
+from tinygrad.helpers import merge_dicts, strip_parens, prod, round_up, fetch, fully_flatten, from_mv, to_mv
+from tinygrad.tensor import get_shape
+from tinygrad.codegen.lowerer import get_contraction
 from tinygrad.shape.symbolic import Variable, NumNode
 import numpy as np
 
diff --git a/tinygrad/codegen/kernel.py b/tinygrad/codegen/kernel.py
index d142130028a9e..ddbacc5c054f8 100644
--- a/tinygrad/codegen/kernel.py
+++ b/tinygrad/codegen/kernel.py
@@ -10,13 +10,13 @@
 from tinygrad.device import Device
 from tinygrad.renderer import Renderer, TensorCore, Program
 from tinygrad.dtype import ImageDType, PtrDType
-from tinygrad.helpers import all_same, colored, ansilen, dedup, getenv, prod, round_up, all_int, get_contraction, to_function_name, diskcache_put
+from tinygrad.helpers import all_same, colored, ansilen, dedup, getenv, prod, round_up, all_int, to_function_name, diskcache_put
 from tinygrad.helpers import DEBUG, TC_OPT, USE_TC, AMX
 from tinygrad.shape.shapetracker import ShapeTracker
 from tinygrad.shape.symbolic import Variable, sint
 from tinygrad.shape.view import strides_for_shape
 from tinygrad.codegen.uopgraph import linearize_uop, full_graph_rewrite
-from tinygrad.codegen.lowerer import ast_to_uop
+from tinygrad.codegen.lowerer import ast_to_uop, get_contraction
 
 class OptOps(Enum):
   TC = auto(); UPCAST = auto(); UPCASTMID = auto(); UNROLL = auto(); LOCAL = auto() # noqa: E702
diff --git a/tinygrad/codegen/lowerer.py b/tinygrad/codegen/lowerer.py
index 417716ac1a534..87d3be02258f7 100644
--- a/tinygrad/codegen/lowerer.py
+++ b/tinygrad/codegen/lowerer.py
@@ -1,6 +1,6 @@
 # the job of the lowerer is to do indexing
 from __future__ import annotations
-import functools
+import functools, itertools, operator
 from dataclasses import dataclass
 from typing import List, Tuple, cast, Optional
 from tinygrad.shape.shapetracker import ShapeTracker, variable_to_uop
@@ -8,7 +8,14 @@
 from tinygrad.dtype import dtypes
 from tinygrad.ops import KernelInfo, BinaryOps, UOp, UOps, graph_rewrite, PatternMatcher, UPat, resolve
 from tinygrad.renderer import Renderer
-from tinygrad.helpers import all_int, get_contraction, prod, partition, flatten
+from tinygrad.helpers import all_int, prod, partition, flatten
+
+# returns the axes to create new_shape if new_shape can be created by combining axis from old_shape
+def get_contraction(old_shape:Tuple[sint, ...], new_shape:Tuple[sint, ...]) -> Optional[List[List[int]]]:
+  acc_old, acc_new = list(itertools.accumulate(old_shape, operator.mul)), list(itertools.accumulate(new_shape, operator.mul))
+  try: split = [acc_old.index(acc)+1 if acc != 1 else 0 for acc in acc_new]
+  except ValueError: return None
+  return [list(range(st,ed)) for st,ed in zip([0]+split[:-1], split[:-1]+[len(old_shape)])]
 
 # ***** indexing *****
 
diff --git a/tinygrad/device.py b/tinygrad/device.py
index 5449429f10417..c356237f7d17b 100644
--- a/tinygrad/device.py
+++ b/tinygrad/device.py
@@ -1,10 +1,10 @@
 from __future__ import annotations
-import multiprocessing, decimal, statistics, random
+import multiprocessing, decimal, statistics, random, json
 from dataclasses import dataclass, replace
 from collections import defaultdict
-from typing import List, Optional, Dict, Tuple, Any, cast, Protocol, Type, Iterator
+from typing import List, Optional, Dict, Tuple, Any, cast, Protocol, Type, Iterator, Union
 import importlib, inspect, functools, pathlib, os, ctypes, atexit, time, contextlib, array
-from tinygrad.helpers import SAVE_SCHEDULE, getenv, diskcache_get, diskcache_put, DEBUG, GlobalCounters, flat_mv, from_mv, ProfileLogger, PROFILE
+from tinygrad.helpers import SAVE_SCHEDULE, getenv, diskcache_get, diskcache_put, DEBUG, GlobalCounters, flat_mv, from_mv, PROFILEPATH, PROFILE
 from tinygrad.dtype import DType, ImageDType
 from tinygrad.renderer import Renderer
 
@@ -495,6 +495,44 @@ def __call__(self, *bufs:HCQBuffer, global_size:Tuple[int,int,int]=(1,1,1), loca
     if wait: self.device.timeline_signal.wait(self.device.timeline_value - 1)
     return (float(sig_en.timestamp - sig_st.timestamp) / 1e6) if wait else None
 
+class ProfileLogger:
+  writers: int = 0
+  mjson: List[Dict] = []
+  actors: Dict[Union[str, Tuple[str, str]], int] = {}
+
+  def __init__(self): self.events, self.deps, ProfileLogger.writers = [], [], ProfileLogger.writers + 1
+
+  def add_event(self, ev_name, ev_start, ev_end, actor, subactor=None, args=None): self.events += [(ev_name, ev_start, ev_end, actor, subactor, args)]
+
+  def _ensure_actor(self, actor_name, subactor_name):
+    if actor_name not in self.actors:
+      self.actors[actor_name] = (pid:=len(self.actors))
+      self.mjson.append({"name": "process_name", "ph": "M", "pid": pid, "args": {"name": actor_name}})
+
+    if (subactor_key:=(actor_name,subactor_name)) not in self.actors:
+      self.actors[subactor_key] = (tid:=len(self.actors))
+      self.mjson.append({"name": "thread_name", "ph": "M", "pid": self.actors[actor_name], "tid":tid, "args": {"name": subactor_name}})
+
+    return self.actors[actor_name], self.actors.get(subactor_key, -1)
+
+  def __del__(self):
+    # perfetto json docs: https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview
+    for name, st, et, actor_name, subactor_name, args in self.events:
+      pid, tid = self._ensure_actor(actor_name,subactor_name)
+      args = {k: (v if v.__class__ is str else v(et-st)) for k, v in args.items()} if args is not None else None
+      self.mjson.append({"name": name, "ph": "X", "pid": pid, "tid": tid, "ts": st, "dur": et-st, "args": args})
+
+    for en,st,dep_actor_name,dep_subactor_name,actor_name,subactor_name in self.deps:
+      dep_pid, dep_tid = self._ensure_actor(dep_actor_name,dep_subactor_name)
+      pid, tid = self._ensure_actor(actor_name,subactor_name)
+      self.mjson.append({"ph": "s", "pid": dep_pid, "tid": dep_tid, "id": len(self.mjson), "ts": en, "bp": "e"})
+      self.mjson.append({"ph": "f", "pid": pid, "tid": tid, "id": len(self.mjson)-1, "ts": st, "bp": "e"})
+
+    ProfileLogger.writers -= 1
+    if ProfileLogger.writers == 0 and len(self.mjson) > 0:
+      with open(PROFILEPATH.value, "w") as f: f.write(json.dumps({"traceEvents": self.mjson}))
+      print(f"Saved profile to {PROFILEPATH.value}. Use https://ui.perfetto.dev/ to open it.")
+
 class HCQCompiled(Compiled):
   """
   A base class for devices compatible with the HCQ (Hardware Command Queue) API.
diff --git a/tinygrad/helpers.py b/tinygrad/helpers.py
index dcac86491fa38..4a19c53327c07 100644
--- a/tinygrad/helpers.py
+++ b/tinygrad/helpers.py
@@ -1,11 +1,10 @@
 from __future__ import annotations
 import os, functools, platform, time, re, contextlib, operator, hashlib, pickle, sqlite3, tempfile, pathlib, string, ctypes, sys, gzip
-import itertools, urllib.request, subprocess, shutil, math, json, contextvars, types, copyreg, inspect, importlib
+import urllib.request, subprocess, shutil, math, contextvars, types, copyreg, inspect, importlib
 from dataclasses import dataclass
 from typing import Dict, Tuple, Union, List, ClassVar, Optional, Iterable, Any, TypeVar, TYPE_CHECKING, Callable, Sequence
 if TYPE_CHECKING:  # TODO: remove this and import TypeGuard from typing once minimum python supported version is 3.10
   from typing_extensions import TypeGuard
-  from tinygrad.shape.shapetracker import sint
 
 T = TypeVar("T")
 U = TypeVar("U")
@@ -68,21 +67,6 @@ def get_child(obj, key):
     else: obj = getattr(obj, k)
   return obj
 
-def get_shape(x) -> Tuple[int, ...]:
-  if not hasattr(x, "__len__") or not hasattr(x, "__getitem__") or isinstance(x, str): return ()
-  if (aapi := (hasattr(x, "shape") and x.shape == ())): return ()
-  subs = [get_shape(xi) for xi in x]
-  if not all_same(subs): raise ValueError(f"inhomogeneous shape from {x}")
-  slen = 1 if aapi else len(subs)
-  return (slen,) + (subs[0] if subs else ())
-
-# returns the axes to create new_shape if new_shape can be created by combining axis from old_shape
-def get_contraction(old_shape:Tuple[sint, ...], new_shape:Tuple[sint, ...]) -> Optional[List[List[int]]]:
-  acc_old, acc_new = list(itertools.accumulate(old_shape, operator.mul)), list(itertools.accumulate(new_shape, operator.mul))
-  try: split = [acc_old.index(acc)+1 if acc != 1 else 0 for acc in acc_new]
-  except ValueError: return None
-  return [list(range(st,ed)) for st,ed in zip([0]+split[:-1], split[:-1]+[len(old_shape)])]
-
 @functools.lru_cache(maxsize=None)
 def to_function_name(s:str): return ''.join([c if c in (string.ascii_letters+string.digits+'_') else f'{ord(c):02X}' for c in ansistrip(s)])
 @functools.lru_cache(maxsize=None)
@@ -171,44 +155,6 @@ def __exit__(self, *exc):
             colored(_format_fcn(fcn).ljust(50), "yellow"),
             colored(f"<- {(scallers[0][1][2]/tottime)*100:3.0f}% {_format_fcn(scallers[0][0])}", "BLACK") if scallers else '')
 
-class ProfileLogger:
-  writers: int = 0
-  mjson: List[Dict] = []
-  actors: Dict[Union[str, Tuple[str, str]], int] = {}
-
-  def __init__(self): self.events, self.deps, ProfileLogger.writers = [], [], ProfileLogger.writers + 1
-
-  def add_event(self, ev_name, ev_start, ev_end, actor, subactor=None, args=None): self.events += [(ev_name, ev_start, ev_end, actor, subactor, args)]
-
-  def _ensure_actor(self, actor_name, subactor_name):
-    if actor_name not in self.actors:
-      self.actors[actor_name] = (pid:=len(self.actors))
-      self.mjson.append({"name": "process_name", "ph": "M", "pid": pid, "args": {"name": actor_name}})
-
-    if (subactor_key:=(actor_name,subactor_name)) not in self.actors:
-      self.actors[subactor_key] = (tid:=len(self.actors))
-      self.mjson.append({"name": "thread_name", "ph": "M", "pid": self.actors[actor_name], "tid":tid, "args": {"name": subactor_name}})
-
-    return self.actors[actor_name], self.actors.get(subactor_key, -1)
-
-  def __del__(self):
-    # perfetto json docs: https://docs.google.com/document/d/1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview
-    for name, st, et, actor_name, subactor_name, args in self.events:
-      pid, tid = self._ensure_actor(actor_name,subactor_name)
-      args = {k: (v if v.__class__ is str else v(et-st)) for k, v in args.items()} if args is not None else None
-      self.mjson.append({"name": name, "ph": "X", "pid": pid, "tid": tid, "ts": st, "dur": et-st, "args": args})
-
-    for en,st,dep_actor_name,dep_subactor_name,actor_name,subactor_name in self.deps:
-      dep_pid, dep_tid = self._ensure_actor(dep_actor_name,dep_subactor_name)
-      pid, tid = self._ensure_actor(actor_name,subactor_name)
-      self.mjson.append({"ph": "s", "pid": dep_pid, "tid": dep_tid, "id": len(self.mjson), "ts": en, "bp": "e"})
-      self.mjson.append({"ph": "f", "pid": pid, "tid": tid, "id": len(self.mjson)-1, "ts": st, "bp": "e"})
-
-    ProfileLogger.writers -= 1
-    if ProfileLogger.writers == 0 and len(self.mjson) > 0:
-      with open(PROFILEPATH.value, "w") as f: f.write(json.dumps({"traceEvents": self.mjson}))
-      print(f"Saved profile to {PROFILEPATH.value}. Use https://ui.perfetto.dev/ to open it.")
-
 # *** universal database cache ***
 
 _cache_dir: str = getenv("XDG_CACHE_HOME", os.path.expanduser("~/Library/Caches" if OSX else "~/.cache"))
@@ -363,16 +309,6 @@ def SI(x): return (f"{x/1000**int(g:=math.log(x,1000)):.{int(3-3*math.fmod(g,1))
 class trange(tqdm):
   def __init__(self, n:int, **kwargs): super().__init__(iterable=range(n), total=n, **kwargs)
 
-def pretty_print(x:Any, rep:Callable, srcfn=lambda x: x.src, cache=None, d=0)->str:
-  def dfs(x:Any, cache:dict):
-    for s in srcfn(x) or []:
-      cache.setdefault(s, [len(cache), 0, False])[1] += 1
-      if cache[s][1] == 1: dfs(s, cache)
-  if cache is None: dfs(x, cache:={})
-  if (cx:=cache.setdefault(x, [0,0,False]))[2]: return f"{' '*d} x{cx[0]}"
-  cx[2], srcs = True, ('None' if srcfn(x) is None else ''.join(f'\n{pretty_print(s, rep, srcfn, cache, d+2)},' for s in srcfn(x)))
-  return f"{' '*d}{f'x{cx[0]}:=' * (cx[1]>1)}{rep(x)}" % srcs
-
 # *** universal support for code object pickling
 
 def _reconstruct_code(*args): return types.CodeType(*args)
diff --git a/tinygrad/ops.py b/tinygrad/ops.py
index 20f5691dff6d3..0142bb45ca0d0 100644
--- a/tinygrad/ops.py
+++ b/tinygrad/ops.py
@@ -6,7 +6,7 @@
 from dataclasses import dataclass, field
 from weakref import WeakValueDictionary
 from tinygrad.dtype import ConstType, ImageDType, PtrDType, dtypes, DType, truncate
-from tinygrad.helpers import ContextVar, pretty_print, prod, getenv, all_same
+from tinygrad.helpers import ContextVar, prod, getenv, all_same
 if TYPE_CHECKING:
   from tinygrad.shape.symbolic import Variable, sint
   from tinygrad.shape.shapetracker import ShapeTracker
@@ -159,6 +159,17 @@ def resolve(x, default:bool=True):
 def smax(lst): return max(lst, key=lambda x: x if isinstance(x, int) else x.vmax)
 def ssimplify(uop): return uop.ssimplify() if isinstance(uop, UOp) else uop
 
+# used for UOp and UPat
+def pretty_print(x:Any, rep:Callable, srcfn=lambda x: x.src, cache=None, d=0)->str:
+  def dfs(x:Any, cache:dict):
+    for s in srcfn(x) or []:
+      cache.setdefault(s, [len(cache), 0, False])[1] += 1
+      if cache[s][1] == 1: dfs(s, cache)
+  if cache is None: dfs(x, cache:={})
+  if (cx:=cache.setdefault(x, [0,0,False]))[2]: return f"{' '*d} x{cx[0]}"
+  cx[2], srcs = True, ('None' if srcfn(x) is None else ''.join(f'\n{pretty_print(s, rep, srcfn, cache, d+2)},' for s in srcfn(x)))
+  return f"{' '*d}{f'x{cx[0]}:=' * (cx[1]>1)}{rep(x)}" % srcs
+
 ucache:WeakValueDictionary[Tuple, UOp] = WeakValueDictionary()
 class UOp(MathTrait):
   def __reduce__(self): return UOp, (self.op, self.dtype, self.src, self.arg)
diff --git a/tinygrad/tensor.py b/tinygrad/tensor.py
index 00e5ebc3c8d2b..69a68c1ecdf46 100644
--- a/tinygrad/tensor.py
+++ b/tinygrad/tensor.py
@@ -6,7 +6,7 @@
 from collections import defaultdict
 
 from tinygrad.dtype import DType, DTypeLike, dtypes, ImageDType, ConstType, least_upper_float, least_upper_dtype, sum_acc_dtype, to_dtype
-from tinygrad.helpers import argfix, make_pair, flatten, prod, all_int, round_up, merge_dicts, argsort, getenv, get_shape, fully_flatten, dedup
+from tinygrad.helpers import argfix, make_pair, flatten, prod, all_int, round_up, merge_dicts, argsort, getenv, all_same, fully_flatten, dedup
 from tinygrad.helpers import IMAGE, DEBUG, WINO, _METADATA, Metadata, TRACEMETA
 from tinygrad.lazy import LazyBuffer
 from tinygrad.multi import MultiLazyBuffer
@@ -57,6 +57,14 @@ def _fromnp(x: 'np.ndarray') -> LazyBuffer: # type: ignore [name-defined] # noq
   del ret.srcs
   return ret
 
+def get_shape(x) -> Tuple[int, ...]:
+  if not hasattr(x, "__len__") or not hasattr(x, "__getitem__") or isinstance(x, str): return ()
+  if (aapi := (hasattr(x, "shape") and x.shape == ())): return ()
+  subs = [get_shape(xi) for xi in x]
+  if not all_same(subs): raise ValueError(f"inhomogeneous shape from {x}")
+  slen = 1 if aapi else len(subs)
+  return (slen,) + (subs[0] if subs else ())
+
 def _frompy(x:Union[List, Tuple, bytes], dtype:DType) -> LazyBuffer:
   if isinstance(x, bytes): ret, data = LazyBuffer.metaop(MetaOps.EMPTY, (len(x)//dtype.itemsize,), dtype, "PYTHON"), x
   else: