forked from tinygrad/tinygrad
Commit ff6255b: Deployed 3195bd0 with MkDocs version: 1.6.1
Showing 85 changed files with 57,445 additions and 0 deletions.

@@ -0,0 +1 @@
docs.tinygrad.org

@@ -0,0 +1,118 @@
# tinygrad is a tensor library, and as a tensor library it has multiple parts
# 1. a "runtime". this allows buffer management, compilation, and running programs
# 2. a "Device" that uses the runtime but specifies compute in an abstract way for all backends
# 3. a "LazyBuffer" that fuses the compute into kernels, using memory only when needed
# 4. a "Tensor" that provides an easy to use frontend with autograd ".backward()"


print("******** first, the runtime ***********")

from tinygrad.runtime.ops_clang import ClangProgram, ClangCompiler, MallocAllocator

# allocate some buffers
out = MallocAllocator.alloc(4)
a = MallocAllocator.alloc(4)
b = MallocAllocator.alloc(4)

# load in some values (little endian)
MallocAllocator._copyin(a, memoryview(bytearray([2,0,0,0])))
MallocAllocator._copyin(b, memoryview(bytearray([3,0,0,0])))

# compile a program to a binary
lib = ClangCompiler().compile("void add(int *out, int *a, int *b) { out[0] = a[0] + b[0]; }")

# create a runtime for the program (ctypes.CDLL)
fxn = ClangProgram("add", lib)

# run the program
fxn(out, a, b)

# check the data out
print(val := MallocAllocator._as_buffer(out).cast("I").tolist()[0])
assert val == 5
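
# Added aside (not in the original walkthrough): the compiled program is just a C function,
# so it can be called again after copying fresh values into the same buffers.
# This sketch reuses only the calls already shown above.
MallocAllocator._copyin(a, memoryview(bytearray([10,0,0,0])))
MallocAllocator._copyin(b, memoryview(bytearray([20,0,0,0])))
fxn(out, a, b)
assert MallocAllocator._as_buffer(out).cast("I").tolist()[0] == 30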


print("******** second, the Device ***********")

DEVICE = "CLANG"  # NOTE: you can change this!

import struct
from tinygrad.dtype import dtypes
from tinygrad.device import Buffer, Device
from tinygrad.ops import UOp, Ops
from tinygrad.shape.shapetracker import ShapeTracker

# allocate some buffers + load in values
out = Buffer(DEVICE, 1, dtypes.int32).allocate()
a = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struct.pack("I", 2))))
b = Buffer(DEVICE, 1, dtypes.int32).allocate().copyin(memoryview(bytearray(struct.pack("I", 3))))
# NOTE: a._buf is the same as the return from MallocAllocator.alloc

# describe the computation
buf_1 = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 1)
buf_2 = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 2)
ld_1 = UOp(Ops.LOAD, dtypes.int32, (buf_1, ShapeTracker.from_shape((1,)).to_uop()))
ld_2 = UOp(Ops.LOAD, dtypes.int32, (buf_2, ShapeTracker.from_shape((1,)).to_uop()))
alu = ld_1 + ld_2
output_buf = UOp(Ops.DEFINE_GLOBAL, dtypes.int32.ptr(), (), 0)
st_0 = UOp(Ops.STORE, dtypes.void, (output_buf, ShapeTracker.from_shape((1,)).to_uop(), alu))
s = UOp(Ops.SINK, dtypes.void, (st_0,))
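
# Added commentary on the graph built above (not from the original file):
# - the last argument to DEFINE_GLOBAL is the buffer index; the output buffer is index 0
#   and the inputs are 1 and 2, matching the order the buffers are passed to exec below
# - each LOAD/STORE carries a ShapeTracker uop describing how the buffer is indexed,
#   here just a (1,)-shaped view
# - SINK collects the STOREs the kernel must perform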

# convert the computation to a "linearized" format (print the format)
from tinygrad.engine.realize import get_kernel, CompiledRunner
kernel = get_kernel(Device[DEVICE].renderer, s).linearize()

# compile a program (and print the source)
fxn = CompiledRunner(kernel.to_program())
print(fxn.p.src)
# NOTE: fxn.clprg is the ClangProgram

# run the program
fxn.exec([out, a, b])

# check the data out
assert out.as_buffer().cast('I')[0] == 5
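
# Added sketch: like the raw ClangProgram in part one, the CompiledRunner can be executed
# again after new values are copied into the input Buffers (only calls shown above are used).
a.copyin(memoryview(bytearray(struct.pack("I", 10))))
b.copyin(memoryview(bytearray(struct.pack("I", 20))))
fxn.exec([out, a, b])
assert out.as_buffer().cast('I')[0] == 30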


print("******** third, the LazyBuffer ***********")

from tinygrad.engine.realize import run_schedule
from tinygrad.engine.schedule import create_schedule

# allocate some values + load in values
a = UOp.metaop(Ops.EMPTY, (1,), dtypes.int32, DEVICE)
b = UOp.metaop(Ops.EMPTY, (1,), dtypes.int32, DEVICE)
a.buffer.allocate().copyin(memoryview(bytearray(struct.pack("I", 2))))
b.buffer.allocate().copyin(memoryview(bytearray(struct.pack("I", 3))))
a = a.buf_uop_view()
b = b.buf_uop_view()

# describe the computation
out = a.alu(Ops.ADD, b)

# schedule the computation as a list of kernels
sched = create_schedule([out])
for si in sched: print(si.ast.op)  # NOTE: the first two convert it to CLANG

# DEBUGGING: print the compute ast
print(sched[-1].ast)
# NOTE: sched[-1].ast is the same as st_0 above

# run that schedule
run_schedule(sched)

# check the data out
assert out.realized is not None and out.realized.as_buffer().cast('I')[0] == 5
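
# Added note: after run_schedule, out.realized is a Buffer on DEVICE, the same kind of
# object as `out` in part two, so the same as_buffer() readback works on it.
print(out.realized)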


print("******** fourth, the Tensor ***********")

from tinygrad import Tensor

a = Tensor([2], dtype=dtypes.int32, device=DEVICE)
b = Tensor([3], dtype=dtypes.int32, device=DEVICE)
out = a + b

# check the data out
print(val:=out.item())
assert val == 5
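
# Added sketch (not part of the original file): the Tensor layer also provides the autograd
# ".backward()" mentioned in the intro comments. A minimal example with float Tensors:
x = Tensor([2.0], requires_grad=True)
y = (x * 3).sum()
y.backward()
print(x.grad.item())  # 3.0, since d(3x)/dx = 3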

@@ -0,0 +1,62 @@
# abstractions2 goes from back to front, here we will go from front to back
from typing import List
from tinygrad.helpers import tqdm

# *****
# 0. Load mnist on the device

from tinygrad.nn.datasets import mnist
X_train, Y_train, _, _ = mnist()
X_train = X_train.float()
X_train -= X_train.mean()

# *****
# 1. Define an MNIST model.

from tinygrad import Tensor

l1 = Tensor.kaiming_uniform(128, 784)
l2 = Tensor.kaiming_uniform(10, 128)
def model(x): return x.flatten(1).dot(l1.T).relu().dot(l2.T)
l1n, l2n = l1.numpy(), l2.numpy()
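
# Added commentary on the shapes flowing through model(), assuming the standard MNIST
# layout of (N, 1, 28, 28) images:
#   x.flatten(1) -> (N, 784), dot(l1.T) -> (N, 128), relu, dot(l2.T) -> (N, 10) logits
# l1n/l2n keep numpy copies of the initial weights so the change can be printed in step 6.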

# *****
# 2. Choose a batch for training and do the backward pass.

from tinygrad.nn.optim import SGD
optim = SGD([l1, l2])

Tensor.training = True
X, Y = X_train[(samples:=Tensor.randint(128, high=X_train.shape[0]))], Y_train[samples]
optim.zero_grad()
model(X).sparse_categorical_crossentropy(Y).backward()
optim.schedule_step()  # this will step the optimizer without running realize

# *****
# 3. Create a schedule.

# The weight Tensors have been assigned to, but not yet realized. Everything is still lazy at this point
# l1.lazydata and l2.lazydata define a computation graph

from tinygrad.engine.schedule import ScheduleItem
schedule: List[ScheduleItem] = Tensor.schedule(l1, l2)

print(f"The schedule contains {len(schedule)} items.")
for si in schedule: print(str(si)[:80])

# *****
# 4. Lower a schedule.

from tinygrad.engine.realize import lower_schedule_item, ExecItem
lowered: List[ExecItem] = [ExecItem(lower_schedule_item(si).prg, list(si.bufs)) for si in tqdm(schedule)]
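
# Added note: lower_schedule_item turns each ScheduleItem into a runnable program, and
# ExecItem pairs that program with the concrete Buffers it reads and writes.
# Nothing has actually executed yet at this point.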

# *****
# 5. Run the schedule

for ei in tqdm(lowered): ei.run()

# *****
# 6. Print the weight change

print("first weight change\n", l1.numpy()-l1n)
print("second weight change\n", l2.numpy()-l2n)