diff --git a/test/external/fuzz_linearizer.py b/test/external/fuzz_linearizer.py
index fab8ec6f6586..57d74a15cd3f 100644
--- a/test/external/fuzz_linearizer.py
+++ b/test/external/fuzz_linearizer.py
@@ -140,7 +140,7 @@ def compare_linearizer(lin: Kernel, rawbufs=None, var_vals=None, ground_truth=No
 
   return ("PASS", rawbufs, var_vals, ground_truth, run_state)
 
-def fuzz_linearizer(lin: Kernel, rtol=1e-2, atol=1e-2):
+def fuzz_linearizer(lin: Kernel, rtol=1e-2, atol=1e-2, opts_list=None):
   SEED = getenv("SEED", 42)
   random.seed(SEED)
   np.random.seed(SEED)
@@ -162,10 +162,18 @@ def fuzz_linearizer(lin: Kernel, rtol=1e-2, atol=1e-2):
     print("skipping simple kernel")
     return failures
 
-  for depth in range(getenv("DEPTH", 1 if FUZZ_ALL_ACTIONS else 10)):
+  test_depth = 1 if opts_list is not None else getenv("DEPTH", 1 if FUZZ_ALL_ACTIONS else 10)
+  for depth in range(test_depth):
     next_lins = []
     for lin in last_lins:
-      actions = get_kernel_actions(lin, include_0=False)
+      if opts_list is None: actions = get_kernel_actions(lin, include_0=False)
+      else:
+        actions = {}
+        for oi,opts in enumerate(opts_list):
+          lin2 = lin.copy()
+          for o in opts: lin2.apply_opt(o)
+          actions[oi] = lin2
+
       if not actions: continue
       if depth == 0 and getenv("FUZZ_REQUIRE_TC", 0):
         tc_acts = {i: k for k in actions.values() if k.applied_opts[0].op == OptOps.TC}
@@ -174,7 +182,7 @@ def fuzz_linearizer(lin: Kernel, rtol=1e-2, atol=1e-2):
 
       test_lins = list(actions.values())
       if FUZZ_ALL_ACTIONS: print(f"testing {lin.applied_opts=} with {len(actions)} actions")
-      else: test_lins = [random.choice(test_lins)]
+      elif opts_list is None: test_lins = [random.choice(test_lins)]
 
       for test_lin in test_lins:
         if not FUZZ_ALL_ACTIONS and test_lin.applied_opts: print(f"applied opts: {test_lin.applied_opts}")
@@ -230,12 +238,14 @@ def _is_simple(lin: Kernel) -> bool:
   parser = argparse.ArgumentParser(description="Run a fuzz testing on one or more kernels", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
   parser.add_argument("--ast", type=str, default=None, help="the ast for the kernel to be optimized")
   parser.add_argument("--file", type=str, default=None, help="a file containing asts to be optimized, one per line")
+  parser.add_argument("--beamreplay", type=str, default=None, help="replay asts and opts got from beam with CAPTURE_BEAM")
   parser.add_argument("--logfile", type=str, default=None, help="a file containing a tuple of ast and applied_opts, one per line")
   parser.add_argument("--expected-failures", type=int, default=0, help="the number of expected failed kernels")
   parser.add_argument("--rtol", type=float, default=1e-2, help="relative tolerance for numerical comparison")
   parser.add_argument("--atol", type=float, default=1e-2, help="absolute tolerance for numerical comparison")
   args = parser.parse_args()
 
+  opts_list = None
   if args.ast is not None:
     print("loaded AST from CLI")
     ast_strs = [args.ast]
@@ -243,6 +253,16 @@ def _is_simple(lin: Kernel) -> bool:
     print(f"loading ASTs from file '{args.file}'")
     with open(args.file, 'r') as file:
       ast_strs = file.readlines()
+  elif args.beamreplay is not None:
+    print(f"loading BEAM replay from file '{args.beamreplay}'")
+    with open(args.beamreplay, 'r') as file: fdata = file.readlines()
+    ast_strs, opts_list = [x.split(' :: ')[0] for x in fdata], [x.split(' :: ')[1] for x in fdata]
+
+    # dedup ast_strs and opts_list
+    dct = defaultdict(list)
+    for i in range(len(ast_strs)): dct[ast_strs[i]].append(eval(opts_list[i]))
+    ast_strs_items = list(dct.keys())
+    opts_list = [dct[c] for c in ast_strs_items]
   elif args.logfile is not None:
     print(f"loading ASTs from LOGKERNS file '{args.file}'")
     with open(args.logfile, 'r') as file:
@@ -273,7 +293,7 @@ def _is_simple(lin: Kernel) -> bool:
 
       with Timing(f"tested ast {i}: "):
         tested += 1
-        fuzz_failures = fuzz_linearizer(lin, rtol=args.rtol, atol=args.atol)
+        fuzz_failures = fuzz_linearizer(lin, rtol=args.rtol, atol=args.atol, opts_list=(opts_list[i] if opts_list else None))
         if fuzz_failures: failed_ids.append(i)
         for k, v in fuzz_failures.items():
           for f in v:
diff --git a/tinygrad/engine/search.py b/tinygrad/engine/search.py
index 1d2dea66f25a..c5061e31061a 100644
--- a/tinygrad/engine/search.py
+++ b/tinygrad/engine/search.py
@@ -117,7 +117,7 @@ def get_kernel_actions(lin:Kernel, include_0=True) -> Dict[int, Kernel]:
     except KernelOptError: pass
   return acted_lins
 
-beam_pool, BEAM_DEBUG = None, getenv("BEAM_DEBUG")
+beam_pool, BEAM_DEBUG, CAPTURE_BEAM = None, getenv("BEAM_DEBUG"), getenv("CAPTURE_BEAM", "")
 def beam_search(lin:Kernel, rawbufs:List[Buffer], amt:int, allow_test_size=True, disable_cache=getenv("IGNORE_BEAM_CACHE")) -> Kernel:
   global beam_pool
   key = {"ast": lin.ast.key, "amt": amt, "allow_test_size": allow_test_size, "device": lin.opts.device, "suffix": lin.opts.suffix}
@@ -154,7 +154,8 @@ def beam_search(lin:Kernel, rawbufs:List[Buffer], amt:int, allow_test_size=True,
         # filter out kernels that use 1000x more compute than the smallest
         least_compute_ops = min(this_compute_ops:=sym_infer(p.op_estimate, var_vals), least_compute_ops)
         if least_compute_ops*1000 < this_compute_ops: continue
-        #print(acted_lins[i].colored_shape(), acted_lins[i].applied_opts)  # for debugging BEAMs that segfault
+        if len(CAPTURE_BEAM) > 0:
+          with open(CAPTURE_BEAM, 'a') as f: f.write(str(acted_lins[i].ast).replace('\n','')+f" :: {acted_lins[i].applied_opts}\n")
         seen_libs.add(lib)
         try: tms = _time_program(p, lib, var_vals, rawbufs, early_stop=beam[0][1]*3 if len(beam) else 1.0, clear_l2=hasattr(dev, 'invalidate_caches'))
         except RuntimeError: continue # for runtime issues