diff --git a/vllm_profile/profile_all.sh b/vllm_profile/profile_all.sh
deleted file mode 100755
index df55f25a69b8e..0000000000000
--- a/vllm_profile/profile_all.sh
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/bin/bash
-
-# This runs the entire script, generating two CSVs: one for 1 output token, 1 for 11 output tokens.
-# takes roughly 35 hours
-
-DECODE_TOKENS=(1 11)
-BATCH_SIZES=(1 2 4 8 16 32 64 128)
-CONTEXT_SIZES=(8 16 32 64 128 256 512 1024 2048 4096 8182)
-SM_COUNTS=(16 20 24 28 32 36 40 44 48 52 56 60 64 68 72 76 80 84 88 92 96 100 104 108 112 116 120 124 128 132)
-
-# this will overwrite existing CSVs
-echo "batch size,context size,SM count,milliseconds" > measured_times_decode1.csv
-echo "batch size,context size,SM count,milliseconds" > measured_times_decode11.csv
-
-for decode_tokens in "${DECODE_TOKENS[@]}"; do
-    for batch_size in "${BATCH_SIZES[@]}"; do
-        for context_size in "${CONTEXT_SIZES[@]}"; do
-            for sm_count in "${SM_COUNTS[@]}"; do
-                python profile_green_ctx.py "$decode_tokens" "$batch_size" "$context_size" "$sm_count"
-            done
-        done
-    done
-done
diff --git a/vllm_profile/profile_green_ctx.py b/vllm_profile/profile_green_ctx.py
deleted file mode 100644
index 1053ac8fa22b4..0000000000000
--- a/vllm_profile/profile_green_ctx.py
+++ /dev/null
@@ -1,75 +0,0 @@
-import argparse
-import json
-import os
-import sys
-
-from vllm import LLM, SamplingParams
-from green_ctx import init, make_shard
-
-
-def get_profile_args():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("decode_tokens", type=int)
-    parser.add_argument("batch_size", type=int)
-    parser.add_argument("context_size", type=int)
-    parser.add_argument("sm_count", type=int)
-
-    args = parser.parse_args()
-    return args.decode_tokens, args.batch_size, args.context_size, args.sm_count
-
-
-def get_kernel_time(trace_json):
-    with open(trace_json, 'r') as f:
-        data = json.load(f)
-    events = data['traceEvents']
-
-    total_kernel_time = 0  # microseconds
-    for event in events:
-        if 'cat' in event and event['cat'] == 'kernel':
-            total_kernel_time += event['dur']
-
-    total_kernel_time /= 1000
-    print(f'Kernel time for {trace_json}:', total_kernel_time, 'ms')
-    return total_kernel_time
-
-
-if __name__ == '__main__':
-    # init green context cuda stuff
-    init()
-
-    # get command line args
-    decode_tokens, batch_size, context_size, sm_count = get_profile_args()
-    print(f'\n\nProfile for decode_tokens={decode_tokens}, batch_size={batch_size} context_size={context_size}, sm_count={sm_count}\n\n')
-
-    with open(f'measured_times_decode{decode_tokens}.csv', 'r') as f:
-        content = f.read()
-        if f'{batch_size},{context_size},{sm_count},' in content:
-            print('skipped\n\n')
-            sys.exit()
-
-    # set env variables for storing trace json files
-    trace_dir = f"/workspace/vllm/vllm_profile/D{decode_tokens}_B{batch_size}_L{context_size}"
-    os.makedirs(trace_dir, exist_ok=True)
-    os.environ["VLLM_TORCH_PROFILER_DIR"] = trace_dir
-    os.environ["SM_COUNT"] = str(sm_count)
-
-    # init vLLM stuff
-    llm = LLM(
-        model="meta-llama/Meta-Llama-3-8B-Instruct",
-        # load_format='dummy',
-        enforce_eager=True,
-    )
-    prompts = ["hi" * (context_size - 1)]
-    sampling_params = SamplingParams(min_tokens=decode_tokens, max_tokens=decode_tokens)
-
-    # create green context and use for inference
-    green_ctx = make_shard(sm_count)
-    with green_ctx.with_context():
-        llm.start_profile()
-        outputs = llm.generate(prompts, sampling_params)
-        llm.stop_profile()
-
-    # get total kernel time from trace json and write to csv
-    time = get_kernel_time(os.path.join(trace_dir, f'sm{sm_count}.json'))
-    with open(f'measured_times_decode{decode_tokens}.csv', 'a') as f:
-        f.write(f'{batch_size},{context_size},{sm_count},{time}\n')