Skip to content

Commit

Permalink
Profiler changes (#95)
Browse files Browse the repository at this point in the history
  • Loading branch information
michaeleisel authored Oct 21, 2024
1 parent 56017f6 commit 8df984f
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 137 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ ETTrace-iphoneos.xcarchive/
output.json
output.folded
.swiftpm
.build
.build
output_*.json
104 changes: 104 additions & 0 deletions ETTrace/Tracer/EMGStackTraceRecorder.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#include "EMGStackTraceRecorder.h"

#import <QuartzCore/QuartzCore.h>
#import <mach-o/arch.h>
#import <mach/mach.h>
#import <pthread.h>
#import <deque>
#import <iostream>
#import <mutex>
#import <unordered_map>

extern "C" {
void FIRCLSWriteThreadStack(thread_t thread, uintptr_t *frames, uint64_t framesCapacity, uint64_t *framesWritten);
}

static const int kMaxFramesPerStack = 1024;

// Reports a failed Mach call on stderr and passes the status code through
// untouched, so a call site can both log and branch on the same value.
//
// result: the kern_return_t produced by the Mach call being checked.
// Returns `result` unchanged.
kern_return_t checkMachCall(kern_return_t result) {
    const bool succeeded = (result == KERN_SUCCESS);
    if (!succeeded) {
        std::cerr << "Mach call failed with " << result << std::endl;
    }
    return result;
}

// Creates the record for one sampled thread and works out its display name.
//
// threadId:     Mach port name of the thread this record describes.
// mainThreadId: Mach port name of the main thread; a match yields "Main Thread".
//
// For any other thread the name is read through the pthread API. If the
// pthread lookup or the name query fails, the placeholder set in the
// initializer list is kept.
Thread::Thread(thread_t threadId, thread_t mainThreadId) : name("Failed to get name") {
    if (threadId == mainThreadId) {
        name = "Main Thread";
        return;
    }

    pthread_t pthreadHandle = pthread_from_mach_thread_np(threadId);
    if (!pthreadHandle) {
        return;
    }

    char nameBuffer[1024];
    if (pthread_getname_np(pthreadHandle, nameBuffer, sizeof(nameBuffer)) == 0) {
        name = nameBuffer;
    }
}

std::vector<ThreadSummary> EMGStackTraceRecorder::collectThreadSummaries() {
std::lock_guard<std::mutex> lockGuard(threadsLock);

std::vector<ThreadSummary> summaries;
for (const auto &[threadId, thread] : threadsMap) {
std::vector<StackSummary> stackSummaries;
for (const auto &stack : thread.stacks) {
std::vector<uintptr_t> addresses;
for (auto i = stack.storageStartIndex; i < stack.storageEndIndex; i++) {
addresses.emplace_back(addressStorage[i]);
}
// Reverse the stack addresses to get the correct order
std::reverse(addresses.begin(), addresses.end());
stackSummaries.emplace_back(stack.time, addresses);
}
summaries.emplace_back(threadId, thread.name, stackSummaries);
}
return summaries;
}

// Captures one stack sample for each target thread and appends it to the
// recorder's storage.
//
// recordAllThreads: when true, every thread returned by task_threads() is
//                   sampled; when false, only mainMachThread is sampled.
// mainMachThread:   Mach port name of the main thread (also used to label it).
// etTraceThread:    Mach port name of the sampling thread itself; it is skipped
//                   so the sampler never suspends itself.
//
// Holds threadsLock for the whole call, so it is mutually exclusive with
// collectThreadSummaries().
void EMGStackTraceRecorder::recordStackForAllThreads(bool recordAllThreads, thread_t mainMachThread, thread_t etTraceThread) {
    std::lock_guard<std::mutex> lockGuard(threadsLock);
    thread_act_array_t threads = nullptr;
    mach_msg_type_number_t threadCount = 0;
    // True only when `threads` came from task_threads() and must be released.
    bool ownsThreadList = false;
    if (recordAllThreads) {
        kern_return_t result = checkMachCall(task_threads(mach_task_self(), &threads, &threadCount));
        if (result == KERN_SUCCESS) {
            ownsThreadList = true;
        } else {
            threads = nullptr;
            threadCount = 0;
        }
    } else {
        threads = &mainMachThread;
        threadCount = 1;
    }

    // This time gets less accurate for later threads, but still good
    CFTimeInterval time = CACurrentMediaTime();
    for (mach_msg_type_number_t i = 0; i < threadCount; i++) {
        if (threads[i] == etTraceThread) {
            continue;
        }

        uintptr_t frames[kMaxFramesPerStack];
        uint64_t frameCount = 0;

        if (thread_suspend(threads[i]) != KERN_SUCCESS) {
            // In theory, the thread may have been destroyed by now, so we exit early if this fails
            continue;
        }
        // BEGIN REENTRANT SECTION
        // Nothing that could take a lock (allocation, logging) may run while
        // the target thread is suspended.
        FIRCLSWriteThreadStack(threads[i], frames, kMaxFramesPerStack, &frameCount);
        // END REENTRANT SECTION
        checkMachCall(thread_resume(threads[i]));

        // Never read past the local buffer, whatever count was reported.
        if (frameCount > static_cast<uint64_t>(kMaxFramesPerStack)) {
            frameCount = static_cast<uint64_t>(kMaxFramesPerStack);
        }

        // Creates the Thread entry (and resolves its name) the first time this
        // thread is seen; otherwise returns the existing entry.
        auto emplaceResult = threadsMap.try_emplace(threads[i], threads[i], mainMachThread);
        size_t startIndex = addressStorage.size();
        for (uint64_t frameIdx = 0; frameIdx < frameCount; frameIdx++) {
            addressStorage.emplace_back(frames[frameIdx]);
        }
        size_t endIndex = addressStorage.size();
        emplaceResult.first->second.stacks.emplace_back(time, startIndex, endIndex);
    }

    // task_threads() hands the caller a send right per thread plus a
    // kernel-allocated array. Both must be released, otherwise every sample
    // leaks port references and memory.
    if (ownsThreadList && threads != nullptr) {
        for (mach_msg_type_number_t i = 0; i < threadCount; i++) {
            mach_port_deallocate(mach_task_self(), threads[i]);
        }
        vm_deallocate(mach_task_self(),
                      reinterpret_cast<vm_address_t>(threads),
                      threadCount * sizeof(thread_act_t));
    }
}
50 changes: 50 additions & 0 deletions ETTrace/Tracer/EMGStackTraceRecorder.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#import <deque>
#import <vector>
#import <unordered_map>
#import <mach/mach.h>
#import <QuartzCore/QuartzCore.h>
#import <iostream>

// One captured stack sample, holding its own copy of the frame addresses.
struct StackSummary {
    CFTimeInterval time;            // Timestamp associated with the sample
    std::vector<uintptr_t> stack;   // Frame addresses, copied from the constructor argument

    // Copies `stack` into the summary. The parameter is a const reference so
    // const vectors and temporaries are accepted as well as plain lvalues.
    StackSummary(CFTimeInterval time, const std::vector<uintptr_t> &stack) : time(time), stack(stack) {
    }
};

// Everything recorded for one thread: its id, display name and stack samples.
struct ThreadSummary {
    thread_t threadId;                  // Mach port name identifying the thread
    std::string name;                   // Display name of the thread
    std::vector<StackSummary> stacks;   // Samples, copied from the constructor argument

    // Copies `name` and `stacks` into the summary. `stacks` is a const
    // reference so const vectors and temporaries are accepted as well as
    // plain lvalues.
    ThreadSummary(thread_t threadId, const std::string &name, const std::vector<StackSummary> &stacks) : threadId(threadId), name(name), stacks(stacks) {
    }
};

// Compact record of one stack sample: a timestamp plus the half-open index
// range [storageStartIndex, storageEndIndex) that locates its frame addresses
// in a separate address container.
struct Stack {
    CFTimeInterval time;
    size_t storageStartIndex; // Inclusive
    size_t storageEndIndex; // Exclusive

    Stack(CFTimeInterval sampleTime, size_t firstIndex, size_t pastLastIndex)
        : time(sampleTime),
          storageStartIndex(firstIndex),
          storageEndIndex(pastLastIndex) {}
};

// Recording state kept for a single thread.
struct Thread {
    // Declared here, defined out of line.
    // threadId is the thread being described; mainThreadId is the main
    // thread's id, passed so the two can be compared.
    Thread(thread_t threadId, thread_t mainThreadId);

    std::deque<Stack> stacks;   // Samples recorded for this thread
    std::string name;           // Display name of the thread
};

// Accumulates stack samples per thread and hands them back as summaries.
class EMGStackTraceRecorder {
public:
    // Takes one sample. When recordAllThreads is false only mainMachThread is
    // sampled; etTraceThread is the sampling thread and is always skipped.
    void recordStackForAllThreads(bool recordAllThreads, thread_t mainMachThread, thread_t etTraceThread);

    // Returns a copy of everything recorded so far, one entry per thread.
    std::vector<ThreadSummary> collectThreadSummaries();

private:
    // Per-thread records, keyed by the thread's Mach port name.
    std::unordered_map<unsigned int, Thread> threadsMap;
    // Taken by both public methods for their full duration.
    std::mutex threadsLock;
    // Frame addresses of every sample; each Stack refers to an index range in here.
    std::deque<uintptr_t> addressStorage;
};
152 changes: 16 additions & 136 deletions ETTrace/Tracer/EMGTracer.mm
Original file line number Diff line number Diff line change
Expand Up @@ -15,29 +15,16 @@
#import <mach-o/arch.h>
#import <sys/utsname.h>
#import <QuartzCore/QuartzCore.h>
#import "EMGStackTraceRecorder.h"

static const int kMaxFramesPerStack = 512;
static NSThread *sStackRecordingThread = nil;
// Fixed-capacity record of one stack sample: `frames` holds up to
// kMaxFramesPerStack addresses and `frameCount` says how many are in use.
// NOTE(review): EMGStackTraceRecorder.h, imported above, also declares a type
// named `Stack`. In this diff view these lines look like the pre-change
// definition; confirm they are not present in the final file.
typedef struct {
CFTimeInterval time;
uint64_t frameCount;
uintptr_t frames[kMaxFramesPerStack];
} Stack;

// Per-thread record: a heap-allocated list of samples plus a fixed 256-byte
// name buffer.
// NOTE(review): EMGStackTraceRecorder.h, imported above, also declares a type
// named `Thread`. In this diff view these lines look like the pre-change
// definition; confirm they are not present in the final file.
typedef struct {
std::vector<Stack> *stacks;
char name[256];
} Thread;
static std::map<unsigned int, Thread *> *sThreadsMap;
static std::mutex sThreadsLock;

static BOOL sRecordAllThreads = false;

static thread_t sMainMachThread = {0};
static thread_t sETTraceThread = {0};

extern "C" {
void FIRCLSWriteThreadStack(thread_t thread, uintptr_t *frames, uint64_t framesCapacity, uint64_t *framesWritten);
// The recorder lives in a function-local static so it is constructed on first
// use; reaching it through this accessor avoids the static initialization
// order fiasco.
EMGStackTraceRecorder &getRecorder() {
    static EMGStackTraceRecorder sharedRecorder;
    return sharedRecorder;
}

@implementation EMGTracer
Expand All @@ -55,19 +42,16 @@ + (void)stopRecording:(void (^)(NSDictionary *))stopped {
}

+ (NSDictionary *)getResults {
sThreadsLock.lock();
NSMutableDictionary <NSString *, NSDictionary<NSString *, id> *> *threads = [NSMutableDictionary dictionary];

std::map<unsigned int, Thread *>::iterator it;
for (it = sThreadsMap->begin(); it != sThreadsMap->end(); it++) {
Thread thread = *it->second;
NSString *threadId = [[NSNumber numberWithUnsignedInt:it->first] stringValue];

auto threadSummaries = getRecorder().collectThreadSummaries();
for (const auto &thread : threadSummaries) {
NSString *threadId = [@(thread.threadId) stringValue];
threads[threadId] = @{
@"name": [NSString stringWithFormat:@"%s", thread.name],
@"stacks": [self arrayFromStacks: *thread.stacks]
@"name": @(thread.name.c_str()),
@"stacks": [self arrayFromStacks:thread.stacks]
};
}
sThreadsLock.unlock();

const NXArchInfo *archInfo = NXGetLocalArchInfo();
NSString *cpuType = [NSString stringWithUTF8String:archInfo->description];
Expand All @@ -83,13 +67,12 @@ + (NSDictionary *)getResults {
};
}

+ (NSArray <NSDictionary <NSString *, id> *> *) arrayFromStacks: (std::vector<Stack>)stacks {
+ (NSArray <NSDictionary <NSString *, id> *> *) arrayFromStacks: (const std::vector<StackSummary> &)stacks {
NSMutableArray <NSDictionary <NSString *, id> *> *threadStacks = [NSMutableArray array];
for (const auto &cStack : stacks) {
NSMutableArray <NSNumber *> *stack = [NSMutableArray array];
// Add the addrs in reverse order so that they start with the lowest frame, e.g. `start`
for (int j = (int)cStack.frameCount - 1; j >= 0; j--) {
[stack addObject:@((NSUInteger)cStack.frames[j])];
for (const auto &address : cStack.stack) {
[stack addObject:@((NSUInteger)address)];
}
NSDictionary *stackDictionary = @{
@"stack": [stack copy],
Expand Down Expand Up @@ -139,103 +122,6 @@ + (NSString *)deviceName {
return [NSString stringWithCString:systemInfo.machine encoding:NSUTF8StringEncoding];
}

// Allocates a Thread record for `threadId`, fills in its name and gives it an
// empty stack list with room for 400 samples. The caller owns the returned
// pointer and its `stacks` vector; nothing in this function frees them.
// NOTE(review): in this diff view this function looks like pre-change code
// that the commit removes; confirm before relying on it.
Thread* createThread(thread_t threadId)
{
Thread *thread = new Thread;

if(threadId == sMainMachThread) {
strcpy(thread->name,"Main Thread");
} else {
// Get thread Name
char name[256];
pthread_t pt = pthread_from_mach_thread_np(threadId);
// NOTE(review): when `pt` is null nothing writes thread->name, and
// `new Thread` above does not initialise it; confirm callers tolerate that.
if (pt) {
// Pre-terminate so the copy below is well-defined even if the query fails.
name[0] = '\0';
// `rc` is not inspected; a failed query leaves the empty string in `name`.
int rc = pthread_getname_np(pt, name, sizeof name);
strcpy(thread->name, name);
}
}

// Create stacks vector
thread->stacks = new std::vector<Stack>;
thread->stacks->reserve(400);

return thread;
}

// Takes one stack sample of every target thread and appends it to the
// per-thread lists in sThreadsMap.
// Phases: pick the threads, pre-allocate one Stack per thread, suspend them
// all, capture every stack, resume them all, then merge under sThreadsLock.
// NOTE(review): in this diff view this method looks like pre-change code that
// the commit removes; confirm before relying on it.
+ (void)recordStackForAllThreads
{
thread_act_array_t threads;
mach_msg_type_number_t thread_count;
if (sRecordAllThreads) {
// On failure `threads` stays unset, but the zero count keeps every loop below from indexing it.
// NOTE(review): nothing in this method releases the array or thread ports
// returned by task_threads; confirm whether that is intentional.
if (task_threads(mach_task_self(), &threads, &thread_count) != KERN_SUCCESS) {
thread_count = 0;
}
} else {
threads = &sMainMachThread;
thread_count = 1;
}

// Allocate every Stack up front, before any thread is suspended.
std::map<thread_t, Stack *> stackMap;
for (mach_msg_type_number_t i = 0; i < thread_count; i++) {
if (threads[i] == sETTraceThread) {
continue;
}

Stack *stack = new Stack;
stackMap.insert(std::pair<unsigned int, Stack *>(threads[i], stack));
}

// Suspend all threads but ETTrace's
for (mach_msg_type_number_t i = 0; i < thread_count; i++) {
if (threads[i] != sETTraceThread) {
thread_suspend(threads[i]);
}
}

// One timestamp is shared by every stack captured in this pass.
CFTimeInterval time = CACurrentMediaTime();
for (mach_msg_type_number_t i = 0; i < thread_count; i++) {
if (threads[i] == sETTraceThread) {
continue;
}

Stack *stack = stackMap.at(threads[i]);
stack->time = time;
FIRCLSWriteThreadStack(threads[i], stack->frames, kMaxFramesPerStack, &(stack->frameCount));
}

for (mach_msg_type_number_t i = 0; i < thread_count; i++) {
if (threads[i] != sETTraceThread)
thread_resume(threads[i]);
}

// Merge the captured stacks into the shared map, creating a Thread record
// the first time a thread id is seen.
std::vector<Stack> *threadStack;
std::map<thread_t, Stack *>::iterator it;
sThreadsLock.lock();
for (it = stackMap.begin(); it != stackMap.end(); it++) {
thread_t t_id = it->first;
if (sThreadsMap->find(t_id) == sThreadsMap->end()) {
Thread *thread = createThread(t_id);
// Add to hash map
sThreadsMap->insert(std::pair<thread_t, Thread *>(t_id, thread));

threadStack = thread->stacks;
} else {
threadStack = sThreadsMap->at(t_id)->stacks;
}
Stack *stack = it->second;
try {
// Copies the whole Stack, including its fixed-size frames array, into the vector.
threadStack->emplace_back(*stack);
} catch (const std::length_error& le) {
// Flush pending output, then rethrow.
// NOTE(review): the rethrow skips `delete stack` and the unlock below; confirm this path is meant to be fatal.
fflush(stdout);
fflush(stderr);
throw le;
}
delete stack;
}
sThreadsLock.unlock();
}

+ (void)setup {
sMainMachThread = mach_thread_self();
EMGBeginCollectingLibraries();
Expand All @@ -256,18 +142,12 @@ + (void)setupStackRecording:(BOOL) recordAllThreads
// usleep is guaranteed to sleep more than that, in practice ~5ms. We could use a
// dispatch_timer, which at least tries to compensate for drift etc., but the
// timer's queue could theoretically end up run on the main thread
sRecordAllThreads = recordAllThreads;

sThreadsMap = new std::map<unsigned int, Thread *>;

sStackRecordingThread = [[NSThread alloc] initWithBlock:^{
if (!sETTraceThread) {
sETTraceThread = mach_thread_self();
}
thread_t etTraceThread = mach_thread_self();

NSThread *thread = [NSThread currentThread];
while (!thread.cancelled) {
[self recordStackForAllThreads];
getRecorder().recordStackForAllThreads(recordAllThreads, sMainMachThread, etTraceThread);
usleep(4500);
}
}];
Expand Down

0 comments on commit 8df984f

Please sign in to comment.