diff --git a/.gitignore b/.gitignore
index a52a855..05909a2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,4 +8,5 @@ ETTrace-iphoneos.xcarchive/
 output.json
 output.folded
 .swiftpm
-.build
\ No newline at end of file
+.build
+output_*.json
diff --git a/ETTrace/Tracer/EMGStackTraceRecorder.cpp b/ETTrace/Tracer/EMGStackTraceRecorder.cpp
new file mode 100644
index 0000000..d00243d
--- /dev/null
+++ b/ETTrace/Tracer/EMGStackTraceRecorder.cpp
@@ -0,0 +1,120 @@
+#include "EMGStackTraceRecorder.h"
+
+#import
+#import
+#import
+#import
+#import
+#import
+#import
+#import
+
+extern "C" {
+void FIRCLSWriteThreadStack(thread_t thread, uintptr_t *frames, uint64_t framesCapacity, uint64_t *framesWritten);
+}
+
+static const int kMaxFramesPerStack = 1024;
+
+kern_return_t checkMachCall(kern_return_t result) {
+    if (result != KERN_SUCCESS) {
+        std::cerr << "Mach call failed with " << result << std::endl;
+    }
+    return result;
+}
+
+Thread::Thread(thread_t threadId, thread_t mainThreadId) {
+    name = "Failed to get name"; // Error case
+
+    if(threadId == mainThreadId) {
+        name = "Main Thread";
+    } else {
+        // Get thread Name
+        char cName[1024];
+        pthread_t pt = pthread_from_mach_thread_np(threadId);
+        if (pt) {
+            int rc = pthread_getname_np(pt, cName, sizeof(cName));
+            if (rc == 0) {
+                name = cName;
+            }
+        }
+    }
+}
+
+std::vector EMGStackTraceRecorder::collectThreadSummaries() {
+    std::lock_guard lockGuard(threadsLock);
+
+    std::vector summaries;
+    for (const auto &[threadId, thread] : threadsMap) {
+        std::vector stackSummaries;
+        for (const auto &stack : thread.stacks) {
+            std::vector addresses;
+            for (auto i = stack.storageStartIndex; i < stack.storageEndIndex; i++) {
+                addresses.emplace_back(addressStorage[i]);
+            }
+            // Reverse the stack addresses to get the correct order
+            std::reverse(addresses.begin(), addresses.end());
+            stackSummaries.emplace_back(stack.time, addresses);
+        }
+        summaries.emplace_back(threadId, thread.name, stackSummaries);
+    }
+    return summaries;
+}
+
+// Captures one stack sample per thread and appends it to the recorder's storage.
+// When recordAllThreads is true every thread reported by task_threads() is sampled;
+// otherwise only mainMachThread is. etTraceThread is always skipped.
+// Holds threadsLock for the whole call.
+void EMGStackTraceRecorder::recordStackForAllThreads(bool recordAllThreads, thread_t mainMachThread, thread_t etTraceThread) {
+    std::lock_guard lockGuard(threadsLock);
+    thread_act_array_t threads = nullptr;
+    mach_msg_type_number_t threadCount = 0;
+    if (recordAllThreads) {
+        int result = checkMachCall(task_threads(mach_task_self(), &threads, &threadCount));
+        if (result != KERN_SUCCESS) {
+            // Nothing was handed to us, so there is nothing to sample or release
+            threads = nullptr;
+            threadCount = 0;
+        }
+    } else {
+        threads = &mainMachThread;
+        threadCount = 1;
+    }
+
+    // This time gets less accurate for later threads, but still good
+    CFTimeInterval time = CACurrentMediaTime();
+    for (mach_msg_type_number_t i = 0; i < threadCount; i++) {
+        if (threads[i] == etTraceThread) {
+            continue;
+        }
+
+        uintptr_t frames[kMaxFramesPerStack];
+        uint64_t frameCount = 0;
+
+        if (thread_suspend(threads[i]) != KERN_SUCCESS) {
+            // In theory, the thread may have been destroyed by now, so we skip it if this fails
+            continue;
+        }
+        // BEGIN REENTRANT SECTION
+        FIRCLSWriteThreadStack(threads[i], frames, kMaxFramesPerStack, &frameCount);
+        // END REENTRANT SECTION
+        checkMachCall(thread_resume(threads[i]));
+
+        auto emplaceResult = threadsMap.try_emplace(threads[i], threads[i], mainMachThread);
+        size_t startIndex = addressStorage.size();
+        // frameCount is unsigned 64-bit, so the index must be too
+        for (uint64_t frame_idx = 0; frame_idx < frameCount; frame_idx++) {
+            addressStorage.emplace_back(frames[frame_idx]);
+        }
+        size_t endIndex = addressStorage.size();
+        emplaceResult.first->second.stacks.emplace_back(time, startIndex, endIndex);
+    }
+
+    if (recordAllThreads && threads != nullptr) {
+        // task_threads() returns a send right for each thread plus a kernel-allocated array;
+        // release both, otherwise every sample leaks ports and memory.
+        for (mach_msg_type_number_t i = 0; i < threadCount; i++) {
+            mach_port_deallocate(mach_task_self(), threads[i]);
+        }
+        vm_deallocate(mach_task_self(), (vm_address_t)threads, threadCount * sizeof(thread_t));
+    }
+}
diff --git a/ETTrace/Tracer/EMGStackTraceRecorder.h b/ETTrace/Tracer/EMGStackTraceRecorder.h
new file mode 100644
index 0000000..9156d80
--- /dev/null
+++ b/ETTrace/Tracer/EMGStackTraceRecorder.h
@@ -0,0 +1,53 @@
+#import
+#import
+#import
+#import
+#import
+#import
+
+struct StackSummary {
+    CFTimeInterval time;
+    std::vector stack;
+
+    StackSummary(CFTimeInterval time, std::vector &stack) : time(time), stack(stack) {
+    }
+};
+
+struct ThreadSummary {
+    thread_t threadId;
+    std::string name;
+    std::vector stacks;
+
+    ThreadSummary(thread_t threadId, const std::string &name, std::vector &stacks) : threadId(threadId), name(name), stacks(stacks) {
+    }
+};
+
+struct Stack {
+    CFTimeInterval time;
+    size_t storageStartIndex; // Inclusive
+    size_t storageEndIndex; // Exclusive
+
+    Stack(CFTimeInterval time, size_t storageStartIndex, size_t storageEndIndex) : time(time), storageStartIndex(storageStartIndex), storageEndIndex(storageEndIndex) {
+    }
+};
+
+struct Thread {
+    std::deque stacks;
+    std::string name;
+
+    Thread(thread_t threadId, thread_t mainThreadId);
+};
+
+class EMGStackTraceRecorder {
+    std::unordered_map threadsMap;
+    std::mutex threadsLock;
+    std::deque addressStorage;
+
+public:
+    // Samples the stack of each thread (or only mainMachThread when recordAllThreads is false),
+    // skipping etTraceThread, and stores the frames in this recorder.
+    void recordStackForAllThreads(bool recordAllThreads, thread_t mainMachThread, thread_t etTraceThread);
+
+    // Returns a copy of everything recorded so far, one ThreadSummary per recorded thread.
+    std::vector collectThreadSummaries();
+};
diff --git a/ETTrace/Tracer/EMGTracer.mm b/ETTrace/Tracer/EMGTracer.mm
index 4c11591..2eb6b98 100644
--- a/ETTrace/Tracer/EMGTracer.mm
+++ b/ETTrace/Tracer/EMGTracer.mm
@@ -15,29 +15,16 @@
 #import
 #import
 #import
+#import "EMGStackTraceRecorder.h"
 
-static const int kMaxFramesPerStack = 512;
 static NSThread *sStackRecordingThread = nil;
 
-typedef struct {
-    CFTimeInterval time;
-    uint64_t frameCount;
-    uintptr_t frames[kMaxFramesPerStack];
-} Stack;
-
-typedef struct {
-    std::vector *stacks;
-    char name[256];
-} Thread;
-static std::map *sThreadsMap;
-static std::mutex sThreadsLock;
-
-static BOOL sRecordAllThreads = false;
 static thread_t sMainMachThread = {0};
-static thread_t sETTraceThread = {0};
 
-extern "C" {
-void FIRCLSWriteThreadStack(thread_t thread, uintptr_t *frames, uint64_t framesCapacity, uint64_t *framesWritten);
+// To avoid static initialization order fiasco, we access it from a function
+EMGStackTraceRecorder &getRecorder() {
+    static EMGStackTraceRecorder recorder;
+    return recorder;
 }
 
 @implementation EMGTracer
@@ -55,19 +42,16 @@ + (void)stopRecording:(void (^)(NSDictionary *))stopped {
 }
 
 + (NSDictionary *)getResults {
-    sThreadsLock.lock();
     NSMutableDictionary *> *threads = [NSMutableDictionary dictionary];
-
-    std::map::iterator it;
-    for (it = sThreadsMap->begin(); it != sThreadsMap->end(); it++) {
-        Thread thread = *it->second;
-        NSString *threadId = [[NSNumber numberWithUnsignedInt:it->first] stringValue];
+
+    auto threadSummaries = getRecorder().collectThreadSummaries();
+    for (const auto &thread : threadSummaries) {
+        NSString *threadId = [@(thread.threadId) stringValue];
         threads[threadId] = @{
-            @"name": [NSString stringWithFormat:@"%s", thread.name],
-            @"stacks": [self arrayFromStacks: *thread.stacks]
+            @"name": @(thread.name.c_str()),
+            @"stacks": [self arrayFromStacks:thread.stacks]
         };
     }
-    sThreadsLock.unlock();
 
     const NXArchInfo *archInfo = NXGetLocalArchInfo();
     NSString *cpuType = [NSString stringWithUTF8String:archInfo->description];
@@ -83,13 +67,12 @@ + (NSDictionary *)getResults {
     };
 }
 
-+ (NSArray *> *) arrayFromStacks: (std::vector)stacks {
++ (NSArray *> *) arrayFromStacks: (const std::vector &)stacks {
     NSMutableArray *> *threadStacks = [NSMutableArray array];
     for (const auto &cStack : stacks) {
         NSMutableArray *stack = [NSMutableArray array];
-        // Add the addrs in reverse order so that they start with the lowest frame, e.g. `start`
-        for (int j = (int)cStack.frameCount - 1; j >= 0; j--) {
-            [stack addObject:@((NSUInteger)cStack.frames[j])];
+        for (const auto &address : cStack.stack) {
+            [stack addObject:@((NSUInteger)address)];
         }
         NSDictionary *stackDictionary = @{
             @"stack": [stack copy],
@@ -139,103 +122,6 @@ + (NSString *)deviceName {
     return [NSString stringWithCString:systemInfo.machine encoding:NSUTF8StringEncoding];
 }
 
-Thread* createThread(thread_t threadId)
-{
-    Thread *thread = new Thread;
-
-    if(threadId == sMainMachThread) {
-        strcpy(thread->name,"Main Thread");
-    } else {
-        // Get thread Name
-        char name[256];
-        pthread_t pt = pthread_from_mach_thread_np(threadId);
-        if (pt) {
-            name[0] = '\0';
-            int rc = pthread_getname_np(pt, name, sizeof name);
-            strcpy(thread->name, name);
-        }
-    }
-
-    // Create stacks vector
-    thread->stacks = new std::vector;
-    thread->stacks->reserve(400);
-
-    return thread;
-}
-
-+ (void)recordStackForAllThreads
-{
-    thread_act_array_t threads;
-    mach_msg_type_number_t thread_count;
-    if (sRecordAllThreads) {
-        if (task_threads(mach_task_self(), &threads, &thread_count) != KERN_SUCCESS) {
-            thread_count = 0;
-        }
-    } else {
-        threads = &sMainMachThread;
-        thread_count = 1;
-    }
-
-    std::map stackMap;
-    for (mach_msg_type_number_t i = 0; i < thread_count; i++) {
-        if (threads[i] == sETTraceThread) {
-            continue;
-        }
-
-        Stack *stack = new Stack;
-        stackMap.insert(std::pair(threads[i], stack));
-    }
-
-    // Suspend all threads but ETTrace's
-    for (mach_msg_type_number_t i = 0; i < thread_count; i++) {
-        if (threads[i] != sETTraceThread) {
-            thread_suspend(threads[i]);
-        }
-    }
-
-    CFTimeInterval time = CACurrentMediaTime();
-    for (mach_msg_type_number_t i = 0; i < thread_count; i++) {
-        if (threads[i] == sETTraceThread) {
-            continue;
-        }
-
-        Stack *stack = stackMap.at(threads[i]);
-        stack->time = time;
-        FIRCLSWriteThreadStack(threads[i], stack->frames, kMaxFramesPerStack, &(stack->frameCount));
-    }
-
-    for (mach_msg_type_number_t i = 0; i < thread_count; i++) {
-        if (threads[i] != sETTraceThread)
-            thread_resume(threads[i]);
-    }
-
-    std::vector *threadStack;
-    std::map::iterator it;
-    sThreadsLock.lock();
-    for (it = stackMap.begin(); it != stackMap.end(); it++) {
-        thread_t t_id = it->first;
-        if (sThreadsMap->find(t_id) == sThreadsMap->end()) {
-            Thread *thread = createThread(t_id);
-            // Add to hash map
-            sThreadsMap->insert(std::pair(t_id, thread));
-
-            threadStack = thread->stacks;
-        } else {
-            threadStack = sThreadsMap->at(t_id)->stacks;
-        }
-        Stack *stack = it->second;
-        try {
-            threadStack->emplace_back(*stack);
-        } catch (const std::length_error& le) {
-            fflush(stdout);
-            fflush(stderr);
-            throw le;
-        }
-        delete stack;
-    }
-    sThreadsLock.unlock();
-}
-
 + (void)setup {
     sMainMachThread = mach_thread_self();
     EMGBeginCollectingLibraries();
@@ -256,18 +142,12 @@ + (void)setupStackRecording:(BOOL) recordAllThreads
     // usleep is guaranteed to sleep more than that, in practice ~5ms. We could use a
     // dispatch_timer, which at least tries to compensate for drift etc., but the
     // timer's queue could theoretically end up run on the main thread
-    sRecordAllThreads = recordAllThreads;
-
-    sThreadsMap = new std::map;
-
     sStackRecordingThread = [[NSThread alloc] initWithBlock:^{
-        if (!sETTraceThread) {
-            sETTraceThread = mach_thread_self();
-        }
+        thread_t etTraceThread = mach_thread_self();
 
         NSThread *thread = [NSThread currentThread];
         while (!thread.cancelled) {
-            [self recordStackForAllThreads];
+            getRecorder().recordStackForAllThreads(recordAllThreads, sMainMachThread, etTraceThread);
             usleep(4500);
         }
     }];