add utilities for graphing bad calling links. (#530)

* add utilities for graphing bad calling links. * add new reporting tool, and clean up instructions in other tools.
wireapp · Dec 17, 2021 · 11ffac0 · 11ffac0
1 parent dd5a88f
commit 11ffac0
Show file tree

Hide file tree

Showing 5 changed files with 358 additions and 0 deletions.
diff --git a/default.nix b/default.nix
@@ -45,6 +45,9 @@ rec {
       mirror-apt
       generate-gpg1-key
       kubeadm
+      # for RTP session debugging
+      wireshark
+      gnuplot
 
       niv
       nix-prefetch-docker

diff --git a/nix/overlay.nix b/nix/overlay.nix
@@ -5,6 +5,8 @@ self: super: {
     super.python38Packages.boto3
     super.python38Packages.cryptography
     super.python38Packages.six
+    # for packet debugging and reporting.
+    super.python38Packages.pyshark
   ]));
 
   kubectl = self.callPackage ./pkgs/kubectl.nix { };

diff --git a/utils/generate_graph.pl b/utils/generate_graph.pl
@@ -0,0 +1,41 @@
+#!/usr/bin/env -S gnuplot -c
+
+####################################################################
+# GNUPlot script to display reports on packet captured RTP streams #
+####################################################################
+
+##############################
+# General Usage
+#
+# once you have a report from rtpstreams_graph.py saved to a file,
+# provide it to this utility, and get a graphical output.
+
+##############################
+# Requirements
+#
+# If you're not using wire-server-deploy's direnv and nix setup,
+# you will need to install gnuplot version 5 or newer.
+
+# Expect exactly two arguments: the text report to read and the PNG file to
+# write. Anything else prints a usage line and exits.
+if (ARGC != 2) { print "usage: ", ARG0, " <txtfilein> <pngfileout>";
+		 exit -1
+}
+
+# Bars are 0.3 x-units wide and filled solid. rtpstreams_graph.py writes the
+# "max" row of each bucket at x = N.3, so the two bars sit side by side.
+set boxwidth 0.3
+set style fill solid
+
+# Line style 1 (blue) is used for the mean bars, style 2 (red) for the max bars.
+set style line 1 lc rgb "blue"
+set style line 2 lc rgb "red"
+
+# Render a 1024x768 PNG through the pngcairo terminal.
+set term pngcairo size 1024,768 enhance font 'Verdana,10'
+
+set title "Packet size against mean pairwise transmission delay"
+
+# Both axes start at zero; the upper bounds are left unspecified.
+set xlabel "Packet size ranges per bucket (bytes)"
+set xrange [0:]
+set ylabel "Packet-pairwise transmission delay (microseconds)"
+set yrange [0:]
+
+# The image is written to the path given as the second argument.
+set output ARG2
+
+# Each report row is "<lo>-<hi> <x> <delay>": column 1 is the bucket's size
+# range (used as the x tick label), column 2 the x position and column 3 the
+# bar height. Rows alternate mean / max, so `every 2` selects the mean rows
+# and `every 2::1` the max rows. The report file is read through `cat`.
+plot sprintf("<cat %s",ARG1) every 2    using 2:3:xtic(1) with boxes ls 1 title 'Mean packet delay in bucket', \
+                   ''        every 2::1 using 2:3         with boxes ls 2 title 'Max packet delay in bucket'
diff --git a/utils/rtpstreams_graph.py b/utils/rtpstreams_graph.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+
+###############################################################
+# Utility to derive statistics on packet captured RTP streams #
+###############################################################
+
+######################
+# General Usage:
+#
+# First, capture a call's packets with tcpdump:
+#
+# kubenode1> tcpdump -i ens160 -s 0 -w testnumber.pcap host <client IP> and udp
+#
+# *place call from host here*
+#
+# Next, copy this pcap file to a place where you have these tools, and run this command on a pcap file to find out what udp ports were seen during the capture:
+#
+# adminhost> ./rtpstreams_graph.py testnumber.pcap
+# usage: ./rtpstreams_graph.py <pcap file> <port>
+# Finding source ports for you, be patient...
+# pcap contains 21 packets with source port 37462
+# pcap contains 29 packets with source port 38654
+# pcap contains 67 packets with source port 80
+# pcap contains 13 packets with source port 56899
+# pcap contains 58 packets with source port 44279
+# pcap contains 8340 packets with source port 50996
+# pcap contains 5650 packets with source port 34096
+# adminhost>
+#
+# Pick the port that has a lot of packets captured, as those are probably your calls.
+#
+# adminhost> ./rtpstreams_graph.py testnumber.pcap 50996
+# Capture file found. Generating reports..
+# Processing session 220450815 with 4180 packets
+# <START REPORT>
+# ...
+# <END REPORT>
+# Processing session 2008506802 with 3422 packets
+# <START REPORT>
+# ...
+# <END REPORT>
+#
+# Copy everything between the start report, and the end report marker, and place it in a text file.
+#
+# Use generate_graph.pl to create a graph from your report!
+#
+# adminhost> ./generate_graph.pl report1.txt report1.png
+
+##############################
+# Interpreting these results:
+#
+# TL;DR: any packet delayed by more than 0:00:00.12 is a problem. These will show as the red bars.
+# Delayed packets can cause SFT to lose track of the stream, and wait for the next keyframe.
+
+
+##################
+# Requirements:
+#
+# If you're not using nix and direnv in our wire-server-deploy directory, you'll need:
+# Python 3
+# pyshark
+# wireshark
+
+import datetime
+import sys
+import time
+import pyshark
+import functools
+import collections
+
+BUCKETS = 10
+
+if len(sys.argv) < 3:
+    print('usage: {} <pcap file> <port>'.format(sys.argv[0]))
+
+if len(sys.argv) == 1:
+    exit (-1)
+
+fname = sys.argv[1]
+ss = dict()
+
+if len(sys.argv) == 2:
+    cap = pyshark.FileCapture (fname)
+    print('Finding source ports for you, be patient...')
+    for pkt in cap:
+        if 'udp' in pkt:
+            id = int(pkt.udp.srcport)
+            if id not in ss:
+                ss[id] = list()
+            ss[id].append(pkt.udp.dstport)
+    for id in ss:
+        print ('pcap contains {} packets with source port {}'.format(len(ss[id]), id))
+    exit (0)
+
+port = sys.argv[2]
+cap = pyshark.FileCapture (fname,
+                           display_filter='udp',
+                           decode_as={'udp.port=={}'.format(port):'rtp'})
+
+print('Capture file found. Generating reports..')
+for pkt in cap:
+    # only keep rtp packets of type 100
+    if 'rtp' in pkt and pkt.rtp.get('p_type') == '100':
+        id = int(pkt.rtp.ssrc, 16)
+        # bucket packets by which rtp session they belong to
+        if id not in ss:
+            ss[id] = list()
+        ss[id].append(pkt)
+
+for id in ss:
+    print('Processing session {} with {} packets'.format(id, len(ss[id])))
+
+    # sort packets by the time they were recorded by the filter program
+    pkts = sorted(ss[id], key=lambda p: p.sniff_time)
+
+    # retrieve the length of each packet, and the pairwise delay between
+    # each packet and its predecessor. caution: this uses the length of the IP
+    # datagram, not the length of the inner udp datagram.
+    szdel = map(lambda i: {
+        'size': int(pkts[i].length),
+        'delay': pkts[i].sniff_time - pkts[i-1].sniff_time
+    }, range(1, len(pkts)))
+
+    # flatten timestamps into microseconds
+    szdel = map(lambda i: {
+        'size': i['size'],
+        'delay': i['delay'].microseconds + (i['delay'].seconds * 1000000)
+    }, szdel)
+
+    # sort the list by packet size
+    szdel = sorted(szdel, key=lambda p: p['size'])
+
+    # split the list into N buckets by packet size
+    bksz = len(szdel) / BUCKETS
+    bknum = 0
+    buckets = list()
+    buckets.append(list())
+
+    for i in range(0, len(szdel)):
+        if i >= ((bknum + 1) * bksz) and (bknum + 1) < BUCKETS:
+            bknum += 1
+            buckets.append(list())
+
+        buckets[bknum].append(szdel[i])
+
+    # calculate the mean and max pairwise delay for each packet size bucket,
+    # and retrieve the min and max size for labelling.
+    avgs = map(lambda b: {
+        'smin': min(map(lambda x: x['size'], b)),
+        'smax': max(map(lambda x: x['size'], b)),
+        'davg': functools.reduce(lambda x, y: x + y['delay'], b, 0) / len(b),
+        'dmax': max(map(lambda x: x['delay'], b))
+    }, buckets)
+
+    avgs = list(avgs)
+
+    print('<START REPORT>')
+    # report
+    for i in range(0, len(avgs)):
+        a = avgs[i]
+        lo = a['smin']
+        hi = a['smax']
+        print('{}-{} {} {}'.format(lo, hi, i+1, a['davg']))
+        #              v-- gnuplot magic hacks.
+        print('{}-{} {}.3 {}'.format(lo, hi, i+1, a['dmax']))
+        print()
+    print('<END REPORT>')
diff --git a/utils/rtpstreams_summary.py b/utils/rtpstreams_summary.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+
+##############################################################
+# Utility to generate summary of packet captured RTP streams #
+##############################################################
+
+######################
+# General Usage:
+#
+# capture packets with tcpdump:
+#
+# kubenode1> tcpdump -i ens160 -s 0 -w testnumber.pcap host <client IP> and udp
+#
+# run this command on a pcap file to find out what udp ports were seen during the capture
+#
+# Copy this pcap file to a place where you have these tools.
+#
+# adminhost> ./rtpstreams_summary.py testnumber.pcap
+# usage: ./rtpstreams_summary.py <pcap file> <port>
+# Finding source ports for you, be patient...
+# pcap contains 21 packets with source port 37462
+# pcap contains 29 packets with source port 38654
+# pcap contains 67 packets with source port 80
+# pcap contains 13 packets with source port 56899
+# pcap contains 58 packets with source port 44279
+# pcap contains 8340 packets with source port 50996
+# pcap contains 5650 packets with source port 34096
+# adminhost>
+#
+# Pick the big ones, as those are probably your calls.
+#
+# adminhost> ./rtpstreams_summary.py testnumber.pcap 50996
+# Capture file found. Generating summary..
+# SSRC 220450815: 4180 packets
+# packet 27697 delayed by 0:00:00.137442
+# packet 27705 delayed by 0:00:00.310505
+# 4180 packets recved, 0 lost (0 %) and 0 with same seq
+# max delay between packets 0:00:00.310505
+# SSRC 2008506802: 3422 packets
+# packet 257 delayed by 0:00:00.142737
+# packet 271 delayed by 0:00:00.160726
+# packet 491 delayed by 0:00:00.169627
+# packet 640 delayed by 0:00:00.182204
+# packet 1261 delayed by 0:00:00.121933
+# packet 1614 delayed by 0:00:00.200193
+# packet 1945 delayed by 0:00:00.168273
+# packet 2059 delayed by 0:00:00.127896
+# packet 2639 delayed by 0:00:00.169698
+# packet 2761 delayed by 0:00:00.132851
+# packet 2781 delayed by 0:00:00.160073
+# 3422 packets recved, 64 lost (1 %) and 0 with same seq
+# max delay between packets 0:00:00.200193
+#
+
+##############################
+# Interpreting these results:
+#
+# TL;DR: any packet delayed by more than 0:00:00.12 is a problem, and packet loss above 0.1% can also be problematic.
+# Both of these situations can cause SFT to lose track of the stream, and wait for the next keyframe.
+
+###### Requirements:
+# If you're not using nix and direnv in our wire-server-deploy directory, you'll need:
+# Python 3
+# pyshark
+# wireshark
+
+import datetime
+import pyshark
+import sys
+import time
+
+if len(sys.argv) < 3:
+    print('usage: {} <pcap file> <port>'.format(sys.argv[0]))
+
+if len(sys.argv) == 1:
+    exit (-1)
+
+fname = sys.argv[1]
+ss = dict()
+
+if len(sys.argv) == 2:
+    cap = pyshark.FileCapture (fname)
+
+    print('Finding source ports for you, be patient...')
+    for pkt in cap:
+        if 'udp' in pkt:
+            id = int(pkt.udp.srcport)
+            if id not in ss:
+                ss[id] = list()
+            ss[id].append(pkt.udp.dstport)
+    for id in ss:
+        print ('pcap contains {} packets with source port {}'.format(len(ss[id]), id))
+
+    exit (0)
+
+port = sys.argv[2]
+cap = pyshark.FileCapture(fname,
+                          display_filter='udp',
+                          decode_as={'udp.port=={}'.format(port):'rtp'})
+seqs = {}
+print('Capture file found. Generating summary..')
+
+for packet in cap:
+    if 'rtp' in packet:
+        r = packet.rtp
+        if r.get('p_type') == '100':
+            ssrc = int(r.ssrc, 16)
+            if ssrc not in seqs:
+                seqs[ssrc] = []
+            seqs[ssrc].append({'seq': int(r.seq),
+                               'ts': int(r.timestamp),
+                               'sts': packet.sniff_time})
+
+for ssrc in seqs:
+    print('SSRC {}: {} packets'.format(ssrc, len(seqs[ssrc])))
+    pid = sorted(seqs[ssrc], key=lambda x: x['ts'])
+    s = 0
+    lastts = None
+    maxts = datetime.timedelta(0)
+    limitts = datetime.timedelta(seconds=0.12)
+    lost = 0
+    recv = 0
+    rsnd = 0
+
+    for pkt in pid:
+        idx = pkt['seq']
+        ts = pkt['sts']
+
+        if lastts != None and ts - lastts > limitts:
+            print('packet {} delayed by {}'.format(idx, ts-lastts))
+
+        if lastts != None and ts - lastts > maxts:
+            maxts = ts - lastts
+
+        if s != 0 and idx >= s+1:
+            lost += idx - s - 1
+        elif s != 0 and idx == s:
+            rsnd += 1
+
+        lastts = ts
+        s = idx
+        recv += 1
+
+    print('{} packets recved, {} lost ({} %) and {} with same seq'.format(recv, lost, int(lost * 100 / recv), rsnd))
+    print('max delay between packets {}'.format(maxts))