diff --git a/Makefile b/Makefile index 5f1d631..845b6ea 100644 --- a/Makefile +++ b/Makefile @@ -1,8 +1,8 @@ default: all -CXXFLAGS=-O2 -std=c++11 -LIBS=-lz -lpcap +CXXFLAGS=-O2 -I/usr/local/include +LIBS=-lz -lpcap -lpcre PREFIX?=/usr/local INSTALL_BIN=$(PREFIX)/bin diff --git a/README.md b/README.md index 1247100..ab9c635 100644 --- a/README.md +++ b/README.md @@ -15,16 +15,16 @@ brew install httpflow ### Linux -* Install [zlib](http://www.zlib.net/), [libpcap](http://www.tcpdump.org/) +* Install [zlib](http://www.zlib.net/), [pcap](http://www.tcpdump.org/), [pcre](http://pcre.org/) ```bash ## On CentOS yum update -yum install libpcap-devel zlib-devel +yum install libpcap-devel zlib-devel pcre-devel ## On Ubuntu / Debian apt-get update -apt-get install libpcap-dev zlib1g-dev +apt-get install libpcap-dev zlib1g-dev libpcre3 libpcre3-dev ``` * Building httpflow @@ -78,6 +78,7 @@ Usage: httpflow [-i interface | -r pcap-file] [-f packet-filter] [-u url-filter] ``` * Use the regexp to filter request urls + ```bash > httpflow -u '(google.com|httpbin.org)/.*/get' ``` diff --git a/custom_parser.cpp b/custom_parser.cpp index ced2d3a..2191780 100644 --- a/custom_parser.cpp +++ b/custom_parser.cpp @@ -1,5 +1,3 @@ -#include - #include "custom_parser.h" #include "util.h" @@ -113,13 +111,16 @@ int custom_parser::on_message_complete(http_parser *parser) { return 0; } -bool custom_parser::filter_url(const std::regex *url_filter, const std::string &url) { - return !url_filter ? true : std::regex_search(url, *url_filter); +bool custom_parser::filter_url(const pcre *url_filter_re, const pcre_extra *url_filter_extra, const std::string &url) { + if (!url_filter_re) return true; + int ovector[30]; + int rc = pcre_exec(url_filter_re, url_filter_extra, url.c_str(), url.size(), 0, 0, ovector, 30); + return rc >= 0; } -void custom_parser::save_http_request(const std::regex *url_filter, const std::string &output_path, const std::string &join_addr) { +void custom_parser::save_http_request(const pcre *url_filter_re, const pcre_extra *url_filter_extra, const std::string &output_path, const std::string &join_addr) { std::string host_with_url = host + url; - if (!filter_url(url_filter, host_with_url)) { + if (!filter_url(url_filter_re, url_filter_extra, host_with_url)) { return; } std::cout << ANSI_COLOR_CYAN << request_address << " -> " << response_address << " " << host_with_url << ANSI_COLOR_RESET << std::endl; diff --git a/custom_parser.h b/custom_parser.h index afa9093..f77c63b 100644 --- a/custom_parser.h +++ b/custom_parser.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include "http_parser.h" class custom_parser { @@ -59,9 +59,9 @@ class custom_parser { void set_addr(const std::string &src_addr, const std::string &dst_addr); - bool filter_url(const std::regex *url_filter, const std::string &url); + bool filter_url(const pcre *url_filter_re, const pcre_extra *url_filter_extra, const std::string &url); - void save_http_request(const std::regex *url_filter, const std::string &output_path, const std::string &join_addr); + void save_http_request(const pcre *url_filter_re, const pcre_extra *url_filter_extra, const std::string &output_path, const std::string &join_addr); static int on_url(http_parser *parser, const char *at, size_t length); diff --git a/http_flow.cpp b/http_flow.cpp index 1276b0b..f07c093 100644 --- a/http_flow.cpp +++ b/http_flow.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -11,7 +12,6 @@ #include #include #include -#include #include "util.h" #include "custom_parser.h" #include "data_link.h" @@ -27,7 +27,8 @@ struct capture_config { char device[IFNAMSIZ]; std::string file_name; std::string filter; - std::regex* url_filter; + pcre* url_filter_re; + pcre_extra* url_filter_extra; int datalink_size; }; @@ -137,7 +138,7 @@ struct ether_header { u_short ether_type; }; -void process_packet(const std::regex *url_filter, const std::string &output_path, const u_char* data, size_t len) { +void process_packet(const pcre *url_filter_re, const pcre_extra *url_filter_extra, const std::string &output_path, const u_char* data, size_t len) { struct packet_info packet; bool ret = process_ipv4(&packet, data, len); @@ -194,7 +195,7 @@ void process_packet(const std::regex *url_filter, const std::string &output_path for (std::list::iterator it = parser_list.begin(); it != parser_list.end();) { if ((*it)->is_response_complete() || packet.is_fin) { - (*it)->save_http_request(url_filter, output_path, join_addr); + (*it)->save_http_request(url_filter_re, url_filter_extra, output_path, join_addr); delete (*it); it = iter->second.erase(it); } else { @@ -219,7 +220,7 @@ void pcap_callback(u_char *arg, const struct pcap_pkthdr *header, const u_char * content += conf->datalink_size; size_t len = header->caplen - conf->datalink_size; - return process_packet(conf->url_filter, conf->output_path, content, len); + return process_packet(conf->url_filter_re, conf->url_filter_extra, conf->output_path, content, len); } static const struct option longopts[] = { @@ -263,6 +264,8 @@ capture_config *default_config() { conf->snaplen = MAXIMUM_SNAPLEN; conf->device[0] = 0; conf->filter = "tcp"; + conf->url_filter_re = NULL; + conf->url_filter_extra = NULL; return conf; } @@ -283,14 +286,15 @@ int init_capture_config(int argc, char **argv, capture_config *conf, char *errbu conf->filter = optarg; break; case 'u': - try { - url_regex.assign(optarg); - conf->url_filter = new std::regex(url_regex); - } catch (const std::regex_error& e) { - std::cerr << "invalid regular expression (" << url_regex << "): " << e.what() << std::endl; + url_regex.assign(optarg); + const char *err; + int erroffset; + conf->url_filter_re = pcre_compile(url_regex.c_str(), 0, &err, &erroffset, NULL); + if (!conf->url_filter_re) { + std::cerr << "invalid regular expression at offset " << erroffset << ": " << err << std::endl; exit(1); } - + conf->url_filter_extra = pcre_study(conf->url_filter_re, 0, &err); break; case 'r': conf->file_name = optarg; @@ -343,7 +347,6 @@ int init_capture_config(int argc, char **argv, capture_config *conf, char *errbu } int main(int argc, char **argv) { - is_atty = isatty(fileno(stdout)); char errbuf[PCAP_ERRBUF_SIZE]; diff --git a/util.h b/util.h index 452ba12..4add275 100644 --- a/util.h +++ b/util.h @@ -3,6 +3,7 @@ #include #include +#include #include #include