-
Notifications
You must be signed in to change notification settings - Fork 7
/
proxy.rb
204 lines (182 loc) · 6.08 KB
/
proxy.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
#!/usr/bin/env ruby
require 'socket'
# Global constants (Ruby globals, so they are visible inside every method below).
# Port used for the origin server when the request URL does not name one.
$DEFAULT_PORT = 80
# Number of whitespace-separated tokens expected on a request line
# (METHOD URL HTTP_VERSION).
$NUM_REQ_ARGS = 3
# Upper bound on the total size of the cache, in bytes (50 * 1024 * 1024).
$MAX_CACHE_SIZE = 52428800
# Largest response, in bytes, that is allowed into the cache (1024 * 1024).
$MAX_OBJECT_SIZE = 1048576
# Age, in seconds, after which a cached object may be evicted (one hour).
$MAX_TIME = 3600
# Value object describing one parsed HTTP request.
#
# Every field is a plain read/write attribute. The constructor fills them
# positionally in this order: type, host, port, filename, version, timestamp.
class Request
  attr_accessor :type, :host, :port, :filename, :version, :timestamp

  # Assigns the positional arguments to the attributes in declaration order.
  # Arguments that are not supplied leave their attribute nil; arguments
  # beyond the sixth are ignored.
  def initialize(*fields)
    @type      = fields[0]
    @host      = fields[1]
    @port      = fields[2]
    @filename  = fields[3]
    @version   = fields[4]
    @timestamp = fields[5]
  end
end
# One cache entry: the requested URL, the raw response stored for it, and the
# time associated with the request that produced it.
class CacheLine
  attr_accessor :url, :response, :timestamp

  # Assigns the positional arguments to url, response and timestamp, in that
  # order. Arguments that are not supplied leave their attribute nil; extra
  # arguments are ignored.
  def initialize(*fields)
    @url       = fields[0]
    @response  = fields[1]
    @timestamp = fields[2]
  end
end
# Runs the caching proxy: listens on localhost:<port>, and for each client
# connection reads one GET request, answers it from the in-memory cache when
# the URL is already cached, and otherwise fetches it from the origin server
# via openWebConn and relays the response.
#
# Connections are handled one at a time, in the order they are accepted.
# This method never returns normally; it loops until the process is stopped
# (parse_request may also terminate the process through abort on a malformed
# request).
#
# port - the port to listen on (String or Integer, as accepted by TCPServer).
def crappy_proxy(port)
  # Open the listening socket the client - in this case, the browser - talks to.
  server = TCPServer.open("localhost", port)
  cache = {}

  loop do
    socket = server.accept
    begin
      request_lines = []
      headers = {}
      time = ""
      url = ""
      req = nil
      header_str = ""

      while (line = socket.gets)
        tokens = line.split
        if tokens[0].eql?("GET")
          # Request line: remember the URL, it is the cache key.
          url = tokens[1]
          request_lines.push(line)
        elsif tokens[0].eql?("Host:")
          # With the request line and the Host line we can build the Request.
          request_lines.push(line)
          time = Time.new
          req = parse_request(request_lines, time)
        elsif line.eql?("\r\n")
          # Blank line: end of the request head.
          header_str = parse_headers(headers)
          break
        else
          # Any other header is stored as "Name:" => " value\r\n".
          colon_index = line.index(':')
          next if colon_index.nil? # not a header line; ignore it
          headers[line[0, colon_index + 1]] = line[colon_index + 1, line.length]
        end
      end

      if cache.key?(url)
        # Cache hit. IO#write is used instead of BasicSocket#send because the
        # second argument of send is a flags bitmask, not a port, and write
        # also guarantees the whole response is written.
        socket.write(cache[url].response)
      elsif req
        # Cache miss: fetch from the origin server.
        response = openWebConn(req, header_str)
        size = cacheSize(cache)
        # Cache the response only if it is at most $MAX_OBJECT_SIZE bytes and
        # the cache has not already grown past $MAX_CACHE_SIZE.
        if response.bytesize <= $MAX_OBJECT_SIZE && size <= $MAX_CACHE_SIZE
          cache[url] = CacheLine.new(url, response, time)
          socket.write(response)
          # If there is no room left for one more maximum-size object, evict
          # the objects that have been cached for longer than $MAX_TIME.
          deleteCacheObject(cache) if size + $MAX_OBJECT_SIZE > $MAX_CACHE_SIZE
        else
          socket.write(response)
        end
      end
      # When no usable request was read (no Host line), nothing is sent.
    rescue SystemCallError, SocketError, IOError => e
      # A failing client or origin connection must not take the server down.
      warn("proxy: #{e.class}: #{e.message}")
    ensure
      socket.close
    end
  end
end
# Removes, in place, every cache entry whose timestamp is more than $MAX_TIME
# seconds in the past.
#
# cache - Hash whose values respond to #timestamp with a Time.
#
# Returns the same (mutated) cache Hash.
def deleteCacheObject(cache)
  now = Time.now
  # Subtracting two Time objects yields the elapsed seconds directly; no
  # parsing is needed. delete_if removes entries safely while iterating.
  cache.delete_if { |_url, entry| now - entry.timestamp > $MAX_TIME }
end
# Returns the total size of the cache in bytes.
#
# For every entry this counts the bytes of the hash key, the entry's url, its
# response and the asctime rendering of its timestamp. The sizes are summed
# directly, so the cache contents are never copied.
#
# cache - Hash whose keys are Strings and whose values respond to #url,
#         #response (Strings) and #timestamp (Time).
#
# Returns an Integer; 0 for an empty cache.
def cacheSize(cache)
  cache.inject(0) do |total, (key, entry)|
    total + key.bytesize + entry.url.bytesize + entry.response.bytesize +
      entry.timestamp.asctime.bytesize
  end
end
# Fetches the requested resource from the origin server and returns the raw
# response (status line, headers and body) as one String.
#
# request    - object responding to #host, #port, #filename and #version.
# header_str - client headers already rendered as "Name: value\r\n" lines.
#              NOTE(review): these are still not forwarded upstream, as in
#              the previous version; forwarding conditional or range headers
#              would let partial/304 responses into the URL-keyed cache.
#              Confirm before changing.
#
# A Host header is always sent, because servers that host several sites on
# one address need it to select the right one. The port is included in it
# only when it differs from $DEFAULT_PORT.
#
# The upstream socket is closed even if writing or reading raises.
def openWebConn(request, header_str)
  host_value =
    if request.port.to_i == $DEFAULT_PORT
      request.host
    else
      "#{request.host}:#{request.port}"
    end
  req = "GET #{request.filename} #{request.version}\r\nHost: #{host_value}\r\n\r\n"

  socket = TCPSocket.open(request.host, request.port)
  begin
    socket.print(req)
    # Read until the server closes the connection.
    socket.read
  ensure
    socket.close
  end
end
# Renders the collected client headers as a block of header lines, adjusting
# the connection-management headers on the way.
#
# headers - Hash mapping "Name:" (colon included) to the rest of the header
#           line (" value\r\n").
#
# Rules, applied in the Hash's iteration order (header names are matched
# case-insensitively):
#   * Keep-Alive is dropped.
#   * Connection and Proxy-Connection are replaced by a single
#     "Connection: close" line, emitted where the first of them appeared.
#   * Every other header is copied through unchanged.
#
# Returns the resulting String ("" when there are no headers).
def parse_headers(headers)
  str = ""
  close_added = false
  headers.each_pair do |name, value|
    if name.casecmp("Keep-Alive:") == 0
      next
    elsif name.casecmp("Proxy-Connection:") == 0 || name.casecmp("Connection:") == 0
      # Emit one well-formed header, however many of these the client sent.
      str << "Connection: close\r\n" unless close_added
      close_added = true
    else
      str << name << value
    end
  end
  str
end
# Parses the first two lines of a GET request and builds a Request object.
#
# request - Array of raw lines: request[0] is the request line
#           ("GET http://host[:port][/path] HTTP/x.y"), request[1] the
#           "Host: ..." line.
# time    - timestamp stored on the returned Request.
#
# Returns a Request with type "GET", version "HTTP/1.0", the host taken from
# the Host line (any ":port" suffix removed), the port taken from the URL
# (a String when the URL names one, $DEFAULT_PORT otherwise) and the path
# taken from the URL ("/" when the URL has none). If the request line does
# not have exactly $NUM_REQ_ARGS tokens, host, port and filename are left as
# empty strings.
#
# Terminates the process via abort when the request is not an http:// GET
# request or when the Host line carries no host name.
def parse_request(request, time)
  host = ""
  port = ""
  filename = ""
  line_start = "http://"
  host_str = "Host: "

  # Check if request is a HTTP request
  unless request[0].include?(line_start)
    abort("Malformed request line - only http requests are parsed.")
  end

  # Check if request conforms to METHOD URL HTTP_VERSION
  req = request[0].split
  if req.length == $NUM_REQ_ARGS
    # The caller only passes GET request lines, but verify anyway.
    abort("Request is not a 'GET' request. Ignored.") unless req[0].eql?("GET")
    unless req[1].start_with?(line_start)
      abort("Malformed request line - only http requests are parsed.")
    end

    # Split "host[:port]/path" at the first '/'. When there is no '/',
    # partition returns empty separator and tail, and the path is "/".
    uri = req[1][line_start.length..-1]
    authority, slash, path = uri.partition('/')
    filename = slash.empty? ? "/" : slash + path

    # Look for the ':' that separates hostname and port. Only the authority
    # is searched, so a ':' inside the path cannot be mistaken for a port.
    colon_index = authority.index(':')
    port = colon_index ? authority[(colon_index + 1)..-1] : $DEFAULT_PORT
    port = $DEFAULT_PORT if port.to_s.empty?

    # Host name comes from the Host line: drop the "Host: " prefix, the
    # trailing CRLF and any ":port" suffix before validating it.
    host = request[1].to_s[host_str.length..-1].to_s.strip
    host = host.sub(/:\d*\z/, "")
    abort("Malformed request line - invalid host.") if host.empty?
  end

  Request.new("GET", host.chomp, port, filename, "HTTP/1.0", time)
end
# Script entry point: when this file is run directly, require exactly one
# command-line argument (the port to listen on) and start the proxy on it.
if $0 == __FILE__
  abort("Usage: ruby proxy.rb <port>\n") unless ARGV.length == 1
  crappy_proxy(ARGV.first)
end