-
Notifications
You must be signed in to change notification settings - Fork 5
/
test.rb
executable file
·112 lines (94 loc) · 2.23 KB
/
test.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#!/usr/bin/env ruby
#
require 'thread'
# Load the whole corpus into memory as one String.
#
# The path may be supplied as the first command-line argument; when it is
# omitted the script falls back to 'corpus.txt' in the working directory.
#
# The text is kept as a String (it is not converted to an Array); the scanning
# code below indexes it directly.
# NOTE(review): indexing a String holding multi-byte characters may be slower
# than indexing an Array of characters -- confirm whether that matters for the
# timings this script reports.
puts("Reading file.")
text = File.read(ARGV.fetch(0, 'corpus.txt'))
puts("done.")
# Tells whether the element of +arr+ at index +i+ separates words.
#
# The separators are carriage return, newline, tab and space. Every other
# value -- including the nil produced by an index past the end -- is reported
# as not being a boundary.
def word_boundary?(arr, i)
  case arr[i]
  when "\r", "\n", "\t", " " then true
  else false
  end
end
# Runs the given block, prints how long it took in milliseconds, and returns
# whatever the block returned.
#
# The duration is taken from the monotonic clock rather than the wall clock,
# so a system clock adjustment during the run cannot skew (or turn negative)
# the reported time.
#
# call - the block to measure.
#
# Returns the block's result.
def time_this(&call)
  start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  r = call.call
  stop_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  puts("time #{(stop_time - start_time) * 1000}ms")
  r
end
# Finds the longest whitespace-delimited word in +text+, probing positions in
# the index range [start, stop). Despite the method's name, it is the LONGEST
# word that is reported.
#
# Rather than visiting every character, the scan jumps ahead by the length of
# the best word so far and then looks backwards for a boundary: only a run
# containing no boundary can hold a word at least that long.
#
# text  - the String to scan.
# start - index to begin at. If the character there is not whitespace, a word
#         is taken to begin at start.
# stop  - index at which probing ends (exclusive; clamped to text.length). A
#         candidate word found before stop is followed to its end even when
#         that end lies past stop.
#
# Returns [length, word]. When several words share the greatest length the
# last of them is returned. Returns [0, ''] when the range holds no word.
def find_shortest(text, start, stop)
  limit = text.length
  # Never probe past the end of the text: out-of-range reads yield nil, which
  # is not a word boundary and would otherwise look like part of a word.
  last = [stop, limit].min

  i = start     # The index currently being probed.
  loc = start   # Where the longest word found so far begins.
  len = 1       # Length a word must reach to be accepted as the new longest.
  found = false # Whether any word has been accepted yet.

  while i < last
    # Walk backwards over the +len+ characters ending at i, stopping at the
    # first word boundary.
    back_i = 0
    back_i += 1 while back_i < len && !word_boundary?(text, i - back_i)

    if back_i < len
      # A boundary sits at i - back_i, so no word of +len+ characters ends
      # here. Resume +len+ characters after that boundary.
      i = i - back_i + len
    else
      # +len+ boundary-free characters end at i: the word starting at
      # i - len + 1 is at least as long as the current best. Follow it to its
      # end; the end of the text terminates a word just like whitespace does.
      loc = i - len + 1
      i += 1 until i >= limit || word_boundary?(text, i)
      len = i - loc
      found = true
    end
  end

  found ? [len, text[loc...loc + len]] : [0, '']
end
# Finds the longest word in +text+ by cutting it into roughly +thread_count+
# slices, scanning every slice with find_shortest on its own Thread, and
# keeping the best result. As with find_shortest, the name notwithstanding,
# the LONGEST word is what is reported.
#
# Each slice is extended to the next space so that a cut never lands inside a
# space-delimited word.
#
# NOTE(review): on MRI the global VM lock is expected to keep these threads
# from running the CPU-bound scan in parallel -- confirm before reading the
# timings as a parallel speed-up.
#
# text         - the String to scan.
# thread_count - the desired number of slices; must be positive.
#
# Returns [length, word]; on a tie the word from the later slice wins.
# Returns [0, ''] for empty text.
# Raises ArgumentError when thread_count is not positive.
def find_shortest_threaded(text, thread_count)
  raise ArgumentError, 'thread_count must be positive' unless thread_count.positive?

  # Ceiling division. The floor of 1 guarantees every slice advances, even
  # when the text is shorter than thread_count.
  segment = [(text.length + thread_count - 1) / thread_count, 1].max

  # Split the text into [text, start, stop] argument triples.
  slices = []
  i = 0
  while i < text.length
    j = text.index(' ', i + segment) || text.length
    slices << [text, i, j]
    i = j
  end

  # Start every thread first, then collect the results in slice order.
  workers = slices.map do |slice|
    Thread.new(slice) { |a| find_shortest(*a) }
  end

  workers.reduce([0, '']) do |best, worker|
    len, word = worker.value
    best[0] > len ? best : [len, word]
  end
end
# Benchmark driver. Every configuration is measured twice in a row so that a
# warmed-up run can be compared with the first one.
puts "Single thread."
2.times do
  len, word = time_this { find_shortest(text, 0, text.length) }
  puts("longest #{len} \"#{word}\"")
end

# The thread count passed to the scanner matches the printed heading.
[8, 16].each do |thread_count|
  puts "Multi-thread #{thread_count}."
  2.times do
    len, word = time_this { find_shortest_threaded(text, thread_count) }
    puts("longest #{len} \"#{word}\"")
  end
end