forked from jminardi/audio_fingerprinting
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdetect_features.py
149 lines (122 loc) · 4.94 KB
/
detect_features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#! /usr/bin/env python
import os
import cv
import pyaudio
import numpy as np
from scipy.io import wavfile
from matplotlib import pyplot as plt
from skimage.feature import match_template
def detect_surf(image, equalize=False):
    """Mark the SURF keypoints of `image` on the image itself and return it.

    The image is modified in place: when `equalize` is true its histogram
    is equalized first, then a small circle is drawn at every keypoint
    reported by `cv.ExtractSURF`.
    """
    if equalize:
        cv.EqualizeHist(image, image)

    # Passed through to cv.ExtractSURF unchanged; presumably
    # (extended, hessianThreshold, nOctaves, nOctaveLayers) -- confirm
    # against the legacy OpenCV documentation.
    surf_params = (0, 800, 4, 5)
    keypoints, _descriptors = cv.ExtractSURF(image, None,
                                             cv.CreateMemStorage(),
                                             surf_params)

    for keypoint in keypoints:
        # The first element of each keypoint holds its (x, y) position.
        location = keypoint[0]
        center = (int(location[0]), int(location[1]))
        cv.Circle(image, center, 1, cv.RGB(0, 0, 255), 3, 8, 0)
    return image
def detect_gftt(image, equalize=False, cornerCount=500, qualityLevel=0.005,
                minDistance=30):
    """Mark "good features to track" corners on `image` and return it.

    The image is modified in place: when `equalize` is true its histogram
    is equalized first, then a small circle is drawn at every corner
    returned by `cv.GoodFeaturesToTrack`. `cornerCount`, `qualityLevel`
    and `minDistance` are forwarded to that call unchanged.
    """
    if equalize:
        cv.EqualizeHist(image, image)

    # Two 32-bit float, single-channel scratch images the same size as
    # the input, required by cv.GoodFeaturesToTrack.
    size = cv.GetSize(image)
    eig_scratch = cv.CreateImage(size, cv.IPL_DEPTH_32F, 1)
    temp_scratch = cv.CreateImage(size, cv.IPL_DEPTH_32F, 1)

    corners = cv.GoodFeaturesToTrack(image, eig_scratch, temp_scratch,
                                     cornerCount, qualityLevel,
                                     minDistance, None, 2,
                                     False)

    for corner in corners:
        center = (int(corner[0]), int(corner[1]))
        cv.Circle(image, center, 1, cv.RGB(0, 255, 0), 3, 8, 0)
    return image
def detect_harris(array):
    """Return a uint8 mask of the strong Harris-corner responses in `array`.

    `array` is converted to an IPL image with `as_ipl`, the Harris corner
    response is computed with `cv.CornerHarris`, and every pixel whose
    response exceeds the threshold is set to 255 in the returned mask
    (all other pixels are 0). The mask has the image's height and width.
    """
    image = as_ipl(array)
    height, width = image.height, image.width

    response_map = cv.CreateMat(height, width, cv.CV_32FC1)
    cv.CornerHarris(image, response_map, 3)

    threshold = 10e-06
    mask = np.zeros((height, width)).astype('uint8')
    for row in range(height):
        for col in range(width):
            response = cv.Get2D(response_map, row, col)[0]
            if response > threshold:
                # The circle is drawn on the local IPL image only; that
                # image is not part of the return value.
                cv.Circle(image, (col, row), 1, cv.RGB(155, 0, 25), 3, 8, 0)
                mask[row, col] = 255

    return np.asarray(mask[:, :])
def as_ipl(source):
    """Wrap the 2-D numpy array `source` in a single-channel IPL image.

    The image header is created as 8-bit unsigned (`cv.IPL_DEPTH_8U`)
    with width `source.shape[1]` and height `source.shape[0]`; its data
    is a byte copy of the array.
    """
    rows, cols = source.shape[0], source.shape[1]
    # Number of bytes occupied by one row of the array.
    row_bytes = source.dtype.itemsize * cols

    header = cv.CreateImageHeader((cols, rows), cv.IPL_DEPTH_8U, 1)
    cv.SetData(header, source.tostring(), row_bytes)
    return header
def ipl_as_array(ipl):
    """Return the full contents of `ipl` as a numpy array.

    `ipl` is sliced with `[:, :]` and the result is handed to
    `np.asarray`.
    """
    everything = ipl[:, :]
    return np.asarray(everything)
def show_images(layout, images, show=True):
    """Plot each image in `images` in its own subplot of a new figure.

    `layout` is multiplied by ten and the 1-based image position is
    added to form the integer subplot code given to `fig.add_subplot`
    (e.g. layout 22 yields 221, 222, ...). When `show` is true the
    figure is displayed with `plt.show()` after all images are added.
    """
    fig = plt.figure()
    for position, image in enumerate(images, 1):
        axes = fig.add_subplot(layout * 10 + position)
        axes.imshow(image)
    if show:
        plt.show()
################################################################################
def spectrogram_from_file(f, channel=0, sample_size=512):
    """Read a multi-channel wav file and return its magnitude spectrogram.

    Parameters:
        f: filename or open file object accepted by `wavfile.read`.
        channel: index of the channel (second axis of the audio data)
            to use, or the string 'sum' to add all channels together.
        sample_size: number of downsampled samples per FFT segment.

    Returns:
        (spectrum, sampling_rate) where `spectrum` has
        `sample_size // 2` rows (one per frequency bin) and one column
        per segment, and `sampling_rate` is the rate reported by the
        file (i.e. before the 4x downsampling done here).

    Trailing samples that do not fill a complete group of four, or a
    complete segment, are dropped instead of raising an error.
    """
    sampling_rate, audio_data = wavfile.read(f)
    if channel == 'sum':
        track = np.sum(audio_data, 1)
    else:
        track = audio_data[:, channel]
    track = track.astype('float32')

    # Downsample 4x by averaging each run of four consecutive samples.
    usable = len(track) - len(track) % 4
    downsampled = track[:usable].reshape((-1, 4)).T.mean(0)

    # Scale so that the largest sample value becomes 1.
    normalized = downsampled / downsampled.max()

    usable = len(normalized) - len(normalized) % sample_size
    segmented = normalized[:usable].reshape((-1, sample_size))

    # Floor division keeps the slice bound an integer under true
    # division; only the first half of each FFT row is kept.
    spectrum = np.abs(np.fft.fft(segmented))[:, :sample_size // 2].T
    return spectrum, sampling_rate
def spectrogram(data, segment_size=60):
    """Return the magnitude spectrogram of the 1-D signal `data`.

    The signal is cut into consecutive segments of `segment_size`
    samples (trailing samples that do not fill a segment are dropped),
    each segment is Fourier transformed, and the magnitudes are
    returned with one row per frequency bin and one column per segment.

    Parameters:
        data: 1-D array-like of samples.
        segment_size: number of samples per FFT segment.

    Returns:
        Array of shape (segment_size // 2, number_of_segments).
    """
    data = np.asarray(data)
    end = len(data) - len(data) % segment_size
    stacked = data[:end].reshape((-1, segment_size))
    magnitudes = np.abs(np.fft.fft(stacked))
    # fft results are mirrored, this trims the excess. Floor division
    # keeps the slice bound an integer under true division.
    return magnitudes.T[:segment_size // 2, :]
def downsample2d(a, factor):
    """Downsample the 2-D array `a` by averaging `factor` x `factor` blocks.

    Rows and columns that do not fill a complete block are dropped.

    Parameters:
        a: 2-D numpy array.
        factor: positive integer block edge length.

    Returns:
        Array of shape (a.shape[0] // factor, a.shape[1] // factor)
        holding the mean of each block.
    """
    # Largest extents that are whole multiples of `factor`.
    e0 = a.shape[0] - a.shape[0] % factor
    e1 = a.shape[1] - a.shape[1] % factor
    # Split each axis into (block index, position inside block). The
    # inner sizes must be `factor` itself so the element count matches
    # the trimmed array even when the input is not a multiple of it.
    blocks = (e0 // factor, factor, e1 // factor, factor)
    return a[:e0, :e1].reshape(blocks).mean(-1).mean(1)
def process_wavfile(filename, store):
    """ Open the given wavfile, downsample it, compute the
    spectrogram, and downsample again. Store the result in
    the given `store` keyed under the filename.

    The key is the last '/'-separated component of `filename` up to
    its first '.'. The audio is downsampled 16x by averaging runs of
    16 consecutive values of the flattened sample array; trailing
    values that do not fill a complete run are dropped rather than
    raising an error. Nothing is returned; `store` is mutated.
    """
    name = filename.split('/')[-1].split('.')[0]
    _rate, audio = wavfile.read(filename)

    # Flatten first so the trim below works on the same element order
    # that reshape((-1, 16)) would use.
    samples = audio.reshape(-1)
    usable = samples.size - samples.size % 16
    downsampled = samples[:usable].reshape((-1, 16)).mean(1)

    spec = spectrogram(downsampled, segment_size=512)
    store[name] = downsample2d(spec, 2)
def acquire_audio(seconds=5):
    """ Acquire audio for the given duration.

    Records one channel of 16-bit samples at 11025 Hz from the default
    input device in chunks of 1024 frames, reading
    `int(rate / chunk * seconds)` chunks in total.

    Returns:
        1-D float array in which every element is the mean of two
        consecutive recorded samples (i.e. the recording downsampled 2x).

    The stream and the PyAudio instance are released even if opening
    the stream or reading from it raises.
    """
    rate = 11025
    chunk = 1024
    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=rate,
                        input=True,
                        frames_per_buffer=chunk)
        try:
            frames = [stream.read(chunk)
                      for _ in range(int(rate / chunk * seconds))]
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # frombuffer replaces the deprecated binary mode of np.fromstring.
    ary = np.frombuffer(b''.join(frames), dtype=np.short)
    return ary.reshape((-1, 2)).mean(1)
def process_acquired(ary):
    """ Turn the given audio array into a downsampled spectrogram.

    The spectrogram is computed with 512-sample segments and then
    reduced by a factor of 2 along both axes.
    """
    return downsample2d(spectrogram(ary, segment_size=512), 2)
# Script section: match a slice of the episode's spectrogram against the
# full spectrogram and print the best match score.
spectrum, rate = spectrogram_from_file('adv_time/ep1.wav')
# Columns 5000-14999 of the spectrogram serve as the template.
template = spectrum[:, 5000:15000]
result = match_template(spectrum, template)
score = result.max()
# Parenthesized form prints the same text on Python 2 and also parses
# on Python 3.
print(score)