-
Notifications
You must be signed in to change notification settings - Fork 1
/
p398dlp_read_audio_function.py
187 lines (135 loc) · 7.8 KB
/
p398dlp_read_audio_function.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
"""
################################################################################
This file is p398dlp_read_audio_function.py.
Read an audio binary file written by an Arduino Mega 2560 using the Arduino
program sound_record.io.
George Gollin, University of Illinois, September 16, 2018
The binary audio file format written by the Arduino for 10-bit ADC data follows.
An unsigned long is 4 bytes, while an unsigned short is 2 bytes. The following
is from AnalogBinLogger.h:
*** First block of file (512 bytes) ***
unsigned long adcFrequency; // ADC clock frequency
unsigned long cpuFrequency; // CPU clock frequency
unsigned long sampleInterval; // Sample interval in CPU cycles.
unsigned long recordEightBits; // Size of ADC values, nonzero for 8-bits.
unsigned long pinCount; // Number of analog pins in a sample.
unsigned long pinNumber[123]; // List of pin numbers in a sample.
*** Subsequent blocks (also 512 bytes each) ***
unsigned short count; // count of data values (should be 254)
unsigned short overrun; // count of overruns since last block (0, one hopes)
unsigned short data[254]; // ADC data, low byte comes before by high byte.
// For example, 511 (= 255 + 256) will have
// data[0] = 255 (= 0xFF) and data[1] = 1 (= 0x01)
// This is "little endian" format.
The program takes about 80 seconds on a Macbook Air to read and unpack 2,000,000
# buffers, or about 508 million ADC samples. This would correspond to about 4
# hours, 24 minutes of recording at 32 kHz.
################################################################################
"""
def read_audio(audio_filename, max_buffers_to_read):
############################## initialize stuff ################################
# get a structure-handling library.
import struct
# also the ubiquitous numpy
import numpy as np
# a library of stuff about US vs European text conventions
import locale
# open this binary file in read mode
audio_file = open(audio_filename, 'rb')
print("reading from file ", audio_filename)
# file's block size
file_block_size = 512
# maximum number of ADC words in the buffer
max_ADC_words_per_buffer = 254
# create arrays to hold the data buffer's ADC data.
# bytes...
ADC_data_array_bytes = np.empty(2 * max_ADC_words_per_buffer, dtype=int)
# two-byte words...
ADC_data_array = np.empty(max_ADC_words_per_buffer, dtype=int)
#create an array to hold all the data we read for the user
ADC_all_the_data_array = \
np.empty(max_buffers_to_read * max_ADC_words_per_buffer, dtype=int)
# initialize the number of ADC digitizations read so far
ADC_digitizations_read_so_far = 0
######################### read the file's header block #########################
# read the four-byte ADC clock frequency from the header.
ADC_clock_frequency_bytes = audio_file.read(4)
# unpack the bytes: "<" means "little endian" (low byte comes before high byte)
# and "I" means four bytes. Note that struct.unpack always returns an array,
# even if it is of unit length, so I need to put [0] at the end to get an
# integer from the unit-length array.
ADC_clock_frequency = struct.unpack("<I", ADC_clock_frequency_bytes)[0]
# inform the user...
print("ADC clock frequency (kHz) ", ADC_clock_frequency / 1000)
# read the four-byte CPU clock frequency from the header. Arduino Mega 2560
# has a 16 MHz clock.
CPU_clock_frequency_bytes = audio_file.read(4)
CPU_clock_frequency = struct.unpack("<I", CPU_clock_frequency_bytes)[0]
print("CPU clock frequency (kHz) ", CPU_clock_frequency / 1000)
# read the sample interval in CPU cycles. Should be about 16 MHz / 32 kHz = 500.
sample_interval_bytes = audio_file.read(4)
sample_interval = struct.unpack("<I", sample_interval_bytes)[0]
print("ADC sample interval in CPU cycles ", sample_interval)
# read (and ignore) the ADC data word size (2 bytes) and number of pins used (1).
audio_file.read(8)
#read the first pin number. (Should be 7!)
ADC_pin_bytes = audio_file.read(4)
ADC_pin = struct.unpack("<I", ADC_pin_bytes)[0]
print("Arduino ADC analog pin recorded ", ADC_pin)
# now read (and ignore) the remaining 488 bytes of the header.
audio_file.read(file_block_size - 24)
############################ read blocks of ADC data ###########################
for buffer_index in range(0, max_buffers_to_read):
########################### read an ADC data block #########################
# read the number of ADC data words in this block
ADC_words_this_block_bytes = audio_file.read(2)
# check that we got something that wasn't of zero length, which indicates
# the end of the file
if len(ADC_words_this_block_bytes) <= 0:
print("Hit end of file, so exit the ADC data reading loop")
break
# convert (little endian) bytes to an integer
ADC_words_this_block = struct.unpack("<H", ADC_words_this_block_bytes)[0]
# print, if we want to...
# print("ADC words this block: ", ADC_words_this_block)
# do the same for the data buffer overrun count (whatever that means!)
# data_overruns_bytes = audio_file.read(2)
# data_overruns = struct.unpack("<H", data_overruns_bytes)[0]
audio_file.read(2)
# now read from the file, loading the ADC data in the rest of this block
# into an array.
ADC_data_array_bytes = audio_file.read(2 * ADC_words_this_block)
# check that we got something that wasn't of zero length, which would
# indicate the end of the file
if len(ADC_data_array_bytes) <= 0:
print("Premature end of this data block, so look for the next block")
break
# unpack into a two-byte integer array. See //stackoverflow.com/questions/
# 45187101/converting-bytearray-to-short-int-in-python
ADC_data_array = \
struct.unpack('<'+'H'*(len(ADC_data_array_bytes)//2), ADC_data_array_bytes)
# inject this array into the big array ADC_all_the_data_array
i_first = int(ADC_digitizations_read_so_far)
i_last = i_first + len(ADC_data_array)
# np.copyto(destination, source). Note that index range does NOT include
# the upper end point.
np.copyto(ADC_all_the_data_array[i_first : i_last], ADC_data_array[:])
# increment the samples-read count
ADC_digitizations_read_so_far += len(ADC_data_array_bytes)/2
# print, if we want to...
# print("ADC_data_array\n", ADC_data_array)
######################## done with this data block #########################
########################### all done reading ADC data ##########################
# for improved readability, get versions of a few BIG numbers formatted to
# include commas separating thousands.
buffer_index_string = locale.format("%d", buffer_index, grouping=True)
ADC_digitizations_read_so_far_string = \
locale.format("%d", ADC_digitizations_read_so_far, grouping=True)
print("\nAll done. ADC data buffers read: ", buffer_index_string)
print("ADC digitizations read: ", ADC_digitizations_read_so_far_string)
print("this corresponds to ", ADC_digitizations_read_so_far / 32000. / 60., \
" minutes of recording")
# now close the audio file.
audio_file.close()
# return ADC_all_the_data_array[0 : int(ADC_digitizations_read_so_far - 1)]
return ADC_all_the_data_array[0 : int(ADC_digitizations_read_so_far)]