-
Notifications
You must be signed in to change notification settings - Fork 3
/
aukit.lua
3620 lines (3463 loc) · 162 KB
/
aukit.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
--- AUKit: Audio decoding and processing framework for ComputerCraft
--- @file
---
--- AUKit is a framework designed to simplify the process of loading, modifying,
--- and playing audio files in various formats. It includes support for loading
--- audio from many sources, including PCM, DFPWM, G.711, and ADPCM codecs, as
--- well as WAV, AIFF, AU, and FLAC files. It can also generate audio on-the-fly
--- as tones, noise, or silence.
---
--- AUKit uses a structure called aukit.Audio to store information about each audio
--- chunk. An audio object holds the sample rate of the audio, as well as the
--- data for each channel stored as floating-point numbers. aukit.Audio objects can
--- hold any number of channels at any sample rate with any duration.
---
--- To obtain an audio object, you can use any of the main functions in the aukit
--- module. These allow loading from various raw codecs or file formats, with
--- data sources as strings, or tables if using a raw codec loader.
---
--- Once the audio is loaded, various basic operations are available. A subset of
--- the string library is available to simplify operations on the audio, and a
--- number of operators (+, *, .., #) are overridden as well. There's also built-
--- in functions for resampling the audio, with nearest-neighbor, linear, cubic,
--- and sinc interpolation available; as well as mixing channels (including down to
--- mono) and combining/splitting channels. Finally, audio objects can be exported
--- back to PCM, DFPWM, or WAV data, allowing changes to be easily stored on disk.
--- The stream function also automatically chunks data for use with a speaker.
--- All of these functions return a new audio object, leaving the original intact.
---
--- There are also a number of effects available for audio. These are contained
--- in the aukit.effects table, and modify the audio passed to them (as well as
--- returning the audio for streamlining). The effects are intended to speed up
--- common operations on audio. More effects may be added in future versions.
---
--- For simple audio playback tasks, the aukit.stream table provides a number of
--- functions that can quickly decode audio for real-time playback. Each function
--- returns an iterator function that can be called multiple times to obtain fully
--- decoded chunks of audio in 8-bit PCM, ready for playback to one or more
--- speakers. The functions decode the data, resample it to 48 kHz (using the
--- default resampling method), apply a low-pass filter to decrease interpolation
--- error, mix to mono if desired, and then return a list of tables with samples
--- in the range [-128, 127], plus the current position of the audio. The
--- iterators can be passed directly to the aukit.play function, which complements
--- the aukit.stream suite by playing the decoded audio on speakers while decoding
--- it in real-time, handling synchronization of speakers as best as possible.
---
--- If you're really lazy, you can also call `aukit` as a function, which takes
--- the path to a file, and plays this on all available speakers.
---
--- Be aware that processing large amounts of audio (especially loading FLAC or
--- resampling with higher quality) is *very* slow. It's recommended to use audio
--- files with lower data size (8-bit mono PCM/WAV/AIFF is ideal), and potentially
--- a lower sample rate, to reduce the load on the system - especially as all
--- data gets converted to 8-bit DFPWM data on playback anyway. The code yields
--- internally when things take a long time to avoid abort timeouts.
---
--- For an example of how to use AUKit, see the accompanying auplay.lua file.
---
--- @author JackMacWindows
---
--- @copyright
---
--- MIT License
---
--- Copyright (c) 2021-2024 JackMacWindows
---
--- Permission is hereby granted, free of charge, to any person obtaining a copy
--- of this software and associated documentation files (the "Software"), to deal
--- in the Software without restriction, including without limitation the rights
--- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
--- copies of the Software, and to permit persons to whom the Software is
--- furnished to do so, subject to the following conditions:
---
--- The above copyright notice and this permission notice shall be included in all
--- copies or substantial portions of the Software.
---
--- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
--- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
--- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
--- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
--- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
--- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
--- SOFTWARE.
local expect = require "cc.expect"
local dfpwm = require "cc.audio.dfpwm"
local bit32_band, bit32_bxor, bit32_lshift, bit32_rshift, bit32_arshift, bit32_btest, bit32_extract = bit32.band, bit32.bxor, bit32.lshift, bit32.rshift, bit32.arshift, bit32.btest, bit32.extract
local math_floor, math_ceil, math_sin, math_abs, math_fmod, math_min, math_max, math_pi = math.floor, math.ceil, math.sin, math.abs, math.fmod, math.min, math.max, math.pi
local os_epoch, os_queueEvent, os_pullEvent = os.epoch, os.queueEvent, os.pullEvent
local str_pack, str_unpack, str_sub, str_byte, str_rep = string.pack, string.unpack, string.sub, string.byte, string.rep
local table_pack, table_unpack, table_insert, table_remove = table.pack, table.unpack, table.insert, table.remove
--- The main AUKit module.
---
--- The module table is also callable: `aukit(path)` opens the file at `path`,
--- detects its format from the first 64 bytes (falling back to DFPWM when
--- detection returns nothing), and plays it on every speaker that
--- `peripheral.find("speaker")` reports.
---@class aukit
---@field _VERSION string The version of AUKit that is loaded. This follows [SemVer](https://semver.org) format.
---@field defaultInterpolation "none"|"linear"|"cubic"|"sinc" Default interpolation mode for `Audio.resample` and other functions that need to resample.
local aukit = setmetatable({
    _VERSION = "1.10.0",
    defaultInterpolation = "linear"
}, {__call = function(aukit, path)
    expect(1, path, "string")
    local file = assert(fs.open(path, "rb"))
    -- Detection and playback run under pcall so the handle is always closed,
    -- even if the decoder or the player raises an error part-way through.
    local ok, err = pcall(function()
        local format = aukit.detect(file.read(64)) or "dfpwm"
        -- Rewind so the stream loader sees the file from its first byte.
        file.seek("set", 0)
        aukit.play(aukit.stream[format](function() return file.read(48000) end), peripheral.find("speaker"))
    end)
    file.close()
    -- Re-raise the original error unchanged (level 0 adds no position prefix).
    if not ok then error(err, 0) end
end})
--- Effects that can modify audio chunks.
---@class aukit.effects
aukit.effects = {}
--- Loader functions for streaming audio from a remote resource. These are usually used with `aukit.play`.
---@class aukit.stream
aukit.stream = {}
--- The aukit.Audio class represents a chunk of audio with variable channels and sample rate.
---@class aukit.Audio
---@field data number[][] The samples in each channel.
---@field sampleRate number The sample rate of the audio.
---@field metadata table Stores any metadata read from the file if present.
---@field info Metadata Stores any decoder-specific information, including `bitDepth` and `dataType`.
local Audio = {}
-- Metatable shared by Audio objects: the Audio methods attach it with
-- setmetatable, and expectAudio recognises Audio values by comparing against it.
-- Only declared here; its value is assigned outside this part of the file.
local Audio_mt
local dfpwmUUID = "3ac1fa38-811d-4361-a40d-ce53ca607cd1" -- UUID for DFPWM in WAV files
--- Converts a textual UUID into its raw byte string.
--- Dashes are removed and every pair of hex digits becomes one byte, in the
--- order the digits are written.
---@param uuid string The UUID in hyphenated hexadecimal form
---@return string _ The decoded bytes (16 for a well-formed UUID)
local function uuidBytes(uuid)
    -- "%-" escapes the dash, which is a quantifier in Lua patterns.
    local hex = uuid:gsub("%-", "")
    -- The outer parentheses drop gsub's second result (the substitution count)
    -- so that callers always receive exactly one value.
    return (hex:gsub("%x%x", function(c) return string.char(tonumber(c, 16)) end))
end
-- Half-width of the window used by sinc interpolation (the loop there runs
-- from -sincWindowSize to +sincWindowSize). A larger window is chosen when the
-- `jit` global is present.
local sincWindowSize = jit and 30 or 10
-- Codec name -> raw UUID bytes (built with uuidBytes).
-- NOTE(review): presumably the sub-format identifiers used with WAV
-- "extensible" headers; the code that consumes this table is not in this part
-- of the file - confirm there.
local wavExtensible = {
dfpwm = uuidBytes(dfpwmUUID),
pcm = uuidBytes "01000000-0000-1000-8000-00aa00389b71",
msadpcm = uuidBytes "02000000-0000-1000-8000-00aa00389b71",
alaw = uuidBytes "06000000-0000-1000-8000-00aa00389b71",
ulaw = uuidBytes "07000000-0000-1000-8000-00aa00389b71",
adpcm = uuidBytes "11000000-0000-1000-8000-00aa00389b71",
pcm_float = uuidBytes "03000000-0000-1000-8000-00aa00389b71"
}
-- Twelve bit masks in a 1-based list.
-- NOTE(review): presumably the speaker-position mask for each channel count
-- (entry N for N channels) - verify against the WAV reader/writer.
local wavExtensibleChannels = {
0x04,
0x03,
0x07,
0x33,
0x37,
0x3F,
0x637,
0x63F,
0x50F7,
0x50FF,
0x56F7,
0x56FF
}
-- Lookup tables for the ADPCM codecs. All three are indexed from 0 (note the
-- explicit `[0] =`), not from 1 like ordinary Lua sequences.
-- NOTE(review): the decoders that read these tables are outside this part of
-- the file; the descriptions below follow the table names - confirm there.

-- 16 entries (indices 0-15); presumably the step-index adjustment per 4-bit
-- IMA ADPCM code.
local ima_index_table = {
[0] = -1, -1, -1, -1, 2, 4, 6, 8,
-1, -1, -1, -1, 2, 4, 6, 8
}
-- 89 entries (indices 0-88); presumably the IMA ADPCM quantizer step sizes.
local ima_step_table = {
[0] = 7, 8, 9, 10, 11, 12, 13, 14, 16, 17,
19, 21, 23, 25, 28, 31, 34, 37, 41, 45,
50, 55, 60, 66, 73, 80, 88, 97, 107, 118,
130, 143, 157, 173, 190, 209, 230, 253, 279, 307,
337, 371, 408, 449, 494, 544, 598, 658, 724, 796,
876, 963, 1060, 1166, 1282, 1411, 1552, 1707, 1878, 2066,
2272, 2499, 2749, 3024, 3327, 3660, 4026, 4428, 4871, 5358,
5894, 6484, 7132, 7845, 8630, 9493, 10442, 11487, 12635, 13899,
15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767
}
-- Entries for indices -8 through 7, so it can be indexed directly with a
-- signed 4-bit value; presumably the Microsoft ADPCM adaptation factors.
local msadpcm_adaption_table = {
[0] = 230, 230, 230, 230, 307, 409, 512, 614,
[-8] = 768, [-7] = 614, [-6] = 512, [-5] = 409, [-4] = 307, [-3] = 230, [-2] = 230, [-1] = 230
}
-- Renames FLAC comment keys to the metadata field names AUKit exposes.
-- decodeFLAC looks keys up here after lower-casing them; a key with no entry
-- is stored under its lower-cased name unchanged.
local flacMetadata = {
tracknumber = "trackNumber",
["encoded-by"] = "encodedBy",
sourcemedia = "sourceMedia",
labelno = "labelNumber",
discnumber = "discNumber",
partnumber = "partNumber",
productnumber = "productNumber",
catalognumber = "catalogNumber",
["release date"] = "releaseDate",
["source medium"] = "sourceMedium",
["source artist"] = "sourceArtist",
["guest artist"] = "guestArtist",
["source work"] = "sourceWork",
disctotal = "discCount",
tracktotal = "trackCount",
parttotal = "partCount",
tcm = "composer"
}
-- Four-character tag -> metadata field name.
-- NOTE(review): presumably the WAV LIST/INFO chunk identifiers; the WAV loader
-- that uses this table is not in this part of the file - confirm there.
-- Two tags (IPRT and ITRK) both map to "trackNumber".
local wavMetadata = {
IPRD = "album",
INAM = "title",
IART = "artist",
IWRI = "author",
IMUS = "composer",
IPRO = "producer",
IPRT = "trackNumber",
ITRK = "trackNumber",
IFRM = "trackCount",
PRT1 = "partNumber",
PRT2 = "partCount",
TLEN = "length",
IRTD = "rating",
ICRD = "date",
ITCH = "encodedBy",
ISFT = "encoder",
ISRF = "media",
IGNR = "genre",
ICMT = "comment",
ICOP = "copyright",
ILNG = "language"
}
--- Converts a UTF-8 string to a single-byte string.
--- Code points up to 0xFF become the byte with that value; anything above is
--- replaced with "?". Invalid UTF-8 raises an error from the utf8 library.
---
--- The string is converted one code point at a time so that its length is not
--- limited by how many values fit on the Lua stack.
---@param str string The UTF-8 encoded text
---@param pos any Passed through unchanged, so the result of `string.unpack` can be forwarded directly
---@return string _ The converted text
---@return any _ The `pos` argument
local function utf8decode(str, pos)
    local chars, n = {}, 0
    for _, code in utf8.codes(str) do
        n = n + 1
        if code > 0xFF then chars[n] = "?"
        else chars[n] = string.char(code) end
    end
    return table.concat(chars), pos
end
--- Restricts a number to the closed range [min, max].
---@param n number The value to restrict
---@param min number The lowest value that may be returned
---@param max number The highest value that may be returned
---@return number _ `min` if `n` is below it, `max` if `n` is above it, otherwise `n`
local function clamp(n, min, max)
    if n < min then
        return min
    end
    if n > max then
        return max
    end
    return n
end
--- Checks that an argument is an Audio object (a table whose metatable is Audio_mt).
---@param n number The argument index to report in the error message
---@param var any The value to check
---@return aukit.Audio _ `var`, when it is an Audio object
local function expectAudio(n, var)
    local isAudio = type(var) == "table" and getmetatable(var) == Audio_mt
    if isAudio then
        return var
    end
    -- No real Lua type is named "Audio", so this call exists only to raise
    -- the standard bad-argument error.
    expect(n, var, "Audio")
end
--- Makes a shallow copy of a table: every key/value pair is carried over, but
--- nested tables are shared with the original rather than duplicated.
---@param tab table The table to copy
---@return table _ A new table holding the same pairs
local function copy(tab)
    local clone = {}
    for key, value in pairs(tab) do
        clone[key] = value
    end
    return clone
end
--- Reads a fixed-width integer out of a binary string.
---@param str string The data to read from
---@param pos number The 1-based index of the first byte
---@param sz number The width of the integer in bytes
---@param signed boolean|nil Whether to interpret the value as two's complement
---@param be boolean|nil Whether the bytes are big-endian (little-endian otherwise)
---@return number _ The decoded value
---@return number _ The position just past the bytes that were read
local function intunpack(str, pos, sz, signed, be)
    local value = 0
    if be then
        -- Most significant byte first: shift the running value up each step.
        for offset = 0, sz - 1 do
            value = value * 256 + str_byte(str, pos + offset)
        end
    else
        -- Least significant byte first: weight each byte by its position.
        for offset = 0, sz - 1 do
            value = value + str_byte(str, pos + offset) * 2^(8 * offset)
        end
    end
    if signed and value >= 2^(sz * 8 - 1) then
        value = value - 2^(sz * 8)
    end
    return value, pos + sz
end
-- Interpolation kernels, keyed by mode name. Each takes a channel's sample
-- list and a fractional 1-based position `x`, and returns the estimated
-- sample value at that position.
local interpolate = {
    -- Takes the sample at the integer part of the position.
    none = function(data, x)
        local index = math_floor(x)
        return data[index]
    end,
    -- Straight line between the two neighbouring samples; a missing right
    -- neighbour is treated as equal to the left one.
    linear = function(data, x)
        local base = math_floor(x)
        local left = data[base]
        local right = data[base+1] or left
        return left + (right - left) * (x - base)
    end,
    -- Cubic through the four surrounding samples; missing neighbours fall
    -- back to the nearest available sample.
    cubic = function(data, x)
        local base = math_floor(x)
        local t = x - base
        local p1 = data[base]
        local p0 = data[base-1] or p1
        local p2 = data[base+1] or p1
        local p3 = data[base+2] or p2
        return (-0.5*p0 + 1.5*p1 - 1.5*p2 + 0.5*p3)*t^3 + (p0 - 2.5*p1 + 2*p2 - 0.5*p3)*t^2 + (-0.5*p0 + 0.5*p2)*t + p1
    end,
    -- Sum of sin(pi*d)/(pi*d)-weighted samples within sincWindowSize
    -- positions on either side; positions without a sample are skipped.
    sinc = function(data, x)
        local base = math_floor(x)
        local frac = x - base
        local total = 0
        for n = -sincWindowSize, sincWindowSize do
            local sample = data[base + n]
            if sample then
                local arg = math_pi * (frac - n)
                if arg ~= 0 then
                    total = total + sample * math_sin(arg) / arg
                else
                    -- The kernel's value at zero is 1, so the sample counts in full.
                    total = total + sample
                end
            end
        end
        return total
    end
}
-- Per-mode constants, keyed by the same names as the `interpolate` table.
-- NOTE(review): the code that reads these is not in this part of the file;
-- presumably they describe which neighbouring samples each mode needs at the
-- start and end of a chunk - confirm against the callers.
local interpolation_start = {none = 1, linear = 1, cubic = 0, sinc = 0}
local interpolation_end = {none = 1, linear = 2, cubic = 3, sinc = 0}
-- Waveform generators, keyed by shape name. Each takes a position `x`, a
-- frequency and an amplitude (plus a duty value for the square wave) and
-- returns one sample value.
local wavegen = {
    sine = function(x, freq, amplitude)
        local phase = 2 * x * math_pi * freq
        return math_sin(phase) * amplitude
    end,
    triangle = function(x, freq, amplitude)
        local ramp = math_fmod(2.0 * x * freq + 1.5, 2.0)
        return 2.0 * math_abs(amplitude * ramp - amplitude) - amplitude
    end,
    square = function(x, freq, amplitude, duty)
        -- Position within the current cycle, in [0, 1).
        local cyclePos = (x * freq) % 1
        if cyclePos >= duty then
            return -amplitude
        end
        return amplitude
    end,
    sawtooth = function(x, freq, amplitude)
        local ramp = math_fmod(2.0 * x * freq + 1.0, 2.0)
        return amplitude * ramp - amplitude
    end
}
--[[
.########.##..........###.....######.
.##.......##.........##.##...##....##
.##.......##........##...##..##......
.######...##.......##.....##.##......
.##.......##.......#########.##......
.##.......##.......##.....##.##....##
.##.......########.##.....##..######.
]]
local decodeFLAC do
-- Simple FLAC decoder (Java)
--
-- Copyright (c) 2017 Project Nayuki. (MIT License)
-- https://www.nayuki.io/page/simple-flac-implementation
--
-- Permission is hereby granted, free of charge, to any person obtaining a copy of
-- this software and associated documentation files (the "Software"), to deal in
-- the Software without restriction, including without limitation the rights to
-- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
-- the Software, and to permit persons to whom the Software is furnished to do so,
-- subject to the following conditions:
-- - The above copyright notice and this permission notice shall be included in
-- all copies or substantial portions of the Software.
-- - The Software is provided "as is", without warranty of any kind, express or
-- implied, including but not limited to the warranties of merchantability,
-- fitness for a particular purpose and noninfringement. In no event shall the
-- authors or copyright holders be liable for any claim, damages or other
-- liability, whether in an action of contract, tort or otherwise, arising from,
-- out of or in connection with the Software or the use or other dealings in the
-- Software.
-- Predictor coefficients for fixed-prediction subframes. Entry [order + 1]
-- holds the coefficients for predictor order `order` (0 through 4), which is
-- how decodeFixedPredictionSubframe indexes it.
local FIXED_PREDICTION_COEFFICIENTS = {
{},
{1},
{2, -1},
{3, -3, 1},
{4, -6, 4, -1},
};
-- Creates a bit-level reader over a binary string.
-- `data` is the string to read and `pos` the 1-based index of the first byte.
-- Returns a table of closures that share one read position and bit buffer.
local function BitInputStream(data, pos)
    local stream = {}
    -- `accumulator` holds the most recently loaded bytes; its low
    -- `bitsAvailable` bits are the ones that have not been consumed yet.
    local accumulator, bitsAvailable = 0, 0

    -- Discards buffered bits down to a multiple of eight.
    function stream.alignToByte()
        bitsAvailable = bitsAvailable - bitsAvailable % 8
    end

    -- Reads 8 bits as an unsigned number (nil once the data is exhausted).
    function stream.readByte()
        return stream.readUint(8)
    end

    -- Reads `n` bits as an unsigned number. Returns 0 for n == 0, and nil if
    -- the data runs out before enough bits are available.
    function stream.readUint(n)
        if n == 0 then
            return 0
        end
        while bitsAvailable < n do
            local byte = str_byte(data, pos)
            pos = pos + 1
            if byte == nil then
                return nil
            end
            -- Keep only the low 44 bits so the value stays an exact number.
            accumulator = (accumulator * 256 + byte) % 0x100000000000
            bitsAvailable = bitsAvailable + 8
        end
        bitsAvailable = bitsAvailable - n
        local value = math_floor(accumulator / 2^bitsAvailable)
        if n < 32 then
            value = value % 2^n
        end
        return value
    end

    -- Reads `n` bits as a two's-complement signed number.
    function stream.readSignedInt(n)
        local value = stream.readUint(n)
        if value >= 2^(n-1) then
            value = value - 2^n
        end
        return value
    end

    -- Reads a Rice-coded signed number: a run of zero bits ended by a one
    -- bit, then `param` low-order bits. Even results map to non-negative
    -- values and odd results to negative ones.
    function stream.readRiceSignedInt(param)
        local quotient = 0
        while stream.readUint(1) == 0 do
            quotient = quotient + 1
        end
        local folded = quotient * 2^param + stream.readUint(param)
        if bit32_btest(folded, 1) then
            return -math_floor(folded / 2) - 1
        end
        return math_floor(folded / 2)
    end

    return stream
end
-- Reads the residual values of a subframe into `result`.
-- `inp` is the bit reader, `warmup` the number of samples already stored at
-- the front of `result`, and `blockSize` the total samples in the block.
-- Residuals are written to result[warmup + 1] through result[blockSize].
local function decodeResiduals(inp, warmup, blockSize, result)
    local method = inp.readUint(2)
    if method >= 2 then
        error("Reserved residual coding method " .. method)
    end
    -- Method 0 uses 4-bit parameters, method 1 uses 5-bit ones; the all-ones
    -- parameter value marks a partition stored as plain fixed-width integers.
    local paramBits, escapeParam
    if method == 0 then
        paramBits, escapeParam = 4, 0xF
    else
        paramBits, escapeParam = 5, 0x1F
    end
    local partitionOrder = inp.readUint(4)
    local numPartitions = 2^partitionOrder
    if blockSize % numPartitions ~= 0 then
        error("Block size not divisible by number of Rice partitions")
    end
    local partitionSize = math_floor(blockSize / numPartitions)
    for part = 0, numPartitions - 1 do
        -- 1-based bounds of this partition; the first one skips the warm-up samples.
        local first = part * partitionSize + 1
        if part == 0 then
            first = first + warmup
        end
        local last = (part + 1) * partitionSize
        local param = inp.readUint(paramBits)
        if param >= escapeParam then
            local numBits = inp.readUint(5)
            for idx = first, last do
                result[idx] = inp.readSignedInt(numBits)
            end
        else
            for idx = first, last do
                result[idx] = inp.readRiceSignedInt(param)
            end
        end
    end
end
-- Turns residuals back into samples, in place.
-- The first #coefs entries of `result` must already be real samples; every
-- later entry holds a residual, to which the prediction computed from the
-- preceding #coefs samples (scaled down by 2^shift and floored) is added.
local function restoreLinearPrediction(result, coefs, shift, blockSize)
    local order = #coefs
    local divisor = 2^shift
    for idx = order + 1, blockSize do
        local prediction = 0
        -- coefs[1] applies to the most recent sample, coefs[2] to the one before, ...
        for k = 1, order do
            prediction = prediction + result[idx - k] * coefs[k]
        end
        result[idx] = result[idx] + math_floor(prediction / divisor)
    end
end
-- Decodes a subframe that uses one of the built-in fixed predictors.
-- Reads `predOrder` warm-up samples of `sampleDepth` bits, then the
-- residuals, then rebuilds the samples with the matching fixed coefficients.
local function decodeFixedPredictionSubframe(inp, predOrder, sampleDepth, blockSize, result)
    local idx = 1
    while idx <= predOrder do
        result[idx] = inp.readSignedInt(sampleDepth)
        idx = idx + 1
    end
    decodeResiduals(inp, predOrder, blockSize, result)
    local coefs = FIXED_PREDICTION_COEFFICIENTS[predOrder + 1]
    restoreLinearPrediction(result, coefs, 0, blockSize)
end
-- Decodes a subframe whose predictor coefficients are stored in the stream.
-- Reads `lpcOrder` warm-up samples, the coefficient precision and shift, the
-- coefficients themselves, then the residuals, and finally rebuilds the samples.
local function decodeLinearPredictiveCodingSubframe(inp, lpcOrder, sampleDepth, blockSize, result)
    for idx = 1, lpcOrder do
        result[idx] = inp.readSignedInt(sampleDepth)
    end
    -- The precision field stores (bits per coefficient - 1).
    local precision = inp.readUint(4) + 1
    local shift = inp.readSignedInt(5)
    local coefs = {}
    for idx = 1, lpcOrder do
        coefs[idx] = inp.readSignedInt(precision)
    end
    decodeResiduals(inp, lpcOrder, blockSize, result)
    restoreLinearPrediction(result, coefs, shift, blockSize)
end
-- Decodes one channel's subframe into `result` (blockSize entries).
-- Reads the subframe header, dispatches on the coding type, then scales the
-- samples back up by the number of bits the header said were dropped.
local function decodeSubframe(inp, sampleDepth, blockSize, result)
    inp.readUint(1) -- leading header bit, value ignored
    local kind = inp.readUint(6)
    -- A set flag is followed by a run of zero bits ended by a one bit; the
    -- shift is one plus the number of zeros.
    local shift = inp.readUint(1)
    if shift == 1 then
        while inp.readUint(1) == 0 do
            shift = shift + 1
        end
    end
    sampleDepth = sampleDepth - shift

    if kind == 0 then -- Constant coding
        local constant = inp.readSignedInt(sampleDepth)
        for i = 1, blockSize do
            result[i] = constant
        end
    elseif kind == 1 then -- Verbatim coding
        for i = 1, blockSize do
            result[i] = inp.readSignedInt(sampleDepth)
        end
    elseif 8 <= kind and kind <= 12 then -- Fixed predictor, order = kind - 8
        decodeFixedPredictionSubframe(inp, kind - 8, sampleDepth, blockSize, result)
    elseif 32 <= kind and kind <= 63 then -- Stored predictor, order = kind - 31
        decodeLinearPredictiveCodingSubframe(inp, kind - 31, sampleDepth, blockSize, result)
    else
        error("Reserved subframe type")
    end

    local scale = 2^shift
    for i = 1, blockSize do
        result[i] = result[i] * scale
    end
end
-- Decodes the subframes of one frame and writes normalised samples into
-- `result` (one pre-made table per channel).
-- Channel assignments 0-7 store every channel independently; 8, 9 and 10
-- store two channels as a pair that has to be recombined after decoding.
local function decodeSubframes(inp, sampleDepth, chanAsgn, blockSize, result)
    local channelCount = #result
    local subframes = {}
    for ch = 1, channelCount do
        subframes[ch] = {}
    end

    if 0 <= chanAsgn and chanAsgn <= 7 then
        for ch = 1, channelCount do
            decodeSubframe(inp, sampleDepth, blockSize, subframes[ch])
        end
    elseif 8 <= chanAsgn and chanAsgn <= 10 then
        -- One subframe of the pair carries an extra bit: the first for
        -- assignment 9, the second for assignments 8 and 10.
        local extraFirst, extraSecond = 0, 1
        if chanAsgn == 9 then
            extraFirst, extraSecond = 1, 0
        end
        local first, second = subframes[1], subframes[2]
        decodeSubframe(inp, sampleDepth + extraFirst, blockSize, first)
        decodeSubframe(inp, sampleDepth + extraSecond, blockSize, second)
        if chanAsgn == 8 then
            -- second holds (first - channel 2)
            for i = 1, blockSize do
                second[i] = first[i] - second[i]
            end
        elseif chanAsgn == 9 then
            -- first holds (channel 1 - second)
            for i = 1, blockSize do
                first[i] = first[i] + second[i]
            end
        elseif chanAsgn == 10 then
            -- first holds the mid value, second the side value
            for i = 1, blockSize do
                local side = second[i]
                local right = first[i] - math_floor(side / 2)
                second[i] = right
                first[i] = right + side
            end
        end
    else
        error("Reserved channel assignment")
    end

    local half, full = 2^(sampleDepth-1), 2^sampleDepth
    for ch = 1, channelCount do
        local decoded, target = subframes[ch], result[ch]
        for i = 1, blockSize do
            local s = decoded[i]
            -- Wrap values that overflowed the signed range, then scale by 2^sampleDepth.
            if s >= half then
                s = s - full
            end
            target[i] = s / full
        end
    end
end
-- Decodes a single frame from the bit reader.
--   inp          bit reader created by BitInputStream
--   numChannels  number of channels to decode
--   sampleDepth  bits per sample
--   out2         per-channel sample lists that the frame is appended to when
--                no callback is given
--   callback     optional; when present it is called with the frame's
--                per-channel sample tables and out2 is left untouched
-- Returns false when no data is left at the start of the frame, true once a
-- frame has been decoded. Raises an error on a bad sync code or a reserved
-- block size.
-- NOTE(review): the meaning of the skipped header fields is not checked here;
-- the field layout presumably follows the FLAC frame header - confirm against
-- the format specification.
local function decodeFrame(inp, numChannels, sampleDepth, out2, callback)
local out = {}
for i = 1, numChannels do out[i] = {} end
-- Read a ton of header fields, and ignore most of them
local temp = inp.readByte()
if temp == nil then
-- Nothing left to read: signal the end of the stream to the caller.
return false
end
-- The first 14 bits (8 already read + 6 more) must equal the sync code.
local sync = temp * 64 + inp.readUint(6);
if sync ~= 0x3FFE then error("Sync code expected") end
inp.readUint(2);
local blockSizeCode = inp.readUint(4);
local sampleRateCode = inp.readUint(4);
local chanAsgn = inp.readUint(4);
inp.readUint(4);
-- Variable-length field: count the leading one bits of its first byte, then
-- skip (count - 1) further bytes. The value itself is not used.
temp = inp.readUint(8);
local t2 = -1
for i = 7, 0, -1 do if not bit32_btest(temp, 2^i) then break end t2 = t2 + 1 end
for i = 1, t2 do inp.readUint(8) end
-- Translate the block size code into a sample count; codes 6 and 7 store the
-- size (minus one) explicitly in the next 8 or 16 bits.
local blockSize
if (blockSizeCode == 1) then
blockSize = 192
elseif (2 <= blockSizeCode and blockSizeCode <= 5) then
blockSize = 576 * 2^(blockSizeCode - 2)
elseif (blockSizeCode == 6) then
blockSize = inp.readUint(8) + 1
elseif (blockSizeCode == 7) then
blockSize = inp.readUint(16) + 1
elseif (8 <= blockSizeCode and blockSizeCode <= 15) then
blockSize = 256 * 2^(blockSizeCode - 8)
else
error("Reserved block size")
end
-- Sample rate codes 12-14 are followed by an explicit value, which is
-- skipped (8 bits for code 12, 16 bits for codes 13 and 14).
if (sampleRateCode == 12) then
inp.readUint(8)
elseif (sampleRateCode == 13 or sampleRateCode == 14) then
inp.readUint(16)
end
-- One more header byte is read and ignored (presumably a checksum).
inp.readUint(8)
decodeSubframes(inp, sampleDepth, chanAsgn, blockSize, out)
-- Frames end on a byte boundary, followed by 16 bits that are ignored
-- (presumably a checksum).
inp.alignToByte()
inp.readUint(16)
if callback then callback(out) else
for c = 1, numChannels do
local n = #out2[c]
for i = 1, blockSize do out2[c][n+i] = out[c][i] end
end
end
return true
end
-- Decodes FLAC data held in a string.
--   inp       the complete file contents
--   callback  optional function. It is called once with (sampleRate,
--             numSamples) after the metadata has been read, and then once per
--             decoded frame with that frame's per-channel sample tables.
--   head      when truthy, only the metadata is read and no frames are decoded
-- Returns a table {sampleRate, data, metadata, info} when `head` is truthy
-- (with empty channel lists) or when no callback is given (with all decoded
-- samples). With a callback and no `head`, nothing is returned.
-- Raises an error on a bad magic number, a missing stream info block, or a
-- sample depth that is not a multiple of 8.
function decodeFLAC(inp, callback, head)
local out = {}
local pos = 1
-- Handle FLAC header and metadata blocks
local temp temp, pos = intunpack(inp, pos, 4, false, true)
if temp ~= 0x664C6143 then error("Invalid magic string") end
local sampleRate, numChannels, sampleDepth, numSamples
local last = false
local meta = {}
while not last do
-- Block header: 1 flag bit (last block), 7-bit type, 24-bit big-endian length.
temp, pos = str_byte(inp, pos), pos + 1
last = bit32_btest(temp, 0x80)
local type = bit32_band(temp, 0x7F);
local length length, pos = intunpack(inp, pos, 3, false, true)
if type == 0 then -- Stream info block
-- The first 10 bytes of the block are not needed and are skipped.
pos = pos + 10
-- Sample rate is 20 bits: two whole bytes plus the top 4 bits of the next.
sampleRate, pos = intunpack(inp, pos, 2, false, true)
sampleRate = sampleRate * 16 + bit32_rshift(str_byte(inp, pos), 4)
-- The same byte carries (channels - 1) in 3 bits and the top bit of
-- (sample depth - 1); the other 4 depth bits are the high nibble of the next byte.
numChannels = bit32_band(bit32_rshift(str_byte(inp, pos), 1), 7) + 1;
sampleDepth = bit32_band(str_byte(inp, pos), 1) * 16 + bit32_rshift(str_byte(inp, pos+1), 4) + 1;
-- Sample count is 36 bits: the low 32 are read as a whole integer, and the
-- top 4 come from the low nibble of the byte just before them.
numSamples, pos = intunpack(inp, pos + 2, 4, false, true)
numSamples = numSamples + bit32_band(str_byte(inp, pos-5), 15) * 2^32
-- The final 16 bytes of the block are skipped.
pos = pos + 16
elseif type == 4 then
-- Comment block: a length-prefixed vendor string, a comment count, then
-- that many length-prefixed "key=value" strings (all little-endian).
local ncomments
meta.vendor, ncomments, pos = str_unpack("<s4I4", inp, pos)
for i = 1, ncomments do
local str
str, pos = utf8decode(str_unpack("<s4", inp, pos))
local k, v = str:match "^([^=]+)=(.*)$"
-- Keys are lower-cased, then renamed through flacMetadata when an entry exists.
if k then meta[flacMetadata[k:lower()] or k:lower()] = v end
end
else
-- Any other block type is skipped using its declared length.
pos = pos + length
end
end
if not sampleRate then error("Stream info metadata block absent") end
if sampleDepth % 8 ~= 0 then error("Sample depth not supported") end
for i = 1, numChannels do out[i] = {} end
if callback then callback(sampleRate, numSamples) end
-- Decode FLAC audio frames and write raw samples
if head then return {sampleRate = sampleRate, data = out, metadata = meta, info = {bitDepth = sampleDepth, dataType = "signed"}} end
inp = BitInputStream(inp, pos)
-- Keep decoding frames until decodeFrame reports that the data has run out.
repeat until not decodeFrame(inp, numChannels, sampleDepth, out, callback)
if not callback then return {sampleRate = sampleRate, data = out, metadata = meta, info = {bitDepth = sampleDepth, dataType = "signed"}} end
end
end
--[[
....###....##.....##.########..####..#######.
...##.##...##.....##.##.....##..##..##.....##
..##...##..##.....##.##.....##..##..##.....##
.##.....##.##.....##.##.....##..##..##.....##
.#########.##.....##.##.....##..##..##.....##
.##.....##.##.....##.##.....##..##..##.....##
.##.....##..#######..########..####..#######.
]]
--- Audio
---@section Audio
---@alias Metadata {bitDepth: number|nil, dataType: string|nil}
--- Computes the duration of the audio, based on the first channel.
---@return number _ The length of the audio in seconds
function Audio:len()
    local sampleCount = #self.data[1]
    return sampleCount / self.sampleRate
end
--- Counts the channels stored in the audio object.
---@return number _ The number of channels
function Audio:channels()
    local channelData = self.data
    return #channelData
end
--- Builds a new audio object holding this audio converted to another sample rate.
--- Output positions that land exactly on a source sample copy it unchanged (so
--- resampling to the same rate produces a plain copy); all others are
--- interpolated and clamped to [-1, 1].
--- The function yields (via `sleep(0)`) whenever more than three seconds have
--- passed since the last yield.
---@param sampleRate number The new sample rate in Hertz
---@param interpolation? "none"|"linear"|"cubic"|"sinc" The interpolation mode to use (defaults to `aukit.defaultInterpolation`)
---@return aukit.Audio _ A new audio object with the resampled data
function Audio:resample(sampleRate, interpolation)
    expect(1, sampleRate, "number")
    interpolation = expect(2, interpolation, "string", "nil") or aukit.defaultInterpolation
    local interp = interpolate[interpolation]
    if not interp then
        error("bad argument #2 (invalid interpolation type)", 2)
    end
    local resampled = setmetatable({sampleRate = sampleRate, data = {}, metadata = copy(self.metadata), info = copy(self.info)}, Audio_mt)
    local ratio = sampleRate / self.sampleRate
    local newlen = #self.data[1] * ratio
    local lastYield = os_epoch "utc"
    for channel, source in ipairs(self.data) do
        local line = {}
        for i = 1, newlen do
            if os_epoch "utc" - lastYield > 3000 then
                lastYield = os_epoch "utc"
                sleep(0)
            end
            -- Position of output sample i, measured in source samples (1-based).
            local x = (i - 1) / ratio + 1
            if x % 1 ~= 0 then
                line[i] = clamp(interp(source, x), -1, 1)
            else
                line[i] = source[x]
            end
        end
        resampled.data[channel] = line
    end
    return resampled
end
--- Averages every channel into one, producing a new single-channel audio object.
--- The function yields (via `sleep(0)`) whenever more than three seconds have
--- passed since the last yield.
---@return aukit.Audio _ A new audio object with the audio mixed to mono
function Audio:mono()
    local mixed = setmetatable({sampleRate = self.sampleRate, data = {{}}, metadata = copy(self.metadata), info = copy(self.info)}, Audio_mt)
    local target = mixed.data[1]
    local channelCount = #self.data
    local lastYield = os_epoch "utc"
    for i = 1, #self.data[1] do
        if os_epoch "utc" - lastYield > 3000 then
            lastYield = os_epoch "utc"
            sleep(0)
        end
        local total = 0
        for c = 1, channelCount do
            total = total + self.data[c][i]
        end
        target[i] = total / channelCount
    end
    return mixed
end
--- Joins this audio object and the given ones end to end in a new object.
--- Objects with a different sample rate are resampled to this one's rate first.
--- The result has as many channels as the widest input; where an input lacks a
--- channel, silence of that input's length is inserted instead.
---@param ... aukit.Audio The audio objects to concatenate
---@return aukit.Audio _ The new concatenated audio object
function Audio:concat(...)
    local audios = {self, ...}
    -- Length of each input, taken from its first channel.
    local lengths = {#self.data[1]}
    local channelCount = #self.data
    for i = 2, #audios do
        local other = audios[i]
        expectAudio(i - 1, other)
        if other.sampleRate ~= self.sampleRate then
            other = other:resample(self.sampleRate)
            audios[i] = other
        end
        lengths[i] = #other.data[1]
        channelCount = math_max(channelCount, #other.data)
    end
    local joined = setmetatable({sampleRate = self.sampleRate, data = {}, metadata = copy(self.metadata), info = copy(self.info)}, Audio_mt)
    for c = 1, channelCount do
        local channel = {}
        local offset = 0
        for a = 1, #audios do
            local source = audios[a].data[c]
            local count = lengths[a]
            if source then
                for i = 1, count do
                    channel[offset + i] = source[i]
                end
            else
                for i = 1, count do
                    channel[offset + i] = 0
                end
            end
            offset = offset + count
        end
        joined.data[c] = channel
    end
    return joined
end
--- Takes a subregion of the audio and returns a new audio object with its contents.
--- This takes the same arguments as `string.sub`, but positions start at 0.
--- Both positions are rounded down to whole seconds before use. A negative
--- start, or an end that is zero or negative, is measured back from the end of
--- the audio. The sample at the end position is included in the result.
---@param start? number The start position of the audio in seconds
---@param last? number The end position of the audio in seconds (0 means end of file)
---@return aukit.Audio _ The new split audio object
function Audio:sub(start, last)
    start = math_floor(expect(1, start, "number", "nil") or 0)
    last = math_floor(expect(2, last, "number", "nil") or 0)
    local sampleRate = self.sampleRate
    local len = #self.data[1] / sampleRate
    if start < 0 then start = len + start end
    if last <= 0 then last = len + last end
    expect.range(start, 0, len)
    expect.range(last, 0, len)
    -- Convert seconds to 1-based sample indices. The product is rounded to the
    -- nearest whole sample because floating-point error (for example after
    -- adding a negative offset to a fractional length) can leave it slightly
    -- off an integer, and a non-integer index would match no sample at all.
    local first = math_floor(start * sampleRate + 0.5) + 1
    local final = math_floor(last * sampleRate + 0.5) + 1
    local obj = setmetatable({sampleRate = sampleRate, data = {}, metadata = copy(self.metadata), info = copy(self.info)}, Audio_mt)
    for c = 1, #self.data do
        local ch = {}
        local sch = self.data[c]
        -- Indices past the end of the channel read as nil and add nothing.
        for i = first, final do ch[i - first + 1] = sch[i] end
        obj.data[c] = ch
    end
    return obj
end
--- Stacks the channels of this audio object and the given ones into a new
--- object: this object's channels come first, followed by each argument's
--- channels in order. Objects with a different sample rate are resampled to
--- this one's rate, and every channel is padded with zeroes up to the length of
--- the longest first channel among the inputs.
---@param ... aukit.Audio The audio objects to combine with
---@return aukit.Audio _ The new combined audio object
function Audio:combine(...)
    local audios = {self, ...}
    local longest = #self.data[1]
    for i = 2, #audios do
        local other = audios[i]
        expectAudio(i - 1, other)
        if other.sampleRate ~= self.sampleRate then
            other = other:resample(self.sampleRate)
            audios[i] = other
        end
        longest = math_max(longest, #other.data[1])
    end
    local combined = setmetatable({sampleRate = self.sampleRate, data = {}, metadata = copy(self.metadata), info = copy(self.info)}, Audio_mt)
    -- Index of the next free channel slot in the output.
    local slot = 1
    for a = 1, #audios do
        local channels = audios[a].data
        for c = 1, #channels do
            local source, padded = channels[c], {}
            for i = 1, longest do
                padded[i] = source[i] or 0
            end
            combined.data[slot] = padded
            slot = slot + 1
        end
    end
    return combined
end
--- Splits this audio object into one or more objects with the specified channels.
--- Each argument is a list of channel numbers; one new object is returned per
--- list, containing copies of those channels in the order given.
--- Passing a channel that doesn't exist, or an empty list, will throw an error.
---@param ... number[] The lists of channels in each new object
---@return aukit.Audio ... The new audio objects created from the channels in each list
---@usage Split a stereo track into independent mono objects
---
--- local left, right = stereo:split({1}, {2})
function Audio:split(...)
    local retval = {}
    for n, cl in ipairs{...} do
        expect(n, cl, "table")
        -- Level 2 attributes the error to the caller, matching the
        -- out-of-range error below.
        if #cl == 0 then error("bad argument #" .. n .. " (cannot use empty table)", 2) end
        local obj = setmetatable({sampleRate = self.sampleRate, data = {}, metadata = copy(self.metadata), info = copy(self.info)}, Audio_mt)
        for cd, cs in ipairs(cl) do
            local sch, ch = self.data[expect(cd, cs, "number")], {}
            if not sch then error("channel " .. cs .. " (in argument " .. n .. ") out of range", 2) end
            -- Copy the samples so the new object does not share tables with this one.
            for i = 1, #sch do ch[i] = sch[i] end
            obj.data[cd] = ch
        end
        retval[#retval+1] = obj
    end
    return table_unpack(retval)
end
--- Sums this audio object with one or more others, sample by sample, into a new
--- object. Each summed sample is multiplied by the amplifier (1 if the first
--- argument is an audio object instead of a number) and then clipped to the
--- audio range ([-1, 1]). Shorter channels count as zeroes past their end, and
--- channels an object doesn't have contribute nothing to the sum.
--- Any audio objects with a different sample rate are resampled to match this one.
---@param amplifier number|aukit.Audio The multiplier to apply, or the first audio object
---@param ... aukit.Audio The objects to mix with this one
---@return aukit.Audio _ The new mixed audio object
function Audio:mix(amplifier, ...)
    local sources = {self, ...}
    local rate = self.sampleRate
    local longest = #self.data[1]
    local channelCount = #self.data
    for idx = 2, #sources do
        expectAudio(idx, sources[idx])
        if sources[idx].sampleRate ~= rate then
            sources[idx] = sources[idx]:resample(rate)
        end
        longest = math_max(longest, #sources[idx].data[1])
        channelCount = math_max(channelCount, #sources[idx].data)
    end
    -- A non-number first argument is treated as another audio object to mix,
    -- placed directly after `self`, with a neutral multiplier.
    if type(amplifier) ~= "number" then
        expectAudio(1, amplifier)
        if amplifier.sampleRate ~= rate then
            amplifier = amplifier:resample(rate)
        end
        longest = math_max(longest, #amplifier.data[1])
        channelCount = math_max(channelCount, #amplifier.data)
        table_insert(sources, 2, amplifier)
        amplifier = 1
    end
    local result = setmetatable({sampleRate = rate, data = {}, metadata = copy(self.metadata), info = copy(self.info)}, Audio_mt)
    local sourceCount = #sources
    for c = 1, channelCount do
        -- Gather channel `c` of every source; entries stay nil for sources
        -- that don't have this channel.
        local tracks = {}
        for a = 1, sourceCount do
            tracks[a] = sources[a].data[c]
        end
        local mixed = {}
        for i = 1, longest do
            local sum = 0
            for a = 1, sourceCount do
                local track = tracks[a]
                if track then
                    sum = sum + (track[i] or 0)
                end
            end
            mixed[i] = clamp(sum * amplifier, -1, 1)
        end
        result.data[c] = mixed
    end
    return result
end
--- Creates a new audio object whose channels contain this audio's samples
--- repeated back-to-back the given number of times.
---@param count number The number of times to play the audio
---@return aukit.Audio _ The repeated audio
function Audio:rep(count)
    -- Accept the arguments in either order (count first, audio second).
    if type(self) ~= "table" and type(count) == "table" then
        self, count = count, self
    end
    expect(1, count, "number")
    local result = setmetatable({sampleRate = self.sampleRate, data = {}, metadata = copy(self.metadata), info = copy(self.info)}, Audio_mt)
    for c = 1, #self.data do
        local from = self.data[c]
        local size = #from
        local repeated = {}
        -- `offset` is where the current repetition starts in the output.
        local offset = 0
        for _ = 1, count do
            for i = 1, size do
                repeated[offset+i] = from[i]
            end
            offset = offset + size
        end
        result.data[c] = repeated
    end
    return result
end
--- Creates a new audio object with the samples of every channel in reverse order.
---@return aukit.Audio _ The reversed audio
function Audio:reverse()
    local result = setmetatable({sampleRate = self.sampleRate, data = {}, metadata = copy(self.metadata), info = copy(self.info)}, Audio_mt)
    for c = 1, #self.data do
        local from = self.data[c]
        local flipped = {}
        -- Walk the source backwards while filling the destination forwards.
        local dest = 0
        for i = #from, 1, -1 do
            dest = dest + 1
            flipped[dest] = from[i]
        end
        result.data[c] = flipped
    end
    return result
end
-- Encodes a run of samples from an audio object as PCM values.
--
-- `info` is a table with the fields:
--   audio       - the audio object whose `data` channels are read
--   bitDepth    - bit depth used to scale non-float samples
--   dataType    - "float" passes samples through unchanged; "unsigned" adds an
--                 offset of 2^(bitDepth-1) after scaling; anything else only scales
--   interleaved - if truthy, output is one flat table with channels interleaved
--   multiple    - (checked only when not interleaved) if truthy, output is one
--                 table per channel, returned after the next position
--   len         - number of samples to encode starting at `pos`
-- `pos` is the 1-based index of the first sample to encode.
--
-- Returns nil if `pos` is past the end of the first channel. In `multiple` mode
-- returns `pos + info.len` followed by one table per channel; otherwise returns
-- a single flat table. While encoding, calls sleep(0) whenever more than 3000
-- units of os_epoch "utc" time have elapsed since the last check-in
-- (presumably milliseconds - confirm against os_epoch's definition).
local function encodePCM(info, pos)
    local maxValue = 2^(info.bitDepth-1)
    local add = info.dataType == "unsigned" and maxValue or 0
    local source = info.audio.data
    local encode
    if info.dataType == "float" then
        encode = function(sample) return sample end
    else
        -- Negative samples scale by maxValue, non-negative ones by maxValue-1.
        encode = function(sample)
            local scale = maxValue - 1
            if sample < 0 then scale = maxValue end
            return sample * scale + add
        end
    end
    local data = {}
    local nc = #source
    local len = #source[1]
    if pos > len then return nil end
    local start = os_epoch "utc"
    local last = pos + info.len - 1
    if info.interleaved then
        for n = pos, last do
            if os_epoch "utc" - start > 3000 then
                start = os_epoch "utc"
                sleep(0)
            end
            local base = (n-1)*nc
            for c = 1, nc do
                data[base+c] = encode(source[c][n])
            end
        end
    elseif info.multiple then
        for c = 1, nc do
            local channel = source[c]
            local out = {}
            data[c] = out
            for n = pos, last do
                if os_epoch "utc" - start > 3000 then
                    start = os_epoch "utc"
                    sleep(0)
                end
                local s = channel[n]
                -- Stop early when the chunk runs past the end of the channel.
                if not s then break end
                out[n-pos+1] = encode(s)
            end
        end
        return pos + info.len, table_unpack(data)
    else
        -- Channels are laid out one after another in a single flat table.
        for c = 1, nc do
            local base = (c-1)*len
            for n = pos, last do
                if os_epoch "utc" - start > 3000 then
                    start = os_epoch "utc"
                    sleep(0)
                end
                data[base+n] = encode(source[c][n])
            end
        end
    end
    return data
end
--- Converts the audio data to raw PCM samples.
--- Throws an error if the bit depth or data type is not one of the listed
--- values, or if float data is requested with a bit depth other than 32.
---@param bitDepth? number The bit depth of the audio (8, 16, 24, 32); defaults to 8
---@param dataType? "signed"|"unsigned"|"float" The type of each sample; defaults to "signed"
---@param interleaved? boolean Whether to interleave each channel; defaults to true
---@return number[]|nil ... The resulting audio data, or nil if the audio has no samples
function Audio:pcm(bitDepth, dataType, interleaved)
    bitDepth = expect(1, bitDepth, "number", "nil") or 8
    dataType = expect(2, dataType, "string", "nil") or "signed"
    expect(3, interleaved, "boolean", "nil")
    if interleaved == nil then interleaved = true end
    -- The argument numbers reported below match the positions given to
    -- `expect` above: bitDepth is argument #1 and dataType is argument #2.
    if bitDepth ~= 8 and bitDepth ~= 16 and bitDepth ~= 24 and bitDepth ~= 32 then
        error("bad argument #1 (invalid bit depth)", 2)
    end
    if dataType ~= "signed" and dataType ~= "unsigned" and dataType ~= "float" then
        error("bad argument #2 (invalid data type)", 2)
    end
    if dataType == "float" and bitDepth ~= 32 then
        error("bad argument #1 (float audio must have 32-bit depth)", 2)
    end
    -- Encode the whole audio in one pass, starting from the first sample.
    return encodePCM({audio = self, bitDepth = bitDepth, dataType = dataType, interleaved = interleaved, len = #self.data[1]}, 1)
end
--- Creates an iterator function that encodes the audio as PCM one chunk at a
--- time. This is useful as a for iterator, and can be used with `aukit.play`.
--- Once the audio is exhausted the iterator returns nil on every call.
---@param chunkSize? number The size of each chunk in samples (defaults to 131072)
---@param bitDepth? number The bit depth of the audio (8, 16, 24, 32; defaults to 8)
---@param dataType? "signed"|"unsigned"|"float" The type of each sample (defaults to "signed")
---@return fun():number[][]|nil,number|nil _ An iterator function that returns
--- a list with one array of encoded samples per channel, as well as the position
--- of the chunk in seconds (computed as the 1-based sample index of the chunk's
--- first sample divided by the sample rate)
---@return number _ The total length of the audio in seconds
function Audio:stream(chunkSize, bitDepth, dataType)
    chunkSize = expect(1, chunkSize, "number", "nil") or 131072
    bitDepth = expect(2, bitDepth, "number", "nil") or 8
    dataType = expect(3, dataType, "string", "nil") or "signed"
    local depthOk = bitDepth == 8 or bitDepth == 16 or bitDepth == 24 or bitDepth == 32
    if not depthOk then
        error("bad argument #2 (invalid bit depth)", 2)
    end
    local typeOk = dataType == "signed" or dataType == "unsigned" or dataType == "float"
    if not typeOk then
        error("bad argument #3 (invalid data type)", 2)
    end
    if dataType == "float" and bitDepth ~= 32 then
        error("bad argument #2 (float audio must have 32-bit depth)", 2)
    end
    -- `state` is dropped (set to nil) once the end of the audio is reached.
    local state = {audio = self, bitDepth = bitDepth, dataType = dataType, interleaved = false, multiple = true, len = chunkSize}
    local cursor = 1
    local function nextChunk()
        if state == nil then return nil end
        local seconds = cursor / self.sampleRate
        -- encodePCM returns the next position followed by one table per channel.
        local chunk = {encodePCM(state, cursor)}
        if chunk[1] == nil then
            state = nil
            return nil
        end
        cursor = table_remove(chunk, 1)
        return chunk, seconds
    end
    return nextChunk, #self.data[1] / self.sampleRate
end
--- Converts the audio data to a WAV file.
--- A bit depth of 1 produces a WAVE_FORMAT_EXTENSIBLE file holding DFPWM data;
--- 8-bit output is unsigned PCM, and 16/24/32-bit output is signed PCM.
--- If the object has metadata, entries with a known WAV tag are written to a
--- LIST/INFO chunk placed before the data chunk.
--- Throws an error if the bit depth is not one of the listed values.
---@param bitDepth? number The bit depth of the audio (1 = DFPWM, 8, 16, 24, 32); defaults to 16
---@return string _ The resulting WAV file data
function Audio:wav(bitDepth)
    -- TODO: Support float data
    bitDepth = expect(1, bitDepth, "number", "nil") or 16
    if bitDepth ~= 1 and bitDepth ~= 8 and bitDepth ~= 16 and bitDepth ~= 24 and bitDepth ~= 32 then
        error("bad argument #1 (invalid bit depth)", 2)
    end
    local channels = #self.data

    -- Encode the sample payload.
    local str
    if bitDepth == 1 then
        str = self:dfpwm(true)
    else
        -- pcm() returns nil when there are no samples; treat that as empty data.
        local data = self:pcm(bitDepth, bitDepth == 8 and "unsigned" or "signed", true) or {}
        -- math_floor keeps the byte count free of a fractional part when it is
        -- turned into a string (e.g. "i2" rather than "i2.0").
        local item = (bitDepth == 8 and "I" or "i") .. math_floor(bitDepth / 8)
        -- Samples are packed `csize` at a time so that no single call receives
        -- more than that many values (the smaller figure is used under LuaJIT).
        local csize = jit and 7680 or 32768
        -- WAV sample data is little-endian, so say so explicitly.
        local format = "<" .. item:rep(csize)
        local total = #data
        local parts = {}
        for i = 1, total, csize do
            local last, fmt = i + csize - 1, format
            if last > total then
                -- Final, shorter chunk: pack exactly the samples that remain.
                last = total
                fmt = "<" .. item:rep(total - i + 1)
            end
            parts[#parts+1] = str_pack(fmt, table_unpack(data, i, last))
        end
        str = table.concat(parts)
    end

    -- Build the optional LIST/INFO chunk from metadata keys that map to a WAV tag.
    local listChunk = ""
    if self.metadata and next(self.metadata) then
        local info = {}
        for k, v in pairs(self.metadata) do
            for l, w in pairs(wavMetadata) do
                if w == k then
                    info[#info+1] = l
                    info[#info+1] = tostring(v)
                    break
                end
            end
        end
        -- Each entry is a 4-byte tag and a length-prefixed string, padded to an
        -- even offset ("!2" + "Xh").
        local list = str_pack("!2<c4" .. ("c4s4Xh"):rep(#info / 2), "INFO", table_unpack(info))
        listChunk = str_pack("<c4s4", "LIST", list)
    end

    -- The RIFF size field counts every byte after itself, so it has to include
    -- the LIST chunk (header and body) whenever one is written.
    local header
    if bitDepth == 1 then
        -- 72 = "WAVE" (4) + fmt chunk (8 + 40) + fact chunk (12) + data header (8)
        header = str_pack("<c4Ic4c4IHHIIHHHHIc16c4II",
            "RIFF", #str + 72 + #listChunk, "WAVE",
            "fmt ", 40, 0xFFFE, channels, self.sampleRate, self.sampleRate * channels / 8, math_ceil(channels / 8), 1,
            22, 1, wavExtensibleChannels[channels] or 0, wavExtensible.dfpwm,
            "fact", 4, #self.data[1])
    else
        -- 36 = "WAVE" (4) + fmt chunk (8 + 16) + data header (8)
        header = str_pack("<c4Ic4c4IHHIIHH",
            "RIFF", #str + 36 + #listChunk, "WAVE",
            "fmt ", 16, 1, channels, self.sampleRate, self.sampleRate * channels * bitDepth / 8, channels * bitDepth / 8, bitDepth)
    end
    return header .. listChunk .. str_pack("<c4I", "data", #str) .. str
end
--- Converts the audio data to DFPWM. All channels share the same encoder, and
--- channels are stored sequentially uninterleaved if `interleaved` is false, or