-
Notifications
You must be signed in to change notification settings - Fork 1
/
cutter.c
367 lines (308 loc) · 13.8 KB
/
cutter.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
/*
* http://ffmpeg.org/doxygen/trunk/index.html
*
* Main components
*
* Format (Container) - a wrapper, providing sync, metadata and muxing for the streams.
* Stream - a continuous stream (audio or video) of data over time.
* Codec - defines how data are enCOded (from Frame to Packet)
* and DECoded (from Packet to Frame).
* Packet - a slice of the stream data, to be decoded into raw frames.
* Frame - a decoded raw frame (to be encoded or filtered).
*/
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
// Required to create the PNG files
#include <png.h>
// Write a printf-style message to stderr, prefixed with "LOG: " and
// terminated with a newline.
static void logging(const char *fmt, ...);
// Send one packet to the decoder and, for every frame the decoder returns,
// convert it to RGB24 and save it as a PNG under "output/".
// Returns 0 on success or a negative value on error.
static int decode_packet(AVPacket *pPacket, AVCodecContext *pCodecContext, AVFrame *pFrame);
// Write the first data plane of a frame to `filename` as an 8-bit RGB PNG.
// Returns 0 on success, -1 on failure.
static int save_frame_to_png(AVFrame *frame, const char *filename);
// Threshold that main() compares its video-packet counter against to stop
// decoding early, so that only roughly this many images are created.
#define IMAGES_TOTAL 10
int main(int argc, const char *argv[])
{
if (argc < 2) {
printf("You need to specify a media file.\n");
return -1;
}
logging("*** Initializing all the containers, codecs and protocols...");
// AVFormatContext holds the header information from the format (Container)
// Allocating memory for this component
// http://ffmpeg.org/doxygen/trunk/structAVFormatContext.html
AVFormatContext *format_context = avformat_alloc_context();
if (!format_context) {
logging("ERROR could not allocate memory for Format Context");
return -1;
}
logging("*** Opening the input file (%s) and loading format (container) header", argv[1]);
// Open the file and read its header. The codecs are not opened.
// The function arguments are:
// AVFormatContext (the component we allocated memory for),
// url (filename),
// AVInputFormat (if you pass NULL it'll do the auto detect)
// and AVDictionary (which are options to the demuxer)
// http://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html
if (avformat_open_input(&format_context, argv[1], NULL, NULL) != 0) {
logging("ERROR could not open the file");
return -1;
}
// now we have access to some information about our file
// since we read its header we can say what format (container) it's
// and some other information related to the format itself.
logging("*** Format: %s, Duration: %lld us, Bitrate: %lld", format_context->iformat->name, format_context->duration, format_context->bit_rate);
logging("*** Finding stream info from format...");
// read Packets from the Format to get stream information
// this function populates pFormatContext->streams
// (of size equals to pFormatContext->nb_streams)
// the arguments are:
// the AVFormatContext
// and options contains options for codec corresponding to i-th stream.
// On return each dictionary will be filled with options that were not found.
// https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html
if (avformat_find_stream_info(format_context, NULL) < 0) {
logging("ERROR could not get the stream info");
return -1;
}
// The component that knows how to enCOde and DECode the stream
// it's the codec (audio or video)
// http://ffmpeg.org/doxygen/trunk/structAVCodec.html
AVCodec *input_codec = NULL;
// this component describes the properties of a codec used by the stream i
// https://ffmpeg.org/doxygen/trunk/structAVCodecParameters.html
AVCodecParameters *input_codec_parameters = NULL;
int video_stream_index = -1;
// Loop though all the streams and print its main information
for (int i = 0; i < format_context->nb_streams; i++) {
AVCodecParameters *local_codec_parameters = NULL;
local_codec_parameters = format_context->streams[i]->codecpar;
logging(" AVStream->time_base before open coded %d/%d", format_context->streams[i]->time_base.num, format_context->streams[i]->time_base.den);
logging(" AVStream->r_frame_rate before open coded %d/%d", format_context->streams[i]->r_frame_rate.num, format_context->streams[i]->r_frame_rate.den);
logging(" AVStream->start_time %" PRId64, format_context->streams[i]->start_time);
logging(" AVStream->duration %" PRId64, format_context->streams[i]->duration);
logging("Finding the proper decoder (CODEC)");
logging("---");
AVCodec *local_codec = NULL;
// Finds the registered decoder for a codec ID
// https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html
local_codec = avcodec_find_decoder(local_codec_parameters->codec_id);
if (local_codec==NULL) {
logging("ERROR unsupported codec!");
// In this example if the codec is not found we just skip it
continue;
}
// When the stream is a video we store its index, codec parameters and codec
if (local_codec_parameters->codec_type == AVMEDIA_TYPE_VIDEO) {
if (video_stream_index == -1) {
video_stream_index = i;
input_codec = local_codec;
input_codec_parameters = local_codec_parameters;
}
logging("Video Codec: resolution %d x %d", local_codec_parameters->width, local_codec_parameters->height);
} else if (local_codec_parameters->codec_type == AVMEDIA_TYPE_AUDIO) {
logging("Audio Codec: %d channels, sample rate %d", local_codec_parameters->channels, local_codec_parameters->sample_rate);
}
// Print its name, id and bitrate
logging("\tCodec %s ID %d bit_rate %lld", local_codec->name, local_codec->id, local_codec_parameters->bit_rate);
}
if (video_stream_index == -1) {
logging("File %s does not contain a video stream!", argv[1]);
return -1;
}
// https://ffmpeg.org/doxygen/trunk/structAVCodecContext.html
AVCodecContext *codec_context = avcodec_alloc_context3(input_codec);
if (!codec_context) {
logging("Failed to allocated memory for AVCodecContext");
return -1;
}
// Fill the codec context based on the values from the supplied codec parameters
// https://ffmpeg.org/doxygen/trunk/group__lavc__core.html
if (avcodec_parameters_to_context(codec_context, input_codec_parameters) < 0) {
logging("Failed to copy codec params to codec context");
return -1;
}
// Initialize the AVCodecContext to use the given AVCodec.
// https://ffmpeg.org/doxygen/trunk/group__lavc__core.html
if (avcodec_open2(codec_context, input_codec, NULL) < 0) {
logging("Failed to open codec through avcodec_open2");
return -1;
}
// https://ffmpeg.org/doxygen/trunk/structAVFrame.html
AVFrame *input_frame = av_frame_alloc();
if (!input_frame) {
logging("Failed to allocate memory for AVFrame");
return -1;
}
// https://ffmpeg.org/doxygen/trunk/structAVPacket.html
AVPacket *input_packet = av_packet_alloc();
if (!input_packet) {
logging("Failed to allocate memory for AVPacket");
return -1;
}
int ret = 0;
int counter = 0;
// Fill the Packet with data from the Stream
// https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html
while (av_read_frame(format_context, input_packet) >= 0) {
// If it's the video stream
if (input_packet->stream_index == video_stream_index) {
logging("---");
logging("AVPacket->pts %" PRId64, input_packet->pts);
ret = decode_packet(input_packet, codec_context, input_frame);
if (ret < 0)
break;
// Stop it, otherwise we'll be saving hundreds of frames
if (counter > IMAGES_TOTAL)
break;
counter++;
}
// https://ffmpeg.org/doxygen/trunk/group__lavc__packet.html
av_packet_unref(input_packet);
}
logging("---");
logging("Releasing all the resources...");
avformat_close_input(&format_context);
av_packet_free(&input_packet);
av_frame_free(&input_frame);
avcodec_free_context(&codec_context);
return 0;
}
/*
 * Emit one diagnostic line on stderr: the fixed "LOG: " prefix, the
 * printf-style formatted message, then a newline.
 */
static void logging(const char *fmt, ...)
{
    va_list ap;

    fputs("LOG: ", stderr);

    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);

    fputc('\n', stderr);
}
/*
 * Convert one decoded frame to RGB24 and write it to `frame_filename` as PNG.
 *
 * Returns 0 on success or a negative value on failure. The scaler context and
 * the temporary RGB frame are released on every path.
 */
static int convert_and_save_frame(AVFrame *input_frame, const char *frame_filename)
{
    int ret = 0;
    AVFrame *rgb_frame = NULL;

    // To create the PNG files, the AVFrame data must be translated into RGB24
    struct SwsContext *sws_ctx = sws_getContext(
        input_frame->width, input_frame->height, input_frame->format,
        input_frame->width, input_frame->height, AV_PIX_FMT_RGB24,
        SWS_BILINEAR, NULL, NULL, NULL);
    if (!sws_ctx) {
        logging("Error while creating the scaling context");
        ret = AVERROR(EINVAL);
        goto cleanup;
    }

    // Allocate a new AVFrame for the output RGB24 image
    rgb_frame = av_frame_alloc();
    if (!rgb_frame) {
        logging("Error while allocating the RGB frame");
        ret = AVERROR(ENOMEM);
        goto cleanup;
    }

    // Set the properties of the output AVFrame, then allocate its buffers
    rgb_frame->format = AV_PIX_FMT_RGB24;
    rgb_frame->width = input_frame->width;
    rgb_frame->height = input_frame->height;
    ret = av_frame_get_buffer(rgb_frame, 0);
    if (ret < 0) {
        logging("Error while preparing RGB frame: %s", av_err2str(ret));
        goto cleanup;
    }

    logging("Transforming frame format from YUV420P into RGB24...");
    // sws_scale takes a pointer to const plane pointers; the cast makes the
    // qualifier conversion explicit.
    ret = sws_scale(sws_ctx, (const uint8_t *const *)input_frame->data, input_frame->linesize,
                    0, input_frame->height, rgb_frame->data, rgb_frame->linesize);
    if (ret < 0) {
        logging("Error while translating the frame format from YUV420P into RGB24: %s", av_err2str(ret));
        goto cleanup;
    }

    // save a frame into a .PNG file
    if (save_frame_to_png(rgb_frame, frame_filename) < 0) {
        fprintf(stderr, "Failed to write PNG file\n");
        ret = -1;
        goto cleanup;
    }
    ret = 0;

cleanup:
    av_frame_free(&rgb_frame);
    sws_freeContext(sws_ctx);
    return ret;
}

/*
 * Send one packet to the decoder and save every frame it yields as
 * "output/frame-<frame_number>.png".
 *
 * input_packet  - packet to decode
 * codec_context - opened decoder context
 * input_frame   - caller-owned frame reused to receive decoded output
 *
 * Returns 0 when the decoder needs more input or has reached end of stream,
 * or a negative value when sending, receiving, converting or saving fails.
 */
static int decode_packet(AVPacket *input_packet, AVCodecContext *codec_context, AVFrame *input_frame)
{
    // Supply raw packet data as input to a decoder
    // https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html
    int ret = avcodec_send_packet(codec_context, input_packet);
    if (ret < 0) {
        logging("Error while sending a packet to the decoder: %s", av_err2str(ret));
        return ret;
    }

    for (;;) {
        // Return decoded output data (into a frame) from a decoder
        // https://ffmpeg.org/doxygen/trunk/group__lavc__decoding.html
        ret = avcodec_receive_frame(codec_context, input_frame);
        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
            // Nothing more to read for this packet; not an error
            break;
        }
        if (ret < 0) {
            logging("Error while receiving a frame from the decoder: %s", av_err2str(ret));
            return ret;
        }

        // pts is int64_t, so it is printed with PRId64.
        // NOTE(review): the value printed after "DTS" is coded_picture_number,
        // not a decoding timestamp; the label is kept to preserve log format.
        logging(
            "Frame %d (type=%c, size=%d bytes, format=%d) pts %" PRId64 " key_frame %d [DTS %d]",
            codec_context->frame_number,
            av_get_picture_type_char(input_frame->pict_type),
            input_frame->pkt_size,
            input_frame->format,
            input_frame->pts,
            input_frame->key_frame,
            input_frame->coded_picture_number);

        char frame_filename[1024];
        snprintf(frame_filename, sizeof(frame_filename), "output/%s-%d.png", "frame", codec_context->frame_number);

        // Only planar YUV 4:2:0 input (the format of the provided .mp4 file)
        // has been exercised; warn for anything else.
        if (input_frame->format != AV_PIX_FMT_YUV420P) {
            logging("Warning: the generated file may not be a grayscale image, but could e.g. be just the R component if the video format is RGB");
        }

        ret = convert_and_save_frame(input_frame, frame_filename);
        if (ret < 0) {
            return ret;
        }
    }
    return 0;
}
/*
 * Save an AVFrame to a PNG file.
 *
 * frame    - frame whose first data plane (data[0], linesize[0]) is written
 *            as an 8-bit RGB image of frame->width x frame->height pixels
 * filename - path of the PNG file to create (opened with "wb")
 *
 * Returns 0 on success, -1 on failure. The file handle, the libpng structs
 * and the row-pointer table are released on every path, including when
 * libpng reports an error through longjmp.
 */
static int save_frame_to_png(AVFrame *frame, const char *filename)
{
    int ret = -1;
    FILE *fp = NULL;
    png_structp png_ptr = NULL;
    png_infop info_ptr = NULL;
    png_bytep *row_pointers = NULL;

    logging("Creating PNG file -> %s", filename);

    if (frame->width <= 0 || frame->height <= 0 || !frame->data[0]) {
        fprintf(stderr, "Invalid frame for PNG file '%s'\n", filename);
        return -1;
    }

    // Build the row-pointer table BEFORE setjmp(): locals modified after
    // setjmp() have indeterminate values once longjmp() returns there, which
    // would make freeing the table on the error path unsafe.
    row_pointers = malloc(sizeof *row_pointers * (size_t)frame->height);
    if (!row_pointers) {
        fprintf(stderr, "Failed to allocate PNG row pointers\n");
        return -1;
    }
    for (int y = 0; y < frame->height; y++) {
        row_pointers[y] = (png_bytep)(frame->data[0] + y * frame->linesize[0]);
    }

    // Open the PNG file for writing
    fp = fopen(filename, "wb");
    if (!fp) {
        fprintf(stderr, "Failed to open file '%s'\n", filename);
        goto cleanup;
    }

    // Create the PNG write struct and info struct
    png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL);
    if (!png_ptr) {
        fprintf(stderr, "Failed to create PNG write struct\n");
        goto cleanup;
    }
    info_ptr = png_create_info_struct(png_ptr);
    if (!info_ptr) {
        fprintf(stderr, "Failed to create PNG info struct\n");
        goto cleanup;
    }

    // libpng reports errors by longjmp()-ing back to this point
    if (setjmp(png_jmpbuf(png_ptr))) {
        fprintf(stderr, "Error writing PNG file\n");
        ret = -1;
        goto cleanup;
    }

    // Set the PNG file as the output for libpng
    png_init_io(png_ptr, fp);

    // Set the PNG image attributes
    png_set_IHDR(png_ptr, info_ptr, frame->width, frame->height, 8, PNG_COLOR_TYPE_RGB,
                 PNG_INTERLACE_NONE, PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE);

    // Write the PNG file
    png_set_rows(png_ptr, info_ptr, row_pointers);
    png_write_png(png_ptr, info_ptr, PNG_TRANSFORM_IDENTITY, NULL);
    ret = 0;

cleanup:
    if (png_ptr) {
        // info_ptr may still be NULL here; libpng accepts that
        png_destroy_write_struct(&png_ptr, &info_ptr);
    }
    if (fp && fclose(fp) != 0 && ret == 0) {
        // fclose() flushes buffered data, so a failure means the file is incomplete
        fprintf(stderr, "Failed to close file '%s'\n", filename);
        ret = -1;
    }
    free(row_pointers);
    return ret;
}