forked from hachi/MogileFS-Utils
-
Notifications
You must be signed in to change notification settings - Fork 34
/
mogtool
executable file
·1535 lines (1153 loc) · 46 KB
/
mogtool
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/perl
# vim:ts=4 sw=4 et ft=perl:
eval 'exec /usr/bin/perl -S $0 ${1+"$@"}'
if 0; # not running under some shell
############################################################################
=head1 NAME
mogtool -- Inject/extract data to/from a MogileFS installation
B<WARNING>: this utility is deprecated! See L<MogileFS::Utils>
=head1 SYNOPSIS
$ mogtool [general-opts] <command> [command-opts] <command-args>
$ mogtool --trackers=127.0.0.1:6001 --domain=foo --class=bar ...
$ mogtool --conf=foo.conf ...
$ mogtool inject thefile.tgz thefilekey
$ mogtool inject --bigfile thebigfile.tgz thefilekey
$ mogtool inject --bigfile --gzip thebigfile.tar thefilekey
$ mogtool inject --bigfile --gzip mydirectory thedirkey
$ mogtool inject --bigfile --gzip /dev/hda4 thedevkey
$ mogtool inject --nobigfile bigcontiguousfile bigcfilekey
$ mogtool inject --bigfile --gzip --verify \
--description="Description" \
--receipt="[email protected], [email protected]" \
--concurrent=5 --chunksize=32M \
somehugefile thefilekey
$ mogtool extract thefilekey thenewfile.tgz
$ mogtool extract thefilekey -
$ mogtool extract --bigfile thedirkey .
$ mogtool extract --bigfile --asfile thedirkey thefile.tgz
$ mogtool extract --bigfile thedevkey /dev/hda4
$ mogtool delete thekey
$ mogtool locate --noverify thekey
$ mogtool locate --bigfile thekey
$ mogtool list
$ mogtool listkey key
=head1 GENERAL OPTIONS
=over 4
=item --debug
Turn on MogileFS debug output.
=item --trackers=<[preferred_ip/]ip:port>[,<[preferred_ip/]ip:port>]*
Specify one or more trackers for your MogileFS installation. Note that
you can specify preferred IPs to override the default IPs with. So it
would look something like B<10.10.0.1/10.0.0.1:8081>.
=item --domain=<domain>
Set the MogileFS domain to use.
=item --class=<class>
Set the class within the domain to use. Defaults to _default.
=item --conf=<file>
Specify a configuration file to load from.
=item --lib=<directory>
Specify a directory to use as a library path. Right now, this should
be the directory where you expect to find the MogileFS.pm file, if it's
not actually installed.
=back
=head1 COMMANDS
=over 4
=item inject|i
Insert a resource into MogileFS. See L</"INJECT OPTIONS"> and L</"INJECT ARGUMENTS">
for the rest of how to use the inject mode.
=item extract|x
Extract a resource from MogileFS. See L</"EXTRACT OPTIONS"> and L</"EXTRACT ARGUMENTS">
for how to use extract.
=item delete|rm
Delete a resource. See L</"DELETE OPTIONS"> and L</"DELETE ARGUMENTS">.
=item locate|lo key
List the paths to the file identified by the given key.
=item list|ls
List all big files contained in MogileFS. No options, no arguments.
=item listkey|lsk key
List all files which match the key. Key is just a prefix, and this will list
all keys which match the prefix. So if you specify key as "ABC1" then you'll
get all keys which start with the characters "ABC1"
=back
=head1 INJECT OPTIONS
The following options are used to control the behavior of the injector.
=over 4
=item --bigfile|-b
If specified, use chunking to break the resource into manageable pieces.
=item --chunksize=<size>[B|K|M|G]
When instructed to break files into chunks, the injector will use the specified
chunk size as the maximum chunk size. Defaults to 64M. You can specify the
chunk size manually and specify the units--defaults to bytes.
=item --gzip|-z
If specified, mogtool will gzip the data as it's going into MogileFS. The resource
will be marked as compressed.
Note that you do not need to specify this if the resource is already gzipped, but
it doesn't hurt. (We automatically detect that and mark it as compressed.)
=item --overwrite
If you previously were working on injecting a big file as chunks and the process
died, normally mogtool refuses to do it again. Specify this option to force the
overwrite of that file.
B<NOTE:> Other than in the above case (partial failure), mogtool will not prompt
before overwriting an existing file.
=item --verify
If on, we do a full MD5 verification of every chunk after it is replicated. This
can take a while on large files!
=item --description=<text>
Specifies a description for this file. Optional, but assists in reporting and
listing the large files in MogileFS. (This is also displayed in any receipts
that are created.)
=item --receipt=<email address>[, <email address>]*
If specified, emails a copy of the receipt file to the specified comma-separated
email addresses. Also creates a local filesystem copy of the receipt file.
=item --concurrent=<number>
Specifies the number of concurrent processes to run for MogileFS insertion. If
you are noticing mogtool spend most of its time waiting for children and not
actually buffering data, you may wish to raise this number. The default is 1
but we've found 3 or 4 work well.
=back
=head1 INJECT ARGUMENTS
=over 4
=item resource
What you actually want to inject. This can be a file, directory, or a raw
partition in the format I</dev/X>.
Please see L</"USAGE EXAMPLES"> for more information on how to inject these
different types of resources and the differences thereof.
=item key
Specifies the key to save this file to. For big files, the key is actually
"_big_N:key" and "key,#" where N is one of a bunch of things we use and # is
the chunk number.
Generally, you want this to be descriptive so you remember what it is later
and can identify the file just by looking at the key.
=back
=head1 EXTRACT OPTIONS
=over 4
=item --bigfile|-b
If specified, indicates that this resource was chunked on injection and should be
reassembled for extraction.
=item --gzip|-z
Specifies to mogtool that it should ungzip the output if and only if it was
compressed when inserted into the MogileFS system. So, if you're extracting a
file that wasn't gzipped to begin with, this doesn't do anything.
=item --asfile
Useful when extracting something previously inserted as a directory--this option
instructs mogtool to treat the resource as a file and not actually run it
through tar for decompression.
=back
=head1 EXTRACT ARGUMENTS
=over 4
=item key
Specifies the key to get the file from.
=item destination
What destination means varies depending on what type of resource you're extracting.
However, no matter what, you can specify a single dash (B<->) to mean STDOUT.
Please see the usage examples for more information on how extract works.
=back
=head1 DELETE OPTIONS
=over 4
=item --bigfile|-b
The resource is a "big file" and all chunks should be deleted.
=back
=head1 DELETE ARGUMENTS
=over 4
=item key
Specifies the key of the file to delete.
=back
=head1 LOCATE OPTIONS
=over 4
=item --verify
Verify that the returned paths actually contain the file. The locate
command defaults to verify; you can disable it with --noverify.
=item --bigfile|-b
The resource is a "big file" and the locations of the information key should be printed.
=back
=head1 LOCATE ARGUMENTS
=over 4
=item key
Specifies the key of the file to locate
=back
=head1 RETURN VALUES
=over 4
=item 0
Success during operation.
=item 1
During the locate, list, or listkey operation, the key was not found.
=item 2
Some fatal error occurred.
=back
=head1 USAGE EXAMPLES
I<Please note that all examples assume you have a default config file that
contains the tracker and domain to use. Saves us from having to clutter up
the command line.>
=head2 Small Files (<64MB)
When it comes to using small files, mogtool is very, very easy.
=head3 Injection
$ mogtool inject foo.dbm foo.dbm.2004.12
Injects the file I<foo.dbm> into MogileFS under the key of I<foo.dbm.2004.12>.
$ mogtool inject --gzip foo.dbm foo.dbm.2004.12
Injects the same file to the same key, but compresses it on the fly for you.
=head3 Extraction
$ mogtool extract foo.dbm.2004.12 newfoo.dbm
Retrieves the key I<foo.dbm.2004.12> and saves it as I<newfoo.dbm>.
$ mogtool extract --gzip foo.dbm.2004.12 newfoo.dbm
Gets the file and automatically decompresses it, if and only if it was compressed.
So basically, you can turn on gzip in your config file and mogtool will do the
smart thing each time.
$ mogtool extract foo.dbm.2004.12 -
Print the resource to standard out. If you want, you can pipe it somewhere or
redirect to a file (but why not just specify the filename?).
=head2 Large Files (>64MB)
Given mogtool's ability to break files into chunks and later reassemble them,
inserting large files (even files over the 4GB barrier) is relatively easy.
=head3 Injection
$ mogtool inject --bigfile largefile.dat largefile.dat
As expected, inserts the file I<largefile.dat> into the MogileFS system under
the name I<largefile.dat>. Not very creative. Uses the default 64MB chunks.
$ mogtool inject --bigfile --chunksize=16M largefile.dat largefile.dat
Specify to use 16MB chunks instead of the default. Otherwise, the same.
$ mogtool inject --bigfile --chunksize=1000K --gzip largefile.dat somekey
Do it again, but specify 1000KB chunks, gzip automatically, and upload it under
a different key I<somekey>.
$ mogtool inject --bigfile --concurrent=5 --gzip largefile.dat somekey
Same as above, but use 5 children processes for uploading chunks to MogileFS.
This can take up to 300MB of memory in this example! (It tends to use about
(concurrency + 1) * chunksize bytes.)
$ mogtool inject --bigfile --chunksize=32M --concurrent=3 --gzip \
--receipt="[email protected]" --verify --description="A large file" \
largefile.dat somekey
Break this file into 32MB chunks, set a description, use 3 children to
upload them, gzip the file as you go, do a full MD5 verification of every
chunk, then email a receipt with all of the MogileFS paths to me.
Lots of flexibility with mogtool.
=head3 Extraction
$ mogtool extract --bigfile somekey newfile.dat
In its basic form, extracts the previously inserted large file and saves it as
I<newfile.dat>.
$ mogtool extract --bigfile --gzip somekey newfile.dat
If the file was gzipped on entry, ungzip it and save the result. If it wasn't
gzipped, then we just save it.
=head2 Directories
Directories are easily injected and extracted with mogtool. To create the data
stream that is inserted into MogileFS, we use tar.
=head3 Injection
$ mogtool inject --bigfile mydir mykey
Run I<mydir> through tar and then save it as I<mykey>.
$ mogtool inject --bigfile --gzip --concurrent=5 mydir mykey
Inject, but also gzip and use multiple injectors.
I<Note how this is just like injecting a large file. See injection examples for
large files for more examples.>
=head3 Extraction
$ mogtool extract --bigfile mykey .
Extract the previously injected directory I<mykey> to your local directory.
$ mogtool extract --bigfile --asfile mykey foo.tar
Take the previously generated tarball and save it as I<foo.tar>. Simply creates
the file instead of extracting everything inside.
=head2 Partitions/Devices
mogtool has the ability to inject raw partitions into MogileFS and to retrieve
them later and write them back to a partition. They're treated just like directories
for the most part, we just don't pipe things through tar.
=head3 Injection
$ mogtool inject --bigfile /dev/hda3 hda3.backup
Save a raw copy of your partition I</dev/hda3> to the key I<hda3.backup>.
$ mogtool inject --bigfile --gzip /dev/hda3 hda3.backup
Same, but compress on the fly during injection.
=head3 Extraction
$ mogtool extract --bigfile hda3.backup /dev/hda4
Extract the partition at I<hda3.backup> to the partition I</dev/hda4>. B<WARNING:>
mogtool won't ask for confirmation, make sure you don't mistype partition numbers!
=head2 Deleting a Resource
B<WARNING:> Please make sure you're specifying the right parameter, as delete does
not prompt for confirmation of the request!
$ mogtool delete thekey
Delete a normal file.
$ mogtool delete --bigfile thekey
Delete a chunked file--this deletes all chunks and the receipt, so the file is gone.
=head2 Listing Big Files
$ mogtool list
Lists all large files stored in MogileFS. It is not possible to list all normal files
at this time.
=head2 Listing Files Matching a Key
$ mogtool listkey abc1
Lists all files in MogileFS whose keys start with the characters "abc1".
=head1 CONFIGURATION FILE
Instead of adding a ton of options to the command line every time, mogtool enables
you to create a default configuration file that it will read all of the options from.
It searches two locations for a default configuration file: B<~/.mogtool> and
B</etc/mogilefs/mogtool.conf>. (Alternately, you can specify B<--conf=whatever> as
an option on the command line.)
The file can consist of any number of the following items:
trackers = 10.0.0.3:7001, 10.10.0.5/10.0.0.5:7001
domain = mogiledomain
class = fileclass
lib = /home/foo/lib
gzip = 1
big = 1
overwrite = 1
chunksize = 32M
receipt = [email protected], [email protected]
verify = 1
concurrent = 3
=head1 KNOWN BUGS
None? Send me any you find! :)
=head1 PLANNED FEATURES
=over 4
=item --concurrent for extract
It would be nice to have concurrent extraction going on.
=item recover mode
If the receipt file is ever corrupt in MogileFS it would be useful to recover a
file given just a receipt. It would have the same arguments as the extract mode,
except use a receipt file as the data source.
=item partition size verification
We can easily get the partition size when we save one to MogileFS, so we should
use that information to determine during extraction if a target partition is going
to be big enough.
=item on the fly gzip extraction
Right now we can gzip on an injection, but we should support doing decompression
on the fly coming out of MogileFS.
=item make list take a prefix
If you can specify a prefix, that makes things easier for finding small files that
are stored in MogileFS.
=item more information on list
Have list load up the info file and parse it for information about each of the
big files being stored. Maybe have this as an option (-l). (This means the
reading and parsing of info files should be abstracted into a function.)
=back
=head1 AUTHOR
Mark Smith E<lt>[email protected]E<gt> - most of the implementation and maintenance.
Brad Fitzpatrick E<lt>[email protected]E<gt> - concepts and rough draft.
Robin H. Johnson E<lt>[email protected]E<gt> - locate function.
Copyright (c) 2002-2004 Danga Interactive. All rights reserved.
=cut
##############################################################################
use strict;
use Getopt::Long;
use Pod::Usage qw{ pod2usage };
use Digest::MD5 qw{ md5_hex };
use Time::HiRes qw{ gettimeofday tv_interval };
use LWP::Simple;
use POSIX qw(:sys_wait_h);
use Compress::Zlib;
$| = 1;
use constant ERR_MISSING => 1;
use constant ERR_FATAL => 2;
# All settings live in %opts. Values given on the command line are stored
# here first; the configuration-file loader further down only fills in keys
# that are still unset.
my %opts;
$opts{help} = 0;

# Parse the command line. GetOptions returns false on any unknown or
# malformed option, in which case we bail out with the usage text.
abortWithUsage() unless
    GetOptions(
        # general purpose options
        'trackers=s'    => \$opts{trackers},
        'domain=s'      => \$opts{domain},
        'class=s'       => \$opts{class},
        'config=s'      => \$opts{config},
        'help'          => \$opts{help},
        'debug'         => \$MogileFS::DEBUG,
        # --lib takes a directory argument ("--lib=<directory>" in the POD);
        # without the "=s" Getopt::Long treats it as a bare flag and rejects
        # any value supplied with it.
        'lib=s'         => \$opts{lib},

        # extract+inject options
        'gzip|z'        => \$opts{gzip},
        'bigfile|b'     => \$opts{big},
        'nobigfile'     => \$opts{nobig},

        # inject options
        'overwrite'     => \$opts{overwrite},
        'chunksize=s'   => \$opts{chunksize},
        'receipt=s'     => \$opts{receipt},
        'reciept=s'     => \$opts{receipt}, # requested :)
        'verify!'       => \$opts{verify},
        'description=s' => \$opts{des},
        'concurrent=i'  => \$opts{concurrent},
        'noreplwait'    => \$opts{noreplwait},

        # extract options
        'asfile'        => \$opts{asfile},
    );
# now load the config file?
#
# Candidate files are tried in priority order: the file named on the command
# line, then the per-user file, then the system-wide file. Each "key = value"
# line only sets $opts{key} when nothing earlier (command line or a
# higher-priority file) has already defined it, so an explicit 0 such as
# --noverify is not overridden by a config file.
my @confs = (
    $opts{config},
    (defined $ENV{HOME} ? "$ENV{HOME}/.mogtool" : ()),
    "/etc/mogilefs/mogtool.conf",
);
foreach my $conf (@confs) {
    next unless $conf && -e $conf;

    # A file that exists but cannot be read is reported and skipped rather
    # than being silently treated as empty.
    my $conf_fh;
    unless (open $conf_fh, '<', $conf) {
        warn "WARNING: unable to read configuration file $conf: $!\n";
        next;
    }

    while (my $line = <$conf_fh>) {
        # strip comments, then pick out "key = value"
        $line =~ s!#.*!!;
        # (.*\S) ends the value at its last non-blank character, so padding
        # left behind by a trailing comment (or a CR) is not part of it
        next unless $line =~ m!(\w+)\s*=\s*(.*\S)!;
        $opts{$1} = $2 unless defined $opts{$1};
    }
    close $conf_fh;
}
# now bring in MogileFS, because hopefully we have a lib by now
if ($opts{lib}) {
    # Run-time equivalent of "use lib $dir". Going through require/import
    # keeps the user-supplied path out of evaluated source code, so a quote
    # character in the path can neither break nor inject code.
    require lib;
    lib->import($opts{lib});
}

# no trackers and domain..?
unless ($opts{trackers} && $opts{domain}) {
    abortWithUsage("--trackers and --domain configuration required");
}

# The client module is loaded at run time (not with a compile-time "use")
# so that the library path added above is searched as well.
eval {
    require MogileFS::Client;
    MogileFS::Client->import();
    1;
} or die "Failed to load MogileFS::Client module: $@\n";
# Connect to MogileFS first; the shared client handle is used by the
# command handlers and by error().
my $mogfs = get_mogfs();

# The first remaining argument names the command. Every command has a long
# name and a short alias, both mapped to the same handler sub.
my $cmd = shift @ARGV;
my %handler_for = (
    i   => \&inject,    inject  => \&inject,
    x   => \&extract,   extract => \&extract,
    ls  => \&list,      list    => \&list,
    lsk => \&listkey,   listkey => \&listkey,
    rm  => \&mdelete,   delete  => \&mdelete,
    lo  => \&locate,    locate  => \&locate,
);
my $handler = defined $cmd ? $handler_for{$cmd} : undef;
$handler->() if $handler;

# Reaching this point means no command matched (or the handler returned
# instead of exiting), so show the usage text.
abortWithUsage();
######################################################################
# get_mogfs()
#
# Creates and returns a MogileFS::Client for the domain in $opts{domain},
# using the comma-separated tracker list in $opts{trackers}. A tracker
# written as "preferred_ip/ip:port" is registered as "ip:port" and the
# preferred address is passed to the client through set_pref_ip().
# Calls error() with ERR_FATAL if the client cannot be constructed.
sub get_mogfs {
    my (@hosts, %preferred_for);

    for my $spec (split /\s*,\s*/, $opts{trackers}) {
        # list assignment in boolean context: true only when the pattern matched
        if (my ($pref, $ip, $port) = $spec =~ m!(.+)/(.+):(\d+)!) {
            $preferred_for{$ip} = $pref;
            push @hosts, "$ip:$port";
            next;
        }
        push @hosts, $spec;
    }

    my $client = MogileFS::Client->new(
        domain => $opts{domain},
        hosts  => \@hosts,
    );
    error("Could not initialize MogileFS", ERR_FATAL) unless $client;

    $client->set_pref_ip(\%preferred_for);
    return $client;
}
# error($message [, $exitcode])
#
# Prints $message to STDERR, followed by the last error string of the shared
# MogileFS client (when there is one, and unless $exitcode is ERR_MISSING)
# and the current contents of $@ (when set).
#
# If $exitcode is defined the process exits with that code; otherwise the
# sub returns so the caller can carry on (e.g. retry).
sub error {
    # Take a copy of $@ before anything below has a chance to overwrite it.
    my $syserr = $@;

    my ($err, $exitcode) = @_;
    $err ||= "ERROR: no error message provided!";

    # The file-level $mogfs is still undefined while get_mogfs() is building
    # the first client, and get_mogfs() reports its own failure through this
    # sub -- so only ask the client for an error string when it exists.
    my $mogerr;
    if ($mogfs && ($mogerr = $mogfs->errstr)) {
        $mogerr =~ s/^\s+//;
        $mogerr =~ s/\s+$//;
    }

    # drop trailing line breaks so the message prints on a single line
    $syserr =~ s/[\r\n]+$// if $syserr;

    print STDERR "$err\n";
    print STDERR "MogileFS backend error message: $mogerr\n"
        if $mogerr && !(defined $exitcode && $exitcode == ERR_MISSING);
    print STDERR "System error message: $syserr\n" if $syserr;

    # if a second argument, exit
    exit $exitcode + 0 if defined $exitcode;
    return;
}
sub inject {
my $src = shift @ARGV;
my $key = shift @ARGV;
abortWithUsage("source and key required to inject") unless $src && $key;
# make sure the source exists and the key is valid
die "Error: source $src doesn't exist.\n"
unless -e $src;
die "Error: key $key isn't valid; must not contain spaces or commas.\n"
unless $key =~ /^[^\s\,]+$/;
# before we get too far, find sendmail?
my $sendmail;
if ($opts{receipt}) {
$sendmail = `which sendmail` || '/usr/sbin/sendmail';
$sendmail =~ s/[\r\n]+$//;
unless (-e $sendmail) {
die "Error: attempted to find sendmail binary in /usr/sbin but couldn't.\n";
}
}
# open up O as the handle to use for reading data
my $type = 'unknown';
if (-d $src) {
my $taropts = ($opts{gzip} ? 'z' : '') . "cf";
$type = 'tarball';
open (O, '-|', 'tar', $taropts, '-', $src)
or die "Couldn't open tar for reading: $!\n";
} elsif (-f $src) {
$type = 'file';
open (O, "<$src")
or die "Couldn't open file for reading: $!\n";
} elsif (-b $src) {
$type = 'partition';
open (O, "<$src")
or die "Couldn't open block device for reading: $!\n";
} else {
die "Error: not file, directory, or partition.\n";
}
# now do some pre-file checking...
my $size = -s $src;
if ($type ne 'file') {
die "Error: you specified to store a file of type $type but didn't specify --bigfile. Please see documentation.\n"
unless $opts{big};
} elsif ($size > 64 * 1024 * 1024) {
die "Error: the file is more than 64MB and you didn't specify --bigfile. Please see documentation, or use --nobigfile to disable large file chunking and allow large single file uploads\n"
unless $opts{big} || $opts{nobig};
}
if ($opts{big} && $opts{nobig}) {
die "Error: You cannot specify both --bigfile and --nobigfile\n";
}
if ($opts{nobigfile} && $opts{gzip}) {
die "Error: --gzip is not compatible with --nobigfile\n";
}
# see if there's already a pre file?
if ($opts{big}) {
my $data = $mogfs->get_file_data("_big_pre:$key");
if (defined $data) {
unless ($opts{overwrite}) {
error(<<MSG, ERR_FATAL);
ERROR: The pre-insert file for $key exists. This indicates that a previous
attempt to inject a file failed--or is still running elsewhere! Please
verify that a previous injection of this file is finished, or run mogtool
again with the --overwrite inject option.
$$data
MSG
}
# delete the pre notice since we didn't die (overwrite must be on)
$mogfs->delete("_big_pre:$key")
or error("ERROR: Unable to delete _big_pre:$key.", ERR_FATAL);
}
# now create our pre notice
my $prefh = $mogfs->new_file("_big_pre:$key", $opts{class})
or error("ERROR: Unable to create _big_pre:$key.", ERR_FATAL);
$prefh->print("starttime:" . time());
$prefh->close()
or error("ERROR: Unable to save to _big_pre:$key.", ERR_FATAL);
}
# setup config and temporary variables we're going to be using
my $chunk_size = 64 * 1024 * 1024; # 64 MB
if ($opts{big}) {
if ($opts{chunksize} && ($opts{chunksize} =~ m!^(\d+)(G|M|K|B)?!i)) {
$chunk_size = $1;
unless (lc $2 eq 'b') {
$chunk_size *= (1024 ** ( { g => 3, m => 2, k => 1 }->{lc $2} || 2 ));
}
print "NOTE: Using chunksize of $chunk_size bytes.\n";
}
}
my $read_size = ($chunk_size > 1024*1024 ? 1024*1024 : $chunk_size);
# temporary variables
my $buf;
my $bufsize = 0;
my $chunknum = 0;
my %chunkinfo; # { id => [ md5, length ] }
my %chunkbuf; # { id => data }
my %children; # { pid => chunknum }
my %chunksout; # { chunknum => pid }
# this function writes out a chunk
my $emit = sub {
my $cn = shift() + 0;
return unless $cn;
# get the length of the chunk we're going to send
my $bufsize = length $chunkbuf{$cn};
return unless $bufsize;
# now spawn off a child to do the real work
if (my $pid = fork()) {
print "Spawned child $pid to deal with chunk number $cn.\n";
$chunksout{$cn} = $pid;
$children{$pid} = $cn;
return;
}
# drop other memory references we're not using anymore
foreach my $chunknum (keys %chunkbuf) {
next if $chunknum == $cn;
delete $chunkbuf{$chunknum};
}
# as a child, get a new mogile connection
my $mogfs = get_mogfs();
my $dkey = $opts{big} ? "$key,$chunknum" : "$key";
my $start_time = [ gettimeofday() ];
my $try = 0;
while (1) {
$try++;
eval {
my $fh = $mogfs->new_file($dkey, $opts{class}, $bufsize);
unless (defined $fh) {
die "Unable to create new file";
}
$fh->print($chunkbuf{$cn});
unless ($fh->close) {
die "Close failed";
}
};
if (my $err = $@) {
error("WARNING: Unable to save file '$dkey': $err");
printf "This was try #$try and it's been %.2f seconds since we first tried. Retrying...\n", tv_interval($start_time);
sleep 1;
next;
}
last;
}
my $diff = tv_interval($start_time);
printf " chunk $cn saved in %.2f seconds.\n", $diff;
# make sure we never return, always exit
exit 0;
};
# just used to reap our children in a loop until they're done. also
# handles respawning a child that failed.
my $reap_children = sub {
# find out if we have any kids dead
while ((my $pid = waitpid -1, WNOHANG) > 0) {
my $cnum = delete $children{$pid};
unless ($cnum) {
print "Error: reaped child $pid, but no idea what they were doing...\n";
next;
}
if (my $status = $?) {
print "Error: reaped child $pid for chunk $cnum returned non-zero status... Retrying...\n";
$emit->($cnum);
next;
}
my @paths = grep { defined $_ } $mogfs->get_paths($opts{big} ? "$key,$cnum" : "$key", 1);
unless (@paths) {
print "Error: reaped child $pid for chunk $cnum but no paths exist... Retrying...\n";
$emit->($cnum);
next;
}
delete $chunkbuf{$cnum};
delete $chunksout{$cnum};
print "Child $pid successfully finished with chunk $cnum.\n";
}
};
# this function handles parallel threads
$opts{concurrent} ||= 1;
$opts{concurrent} = 1 if $opts{concurrent} < 1;
my $handle_children = sub {
# here we pause while our children are working
my $first = 1;
while ($first || scalar(keys %children) >= $opts{concurrent}) {
$first = 0;
$reap_children->();
select undef, undef, undef, 0.1;
}
# now spawn until we hit the limit
foreach my $cnum (keys %chunkbuf) {
next if $chunksout{$cnum};
$emit->($cnum);
last if scalar(keys %children) >= $opts{concurrent};
}
};
# setup compression stuff
my $dogzip = 0;
my $zlib;
if ($opts{gzip}) {
# if they turned gzip on we may or may not need this stream, so make it
$zlib = deflateInit()
or error("Error: unable to create gzip deflation stream", ERR_FATAL);
}
my $upload_fh;
if ($opts{nobig}) {
eval {
$upload_fh = $mogfs->new_file($key, $opts{class}, $size);
unless (defined $upload_fh) {
die "Unable to create new file";
}
};
if (my $err = $@) {
error("ERROR: Unable to open file '$key': $err");
die "Giving up.\n";
}
}
# read one meg chunks while we have data
my $sum = 0;
my $readbuf = '';
while (my $rv = read(O, $readbuf, $read_size)) {
# if this is a file, and this is our first read, see if it's gzipped
if (!$sum && $rv >= 2) {
if (substr($readbuf, 0, 2) eq "\x1f\x8b") {
# this is already gzipped, so just mark it as such and insert it
$opts{gzip} = 1;
} else {
# now turn on our gzipping if the user wants the output gzipped
$dogzip = 1 if $opts{gzip};
}
}
# now run it through the deflation stream before we process it here
if ($dogzip) {
my ($out, $status) = $zlib->deflate($readbuf);
error("Error: Deflation failure processing stream", ERR_FATAL)
unless $status == Z_OK;
$readbuf = $out;
$rv = length $readbuf;
# we don't always get a chunk from deflate
next unless $rv;
}
$sum += $rv;
# Short circuit if we're just plopping up a big file.
if ($opts{nobig}) {
$upload_fh->print($readbuf);
if ($size) {
printf "Upload so far: $sum bytes [%.2f%% complete]\n",
($sum / $size * 100);
}
next;
}
# now stick our data into our real buffer
$buf .= $readbuf;
$bufsize += $rv;
$readbuf = '';
# generate output
if ($type ne 'tarball' && $size && $size > $read_size) {
printf "Buffer so far: $bufsize bytes [%.2f%% complete]\r", ($sum / $size * 100);
} else {
print "Buffer so far: $bufsize bytes\r";
}
# if we have one chunk, handle it
if ($opts{big} && $bufsize >= $chunk_size) {
$chunkbuf{++$chunknum} = substr($buf, 0, $chunk_size);
# calculate the md5, print out status, and save this chunk
my $md5 = md5_hex($buf);
if ($opts{big}) {
print "chunk $key,$chunknum: $md5, len = $chunk_size\n";
} else {
print "file $key: $md5, len = $chunk_size\n";
}
$chunkinfo{$chunknum} = [ $md5, $chunk_size ];
# reset for the next read loop
$buf = substr($buf, $chunk_size);
$bufsize = length $buf;
# now spawn children to save chunks
$handle_children->();
}
}
close O;
# now we need to flush the gzip engine
if ($dogzip) {
my ($out, $status) = $zlib->flush;
error("Error: Deflation failure processing stream", ERR_FATAL)
unless $status == Z_OK;
$buf .= $out;
$bufsize += length $out;
$sum += length $out;
}
# final piece
if ($buf) {
$chunkbuf{++$chunknum} = $buf;