From 1eecfc0dce5101f4c4e8de8383c2fa217b365a1e Mon Sep 17 00:00:00 2001 From: Karthik U S Date: Fri, 17 Jul 2015 12:03:55 +0530 Subject: [PATCH] C++ implementation of yadl (#24) /* The previous code was in C. It has now been changed to C++ by replacing the structures with classes and creating objects of those classes to call their functions. The code still has some errors that need to be fixed. The Boost library functions for logging and vector are still pending. */ Signed-off-by: Karthik U S --- .gitignore | 0 .travis.yml | 0 .travis_cmockery2.sh | 0 Makefile.am | 0 README.md | 0 configure.ac | 0 img/arch.png | Bin src/Makefile.am | 48 --- src/Rabin_Karp.h | 84 ----- src/block.h | 48 --- src/catalog.c | 188 ----------- src/catalog.h | 57 ---- src/{clean_buff.c => clean_buff.cpp} | 3 - src/clean_buff.h | 0 src/dedup.h | 135 -------- src/dedup_test.c | 18 - src/delete.h | 18 - src/delete_test.c | 18 - src/hash.h | 55 --- src/main.c | 141 -------- src/main.h | 14 - src/{md5.c => md5.cpp} | 2 +- src/md5.h | 2 +- src/object_store.h | 22 -- src/restore.h | 56 ---- src/{sha1.c => sha1.cpp} | 0 src/sha1.h | 0 src/stub.h | 31 -- src/{vector.c => vector.cpp} | 0 src/vector.h | 0 src/{block.c => ydl_block.cpp} | 68 ++-- src/ydl_block.h | 30 ++ src/ydl_catalog.cpp | 202 +++++++++++ src/ydl_catalog.h | 30 ++ src/{dedup.c => ydl_dedup.cpp} | 60 ++-- src/ydl_dedup.h | 52 +++ src/{delete.c => ydl_delete.cpp} | 17 +- src/ydl_delete.h | 16 + src/{hash.c => ydl_hash.cpp} | 55 ++- src/ydl_hash.h | 25 ++ src/ydl_main.cpp | 140 ++++++++ src/ydl_main.h | 20 ++ src/{object_store.c => ydl_object_store.cpp} | 8 +- src/ydl_object_store.h | 18 + src/{Rabin_Karp.c => ydl_rabin_karp.cpp} | 89 ++--- src/ydl_rabin_karp.h | 38 +++ src/{Restore_file.c => ydl_restore.cpp} | 334 ++++++++++--------- src/ydl_restore.h | 28 ++ src/{stub.c => ydl_stub.cpp} | 13 +- src/ydl_stub.h | 18 + tests/basic/compare_files.sh | 0 yadl.pc.in | 0 52 files changed, 936 insertions(+), 1265 deletions(-) mode change 100644 => 100755 .gitignore 
mode change 100644 => 100755 .travis.yml mode change 100644 => 100755 .travis_cmockery2.sh mode change 100644 => 100755 Makefile.am mode change 100644 => 100755 README.md mode change 100644 => 100755 configure.ac mode change 100644 => 100755 img/arch.png delete mode 100644 src/Makefile.am delete mode 100644 src/Rabin_Karp.h delete mode 100644 src/block.h delete mode 100644 src/catalog.c delete mode 100644 src/catalog.h rename src/{clean_buff.c => clean_buff.cpp} (86%) mode change 100644 => 100755 mode change 100644 => 100755 src/clean_buff.h delete mode 100644 src/dedup.h delete mode 100644 src/dedup_test.c delete mode 100644 src/delete.h delete mode 100644 src/delete_test.c delete mode 100644 src/hash.h delete mode 100644 src/main.c delete mode 100644 src/main.h rename src/{md5.c => md5.cpp} (96%) mode change 100644 => 100755 mode change 100644 => 100755 src/md5.h delete mode 100644 src/object_store.h delete mode 100644 src/restore.h rename src/{sha1.c => sha1.cpp} (100%) mode change 100644 => 100755 mode change 100644 => 100755 src/sha1.h delete mode 100644 src/stub.h rename src/{vector.c => vector.cpp} (100%) mode change 100644 => 100755 mode change 100644 => 100755 src/vector.h rename src/{block.c => ydl_block.cpp} (66%) mode change 100644 => 100755 create mode 100755 src/ydl_block.h create mode 100755 src/ydl_catalog.cpp create mode 100755 src/ydl_catalog.h rename src/{dedup.c => ydl_dedup.cpp} (84%) mode change 100644 => 100755 create mode 100755 src/ydl_dedup.h rename src/{delete.c => ydl_delete.cpp} (70%) mode change 100644 => 100755 create mode 100755 src/ydl_delete.h rename src/{hash.c => ydl_hash.cpp} (79%) mode change 100644 => 100755 create mode 100755 src/ydl_hash.h create mode 100755 src/ydl_main.cpp create mode 100755 src/ydl_main.h rename src/{object_store.c => ydl_object_store.cpp} (96%) mode change 100644 => 100755 create mode 100755 src/ydl_object_store.h rename src/{Rabin_Karp.c => ydl_rabin_karp.cpp} (87%) mode change 100644 => 100755 create 
mode 100755 src/ydl_rabin_karp.h rename src/{Restore_file.c => ydl_restore.cpp} (81%) mode change 100644 => 100755 create mode 100755 src/ydl_restore.h rename src/{stub.c => ydl_stub.cpp} (67%) mode change 100644 => 100755 create mode 100755 src/ydl_stub.h mode change 100644 => 100755 tests/basic/compare_files.sh mode change 100644 => 100755 yadl.pc.in diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/.travis.yml b/.travis.yml old mode 100644 new mode 100755 diff --git a/.travis_cmockery2.sh b/.travis_cmockery2.sh old mode 100644 new mode 100755 diff --git a/Makefile.am b/Makefile.am old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/configure.ac b/configure.ac old mode 100644 new mode 100755 diff --git a/img/arch.png b/img/arch.png old mode 100644 new mode 100755 diff --git a/src/Makefile.am b/src/Makefile.am deleted file mode 100644 index 9629cc2..0000000 --- a/src/Makefile.am +++ /dev/null @@ -1,48 +0,0 @@ - - -# Setup libyadl sources here -lib_LTLIBRARIES = libyadl.la -libyadl_la_SOURCES = dedup.c delete.c Restore_file.c \ - hash.c block.c stub.c clean_buff.c \ - catalog.c md5.c sha1.c Rabin_Karp.c \ - vector.c object_store.c -libyadl_la_LDFLAGS = -lssl -lcrypto - -noinst_HEADERS = block.h catalog.h clean_buff.h \ - config.h dedup.h delete.h \ - hash.h main.h md5.h restore.h \ - sha1.h stub.h Rabin_Karp.h \ - vector.h object_store.h - -# Create a program called 'dedup' but do not install it -bin_PROGRAMS = yadl_dedup -yadl_dedup_SOURCES = main.c -yadl_dedup_CFLAGS = -O2 -g -yadl_dedup_LDADD = libyadl.la - -# --- UNIT TESTS -# Initialize variables -CLEANFILES = *_xunit.xml -TESTS = -noinst_PROGRAMS = - -# dedup_test -dedup_test_CFLAGS = $(UNITTEST_CFLAGS) -dedup_test_LDFLAGS = $(UNITTEST_LIBS) -dedup_test_SOURCES = dedup_test.c #dedup.c dedup.h -TESTS += dedup_test - -# delete_test -delete_test_CFLAGS = $(UNITTEST_CFLAGS) -delete_test_LDFLAGS = $(UNITTEST_LIBS) 
-delete_test_SOURCES = delete_test.c #delete.c delete.h -TESTS += delete_test - -# --- End UNIT TEST - -# Make TESTS be programs which are not installed -noinst_PROGRAMS += $(TESTS) - -# Here we place the exported header -#yadlincludedir = $(includedir)/yadl -#yadlinclude_HEADERS = yadl.h diff --git a/src/Rabin_Karp.h b/src/Rabin_Karp.h deleted file mode 100644 index 77923ed..0000000 --- a/src/Rabin_Karp.h +++ /dev/null @@ -1,84 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define PRIME 23 -#define N 2048 -#define FINGER_PRINT 938 -#define BUFFER_LEN 10240 -#define M 1021 - -typedef unsigned int y_uint32; - -/*@description:Function to find the rolling hash of a particular window. -Input: - char* buffer : Buffer to store window content - y_uint32 *power : Stores the value PRIME^window length - int *ret : To return the status 0 on success, - -1 on failure -Output: - y_uint32 hash_value : Rolling hash of a particular window -*/ -y_uint32 calc_hash (char *buffer, y_uint32 *power, int *ret); - -/*@description:Function to keep track remaining content of previous - buffer when there is no match with fingerprint - -Input: - ssize_t remaining_length : Remaining length of the - previous buffer - char** remaining_buffer_content: Contains remaining content of - the previous buffer - char** remaining_window_content: Contains remaining content of - the previous window - char** buffer : Contains current buffer content - ssize_t start : Starting offset of buffer -Output: - int ret : 0 on success, -1 on failure -*/ -int -get_remaining_buffer_content(char **remaining_buffer_content, - char **remaining_window_content, ssize_t remaining_length, - char **buffer, ssize_t start); - -/*@description:Function to get the chunk when there is a match with - fingerprint -Input: - ssize_t* remaining_length : Remaining length of the - previous buffer - char** remaining_buffer_content: Contains remaining content of - the previous buffer - 
char** remaining_window_content: Contains remaining content of - the previous window - char** chunk_buffer : Holds the chunk content - char** buffer : Contains current buffer content - ssize_t start : Starting offset of buffer - ssize_t end : Ending offset of bufferw - ssize_t slide_incr : Keeps track of buffer sliding - ssize_t* remaining_content_incr: Keeps track of sliding of - previous buffer -Output: - int ret : 0 on success, -1 on failure -*/ -int -get_chunk_buffer(ssize_t *remaining_content_incr, ssize_t *remaining_length, - char **chunk_buffer, char **buffer, char **remaining_buffer_content, - char **remaining_window_content, ssize_t start, ssize_t end, - ssize_t slide_incr, int *chunk_length); - -/*@description:Function to generate variable size chunk using rabin-karp. -Input: - int fd : File descriptor of file that to be chuncked - int *ret : Pointer to return 0 on success, -1 on failure - int *chunk_flag : Pointer to notify the chunk - int *size : Poniter to return remaining size of the file -Output: - char* : Chunk to be returned -*/ -char *get_variable_chunk (int fd, int *ret, int *size,int *chunk_flag, int *chunk_length); diff --git a/src/block.h b/src/block.h deleted file mode 100644 index ee6238a..0000000 --- a/src/block.h +++ /dev/null @@ -1,48 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(CFLAG) -#define COMMON_DIGEST_FOR_OPENSSL -#include -#include -#define SHA1 CC_SHA1 -#else -#include -#include -#endif -#define NAME_SIZE 100 -#define INT_SIZE sizeof(int) - -struct block_store -{ - size_t fd_block; -}; - - -/*@description:Function to create blockstore -@in: void -@out: int -@return: -1 for error and 0 if created successfully */ -int init_block_store(); - -/*@description:Function to get specific block from specified position -@in: int pos-position of block, -@out: char* -@return: block */ -char* get_block(int pos, int *l); - 
-/*@description:Function to close filedescriptor of blockstore -@in: void -@out: int -@return: -1 for error and 0 if closed successfully */ -int fini_block_store(); diff --git a/src/catalog.c b/src/catalog.c deleted file mode 100644 index e392158..0000000 --- a/src/catalog.c +++ /dev/null @@ -1,188 +0,0 @@ -#include "catalog.h" -#include "clean_buff.h" - -/* Globals */ -static size_t fd_cat; - -/*Function to create catalog file. -Input:void -Output:int*/ -int -init_catalog_store() -{ - - int ret = -1; - - fd_cat = open("filecatalog.txt", O_APPEND|O_CREAT|O_RDWR, - S_IRUSR|S_IWUSR); - if (fd_cat < 1) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - ret = 0; -out: - return ret; - -} - -/*Function to write contents to a catalog file. -Input:char* filename -Output:int -*/ -int -writecatalog(char *filename) -{ - - int ret = -1; - char actualpath[PATH_MAX+1]; - char *real_path = NULL; - int size_of_real_path = 0; - - if (filename == '\0' || filename == NULL) { - goto out; - } - real_path = realpath(filename, actualpath); - if (real_path == NULL) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - size_of_real_path = strlen(real_path); - if (-1 == write(fd_cat, &size_of_real_path, int_size)) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - if (-1 == write(fd_cat, real_path, size_of_real_path)) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - ret = 0; -out: - return ret; - -} - -/*Function to read all deduped files from a catalog file. 
-Input:void -Output:int -*/ -int -readfilecatalog() -{ - - struct stat st; - int ret = -1; - char *buffer = NULL; - int size = 0; - int length = 0; - - fstat(fd_cat, &st); - size = st.st_size; - if (size > 0) { - if (-1 == lseek(fd_cat, 0, SEEK_SET)) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - } else { - goto out; - } - printf("\nAbsolute path of deduped files are:\n"); - while (size > 0) { - ret = read(fd_cat, &length, int_size); - if (ret == -1) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - buffer = (char *)calloc(1, length + 1); - ret = read(fd_cat, buffer, length); - if (ret == -1) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - buffer[length] = '\0'; - printf("%s\n", buffer); - size -= (length + int_size); - memset(buffer, 0, length+1); - clean_buff(&buffer); - ret = 1; - } - -out: - return ret; - -} - -/*Function to compare absolute path of file in file catalog. -Input:char out[],int fd_cat -Output:int -*/ -int -comparepath(char out[]) -{ - - struct stat st; - int size = 0; - size_t length = 0; - int ret = -1; - char *buffer = NULL; - - fstat(fd_cat, &st); - size = st.st_size; - /*rewind the stream pointer to the start of catalog file*/ - if (size > 0) { - if (-1 == lseek(fd_cat, 0, SEEK_SET)) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - } - if (size == 0) { - ret = 1; - goto out; - } - while (size > 0) { - ret = read(fd_cat, &length, int_size); - if (ret == -1) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - buffer = (char *)calloc(1, length+1); - ret = read(fd_cat, buffer, length); - if (ret == -1) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - buffer[length] = '\0'; - if (strcmp(out, buffer) == 0) { - ret = 0; - clean_buff(&buffer); - break; - } - size -= (length + int_size); - memset(buffer, 0, length+1); - clean_buff(&buffer); - ret = 1; - } -out: - return ret; - -} - -/*Function to close catalog fd. 
-Input:void -Output:int*/ -int -fini_catalog_store() -{ - - int ret = -1; - - if (fd_cat != -1) - ret = close(fd_cat); - if (ret == -1) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - ret = 0; -out: -return ret; - -} diff --git a/src/catalog.h b/src/catalog.h deleted file mode 100644 index df48928..0000000 --- a/src/catalog.h +++ /dev/null @@ -1,57 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(CFLAG) -#define COMMON_DIGEST_FOR_OPENSSL -#include -#include -#define SHA1 CC_SHA1 -#else -#include -#include -#endif -#define block 10 -#define NAME_SIZE 100 -#define int_size sizeof(int) - - -/*@description:Function to create catalogstore -@in: void -@out: int -@return: -1 for error and 0 if created successfully */ -int init_catalog_store(); - -/*@description:Function to write the full path of file to catalog -@in: char* filename-filename of file that has been deduped -@out: int -@return: -1 for error and 0 if inserted successfully */ -int writecatalog(char* filename); - -/*@description:Function to read all deduped files from a catalog file. -@in: void -@out: int -@return: -1 for error and 0 if created successfully */ -int readfilecatalog(); - -/*@description:Function to compare absolute path of file in file catalog. -@in: char out[]-path of file. -@out: int -@return: -1 for error and 0 if created successfully */ -int comparepath(char out[]); - -/*@description:Function to close filedescriptor of catalogstore -@in: void -@out: int -@return: -1 for error and 0 if closed successfully */ -int fini_catalog_store(); - diff --git a/src/clean_buff.c b/src/clean_buff.cpp old mode 100644 new mode 100755 similarity index 86% rename from src/clean_buff.c rename to src/clean_buff.cpp index fdaed0b..4e96cd0 --- a/src/clean_buff.c +++ b/src/clean_buff.cpp @@ -1,6 +1,4 @@ #include "clean_buff.h" -#include -#include /*Function to clean buffer contents. 
Input:char* path @@ -15,4 +13,3 @@ void clean_buff(char **buffer) } } - diff --git a/src/clean_buff.h b/src/clean_buff.h old mode 100644 new mode 100755 diff --git a/src/dedup.h b/src/dedup.h deleted file mode 100644 index 3c8a3fb..0000000 --- a/src/dedup.h +++ /dev/null @@ -1,135 +0,0 @@ -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "vector.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(CFLAG) -#define COMMON_DIGEST_FOR_OPENSSL -#include -#include -#define SHA1 CC_SHA1 -#else -#include -#include -#endif -#define int_size sizeof(int) -/* -Function to dedup a file whose path is specified by the user. -Input:char* filename,int chunk_type,int hash_type,int block_size -Output:int -*/ -int dedup_file (char* filename,int chunk_type,int hash_type,int block_size,int store); - -/*@description:Function to get hash of a particular block. -@in: vector_ptr list-block contents strored in vector,int length-length of buffer, - int hash_type-type of hash(sha1 or md5) -@out: char** hash-hash generated from a block,int *h_length-length of hash generated -@return: hash */ -int get_hash(int hash_type,char** hash,int *h_length, vector_ptr list); - -/*@description:Function to insert hash to hashstore -@in: char *buff-buffer that contains hash,int offset-starting position of block -@out: int -@return: -1 for error and 0 if inserted successfully */ -int insert_hash(char *buff,int offset); - -/*@description:Function to insert block to blockstore -@in: vector_ptr list-buffer containing block,size_t length-size of block -@out: int -@return: -1 for error and 0 if inserted successfully */ -int insert_block(vector_ptr list,size_t length); - -/*@description:Function to write the full path of file to catalog -@in: char* filename-filename of file that has been deduped -@out: int -@return: -1 for error and 0 if inserted successfully */ -int writecatalog(char* filename); - 
-/*@description:Function to read the full path of file from catalog -@in: int fc-file descriptor of catalog file -@out: int -@return: -1 for error and 0 if read successfully */ -int readcatalog(int fc); - -/*@description:Function to check whether path is present or not -@in: char out[]-path,int filedes-file descriptor of file -@out: int -@return: -1 for error and 0 if found. */ -int searchpath(char out[],int filedes); - -/*@description:Function to check whether file exist or not. -@in: char *filename-filename of arguments that has been passed via CLI. -@out: int -@return: -1 for error and 0 if found. */ -int file_exist(char *filename); - -/*@description:Function to write hash,beginning offset and ending offset of block to stub -@in: char *buff-buffer containing hash,size_t l-length of hash,int filedes-file -descriptor of stub,int b_offset-beginning offset of block,int e_offset-ending offset of block -@out: int -@return: -1 for error and 0 if found. */ -int write_to_stub(char buff[],size_t l,int filedes,int b_offset,int e_offset); - -/*@description:Function to get specific block from specified position -@in: int pos-position of block, -@out: char* -@return: block */ -char* get_block(int pos, int *l); - -/*@description:Function to check single instance of block of specified position -@in: int st1-filedescriptor of stub,int b_offset-beginning offset of block, -int e_offset-ending offset of block -@out: int -@return: -1 for error and 0 if found. */ -int searchstubhash(int st1,int b_offset,int e_offset); - -/*@description:Function to create hash using md5. -@in: vector_ptr list-Vector to store block for which the hash is generated, -int length-length of block -@out: char*-returns hash of block using md5 -@return: hash */ -char* str2md5(vector_ptr list); - -/*@description:Function to create hash using sha1. 
-@in: vector_ptr list-Vector to store block for which the hash is generated, -int length-length of block -@out: char*-returns hash of block using sha1 -@return: hash */ -char* sha1(vector_ptr list); - -/*@description:Function to insert block to blockstore object -@in: vector_ptr list-buffer containing block,size_t length-size of block -@out: int -@return: -1 for error and 0 if inserted successfully */ -int insert_block_to_object(char *hash,vector_ptr list); - -/*@description:Function to get chunk from file -@in: char **buffer-buffer containing block,size_t length-size of block, int fd_input - - file descriptor of file, int chunk_type - type of chunk to be used. -@out: int -@return: -1 for error and 0 if inserted successfully */ -int get_next_chunk(int fd_input, int chunk_type,int block_size, - char **buffer, int *length); - -/*@description:Function to insert block to blockstore object -@in: vector_ptr list-buffer containing block,size_t length-size of block, char *hash- -hash value of chunk, int h_length - length of the hash, int store - type of store, -int e_offset - Ending offset, int b_offset - Beginning offset. -@out: int -@return: -1 for error and 0 if inserted successfully */ -int chunk_store(vector_ptr list, char *hash, int length, int h_length, - int e_offset, int b_offset, int fd_stub, int store); - diff --git a/src/dedup_test.c b/src/dedup_test.c deleted file mode 100644 index e912fa0..0000000 --- a/src/dedup_test.c +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include -#include -#include -#include - -// A test case that does nothing and succeeds. 
-static void null_test_success(void **state) { - (void) state; -} - -int main(void) { - const UnitTest tests[] = { - unit_test(null_test_success), - }; - - return run_tests(tests, "dedup_test"); -} diff --git a/src/delete.h b/src/delete.h deleted file mode 100644 index 1b51d79..0000000 --- a/src/delete.h +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include -#include -#include -#include -#define FILE_SIZE 200 - -/*@description:Function to read the full path of file from catalog -@in: void -@out: int -@return: -1 for error and 0 if read successfully */ -int readfilecatalog(); - -/*@description:Function to delete file. -@in: void -@out: int -@return: -1 for error and 0 if read successfully */ -int delete_file(); diff --git a/src/delete_test.c b/src/delete_test.c deleted file mode 100644 index 8b057cb..0000000 --- a/src/delete_test.c +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include -#include -#include -#include - -// A test case that does nothing and succeeds. -static void null_test_success(void **state) { - (void) state; -} - -int main(void) { - const UnitTest tests[] = { - unit_test(null_test_success), - }; - - return run_tests(tests, "delete_test"); -} diff --git a/src/hash.h b/src/hash.h deleted file mode 100644 index e2e56b8..0000000 --- a/src/hash.h +++ /dev/null @@ -1,55 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(CFLAG) -#define COMMON_DIGEST_FOR_OPENSSL -#include -#include -#define SHA1 CC_SHA1 -#else -#include -#include -#endif -#define block 10 -#define NAME_SIZE 100 -#define int_size sizeof(int) - -/*@description:Function to create hashstore -@in: void -@out: int -@return: -1 for error and 0 if created successfully */ -int init_hash_store(); - -/*@description:Function to insert hash to hashstore -@in: char *buff-buffer that contains hash,int offset-starting position of block -@out: int -@return: -1 for error and 0 if inserted successfully */ -int 
insert_hash(char *buff,int offset); - -/*@description:Function to check whether hash is already present or not. -@in: char *out-input hash -@out: int hash -@return: -1 for error and 0 if hash already present */ -int searchhash(char *out); - -/*@description:Function to get the position of specific block in hash -@in: char* hash-hash -@out: int -@return: -1 for error and 0 if found. */ -int getposition(char* hash); - -/*@description:Function to create blockstore -@in: void -@out: int -@return: -1 for error and 0 if closed successfully */ -int fini_hash_store(); diff --git a/src/main.c b/src/main.c deleted file mode 100644 index 7953bcb..0000000 --- a/src/main.c +++ /dev/null @@ -1,141 +0,0 @@ -#include "main.h" -#include "catalog.h" -#include "block.h" -#include "hash.h" -#include "dedup.h" -#include "restore.h" -#include "delete.h" -#include "string.h" - -/*Main program!*/ -int -main (int argc, char *argv[]) -{ - - int chunk_type = 0; - int hash_type = 0; - char *filename = NULL; - int ch = 0; - int block_size = 0; - int ret = -1; - int store = 0; - enum stores store_type; - - filename = (char *)calloc(1, FILE_SIZE); - ret = init_block_store(); - if (ret == -1) - goto out; - ret = init_hash_store(); - if (ret == -1) - goto out; - ret = init_catalog_store(); - if (ret == -1) - goto out; - - while (1) { - printf("\n1.Do you want to dedup a file\n"); - printf("2.Do you want restore a file\n"); - printf("3.delete a file\n"); - printf("4.Exit\n"); - printf("Enter your choice\n"); - ret = scanf("%d", &ch); - if (ret <= 0) - goto out; - - switch (ch) { - case 1: - printf("\nPlease give filename with the full path\n"); - ret = scanf("%s", filename); - if (ret <= 0) - goto out; - ret = file_exist(filename); - if (ret == 0) { - fprintf(stderr, "%s\n", strerror(errno)); - continue; - } - printf("\nDo you want to do fixed or variable chunking\n"); - printf("\n1.fixed or 2.variable\n"); - while (scanf("%d", &chunk_type)) { - if (!(chunk_type == 1 || chunk_type == 2)) { - 
printf("\nInvalid choice please enter valid choice\n"); - } else { - break; - } - } - printf("\nEnter block size\n"); - printf("For variable chunking block size should be zero\n"); - ret = scanf("%d", &block_size); - if (ret <= 0) - goto out; - printf("\nChoose algorithm to hash\n"); - printf("1.md5 2.sha1\n"); - while (scanf("%d", &hash_type)) { - if (!(hash_type == 1 || hash_type == 2)) { - printf("\nInvalid choice please enter valid choice\n"); - } else { - break; - } - } - printf("\nSelect the store type\n"); - printf("0 : default_store \n1 : object_store\n"); - ret = scanf("%d", &store); - if (ret <= 0) - goto out; - switch (store) { - case 0: - printf("Default store selected\n"); - store_type = default_store; - break; - case 1: - printf("Object store selected\n"); - store_type = object_store; - break; - default: - printf("Invalid option\n"); - goto out; - } - printf("Deduplication in progress...\n"); - ret = dedup_file(filename, chunk_type, hash_type, - block_size, store_type); - if (ret == -1) - goto out; - break; - case 2: - ret = restore_file(); - if (ret == -1) - goto out; - break; - case 3: - ret = delete_file(); - if (ret == -1) - goto out; - break; - case 4: - goto out; - - default: - printf("\nInvalid choice\nEnter valid choice\n "); - } - } - ret = 0; -out: - ret = fini_block_store(); - ret = fini_hash_store(); - ret = fini_catalog_store(); - return ret; - -} - -/*Function to check whether second argument exist in directory or not. 
-Input:char *filename -Output:int -*/ -inline int -file_exist(char *filename) -{ - - struct stat buffer; - - return (stat (filename, &buffer) == 0); - -} diff --git a/src/main.h b/src/main.h deleted file mode 100644 index b51d94c..0000000 --- a/src/main.h +++ /dev/null @@ -1,14 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#define FILE_SIZE 200 - -enum stores { default_store = 0, object_store }; - - - diff --git a/src/md5.c b/src/md5.cpp old mode 100644 new mode 100755 similarity index 96% rename from src/md5.c rename to src/md5.cpp index 6b9021c..0298d58 --- a/src/md5.c +++ b/src/md5.cpp @@ -4,7 +4,7 @@ Input:vector_ptr Output:char* */ -char *str2md5(vector_ptr list) +char *md5(vector_ptr list) { int n; diff --git a/src/md5.h b/src/md5.h old mode 100644 new mode 100755 index ba1ffca..92d06c3 --- a/src/md5.h +++ b/src/md5.h @@ -19,4 +19,4 @@ @in: char *str-Buffer to store block for which the hash is generated,int length-length of block @out: char*-returns hash of block using md5 @return: hash */ -char *str2md5(vector_ptr list); +char *md5(vector_ptr list); diff --git a/src/object_store.h b/src/object_store.h deleted file mode 100644 index 39ffa17..0000000 --- a/src/object_store.h +++ /dev/null @@ -1,22 +0,0 @@ -#include "vector.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/*@description:Function to insert block to blockstore object -@in: vector_ptr list-buffer containing block,size_t length-size of block -@out: int -@return: -1 for error and 0 if inserted successfully */ -int insert_block_to_object(char *hash,vector_ptr list); - -/*@description:Function to get specific block from object -@in: char *hash - hash of block -@out: char* -@return: block */ -char *get_block_from_object(char *hash,int *length); diff --git a/src/restore.h b/src/restore.h deleted file mode 100644 index 36773e9..0000000 --- a/src/restore.h +++ /dev/null @@ -1,56 +0,0 @@ -#include -#include -#include 
-#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(CFLAG) -#define COMMON_DIGEST_FOR_OPENSSL -#include -#include -#define SHA1 CC_SHA1 -#else -#include -#include -#endif -#define block 10 -#define NAME_SIZE 100 -#define int_size sizeof(int) -#define FILE_SIZE 200 - -/*@description:Function to get the position of specific block in hash -@in: char* hash-hash -@out: int -@return: -1 for error and 0 if found. */ -int getposition(char* hash); - -/*@description:Function to restore file. -@in: char* path-path of file to be restored -@out: int -@return: -1 for error and 0 if found. */ -int restorefile(char* path); - -/*@description:Function to search whether file path is present or not.If present will call restorefile to restore file. -@in: void -@out: int -@return: -1 for error and 0 if found. */ -int restore_file(); - -/*@description:Function to get specific block from specified position -@in: int pos-position of block, -@out: char* -@return: block */ -char* get_block(int pos, int *length); - -/*@description:Function to get specific block from object -@in: char *hash - hash of block -@out: char* -@return: block */ -char *get_block_from_object(char *hash,int *length); diff --git a/src/sha1.c b/src/sha1.cpp old mode 100644 new mode 100755 similarity index 100% rename from src/sha1.c rename to src/sha1.cpp diff --git a/src/sha1.h b/src/sha1.h old mode 100644 new mode 100755 diff --git a/src/stub.h b/src/stub.h deleted file mode 100644 index e5dd770..0000000 --- a/src/stub.h +++ /dev/null @@ -1,31 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#if defined(CFLAG) -#define COMMON_DIGEST_FOR_OPENSSL -#include -#include -#define SHA1 CC_SHA1 -#else -#include -#include -#endif -#define block 10 -#define NAME_SIZE 100 -#define int_size sizeof(int) - -/*@description:Function to write hash,beginning offset and ending offset of 
block to stub -@in: char *buff-buffer containing hash,size_t l-length of hash,int filedes-file descriptor of stub,int b_offset-beginning offset of block,int e_offset-ending offset of bloc -@out: int -@return: -1 for error and 0 if found. */ -int write_to_stub(char buff[],size_t l,int fd_stub,int b_offset,int e_offset); diff --git a/src/vector.c b/src/vector.cpp old mode 100644 new mode 100755 similarity index 100% rename from src/vector.c rename to src/vector.cpp diff --git a/src/vector.h b/src/vector.h old mode 100644 new mode 100755 diff --git a/src/block.c b/src/ydl_block.cpp old mode 100644 new mode 100755 similarity index 66% rename from src/block.c rename to src/ydl_block.cpp index 8fdcb49..e395f95 --- a/src/block.c +++ b/src/ydl_block.cpp @@ -1,23 +1,17 @@ -#include "block.h" -#include "clean_buff.h" -#include "vector.h" - -static struct block_store fd; +#include "ydl_block.h" /*Function to create block file. Input:void Output:int*/ -int -init_block_store() +int ydl_block::init_block_store() { - int ret = -1; - fd.fd_block = open("blockstore.txt", O_APPEND|O_CREAT|O_RDWR, + block_fd = open("blockstore.txt", O_APPEND|O_CREAT|O_RDWR, S_IWUSR|S_IRUSR); - if (fd.fd_block == -1) { - printf("\nCreation of block file failed with error [%s]\n", - strerror(errno)); + if (block_fd == -1) { + cout << "\nCreation of block file failed with error " + << strerror(errno) << endl; goto out; } ret = 0; @@ -30,21 +24,20 @@ init_block_store() Input:vector_ptr list,size_t length Output:int */ -int -insert_block(vector_ptr list, size_t length) +int ydl_block::insert_block(vector_ptr list,size_t length) { - int ret = -1; vector_ptr temp_node = NULL; if (length <= 0) { goto out; } - if (write (fd.fd_block, &length, INT_SIZE) == -1) { + if (write (block_fd, &length, INT_SIZE) == -1) { fprintf(stderr, "%s\n", strerror(errno)); goto out; } - ret = lseek(fd.fd_block, 1, SEEK_CUR); + ret = lseek(block_fd, 1, SEEK_CUR); + ret = lseek(block_fd, 1, SEEK_CUR); if (ret == -1) goto out; if 
(list == NULL) { @@ -54,7 +47,7 @@ insert_block(vector_ptr list, size_t length) do { temp_node = list; - if (-1 == write(fd.fd_block, temp_node->vector_element, + if (-1 == write(block_fd, temp_node->vector_element, temp_node->length)) { fprintf(stderr, "%s\n", strerror(errno)); goto out; @@ -72,10 +65,8 @@ insert_block(vector_ptr list, size_t length) Input:int pos Output:char* */ -char* -get_block(int pos, int *l) +char *ydl_block::get_block(int pos,int *l) { - struct stat st; int size = 0; int length = 0; @@ -83,21 +74,22 @@ get_block(int pos, int *l) char *buffer = NULL; int position = 1; - fstat(fd.fd_block, &st); + fstat(block_fd, &st); size = st.st_size; /*rewind the stream pointer to the start of block file*/ if (size > 0) { - if (-1 == lseek(fd.fd_block, 0, SEEK_SET)) { - printf("\nLseek failed with error: [%s]\n", - strerror(errno)); + if (-1 == lseek(block_fd, 0, SEEK_SET)) { + cout << "\nLseek failed with error:" + << strerror(errno) << endl; goto out; } } while (size > 0) { - ret = read(fd.fd_block, &length, INT_SIZE); + ret = read(block_fd, &length, INT_SIZE); *l = length; if (ret == -1) { - printf("\nError while reading %s", strerror(errno)); + cout << "\nError while reading " + << strerror(errno) << endl; goto out; } position = position+ret; @@ -107,29 +99,29 @@ get_block(int pos, int *l) buffer = (char *)calloc(1, length+1); if (position == pos) { - ret = read(fd.fd_block, buffer, length); + ret = read(block_fd, buffer, length); if (ret == -1) { - printf("\nRead failed with error %s\n", - strerror(errno)); + cout << "\nRead failed with error " + << strerror(errno) << endl; goto out; } ret = 0; buffer[length] = '\0'; break; } - ret = read(fd.fd_block, buffer, length); + ret = read(block_fd, buffer, length); if (ret == -1) { - printf("\nRead failed with error %s\n", - strerror(errno)); + cout << "\nRead failed with error " + << strerror(errno) << endl; goto out; } position = position + length; buffer[length] = '\0'; size -= (length+INT_SIZE); 
clean_buff(&buffer); - } ret = 0; + out: if (ret == -1) { memset(buffer, 0, length+1); @@ -141,14 +133,12 @@ get_block(int pos, int *l) /*Function to close block fd. Input:void Output:int*/ -int -fini_block_store() +int ydl_block::fini_block_store() { - int ret = -1; - if (fd.fd_block != -1) - ret = close(fd.fd_block); + if (block_fd != -1) + ret = close(block_fd); if (ret == -1) { fprintf(stderr, "%s\n", strerror(errno)); goto out; diff --git a/src/ydl_block.h b/src/ydl_block.h new file mode 100755 index 0000000..6661b5c --- /dev/null +++ b/src/ydl_block.h @@ -0,0 +1,30 @@ +#include +using namespace std; +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "vector.h" +#include "clean_buff.h" +#ifndef YDL_BLOCK_H +#define YDL_BLOCK_H +#define INT_SIZE sizeof(int) + +class ydl_block +{ + static int block_fd; +public: + int init_block_store(); + char* get_block(int pos, int *l); + int insert_block(vector_ptr list, size_t length); + int fini_block_store(); +}; + +int ydl_block::block_fd; +#endif // YDL_BLOCK_H diff --git a/src/ydl_catalog.cpp b/src/ydl_catalog.cpp new file mode 100755 index 0000000..6508b12 --- /dev/null +++ b/src/ydl_catalog.cpp @@ -0,0 +1,202 @@ +#include "ydl_catalog.h" + +/*Function to create catalog file. +Input:void +Output:int*/ +int +ydl_catalog::init_catalog_store() +{ + int ret = -1; + + cat_fd =open("filecatalog.txt",O_APPEND|O_CREAT|O_RDWR, S_IRUSR|S_IWUSR); + if (cat_fd< 1) + { + fprintf(stderr,"%s\n",strerror(errno)); + goto out; + } + ret=0; +out: + return ret; + +} + +/*Function to write contents to a catalog file. 
+Input:char* filename +Output:int +*/ +int +ydl_catalog::writecatalog(char* filename) +{ + int ret = -1; + char actualpath [PATH_MAX+1]; + char *real_path = NULL; + int size_of_real_path = 0; + + if (filename== '\0' || filename == NULL) + { + goto out; + } + real_path = realpath(filename, actualpath); + if (real_path== NULL) + { + fprintf(stderr,"%s\n",strerror(errno)); + goto out; + } + size_of_real_path=strlen(real_path); + if (-1 == write(cat_fd,&size_of_real_path,INT_SIZE)) + { + fprintf(stderr,"%s\n",strerror(errno)); + goto out; + } + if (-1 == write(cat_fd,real_path,size_of_real_path)) + { + fprintf(stderr,"%s\n",strerror(errno)); + goto out; + } + ret=0; +out: + return ret; + +} + +/*Function to read all deduped files from a catalog file. +Input:void +Output:int +*/ +int +ydl_catalog::readfilecatalog() +{ + + struct stat st; + int ret = -1; + char *buffer = NULL; + int size = 0; + int length = 0; + + fstat(cat_fd, &st); + size = st.st_size; + if (size> 0) + { + if (-1 == lseek(cat_fd,0,SEEK_SET)) + { + fprintf(stderr,"%s\n",strerror(errno)); + goto out; + } + } + else + { + goto out; + } + cout <<"\nAbsolute path of deduped files are:\n"; + while (size> 0) + { + ret=read(cat_fd,&length,INT_SIZE); + if (ret== -1) + { + fprintf(stderr,"%s\n",strerror(errno)); + goto out; + } + buffer=(char*)calloc(1,length+1); + ret = read(cat_fd,buffer,length); + if (ret== -1) + { + fprintf(stderr,"%s\n",strerror(errno)); + goto out; + } + buffer[length]='\0'; + cout << buffer << endl; + size-=(length+INT_SIZE); + memset(buffer,0,length+1); + clean_buff(&buffer); + ret=1; + } + +out: + return ret; + +} + +/*Function to compare absolute path of file in file catalog. 
+Input:char out[],int cat_fd +Output:int +*/ +int +ydl_catalog::comparepath(char *out) +{ + + struct stat st; + int size = 0; + size_t length = 0; + int ret = -1; + char* buffer = NULL; + + fstat(cat_fd, &st); + size = st.st_size; + // rewind the stream pointer to the start of catalog file + if (size> 0) + { + if (-1 == lseek(cat_fd,0,SEEK_SET)) + { + fprintf(stderr,"%s\n",strerror(errno)); + goto out; + } + } + if (size== 0) + { + ret=1; + goto out; + } + while (size> 0) + { + ret=read(cat_fd,&length,INT_SIZE); + if (ret== -1) + { + fprintf(stderr,"%s\n",strerror(errno)); + goto out; + } + buffer=(char*)calloc(1,length+1); + ret = read(cat_fd,buffer,length); + if (ret== -1) + { + fprintf(stderr,"%s\n",strerror(errno)); + goto out; + } + buffer[length]='\0'; + if (strcmp(out,buffer)== 0) + { + ret=0; + clean_buff(&buffer); + break; + } + size-=(length+INT_SIZE); + memset(buffer,0,length+1); + clean_buff(&buffer); + ret=1; + } +out: + return ret; + +} + +/*Function to close catalog fd. +Input:void +Output:int*/ +int +ydl_catalog::fini_catalog_store() +{ + + int ret = -1; + + if (cat_fd != -1) + ret=close(cat_fd); + if (ret== -1) + { + fprintf(stderr,"%s\n",strerror(errno)); + goto out; + } + ret=0; +out: +return ret; + +} + diff --git a/src/ydl_catalog.h b/src/ydl_catalog.h new file mode 100755 index 0000000..732540d --- /dev/null +++ b/src/ydl_catalog.h @@ -0,0 +1,30 @@ +#include +using namespace std; +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "clean_buff.h" +#define INT_SIZE sizeof(int) +#ifndef YDL_CATALOG_H +#define YDL_CATALOG_H + +class ydl_catalog +{ + static int cat_fd; +public: + int init_catalog_store(); + int writecatalog(char* filename); + int readfilecatalog(); + int comparepath(char *out); + int fini_catalog_store(); +}; + +int ydl_catalog::cat_fd; +#endif // YDL_CATALOG_H diff --git a/src/dedup.c b/src/ydl_dedup.cpp old mode 100644 new mode 100755 similarity index 84% rename from 
src/dedup.c rename to src/ydl_dedup.cpp index 0ba4a08..e0f8a70 --- a/src/dedup.c +++ b/src/ydl_dedup.cpp @@ -1,11 +1,4 @@ -#include "dedup.h" -#include "clean_buff.h" -#include "main.h" -#include "Rabin_Karp.h" -#include "catalog.h" -#include "hash.h" - -#define NAME_SIZE 100 +#include "ydl_dedup.h" /* Function to dedup a file whose path is specified by the user. @@ -13,8 +6,8 @@ Input:char* filename,int chunk_type,int hash_type,int block_size Output:int */ int -dedup_file (char *filename, int chunk_type, int hash_type, int block_size, -int store_type) +ydl_dedup::dedup_file (char *filename, int chunk_type, int hash_type, + int block_size, int store_type) { int ret = -1; @@ -38,6 +31,8 @@ int store_type) FILE *fp = NULL; struct stat st; vector_ptr list = NULL; + ydl_catalog catalog; + ydl_rabin_karp rabin_karp; ts1 = strdup(filename); ts2 = strdup(filename); @@ -50,7 +45,7 @@ int store_type) fprintf(stderr, "%s\n", strerror(errno)); goto out; } - ret = comparepath(filename); + ret = catalog.comparepath(filename); if (ret == -1) { goto out; } @@ -63,7 +58,7 @@ int store_type) fprintf(stderr, "%s\n", strerror(errno)); goto out; } - if (write (fd_stub, &store_type, int_size) == -1) { + if (write (fd_stub, &store_type, INT_SIZE) == -1) { fprintf(stderr, "%s\n", strerror(errno)); goto out; } @@ -114,7 +109,7 @@ int store_type) list = NULL; chunk_flag = 0; while (chunk_flag == 0) { - chunk_buffer = get_variable_chunk(fd_input, + chunk_buffer = rabin_karp.get_variable_chunk(fd_input, &ret, &size, &chunk_flag, &chunk_length); if (ret == -1) { fprintf (stderr, @@ -143,7 +138,7 @@ int store_type) break; } } - ret = writecatalog(filename); + ret = catalog.writecatalog(filename); if (ret == -1) goto out; ret = 0; @@ -162,8 +157,8 @@ int *length Output:int */ int -get_next_chunk(int fd_input, int chunk_type, int block_size, char **buffer, -int *length) +ydl_dedup::get_next_chunk(int fd_input, int chunk_type, int block_size, + char **buffer, int *length) { int ret = -1; @@ 
-184,7 +179,8 @@ Input:char *buffer,int length,int hash_type,char** hash,int *h_length Output:int */ int -get_hash(int hash_type, char **hash, int *h_length, vector_ptr list) +ydl_dedup::get_hash(int hash_type, char **hash, int *h_length, + vector_ptr list) { int ret = -1; @@ -193,7 +189,7 @@ get_hash(int hash_type, char **hash, int *h_length, vector_ptr list) switch (hash_type) { case 1: - buf = str2md5(list); + buf = md5(list); *hash = buf; *h_length = strlen(buf); break; @@ -215,36 +211,41 @@ int b_offset,int fd_stub Output:int */ int -chunk_store(vector_ptr list, char *hash, int length, int h_length, int e_offset, -int b_offset, int fd_stub, int store_type) +ydl_dedup::chunk_store(vector_ptr list, char *hash, int length, + int h_length, int e_offset, int b_offset, + int fd_stub, int store_type) { int off = -1; int ret = -1; + ydl_stub stub_store; + ydl_hash hash_store; + ydl_block block_store; + ydl_object_store object_store; if (store_type == 0) { - ret = searchhash(hash); + ret = hash_store.searchhash(hash); if (ret == -1) { goto out; } if (ret == 0) { - ret = write_to_stub(hash, h_length, fd_stub, b_offset, + ret = stub_store.write_to_stub(hash, h_length, fd_stub, b_offset, e_offset); if (ret == -1) { goto out; } } else { - off = insert_block(list, length); + off = block_store.insert_block(list, length); if (off == -1) { fprintf(stderr, "%s\n", strerror(errno)); goto out; } - ret = insert_hash(hash, off); + ret = hash_store.insert_hash(hash, off); if (ret == -1) { fprintf(stderr, "%s\n", strerror(errno)); goto out; } - ret = write_to_stub(hash, h_length, fd_stub, b_offset, + ret = stub_store.write_to_stub(hash, h_length, fd_stub, b_offset, e_offset); if (ret == -1) { fprintf(stderr, "%s\n", strerror(errno)); @@ -252,12 +253,12 @@ int b_offset, int fd_stub, int store_type) } } } else { - ret = insert_block_to_object(hash, list); + ret = object_store.insert_block_to_object(hash, list); if (ret == -1) { fprintf(stderr, "%s\n", strerror(errno)); goto out; } - 
ret = write_to_stub(hash, h_length, fd_stub, b_offset, + ret = stub_store.write_to_stub(hash, h_length, fd_stub, b_offset, e_offset); if (ret == -1) { fprintf(stderr, "%s\n", strerror(errno)); @@ -269,8 +270,3 @@ int b_offset, int fd_stub, int store_type) return ret; } - - - - - diff --git a/src/ydl_dedup.h b/src/ydl_dedup.h new file mode 100755 index 0000000..d8afda7 --- /dev/null +++ b/src/ydl_dedup.h @@ -0,0 +1,52 @@ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "clean_buff.h" +#include "ydl_rabin_karp.h" +#include "ydl_catalog.h" +#include "ydl_hash.h" +#include "ydl_object_store.h" +#include "ydl_stub.h" +#include "ydl_block.h" +#include "sha1.h" +#include "md5.h" + +#if defined(CFLAG) +#define COMMON_DIGEST_FOR_OPENSSL +#include +#include +#define SHA1 CC_SHA1 +#else +#include +#include +#endif +#define INT_SIZE sizeof(int) +#define NAME_SIZE 100 + +class ydl_dedup +{ +public: + int dedup_file (char* filename,int chunk_type,int hash_type, + int block_size,int store); + int get_hash(int hash_type,char** hash,int *h_length, + vector_ptr list); + int get_next_chunk(int fd_input, int chunk_type,int block_size, + char **buffer, int *length); + int chunk_store(vector_ptr list, char *hash, int length, + int h_length,int e_offset, int b_offset, + int fd_stub, int store); +}; \ No newline at end of file diff --git a/src/delete.c b/src/ydl_delete.cpp old mode 100644 new mode 100755 similarity index 70% rename from src/delete.c rename to src/ydl_delete.cpp index bf6ecc3..1a59dea --- a/src/delete.c +++ b/src/ydl_delete.cpp @@ -1,12 +1,11 @@ -#include "clean_buff.h" -#include "delete.h" +#include "ydl_delete.h" /*Function to delete file from a directory. 
Input:void Output:int */ int -delete_file() +ydl_delete::delete_file() { int fd_cat = -1; @@ -20,21 +19,21 @@ delete_file() fprintf(stderr, "%s\n", strerror(errno)); goto out; } - printf("\n deduped files are\n"); + cout << "\n deduped files are\n"; ret = readfilecatalog(); if (ret == -1) { fprintf(stderr, "%s\n", strerror(errno)); goto out; } - printf("\nSelect the file you want to delete\n"); - while (scanf("%s", filename) <= 0) + cout << "\nSelect the file you want to delete\n"; + while ( cin >> filename <= 0) ; - printf("\nFILE%s", filename); + cout << "\nFILE" << filename; status = remove(filename); if (status == 0) { - printf("\nFile deleted successfully.\n"); + cout << "\nFile deleted successfully.\n"; } else { - printf("\nUnable to delete the file\n"); + cout << "\nUnable to delete the file\n"; perror("\nError\n"); goto out; } diff --git a/src/ydl_delete.h b/src/ydl_delete.h new file mode 100755 index 0000000..7a2990f --- /dev/null +++ b/src/ydl_delete.h @@ -0,0 +1,16 @@ +#include +using namespace std; +#include +#include +#include +#include +#include +#include "clean_buff.h" +#define FILE_SIZE 200 + +class ydl_delete +{ +public: + int readfilecatalog(); + int delete_file(); +}; \ No newline at end of file diff --git a/src/hash.c b/src/ydl_hash.cpp old mode 100644 new mode 100755 similarity index 79% rename from src/hash.c rename to src/ydl_hash.cpp index 9b0a1ff..4c412ea --- a/src/hash.c +++ b/src/ydl_hash.cpp @@ -1,20 +1,17 @@ -#include "hash.h" -#include "clean_buff.h" - -static int fd_hash; +#include "ydl_hash.h" /*Function to create hash for a given block Input:void Output:int*/ int -init_hash_store() +ydl_hash::init_hash_store() { int ret = -1; - fd_hash = open("filehashDedup.txt", O_APPEND|O_CREAT|O_RDWR, + hash_fd = open("filehashDedup.txt", O_APPEND|O_CREAT|O_RDWR, S_IRUSR|S_IWUSR); - if (fd_hash == -1) { + if (hash_fd == -1) { printf("\nCreation of hash file failed with error [%s]\n", strerror(errno)); goto out; @@ -31,22 +28,22 @@ Input:char 
*buff,int offset Output:int */ int -insert_hash(char *buff, int offset) +ydl_hash::insert_hash(char *buff, int offset) { size_t length; int ret = -1; length = strlen(buff); - if (write (fd_hash, &length, int_size) == -1) { + if (write (hash_fd, &length, INT_SIZE) == -1) { printf("\nWrite failed with error%s\n", strerror(errno)); goto out; } - if (-1 == write(fd_hash, buff, length)) { + if (-1 == write(hash_fd, buff, length)) { printf("\nWrite1 failed with error%s\n", strerror(errno)); goto out; } - if (write (fd_hash, &offset, int_size) == -1) { + if (write (hash_fd, &offset, INT_SIZE) == -1) { printf("\nWrite failed with error%s\n", strerror(errno)); goto out; } @@ -61,21 +58,21 @@ Input:char *out Output:int */ int -searchhash(char *out) +ydl_hash::searchhash(char *out) { struct stat st; - int fd2 = fd_hash; + int fd2 = hash_fd; int size = 0; size_t length = 0; int ret = -1; int offset = 0; char *buffer = NULL; - fstat(fd_hash, &st); + fstat(hash_fd, &st); size = st.st_size; - /*rewind the stream pointer to the start of temporary file*/ - if (-1 == lseek(fd_hash, 0, SEEK_SET)) { + /*Rewind the stream pointer to the start of temporary file*/ + if (-1 == lseek(hash_fd, 0, SEEK_SET)) { printf("\nLseek failed with error: [%s]\n", strerror(errno)); goto out; } @@ -84,7 +81,7 @@ searchhash(char *out) goto out; } while (size > 0) { - ret = read(fd2, &length, int_size); + ret = read(fd2, &length, INT_SIZE); if (ret == -1) { printf("\nError while reading %s\n", strerror(errno)); goto out; @@ -96,7 +93,7 @@ searchhash(char *out) strerror(errno)); goto out; } - ret = read(fd2, &offset, int_size); + ret = read(fd2, &offset, INT_SIZE); if (ret == -1) { printf("\nError while reading %s\n", strerror(errno)); goto out; @@ -108,7 +105,7 @@ searchhash(char *out) clean_buff(&buffer); break; } - size -= (length+int_size+int_size); + size -= (length+INT_SIZE+INT_SIZE); clean_buff(&buffer); ret = 1; } @@ -122,7 +119,7 @@ Input:char* hash Output:int */ int -getposition(char *hash) 
+ydl_hash::getposition(char *hash) { struct stat st; @@ -133,11 +130,11 @@ getposition(char *hash) char *buffer = NULL; int h_length = 0; - fstat(fd_hash, &st); + fstat(hash_fd, &st); size = st.st_size; h_length = strlen(hash); /* rewind the stream pointer to the start of temporary file*/ - if (-1 == lseek(fd_hash, 0, SEEK_SET)) { + if (-1 == lseek(hash_fd, 0, SEEK_SET)) { printf("\nLseek failed with error: [%s]\n", strerror(errno)); goto out; } @@ -147,20 +144,20 @@ getposition(char *hash) } while (size > 0) { - ret = read(fd_hash, &length, int_size); + ret = read(hash_fd, &length, INT_SIZE); if (ret == -1) { printf("\nError while reading %s", strerror(errno)); goto out; } buffer = (char *)calloc(1, length+1); hash[h_length] = '\0'; - ret = read(fd_hash, buffer, length); + ret = read(hash_fd, buffer, length); if (ret == -1) { printf("\nRead failed with error %s\n", strerror(errno)); goto out; } - ret = read(fd_hash, &offset, int_size); + ret = read(hash_fd, &offset, INT_SIZE); if (ret == -1) { printf("\nError while reading %s", strerror(errno)); goto out; @@ -171,7 +168,7 @@ getposition(char *hash) clean_buff(&buffer); break; } - size -= (length+int_size+int_size); + size -= (length+INT_SIZE+INT_SIZE); clean_buff(&buffer); ret = -1; } @@ -187,13 +184,13 @@ getposition(char *hash) Input:void Output:int*/ int -fini_hash_store() +ydl_hash::fini_hash_store() { int ret = -1; - if (fd_hash != -1) - ret = close(fd_hash); + if (hash_fd != -1) + ret = close(hash_fd); if (ret == -1) { fprintf(stderr, "%s\n", strerror(errno)); goto out; diff --git a/src/ydl_hash.h b/src/ydl_hash.h new file mode 100755 index 0000000..c8b9628 --- /dev/null +++ b/src/ydl_hash.h @@ -0,0 +1,25 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "clean_buff.h" +#define INT_SIZE sizeof(int) + +class ydl_hash +{ + static int hash_fd; +public: + int init_hash_store(); + int insert_hash(char *buff,int offset); + int searchhash(char *out); + 
int getposition(char* hash); + int fini_hash_store(); +}; + +int ydl_hash::hash_fd; \ No newline at end of file diff --git a/src/ydl_main.cpp b/src/ydl_main.cpp new file mode 100755 index 0000000..35a4667 --- /dev/null +++ b/src/ydl_main.cpp @@ -0,0 +1,140 @@ +#include "ydl_main.h" +#include "ydl_dedup.h" +#include "ydl_restore.h" +#include "ydl_delete.h" + +/*Main program!*/ +int +main (int argc, char *argv[]) +{ + + int chunk_type = 0; + int hash_type = 0; + char *filename = NULL; + int ch = 0; + int block_size = 0; + int ret = -1; + int store = 0; + enum stores store_type; + + ydl_block block_store; + ydl_catalog catalog_store; + ydl_hash hash_store; + ydl_dedup dedupe_obj; + ydl_restore restore_obj; + ydl_delete delete_obj; + + filename = (char *)calloc(1, FILE_SIZE); + ret = block_store.init_block_store(); + if (ret == -1) + goto out; + ret = hash_store.init_hash_store(); + if (ret == -1) + goto out; + ret = catalog_store.init_catalog_store(); + if (ret == -1) + goto out; + + while (1) { + cout << "\n1.Do you want to dedup a file\n"; + cout << "2.Do you want restore a file\n"; + cout << "3.delete a file\n"; + cout << "4.Exit\n"; + cout << "Enter your choice\n"; + cin >> ch; + switch (ch) { + case 1: + cout << "\nPlease give filename with the " + << "full path\n"; + cin >> filename; + ret = file_exist(filename); + if (ret == 0) { + fprintf(stderr, "%s\n", strerror(errno)); + continue; + } + cout << "\nDo you want to do fixed or " + << "variable chunking\n"; + cout << "\n1.fixed or 2.variable\n"; + while ( cin >> chunk_type ) { + if (!(chunk_type == 1 || chunk_type == 2)) { + cout <<"\nInvalid choice please" + << " enter valid choice\n"; + } else { + break; + } + } + cout << "\nEnter block size\n"; + cout << "For variable chunking block size " + << "should be zero\n"; + cin >> block_size; + cout << "\nChoose algorithm to hash\n" ; + cout << "1.md5 2.sha1\n"; + while (cin >> hash_type) { + if (!(hash_type == 1 || hash_type == 2)) { + cout <<"\nInvalid choice 
please" + <<"enter valid choice\n"; + } else { + break; + } + } + cout << "\nSelect the store type\n"; + cout << "0 : default_store\n1 : object_store\n"; + cin >> store; + switch (store) { + case 0: + cout << "Default store selected\n"; + store_type = default_store; + break; + case 1: + cout << "Object store selected\n"; + store_type = object_store; + break; + default: + cout << "Invalid option\n"; + goto out; + } + cout << "Deduplication in progress...\n"; + ret = dedupe_obj.dedup_file(filename, chunk_type, + hash_type, block_size, store_type); + if (ret == -1) + goto out; + break; + case 2: + ret = restore_obj.restore_file(); + if (ret == -1) + goto out; + break; + case 3: + ret = delete_obj.delete_file(); + if (ret == -1) + goto out; + break; + case 4: + goto out; + + default: + cout << "\nInvalid choice Enter valid choice\n"; + } + } + ret = 0; +out: + ret = block_store.fini_block_store(); + ret = hash_store.fini_hash_store(); + ret = catalog_store.fini_catalog_store(); + return ret; + +} + +/*Function to check whether second argument exist in directory or not. 
+Input:char *filename +Output:int +*/ +inline int +file_exist(char *filename) +{ + + struct stat buffer; + + return (stat (filename, &buffer) == 0); + +} diff --git a/src/ydl_main.h b/src/ydl_main.h new file mode 100755 index 0000000..11b5db8 --- /dev/null +++ b/src/ydl_main.h @@ -0,0 +1,20 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "ydl_catalog.h" +#include "ydl_block.h" +#include "ydl_hash.h" +#include "ydl_dedup.h" +#include "ydl_restore.h" +#include "ydl_delete.h" + +#define FILE_SIZE 200 + +enum stores { default_store = 0, object_store }; + +inline int file_exist(char *filename); \ No newline at end of file diff --git a/src/object_store.c b/src/ydl_object_store.cpp old mode 100644 new mode 100755 similarity index 96% rename from src/object_store.c rename to src/ydl_object_store.cpp index b9e02e5..f82ceda --- a/src/object_store.c +++ b/src/ydl_object_store.cpp @@ -1,4 +1,4 @@ -#include "object_store.h" +#include "ydl_object_store.h" /*Function to insert block to blockstore object Input: vector_ptr list : buffer containing block,size_t length-size of block @@ -6,7 +6,7 @@ Input: vector_ptr list : buffer containing block,size_t length-size of block Output: int : -1 for error and 0 if inserted successfully */ int -insert_block_to_object(char *hash, vector_ptr list) +ydl_object_store::insert_block_to_object(char *hash, vector_ptr list) { DIR *dp1 = NULL; @@ -87,8 +87,8 @@ insert_block_to_object(char *hash, vector_ptr list) Input: int *length : length of the hash char *hash : hash value of the block Output: int : -1 for error and 0 if inserted successfully */ -char -*get_block_from_object(char *hash, int *length) +char* +ydl_object_store::get_block_from_object(char *hash, int *length) { DIR *dp1 = NULL; diff --git a/src/ydl_object_store.h b/src/ydl_object_store.h new file mode 100755 index 0000000..8533a2e --- /dev/null +++ b/src/ydl_object_store.h @@ -0,0 +1,18 @@ +#include "vector.h" +#include +#include +#include 
+#include +#include +#include +#include +#include +#include + + +class ydl_object_store +{ +public: + int insert_block_to_object(char *hash,vector_ptr list); + char *get_block_from_object(char *hash,int *length); +}; \ No newline at end of file diff --git a/src/Rabin_Karp.c b/src/ydl_rabin_karp.cpp old mode 100644 new mode 100755 similarity index 87% rename from src/Rabin_Karp.c rename to src/ydl_rabin_karp.cpp index 6a8d805..99e48ba --- a/src/Rabin_Karp.c +++ b/src/ydl_rabin_karp.cpp @@ -1,5 +1,4 @@ -#include "Rabin_Karp.h" -#include "clean_buff.h" +#include "ydl_rabin_karp.h" /*Function to calculate the rolling hash for the first window of chunk. Input: @@ -11,7 +10,7 @@ y_uint32 hash_value : Rolling hash of a first window of chunk. */ y_uint32 -calc_hash (char *buffer, y_uint32 *power, int *ret) +ydl_rabin_karp::calc_hash (char *buffer, y_uint32 *power, int *ret) { int i = 0; @@ -26,7 +25,7 @@ calc_hash (char *buffer, y_uint32 *power, int *ret) /*calculates the rolling hash for the first window of chunk and power's PRIME with window length*/ - for (i = 0; i < N; i++) { + for (i = 0; i < WINDOW_SIZE; i++) { hash_value = (PRIME * hash_value + buffer[i]) % M; *power = (*power * PRIME) % M; } @@ -51,7 +50,7 @@ calc_hash (char *buffer, y_uint32 *power, int *ret) int ret : 0 on success, -1 on failure */ int -get_remaining_buffer_content(char **remaining_buffer_content, +ydl_rabin_karp::get_remaining_buffer_content(char **remaining_buffer_content, char **remaining_window_content, ssize_t remaining_length, char **buffer, ssize_t start) { @@ -71,9 +70,9 @@ get_remaining_buffer_content(char **remaining_buffer_content, /*remaining_window_content holds last window content of previous buffer, if remaining length of previous buffer is greater then window size*/ - if (remaining_length >= N) { + if (remaining_length >= WINDOW_SIZE) { *remaining_window_content = (char *)calloc - (1, N + 1); + (1, WINDOW_SIZE + 1); if (*remaining_window_content == NULL) { fprintf (stderr, "Error 
in buffer allocation\n"); @@ -81,7 +80,7 @@ get_remaining_buffer_content(char **remaining_buffer_content, } memcpy (*remaining_window_content, - *buffer + BUFFER_LEN - N, N); + *buffer + BUFFER_LEN - WINDOW_SIZE, WINDOW_SIZE); } ret = 0; out: @@ -108,10 +107,13 @@ get_remaining_buffer_content(char **remaining_buffer_content, int ret : 0 on success, -1 on failure */ int -get_chunk_buffer(ssize_t *remaining_content_incr, ssize_t *remaining_length, - char **chunk_buffer, char **buffer, char **remaining_buffer_content, - char **remaining_window_content, ssize_t start, ssize_t end, - ssize_t slide_incr, int *chunk_length) +ydl_rabin_karp::get_chunk_buffer(ssize_t *remaining_content_incr, + ssize_t *remaining_length, + char **chunk_buffer, char **buffer, + char **remaining_buffer_content, + char **remaining_window_content, + ssize_t start, ssize_t end, + ssize_t slide_incr, int *chunk_length) { ssize_t chunk_size = 0; @@ -142,7 +144,7 @@ get_chunk_buffer(ssize_t *remaining_content_incr, ssize_t *remaining_length, /*Generates the chunk from previous chunk boundary to where the fingerprint is matched*/ else { - *chunk_length = N+slide_incr; + *chunk_length = WINDOW_SIZE+slide_incr; *chunk_buffer = (char *)calloc (1, *chunk_length+1); if (*chunk_buffer == NULL) { @@ -166,7 +168,8 @@ get_chunk_buffer(ssize_t *remaining_content_incr, ssize_t *remaining_length, char* : Chunk to be returned */ char* -get_variable_chunk (int fd, int *ret, int *size, int *chunk_flag, int *chunk_length) +ydl_rabin_karp::get_variable_chunk (int fd, int *ret, int *size, + int *chunk_flag, int *chunk_length) { int counter1 = 0; @@ -214,22 +217,22 @@ get_variable_chunk (int fd, int *ret, int *size, int *chunk_flag, int *chunk_len *ret = -1; goto out; } - if (remaining_length == 0 && buffer_length <= N) { + if (remaining_length == 0 && buffer_length <= WINDOW_SIZE) { *size = 0; return buffer; } start = 0; slide_incr = 0; - end = N; + end = WINDOW_SIZE; /*If there is remaining content in previous 
buffer, set the end pointer of new buffer and remaining_content_incr according to the window size*/ if (remaining_length > 0) { - if (remaining_length < N) { - end = N - remaining_length; - remaining_content_incr = N - + if (remaining_length < WINDOW_SIZE) { + end = WINDOW_SIZE - remaining_length; + remaining_content_incr = WINDOW_SIZE - remaining_length; } else { end = 0; @@ -243,7 +246,7 @@ get_variable_chunk (int fd, int *ret, int *size, int *chunk_flag, int *chunk_len file and length of chunk to .csv file*/ while (end < buffer_length) { if (flag == 0) { - temp_buffer = (char *)calloc(1, N + 1); + temp_buffer = (char *)calloc(1, WINDOW_SIZE + 1); if (temp_buffer == NULL) { fprintf (stderr, "Error in buffer allocation\n"); @@ -251,11 +254,11 @@ get_variable_chunk (int fd, int *ret, int *size, int *chunk_flag, int *chunk_len goto out; } - if (remaining_length == N && + if (remaining_length == WINDOW_SIZE && remaining_content_incr != 0) { memcpy (temp_buffer, - remaining_window_content, - N); + remaining_window_content, + WINDOW_SIZE); hash = calc_hash (temp_buffer, &power, ret); } @@ -265,12 +268,12 @@ get_variable_chunk (int fd, int *ret, int *size, int *chunk_flag, int *chunk_len from current buffer*/ if (remaining_length > 0 && remaining_content_incr != 0) { - if (remaining_length < N) { + if (remaining_length < WINDOW_SIZE) { memcpy (temp_buffer, remaining_buffer_content, remaining_length); - memcpy (temp_buffer + (N-end), + memcpy (temp_buffer + (WINDOW_SIZE-end), buffer, end); remaining_flag = 1; hash = calc_hash (temp_buffer, @@ -283,7 +286,7 @@ get_variable_chunk (int fd, int *ret, int *size, int *chunk_flag, int *chunk_len from current buffer*/ else { memcpy (temp_buffer, - buffer + start, N); + buffer + start, WINDOW_SIZE); hash = calc_hash (temp_buffer, &power, ret); } @@ -298,11 +301,12 @@ get_variable_chunk (int fd, int *ret, int *size, int *chunk_flag, int *chunk_len if (hash == FINGER_PRINT) { *ret = get_chunk_buffer - (&remaining_content_incr, 
&remaining_length, - &chunk_buffer, &buffer, - &remaining_buffer_content, - &remaining_window_content, - start, end, slide_incr, chunk_length); + (&remaining_content_incr, + &remaining_length, + &chunk_buffer, &buffer, + &remaining_buffer_content, + &remaining_window_content, + start, end, slide_incr, chunk_length); if (*ret == -1) goto out; @@ -313,7 +317,7 @@ get_variable_chunk (int fd, int *ret, int *size, int *chunk_flag, int *chunk_len flag = 0; start = end; remaining_length = buffer_length - end; - end += N; + end += WINDOW_SIZE; clean_buff(&previous_remaining_block); previous_block_length = 0; return chunk_buffer; @@ -323,7 +327,7 @@ get_variable_chunk (int fd, int *ret, int *size, int *chunk_flag, int *chunk_len in previous buffer with remaining length less then window size*/ if (remaining_content_incr != 0 && - remaining_content_incr < N && + remaining_content_incr < WINDOW_SIZE && remaining_flag == 1) { hash = (hash * PRIME - power * remaining_buffer_content[counter1] + @@ -337,7 +341,7 @@ get_variable_chunk (int fd, int *ret, int *size, int *chunk_flag, int *chunk_len in previous buffer with remaining length greater then window size*/ if (remaining_content_incr != 0 && - remaining_content_incr <= N && + remaining_content_incr <= WINDOW_SIZE && remaining_flag == 0) { hash = (hash * PRIME - power * remaining_window_content[counter2] + @@ -350,14 +354,14 @@ get_variable_chunk (int fd, int *ret, int *size, int *chunk_flag, int *chunk_len /*Sliding window with the current buffer*/ if ((remaining_content_incr == 0 || - remaining_content_incr >= N) && + remaining_content_incr >= WINDOW_SIZE) && counter1 == 0 && counter2 == 0) { /*Returns the remaining content of previous buffer when there is no match with finger print even after sliding whole window to current buffer*/ - if (remaining_content_incr >= N) { + if (remaining_content_incr >= WINDOW_SIZE) { *chunk_length = remaining_length; remaining_length = buffer_length; clean_buff(&remaining_window_content); @@ 
-402,12 +406,13 @@ get_variable_chunk (int fd, int *ret, int *size, int *chunk_flag, int *chunk_len
         chunk*/
         if (remaining_length > 0 && *size == 0) {
                 *ret = get_chunk_buffer
-                (&remaining_content_incr, &previous_block_length,
-                &chunk_buffer, &buffer,
-                &previous_remaining_block,
-                &remaining_window_content,
-                start, end, slide_incr, chunk_length);
-                if (remaining_length < N)
+                        (&remaining_content_incr,
+                        &previous_block_length,
+                        &chunk_buffer, &buffer,
+                        &previous_remaining_block,
+                        &remaining_window_content,
+                        start, end, slide_incr, chunk_length);
+                if (remaining_length < WINDOW_SIZE)
                         *chunk_length = remaining_length;
                 clean_buff(&remaining_buffer_content);
                 *chunk_flag = 1;
diff --git a/src/ydl_rabin_karp.h b/src/ydl_rabin_karp.h
new file mode 100755
index 0000000..5c8d4c8
--- /dev/null
+++ b/src/ydl_rabin_karp.h
@@ -0,0 +1,58 @@
+#ifndef YDL_RABIN_KARP_H
+#define YDL_RABIN_KARP_H
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "clean_buff.h"
+
+/* Multiplier applied to the hash when the window slides one byte. */
+#define PRIME 23
+/* Window hash value at which get_variable_chunk() cuts a chunk. */
+#define FINGER_PRINT 938
+/* NOTE(review): presumably the size of the read buffer - confirm. */
+#define BUFFER_LEN 10240
+/* NOTE(review): presumably the modulus of the hash - confirm. */
+#define M 1021
+/* Number of bytes covered by the sliding hash window. */
+#define WINDOW_SIZE 2048
+
+typedef unsigned int y_uint32;
+
+/* Splits input into variable-length chunks, cutting wherever the hash
+   of the sliding window equals FINGER_PRINT. */
+class ydl_rabin_karp
+{
+public:
+        /* Hashes the window held in buffer; power and ret are outputs
+           (NOTE(review): their exact meaning is set in the .cpp). */
+        y_uint32 calc_hash (char *buffer, y_uint32 *power, int *ret);
+        /* NOTE(review): presumably saves the unprocessed tail of buffer
+           for the next read - confirm against the .cpp. */
+        int get_remaining_buffer_content(char **remaining_buffer_content,
+                                         char **remaining_window_content,
+                                         ssize_t remaining_length,
+                                         char **buffer,
+                                         ssize_t start);
+        /* Fills *chunk_buffer for get_variable_chunk() once a boundary
+           has been found; the caller treats a result of -1 as failure. */
+        int get_chunk_buffer(ssize_t *remaining_content_incr,
+                             ssize_t *remaining_length,
+                             char **chunk_buffer, char **buffer,
+                             char **remaining_buffer_content,
+                             char **remaining_window_content,
+                             ssize_t start, ssize_t end,
+                             ssize_t slide_incr, int *chunk_length);
+        /* Returns the next chunk and stores its length in *chunk_length;
+           *ret carries the status of the helpers it calls. */
+        char *get_variable_chunk (int fd, int *ret, int *size,
+                                  int *chunk_flag, int *chunk_length);
+};
+
+#endif /* YDL_RABIN_KARP_H */
\ No newline at end of file
diff --git a/src/Restore_file.c b/src/ydl_restore.cpp
old mode 100644
new mode 100755
similarity index 81%
rename from src/Restore_file.c
rename to src/ydl_restore.cpp
index 0303b0a..6ca01b4
--- a/src/Restore_file.c
+++
b/src/ydl_restore.cpp @@ -1,165 +1,169 @@ -#include "restore.h" -#include "catalog.h" -#include "clean_buff.h" - -/*Function to enter a filename that has to be restored. -Input:void -Output:int -*/ -int -restore_file() -{ - - int ret = -1; - char* path = NULL; - - printf("\ndeduped files\n"); - ret = readfilecatalog(); - if (ret == -1) { - goto out; - } - path = (char *)calloc(1, FILE_SIZE); - printf("\nEnter the full path of dedup file to be restored\n"); - if (scanf("%s", path) <= 0) { - goto out; - } - ret = comparepath(path); - if (ret == -1) { - goto out; - } - if (ret == 1) { - printf("\nInvalid path"); - goto out; - } - ret = restorefile(path); - if (ret == -1) { - goto out; - } - ret = 0; -out: - clean_buff(&path); - return ret; - -} - -/* Function to delete file and restore it with original contents. -Input : char* path -Output : int -*/ -int -restorefile(char *path) -{ - - int l = 0; - int ret = -1; - char temp_name[NAME_SIZE] = ""; - int size = 0; - int size1 = 0; - int pos = 0; - char *buffer = NULL; - char *buffer2 = NULL; - int length = 0; - int sd1 = -1; - struct stat st; - int bset = 0; - int eset = 0; - int fd2 = -1; - int store_type = -1; - char *ts1 = NULL; - char *ts2 = NULL; - char *dir = NULL; - char *filename1 = NULL; - - ts1 = strdup(path); - ts2 = strdup(path); - dir = dirname(ts1); - filename1 = basename(ts2); - sprintf(dir, "%s/", dir); - sprintf(temp_name, "%sDedup_%s", dir, filename1); - printf("%s\n", dir); - printf("\npath%s", path); - printf("%s\n", filename1); - printf("\n%s\n", temp_name); - sd1 = open(temp_name, O_RDONLY, S_IRUSR|S_IWUSR); - if (sd1 < 1) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } else { - printf("\nStub file opened\n"); - } - - fstat(sd1, &st); - size = st.st_size; - fd2 = open(path, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR); - if (fd2 < 1) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } else { - printf("\nRestore file created\n"); - } - if (size > 0) { - if (-1 == lseek(sd1,0,SEEK_SET)) { 
- fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - } - if (size == 0) { - printf("\nNo contents\n"); - ret = -1; - goto out; - } - ret = read(sd1, &store_type, int_size); - if (ret== -1) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - while (size > 0) { - ret = read(sd1, &length, int_size); - if (ret == -1) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - buffer = (char *)calloc(1, length+1); - ret = read(sd1, buffer, length); - if (ret == -1) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - ret = read(sd1, &bset, int_size); - if (ret == -1) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - ret = read(sd1, &eset, int_size); - if (ret == -1) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - buffer[length] = '\0'; - if (store_type == 0) { - pos = getposition(buffer); - if (pos == -1) - goto out; - buffer2 = get_block(pos, &l); - if (buffer2 == NULL) { - goto out; - } - } else { - buffer2 = get_block_from_object(buffer, &l); - if (buffer2 == NULL) - goto out; - } - ret = write(fd2, buffer2, l); - if (ret < 0) { - fprintf(stderr, "%s\n", strerror(errno)); - goto out; - } - size1 -= (length+int_size+int_size+int_size); - clean_buff(&buffer); - clean_buff(&buffer2); - } - ret = 0; -out: - return ret; - -} +#include "ydl_restore.h" + +/*Function to enter a filename that has to be restored. 
+Input:void
+Output:int (0 on success, 1 when comparepath() reports 1 and
+"Invalid path" is printed, -1 on any other failure)
+*/
+int
+ydl_restore::restore_file()
+{
+
+        int ret = -1;
+        char* path = NULL;
+
+        ydl_catalog catalog;
+
+        cout << "\nDeduped files\n";
+        ret = catalog.readfilecatalog();
+        if (ret == -1) {
+                goto out;
+        }
+        path = (char *)calloc(1, FILE_SIZE);
+        if (path == NULL) {
+                fprintf(stderr, "%s\n", strerror(errno));
+                ret = -1;
+                goto out;
+        }
+        cout << "\nEnter the full path of dedup file to be restored\n";
+        /* width() caps the extraction at FILE_SIZE - 1 characters plus the
+           terminator, so a long answer cannot overrun path. A stream is
+           tested with !, it cannot be compared against an integer. */
+        cin.width(FILE_SIZE);
+        if (!(cin >> path)) {
+                /* ret still holds the catalog status here; report failure. */
+                ret = -1;
+                goto out;
+        }
+        ret = catalog.comparepath(path);
+        if (ret == -1) {
+                goto out;
+        }
+        if (ret == 1) {
+                cout << "\nInvalid path";
+                goto out;
+        }
+        ret = restorefile(path);
+        if (ret == -1) {
+                goto out;
+        }
+        ret = 0;
+out:
+        clean_buff(&path);
+        return ret;
+
+}
+
+/* Reads exactly count bytes from fd into dest, retrying short reads.
+Input : int fd, void *dest, size_t count
+Output : int (0 on success, -1 on error or premature end of file)
+*/
+static int
+read_fully(int fd, void *dest, size_t count)
+{
+
+        char *cursor = (char *)dest;
+        ssize_t got = 0;
+
+        while (count > 0) {
+                got = read(fd, cursor, count);
+                if (got == -1 && errno == EINTR) {
+                        continue;
+                }
+                if (got == -1) {
+                        fprintf(stderr, "%s\n", strerror(errno));
+                        return -1;
+                }
+                if (got == 0) {
+                        fprintf(stderr, "Unexpected end of stub file\n");
+                        return -1;
+                }
+                cursor += got;
+                count -= (size_t)got;
+        }
+        return 0;
+
+}
+
+/* Writes exactly count bytes from src to fd, retrying short writes.
+Input : int fd, const void *src, size_t count
+Output : int (0 on success, -1 on error)
+*/
+static int
+write_fully(int fd, const void *src, size_t count)
+{
+
+        const char *cursor = (const char *)src;
+        ssize_t put = 0;
+
+        while (count > 0) {
+                put = write(fd, cursor, count);
+                if (put == -1 && errno == EINTR) {
+                        continue;
+                }
+                if (put == -1) {
+                        fprintf(stderr, "%s\n", strerror(errno));
+                        return -1;
+                }
+                if (put == 0) {
+                        fprintf(stderr, "Short write to restored file\n");
+                        return -1;
+                }
+                cursor += put;
+                count -= (size_t)put;
+        }
+        return 0;
+
+}
+
+/* Function to rebuild the file at path from its stub file
+<directory of path>/Dedup_<file name of path>.
+The stub is read as: int store_type, then records of
+{int length, length bytes of key, int begin offset, int end offset}.
+With store_type 0 each key is resolved through the hash store and the
+block store, with any other value through the object store.
+Input : char* path
+Output : int (0 on success, -1 on failure)
+*/
+int
+ydl_restore::restorefile(char *path)
+{
+
+        int l = 0;
+        int ret = -1;
+        int name_len = 0;
+        char temp_name[NAME_SIZE] = "";
+        off_t remaining = 0;
+        const off_t field = (off_t)int_size;
+        int pos = 0;
+        char *buffer = NULL;
+        char *buffer2 = NULL;
+        int length = 0;
+        int sd1 = -1;
+        struct stat st;
+        int bset = 0;
+        int eset = 0;
+        int fd2 = -1;
+        int store_type = -1;
+        char *ts1 = NULL;
+        char *ts2 = NULL;
+        char *dir = NULL;
+        char *filename1 = NULL;
+
+        ydl_block block_store;
+        ydl_hash hash_store;
+        ydl_object_store object_store;
+
+        if (path == NULL) {
+                goto out;
+        }
+        /* dirname() and basename() may modify their argument, so each
+           one works on its own copy of path. */
+        ts1 = strdup(path);
+        ts2 = strdup(path);
+        if (ts1 == NULL || ts2 == NULL) {
+                fprintf(stderr, "%s\n", strerror(errno));
+                goto out;
+        }
+        dir = dirname(ts1);
+        filename1 = basename(ts2);
+        /* dirname() may return static storage or point into ts1 with no
+           spare room, so the "/" is added while formatting temp_name
+           instead of being written back into dir. snprintf also keeps a
+           long path from overrunning temp_name. */
+        name_len = snprintf(temp_name, sizeof(temp_name), "%s/Dedup_%s",
+                            dir, filename1);
+        if (name_len < 0 || (size_t)name_len >= sizeof(temp_name)) {
+                fprintf(stderr, "Stub file name too long\n");
+                goto out;
+        }
+        cout << dir << "/" << endl;
+        cout << "\npath" << path;
+        cout << filename1 << endl;
+        cout << endl << temp_name << endl;
+        sd1 = open(temp_name, O_RDONLY);
+        /* 0 is a valid descriptor; only a negative value is a failure. */
+        if (sd1 < 0) {
+                fprintf(stderr, "%s\n", strerror(errno));
+                goto out;
+        }
+        cout << "\nStub file opened\n";
+
+        if (fstat(sd1, &st) == -1) {
+                fprintf(stderr, "%s\n", strerror(errno));
+                goto out;
+        }
+        remaining = st.st_size;
+        /* NOTE(review): there is no O_TRUNC, so restoring over a longer
+           existing file keeps its old tail - confirm this is intended. */
+        fd2 = open(path, O_CREAT|O_RDWR, S_IRUSR|S_IWUSR);
+        if (fd2 < 0) {
+                fprintf(stderr, "%s\n", strerror(errno));
+                goto out;
+        }
+        cout << "\nRestore file created\n";
+        if (remaining == 0) {
+                cout << "\nNo contents\n";
+                goto out;
+        }
+        if (read_fully(sd1, &store_type, int_size) == -1) {
+                goto out;
+        }
+        remaining -= field;
+        /* remaining counts the stub bytes not consumed yet, so the loop
+           stops at the end of the last record instead of at a failed
+           read past the end of the file. */
+        while (remaining > 0) {
+                if (read_fully(sd1, &length, int_size) == -1) {
+                        goto out;
+                }
+                remaining -= field;
+                /* length comes from the file: reject values that cannot
+                   fit in what is left of the stub. */
+                if (length < 0 || (off_t)length + 2 * field > remaining) {
+                        fprintf(stderr, "Corrupt stub file\n");
+                        goto out;
+                }
+                buffer = (char *)calloc(1, (size_t)length + 1);
+                if (buffer == NULL) {
+                        fprintf(stderr, "%s\n", strerror(errno));
+                        goto out;
+                }
+                if (read_fully(sd1, buffer, (size_t)length) == -1) {
+                        goto out;
+                }
+                /* The offsets are not used here; they are read only to
+                   step over them to the next record. */
+                if (read_fully(sd1, &bset, int_size) == -1) {
+                        goto out;
+                }
+                if (read_fully(sd1, &eset, int_size) == -1) {
+                        goto out;
+                }
+                remaining -= (off_t)length + 2 * field;
+                buffer[length] = '\0';
+                if (store_type == 0) {
+                        pos = hash_store.getposition(buffer);
+                        if (pos == -1)
+                                goto out;
+                        buffer2 = block_store.get_block(pos, &l);
+                        if (buffer2 == NULL) {
+                                goto out;
+                        }
+                } else {
+                        buffer2 = object_store.get_block_from_object(buffer, &l);
+                        if (buffer2 == NULL)
+                                goto out;
+                }
+                if (l < 0) {
+                        fprintf(stderr, "Invalid block length\n");
+                        goto out;
+                }
+                if (write_fully(fd2, buffer2, (size_t)l) == -1) {
+                        goto out;
+                }
+                clean_buff(&buffer);
+                clean_buff(&buffer2);
+        }
+        ret = 0;
+out:
+        /* Every exit passes through here so that buffers, path copies and
+           descriptors are released on the error paths as well. */
+        clean_buff(&buffer);
+        clean_buff(&buffer2);
+        clean_buff(&ts1);
+        clean_buff(&ts2);
+        if (sd1 >= 0) {
+                close(sd1);
+        }
+        if (fd2 >= 0) {
+                close(fd2);
+        }
+        return ret;
+
+}
diff --git a/src/ydl_restore.h b/src/ydl_restore.h
new file mode 100755
index 0000000..b16e9e5
--- /dev/null
+++ b/src/ydl_restore.h
@@ -0,0 +1,39 @@
+#ifndef YDL_RESTORE_H
+#define YDL_RESTORE_H
+
+#include
+using namespace std;
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "ydl_block.h"
+#include "ydl_catalog.h"
+#include "ydl_hash.h"
+#include "ydl_object_store.h"
+
+/* Capacity of the stub file name buffer in restorefile(). */
+#define NAME_SIZE 100
+/* Width of every integer field read from a stub file. */
+#define int_size sizeof(int)
+/* Capacity of the path buffer filled from stdin in restore_file(). */
+#define FILE_SIZE 200
+
+/* Restores deduplicated files from their "Dedup_<name>" stub files. */
+class ydl_restore
+{
+public:
+        /* Rebuilds the file at path from its stub; 0 on success, -1 on failure. */
+        int restorefile(char* path);
+        /* Lists the catalog, asks for a path on stdin and restores it. */
+        int restore_file();
+};
+
+#endif /* YDL_RESTORE_H */
\ No newline at end of file
diff --git a/src/stub.c b/src/ydl_stub.cpp
old mode 100644
new mode 100755
similarity
index 67%
rename from src/stub.c
rename to src/ydl_stub.cpp
index 9fb7700..6ebef64
--- a/src/stub.c
+++ b/src/ydl_stub.cpp
@@ -1,5 +1,4 @@
-#include "stub.h"
-#include "clean_buff.h"
+#include "ydl_stub.h"
 
 /*
  * Function to write contents to a stub file.
@@ -7,14 +6,17 @@
  * Output:int
  *
  */
-
 int
-write_to_stub(char buff[],size_t length,int fd_stub,int b_offset,int e_offset)
+ydl_stub::write_to_stub( char buff[], size_t length, int fd_stub,
+                         int b_offset, int e_offset )
 {
 
         int ret = -1;
+        /* The length field is INT_SIZE bytes on disk, so it is written from
+           an int; the first INT_SIZE bytes of a size_t depend on byte order. */
+        int stored_length = (int) length;
 
-        if (write (fd_stub, &length, int_size) == -1) {
+        if (write (fd_stub, &stored_length, INT_SIZE) == -1) {
                 fprintf(stderr, "%s\n", strerror(errno));
                 goto out;
         }
@@ -22,11 +24,11 @@ write_to_stub(char buff[],size_t length,int fd_stub,int b_offset,int e_offset)
                 fprintf(stderr, "%s\n", strerror(errno));
                 goto out;
         }
-        if (write (fd_stub, &b_offset, int_size) == -1) {
+        if (write (fd_stub, &b_offset, INT_SIZE) == -1) {
                 fprintf(stderr,"%s\n",strerror(errno));
                 goto out;
         }
-        if (write (fd_stub, &e_offset, int_size) == -1) {
+        if (write (fd_stub, &e_offset, INT_SIZE) == -1) {
                 fprintf(stderr,"%s\n",strerror(errno));
                 goto out;
         }
diff --git a/src/ydl_stub.h b/src/ydl_stub.h
new file mode 100755
index 0000000..e4ccecf
--- /dev/null
+++ b/src/ydl_stub.h
@@ -0,0 +1,29 @@
+#ifndef YDL_STUB_H
+#define YDL_STUB_H
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+/* Width of every integer field written to a stub file. */
+#define INT_SIZE sizeof(int)
+
+/* Writes records to a stub file. */
+class ydl_stub
+{
+public:
+        /* Writes length, b_offset and e_offset to fd_stub as INT_SIZE-byte
+           integers (NOTE(review): buff is presumably written between
+           length and b_offset; that write lies outside the hunks of this
+           patch - confirm in ydl_stub.cpp). */
+        int write_to_stub( char *buff,size_t length,int fd_stub,
+                           int b_offset,int e_offset);
+};
+
+#endif /* YDL_STUB_H */
\ No newline at end of file
diff --git a/tests/basic/compare_files.sh b/tests/basic/compare_files.sh
old mode 100644
new mode 100755
diff --git a/yadl.pc.in b/yadl.pc.in
old mode 100644
new mode 100755