From c431ca37217faba10b1fe2e6e4c0f15a4f095191 Mon Sep 17 00:00:00 2001 From: ppetrov Date: Thu, 7 Sep 2017 16:04:11 +0300 Subject: [PATCH] Initial commit --- .gitignore | 2 + README.md | 1 + composer.json | 21 ++ sample-data/empty.csv | 0 sample-data/info-no-head-row.csv | 4 + sample-data/info-semicolon-separator.csv | 5 + ...-with-empty-rows-before-actual-content.csv | 8 + sample-data/info.csv | 5 + src/CsvFile.php | 185 ++++++++++++++ src/Exception.php | 6 + tests/CsvFileTest.php | 230 ++++++++++++++++++ 11 files changed, 467 insertions(+) create mode 100644 .gitignore create mode 100644 README.md create mode 100644 composer.json create mode 100644 sample-data/empty.csv create mode 100644 sample-data/info-no-head-row.csv create mode 100644 sample-data/info-semicolon-separator.csv create mode 100644 sample-data/info-with-empty-rows-before-actual-content.csv create mode 100644 sample-data/info.csv create mode 100644 src/CsvFile.php create mode 100644 src/Exception.php create mode 100644 tests/CsvFileTest.php diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8b7ef35 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/vendor +composer.lock diff --git a/README.md b/README.md new file mode 100644 index 0000000..564781e --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# carbon-csv diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..32fa618 --- /dev/null +++ b/composer.json @@ -0,0 +1,21 @@ +{ + + "name": "htmlburger/carbon-csv", + "description": "Simple CSV file parser", + + "require": { + "php": "^5.4.0 || ^7.0" + }, + + "autoload": { + "psr-4": { + "Carbon_CSV\\": "src/" + } + }, + + "require-dev": { + "illuminate/support": "^5.4", + "symfony/var-dumper": "^3.3", + "phpunit/phpunit": "^6.1" + } +} diff --git a/sample-data/empty.csv b/sample-data/empty.csv new file mode 100644 index 0000000..e69de29 diff --git a/sample-data/info-no-head-row.csv b/sample-data/info-no-head-row.csv new file mode 100644 index 0000000..0f70ebb --- 
/dev/null +++ b/sample-data/info-no-head-row.csv @@ -0,0 +1,4 @@ +John,Doe,Funny Company Name,"Some Address 2, 12345, Country A" +Jane,Dove,Nice Company Name,"That Address 3, 456, Country B" +John,Smith,Nice Company Name, +Jane,Smith,Funny Company Name,"This Address 4, City, Country C" diff --git a/sample-data/info-semicolon-separator.csv b/sample-data/info-semicolon-separator.csv new file mode 100644 index 0000000..ff8f554 --- /dev/null +++ b/sample-data/info-semicolon-separator.csv @@ -0,0 +1,5 @@ +First Name;Last Name;Company Name;Address +John;Doe;Funny Company Name;"Some Address 2; 12345; Country A" +Jane;Dove;Nice Company Name;"That Address 3; 456; Country B" +John;Smith;Nice Company Name; +Jane;Smith;Funny Company Name;"This Address 4; City; Country C" diff --git a/sample-data/info-with-empty-rows-before-actual-content.csv b/sample-data/info-with-empty-rows-before-actual-content.csv new file mode 100644 index 0000000..0c54ff9 --- /dev/null +++ b/sample-data/info-with-empty-rows-before-actual-content.csv @@ -0,0 +1,8 @@ + + + +First Name,Last Name,Company Name,Address +John,Doe,Funny Company Name,"Some Address 2, 12345, Country A" +Jane,Dove,Nice Company Name,"That Address 3, 456, Country B" +John,Smith,Nice Company Name, +Jane,Smith,Funny Company Name,"This Address 4, City, Country C" diff --git a/sample-data/info.csv b/sample-data/info.csv new file mode 100644 index 0000000..80b2e9e --- /dev/null +++ b/sample-data/info.csv @@ -0,0 +1,5 @@ +First Name,Last Name,Company Name,Address +John,Doe,Funny Company Name,"Some Address 2, 12345, Country A" +Jane,Dove,Nice Company Name,"That Address 3, 456, Country B" +John,Smith,Nice Company Name, +Jane,Smith,Funny Company Name,"This Address 4, City, Country C" diff --git a/src/CsvFile.php b/src/CsvFile.php new file mode 100644 index 0000000..3a04d46 --- /dev/null +++ b/src/CsvFile.php @@ -0,0 +1,185 @@ +file_path = $file_path; + parent::__construct($file_path, 'r'); + $this->setFlags(File::READ_CSV | File::READ_AHEAD | 
File::SKIP_EMPTY | File::DROP_NEW_LINE); + $this->setCsvControl($delimiter, $enclosure, $escape); + } + + /** + * Read number of lines in CSV + * @return int number of lines + */ + function count() { + return count($this->to_array()); + } + + public function to_array() { + $rows = []; + foreach ($this as $row) { + $rows[] = $row; + } + + return $rows; + } + + public function rewind() { + $this->seek($this->offset_row); + } + + /** + * Override the key function in order to allow shifting the indices according + * to the current offset. + */ + public function key() { + return $this->row_counter - 1; + } + + public function current() { + $this->row_counter++; + $row = parent::current(); + + $row_keys = array_keys($row); + if (!in_array($this->start_column, $row_keys)) { + throw new Exception(sprintf('Start column must be between %d and %d.', min($row_keys), max($row_keys))); + } + + $formatted_row = $this->format_row($row); + + return $formatted_row; + } + + private function remove_columns($old_row) { + $new_row = array(); + + $index = 0; + foreach ($old_row as $column_name => $column_value) { + if (!in_array($index, $this->columns_to_skip)) { + $new_row[$column_name] = $column_value; + } + + $index++; + } + + return $new_row; + } + + private function format_row($row) { + $row = array_combine( + $this->get_column_names($row), + $row + ); + + // don't remove columns from the head row + // we remove columns after the row is combined with the header columns + if (!$this->is_head_row) { + $row = $this->remove_columns($row); + } + + if (!$this->uses_column_names) { + $row = array_values($row); + } + + return $row; + } + + private function get_column_names($row) { + if (!empty($this->column_names)) { + return $this->column_names; + } + + return array_keys($row); + } + + public function set_column_names($mapping) { + $this->uses_column_names = true; + + if (empty($this->column_names)) { + $this->column_names = $mapping; + } else { + $this->column_names = array_combine( + 
array_flip($this->column_names), + $mapping + ); + } + } + + public function use_first_row_as_header() { + if ($this->row_counter !== 0) { + throw new \LogicException("Column mapping can't be changed after CSV processing has been started"); + } + + $this->uses_column_names = true; + + $this->is_head_row = true; + $this->column_names = $this->current(); + $this->is_head_row = false; + + // Start processing from the second row (since the first one isn't part of the data) + $this->offset_row++; + $this->rewind(); + } + + public function skip_to_row($row) { + $this->offset_row = $row; + $this->rewind(); + } + + public function skip_columns($indexes) { + $this->set_columns_to_skip($indexes); + } + + public function skip_to_column($column_index) { + if (!is_int($column_index)) { + throw new Exception('Only numbers are allowed for skip to column.'); + } + + if ($column_index < 0) { + throw new Exception('Please use numbers larger than zero.'); + } + + $this->start_column = $column_index; + + // Starting from the first column (index 0, the default) leaves nothing to skip; range(0, 0) is [0] and would wrongly drop column 0. + if ($column_index === 0) { + return; + } + + $last_column_index = $column_index - 1; + + $this->set_columns_to_skip(range(0, $last_column_index)); + } + + private function set_columns_to_skip($columns) { + $this->columns_to_skip = array_unique(array_merge($columns, $this->columns_to_skip)); + } +} diff --git a/src/Exception.php b/src/Exception.php new file mode 100644 index 0000000..8598f60 --- /dev/null +++ b/src/Exception.php @@ -0,0 +1,6 @@ +assertEquals(5, count($csv)); + } + + /** + * @expectedException \Carbon_CSV\Exception + */ + function test_it_throws_exception_when_constructed_with_a_missing_file() { + new CsvFile(__DIR__ . 
'/no-such-file-71e37259-48d0-416a-b640-2beeb23aa38f.tmp'); + } + + function get_expected_result_indexed() { + return [ + [0 => 'John', 1 => 'Doe', 2 => 'Funny Company Name', 3 => 'Some Address 2, 12345, Country A'], + [0 => 'Jane', 1 => 'Dove', 2 => 'Nice Company Name', 3 => 'That Address 3, 456, Country B'], + [0 => 'John', 1 => 'Smith', 2 => 'Nice Company Name', 3 => ''], + [0 => 'Jane', 1 => 'Smith', 2 => 'Funny Company Name', 3 => 'This Address 4, City, Country C'], + ]; + } + + function get_expected_result_custom_columns() { + return [ + ['first_name' => 'John', 'last_name' => 'Doe', 'company_name' => 'Funny Company Name', 'address' => 'Some Address 2, 12345, Country A'], + ['first_name' => 'Jane', 'last_name' => 'Dove', 'company_name' => 'Nice Company Name', 'address' => 'That Address 3, 456, Country B'], + ['first_name' => 'John', 'last_name' => 'Smith', 'company_name' => 'Nice Company Name', 'address' => ''], + ['first_name' => 'Jane', 'last_name' => 'Smith', 'company_name' => 'Funny Company Name', 'address' => 'This Address 4, City, Country C'], + ]; + } + + function get_expected_result_actual_columns() { + return [ + ['First Name' => 'John', 'Last Name' => 'Doe', 'Company Name' => 'Funny Company Name', 'Address' => 'Some Address 2, 12345, Country A'], + ['First Name' => 'Jane', 'Last Name' => 'Dove', 'Company Name' => 'Nice Company Name', 'Address' => 'That Address 3, 456, Country B'], + ['First Name' => 'John', 'Last Name' => 'Smith', 'Company Name' => 'Nice Company Name', 'Address' => ''], + ['First Name' => 'Jane', 'Last Name' => 'Smith', 'Company Name' => 'Funny Company Name', 'Address' => 'This Address 4, City, Country C'], + ]; + } + + /** + * @expectedException \Carbon_CSV\Exception + * @expectedExceptionMessage Empty file. + */ + function test_it_throws_exception_when_constructed_with_an_empty_file() { + $csv = new CsvFile(__DIR__ . '/../sample-data/empty.csv'); + } + + function test_without_setup() { + $csv = new CsvFile(__DIR__ . 
'/../sample-data/info-no-head-row.csv'); + $this->assertEquals($this->get_expected_result_indexed(), $csv->to_array()); + } + + function test_iterator() { + $csv = new CsvFile(__DIR__ . '/../sample-data/info.csv'); + $csv->skip_to_row(1); + $csv->set_column_names([ + 0 => 'first_name', + 1 => 'last_name', + 2 => 'company_name', + 3 => 'address', + ]); + + $i = 0; + foreach ($csv as $row_number => $row) { + $this->assertEquals($i++, $row_number); + } + } + + function test_setup_head_rows() { + $csv = new CsvFile(__DIR__ . '/../sample-data/info-no-head-row.csv'); + + $csv->set_column_names([ + 0 => 'first_name', + 1 => 'last_name', + 2 => 'company_name', + 3 => 'address', + ]); + + $this->assertEquals($this->get_expected_result_custom_columns(), $csv->to_array()); + } + + function test_with_head_row() { + $csv = new CsvFile(__DIR__ . '/../sample-data/info.csv'); + $csv->use_first_row_as_header(); + + $this->assertEquals($this->get_expected_result_actual_columns(), $csv->to_array()); + } + + function test_with_head_row_and_custom_mappings() { + $csv = new CsvFile(__DIR__ . '/../sample-data/info.csv'); + $csv->use_first_row_as_header(); + $csv->set_column_names([ + 'First Name' => 'first_name', + 'Last Name' => 'last_name', + 'Company Name' => 'company_name', + 'Address' => 'address', + ]); + + $this->assertEquals($this->get_expected_result_custom_columns(), $csv->to_array()); + } + + function test_skip_rows() { + $csv = new CsvFile(__DIR__ . '/../sample-data/info.csv'); + $csv->skip_to_row(1); + $csv->set_column_names([ + 0 => 'first_name', + 1 => 'last_name', + 2 => 'company_name', + 3 => 'address', + ]); + + $this->assertEquals($this->get_expected_result_custom_columns(), $csv->to_array()); + } + + function test_use_first_row_after_skipping_rows() { + $csv = new CsvFile(__DIR__ . 
'/../sample-data/info-with-empty-rows-before-actual-content.csv'); + $csv->skip_to_row(3); + $csv->use_first_row_as_header(); + + $this->assertEquals($this->get_expected_result_actual_columns(), $csv->to_array()); + } + + function test_separator_is_respected() { + $csv = new CsvFile(__DIR__ . '/../sample-data/info-semicolon-separator.csv', ';'); + $csv->use_first_row_as_header(); + + $this->assertEquals([ + ['First Name' => 'John', 'Last Name' => 'Doe', 'Company Name' => 'Funny Company Name', 'Address' => 'Some Address 2; 12345; Country A'], + ['First Name' => 'Jane', 'Last Name' => 'Dove', 'Company Name' => 'Nice Company Name', 'Address' => 'That Address 3; 456; Country B'], + ['First Name' => 'John', 'Last Name' => 'Smith', 'Company Name' => 'Nice Company Name', 'Address' => ''], + ['First Name' => 'Jane', 'Last Name' => 'Smith', 'Company Name' => 'Funny Company Name', 'Address' => 'This Address 4; City; Country C'], + ], $csv->to_array()); + } + + function test_skip_to_column() { + $csv = new CsvFile(__DIR__ . '/../sample-data/info-no-head-row.csv'); + $csv->skip_to_column(1); + + $this->assertEquals( [ + [0 => 'Doe', 1 => 'Funny Company Name', 2 => 'Some Address 2, 12345, Country A'], + [0 => 'Dove', 1 => 'Nice Company Name', 2 => 'That Address 3, 456, Country B'], + [0 => 'Smith', 1 => 'Nice Company Name', 2 => ''], + [0 => 'Smith', 1 => 'Funny Company Name', 2 => 'This Address 4, City, Country C'], + ], $csv->to_array() ); + } + + function test_skip_to_first_column_and_use_first_row_as_header() { + $csv = new CsvFile(__DIR__ . 
'/../sample-data/info.csv'); + $csv->skip_to_column(1); + $csv->use_first_row_as_header(); + + $this->assertEquals( [ + ['Last Name' => 'Doe', 'Company Name' => 'Funny Company Name', 'Address' => 'Some Address 2, 12345, Country A'], + ['Last Name' => 'Dove', 'Company Name' => 'Nice Company Name', 'Address' => 'That Address 3, 456, Country B'], + ['Last Name' => 'Smith', 'Company Name' => 'Nice Company Name', 'Address' => ''], + ['Last Name' => 'Smith', 'Company Name' => 'Funny Company Name', 'Address' => 'This Address 4, City, Country C'], + ], $csv->to_array() ); + } + + function test_skip_to_column_outside_of_column_index_range() { + $this->expectException(Exception::class); + + $csv = new CsvFile(__DIR__ . '/../sample-data/info.csv' ); + $csv->skip_to_column(999); + $csv->to_array(); + } + + function test_exclude_multiple_columns() { + $csv = new CsvFile(__DIR__ . '/../sample-data/info-no-head-row.csv'); + $csv->skip_columns(array(0, 2, 3)); + + $this->assertEquals([ + [ + 0 => 'Doe' + ], + [ + 0 => 'Dove' + ], + [ + 0 => 'Smith' + ], + [ + 0 => 'Smith' + ] + ], $csv->to_array()); + } + + function test_custom_header_with_exclude_multiple_columns() { + $csv = new CsvFile(__DIR__ . '/../sample-data/info.csv' ); + $csv->use_first_row_as_header(); + $csv->skip_columns(array(0, 2, 3)); + $csv->set_column_names([ + 'First Name' => 'first_name', + 'Last Name' => 'last_name', + 'Company Name' => 'company_name', + 'Address' => 'address' + ]); + + $this->assertEquals( [ + [ + 'last_name' => 'Doe' + ], + [ + 'last_name' => 'Dove' + ], + [ + 'last_name' => 'Smith' + ], + [ + 'last_name' => 'Smith' + ] + ], $csv->to_array() ); + } + + function test_throws_exception_when_using_non_number_characters_for_skip_to_column() { + $this->expectException(Exception::class); + $this->expectExceptionMessage('Only numbers are allowed for skip to column.'); + + $csv = new CsvFile(__DIR__ . '/../sample-data/info.csv' ); + $csv->skip_to_column('a'); + } +}