Skip to content

Commit

Permalink
Try to detect csv file content separator automatically
Browse files Browse the repository at this point in the history
  • Loading branch information
eijei521 committed Jul 10, 2023
1 parent f9b15a5 commit c881c42
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 1 deletion.
31 changes: 30 additions & 1 deletion src/Code/Converters/CsvConverter.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

class CsvConverter implements ConverterContract
{
// Column separators tried by detectSeparator(), in this order; the first entry is
// what detectSeparator() falls back to when none of them occurs in the content.
public static $allowedSeparators = [",", ";", "|", "\t"];

public function canParseFileContent($file_content)
{
$csv = self::csvToArray($file_content);
Expand Down Expand Up @@ -78,12 +80,39 @@ private static function csvToArray($content)
fputs($fp, $content);
rewind($fp);

$separator = self::detectSeparator($content);
$csv = [];
while ( ($data = fgetcsv($fp) ) !== false ) {
while ( ($data = fgetcsv($fp, 0, $separator) ) !== false ) {
$csv[] = $data;
}
fclose($fp);

return $csv;
}

/**
 * Guess which of the allowed separators delimits the columns of a CSV string.
 *
 * Each line is split naively on every candidate separator (quoted fields are
 * not taken into account). A candidate's score is the largest number of lines
 * that split into the same number of columns, so a separator that occurs in
 * every row with a stable column count beats a character that merely shows up
 * inside some text cells. Ties go to the candidate listed first in
 * self::$allowedSeparators.
 *
 * @param string $file_content raw CSV text
 * @return string the detected separator, or the first allowed separator when
 *                none of them occurs in the content
 */
private static function detectSeparator($file_content)
{
    $lines = explode("\n", $file_content);

    $best_separator = self::$allowedSeparators[0];
    $best_score = 0;
    foreach (self::$allowedSeparators as $delimiter) {
        // column count => number of lines that split into that many columns
        $lines_per_column_count = [];
        foreach ($lines as $line) {
            $columns = substr_count($line, $delimiter) + 1;
            if ($columns === 1) continue; // delimiter not found in line

            if (!isset($lines_per_column_count[$columns])) {
                $lines_per_column_count[$columns] = 0;
            }
            $lines_per_column_count[$columns]++;
        }

        if (empty($lines_per_column_count)) continue; // separator never occurs

        // How many lines agree on the most common column count.
        $score = max($lines_per_column_count);
        if ($score > $best_score) { // strict ">" keeps the earlier separator on ties
            $best_separator = $delimiter;
            $best_score = $score;
        }
    }

    return $best_separator;
}
}
27 changes: 27 additions & 0 deletions tests/formats/CsvTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,31 @@ public function testClientAnsiFile()
->getInternalFormat();
$this->assertInternalFormatsEqual($expected, $actual);
}

/**
 * Loads the supplied CSV string and checks that it yields the two expected cues.
 *
 * @dataProvider differentContentSeparatorProvider
 */
public function testDifferentContentSeparators($string)
{
    // Reference subtitles built through the public API.
    $expected = (new Subtitles())
        ->add(1, 2, ['Oh! Can I believe my eyes!'])
        ->add(2, 3, ['If Heaven and earth.']);

    $parsed = Subtitles::loadFromString($string, 'csv');

    $this->assertInternalFormatsEqual(
        $expected->getInternalFormat(),
        $parsed->getInternalFormat()
    );
}

/**
 * Supplies the same two-cue CSV document once per allowed separator.
 *
 * Each variant is wrapped in its own data set so the test runs once per
 * separator. Returning all variants inside a single data set would pass them
 * as extra positional arguments, and only the first (comma) variant would
 * ever reach the test's single $string parameter.
 *
 * @return array[] list of data sets, each holding one CSV string
 */
public static function differentContentSeparatorProvider()
{
    $original_string = 'Start,End,Text
00:00:1,00:00:2,Oh! Can I believe my eyes!
00:00:2,00:00:3,If Heaven and earth.';

    $data_sets = [];
    foreach (CsvConverter::$allowedSeparators as $separator) {
        $data_sets[] = [str_replace(',', $separator, $original_string)];
    }

    return $data_sets;
}
}

0 comments on commit c881c42

Please sign in to comment.