Skip to content

Commit

Permalink
Parse csv with text in first column
Browse files Browse the repository at this point in the history
  • Loading branch information
mantas-done committed Mar 29, 2024
1 parent f40aa3f commit 9736bc1
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 24 deletions.
45 changes: 21 additions & 24 deletions src/Code/Converters/CsvConverter.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,32 +17,32 @@ public function canParseFileContent($file_content)
{
$csv = self::csvToArray(trim($file_content));

if (!isset($csv[1][0]) || !isset($csv[1][0])) {
$count = count($csv);
if ($count < 2) {
return false;
}
$is_end_time = (bool) preg_match(self::timeRegex(), $csv[1][1]);
if ($is_end_time && !isset($csv[1][2])) {
return false;
}

$row_count = null;
foreach ($csv as $rows) {
$count = count($rows);
if ($row_count === null) {
$row_count = $count;
$last_row = $csv[$count - 1];

$has_timestamp = false;
$has_text = false;
foreach ($last_row as $cell) {
$is_time = (bool)preg_match(self::timeRegex(), $cell);
$timestamp = preg_replace(self::timeRegex(), '', $cell);
$only_timestamp_in_cell = trim($timestamp) === '';
if ($is_time) {
if ($only_timestamp_in_cell) {
$has_timestamp = true;
continue;
} else {
return false;
}
}
if ($row_count !== $count) {
return false; // if not every row has the same column count
$is_text = TxtConverter::hasText($cell);
if ($has_timestamp && $is_text) {
return true;
}
}

if (!isset($csv[1][0])) {
return false;
}
$cell = $csv[1][0];
$timestamp = preg_replace(self::timeRegex(), '', $cell);
$only_timestamp_on_first_column = trim($timestamp) === '';
return count($csv[1]) >= 2 && $only_timestamp_on_first_column; // at least 2 columns: timestamp + text
return $has_timestamp && $has_text;
}

/**
Expand All @@ -62,9 +62,6 @@ public function fileContentToInternalFormat($file_content, $original_file_conten
$column_count = count($last_row);
$checked_column = 0;
foreach ($last_row as $k => $column) {
if (TxtConverter::hasText($column)) {
break;
}
if (preg_match(self::timeRegex(), $column)) {
$start_time_column = $k;
$checked_column = $k;
Expand Down
15 changes: 15 additions & 0 deletions tests/formats/CsvTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -192,4 +192,19 @@ public function testGapsInFront()
$this->assertInternalFormatsEqual($expected_internal_format, $actual_internal_format);

}

/**
 * Loads a CSV whose first column is a speaker name, followed by start time,
 * end time and text columns, and checks that the result equals a single
 * subtitle built with add(0, 1, 'a').
 */
public function testSpeakerInFront()
{
    // Header row plus one data row; the timestamps sit in columns 2 and 3.
    $csv_with_speaker = <<<TEXT
"Speaker Name","Start Time","End Time","Text"
"Unknown","00:00:00:00","00:00:01:00","a"
TEXT;

    $actual = Subtitles::loadFromString($csv_with_speaker)->getInternalFormat();
    $expected = (new Subtitles())->add(0, 1, 'a')->getInternalFormat();

    $this->assertInternalFormatsEqual($expected, $actual);
}
}

0 comments on commit 9736bc1

Please sign in to comment.