Skip to content

Commit

Permalink
Candidate Release
Browse files Browse the repository at this point in the history
  • Loading branch information
Ostico committed May 24, 2024
1 parent a95f40a commit 41e2a65
Show file tree
Hide file tree
Showing 11 changed files with 626 additions and 204 deletions.
90 changes: 57 additions & 33 deletions src/Enum/CTypeEnum.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,53 +2,77 @@

namespace Matecat\SubFiltering\Enum;

use Matecat\SubFiltering\Utils\Utils;
use ReflectionClass;

class CTypeEnum {

// Layer 1
const ORIGINAL_X = 'x-original_x';
const ORIGINAL_SELF_CLOSE_PH_WITH_EQUIV_TEXT = 'x-original_ph';
const ORIGINAL_PC_OPEN = 'x-original_pc_open';
const ORIGINAL_PC_CLOSE = 'x-original_pc_close';
const ORIGINAL_PH_CONTENT = 'x-original_ph_content';
const HTML = 'x-html';
const TWIG = 'x-twig';
const RUBY_ON_RAILS = 'x-ruby-on-rails';
const SNAILS = 'x-snails';
const CURLY_BRACKETS = 'x-curly-brackets';
const PERCENT_SNAILS = 'x-percent-snails';
const PERCENT_NUMBER_SNAILS = 'x-percent-number-snails';
const PERCENTAGES = 'x-percentages';
const SPRINTF = 'x-sprintf';
const PERCENT_VARIABLE = 'x-percent-variable';
const SMART_COUNT = 'x-smart-count';
const DOUBLE_SQUARE_BRACKETS = 'x-double-square-brackets';
const DOLLAR_CURLY_BRACKETS = 'x-dollar-curly-brackets';
const SQUARE_SPRINTF = 'x-square-sprintf';
const ORIGINAL_PH_CONTENT = 'x-original_ph_content';
const HTML = 'x-html';
const TWIG = 'x-twig';
const RUBY_ON_RAILS = 'x-ruby-on-rails';
const SNAILS = 'x-snails';
const CURLY_BRACKETS = 'x-curly-brackets';
const PERCENT_SNAILS = 'x-percent-snails';
const PERCENT_NUMBER_SNAILS = 'x-percent-number-snails';
const PERCENTAGES = 'x-percentages';
const SPRINTF = 'x-sprintf';
const PERCENT_VARIABLE = 'x-percent-variable';
const SMART_COUNT = 'x-smart-count';
const DOUBLE_SQUARE_BRACKETS = 'x-double-square-brackets';
const DOLLAR_CURLY_BRACKETS = 'x-dollar-curly-brackets';
const SQUARE_SPRINTF = 'x-square-sprintf';

// Data Ref Layer 2
const ORIGINAL_PH_OR_NOT_DATA_REF = 'x-original_ph_no_data_ref';
const PH_DATA_REF = 'x-ph_data_ref';
const PC_OPEN_DATA_REF = 'x-pc_open_data_ref';
const PC_CLOSE_DATA_REF = 'x-pc_close_data_ref';
const PC_SELF_CLOSE_DATA_REF = 'x-pc_sc_data_ref';
const SC_DATA_REF = 'x-sc_data_ref';
const EC_DATA_REF = 'x-ec_data_ref';

protected static $constantsValues = [];

protected static function getConstantsMap() {
if ( empty( static::$constantsValues ) ) {
$reflectedProperty = ( new ReflectionClass( static::class ) )->getConstants();
static::$constantsValues = array_flip( $reflectedProperty );
const ORIGINAL_PC_OPEN_NO_DATA_REF = 'x-original_pc_open';
const ORIGINAL_PC_CLOSE_NO_DATA_REF = 'x-original_pc_close';
const ORIGINAL_PH_OR_NOT_DATA_REF = 'x-original_ph_no_data_ref';
const PH_DATA_REF = 'x-ph_data_ref';
const PC_OPEN_DATA_REF = 'x-pc_open_data_ref';
const PC_CLOSE_DATA_REF = 'x-pc_close_data_ref';
const PC_SELF_CLOSE_DATA_REF = 'x-pc_sc_data_ref';
const SC_DATA_REF = 'x-sc_data_ref';
const EC_DATA_REF = 'x-ec_data_ref';

protected static $allConstantValues = [];
protected static $layer2ConstantValues = [];

/**
* @return array
*/
protected static function getAllConstantValuesMap() {
if ( empty( static::$allConstantValues ) ) {
$reflectedProperty = ( new ReflectionClass( static::class ) )->getConstants();
static::$allConstantValues = array_flip( $reflectedProperty );
static::$layer2ConstantValues = array_flip(
array_filter( $reflectedProperty, function ( $key ) {
return Utils::contains( 'DATA_REF', $key );
}, ARRAY_FILTER_USE_KEY )
);
}

return static::$constantsValues;
return [ 'all' => static::$allConstantValues, 'layer2' => static::$layer2ConstantValues ];
}

/**
* @param $ctype string
*
* @return bool
*/
public static function isMatecatCType( $ctype ) {
return array_key_exists( $ctype, static::getConstantsMap() );
return array_key_exists( $ctype, static::getAllConstantValuesMap()[ 'all' ] );
}

/**
* @param $ctype string
*
* @return bool
*/
public static function isLayer2Constant( $ctype ) {
return array_key_exists( $ctype, static::getAllConstantValuesMap()[ 'layer2' ] );
}

}
4 changes: 2 additions & 2 deletions src/Filters/DataRefReplace.php
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ private function replace_Pc_TagsWithoutDataRefCorrespondenceToMatecatPhTags( $se
$segment = preg_replace(
'/' . preg_quote( $openingPcTag, '/' ) . '/',
'<ph id="' . $this->getPipeline()->getNextId() .
'" ctype="' . CTypeEnum::ORIGINAL_PC_OPEN .
'" ctype="' . CTypeEnum::ORIGINAL_PC_OPEN_NO_DATA_REF .
'" equiv-text="base64:' . base64_encode( $openingPcTag ) .
'"/>',
$segment,
Expand All @@ -166,7 +166,7 @@ private function replace_Pc_TagsWithoutDataRefCorrespondenceToMatecatPhTags( $se
$segment = preg_replace(
'/' . preg_quote( $closingPcTag, '/' ) . '/',
'<ph id="' . $this->getPipeline()->getNextId() .
'" ctype="' . CTypeEnum::ORIGINAL_PC_CLOSE .
'" ctype="' . CTypeEnum::ORIGINAL_PC_CLOSE_NO_DATA_REF .
'" equiv-text="base64:' . base64_encode( $closingPcTag ) .
'"/>',
$segment,
Expand Down
4 changes: 2 additions & 2 deletions src/Filters/DataRefRestore.php
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@ private function restoreXliffPhTagsFromMatecatPhTags( $segment ) {
* @return string
*/
private function restoreXliffPcTagsFromMatecatPhTags( $segment ) {
preg_match_all( '|<ph.+? ctype="' . CTypeEnum::ORIGINAL_PC_OPEN . '" equiv-text="base64:(.*?)"/>|iu', $segment, $matches );
preg_match_all( '|<ph.+? ctype="' . CTypeEnum::ORIGINAL_PC_CLOSE . '" equiv-text="base64:(.*?)"/>|iu', $segment, $matches );
preg_match_all( '|<ph.+? ctype="' . CTypeEnum::ORIGINAL_PC_OPEN_NO_DATA_REF . '" equiv-text="base64:(.*?)"/>|iu', $segment, $matches );
preg_match_all( '|<ph.+? ctype="' . CTypeEnum::ORIGINAL_PC_CLOSE_NO_DATA_REF . '" equiv-text="base64:(.*?)"/>|iu', $segment, $matches );

if ( empty( $matches[ 0 ] ) ) {
return $segment;
Expand Down
4 changes: 2 additions & 2 deletions src/Filters/EncodeToRawXML.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
namespace Matecat\SubFiltering\Filters;

use Matecat\SubFiltering\Commons\AbstractHandler;
use Matecat\SubFiltering\Utils\CatUtils;
use Matecat\SubFiltering\Utils\Utils;

class EncodeToRawXML extends AbstractHandler {
public function transform( $segment ) {
Expand All @@ -21,7 +21,7 @@ public function transform( $segment ) {
$segment = preg_replace( '/&#09;|\t/', '##_ent_09_##', $segment );

//Substitute 4(+)-byte characters from a UTF-8 string to htmlentities
$segment = preg_replace_callback( '/([\xF0-\xF7]...)/s', [ CatUtils::class, 'htmlentitiesFromUnicode' ], $segment );
$segment = preg_replace_callback( '/([\xF0-\xF7]...)/s', [ Utils::class, 'htmlentitiesFromUnicode' ], $segment );

// handling &#10;
if ( strpos( $segment, '##_ent_0D_##' ) !== false ) {
Expand Down
39 changes: 2 additions & 37 deletions src/Filters/FromLayer2ToRawXML.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

use Matecat\SubFiltering\Commons\AbstractHandler;
use Matecat\SubFiltering\Enum\ConstantEnum;
use Matecat\SubFiltering\Utils\CatUtils;
use Matecat\SubFiltering\Utils\Utils;

/**
* Class FromLayer2ToRawXML
Expand All @@ -25,9 +25,6 @@ class FromLayer2ToRawXML extends AbstractHandler {

public function transform( $segment ) {

// Filters BUG, segmentation on HTML, we should never get this at this level ( Should be fixed, anyway we try to cover )
// $segment = $this->placeHoldBrokenHTML( $segment );

//normal control characters must be converted to entities
$segment = str_replace(
[ "\r\n", "\r", "\n", "\t", "", ],
Expand All @@ -40,40 +37,8 @@ public function transform( $segment ) {
], $segment );

// now convert the real &nbsp;
$segment = str_replace( ConstantEnum::nbspPlaceholder, CatUtils::unicode2chr( 0Xa0 ), $segment );

// Filters BUG, segmentation on HTML, we should never get this at this level ( Should be fixed, anyway we try to cover )
// $segment = $this->resetBrokenHTML( $segment );

return $segment;
return str_replace( ConstantEnum::nbspPlaceholder, Utils::unicode2chr( 0Xa0 ), $segment );

}

// private function placeHoldBrokenHTML( $segment ) {
//
// //Filters BUG, segmentation on HTML, we should never get this at this level ( Should be fixed, anyway we try to cover )
// // &lt;a href="/help/article/1381?
// $this->brokenHTML = false;
//
// //This is from Layer 2 to Layer 1
// if ( stripos( $segment, '<a href="' ) ) {
// $segment = str_replace( '<a href="', '##__broken_lt__##a href=##__broken_quot__##', $segment );
// $this->brokenHTML = true;
// }
//
// return $segment;
//
// }
//
// private function resetBrokenHTML( $segment ) {
//
// // Reset
// if ( $this->brokenHTML ) {
// $segment = str_replace( '##__broken_lt__##a href=##__broken_quot__##', '&lt;a href="', $segment );
// }
//
// return $segment;
//
// }

}
52 changes: 52 additions & 0 deletions src/Utils/ArrayList.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?php
/**
* Created by PhpStorm.
* @author hashashiyyin [email protected] / [email protected]
* Date: 24/05/24
* Time: 18:25
*
*/

namespace Matecat\SubFiltering\Utils;

use ArrayObject;

/**
 * ArrayObject specialization whose reads are tolerant of missing keys:
 * looking up a key that is not present yields null instead of being
 * forwarded to ArrayObject::offsetGet().
 */
class ArrayList extends ArrayObject {

    /**
     * @param array $list initial content of the list (defaults to an empty list)
     */
    public function __construct( array $list = [] ) {
        parent::__construct( $list );
    }

    /**
     * Named constructor, handy for fluent usage.
     *
     * Uses late static binding, so a subclass calling instance() receives
     * an object of its own type.
     *
     * @param array $list initial content of the list (defaults to an empty list)
     *
     * @return static
     */
    public static function instance( array $list = [] ) {
        return new static( $list );
    }

    /**
     * Array-access read ( $list[ $key ] ) that returns null for a missing key.
     *
     * The attribute below silences the PHP 8.1+ deprecation raised when an
     * override does not declare the tentative return type of
     * ArrayObject::offsetGet(); on older PHP versions the line is parsed as
     * a plain "#" comment, so the class stays loadable there.
     *
     * @param $key
     *
     * @return false|mixed|null the stored value, or null when $key is not set
     */
    #[\ReturnTypeWillChange]
    public function offsetGet( $key ) {
        if ( $this->offsetExists( $key ) ) {
            return parent::offsetGet( $key );
        }

        return null;
    }

    /**
     * Returns the element at the specified position in this list.
     *
     * Method-call alias of offsetGet().
     *
     * @param $key
     *
     * @return false|mixed|null the element at the specified position in this list,
     *                          or null when $key is not set
     */
    public function get( $key ) {
        return $this->offsetGet( $key );
    }

}
Loading

0 comments on commit 41e2a65

Please sign in to comment.