Feature: Labelprint für Kistenetiketten hinzugefügt
This commit is contained in:
362
vendor/tecnickcom/tc-lib-unicode/src/Bidi.php
vendored
Normal file
362
vendor/tecnickcom/tc-lib-unicode/src/Bidi.php
vendored
Normal file
@@ -0,0 +1,362 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Bidi.php
|
||||
*
|
||||
* @since 2011-05-23
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* This file is part of tc-lib-unicode software library.
|
||||
*/
|
||||
|
||||
namespace Com\Tecnick\Unicode;
|
||||
|
||||
use Com\Tecnick\Unicode\Bidi\Shaping;
|
||||
use Com\Tecnick\Unicode\Bidi\StepI;
|
||||
use Com\Tecnick\Unicode\Bidi\StepL;
|
||||
use Com\Tecnick\Unicode\Bidi\StepN;
|
||||
use Com\Tecnick\Unicode\Bidi\StepP;
|
||||
use Com\Tecnick\Unicode\Bidi\StepW;
|
||||
use Com\Tecnick\Unicode\Bidi\StepX;
|
||||
use Com\Tecnick\Unicode\Bidi\StepXten;
|
||||
use Com\Tecnick\Unicode\Data\Pattern as UniPattern;
|
||||
use Com\Tecnick\Unicode\Data\Type as UniType;
|
||||
use Com\Tecnick\Unicode\Exception as UnicodeException;
|
||||
|
||||
/**
|
||||
* Com\Tecnick\Unicode\Bidi
|
||||
*
|
||||
* @since 2015-07-13
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*/
|
||||
class Bidi
{
    /**
     * String to process
     */
    protected string $str = '';

    /**
     * Array of UTF-8 chars
     *
     * @var array<string>
     */
    protected array $chrarr = [];

    /**
     * Array of UTF-8 codepoints
     *
     * @var array<int>
     */
    protected array $ordarr = [];

    /**
     * Processed string
     */
    protected string $bidistr = '';

    /**
     * Array of processed UTF-8 chars
     *
     * @var array<string>
     */
    protected array $bidichrarr = [];

    /**
     * Array of processed UTF-8 codepoints
     *
     * @var array<int>
     */
    protected array $bidiordarr = [];

    /**
     * If 'R' forces RTL, if 'L' forces LTR
     */
    protected string $forcedir = '';

    /**
     * If true enable shaping
     */
    protected bool $shaping = true;

    /**
     * True if the string contains arabic characters
     */
    protected bool $arabic = false;

    /**
     * Array of character data
     *
     * @var array<int, array{
     *             'char': int,
     *             'i': int,
     *             'level': int,
     *             'otype': string,
     *             'pdimatch': int,
     *             'pos': int,
     *             'type': string,
     *             'x': int,
     *         }>
     */
    protected array $chardata = [];

    /**
     * Convert object
     */
    protected Convert $conv;

    /**
     * Reverse the RTL substrings using the Bidirectional Algorithm
     * http://unicode.org/reports/tr9/
     *
     * When the input needs no RTL handling (see isRtlMode()) the input is copied
     * unchanged to the processed fields and no step of the algorithm is run.
     *
     * @param ?string        $str      String to convert (if null it will be generated from $chrarr or $ordarr)
     * @param ?array<string> $chrarr   Array of UTF-8 chars (if empty it will be generated from $str or $ordarr)
     * @param ?array<int>    $ordarr   Array of UTF-8 codepoints (if empty it will be generated from $str or $chrarr)
     * @param string         $forcedir If 'R' forces RTL, if 'L' forces LTR
     * @param bool           $shaping  If true enable the shaping algorithm
     *
     * @throws UnicodeException when $str is null and both arrays are null or empty
     */
    public function __construct(
        ?string $str = null,
        ?array $chrarr = null,
        ?array $ordarr = null,
        string $forcedir = '',
        bool $shaping = true
    ) {
        if (($str === null) && ($chrarr === null || $chrarr === []) && ($ordarr === null || $ordarr === [])) {
            throw new UnicodeException('empty input');
        }

        $this->conv = new Convert();
        $this->setInput($str, $chrarr, $ordarr, $forcedir);

        if (! $this->isRtlMode()) {
            $this->bidistr = $this->str;
            $this->bidichrarr = $this->chrarr;
            $this->bidiordarr = $this->ordarr;
            return;
        }

        // shaping is only applied when requested AND arabic characters were detected
        $this->shaping = ($shaping && $this->arabic);

        $this->process();
    }

    /**
     * Set Input data
     *
     * Whichever of the three representations is missing is derived from the ones provided.
     *
     * @param ?string        $str      String to convert (if null it will be generated from $chrarr or $ordarr)
     * @param ?array<string> $chrarr   Array of UTF-8 chars (if empty it will be generated from $str or $ordarr)
     * @param ?array<int>    $ordarr   Array of UTF-8 codepoints (if empty it will be generated from $str or $chrarr)
     * @param string         $forcedir If 'R' forces RTL, if 'L' forces LTR
     *
     * @SuppressWarnings("PHPMD.CyclomaticComplexity")
     */
    protected function setInput(
        ?string $str = null,
        ?array $chrarr = null,
        ?array $ordarr = null,
        string $forcedir = ''
    ): void {
        if ($str === null) {
            if (($chrarr === null || $chrarr === []) && ($ordarr !== null && $ordarr !== [])) {
                $chrarr = $this->conv->ordArrToChrArr($ordarr);
            }

            // implode() does not accept null: fall back to an empty list so that
            // a call relying on the all-null defaults yields an empty string.
            $str = implode('', $chrarr ?? []);
        }

        if ($chrarr === null || $chrarr === []) {
            $chrarr = $this->conv->strToChrArr($str);
        }

        if ($ordarr === null || $ordarr === []) {
            $ordarr = $this->conv->chrArrToOrdArr($chrarr);
        }

        $this->str = $str;
        $this->chrarr = $chrarr;
        $this->ordarr = $ordarr;
        $this->forcedir = '';
        if ($forcedir !== '') {
            // only the first character is significant; it is stored upper-cased
            $this->forcedir = strtoupper($forcedir[0]);
        }
    }

    /**
     * Returns the processed array of UTF-8 codepoints
     *
     * @return array<int>
     */
    public function getOrdArray(): array
    {
        return $this->bidiordarr;
    }

    /**
     * Returns the processed array of UTF-8 chars
     *
     * The array is built lazily from the processed codepoints on first access.
     *
     * @return array<string>
     */
    public function getChrArray(): array
    {
        if ($this->bidichrarr === []) {
            $this->bidichrarr = $this->conv->ordArrToChrArr($this->bidiordarr);
        }

        return $this->bidichrarr;
    }

    /**
     * Returns the number of characters in the processed string
     */
    public function getNumChars(): int
    {
        return count($this->getChrArray());
    }

    /**
     * Returns the processed string
     *
     * The string is built lazily from the processed chars on first access.
     */
    public function getString(): string
    {
        if ($this->bidistr === '') {
            $this->bidistr = implode('', $this->getChrArray());
        }

        return $this->bidistr;
    }

    /**
     * Returns an array with processed chars as keys
     *
     * @return array<int, true>
     */
    public function getCharKeys(): array
    {
        return array_fill_keys(array_values($this->bidiordarr), true);
    }

    /**
     * P1. Split the text into separate paragraphs.
     *     A paragraph separator is kept with the previous paragraph.
     *
     * A new (possibly empty) paragraph is opened after every separator, so an
     * input ending with a separator yields a trailing empty paragraph.
     *
     * @return array<int, array<int>>
     */
    protected function getParagraphs(): array
    {
        $paragraph = [
            0 => [],
        ];
        $pdx = 0; // paragraphs index
        foreach ($this->ordarr as $ord) {
            $paragraph[$pdx][] = $ord;
            if (isset(UniType::UNI[$ord]) && (UniType::UNI[$ord] == 'B')) {
                ++$pdx;
                $paragraph[$pdx] = [];
            }
        }

        return $paragraph;
    }

    /**
     * Process the string
     *
     * Runs the X, X10, W, N, I (and optionally shaping) steps on every isolated
     * level run sequence of every paragraph, then the L steps on the paragraph,
     * appending the resulting codepoints to $this->bidiordarr.
     *
     * @SuppressWarnings("PHPMD.CyclomaticComplexity")
     */
    protected function process(): void
    {
        // split the text into separate paragraphs.
        $paragraph = $this->getParagraphs();

        // Within each paragraph, apply all the other rules of this algorithm.
        foreach ($paragraph as $par) {
            $pel = $this->getPel($par);
            $stepx = new StepX($par, $pel);
            $stepx10 = new StepXten($stepx->getChrData(), $pel);
            $ilrs = $stepx10->getIsolatedLevelRunSequences();
            $runs = [];
            $maxlevel = 0;
            foreach ($ilrs as $ilr) {
                $stepw = new StepW($ilr);
                $stepn = new StepN($stepw->getSequence());
                $stepi = new StepI($stepn->getSequence());
                $ilr = $stepi->getSequence();
                if ($this->shaping) {
                    $shaping = new Shaping($ilr);
                    $ilr = $shaping->getSequence();
                }

                // collect the runs and merge them once after the loop instead of
                // re-copying the growing accumulator on every iteration
                $runs[] = $ilr['item'];

                if ($ilr['maxlevel'] > $maxlevel) {
                    $maxlevel = $ilr['maxlevel'];
                }
            }

            $stepl = new StepL(array_merge(...$runs), $pel, $maxlevel);
            foreach ($stepl->getChrData() as $chardatum) {
                $this->bidiordarr[] = $chardatum['char'];
            }

            // add back the paragraph separators
            $lastchar = end($par);
            if (
                ($lastchar !== false)
                && ($lastchar >= 0)
                && isset(UniType::UNI[$lastchar])
                && (UniType::UNI[$lastchar] == 'B')
            ) {
                $this->bidiordarr[] = $lastchar;
            }
        }
    }

    /**
     * Get the paragraph embedding level
     *
     * A forced direction takes precedence: 'R' gives level 1 and 'L' gives level 0.
     * Otherwise the level is computed by StepP.
     *
     * @param array<int> $par Paragraph
     */
    protected function getPel(array $par): int
    {
        if ($this->forcedir === 'R') {
            return 1;
        }

        if ($this->forcedir === 'L') {
            return 0;
        }

        $stepp = new StepP($par);
        return $stepp->getPel();
    }

    /**
     * Check if the input string contains RTL characters to process
     *
     * As a side effect this records in $this->arabic whether the ARABIC pattern matched.
     */
    protected function isRtlMode(): bool
    {
        $this->arabic = (bool) preg_match(UniPattern::ARABIC, $this->str);
        return (($this->forcedir === 'R') || $this->arabic || preg_match(UniPattern::RTL, $this->str));
    }
}
|
||||
140
vendor/tecnickcom/tc-lib-unicode/src/Bidi/Shaping.php
vendored
Normal file
140
vendor/tecnickcom/tc-lib-unicode/src/Bidi/Shaping.php
vendored
Normal file
@@ -0,0 +1,140 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Shaping.php
|
||||
*
|
||||
* @since 2011-05-23
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* This file is part of tc-lib-unicode software library.
|
||||
*/
|
||||
|
||||
namespace Com\Tecnick\Unicode\Bidi;
|
||||
|
||||
use Com\Tecnick\Unicode\Data\Arabic as UniArabic;
|
||||
use Com\Tecnick\Unicode\Data\Constant as UniConstant;
|
||||
|
||||
/**
|
||||
* Com\Tecnick\Unicode\Bidi\Shaping
|
||||
*
|
||||
* @since 2015-07-13
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* @phpstan-import-type SeqData from \Com\Tecnick\Unicode\Bidi\Shaping\Arabic
|
||||
*/
|
||||
class Shaping extends \Com\Tecnick\Unicode\Bidi\Shaping\Arabic
{
    /**
     * Shaping
     * Cursively connected scripts, such as Arabic or Syriac,
     * require the selection of positional character shapes that depend on adjacent characters.
     * Shaping is logically applied after the Bidirectional Algorithm is used and is limited to
     * characters within the same directional run.
     *
     * The sequence is processed immediately; read the result with getSequence().
     *
     * @param SeqData $seq isolated Sequence array
     */
    public function __construct(array $seq)
    {
        $this->seq = $seq;
        $this->newchardata = $seq['item'];
        $this->process();
    }

    /**
     * Returns the processed sequence
     *
     * @return SeqData
     */
    public function getSequence(): array
    {
        return $this->seq;
    }

    /**
     * Process
     *
     * Shapes every character whose original type is 'AL', combines shadda
     * diacritics, drops the characters marked for deletion and stores the
     * re-indexed result in $this->seq['item'].
     */
    protected function process(): void
    {
        $this->setAlChars();
        for ($idx = 0; $idx < $this->seq['length']; ++$idx) {
            if ($this->seq['item'][$idx]['otype'] == 'AL') {
                $thischar = $this->seq['item'][$idx];
                // 'x' is the position of this character inside $this->alchars (set by setAlChars)
                $pos = $thischar['x'];
                $prevchar = (($pos > 0) ? $this->alchars[($pos - 1)] : null);
                $nextchar = ((($pos + 1) < $this->numalchars) ? $this->alchars[($pos + 1)] : null);
                $this->processAlChar($idx, $pos, $prevchar, $thischar, $nextchar);
            }
        }

        $this->combineShadda();
        $this->removeDeletedChars();
        $this->seq['item'] = array_values($this->newchardata);
        // Reset rather than unset(): unsetting a typed property leaves it
        // uninitialized, so any later read would throw an Error.
        $this->newchardata = [];
    }

    /**
     * Set AL chars array
     *
     * Collects the characters that take part in shaping (original type 'AL',
     * SPACE and ZERO WIDTH NON-JOINER) into $this->alchars. Each collected entry
     * stores in 'i' its index inside the sequence, and the sequence item stores
     * in 'x' its index inside $this->alchars.
     */
    protected function setAlChars(): void
    {
        $this->numalchars = 0;
        for ($idx = 0; $idx < $this->seq['length']; ++$idx) {
            if (
                ($this->seq['item'][$idx]['otype'] == 'AL')
                || ($this->seq['item'][$idx]['char'] == UniConstant::SPACE)
                || ($this->seq['item'][$idx]['char'] == UniConstant::ZERO_WIDTH_NON_JOINER)
            ) {
                // Copy the item first and set 'i' afterwards: merging the item on
                // top of a pre-set 'i' would let the item's own 'i' value overwrite
                // the sequence index that processAlChar() relies on.
                $alchar = $this->seq['item'][$idx];
                $alchar['i'] = $idx;
                $this->alchars[$this->numalchars] = $alchar;
                $this->seq['item'][$idx]['x'] = $this->numalchars;
                ++$this->numalchars;
            }
        }
    }

    /**
     * Combine characters that can occur with Arabic Shadda (0651 HEX, 1617 DEC).
     * Putting the combining mark and shadda in the same glyph allows
     * to avoid the two marks overlapping each other in an illegible manner.
     *
     * The shadda is marked for deletion (-1) and the following diacritic is
     * replaced by its combined form from UniArabic::DIACRITIC.
     */
    protected function combineShadda(): void
    {
        $last = ($this->seq['length'] - 1);
        for ($idx = 0; $idx < $last; ++$idx) {
            $cur = $this->newchardata[$idx]['char'];
            $nxt = $this->newchardata[($idx + 1)]['char'];
            if (
                ($cur == UniArabic::SHADDA)
                && ($nxt >= 0) && (isset(UniArabic::DIACRITIC[$nxt]))
            ) {
                $this->newchardata[$idx]['char'] = -1;
                $this->newchardata[($idx + 1)]['char'] = UniArabic::DIACRITIC[$nxt];
            }
        }
    }

    /**
     * Remove marked characters
     *
     * Every entry whose 'char' is negative is dropped; keys are not re-indexed here.
     */
    protected function removeDeletedChars(): void
    {
        foreach ($this->newchardata as $key => $value) {
            if ($value['char'] < 0) {
                unset($this->newchardata[$key]);
            }
        }
    }
}
|
||||
266
vendor/tecnickcom/tc-lib-unicode/src/Bidi/Shaping/Arabic.php
vendored
Normal file
266
vendor/tecnickcom/tc-lib-unicode/src/Bidi/Shaping/Arabic.php
vendored
Normal file
@@ -0,0 +1,266 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Arabic.php
|
||||
*
|
||||
* @since 2011-05-23
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* This file is part of tc-lib-unicode software library.
|
||||
*/
|
||||
|
||||
namespace Com\Tecnick\Unicode\Bidi\Shaping;
|
||||
|
||||
use Com\Tecnick\Unicode\Data\Arabic as UniArabic;
|
||||
|
||||
/**
|
||||
* Com\Tecnick\Unicode\Bidi\Shaping\Arabic
|
||||
*
|
||||
* @since 2015-07-13
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* @phpstan-type CharData array{
|
||||
* 'char': int,
|
||||
* 'i': int,
|
||||
* 'level': int,
|
||||
* 'otype': string,
|
||||
* 'pdimatch': int,
|
||||
* 'pos': int,
|
||||
* 'type': string,
|
||||
* 'x': int,
|
||||
* }
|
||||
*
|
||||
* @phpstan-type SeqData array{
|
||||
* 'e': int,
|
||||
* 'edir': string,
|
||||
* 'end': int,
|
||||
* 'eos': string,
|
||||
* 'length': int,
|
||||
* 'maxlevel': int,
|
||||
* 'sos': string,
|
||||
* 'start': int,
|
||||
* 'item': array<int, CharData>,
|
||||
* }
|
||||
*/
|
||||
abstract class Arabic
{
    /**
     * Sequence to process and return
     *
     * @var SeqData
     */
    protected array $seq = [
        'e' => 0,
        'edir' => '',
        'end' => 0,
        'eos' => '',
        'length' => 0,
        'maxlevel' => 0,
        'sos' => '',
        'start' => 0,
        'item' => [],
    ];

    /**
     * Array of processed chars
     *
     * @var array<int, CharData>
     */
    protected array $newchardata = [];

    /**
     * Array of AL characters
     *
     * @var array<int, CharData>
     */
    protected array $alchars = [];

    /**
     * Number of AL characters
     */
    protected int $numalchars = 0;

    /**
     * Tell whether the current char forms a LAA ligature with a preceding LAM.
     *
     * @param ?CharData $prevchar Previous char
     * @param CharData  $thischar Current char
     */
    protected function isLaaLetter(?array $prevchar, array $thischar): bool
    {
        if ($prevchar === null) {
            return false;
        }

        if ($prevchar['char'] != UniArabic::LAM) {
            return false;
        }

        return isset(UniArabic::LAA[$thischar['char']]);
    }

    /**
     * Tell whether the next char can connect to the current one.
     *
     * The next char must exist, have original type 'AL' or 'NSM', share the
     * resolved type of the current char and not be the arabic question mark.
     *
     * @param CharData  $thischar Current char
     * @param ?CharData $nextchar Next char
     */
    protected function hasNextChar(array $thischar, ?array $nextchar): bool
    {
        if ($nextchar === null) {
            return false;
        }

        if (($nextchar['otype'] != 'AL') && ($nextchar['otype'] != 'NSM')) {
            return false;
        }

        if ($nextchar['type'] != $thischar['type']) {
            return false;
        }

        return ($nextchar['char'] != UniArabic::QUESTION_MARK);
    }

    /**
     * Tell whether the previous char can connect to the current one.
     *
     * The previous char must exist, have original type 'AL' or 'NSM' and share
     * the resolved type of the current char.
     *
     * @param ?CharData $prevchar Previous char
     * @param CharData  $thischar Current char
     */
    protected function hasPrevChar(?array $prevchar, array $thischar): bool
    {
        if ($prevchar === null) {
            return false;
        }

        if (($prevchar['otype'] != 'AL') && ($prevchar['otype'] != 'NSM')) {
            return false;
        }

        return ($prevchar['type'] == $thischar['type']);
    }

    /**
     * Tell whether the current char has a connecting char on both sides.
     *
     * @param ?CharData $prevchar Previous char
     * @param CharData  $thischar Current char
     * @param ?CharData $nextchar Next char
     */
    protected function isMiddleChar(?array $prevchar, array $thischar, ?array $nextchar): bool
    {
        if (! $this->hasPrevChar($prevchar, $thischar)) {
            return false;
        }

        return $this->hasNextChar($thischar, $nextchar);
    }

    /**
     * Tell whether the current char is in final position: either a connecting
     * previous char exists, or the next char is the arabic question mark.
     *
     * @param ?CharData $prevchar Previous char
     * @param CharData  $thischar Current char
     * @param ?CharData $nextchar Next char
     */
    protected function isFinalChar(?array $prevchar, array $thischar, ?array $nextchar): bool
    {
        return $this->hasPrevChar($prevchar, $thischar)
            || (($nextchar !== null) && ($nextchar['char'] == UniArabic::QUESTION_MARK));
    }

    /**
     * Set initial or middle char
     *
     * The initial form (index 2) is used when the previous char is listed in
     * UniArabic::END, otherwise the medial form (index 3). Nothing is changed
     * when the substitution table has no entry for the selected form.
     *
     * @param int                    $idx       Current index
     * @param ?CharData              $prevchar  Previous char
     * @param CharData               $thischar  Current char
     * @param array<int, array<int>> $arabicarr Substitution array
     */
    protected function setMiddleChar(int $idx, ?array $prevchar, array $thischar, array $arabicarr): void
    {
        $code = $thischar['char'];
        $afterEndChar = (($prevchar != null) && in_array($prevchar['char'], UniArabic::END));
        $form = ($afterEndChar ? 2 : 3); // 2 = initial, 3 = medial

        if (isset($arabicarr[$code][$form])) {
            $this->newchardata[$idx]['char'] = $arabicarr[$code][$form];
        }
    }

    /**
     * Set initial char
     *
     * @param int                    $idx       Current index
     * @param CharData               $thischar  Current char
     * @param array<int, array<int>> $arabicarr Substitution array
     */
    protected function setInitialChar(int $idx, array $thischar, array $arabicarr): void
    {
        // availability is looked up with the char stored in the sequence item
        $seqcode = $this->seq['item'][$idx]['char'];
        if (! isset($arabicarr[$seqcode][2])) {
            return;
        }

        $this->newchardata[$idx]['char'] = $arabicarr[$thischar['char']][2];
    }

    /**
     * Set final char
     *
     * A HEH preceded by two LAM is replaced by the isolated ALLAH ligature and
     * both LAM are marked for deletion. Otherwise the isolated form (index 0) is
     * used when the previous char is listed in UniArabic::END, else the final
     * form (index 1). Nothing is changed when the selected form is not available.
     *
     * @param int                    $idx       Current index
     * @param ?CharData              $prevchar  Previous char
     * @param CharData               $thischar  Current char
     * @param array<int, array<int>> $arabicarr Substitution array
     */
    protected function setFinalChar(int $idx, ?array $prevchar, array $thischar, array $arabicarr): void
    {
        $code = $thischar['char'];

        $isAllahWord = ($idx > 1)
            && ($code == UniArabic::HEH)
            && ($this->seq['item'][($idx - 1)]['char'] == UniArabic::LAM)
            && ($this->seq['item'][($idx - 2)]['char'] == UniArabic::LAM);

        if ($isAllahWord) {
            $this->newchardata[($idx - 2)]['char'] = -1;
            $this->newchardata[($idx - 1)]['char'] = -1;
            $this->newchardata[$idx]['char'] = UniArabic::LIGATURE_ALLAH_ISOLATED_FORM;
            return;
        }

        $afterEndChar = (($prevchar !== null) && in_array($prevchar['char'], UniArabic::END));
        $form = ($afterEndChar ? 0 : 1); // 0 = isolated, 1 = final

        if (isset($arabicarr[$code][$form])) {
            $this->newchardata[$idx]['char'] = $arabicarr[$code][$form];
        }
    }

    /**
     * Process AL character
     *
     * Selects the substitution table (LAA ligatures or regular substitutions),
     * applies the positional form matching the neighbours and, for a LAA
     * ligature, marks the preceding char for deletion.
     *
     * @param int       $idx      Current index
     * @param int       $pos      Current char position
     * @param ?CharData $prevchar Previous char
     * @param CharData  $thischar Current char
     * @param ?CharData $nextchar Next char
     */
    protected function processAlChar(int $idx, int $pos, ?array $prevchar, array $thischar, ?array $nextchar): void
    {
        $isLaa = $this->isLaaLetter($prevchar, $thischar);
        $arabicarr = ($isLaa ? UniArabic::LAA : UniArabic::SUBSTITUTE);
        if ($isLaa) {
            // the LAM is absorbed by the ligature: look one char further back
            $prevchar = (($pos > 1) ? $this->alchars[($pos - 2)] : null);
        }

        if ($this->isMiddleChar($prevchar, $thischar, $nextchar)) {
            $this->setMiddleChar($idx, $prevchar, $thischar, $arabicarr);
        } elseif ($this->hasNextChar($thischar, $nextchar)) {
            $this->setInitialChar($idx, $thischar, $arabicarr);
        } elseif ($this->isFinalChar($prevchar, $thischar, $nextchar)) {
            $this->setFinalChar($idx, $prevchar, $thischar, $arabicarr);
        } elseif (isset($arabicarr[$thischar['char']][0])) {
            // no connecting neighbour: isolated form
            $this->newchardata[$idx]['char'] = $arabicarr[$thischar['char']][0];
        }

        if (! $isLaa) {
            return;
        }

        // mark the char preceding the ligature for deletion
        $this->newchardata[($this->alchars[($pos - 1)]['i'])]['char'] = -1;
    }
}
|
||||
78
vendor/tecnickcom/tc-lib-unicode/src/Bidi/StepBase.php
vendored
Normal file
78
vendor/tecnickcom/tc-lib-unicode/src/Bidi/StepBase.php
vendored
Normal file
@@ -0,0 +1,78 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* StepBase.php
|
||||
*
|
||||
* @since 2011-05-23
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* This file is part of tc-lib-unicode software library.
|
||||
*/
|
||||
|
||||
namespace Com\Tecnick\Unicode\Bidi;
|
||||
|
||||
/**
|
||||
* Com\Tecnick\Unicode\Bidi\StepBase
|
||||
*
|
||||
* @since 2015-07-13
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* @phpstan-import-type SeqData from \Com\Tecnick\Unicode\Bidi\Shaping\Arabic
|
||||
*/
|
||||
abstract class StepBase
{
    /**
     * Initialize Sequence to process
     *
     * @param SeqData $seq     Isolated Sequence array
     * @param bool    $process If false disable automatic processing (this is a testing flag)
     */
    public function __construct(
        /**
         * Sequence to process and return
         */
        protected array $seq,
        bool $process = true
    ) {
        if ($process) {
            $this->process();
        }
    }

    /**
     * Returns the processed array
     *
     * @return SeqData
     */
    public function getSequence(): array
    {
        return $this->seq;
    }

    /**
     * Process the current step
     */
    abstract protected function process(): void;

    /**
     * Generic step
     *
     * Calls the named method once per position, passing each index from 0 up to
     * (but excluding) $this->seq['length']. The length is re-read on every
     * iteration, so a step that changes it affects the iteration.
     *
     * @param string $method Processing method
     */
    public function processStep(string $method): void
    {
        for ($idx = 0; $idx < $this->seq['length']; ++$idx) {
            $this->$method($idx);
        }
    }
}
|
||||
71
vendor/tecnickcom/tc-lib-unicode/src/Bidi/StepI.php
vendored
Normal file
71
vendor/tecnickcom/tc-lib-unicode/src/Bidi/StepI.php
vendored
Normal file
@@ -0,0 +1,71 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* StepI.php
|
||||
*
|
||||
* @since 2011-05-23
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* This file is part of tc-lib-unicode software library.
|
||||
*/
|
||||
|
||||
namespace Com\Tecnick\Unicode\Bidi;
|
||||
|
||||
/**
|
||||
* Com\Tecnick\Unicode\Bidi\StepI
|
||||
*
|
||||
* @since 2015-07-13
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*/
|
||||
class StepI extends \Com\Tecnick\Unicode\Bidi\StepBase
{
    /**
     * Process I steps
     *
     * Resets the sequence maximum level and resolves the implicit level of every character.
     */
    protected function process(): void
    {
        $this->seq['maxlevel'] = 0;
        $this->processStep('processI');
    }

    /**
     * I1. For all characters with an even (left-to-right) embedding level, those of type R go up one level and those
     *     of type AN or EN go up two levels.
     * I2. For all characters with an odd (right-to-left) embedding level, those of type L, EN or AN go up one level.
     *
     * The sequence maximum level is updated with the resulting level.
     *
     * @param int $idx Current character position
     */
    protected function processI(int $idx): void
    {
        $type = $this->seq['item'][$idx]['type'];
        $isOddLevel = (($this->seq['item'][$idx]['level'] % 2) !== 0);

        // number of levels the character goes up (loose comparisons on purpose)
        if ($isOddLevel) {
            $raise = (in_array($type, ['L', 'EN', 'AN']) ? 1 : 0);
        } elseif ($type == 'R') {
            $raise = 1;
        } else {
            $raise = (in_array($type, ['AN', 'EN']) ? 2 : 0);
        }

        if ($raise > 0) {
            $this->seq['item'][$idx]['level'] += $raise;
        }

        // update the maximum level
        $this->seq['maxlevel'] = max($this->seq['maxlevel'], $this->seq['item'][$idx]['level']);
    }
}
|
||||
173
vendor/tecnickcom/tc-lib-unicode/src/Bidi/StepL.php
vendored
Normal file
173
vendor/tecnickcom/tc-lib-unicode/src/Bidi/StepL.php
vendored
Normal file
@@ -0,0 +1,173 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* StepL.php
|
||||
*
|
||||
* @since 2011-05-23
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* This file is part of tc-lib-unicode software library.
|
||||
*/
|
||||
|
||||
namespace Com\Tecnick\Unicode\Bidi;
|
||||
|
||||
use Com\Tecnick\Unicode\Data\Constant as UniConstant;
|
||||
use Com\Tecnick\Unicode\Data\Mirror as UniMirror;
|
||||
|
||||
/**
|
||||
* Com\Tecnick\Unicode\Bidi\StepL
|
||||
*
|
||||
* @since 2015-07-13
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* @phpstan-import-type CharData from \Com\Tecnick\Unicode\Bidi\Shaping\Arabic
|
||||
*/
|
||||
class StepL
|
||||
{
|
||||
/**
|
||||
* Array of characters data to return
|
||||
*
|
||||
* @var array<int, CharData>
|
||||
*/
|
||||
protected array $chardata = [];
|
||||
|
||||
/**
|
||||
* Number of characters in $this->chardata
|
||||
*/
|
||||
protected int $numchars;
|
||||
|
||||
/**
 * L steps
 *
 * Restores the original character order, then applies steps L1 and L2.
 * Read the result with getChrData().
 *
 * @param array<int, CharData> $chardata Array of characters data
 * @param int                  $pel      Paragraph embedding level
 * @param int                  $maxlevel Maximum level
 */
public function __construct(
    array $chardata,
    /**
     * Paragraph embedding level
     */
    protected int $pel,
    /**
     * Maximum level
     */
    protected int $maxlevel
) {
    // put the characters back in their original position order
    usort(
        $chardata,
        static function ($left, $right): int {
            return ($left['pos'] <=> $right['pos']);
        }
    );

    $this->chardata = $chardata;
    $this->numchars = count($chardata);

    $this->processL1();
    $this->processL2();
}
|
||||
|
||||
/**
 * Returns the characters data as left by the L steps run in the constructor.
 *
 * @return array<int, CharData>
 */
public function getChrData(): array
{
    return $this->chardata;
}
|
||||
|
||||
/**
 * L1. On each line, reset the embedding level of the following characters to the paragraph embedding level:
 *     1. Segment separators,
 *     2. Paragraph separators,
 *     3. Any sequence of whitespace characters and/or isolate formatting characters (FSI, LRI, RLI, and PDI)
 *        preceding a segment separator or paragraph separator, and
 *     4. Any sequence of whitespace characters and/or isolate formatting characters (FSI, LRI, RLI, and PDI)
 *        at the end of the line.
 *
 * Every character is handed to processL1b(), with the scan starting at the character itself.
 */
protected function processL1(): void
{
    $idx = 0;
    while ($idx < $this->numchars) {
        $this->processL1b($idx, $idx);
        ++$idx;
    }
}
|
||||
|
||||
/**
 * Internal L1 step
 *
 * Scans forward from $jdx, never inspecting the last character. The level of
 * the character at $idx is reset to the paragraph embedding level when a
 * segment separator ('S') or paragraph separator ('B') is reached. The scan
 * stops without any change on a whitespace ('WS') character, or when the
 * character at $idx is in the LRI..PDI code point range.
 *
 * The scan is iterative so that long runs do not build one call frame per
 * scanned character.
 *
 * NOTE(review): the former sub-condition "last character of type WS" could never
 * be true because the scan stops before the last character; it has been dropped
 * without changing behavior. Whether end-of-line whitespace (rule L1.4) is meant
 * to be handled here should be confirmed against the upstream library.
 *
 * @param int $idx Main character index
 * @param int $jdx Current index
 */
protected function processL1b(int $idx, int $jdx): void
{
    $stop = ($this->numchars - 1);

    for ($cur = $jdx; $cur < $stop; ++$cur) {
        $otype = $this->chardata[$cur]['otype'];

        if (($otype == 'S') || ($otype == 'B')) {
            $this->chardata[$idx]['level'] = $this->pel;
            return;
        }

        if ($otype == 'WS') {
            return;
        }

        if (
            ($this->chardata[$idx]['char'] >= UniConstant::LRI)
            && ($this->chardata[$idx]['char'] <= UniConstant::PDI)
        ) {
            return;
        }
    }
}
|
||||
|
||||
/**
 * L2. From the highest level found in the text to the lowest odd level on each line,
 *     including intermediate levels not actually present in the text,
 *     reverse any contiguous sequence of characters that are at that level or higher.
 *     This rule reverses a progressively larger series of substrings.
 *
 * The L4 mirroring is applied while a character is collected in a run:
 * at each pass a character of type R that has an entry in the mirror table
 * is replaced by that entry.
 */
protected function processL2(): void
{
    $level = $this->maxlevel;
    while ($level > 0) {
        $output = [];
        $run = [];

        foreach ($this->chardata as $item) {
            if ($item['level'] < $level) {
                // the run (if any) ends here: emit it reversed, then the current character
                if ($run !== []) {
                    $output = array_merge($output, array_reverse($run));
                    $run = [];
                }

                $output[] = $item;
                continue;
            }

            // L4. A character is depicted by a mirrored glyph if and only if
            //     (a) the resolved directionality of that character is R, and
            //     (b) the Bidi_Mirrored property value of that character is true.
            if (($item['type'] == 'R') && (isset(UniMirror::UNI[$item['char']]))) {
                $item['char'] = UniMirror::UNI[$item['char']];
            }

            $run[] = $item;
        }

        // a run that reaches the end of the text
        if ($run !== []) {
            $output = array_merge($output, array_reverse($run));
        }

        $this->chardata = $output;
        --$level;
    }
}
|
||||
}
|
||||
312
vendor/tecnickcom/tc-lib-unicode/src/Bidi/StepN.php
vendored
Normal file
312
vendor/tecnickcom/tc-lib-unicode/src/Bidi/StepN.php
vendored
Normal file
@@ -0,0 +1,312 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* StepN.php
|
||||
*
|
||||
* @since 2011-05-23
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* This file is part of tc-lib-unicode software library.
|
||||
*/
|
||||
|
||||
namespace Com\Tecnick\Unicode\Bidi;
|
||||
|
||||
use Com\Tecnick\Unicode\Data\Bracket as UniBracket;
|
||||
|
||||
/**
|
||||
* Com\Tecnick\Unicode\Bidi\StepN
|
||||
*
|
||||
* @since 2015-07-13
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*/
|
||||
class StepN extends \Com\Tecnick\Unicode\Bidi\StepBase
{
    /**
     * List of bracket pairs positions (opening bracket index => closing bracket index)
     *
     * @var array<int, int>
     */
    protected array $brackets = [];

    /**
     * Stack used to store bracket positions.
     * Each entry is [position of the opening bracket, code point used for matching].
     *
     * @var array<int, array{int, int}>
     */
    protected array $bstack = [];

    /**
     * Process N steps
     * Resolving Neutral and Isolate Formatting Types
     *
     * Neutral and isolate formatting (i.e. NI) characters are resolved one isolating run sequence at a time.
     * Its results are that all NIs become either R or L. Generally, NIs take on the direction of the surrounding text.
     * In case of a conflict, they take on the embedding direction.
     * At isolating run sequence boundaries where the type of the character on the other side of the boundary
     * is required, the type assigned to sos or eos is used.
     *
     * Bracket pairs within an isolating run sequence are processed as units so that both the opening and the closing
     * paired bracket in a pair resolve to the same direction. Note that this rule is applied based on the current
     * bidirectional character type of each paired bracket and not the original type, as this could have changed under
     * X6. The current bidirectional character type may also have changed under a previous iteration of the for loop in
     * N0 in the case of nested bracket pairs.
     */
    protected function process(): void
    {
        $this->processStep('getBracketPairs');
        $this->processN0();
        $this->processStep('processN1');
        $this->processStep('processN2');
    }

    /**
     * BD16. Find all bracket pairs
     *
     * Called once per character of the sequence. Opening brackets are pushed on the stack;
     * a closing bracket is paired with the nearest stacked opening bracket of the same kind,
     * and the stack is popped through that opening bracket.
     * U+3008 / U+3009 are folded onto U+2329 / U+232A before matching so that
     * both spellings of the angle brackets can pair with each other.
     *
     * @param int $idx Current character position
     */
    protected function getBracketPairs(int $idx): void
    {
        $char = $this->seq['item'][$idx]['char'];

        if (isset(UniBracket::OPEN[$char])) {
            // process open bracket
            if ($char == 0x3008) {
                $char = 0x2329;
            }

            $this->bstack[] = [$idx, $char];
            return;
        }

        if (!isset(UniBracket::CLOSE[$char])) {
            return;
        }

        // process closing bracket
        if ($char == 0x3009) {
            $char = 0x232A;
        }

        // find matching opening bracket, starting from the most recent one
        $tmpstack = $this->bstack;
        while ($tmpstack !== []) {
            $item = array_pop($tmpstack);
            if ($char == UniBracket::OPEN[$item[1]]) {
                $this->brackets[$item[0]] = $idx;
                // drop the matched opening bracket and everything stacked above it
                $this->bstack = $tmpstack;
                // Sort the list of pairs of text positions in ascending order
                // based on the text position of the opening paired bracket.
                ksort($this->brackets);
                // a closing bracket belongs to one pair only: stop at the first match
                break;
            }
        }
    }

    /**
     * Return the normalized char type for the N0 step
     * Within this scope, bidirectional types EN and AN are treated as R.
     *
     * @param string $type Char type
     *
     * @return string 'R' for AN and EN, the unchanged type otherwise
     */
    protected function getN0Type(string $type): string
    {
        if (($type == 'AN') || ($type == 'EN')) {
            return 'R';
        }

        return $type;
    }

    /**
     * N0. Process bracket pairs in an isolating run sequence sequentially in the logical order of the text positions
     *     of the opening paired brackets.
     */
    protected function processN0(): void
    {
        $edir = $this->seq['edir'];
        $odir = (($edir == 'L') ? 'R' : 'L');

        // For each bracket-pair element in the list of pairs of text positions
        foreach ($this->brackets as $open => $close) {
            if (!$this->processInsideBrackets($open, $close, $odir)) {
                // Either the pair has already been set to the embedding direction, or
                // d. there are no strong types within the bracket pair. Therefore, do not set the type for that
                //    bracket pair. Note that if the enclosed text contains no strong types the bracket pairs will
                //    both resolve to the same level when resolved individually using rules N1 and N2.
                continue;
            }

            // The pair only contains strong types opposite the embedding direction:
            // look backwards for the first strong type; sos is used when none is found.
            $context = $this->seq['sos'];
            for ($jdx = ($open - 1); $jdx >= 0; --$jdx) {
                $btype = $this->getN0Type($this->seq['item'][$jdx]['type']);
                if ($btype == $odir) {
                    // 1. If the preceding strong type is also opposite the embedding direction,
                    //    context is established, so set the type for both brackets in the pair to that direction.
                    $context = $odir;
                    break;
                }

                if ($btype == $edir) {
                    // 2. Otherwise set the type for both brackets in the pair to the embedding direction.
                    $context = $edir;
                    break;
                }
            }

            $this->setBracketsType($open, $close, $context);
        }
    }

    /**
     * Inspect the bidirectional types of the characters enclosed within the bracket pair.
     *
     * When a type matching the embedding direction is found, the type of both brackets
     * is set to the embedding direction and false is returned.
     *
     * @param int    $open  Open bracket entry
     * @param int    $close Close bracket entry
     * @param string $odir  Opposite direction (L or R)
     *
     * @return bool True if the brackets have not been set and the enclosed text contains
     *              a strong type opposite the embedding direction
     */
    protected function processInsideBrackets(int $open, int $close, string $odir): bool
    {
        $opposite = false;

        // a. Inspect the bidirectional types of the characters enclosed within the bracket pair.
        for ($jdx = ($open + 1); $jdx < $close; ++$jdx) {
            $btype = $this->getN0Type($this->seq['item'][$jdx]['type']);

            // b. If any strong type (either L or R) matching the embedding direction is found,
            //    set the type for both brackets in the pair to match the embedding direction.
            if ($btype == $this->seq['edir']) {
                $this->setBracketsType($open, $close, $this->seq['edir']);
                return false;
            }

            // c. Otherwise, if there is a strong type it must be opposite the embedding direction.
            if ($btype === $odir) {
                $opposite = true;
            }
        }

        // Therefore, test for an established context with a preceding strong type by checking backwards before
        // the opening paired bracket until the first strong type (L, R, or sos) is found.
        return $opposite;
    }

    /**
     * Set the brackets type
     *
     * @param int    $open  Open bracket entry
     * @param int    $close Close bracket entry
     * @param string $type  Type
     */
    protected function setBracketsType(int $open, int $close, string $type): void
    {
        foreach ([$open, $close] as $bracket) {
            $this->seq['item'][$bracket]['type'] = $type;

            // Any number of characters that had original bidirectional character type NSM
            // prior to the application of W1 that immediately follow a paired bracket which
            // changed to L or R under N0 should change to match the type of their preceding bracket.
            $next = ($bracket + 1);
            while (isset($this->seq['item'][$next]['otype']) && ($this->seq['item'][$next]['otype'] == 'NSM')) {
                $this->seq['item'][$next]['type'] = $type;
                ++$next;
            }
        }
    }

    /**
     * N1. A sequence of NIs takes the direction of the surrounding strong text if the text on both sides has the same
     *     direction. European and Arabic numbers act as if they were R in terms of their influence on NIs.
     *     The start-of-sequence (sos) and end-of-sequence (eos) types are used at isolating run sequence boundaries.
     *
     * @param int $idx Current character position
     */
    protected function processN1(int $idx): void
    {
        if ($this->seq['item'][$idx]['type'] != 'NI') {
            return;
        }

        // direction of the text before the NI sequence
        $bdx = ($idx - 1);
        $prev = $this->processN1prev($bdx);
        if ($prev === '') {
            return;
        }

        // direction of the text after the NI sequence
        $jdx = $this->getNextN1Char($idx);
        $next = $this->processN1next($jdx);
        if (($next === '') || ($next !== $prev)) {
            return;
        }

        // same direction on both sides: the whole NI sequence takes it
        $end = min($jdx, $this->seq['length']);
        for ($pos = $idx; $pos < $end; ++$pos) {
            $this->seq['item'][$pos]['type'] = $next;
        }
    }

    /**
     * Get the direction of the text preceding a sequence of NIs
     *
     * @param int $bdx Position of the preceding character;
     *                 it is reset to 0 when it falls before the start of the sequence
     *
     * @return string The sos type when $bdx is before the start of the sequence,
     *                'R' for types R, AN and EN, 'L' for type L, an empty string otherwise
     */
    protected function processN1prev(int &$bdx): string
    {
        if ($bdx < 0) {
            $bdx = 0;
            return $this->seq['sos'];
        }

        return match ($this->seq['item'][$bdx]['type']) {
            'R', 'AN', 'EN' => 'R',
            'L' => 'L',
            default => '',
        };
    }

    /**
     * Get the direction of the text following a sequence of NIs
     *
     * @param int $jdx Position of the next character;
     *                 it is clamped to the sequence length when it falls past the end of the sequence
     *
     * @return string The eos type when $jdx is past the end of the sequence,
     *                'R' for types R, AN and EN, 'L' for type L, an empty string otherwise
     */
    protected function processN1next(int &$jdx): string
    {
        if ($jdx >= $this->seq['length']) {
            $jdx = $this->seq['length'];
            return $this->seq['eos'];
        }

        return match ($this->seq['item'][$jdx]['type']) {
            'R', 'AN', 'EN' => 'R',
            'L' => 'L',
            default => '',
        };
    }

    /**
     * Return the index of the next valid char for N1
     *
     * @param int $idx Start index
     *
     * @return int Index of the first character after $idx whose type is not NI,
     *             or the sequence length when there is none
     */
    protected function getNextN1Char(int $idx): int
    {
        $jdx = ($idx + 1);
        while (($jdx < $this->seq['length']) && ($this->seq['item'][$jdx]['type'] == 'NI')) {
            ++$jdx;
        }

        return $jdx;
    }

    /**
     * N2. Any remaining NIs take the embedding direction.
     *
     * @param int $idx Current character position
     */
    protected function processN2(int $idx): void
    {
        if ($this->seq['item'][$idx]['type'] == 'NI') {
            $this->seq['item'][$idx]['type'] = $this->seq['edir'];
        }
    }
}
|
||||
89
vendor/tecnickcom/tc-lib-unicode/src/Bidi/StepP.php
vendored
Normal file
89
vendor/tecnickcom/tc-lib-unicode/src/Bidi/StepP.php
vendored
Normal file
@@ -0,0 +1,89 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* StepP.php
|
||||
*
|
||||
* @since 2011-05-23
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* This file is part of tc-lib-unicode software library.
|
||||
*/
|
||||
|
||||
namespace Com\Tecnick\Unicode\Bidi;
|
||||
|
||||
use Com\Tecnick\Unicode\Data\Constant as UniConstant;
|
||||
use Com\Tecnick\Unicode\Data\Type as UniType;
|
||||
|
||||
/**
|
||||
* Com\Tecnick\Unicode\Bidi\StepP
|
||||
*
|
||||
* @since 2015-07-13
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*/
|
||||
class StepP
{
    /**
     * P Steps for Bidirectional algorithm
     *
     * @param array<int> $ordarr Array of UTF-8 codepoints
     */
    public function __construct(
        /**
         * Array of UTF-8 codepoints
         */
        protected array $ordarr
    ) {
    }

    /**
     * Get the Paragraph Embedding Level
     *
     * @return int 1 when the first character of type L, AL or R found outside
     *             any isolate is AL or R, 0 otherwise
     */
    public function getPel(): int
    {
        // P2. In each paragraph, find the first character of type L, AL, or R
        //     while skipping over any characters between an isolate initiator and its matching PDI or,
        //     if it has no matching PDI, the end of the paragraph.
        // P3. If a character is found in P2 and it is of type AL or R,
        //     then set the paragraph embedding level to one; otherwise, set it to zero.
        $depth = 0;
        foreach ($this->ordarr as $ord) {
            $depth = $this->getIsolateLevel($ord, $depth);
            if (($depth != 0) || !isset(UniType::UNI[$ord])) {
                // inside an isolate, or a code point without a known type
                continue;
            }

            $level = match (UniType::UNI[$ord]) {
                'L' => 0,
                'R', 'AL' => 1,
                default => null,
            };

            if ($level !== null) {
                return $level;
            }
        }

        return 0;
    }

    /**
     * Update the level of explicit directional isolates
     *
     * @param int $ord     Code point being examined
     * @param int $isolate Isolate nesting level before this code point
     *
     * @return int Nesting level after this code point: one more for LRI, RLI and FSI,
     *             one less for PDI, never below zero
     */
    protected function getIsolateLevel(int $ord, int $isolate): int
    {
        switch ($ord) {
            case UniConstant::LRI:
            case UniConstant::RLI:
            case UniConstant::FSI:
                ++$isolate;
                break;
            case UniConstant::PDI:
                --$isolate;
                break;
        }

        return max(0, $isolate);
    }
}
|
||||
215
vendor/tecnickcom/tc-lib-unicode/src/Bidi/StepW.php
vendored
Normal file
215
vendor/tecnickcom/tc-lib-unicode/src/Bidi/StepW.php
vendored
Normal file
@@ -0,0 +1,215 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* StepW.php
|
||||
*
|
||||
* @since 2011-05-23
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* This file is part of tc-lib-unicode software library.
|
||||
*/
|
||||
|
||||
namespace Com\Tecnick\Unicode\Bidi;
|
||||
|
||||
use Com\Tecnick\Unicode\Data\Constant as UniConstant;
|
||||
|
||||
/**
|
||||
* Com\Tecnick\Unicode\Bidi\StepW
|
||||
*
|
||||
* @since 2015-07-13
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*/
|
||||
class StepW extends \Com\Tecnick\Unicode\Bidi\StepBase
{
    /**
     * Process W steps
     * Resolving Weak Types
     *
     * Each rule is applied to the whole sequence before the next one starts.
     */
    protected function process(): void
    {
        $this->processStep('processW1');
        $this->processStep('processW2');
        $this->processStep('processW3');
        $this->processStep('processW4');
        $this->processStep('processW5');
        $this->processStep('processW6');
        $this->processStep('processW7');
    }

    /**
     * W1. Examine each nonspacing mark (NSM) in the isolating run sequence, and
     *     change the type of the NSM to Other Neutral if the previous character is an isolate initiator or PDI, and
     *     to the type of the previous character otherwise.
     *     If the NSM is at the start of the isolating run sequence, it will get the type of sos.
     *     (Note that in an isolating run sequence, an isolate initiator followed by an NSM or any type
     *     other than PDI must be an overflow isolate initiator.)
     *
     * @param int $idx Current character position
     */
    protected function processW1(int $idx): void
    {
        if ($this->seq['item'][$idx]['type'] != 'NSM') {
            return;
        }

        if ($idx < 1) {
            // start of the sequence
            $this->seq['item'][$idx]['type'] = $this->seq['sos'];
            return;
        }

        $prev = $this->seq['item'][($idx - 1)];
        $isolate = (($prev['char'] >= UniConstant::LRI) && ($prev['char'] <= UniConstant::PDI));
        $this->seq['item'][$idx]['type'] = ($isolate ? 'ON' : $prev['type']);
    }

    /**
     * W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or sos)
     *     is found. If an AL is found, change the type of the European number to Arabic number.
     *
     * @param int $idx Current character position
     */
    protected function processW2(int $idx): void
    {
        if ($this->seq['item'][$idx]['type'] != 'EN') {
            return;
        }

        for ($jdx = ($idx - 1); $jdx >= 0; --$jdx) {
            $ptype = $this->seq['item'][$jdx]['type'];
            if ($ptype == 'AL') {
                $this->seq['item'][$idx]['type'] = 'AN';
                return;
            }

            if (($ptype == 'R') || ($ptype == 'L')) {
                // another strong type comes first: the number is left unchanged
                return;
            }
        }
    }

    /**
     * W3. Change all ALs to R.
     *
     * @param int $idx Current character position
     */
    protected function processW3(int $idx): void
    {
        if ($this->seq['item'][$idx]['type'] == 'AL') {
            $this->seq['item'][$idx]['type'] = 'R';
        }
    }

    /**
     * W4. A single European separator between two European numbers changes to a European number.
     *     A single common separator between two numbers of the same type changes to that type.
     *
     * @param int $idx Current character position
     */
    protected function processW4(int $idx): void
    {
        $type = $this->seq['item'][$idx]['type'];
        if (($type != 'ES') && ($type != 'CS')) {
            return;
        }

        $bdx = ($idx - 1);
        $fdx = ($idx + 1);
        if (($bdx < 0) || ($fdx >= $this->seq['length'])) {
            // a separator at either end of the sequence is not between two numbers
            return;
        }

        $before = $this->seq['item'][$bdx]['type'];
        if ($before != $this->seq['item'][$fdx]['type']) {
            return;
        }

        if ($before == 'EN') {
            // ES or CS between two European numbers
            $this->seq['item'][$idx]['type'] = 'EN';
        } elseif (($type == 'CS') && ($before == 'AN')) {
            // only the common separator joins two Arabic numbers:
            // a European separator between them is left unchanged
            $this->seq['item'][$idx]['type'] = 'AN';
        }
    }

    /**
     * W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
     *
     * @param int $idx Current character position
     */
    protected function processW5(int $idx): void
    {
        if ($this->seq['item'][$idx]['type'] == 'ET') {
            $this->processW5a($idx);
            $this->processW5b($idx);
        }
    }

    /**
     * W5a. Change the current character to EN when the character just before it is an EN.
     *
     * The sequence is processed from the start, so a terminator that comes before
     * the current one has already been changed to EN when it was adjacent to a number.
     *
     * @param int $idx Current character position
     */
    protected function processW5a(int $idx): void
    {
        $jdx = ($idx - 1);
        if (($jdx >= 0) && ($this->seq['item'][$jdx]['type'] == 'EN')) {
            $this->seq['item'][$idx]['type'] = 'EN';
        }
    }

    /**
     * W5b. Change the current character to EN when, looking forward, an EN is reached
     *      by crossing only European terminators.
     *
     * Nothing is done when the current character is no longer an ET.
     *
     * @param int $idx Current character position
     */
    protected function processW5b(int $idx): void
    {
        if ($this->seq['item'][$idx]['type'] != 'ET') {
            return;
        }

        for ($jdx = ($idx + 1); $jdx < $this->seq['length']; ++$jdx) {
            $ntype = $this->seq['item'][$jdx]['type'];
            if ($ntype == 'EN') {
                $this->seq['item'][$idx]['type'] = 'EN';
                return;
            }

            if ($ntype != 'ET') {
                return;
            }
        }
    }

    /**
     * W6. Otherwise, separators and terminators change to Other Neutral.
     *
     * The characters are tagged 'NI', together with the ones that are already 'ON'.
     * NOTE(review): 'NI' is the only neutral type the N steps (StepN) test for - confirm
     * that no other step is expected to turn 'ON' into 'NI' before StepN runs.
     *
     * @param int $idx Current character position
     */
    protected function processW6(int $idx): void
    {
        if (in_array($this->seq['item'][$idx]['type'], ['ET', 'ES', 'CS', 'ON'])) {
            $this->seq['item'][$idx]['type'] = 'NI';
        }
    }

    /**
     * W7. Search backward from each instance of a European number until the first strong type (R, L, or sos) is found.
     *     If an L is found, then change the type of the European number to L.
     *
     * @param int $idx Current character position
     */
    protected function processW7(int $idx): void
    {
        if ($this->seq['item'][$idx]['type'] != 'EN') {
            return;
        }

        for ($jdx = ($idx - 1); $jdx >= 0; --$jdx) {
            $ptype = $this->seq['item'][$jdx]['type'];
            if ($ptype == 'L') {
                $this->seq['item'][$idx]['type'] = 'L';
                return;
            }

            if ($ptype == 'R') {
                return;
            }
        }

        // no strong type before the number: sos decides
        if ($this->seq['sos'] == 'L') {
            $this->seq['item'][$idx]['type'] = 'L';
        }
    }
}
|
||||
424
vendor/tecnickcom/tc-lib-unicode/src/Bidi/StepX.php
vendored
Normal file
424
vendor/tecnickcom/tc-lib-unicode/src/Bidi/StepX.php
vendored
Normal file
@@ -0,0 +1,424 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* StepX.php
|
||||
*
|
||||
* @since 2011-05-23
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* This file is part of tc-lib-unicode software library.
|
||||
*/
|
||||
|
||||
namespace Com\Tecnick\Unicode\Bidi;
|
||||
|
||||
use Com\Tecnick\Unicode\Data\Constant as UniConstant;
|
||||
use Com\Tecnick\Unicode\Data\Type as UniType;
|
||||
|
||||
/**
|
||||
* Com\Tecnick\Unicode\Bidi\StepX
|
||||
*
|
||||
* @since 2015-07-13
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* @phpstan-import-type SeqData from \Com\Tecnick\Unicode\Bidi\Shaping\Arabic
|
||||
* @phpstan-import-type CharData from \Com\Tecnick\Unicode\Bidi\Shaping\Arabic
|
||||
*
|
||||
* @phpstan-type DssData array{
|
||||
* 'ord': int,
|
||||
* 'cel': int,
|
||||
* 'dos': string,
|
||||
* 'dis': bool,
|
||||
* }
|
||||
*/
|
||||
class StepX
|
||||
{
|
||||
/**
|
||||
* Maximum embedding level
|
||||
*/
|
||||
public const MAX_DEPTH = 125;
|
||||
|
||||
/**
|
||||
* Directional Status Stack
|
||||
*
|
||||
* @var array<int, DssData>
|
||||
*/
|
||||
protected array $dss = [];
|
||||
|
||||
/**
|
||||
* Overflow Isolate Count
|
||||
*/
|
||||
protected int $oic = 0;
|
||||
|
||||
/**
|
||||
* Overflow Embedding Count
|
||||
*/
|
||||
protected int $oec = 0;
|
||||
|
||||
/**
|
||||
* Valid Isolate Count
|
||||
*/
|
||||
protected int $vic = 0;
|
||||
|
||||
/**
|
||||
* Array of characters data to return
|
||||
*
|
||||
* @var array<int, CharData>
|
||||
*/
|
||||
protected array $chardata = [];
|
||||
|
||||
/**
 * X Steps for Bidirectional algorithm
 * Explicit Levels and Directions
 *
 * Seeds the directional status stack and processes every code point.
 *
 * @param array<int> $ordarr Array of UTF-8 codepoints
 * @param int        $pel    Paragraph embedding level
 */
public function __construct(
    /**
     * Array of UTF-8 codepoints
     */
    protected array $ordarr,
    int $pel
) {
    // - Push onto the stack an entry consisting of the paragraph embedding level,
    //   a neutral directional override status, and a false directional isolate status.
    $base = [
        'ord' => -1, // dummy value, not used
        'cel' => $pel,
        'dos' => 'NI',
        'dis' => false,
    ];
    $this->dss[] = $base;

    // - Process each character iteratively, applying rules X2 through X8.
    //   Only embedding levels from 0 through max_depth are valid in this phase.
    //   (Note that in the resolution of levels in rules I1 and I2,
    //   the maximum embedding level of max_depth+1 can be reached.)
    $this->processX();
}
|
||||
|
||||
/**
 * Get the characters data collected while the X steps
 * were processed in the constructor.
 *
 * @return array<int, CharData>
 */
public function getChrData(): array
{
    return $this->chardata;
}
|
||||
|
||||
/**
 * Calculate the Least Even
 *
 * For a non-negative number this is the smallest even number
 * strictly greater than it (0 => 2, 1 => 2, 2 => 4).
 *
 * @param int $num Number to process
 */
protected function getLEven(int $num): int
{
    $parity = ($num % 2);

    return ($num - $parity + 2);
}
|
||||
|
||||
/**
 * Calculate the Least Odd
 *
 * For a non-negative number this is the smallest odd number
 * strictly greater than it (0 => 1, 1 => 3, 2 => 3).
 *
 * @param int $num Number to process
 */
protected function getLOdd(int $num): int
{
    $parity = ($num % 2);

    return ($num + $parity + 1);
}
|
||||
|
||||
/**
 * Process X1
 *
 * Feeds every code point, together with its key in the input array
 * (used as original position), to the X rules.
 */
protected function processX(): void
{
    foreach ($this->ordarr as $pos => $code) {
        $this->processXcase($pos, $code);
    }
}
|
||||
|
||||
/**
 * Process X1 case
 *
 * Dispatches one code point to the rule that handles it, using the last entry
 * of the directional status stack as context. Nothing is done when the stack is empty.
 *
 * @param int $pos Original character position in the input string
 * @param int $ord Char code
 *
 * @SuppressWarnings("PHPMD.CyclomaticComplexity")
 */
protected function processXcase(int $pos, int $ord): void
{
    $edss = end($this->dss);
    if ($edss === false) {
        return;
    }

    // embedding level of the last entry on the stack
    $cel = $edss['cel'];

    switch ($ord) {
        case UniConstant::RLE:
            // X2
            $this->setDss($this->getLOdd($cel), UniConstant::RLE, 'NI');
            return;
        case UniConstant::LRE:
            // X3
            $this->setDss($this->getLEven($cel), UniConstant::LRE, 'NI');
            return;
        case UniConstant::RLO:
            // X4
            $this->setDss($this->getLOdd($cel), UniConstant::RLO, 'R');
            return;
        case UniConstant::LRO:
            // X5
            $this->setDss($this->getLEven($cel), UniConstant::LRO, 'L');
            return;
        case UniConstant::RLI:
            // X5a (the initiator itself is handled as a character before the new entry is pushed)
            $this->processChar($pos, $ord, $edss);
            $this->setDss($this->getLOdd($cel), UniConstant::RLI, 'NI', true, true, 1);
            return;
        case UniConstant::LRI:
            // X5b
            $this->processChar($pos, $ord, $edss);
            $this->setDss($this->getLEven($cel), UniConstant::LRI, 'NI', true, true, 1);
            return;
        case UniConstant::FSI:
            // X5c
            $this->processChar($pos, $ord, $edss);
            $this->processFsiCase($pos, $edss);
            return;
        case UniConstant::PDI:
            // X6a
            $this->processPdiCase($pos, $ord, $edss);
            return;
        case UniConstant::PDF:
            // X7
            $this->processPdfCase($edss);
            return;
    }

    // X6
    $this->processChar($pos, $ord, $edss);
}
|
||||
|
||||
/**
 * Set temporary data (X2 to X5)
 *
 * Pushes a new entry on the directional status stack when the requested level is valid
 * and no overflow is pending; otherwise only the overflow counters are updated.
 *
 * @param int    $cel     Embedding Level
 * @param int    $ord     Char code
 * @param string $dos     Directional override status
 * @param bool   $dis     Directional isolate status
 * @param bool   $isolate True if Isolate initiator
 * @param int    $ivic    increment for the valid isolate count
 */
protected function setDss(
    int $cel,
    int $ord,
    string $dos,
    bool $dis = false,
    bool $isolate = false,
    int $ivic = 0
): void {
    // X2 to X5
    // - Compute the least odd|even embedding level greater than the embedding level of the last entry
    //   on the directional status stack.
    // - If this new level would be valid, and the overflow isolate count and overflow embedding
    //   count are both zero, then this RLE is valid. Push an entry consisting of the new embedding
    //   level, neutral|left|right directional override status, and false directional isolate status onto the
    //   directional status stack.
    // - Otherwise, this is an overflow RLE. If the overflow isolate count is zero, increment the
    //   overflow embedding|isolate count by one. Leave all other variables unchanged.

    // Levels from 0 through MAX_DEPTH are valid: MAX_DEPTH itself must be accepted.
    $valid = (($cel <= self::MAX_DEPTH) && ($this->oic == 0) && ($this->oec == 0));

    if (!$valid) {
        if ($isolate) {
            ++$this->oic;
        } elseif ($this->oic == 0) {
            ++$this->oec;
        }

        return;
    }

    $this->vic += $ivic;
    $this->dss[] = [
        'ord' => $ord,
        'cel' => $cel,
        'dos' => $dos,
        'dis' => $dis,
    ];
}
|
||||
|
||||
/**
 * Push a char on the stack
 *
 * Appends an entry for the code point to the characters data. The original type comes from
 * the Unicode type table, falling back to the directional override status when the code point
 * is not listed; the current type is the override status whenever that status is not neutral (NI).
 *
 * @param int     $pos  Original character position in the input string
 * @param int     $ord  Char code
 * @param DssData $edss Last entry in the Directional Status Stack
 */
protected function pushChar(int $pos, int $ord, array $edss): void
{
    $override = $edss['dos'];
    $otype = (UniType::UNI[$ord] ?? $override);

    $this->chardata[] = [
        'char' => $ord,
        'i' => -1,
        'level' => $edss['cel'],
        'otype' => $otype,
        'pdimatch' => -1,
        'pos' => $pos,
        'type' => (($override === 'NI') ? $otype : $override),
        'x' => -1,
    ];
}
|
||||
|
||||
/**
 * Process normal char (X6)
 *
 * Code points whose type is B or BN are dropped; any other one is pushed on the characters data.
 *
 * @param int     $pos  Original character position in the input string
 * @param int     $ord  Char code
 * @param DssData $edss Last entry in the Directional Status Stack
 */
protected function processChar(int $pos, int $ord, array $edss): void
{
    // X6. For all types besides B, BN, RLE, LRE, RLO, LRO, PDF, RLI, LRI, FSI, and PDI:
    // - Set the current character’s embedding level to the embedding level
    //   of the last entry on the directional status stack.
    // - Whenever the directional override status of the last entry on the directional status stack
    //   is not neutral, reset the current character type according to the directional override
    //   status of the last entry on the directional status stack.
    $unitype = (UniType::UNI[$ord] ?? null);
    $skipped = (($unitype == 'B') || ($unitype == 'BN'));

    if (!$skipped) {
        $this->pushChar($pos, $ord, $edss);
    }
}
|
||||
|
||||
/**
 * Process a PDF (pop directional formatting) character (rule X7).
 *
 * @param DssData $edss Last entry in the Directional Status Stack
 */
protected function processPdfCase(array $edss): void
{
    if ($this->oic > 0) {
        // Inside an overflow isolate: this PDF either closes an overflow
        // embedding within it or matches nothing, so there is nothing to do.
        return;
    }

    if ($this->oec > 0) {
        // This PDF closes an overflow embedding that is not inside an
        // overflow isolate.
        $this->oec--;
        return;
    }

    // A valid embedding is closed only when the top entry is not an isolate
    // and popping would still leave at least one entry on the stack.
    // Otherwise the PDF matches no embedding initiator and is ignored.
    $topIsEmbedding = ($edss['dis'] === false);
    if ($topIsEmbedding && (count($this->dss) > 1)) {
        array_pop($this->dss);
    }
}
|
||||
|
||||
/**
 * Process a PDI (pop directional isolate) character (rule X6a).
 *
 * @param int     $pos  Original character position in the input string
 * @param int     $ord  Char code
 * @param DssData $edss Last entry in the Directional Status Stack
 */
protected function processPdiCase(int $pos, int $ord, array $edss): void
{
    // An overflow isolate is pending: this PDI matches it.
    if ($this->oic > 0) {
        $this->oic--;
        return;
    }

    // No valid isolate is open: this PDI matches nothing.
    if ($this->vic == 0) {
        return;
    }

    // The PDI matches a valid isolate initiator. Overflow embeddings opened
    // inside that isolate and never closed by a PDF end here.
    $this->oec = 0;

    // Drop the valid embeddings opened inside the isolate and never closed,
    // i.e. pop until the top entry is an isolate entry. The bottom entry of
    // the stack is never popped by this loop.
    $remaining = count($this->dss);
    while (($remaining > 1) && ($edss['dis'] === false)) {
        array_pop($this->dss);
        $remaining--;
        $edss = end($this->dss);
        if ($edss === false) {
            break;
        }
    }

    // Close the matched isolate itself.
    array_pop($this->dss);
    $this->vic--;

    // The PDI takes the level (and override type, if any) of whatever entry
    // is now on top of the stack; with an empty stack nothing is recorded.
    $top = end($this->dss);
    if ($top !== false) {
        $this->pushChar($pos, $ord, $top);
    }
}
|
||||
|
||||
/**
 * Process an FSI (first strong isolate) character (rule X5c).
 *
 * @param int     $pos  Original character position in the input string
 * @param DssData $edss Last entry in the Directional Status Stack
 */
protected function processFsiCase(int $pos, array $edss): void
{
    // X5c: apply the paragraph-level rules to the characters from the FSI
    // onwards as if they formed a paragraph of their own. A resulting level
    // of 0 makes the FSI behave like an LRI, anything else like an RLI.
    $tail = array_slice($this->ordarr, $pos);
    $stepp = new StepP($tail);
    $actsAsLri = ($stepp->getPel() == 0);

    $level = $actsAsLri
        ? $this->getLEven($edss['cel'])
        : $this->getLOdd($edss['cel']);
    $initiator = $actsAsLri ? UniConstant::LRI : UniConstant::RLI;

    $this->setDss($level, $initiator, 'NI', true, true, 1);
}
|
||||
}
|
||||
236
vendor/tecnickcom/tc-lib-unicode/src/Bidi/StepXten.php
vendored
Normal file
236
vendor/tecnickcom/tc-lib-unicode/src/Bidi/StepXten.php
vendored
Normal file
@@ -0,0 +1,236 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* StepXten.php
|
||||
*
|
||||
* @since 2011-05-23
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* This file is part of tc-lib-unicode software library.
|
||||
*/
|
||||
|
||||
namespace Com\Tecnick\Unicode\Bidi;
|
||||
|
||||
use Com\Tecnick\Unicode\Data\Constant as UniConstant;
|
||||
|
||||
/**
 * Com\Tecnick\Unicode\Bidi\StepXten
 *
 * Splits the character data into level runs (maximal stretches of characters
 * sharing one embedding level), joins runs that are linked through an isolate
 * initiator and its matching PDI into isolated level run sequences, and
 * computes the start-of-sequence (sos) and end-of-sequence (eos) direction of
 * each sequence.
 *
 * @since       2015-07-13
 * @category    Library
 * @package     Unicode
 * @author      Nicola Asuni <info@tecnick.com>
 * @copyright   2011-2024 Nicola Asuni - Tecnick.com LTD
 * @license     http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
 * @link        https://github.com/tecnickcom/tc-lib-unicode
 *
 * @phpstan-import-type SeqData from \Com\Tecnick\Unicode\Bidi\Shaping\Arabic
 * @phpstan-import-type CharData from \Com\Tecnick\Unicode\Bidi\Shaping\Arabic
 */
class StepXten
{
    /**
     * Number of characters
     */
    protected int $numchars;

    /**
     * Array of Level Run sequences
     *
     * @var array<int, array{'start': int, 'end': int, 'e': int}>
     */
    protected array $runseq = [];

    /**
     * Number of Level Run sequences
     */
    protected int $numrunseq = 0;

    /**
     * Array of Isolated Level Run sequences
     *
     * @var array<int, SeqData>
     */
    protected array $ilrs = [];

    /**
     * X Steps for Bidirectional algorithm
     *
     * The sequences are computed immediately; read them back with
     * getIsolatedLevelRunSequences().
     *
     * @param array<int, CharData> $chardata Array of characters data, indexed by position
     * @param int                  $pel      Paragraph Embedding Level
     */
    public function __construct(
        /**
         * Array of characters data to return
         */
        protected array $chardata,
        /**
         * Paragraph Embedding Level
         */
        protected int $pel
    ) {
        $this->numchars = count($chardata);
        $this->setIsolatedLevelRunSequences();
    }

    /**
     * Get the Isolated Run Sequences
     *
     * @return array<int, SeqData>
     */
    public function getIsolatedLevelRunSequences(): array
    {
        return $this->ilrs;
    }

    /**
     * Get the embedded direction (L or R)
     *
     * @param int $level Embedding level
     *
     * @return string 'L' for an even level, 'R' for an odd level
     */
    protected function getEmbeddedDirection(int $level): string
    {
        return ((($level % 2) == 0) ? 'L' : 'R');
    }

    /**
     * Split the character data into level runs.
     *
     * Each run is stored in $runseq with the positions of its first and last
     * character ('start', 'end', both inclusive) and its level ('e').
     */
    protected function setLevelRunSequences(): void
    {
        $start = 0;
        while ($start < $this->numchars) {
            // Extend the run while the following characters share the level.
            $end = ($start + 1);
            while (($end < $this->numchars) && ($this->chardata[$end]['level'] == $this->chardata[$start]['level'])) {
                ++$end;
            }

            // $end stopped one past the run; step back to the last member.
            --$end;
            $this->runseq[] = [
                'start' => $start,
                'end' => $end,
                'e' => $this->chardata[$start]['level'],
            ];
            ++$this->numrunseq;
            $start = ($end + 1);
        }
    }

    /**
     * returns true if the input char is an Isolate Initiator
     *
     * @param int $ord Char code
     */
    protected function isIsolateInitiator(int $ord): bool
    {
        return (($ord == UniConstant::RLI) || ($ord == UniConstant::LRI) || ($ord == UniConstant::FSI));
    }

    /**
     * Set level Isolated Level Run Sequences
     *
     * Builds the level runs, then either opens a new sequence for a run or
     * appends the run to the sequence whose isolate initiator its leading PDI
     * matches. Finally computes sos/eos for every sequence.
     */
    protected function setIsolatedLevelRunSequences(): void
    {
        $this->setLevelRunSequences();
        $numiso = 0;
        foreach ($this->runseq as $idx => $seq) {
            // Create a new level run sequence, and initialize it to contain just that level run
            $isorun = [
                'e' => $seq['e'],
                'edir' => $this->getEmbeddedDirection($seq['e']), // embedded direction
                'start' => $seq['start'], // position of the first char
                'end' => $seq['end'], // position of the last char
                'length' => ($seq['end'] - $seq['start'] + 1),
                'sos' => '', // start-of-sequence
                'eos' => '', // end-of-sequence
                'maxlevel' => 0,
                'item' => [],
            ];
            for ($jdx = 0; $jdx < $isorun['length']; ++$jdx) {
                $isorun['item'][$jdx] = $this->chardata[($seq['start'] + $jdx)];
            }

            $endchar = $isorun['item'][($jdx - 1)]['char'];

            // While the level run currently last in the sequence ends with an isolate initiator that has a
            // matching PDI, append the level run containing the matching PDI to the sequence.
            // (Note that this matching PDI must be the first character of its level run.)
            $pdimatch = -1;
            if ($this->isIsolateInitiator($endchar)) {
                // find the next sequence with the same level that starts with a PDI
                for ($kdx = ($idx + 1); $kdx < $this->numrunseq; ++$kdx) {
                    if (
                        ($this->runseq[$kdx]['e'] == $isorun['e'])
                        && ($this->chardata[$this->runseq[$kdx]['start']]['char'] == UniConstant::PDI)
                    ) {
                        // Tag the PDI with the index this run would get as a
                        // new sequence; corrected below if the run is instead
                        // appended to an existing sequence.
                        $pdimatch = $this->runseq[$kdx]['start'];
                        $this->chardata[$pdimatch]['pdimatch'] = $numiso;
                        break;
                    }
                }
            }

            // For each level run in the paragraph whose first character is not a PDI,
            // or is a PDI that does not match any isolate initiator
            if ($this->chardata[$seq['start']]['pdimatch'] >= 0) {
                // This run starts with a matched PDI: append it to the
                // sequence that holds the matching isolate initiator.
                $parent = $this->chardata[$seq['start']]['pdimatch'];

                $this->ilrs[$parent]['item'] = array_merge(
                    $this->ilrs[$parent]['item'],
                    $isorun['item']
                );

                $this->ilrs[$parent]['length'] += $isorun['length'];
                // 'end' is the position of the last character, so it is
                // replaced by the appended run's end, not accumulated:
                // setStartEndOfSequence() reads the character at 'end' + 1.
                $this->ilrs[$parent]['end'] = $isorun['end'];
                if ($pdimatch >= 0) {
                    // The PDI found above continues the parent sequence.
                    $this->chardata[$pdimatch]['pdimatch'] = $parent;
                }
            } else {
                $this->ilrs[$numiso] = $isorun;
                ++$numiso;
            }
        }

        $this->setStartEndOfSequence();
    }

    /**
     * Determine the start-of-sequence (sos) and end-of-sequence (eos) types, either L or R,
     * for each isolating run sequence.
     */
    protected function setStartEndOfSequence(): void
    {
        foreach ($this->ilrs as $key => $seq) {
            // For sos, compare the level of the first character in the sequence with the level of the character
            // preceding it in the paragraph (not counting characters removed by X9), and if there is none,
            // with the paragraph embedding level.
            $lev = $seq['item'][0]['level'];
            if ($seq['start'] == 0) {
                $prev = $this->pel;
            } else {
                $lastchr = $this->chardata[($seq['start'] - 1)];
                $prev = $lastchr['level'];
            }

            // If the higher level is odd, the sos is R; otherwise, it is L.
            $this->ilrs[$key]['sos'] = $this->getEmbeddedDirection(max($prev, $lev));

            // For eos, compare the level of the last character in the sequence with the level of the character
            // following it in the paragraph (not counting characters removed by X9), and if there is none or the
            // last character of the sequence is an isolate initiator (lacking a matching PDI), with the paragraph
            // embedding level.
            $lastchr = end($seq['item']);
            if ($lastchr === false) {
                // A sequence without items has no eos; keep processing the
                // remaining sequences instead of abandoning them.
                continue;
            }

            $lev = $lastchr['level'];
            if ((! isset($this->chardata[($seq['end'] + 1)]['level'])) || $this->isIsolateInitiator($lastchr['char'])) {
                $next = $this->pel;
            } else {
                $next = $this->chardata[($seq['end'] + 1)]['level'];
            }

            // If the higher level is odd, the eos is R; otherwise, it is L.
            $this->ilrs[$key]['eos'] = $this->getEmbeddedDirection(max($next, $lev));
        }
    }
}
|
||||
131
vendor/tecnickcom/tc-lib-unicode/src/Convert.php
vendored
Normal file
131
vendor/tecnickcom/tc-lib-unicode/src/Convert.php
vendored
Normal file
@@ -0,0 +1,131 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Convert.php
|
||||
*
|
||||
* @since 2011-05-23
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* This file is part of tc-lib-unicode software library.
|
||||
*/
|
||||
|
||||
namespace Com\Tecnick\Unicode;
|
||||
|
||||
use Com\Tecnick\Unicode\Exception as UniException;
|
||||
|
||||
/**
 * Com\Tecnick\Unicode\Convert
 *
 * Conversions between UTF-8 strings, arrays of single UTF-8 characters and
 * arrays of integer code points, layered on the parent Encoding class.
 *
 * @since       2015-07-13
 * @category    Library
 * @package     Unicode
 * @author      Nicola Asuni <info@tecnick.com>
 * @copyright   2011-2024 Nicola Asuni - Tecnick.com LTD
 * @license     http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
 * @link        https://github.com/tecnickcom/tc-lib-unicode
 */
class Convert extends \Com\Tecnick\Unicode\Convert\Encoding
{
    /**
     * Build the UTF-8 string for a single code point.
     *
     * @param int $ord Unicode character value to convert
     *
     * @return string Returns the unicode string
     */
    public function chr(int $ord): string
    {
        // Pack as a 32-bit big-endian integer, i.e. one UCS-4BE character.
        $ucs4 = pack('N', $ord);

        return mb_convert_encoding($ucs4, 'UTF-8', 'UCS-4BE');
    }

    /**
     * Return the code point of a UTF-8 character.
     *
     * @param string $chr Unicode character
     *
     * @return int Returns the unicode value
     *
     * @throws UniException when the character cannot be converted
     */
    public function ord(string $chr): int
    {
        $ucs4 = mb_convert_encoding($chr, 'UCS-4BE', 'UTF-8');
        $uni = unpack('N', $ucs4);

        $decoded = (($uni !== false) && isset($uni[1]) && is_int($uni[1]));
        if (! $decoded) {
            throw new UniException('Error converting string');
        }

        return $uni[1];
    }

    /**
     * Split a UTF-8 string into an array of single UTF-8 characters.
     *
     * @param string $str String to convert
     *
     * @return array<int, string>
     *
     * @throws UniException when the string cannot be split
     */
    public function strToChrArr(string $str): array
    {
        $chars = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars !== false) {
            return $chars;
        }

        throw new UniException('Error splitting string');
    }

    /**
     * Map an array of UTF-8 characters to their code points (integer values).
     *
     * @param array<string> $chars Array of UTF-8 chars
     *
     * @return array<int>
     */
    public function chrArrToOrdArr(array $chars): array
    {
        $toOrd = function (string $chr): int {
            return $this->ord($chr);
        };

        return array_map($toOrd, $chars);
    }

    /**
     * Map an array of code points to an array of UTF-8 characters.
     *
     * @param array<int> $ords Array of UTF-8 code points
     *
     * @return array<string>
     */
    public function ordArrToChrArr(array $ords): array
    {
        $toChr = function (int $ord): string {
            return $this->chr($ord);
        };

        return array_map($toChr, $ords);
    }

    /**
     * Convert a UTF-8 string to an array of code points (integer values).
     *
     * @param string $str String to convert
     *
     * @return array<int>
     */
    public function strToOrdArr(string $str): array
    {
        $chars = $this->strToChrArr($str);

        return $this->chrArrToOrdArr($chars);
    }

    /**
     * Join a slice of the $uniarr array into a string.
     *
     * @param array<string> $uniarr The input array of characters
     * @param int           $start  The position of the starting element
     * @param int|null      $end    The position of the first element that will not be returned;
     *                              null means up to the end of the array.
     */
    public function getSubUniArrStr(array $uniarr, int $start = 0, ?int $end = null): string
    {
        $end ??= count($uniarr);
        $length = ($end - $start);

        return implode('', array_slice($uniarr, $start, $length));
    }
}
|
||||
136
vendor/tecnickcom/tc-lib-unicode/src/Convert/Encoding.php
vendored
Normal file
136
vendor/tecnickcom/tc-lib-unicode/src/Convert/Encoding.php
vendored
Normal file
@@ -0,0 +1,136 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Encoding.php
|
||||
*
|
||||
* @since 2011-05-23
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*
|
||||
* This file is part of tc-lib-unicode software library.
|
||||
*/
|
||||
|
||||
namespace Com\Tecnick\Unicode\Convert;
|
||||
|
||||
use Com\Tecnick\Unicode\Data\Latin as Latin;
|
||||
|
||||
/**
 * Com\Tecnick\Unicode\Convert\Encoding
 *
 * Helpers for moving text between encodings: code points to Latin1, byte
 * strings to and from hexadecimal notation, and strings to UTF-8 / UTF-16BE.
 *
 * @since       2015-07-13
 * @category    Library
 * @package     Unicode
 * @author      Nicola Asuni <info@tecnick.com>
 * @copyright   2011-2024 Nicola Asuni - Tecnick.com LTD
 * @license     http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
 * @link        https://github.com/tecnickcom/tc-lib-unicode
 */
class Encoding
{
    /**
     * Converts UTF-8 code array to Latin1 codes
     *
     * Values below 256 are kept, values listed in the substitution table are
     * replaced, 0xFFFD is dropped and anything else becomes '?'.
     *
     * @param array<int> $ordarr Array containing UTF-8 code points
     *
     * @return array<int> Array containing Latin1 code points
     */
    public function uniArrToLatinArr(array $ordarr): array
    {
        $latarr = [];
        foreach ($ordarr as $code) {
            if ($code < 256) {
                $latarr[] = $code;
                continue;
            }

            if (array_key_exists($code, Latin::SUBSTITUTE)) {
                $latarr[] = Latin::SUBSTITUTE[$code];
                continue;
            }

            if ($code === 0xFFFD) {
                // The replacement character produces no output at all.
                continue;
            }

            $latarr[] = 63; // '?' character
        }

        return $latarr;
    }

    /**
     * Converts an array of Latin1 code points to a string
     *
     * @param array<int> $latarr Array of Latin1 code points
     */
    public function latinArrToStr(array $latarr): string
    {
        $str = '';
        foreach ($latarr as $code) {
            $str .= chr($code);
        }

        return $str;
    }

    /**
     * Convert a string to an hexadecimal string (byte string) representation (as in the PDF standard)
     *
     * Every byte becomes two lowercase hexadecimal digits.
     *
     * @param string $str String to convert
     */
    public function strToHex(string $str): string
    {
        return bin2hex($str);
    }

    /**
     * Convert an hexadecimal string (byte string - as in the PDF standard) to string
     *
     * The input is read two digits at a time; a trailing single digit is
     * decoded as a byte of its own.
     *
     * @param string $hex Hex code to convert
     */
    public function hexToStr(string $hex): string
    {
        if ($hex === '') {
            return '';
        }

        $decode = static function (string $byte): string {
            return chr((int) hexdec($byte));
        };

        return implode('', array_map($decode, str_split($hex, 2)));
    }

    /**
     * Converts a string with an unknown encoding to UTF-8
     *
     * @param string                    $str String to convert
     * @param null|string|array<string> $enc Array or comma separated list string of encodings;
     *                                       null uses the current detection order
     *
     * @return string UTF-8 encoded string
     */
    public function toUTF8(string $str, null|string|array $enc = null): string
    {
        $enc ??= (array) mb_detect_order();

        $detected = mb_detect_encoding($str, $enc);
        // When detection fails, pass null as the source encoding.
        $source = (($detected === false) ? null : $detected);

        return mb_convert_encoding($str, 'UTF-8', $source);
    }

    /**
     * Converts an UTF-8 string to UTF-16BE
     *
     * @param string $str UTF-8 String to convert
     *
     * @return string UTF-16BE encoded string
     */
    public function toUTF16BE(string $str): string
    {
        return mb_convert_encoding($str, 'UTF-16BE', 'UTF-8');
    }
}
|
||||
34
vendor/tecnickcom/tc-lib-unicode/src/Exception.php
vendored
Normal file
34
vendor/tecnickcom/tc-lib-unicode/src/Exception.php
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
<?php
|
||||
|
||||
/**
|
||||
* Exception.php
|
||||
*
|
||||
* @since 2015-07-28
|
||||
* @category Library
|
||||
* @package File
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2015-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-file
|
||||
*
|
||||
* This file is part of tc-lib-unicode software library.
|
||||
*/
|
||||
|
||||
namespace Com\Tecnick\Unicode;
|
||||
|
||||
/**
|
||||
* Com\Tecnick\Unicode\Exception
|
||||
*
|
||||
* Custom Exception class for this namespace. It adds no members to the
* built-in \Exception; it only provides a distinct type for callers to catch.
|
||||
*
|
||||
* @since 2015-07-28
|
||||
* @category Library
|
||||
* @package Unicode
|
||||
* @author Nicola Asuni <info@tecnick.com>
|
||||
* @copyright 2015-2024 Nicola Asuni - Tecnick.com LTD
|
||||
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
|
||||
* @link https://github.com/tecnickcom/tc-lib-unicode
|
||||
*/
|
||||
class Exception extends \Exception
|
||||
{
|
||||
}
|
||||
Reference in New Issue
Block a user