Feature: Labelprint für Kistenetiketten hinzugefügt

This commit is contained in:
2025-10-27 12:14:44 +01:00
parent 43bc416554
commit 14bae6c9ef
1068 changed files with 229014 additions and 1807 deletions

View File

@@ -0,0 +1,140 @@
<?php
/**
* Shaping.php
*
* @since 2011-05-23
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*
* This file is part of tc-lib-unicode software library.
*/
namespace Com\Tecnick\Unicode\Bidi;
use Com\Tecnick\Unicode\Data\Arabic as UniArabic;
use Com\Tecnick\Unicode\Data\Constant as UniConstant;
/**
* Com\Tecnick\Unicode\Bidi\Shaping
*
* @since 2015-07-13
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*
* @phpstan-import-type SeqData from \Com\Tecnick\Unicode\Bidi\Shaping\Arabic
*/
class Shaping extends \Com\Tecnick\Unicode\Bidi\Shaping\Arabic
{
    /**
     * Shaping
     * Cursively connected scripts, such as Arabic or Syriac,
     * require the selection of positional character shapes that depend on adjacent characters.
     * Shaping is logically applied after the Bidirectional Algorithm is used and is limited to
     * characters within the same directional run.
     *
     * The sequence is processed immediately; read the result with getSequence().
     *
     * @param SeqData $seq isolated Sequence array
     */
    public function __construct(array $seq)
    {
        $this->seq = $seq;
        $this->newchardata = $seq['item'];
        $this->process();
    }

    /**
     * Returns the processed sequence
     *
     * @return SeqData
     */
    public function getSequence(): array
    {
        return $this->seq;
    }

    /**
     * Process
     *
     * Indexes the joining-relevant characters, substitutes the shape of every
     * character whose original type is AL, merges Shadda combinations, drops
     * the characters marked as deleted and stores the re-indexed result in
     * $this->seq['item'].
     */
    protected function process(): void
    {
        $this->setAlChars();
        for ($idx = 0; $idx < $this->seq['length']; ++$idx) {
            if ($this->seq['item'][$idx]['otype'] == 'AL') {
                $thischar = $this->seq['item'][$idx];
                // 'x' is the position of this character inside $this->alchars (see setAlChars)
                $pos = $thischar['x'];
                $prevchar = (($pos > 0) ? $this->alchars[($pos - 1)] : null);
                $nextchar = ((($pos + 1) < $this->numalchars) ? $this->alchars[($pos + 1)] : null);
                $this->processAlChar($idx, $pos, $prevchar, $thischar, $nextchar);
            }
        }
        $this->combineShadda();
        $this->removeDeletedChars();
        $this->seq['item'] = array_values($this->newchardata);
        // Reset instead of unset(): unsetting a typed property leaves it uninitialized,
        // and any later read would throw an Error.
        $this->newchardata = [];
    }

    /**
     * Set AL chars array
     *
     * Collects every character that is of original type AL, a SPACE or a
     * ZERO WIDTH NON-JOINER into $this->alchars. Each collected entry stores
     * the index of the character in the sequence under 'i', and the sequence
     * item stores its position inside $this->alchars under 'x'.
     */
    protected function setAlChars(): void
    {
        $this->alchars = [];
        $this->numalchars = 0;
        for ($idx = 0; $idx < $this->seq['length']; ++$idx) {
            $item = $this->seq['item'][$idx];
            if (
                ($item['otype'] == 'AL')
                || ($item['char'] == UniConstant::SPACE)
                || ($item['char'] == UniConstant::ZERO_WIDTH_NON_JOINER)
            ) {
                // Set 'i' AFTER copying the item: the item may already carry an 'i' key,
                // which must not override the real sequence index used by processAlChar().
                $item['i'] = $idx;
                $this->alchars[$this->numalchars] = $item;
                $this->seq['item'][$idx]['x'] = $this->numalchars;
                ++$this->numalchars;
            }
        }
    }

    /**
     * Combine characters that can occur with Arabic Shadda (0651 HEX, 1617 DEC).
     * Putting the combining mark and shadda in the same glyph allows
     * to avoid the two marks overlapping each other in an illegible manner.
     *
     * A Shadda directly followed by a character listed in UniArabic::DIACRITIC is
     * marked as deleted (-1) and the following character is replaced by the combined glyph.
     */
    protected function combineShadda(): void
    {
        $last = ($this->seq['length'] - 1);
        for ($idx = 0; $idx < $last; ++$idx) {
            $cur = $this->newchardata[$idx]['char'];
            $nxt = $this->newchardata[($idx + 1)]['char'];
            if (
                ($cur == UniArabic::SHADDA)
                && ($nxt >= 0) && (isset(UniArabic::DIACRITIC[$nxt]))
            ) {
                $this->newchardata[$idx]['char'] = -1;
                $this->newchardata[($idx + 1)]['char'] = UniArabic::DIACRITIC[$nxt];
            }
        }
    }

    /**
     * Remove marked characters
     *
     * Characters whose code point was set to a negative value are dropped.
     * Keys are not re-indexed here; process() does that with array_values().
     */
    protected function removeDeletedChars(): void
    {
        foreach ($this->newchardata as $key => $value) {
            if ($value['char'] < 0) {
                unset($this->newchardata[$key]);
            }
        }
    }
}

View File

@@ -0,0 +1,266 @@
<?php
/**
* Arabic.php
*
* @since 2011-05-23
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*
* This file is part of tc-lib-unicode software library.
*/
namespace Com\Tecnick\Unicode\Bidi\Shaping;
use Com\Tecnick\Unicode\Data\Arabic as UniArabic;
/**
* Com\Tecnick\Unicode\Bidi\Shaping\Arabic
*
* @since 2015-07-13
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*
* @phpstan-type CharData array{
* 'char': int,
* 'i': int,
* 'level': int,
* 'otype': string,
* 'pdimatch': int,
* 'pos': int,
* 'type': string,
* 'x': int,
* }
*
* @phpstan-type SeqData array{
* 'e': int,
* 'edir': string,
* 'end': int,
* 'eos': string,
* 'length': int,
* 'maxlevel': int,
* 'sos': string,
* 'start': int,
* 'item': array<int, CharData>,
* }
*/
abstract class Arabic
{
    /**
     * Sequence to process and return
     *
     * @var SeqData
     */
    protected array $seq = [
        'e' => 0,
        'edir' => '',
        'end' => 0,
        'eos' => '',
        'length' => 0,
        'maxlevel' => 0,
        'sos' => '',
        'start' => 0,
        'item' => [],
    ];

    /**
     * Array of processed chars
     *
     * @var array<int, CharData>
     */
    protected array $newchardata = [];

    /**
     * Array of AL characters
     *
     * @var array<int, CharData>
     */
    protected array $alchars = [];

    /**
     * Number of AL characters
     */
    protected int $numalchars = 0;

    /**
     * Check if it is a LAA LETTER
     * (the previous character is LAM and the current one has an entry in UniArabic::LAA)
     *
     * @param ?CharData $prevchar Previous char
     * @param CharData  $thischar Current char
     */
    protected function isLaaLetter(?array $prevchar, array $thischar): bool
    {
        return ($prevchar !== null)
            && ($prevchar['char'] == UniArabic::LAM)
            && (isset(UniArabic::LAA[$thischar['char']]));
    }

    /**
     * Check next char
     *
     * True when a next char exists, its original type is AL or NSM, it has the
     * same resolved type as the current char and it is not the Arabic question mark.
     *
     * @param CharData  $thischar Current char
     * @param ?CharData $nextchar Next char
     */
    protected function hasNextChar(array $thischar, ?array $nextchar): bool
    {
        return (($nextchar !== null)
            && (($nextchar['otype'] == 'AL') || ($nextchar['otype'] == 'NSM'))
            && ($nextchar['type'] == $thischar['type'])
            && ($nextchar['char'] != UniArabic::QUESTION_MARK)
        );
    }

    /**
     * Check previous char
     *
     * True when a previous char exists, its original type is AL or NSM and it
     * has the same resolved type as the current char.
     *
     * @param ?CharData $prevchar Previous char
     * @param CharData  $thischar Current char
     */
    protected function hasPrevChar(?array $prevchar, array $thischar): bool
    {
        return ((($prevchar !== null)
            && (($prevchar['otype'] == 'AL') || ($prevchar['otype'] == 'NSM'))
            && ($prevchar['type'] == $thischar['type']))
        );
    }

    /**
     * Check if it is a middle character (joinable on both sides)
     *
     * @param ?CharData $prevchar Previous char
     * @param CharData  $thischar Current char
     * @param ?CharData $nextchar Next char
     */
    protected function isMiddleChar(?array $prevchar, array $thischar, ?array $nextchar): bool
    {
        return ($this->hasPrevChar($prevchar, $thischar) && $this->hasNextChar($thischar, $nextchar));
    }

    /**
     * Check if it is a final character
     *
     * True when there is a joinable previous char, or when the next char is
     * the Arabic question mark.
     *
     * @param ?CharData $prevchar Previous char
     * @param CharData  $thischar Current char
     * @param ?CharData $nextchar Next char
     */
    protected function isFinalChar(?array $prevchar, array $thischar, ?array $nextchar): bool
    {
        if ($this->hasPrevChar($prevchar, $thischar)) {
            return true;
        }
        return (($nextchar !== null) && ($nextchar['char'] == UniArabic::QUESTION_MARK));
    }

    /**
     * Set initial or middle char
     *
     * When the previous char is listed in UniArabic::END the initial form
     * (index 2) is used, otherwise the medial form (index 3).
     * Nothing is changed when the substitution array has no such form.
     *
     * @param int                    $idx       Current index
     * @param ?CharData              $prevchar  Previous char
     * @param CharData               $thischar  Current char
     * @param array<int, array<int>> $arabicarr Substitution array
     */
    protected function setMiddleChar(int $idx, ?array $prevchar, array $thischar, array $arabicarr): void
    {
        // strict null check, consistent with setFinalChar()
        if (($prevchar !== null) && in_array($prevchar['char'], UniArabic::END)) {
            if (isset($arabicarr[$thischar['char']][2])) {
                // initial
                $this->newchardata[$idx]['char'] = $arabicarr[$thischar['char']][2];
            }
        } elseif (isset($arabicarr[$thischar['char']][3])) {
            // medial
            $this->newchardata[$idx]['char'] = $arabicarr[$thischar['char']][3];
        }
    }

    /**
     * Set initial char
     *
     * @param int                    $idx       Current index
     * @param CharData               $thischar  Current char
     * @param array<int, array<int>> $arabicarr Substitution array
     */
    protected function setInitialChar(int $idx, array $thischar, array $arabicarr): void
    {
        // Check the same key that is read below, so a missing form can never be dereferenced.
        $code = $thischar['char'];
        if (isset($arabicarr[$code][2])) {
            $this->newchardata[$idx]['char'] = $arabicarr[$code][2];
        }
    }

    /**
     * Set final char
     *
     * Handles the LAM + LAM + HEH sequence as the "Allah" ligature; otherwise
     * selects the isolated form (index 0) when the previous char is listed in
     * UniArabic::END, or the final form (index 1).
     *
     * @param int                    $idx       Current index
     * @param ?CharData              $prevchar  Previous char
     * @param CharData               $thischar  Current char
     * @param array<int, array<int>> $arabicarr Substitution array
     */
    protected function setFinalChar(int $idx, ?array $prevchar, array $thischar, array $arabicarr): void
    {
        if (
            ($idx > 1)
            && ($thischar['char'] == UniArabic::HEH)
            && ($this->seq['item'][($idx - 1)]['char'] == UniArabic::LAM)
            && ($this->seq['item'][($idx - 2)]['char'] == UniArabic::LAM)
        ) {
            // Allah Word: the two LAMs are marked as deleted and HEH becomes the ligature
            $this->newchardata[($idx - 2)]['char'] = -1;
            $this->newchardata[($idx - 1)]['char'] = -1;
            $this->newchardata[$idx]['char'] = UniArabic::LIGATURE_ALLAH_ISOLATED_FORM;
        } elseif (($prevchar !== null) && in_array($prevchar['char'], UniArabic::END)) {
            if (isset($arabicarr[$thischar['char']][0])) {
                // isolated
                $this->newchardata[$idx]['char'] = $arabicarr[$thischar['char']][0];
            }
        } elseif (isset($arabicarr[$thischar['char']][1])) {
            // final
            $this->newchardata[$idx]['char'] = $arabicarr[$thischar['char']][1];
        }
    }

    /**
     * Process AL character
     *
     * Picks the substitution table (UniArabic::LAA for a LAM ligature, otherwise
     * UniArabic::SUBSTITUTE), selects the positional form of the current
     * character and, for a LAM ligature, marks the preceding LAM as deleted.
     *
     * @param int       $idx      Current index
     * @param int       $pos      Current char position
     * @param ?CharData $prevchar Previous char
     * @param CharData  $thischar Current char
     * @param ?CharData $nextchar Next char
     */
    protected function processAlChar(int $idx, int $pos, ?array $prevchar, array $thischar, ?array $nextchar): void
    {
        $laaletter = $this->isLaaLetter($prevchar, $thischar);
        if ($laaletter) {
            $arabicarr = UniArabic::LAA;
            // the LAM is absorbed by the ligature, so join against the character before it
            $prevchar = (($pos > 1) ? $this->alchars[($pos - 2)] : null);
        } else {
            $arabicarr = UniArabic::SUBSTITUTE;
        }
        if ($this->isMiddleChar($prevchar, $thischar, $nextchar)) {
            $this->setMiddleChar($idx, $prevchar, $thischar, $arabicarr);
        } elseif ($this->hasNextChar($thischar, $nextchar)) {
            $this->setInitialChar($idx, $thischar, $arabicarr);
        } elseif ($this->isFinalChar($prevchar, $thischar, $nextchar)) {
            // final
            $this->setFinalChar($idx, $prevchar, $thischar, $arabicarr);
        } elseif (isset($arabicarr[$thischar['char']][0])) {
            // isolated
            $this->newchardata[$idx]['char'] = $arabicarr[$thischar['char']][0];
        }
        // if laa letter
        if ($laaletter) {
            // mark characters to delete ('i' is the sequence index of the preceding LAM)
            $this->newchardata[($this->alchars[($pos - 1)]['i'])]['char'] = -1;
        }
    }
}

View File

@@ -0,0 +1,78 @@
<?php
/**
* StepBase.php
*
* @since 2011-05-23
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*
* This file is part of tc-lib-unicode software library.
*/
namespace Com\Tecnick\Unicode\Bidi;
/**
* Com\Tecnick\Unicode\Bidi\StepBase
*
* @since 2015-07-13
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*
* @phpstan-import-type SeqData from \Com\Tecnick\Unicode\Bidi\Shaping\Arabic
*/
abstract class StepBase
{
    /**
     * Store the sequence and, unless disabled, run the step on it right away.
     *
     * @param SeqData $seq     Isolated Sequence array
     * @param bool    $process If false disable automatic processing (this is a testing flag)
     */
    public function __construct(
        /**
         * Sequence to process and return
         */
        protected array $seq,
        $process = true
    ) {
        if (! $process) {
            return;
        }
        $this->process();
    }

    /**
     * Returns the sequence in its current (processed) state
     *
     * @return SeqData
     */
    public function getSequence(): array
    {
        return $this->seq;
    }

    /**
     * Process the current step
     */
    abstract protected function process(): void;

    /**
     * Generic step: call the named method once for every position
     * from 0 up to (but excluding) $this->seq['length'].
     *
     * @param string $method Processing method
     */
    public function processStep($method): void
    {
        $idx = 0;
        while ($idx < $this->seq['length']) {
            $this->$method($idx);
            ++$idx;
        }
    }
}

View File

@@ -0,0 +1,71 @@
<?php
/**
* StepI.php
*
* @since 2011-05-23
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*
* This file is part of tc-lib-unicode software library.
*/
namespace Com\Tecnick\Unicode\Bidi;
/**
* Com\Tecnick\Unicode\Bidi\StepI
*
* @since 2015-07-13
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*/
class StepI extends \Com\Tecnick\Unicode\Bidi\StepBase
{
    /**
     * Process I steps: reset the maximum level, then resolve the implicit
     * level of every character in the sequence.
     */
    protected function process(): void
    {
        $this->seq['maxlevel'] = 0;
        $this->processStep('processI');
    }

    /**
     * I1. For all characters with an even (left-to-right) embedding level, those of type R go up one level and those
     *     of type AN or EN go up two levels.
     * I2. For all characters with an odd (right-to-left) embedding level, those of type L, EN or AN go up one level.
     *
     * The sequence 'maxlevel' is updated with the resulting level.
     *
     * @param int $idx Current character position
     */
    protected function processI(int $idx): void
    {
        $type = $this->seq['item'][$idx]['type'];
        $isNumber = (($type == 'AN') || ($type == 'EN'));
        $isOddLevel = (($this->seq['item'][$idx]['level'] % 2) !== 0);

        // number of levels to climb for this character
        $raise = 0;
        if ($isOddLevel) {
            // I2
            if (($type == 'L') || $isNumber) {
                $raise = 1;
            }
        } elseif ($type == 'R') {
            // I1, strong right-to-left
            $raise = 1;
        } elseif ($isNumber) {
            // I1, numbers
            $raise = 2;
        }

        if ($raise !== 0) {
            $this->seq['item'][$idx]['level'] += $raise;
        }

        // update the maximum level
        $this->seq['maxlevel'] = max($this->seq['maxlevel'], $this->seq['item'][$idx]['level']);
    }
}

View File

@@ -0,0 +1,173 @@
<?php
/**
* StepL.php
*
* @since 2011-05-23
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*
* This file is part of tc-lib-unicode software library.
*/
namespace Com\Tecnick\Unicode\Bidi;
use Com\Tecnick\Unicode\Data\Constant as UniConstant;
use Com\Tecnick\Unicode\Data\Mirror as UniMirror;
/**
* Com\Tecnick\Unicode\Bidi\StepL
*
* @since 2015-07-13
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*
* @phpstan-import-type CharData from \Com\Tecnick\Unicode\Bidi\Shaping\Arabic
*/
class StepL
{
    /**
     * Array of characters data to return
     *
     * @var array<int, CharData>
     */
    protected array $chardata = [];

    /**
     * Number of characters in $this->chardata
     */
    protected int $numchars;

    /**
     * L steps
     *
     * The characters are first put back in their original order ('pos'),
     * then rules L1 and L2 are applied.
     *
     * @param array<int, CharData> $chardata Array of characters data
     * @param int                  $pel      Paragraph embedding level
     * @param int                  $maxlevel Maximum level
     */
    public function __construct(
        array $chardata,
        /**
         * Paragraph embedding level
         */
        protected int $pel,
        /**
         * Maximum level
         */
        protected int $maxlevel
    ) {
        // reorder chars by their original position
        usort(
            $chardata,
            static fn ($apos, $bpos): int => ($apos['pos'] - $bpos['pos'])
        );
        $this->chardata = $chardata;
        $this->numchars = count($this->chardata);
        $this->processL1();
        $this->processL2();
    }

    /**
     * Returns the processed array
     *
     * @return array<int, CharData>
     */
    public function getChrData(): array
    {
        return $this->chardata;
    }

    /**
     * L1. On each line, reset the embedding level of the following characters to the paragraph embedding level:
     *     1. Segment separators,
     *     2. Paragraph separators,
     *     3. Any sequence of whitespace characters and/or isolate formatting characters (FSI, LRI, RLI, and PDI)
     *        preceding a segment separator or paragraph separator, and
     *     4. Any sequence of whitespace characters and/or isolate formatting characters (FSI, LRI, RLI, and PDI)
     *        at the end of the line.
     */
    protected function processL1(): void
    {
        for ($idx = 0; $idx < $this->numchars; ++$idx) {
            $this->processL1b($idx, $idx);
        }
    }

    /**
     * Internal L1 step
     *
     * Scans forward from $jdx. The level of the character at $idx is reset to
     * the paragraph embedding level when the scan reaches a segment (S) or
     * paragraph (B) separator, or the end of the line, after crossing only
     * whitespace (WS) and isolate formatting characters. Any other character
     * stops the scan and leaves the level untouched.
     *
     * @param int $idx Main character index
     * @param int $jdx Current index
     */
    protected function processL1b(int $idx, int $jdx): void
    {
        if ($jdx >= $this->numchars) {
            return;
        }
        for ($pos = $jdx; $pos < $this->numchars; ++$pos) {
            $otype = $this->chardata[$pos]['otype'];
            if (($otype == 'S') || ($otype == 'B')) {
                // L1.1, L1.2 and L1.3
                $this->chardata[$idx]['level'] = $this->pel;
                return;
            }
            // the isolate test must look at the scanned character, not at the main one
            $char = $this->chardata[$pos]['char'];
            $isolate = (($char >= UniConstant::LRI) && ($char <= UniConstant::PDI));
            if (($otype != 'WS') && (! $isolate)) {
                // a regular character ends the whitespace/isolate run
                return;
            }
        }
        // L1.4: only whitespace and/or isolate formatting characters up to the end of the line
        $this->chardata[$idx]['level'] = $this->pel;
    }

    /**
     * L2. From the highest level found in the text to the lowest odd level on each line,
     *     including intermediate levels not actually present in the text,
     *     reverse any contiguous sequence of characters that are at that level or higher.
     *     This rule reverses a progressively larger series of substrings.
     *
     * Characters of resolved type R that have an entry in UniMirror::UNI are
     * replaced by that entry while they are collected for reversal (rule L4).
     */
    protected function processL2(): void
    {
        for ($level = $this->maxlevel; $level > 0; --$level) {
            $ordered = [];
            $reversed = [];
            foreach ($this->chardata as $chardatum) {
                if ($chardatum['level'] >= $level) {
                    if (($chardatum['type'] == 'R') && (isset(UniMirror::UNI[$chardatum['char']]))) {
                        // L4. A character is depicted by a mirrored glyph if and only if
                        //     (a) the resolved directionality of that character is R, and
                        //     (b) the Bidi_Mirrored property value of that character is true.
                        $chardatum['char'] = UniMirror::UNI[$chardatum['char']];
                    }
                    $reversed[] = $chardatum;
                } else {
                    if ($reversed !== []) {
                        // flush the pending run in reverse order
                        $ordered = array_merge($ordered, array_reverse($reversed));
                        $reversed = [];
                    }
                    $ordered[] = $chardatum;
                }
            }
            if ($reversed !== []) {
                $ordered = array_merge($ordered, array_reverse($reversed));
            }
            $this->chardata = $ordered;
        }
    }
}

View File

@@ -0,0 +1,312 @@
<?php
/**
* StepN.php
*
* @since 2011-05-23
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*
* This file is part of tc-lib-unicode software library.
*/
namespace Com\Tecnick\Unicode\Bidi;
use Com\Tecnick\Unicode\Data\Bracket as UniBracket;
/**
* Com\Tecnick\Unicode\Bidi\StepN
*
* @since 2015-07-13
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*/
class StepN extends \Com\Tecnick\Unicode\Bidi\StepBase
{
    /**
     * List of bracket pairs positions (opening position => closing position)
     *
     * @var array<int, int>
     */
    protected array $brackets = [];

    /**
     * Stack used to store bracket positions (position, normalized opening character)
     *
     * @var array<int, array{int, int}>
     */
    protected array $bstack = [];

    /**
     * Process N steps
     * Resolving Neutral and Isolate Formatting Types
     *
     * Neutral and isolate formatting (i.e. NI) characters are resolved one isolating run sequence at a time.
     * Its results are that all NIs become either R or L. Generally, NIs take on the direction of the surrounding text.
     * In case of a conflict, they take on the embedding direction.
     * At isolating run sequence boundaries where the type of the character on the other side of the boundary
     * is required, the type assigned to sos or eos is used.
     *
     * Bracket pairs within an isolating run sequence are processed as units so that both the opening and the closing
     * paired bracket in a pair resolve to the same direction. Note that this rule is applied based on the current
     * bidirectional character type of each paired bracket and not the original type, as this could have changed under
     * X6. The current bidirectional character type may also have changed under a previous iteration of the for loop in
     * N0 in the case of nested bracket pairs.
     */
    protected function process(): void
    {
        $this->processStep('getBracketPairs');
        $this->processN0();
        $this->processStep('processN1');
        $this->processStep('processN2');
    }

    /**
     * BD16. Find all bracket pairs
     *
     * Opening brackets are pushed on the stack. A closing bracket is paired with
     * the nearest matching opening bracket still on the stack; that opening
     * bracket and everything pushed after it are then removed from the stack.
     *
     * @param int $idx Current character position
     */
    protected function getBracketPairs(int $idx): void
    {
        $char = $this->seq['item'][$idx]['char'];
        if (isset(UniBracket::OPEN[$char])) {
            // process open bracket (0x3008 is handled as its equivalent 0x2329)
            if ($char == 0x3008) {
                $char = 0x2329;
            }
            $this->bstack[] = [$idx, $char];
        } elseif (isset(UniBracket::CLOSE[$char])) {
            // process closing bracket (0x3009 is handled as its equivalent 0x232A)
            if ($char == 0x3009) {
                $char = 0x232A;
            }
            // find matching opening bracket, searching from the top of the stack
            $tmpstack = $this->bstack;
            while ($tmpstack !== []) {
                $item = array_pop($tmpstack);
                if ($char == UniBracket::OPEN[$item[1]]) {
                    $this->brackets[$item[0]] = $idx;
                    $this->bstack = $tmpstack;
                    // Stop at the first (innermost) match: continuing would also pair
                    // this closing bracket with outer openers of the same kind.
                    break;
                }
            }
        }
        // Sort the list of pairs of text positions in ascending order
        // based on the text position of the opening paired bracket.
        ksort($this->brackets);
    }

    /**
     * Return the normalized char type for the N0 step
     * Within this scope, bidirectional types EN and AN are treated as R.
     *
     * @param string $type Char type
     */
    protected function getN0Type(string $type): string
    {
        return ((($type == 'AN') || ($type == 'EN')) ? 'R' : $type);
    }

    /**
     * N0. Process bracket pairs in an isolating run sequence sequentially in the logical order of the text positions
     *     of the opening paired brackets.
     */
    protected function processN0(): void
    {
        // direction opposite to the embedding direction
        $odir = (($this->seq['edir'] == 'L') ? 'R' : 'L');
        // For each bracket-pair element in the list of pairs of text positions
        foreach ($this->brackets as $open => $close) {
            if ($this->processInsideBrackets($open, $close, $odir)) {
                for ($jdx = ($open - 1); $jdx >= 0; --$jdx) {
                    $btype = $this->getN0Type($this->seq['item'][$jdx]['type']);
                    if ($btype == $odir) {
                        // 1. If the preceding strong type is also opposite the embedding direction,
                        //    context is established, so set the type for both brackets in the pair to that direction.
                        $this->setBracketsType($open, $close, $odir);
                        break;
                    } elseif ($btype == $this->seq['edir']) {
                        // 2. Otherwise set the type for both brackets in the pair to the embedding direction.
                        $this->setBracketsType($open, $close, $this->seq['edir']);
                        break;
                    }
                }
                if ($jdx < 0) {
                    // no strong type before the opening bracket: use the start-of-sequence type
                    $this->setBracketsType($open, $close, $this->seq['sos']);
                }
            }
            // d. Otherwise, there are no strong types within the bracket pair. Therefore, do not set the type for that
            //    bracket pair. Note that if the enclosed text contains no strong types the bracket pairs will both
            //    resolve to the same level when resolved individually using rules N1 and N2.
        }
    }

    /**
     * Inspect the bidirectional types of the characters enclosed within the bracket pair.
     *
     * If a type matching the embedding direction is found, both brackets are
     * set to the embedding direction and false is returned.
     *
     * @param int    $open  Open bracket entry
     * @param int    $close Close bracket entry
     * @param string $odir  Opposite direction (L or R)
     *
     * @return bool True if only the opposite direction was found inside the pair,
     *              so the context before the opening bracket still has to be checked
     */
    protected function processInsideBrackets(int $open, int $close, string $odir): bool
    {
        $opposite = false;
        // a. Inspect the bidirectional types of the characters enclosed within the bracket pair.
        for ($jdx = ($open + 1); $jdx < $close; ++$jdx) {
            $btype = $this->getN0Type($this->seq['item'][$jdx]['type']);
            // b. If any strong type (either L or R) matching the embedding direction is found,
            //    set the type for both brackets in the pair to match the embedding direction.
            if ($btype == $this->seq['edir']) {
                $this->setBracketsType($open, $close, $this->seq['edir']);
                break;
            } elseif ($btype === $odir) {
                // c. Otherwise, if there is a strong type it must be opposite the embedding direction.
                $opposite = true;
            }
        }
        // Therefore, test for an established context with a preceding strong type by checking backwards before
        // the opening paired bracket until the first strong type (L, R, or sos) is found.
        // ($jdx === $close means the loop above ended without a break.)
        return (($jdx === $close) && $opposite);
    }

    /**
     * Set the brackets type
     *
     * @param int    $open  Open bracket entry
     * @param int    $close Close bracket entry
     * @param string $type  Type
     */
    protected function setBracketsType(int $open, int $close, string $type): void
    {
        $this->seq['item'][$open]['type'] = $type;
        $this->seq['item'][$close]['type'] = $type;
        // Any number of characters that had original bidirectional character type NSM
        // prior to the application of W1 that immediately follow a paired bracket which
        // changed to L or R under N0 should change to match the type of their preceding bracket.
        $next = ($close + 1);
        while (isset($this->seq['item'][$next]['otype']) && ($this->seq['item'][$next]['otype'] == 'NSM')) {
            $this->seq['item'][$next]['type'] = $type;
            ++$next;
        }
    }

    /**
     * N1. A sequence of NIs takes the direction of the surrounding strong text if the text on both sides has the same
     *     direction. European and Arabic numbers act as if they were R in terms of their influence on NIs.
     *     The start-of-sequence (sos) and end-of-sequence (eos) types are used at isolating run sequence boundaries.
     *
     * @param int $idx Current character position
     */
    protected function processN1(int $idx): void
    {
        if ($this->seq['item'][$idx]['type'] == 'NI') {
            $bdx = ($idx - 1);
            $prev = $this->processN1prev($bdx);
            if ($prev === '') {
                return;
            }
            $jdx = $this->getNextN1Char($idx);
            $next = $this->processN1next($jdx);
            if ($next === '') {
                return;
            }
            if ($next === $prev) {
                // same direction on both sides: resolve the whole NI run
                for ($bdx = $idx; (($bdx < $jdx) && ($bdx < $this->seq['length'])); ++$bdx) {
                    $this->seq['item'][$bdx]['type'] = $next;
                }
            }
        }
    }

    /**
     * Get the direction of the preceding character
     *
     * @param int $bdx Position of the preceding character (clamped to 0 when negative)
     *
     * @return string 'L' or 'R', the sos type when there is no preceding character,
     *                or an empty string when the preceding type gives no direction
     */
    protected function processN1prev(int &$bdx): string
    {
        if ($bdx < 0) {
            $bdx = 0;
            return $this->seq['sos'];
        }
        if (in_array($this->seq['item'][$bdx]['type'], ['R', 'AN', 'EN'])) {
            return 'R';
        }
        if ($this->seq['item'][$bdx]['type'] == 'L') {
            return 'L';
        }
        return '';
    }

    /**
     * Get the direction of the next character
     *
     * @param int $jdx Position of the next character (clamped to the sequence length when beyond it)
     *
     * @return string 'L' or 'R', the eos type when there is no next character,
     *                or an empty string when the next type gives no direction
     */
    protected function processN1next(int &$jdx): string
    {
        if ($jdx >= $this->seq['length']) {
            $jdx = $this->seq['length'];
            return $this->seq['eos'];
        }
        if (in_array($this->seq['item'][$jdx]['type'], ['R', 'AN', 'EN'])) {
            return 'R';
        }
        if ($this->seq['item'][$jdx]['type'] == 'L') {
            return 'L';
        }
        return '';
    }

    /**
     * Return the index of the next valid char for N1
     * (the first position after $idx whose type is not NI, or the sequence length)
     *
     * @param int $idx Start index
     */
    protected function getNextN1Char(int $idx): int
    {
        $jdx = ($idx + 1);
        while (($jdx < $this->seq['length']) && ($this->seq['item'][$jdx]['type'] == 'NI')) {
            ++$jdx;
        }
        return $jdx;
    }

    /**
     * N2. Any remaining NIs take the embedding direction.
     *
     * @param int $idx Current character position
     */
    protected function processN2(int $idx): void
    {
        if ($this->seq['item'][$idx]['type'] == 'NI') {
            $this->seq['item'][$idx]['type'] = $this->seq['edir'];
        }
    }
}

View File

@@ -0,0 +1,89 @@
<?php
/**
* StepP.php
*
* @since 2011-05-23
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*
* This file is part of tc-lib-unicode software library.
*/
namespace Com\Tecnick\Unicode\Bidi;
use Com\Tecnick\Unicode\Data\Constant as UniConstant;
use Com\Tecnick\Unicode\Data\Type as UniType;
/**
* Com\Tecnick\Unicode\Bidi\StepP
*
* @since 2015-07-13
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*/
class StepP
{
    /**
     * P Steps for Bidirectional algorithm
     *
     * @param array<int> $ordarr Array of UTF-8 codepoints
     */
    public function __construct(
        /**
         * Array of UTF-8 codepoints
         */
        protected array $ordarr
    ) {
    }

    /**
     * Get the Paragraph Embedding Level
     *
     * @return int 1 when the first strong character outside any isolate is R or AL, 0 otherwise
     */
    public function getPel(): int
    {
        // P2. In each paragraph, find the first character of type L, AL, or R
        //     while skipping over any characters between an isolate initiator and its matching PDI or,
        //     if it has no matching PDI, the end of the paragraph.
        // P3. If a character is found in P2 and it is of type AL or R,
        //     then set the paragraph embedding level to one; otherwise, set it to zero.
        $depth = 0;
        foreach ($this->ordarr as $ord) {
            $depth = $this->getIsolateLevel($ord, $depth);
            if (($depth != 0) || (! isset(UniType::UNI[$ord]))) {
                // inside an isolate, or a code point without a known type
                continue;
            }
            $level = match (UniType::UNI[$ord]) {
                'L' => 0,
                'R', 'AL' => 1,
                default => null,
            };
            if ($level !== null) {
                return $level;
            }
        }
        return 0;
    }

    /**
     * Update the level of explicit directional isolates
     *
     * @param int $ord     Code point to inspect
     * @param int $isolate Current isolate nesting level
     *
     * @return int New nesting level, never negative
     */
    protected function getIsolateLevel(int $ord, int $isolate): int
    {
        $opens = (($ord == UniConstant::LRI) || ($ord == UniConstant::RLI) || ($ord == UniConstant::FSI));
        if ($opens) {
            return max(0, $isolate + 1);
        }
        if ($ord == UniConstant::PDI) {
            return max(0, $isolate - 1);
        }
        return max(0, $isolate);
    }
}

View File

@@ -0,0 +1,215 @@
<?php
/**
* StepW.php
*
* @since 2011-05-23
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*
* This file is part of tc-lib-unicode software library.
*/
namespace Com\Tecnick\Unicode\Bidi;
use Com\Tecnick\Unicode\Data\Constant as UniConstant;
/**
* Com\Tecnick\Unicode\Bidi\StepW
*
* @since 2015-07-13
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*/
class StepW extends \Com\Tecnick\Unicode\Bidi\StepBase
{
/**
 * Process W steps
 * Resolving Weak Types
 *
 * Rules W1 to W7 are applied in order, each one over the whole sequence
 * before the next one starts.
 */
protected function process(): void
{
    $rules = ['processW1', 'processW2', 'processW3', 'processW4', 'processW5', 'processW6', 'processW7'];
    foreach ($rules as $rule) {
        $this->processStep($rule);
    }
}
/**
 * W1. Examine each nonspacing mark (NSM) in the isolating run sequence, and
 *     change the type of the NSM to Other Neutral if the previous character is an isolate initiator or PDI, and
 *     to the type of the previous character otherwise.
 *     If the NSM is at the start of the isolating run sequence, it will get the type of sos.
 *     (Note that in an isolating run sequence, an isolate initiator followed by an NSM or any type
 *     other than PDI must be an overflow isolate initiator.)
 *
 * @param int $idx Current character position
 */
protected function processW1(int $idx): void
{
    if ($this->seq['item'][$idx]['type'] != 'NSM') {
        return;
    }
    if ($idx < 1) {
        // start of the sequence: there is no previous character
        $this->seq['item'][$idx]['type'] = $this->seq['sos'];
        return;
    }
    $before = $this->seq['item'][($idx - 1)];
    $afterIsolate = (($before['char'] >= UniConstant::LRI) && ($before['char'] <= UniConstant::PDI));
    $this->seq['item'][$idx]['type'] = ($afterIsolate ? 'ON' : $before['type']);
}
/**
 * W2. Search backward from each instance of a European number until the first strong type (R, L, AL, or sos)
 *     is found. If an AL is found, change the type of the European number to Arabic number.
 *
 * @param int $idx Current character position
 */
protected function processW2(int $idx): void
{
    if ($this->seq['item'][$idx]['type'] != 'EN') {
        return;
    }
    for ($back = ($idx - 1); $back >= 0; --$back) {
        $strong = $this->seq['item'][$back]['type'];
        if ($strong == 'AL') {
            $this->seq['item'][$idx]['type'] = 'AN';
            return;
        }
        if (($strong == 'R') || ($strong == 'L')) {
            // another strong type comes first: the number stays European
            return;
        }
    }
}
/**
 * W3. Change all ALs to R.
 *
 * @param int $idx Current character position
 */
protected function processW3(int $idx): void
{
    if ($this->seq['item'][$idx]['type'] != 'AL') {
        return;
    }
    $this->seq['item'][$idx]['type'] = 'R';
}
/**
* W4. A single European separator between two European numbers changes to a European number.
* A single common separator between two numbers of the same type changes to that type.
*
* @param int $idx Current character position
*/
protected function processW4(int $idx): void
{
if (in_array($this->seq['item'][$idx]['type'], ['ES', 'CS'])) {
$bdx = ($idx - 1);
$fdx = ($idx + 1);
if (
($bdx >= 0)
&& ($fdx < $this->seq['length'])
&& $this->seq['item'][$bdx]['type'] == $this->seq['item'][$fdx]['type']
&& in_array($this->seq['item'][$bdx]['type'], ['EN', 'AN'])
) {
$this->seq['item'][$idx]['type'] = $this->seq['item'][$bdx]['type'];
}
}
}
/**
* W5. A sequence of European terminators adjacent to European numbers changes to all European numbers.
*
* @param int $idx Current character position
*/
protected function processW5(int $idx): void
{
if ($this->seq['item'][$idx]['type'] == 'ET') {
$this->processW5a($idx);
$this->processW5b($idx);
}
}
/**
* W5a
*
* @param int $idx Current character position
*/
protected function processW5a(int $idx): void
{
for ($jdx = ($idx - 1); $jdx >= 0; --$jdx) {
if ($this->seq['item'][$jdx]['type'] == 'EN') {
$this->seq['item'][$idx]['type'] = 'EN';
} else {
break;
}
}
}
/**
* W5b
*
* @param int $idx Current character position
*/
protected function processW5b(int $idx): void
{
if ($this->seq['item'][$idx]['type'] == 'ET') {
for ($jdx = ($idx + 1); $jdx < $this->seq['length']; ++$jdx) {
if ($this->seq['item'][$jdx]['type'] == 'EN') {
$this->seq['item'][$idx]['type'] = 'EN';
} elseif ($this->seq['item'][$jdx]['type'] != 'ET') {
break;
}
}
}
}
/**
* W6. Otherwise, separators and terminators change to Other Neutral.
*
* @param int $idx Current character position
*/
protected function processW6(int $idx): void
{
if (in_array($this->seq['item'][$idx]['type'], ['ET', 'ES', 'CS', 'ON'])) {
$this->seq['item'][$idx]['type'] = 'ON';
}
}
/**
* W7. Search backward from each instance of a European number until the first strong type (R, L, or sos) is found.
* If an L is found, then change the type of the European number to L.
*
* @param int $idx Current character position
*/
protected function processW7(int $idx): void
{
if ($this->seq['item'][$idx]['type'] == 'EN') {
for ($jdx = ($idx - 1); $jdx >= 0; --$jdx) {
if ($this->seq['item'][$jdx]['type'] == 'L') {
$this->seq['item'][$idx]['type'] = 'L';
break;
} elseif ($this->seq['item'][$jdx]['type'] == 'R') {
break;
}
}
if (($this->seq['sos'] == 'L') && ($jdx < 0)) {
$this->seq['item'][$idx]['type'] = 'L';
}
}
}
}

View File

@@ -0,0 +1,424 @@
<?php
/**
* StepX.php
*
* @since 2011-05-23
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*
* This file is part of tc-lib-unicode software library.
*/
namespace Com\Tecnick\Unicode\Bidi;
use Com\Tecnick\Unicode\Data\Constant as UniConstant;
use Com\Tecnick\Unicode\Data\Type as UniType;
/**
* Com\Tecnick\Unicode\Bidi\StepX
*
* @since 2015-07-13
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*
* @phpstan-import-type SeqData from \Com\Tecnick\Unicode\Bidi\Shaping\Arabic
* @phpstan-import-type CharData from \Com\Tecnick\Unicode\Bidi\Shaping\Arabic
*
* @phpstan-type DssData array{
* 'ord': int,
* 'cel': int,
* 'dos': string,
* 'dis': bool,
* }
*/
class StepX
{
    /**
     * Maximum embedding level
     */
    public const MAX_DEPTH = 125;

    /**
     * Directional Status Stack
     *
     * @var array<int, DssData>
     */
    protected array $dss = [];

    /**
     * Overflow Isolate Count
     */
    protected int $oic = 0;

    /**
     * Overflow Embedding Count
     */
    protected int $oec = 0;

    /**
     * Valid Isolate Count
     */
    protected int $vic = 0;

    /**
     * Array of characters data to return
     *
     * @var array<int, CharData>
     */
    protected array $chardata = [];

    /**
     * X Steps for Bidirectional algorithm
     * Explicit Levels and Directions
     *
     * @param array<int> $ordarr Array of UTF-8 codepoints
     * @param int        $pel    Paragraph embedding level
     */
    public function __construct(
        /**
         * Array of UTF-8 codepoints
         */
        protected array $ordarr,
        int $pel
    ) {
        // - Push onto the stack an entry consisting of the paragraph embedding level,
        //   a neutral directional override status, and a false directional isolate status.
        $this->dss[] = [
            'ord' => -1, // dummy value, not used
            'cel' => $pel,
            'dos' => 'NI',
            'dis' => false,
        ];
        // - Process each character iteratively, applying rules X2 through X8.
        //   Only embedding levels from 0 through max_depth are valid in this phase.
        //   (Note that in the resolution of levels in rules I1 and I2,
        //   the maximum embedding level of max_depth+1 can be reached.)
        $this->processX();
    }

    /**
     * Returns the processed array
     *
     * @return array<int, CharData>
     */
    public function getChrData(): array
    {
        return $this->chardata;
    }

    /**
     * Calculate the Least Even: the smallest even number greater than the input
     *
     * @param int $num Number to process
     */
    protected function getLEven(int $num): int
    {
        return (2 + $num - ($num % 2));
    }

    /**
     * Calculate the Least Odd: the smallest odd number greater than the input
     *
     * @param int $num Number to process
     */
    protected function getLOdd(int $num): int
    {
        return (1 + $num + ($num % 2));
    }

    /**
     * Process X1: apply the explicit level rules to every code point of the input, in order
     */
    protected function processX(): void
    {
        foreach ($this->ordarr as $key => $ord) {
            $this->processXcase($key, $ord);
        }
    }

    /**
     * Process X1 case: dispatch a single code point to the matching rule (X2 to X7)
     *
     * @param int $pos Original character position in the input string
     * @param int $ord Char code
     *
     * @SuppressWarnings("PHPMD.CyclomaticComplexity")
     */
    protected function processXcase(int $pos, int $ord): void
    {
        $edss = end($this->dss);
        if ($edss === false) {
            return;
        }

        switch ($ord) {
            case UniConstant::RLE:
                // X2
                $this->setDss($this->getLOdd($edss['cel']), UniConstant::RLE, 'NI');
                break;
            case UniConstant::LRE:
                // X3
                $this->setDss($this->getLEven($edss['cel']), UniConstant::LRE, 'NI');
                break;
            case UniConstant::RLO:
                // X4
                $this->setDss($this->getLOdd($edss['cel']), UniConstant::RLO, 'R');
                break;
            case UniConstant::LRO:
                // X5
                $this->setDss($this->getLEven($edss['cel']), UniConstant::LRO, 'L');
                break;
            case UniConstant::RLI:
                // X5a - the initiator itself is kept, at the level of the enclosing scope
                $this->processChar($pos, $ord, $edss);
                $this->setDss($this->getLOdd($edss['cel']), UniConstant::RLI, 'NI', true, true, 1);
                break;
            case UniConstant::LRI:
                // X5b
                $this->processChar($pos, $ord, $edss);
                $this->setDss($this->getLEven($edss['cel']), UniConstant::LRI, 'NI', true, true, 1);
                break;
            case UniConstant::FSI:
                // X5c
                $this->processChar($pos, $ord, $edss);
                $this->processFsiCase($pos, $edss);
                break;
            case UniConstant::PDI:
                // X6a
                $this->processPdiCase($pos, $ord, $edss);
                break;
            case UniConstant::PDF:
                // X7
                $this->processPdfCase($edss);
                break;
            default:
                // X6
                $this->processChar($pos, $ord, $edss);
                break;
        }
    }

    /**
     * Set temporary data (X2 to X5)
     *
     * Pushes a new entry on the directional status stack when the requested level is valid and
     * no overflow is pending; otherwise only the relevant overflow counter is updated.
     *
     * @param int    $cel     Embedding Level
     * @param int    $ord     Char code
     * @param string $dos     Directional override status
     * @param bool   $dis     Directional isolate status
     * @param bool   $isolate True if Isolate initiator
     * @param int    $ivic    increment for the valid isolate count
     */
    protected function setDss(
        int $cel,
        int $ord,
        string $dos,
        bool $dis = false,
        bool $isolate = false,
        int $ivic = 0
    ): void {
        // X2 to X5
        // - Compute the least odd|even embedding level greater than the embedding level of the last entry
        //   on the directional status stack.
        // - If this new level would be valid, and the overflow isolate count and overflow embedding
        //   count are both zero, then this RLE is valid. Push an entry consisting of the new embedding
        //   level, neutral|left|right directional override status, and false directional isolate status onto the
        //   directional status stack.
        // - Otherwise, this is an overflow RLE. If the overflow isolate count is zero, increment the
        //   overflow embedding|isolate count by one. Leave all other variables unchanged.
        // A level is valid up to and including MAX_DEPTH, so only strictly greater levels overflow.
        if (($cel > self::MAX_DEPTH) || ($this->oic != 0) || ($this->oec != 0)) {
            if ($isolate) {
                ++$this->oic;
            } elseif ($this->oic == 0) {
                ++$this->oec;
            }

            return;
        }

        $this->vic += $ivic;
        $this->dss[] = [
            'ord' => $ord,
            'cel' => $cel,
            'dos' => $dos,
            'dis' => $dis,
        ];
    }

    /**
     * Push a char on the stack
     *
     * Appends the character to the output data using the embedding level and the directional
     * override status of the given stack entry.
     *
     * @param int     $pos  Original character position in the input string
     * @param int     $ord  Char code
     * @param DssData $edss Last entry in the Directional Status Stack
     */
    protected function pushChar(int $pos, int $ord, array $edss): void
    {
        // Code points missing from the type table fall back to the override status of the entry.
        $unitype = (UniType::UNI[$ord] ?? $edss['dos']);
        $this->chardata[] = [
            'char' => $ord,
            'i' => -1,
            'level' => $edss['cel'],
            'otype' => $unitype,
            'pdimatch' => -1,
            'pos' => $pos,
            'type' => (($edss['dos'] !== 'NI') ? $edss['dos'] : $unitype),
            'x' => -1,
        ];
    }

    /**
     * Process normal char (X6)
     *
     * @param int     $pos  Original character position in the input string
     * @param int     $ord  Char code
     * @param DssData $edss Last entry in the Directional Status Stack
     */
    protected function processChar(int $pos, int $ord, array $edss): void
    {
        // X6. For all types besides B, BN, RLE, LRE, RLO, LRO, PDF, RLI, LRI, FSI, and PDI:
        //     - Set the current character's embedding level to the embedding level
        //       of the last entry on the directional status stack.
        //     - Whenever the directional override status of the last entry on the directional status stack
        //       is not neutral, reset the current character type according to the directional override
        //       status of the last entry on the directional status stack.
        $unitype = (UniType::UNI[$ord] ?? null);
        if (($unitype == 'B') || ($unitype == 'BN')) {
            // B and BN characters are not added to the output data.
            return;
        }

        $this->pushChar($pos, $ord, $edss);
    }

    /**
     * Process the PDF type character
     *
     * @param DssData $edss Last entry in the Directional Status Stack
     */
    protected function processPdfCase(array $edss): void
    {
        // X7. With each PDF, perform the following steps:
        //     - If the overflow isolate count is greater than zero, do nothing. (This PDF is within the
        //       scope of an overflow isolate initiator. It either matches and terminates the scope of an
        //       overflow embedding initiator within that overflow isolate, or does not match any
        //       embedding initiator.)
        if ($this->oic > 0) {
            return;
        }

        //     - Otherwise, if the overflow embedding count is greater than zero, decrement it by one.
        //       (This PDF matches and terminates the scope of an overflow embedding initiator that is not
        //       within the scope of an overflow isolate initiator.)
        if ($this->oec > 0) {
            --$this->oec;
            return;
        }

        //     - Otherwise, if the directional isolate status of the last entry on the directional status
        //       stack is false, and the directional status stack contains at least two entries, pop the
        //       last entry from the directional status stack. (This PDF matches and terminates the scope
        //       of a valid embedding initiator. Since the stack has at least two entries, this pop does
        //       not leave the stack empty.)
        if (($edss['dis'] === false) && (count($this->dss) > 1)) {
            array_pop($this->dss);
        }

        //     - Otherwise, do nothing. (This PDF does not match any embedding initiator.)
    }

    /**
     * Process the PDI type character
     *
     * The PDI is always added to the output data, whether it closes a valid isolate,
     * closes an overflow isolate, or matches no isolate initiator at all.
     *
     * @param int     $pos  Original character position in the input string
     * @param int     $ord  Char code
     * @param DssData $edss Last entry in the Directional Status Stack
     */
    protected function processPdiCase(int $pos, int $ord, array $edss): void
    {
        // X6a. With each PDI, perform the following steps:
        if ($this->oic > 0) {
            //  - If the overflow isolate count is greater than zero, this PDI matches an overflow isolate
            //    initiator. Decrement the overflow isolate count by one.
            --$this->oic;
        } elseif ($this->vic > 0) {
            //  - Otherwise, if the valid isolate count is zero, this PDI does not match any isolate
            //    initiator, valid or overflow. Do nothing.
            //  - Otherwise, this PDI matches a valid isolate initiator.
            $this->closeValidIsolate($edss);
        }

        //  - In all cases, look up the last entry on the directional status stack left after the
        //    steps above and:
        //    - Set the PDI's level to the entry's embedding level.
        //    - If the entry's directional override status is not neutral, reset the current character type
        //      from PDI to L if the override status is left-to-right, and to R if the override status is
        //      right-to-left.
        $edss = end($this->dss);
        if ($edss === false) {
            return;
        }

        $this->pushChar($pos, $ord, $edss);
    }

    /**
     * Terminate the scope of the innermost valid isolate initiator (X6a, matching PDI)
     *
     * @param DssData $edss Last entry in the Directional Status Stack
     */
    protected function closeValidIsolate(array $edss): void
    {
        //  - Reset the overflow embedding count to zero. (This terminates the scope of those overflow
        //    embedding initiators within the scope of the matched isolate initiator whose scopes have
        //    not been terminated by a matching PDF, and which thus lack a matching PDF.)
        $this->oec = 0;
        //  - While the directional isolate status of the last entry on the stack is false, pop the
        //    last entry from the directional status stack. (This terminates the scope of those valid
        //    embedding initiators within the scope of the matched isolate initiator whose scopes have
        //    not been terminated by a matching PDF.)
        while (($edss['dis'] === false) && (count($this->dss) > 1)) {
            array_pop($this->dss);
            $edss = end($this->dss);
            if ($edss === false) {
                break;
            }
        }

        //  - Pop the last entry from the directional status stack and decrement the valid isolate
        //    count by one. (This terminates the scope of the matched isolate initiator.)
        //    The first entry belongs to the paragraph level and must never be removed.
        if (count($this->dss) > 1) {
            array_pop($this->dss);
        }

        --$this->vic;
    }

    /**
     * Process the FSI type character
     *
     * @param int     $pos  Original character position in the input string
     * @param DssData $edss Last entry in the Directional Status Stack
     */
    protected function processFsiCase(int $pos, array $edss): void
    {
        // X5c. With each FSI, apply rules P2 and P3 to the sequence of characters between the FSI and its
        //      matching PDI, or if there is no matching PDI, the end of the paragraph, as if this sequence
        //      of characters were a paragraph. If these rules decide on paragraph embedding level 1, treat
        //      the FSI as an RLI in rule X5a. Otherwise, treat it as an LRI in rule X5b.
        $content = $this->getIsolateContent($pos);
        // An empty isolate contains no strong character, so P3 resolves to level zero.
        $pel = (($content === []) ? 0 : (new StepP($content))->getPel());
        if ($pel == 0) {
            $this->setDss($this->getLEven($edss['cel']), UniConstant::LRI, 'NI', true, true, 1);
        } else {
            $this->setDss($this->getLOdd($edss['cel']), UniConstant::RLI, 'NI', true, true, 1);
        }
    }

    /**
     * Returns the code points located between the isolate initiator at the given position
     * and its matching PDI or, if there is no matching PDI, the end of the input.
     *
     * Neither the initiator nor the matching PDI are part of the result;
     * nested isolates are returned together with their own initiator and PDI.
     *
     * @param int $pos Position of the isolate initiator in the input array
     *
     * @return array<int>
     */
    protected function getIsolateContent(int $pos): array
    {
        $content = [];
        $depth = 0; // number of nested isolate initiators still open
        foreach (array_slice($this->ordarr, ($pos + 1)) as $ord) {
            if ($ord == UniConstant::PDI) {
                if ($depth == 0) {
                    // This PDI closes the initiator at $pos.
                    break;
                }

                --$depth;
            } elseif (
                ($ord == UniConstant::LRI)
                || ($ord == UniConstant::RLI)
                || ($ord == UniConstant::FSI)
            ) {
                ++$depth;
            }

            $content[] = $ord;
        }

        return $content;
    }
}

View File

@@ -0,0 +1,236 @@
<?php
/**
* StepXten.php
*
* @since 2011-05-23
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*
* This file is part of tc-lib-unicode software library.
*/
namespace Com\Tecnick\Unicode\Bidi;
use Com\Tecnick\Unicode\Data\Constant as UniConstant;
/**
* Com\Tecnick\Unicode\Bidi\StepXten
*
* @since 2015-07-13
* @category Library
* @package Unicode
* @author Nicola Asuni <info@tecnick.com>
* @copyright 2011-2024 Nicola Asuni - Tecnick.com LTD
* @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
* @link https://github.com/tecnickcom/tc-lib-unicode
*
* @phpstan-import-type SeqData from \Com\Tecnick\Unicode\Bidi\Shaping\Arabic
* @phpstan-import-type CharData from \Com\Tecnick\Unicode\Bidi\Shaping\Arabic
*/
class StepXten
{
    /**
     * Number of characters
     */
    protected int $numchars;

    /**
     * Array of Level Run sequences
     *
     * @var array<int, array{'start': int, 'end': int, 'e': int}>
     */
    protected array $runseq = [];

    /**
     * Number of Level Run sequences
     */
    protected int $numrunseq = 0;

    /**
     * Array of Isolated Level Run sequences
     *
     * @var array<int, SeqData>
     */
    protected array $ilrs = [];

    /**
     * X Steps for Bidirectional algorithm
     *
     * @param array<int, CharData> $chardata Array of UTF-8 codepoints
     * @param int                  $pel      Paragraph Embedding Level
     */
    public function __construct(
        /**
         * Array of characters data to return
         */
        protected array $chardata,
        /**
         * Paragraph Embedding Level
         */
        protected int $pel
    ) {
        $this->numchars = count($chardata);
        $this->setIsolatedLevelRunSequences();
    }

    /**
     * Get the Isolated Run Sequences
     *
     * @return array<int, SeqData>
     */
    public function getIsolatedLevelRunSequences(): array
    {
        return $this->ilrs;
    }

    /**
     * Get the embedded direction (L or R)
     *
     * @param int $level Embedding level: even levels are left-to-right, odd levels are right-to-left
     */
    protected function getEmbeddedDirection(int $level): string
    {
        return ((($level % 2) == 0) ? 'L' : 'R');
    }

    /**
     * Set the Level Run Sequences
     *
     * Splits the characters into maximal runs of adjacent characters that share the same
     * embedding level and stores the first position, last position and level of each run.
     */
    protected function setLevelRunSequences(): void
    {
        $start = 0;
        while ($start < $this->numchars) {
            $level = $this->chardata[$start]['level'];
            // Advance to the first position that no longer belongs to the current run.
            $next = ($start + 1);
            while (($next < $this->numchars) && ($this->chardata[$next]['level'] == $level)) {
                ++$next;
            }

            $this->runseq[] = [
                'start' => $start,
                'end' => ($next - 1),
                'e' => $level,
            ];
            ++$this->numrunseq;
            $start = $next;
        }
    }

    /**
     * returns true if the input char is an Isolate Initiator
     *
     * @param int $ord Char code
     */
    protected function isIsolateInitiator(int $ord): bool
    {
        return (($ord == UniConstant::RLI) || ($ord == UniConstant::LRI) || ($ord == UniConstant::FSI));
    }

    /**
     * Set level Isolated Level Run Sequences
     *
     * Groups the level runs into isolating run sequences: a run that starts with a PDI matched to
     * an isolate initiator is appended to the sequence containing that initiator; every other run
     * starts a new sequence.
     */
    protected function setIsolatedLevelRunSequences(): void
    {
        $this->setLevelRunSequences();
        $numiso = 0;
        foreach ($this->runseq as $idx => $seq) {
            // Create a new level run sequence, and initialize it to contain just that level run
            $isorun = [
                'e' => $seq['e'],
                'edir' => $this->getEmbeddedDirection($seq['e']), // embedded direction
                'start' => $seq['start'], // position of the first char
                'end' => $seq['end'], // position of the last char
                'length' => ($seq['end'] - $seq['start'] + 1),
                'sos' => '', // start-of-sequence
                'eos' => '', // end-of-sequence
                'maxlevel' => 0,
                'item' => [],
            ];
            for ($jdx = 0; $jdx < $isorun['length']; ++$jdx) {
                $isorun['item'][$jdx] = $this->chardata[($seq['start'] + $jdx)];
            }

            $endchar = $isorun['item'][($jdx - 1)]['char'];
            // While the level run currently last in the sequence ends with an isolate initiator that has a
            // matching PDI, append the level run containing the matching PDI to the sequence.
            // (Note that this matching PDI must be the first character of its level run.)
            $pdimatch = -1;
            if ($this->isIsolateInitiator($endchar)) {
                // find the next sequence with the same level that starts with a PDI
                for ($kdx = ($idx + 1); $kdx < $this->numrunseq; ++$kdx) {
                    if (
                        ($this->runseq[$kdx]['e'] == $isorun['e'])
                        && ($this->chardata[$this->runseq[$kdx]['start']]['char'] == UniConstant::PDI)
                    ) {
                        $pdimatch = $this->runseq[$kdx]['start'];
                        // Tentatively link the PDI to a new sequence;
                        // corrected below when this run is itself appended to a parent.
                        $this->chardata[$pdimatch]['pdimatch'] = $numiso;
                        break;
                    }
                }
            }

            // For each level run in the paragraph whose first character is not a PDI,
            // or is a PDI that does not match any isolate initiator
            if ($this->chardata[$seq['start']]['pdimatch'] >= 0) {
                // The run starts with a matched PDI: extend the sequence of the matching initiator.
                $parent = $this->chardata[$seq['start']]['pdimatch'];
                $this->ilrs[$parent]['item'] = array_merge(
                    $this->ilrs[$parent]['item'],
                    $isorun['item']
                );
                $this->ilrs[$parent]['length'] += $isorun['length'];
                // 'end' is an absolute position: the sequence now ends where the appended run ends.
                $this->ilrs[$parent]['end'] = $isorun['end'];
                if ($pdimatch >= 0) {
                    $this->chardata[$pdimatch]['pdimatch'] = $parent;
                }
            } else {
                $this->ilrs[$numiso] = $isorun;
                ++$numiso;
            }
        }

        $this->setStartEndOfSequence();
    }

    /**
     * Determine the start-of-sequence (sos) and end-of-sequence (eos) types, either L or R,
     * for each isolating run sequence.
     */
    protected function setStartEndOfSequence(): void
    {
        foreach ($this->ilrs as $key => $seq) {
            // For sos, compare the level of the first character in the sequence with the level of the character
            // preceding it in the paragraph (not counting characters removed by X9), and if there is none,
            // with the paragraph embedding level.
            $lev = $seq['item'][0]['level'];
            if ($seq['start'] == 0) {
                $prev = $this->pel;
            } else {
                $lastchr = $this->chardata[($seq['start'] - 1)];
                $prev = $lastchr['level'];
            }

            // If the higher level is odd, the sos or eos is R; otherwise, it is L.
            $this->ilrs[$key]['sos'] = $this->getEmbeddedDirection(max($prev, $lev));
            // For eos, compare the level of the last character in the sequence with the level of the character
            // following it in the paragraph (not counting characters removed by X9), and if there is none or the
            // last character of the sequence is an isolate initiator (lacking a matching PDI), with the paragraph
            // embedding level.
            $lastchr = end($seq['item']);
            if ($lastchr === false) {
                // A sequence without items has no eos; keep processing the remaining sequences.
                continue;
            }

            $lev = $lastchr['level'];
            if ((! isset($this->chardata[($seq['end'] + 1)]['level'])) || $this->isIsolateInitiator($lastchr['char'])) {
                $next = $this->pel;
            } else {
                $next = $this->chardata[($seq['end'] + 1)]['level'];
            }

            $this->ilrs[$key]['eos'] = $this->getEmbeddedDirection(max($next, $lev));
        }
    }
}