Completed
Push — master ( 90d412...9ff52c )
by Mark
27s queued 14s
created

Text::slug()   B

Complexity

Conditions 6
Paths 16

Size

Total Lines 31

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
nc 16
nop 2
dl 0
loc 31
rs 8.8017
c 0
b 0
f 0
1
<?php
2
/**
3
 * CakePHP(tm) : Rapid Development Framework (https://cakephp.org)
4
 * Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
5
 *
6
 * Licensed under The MIT License
7
 * For full copyright and license information, please see the LICENSE.txt
8
 * Redistributions of files must retain the above copyright notice.
9
 *
10
 * @copyright     Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
11
 * @link          https://cakephp.org CakePHP(tm) Project
12
 * @since         1.2.0
13
 * @license       https://opensource.org/licenses/mit-license.php MIT License
14
 */
15
namespace Cake\Utility;
16
17
use InvalidArgumentException;
18
19
/**
20
 * Text handling methods.
21
 */
22
class Text
23
{
24
    /**
25
     * Default transliterator.
26
     *
27
     * @var \Transliterator Transliterator instance.
28
     */
29
    protected static $_defaultTransliterator;
30
31
    /**
32
     * Default transliterator id string.
33
     *
34
     * @var string $_defaultTransliteratorId Transliterator identifier string.
35
     */
36
    protected static $_defaultTransliteratorId = 'Any-Latin; Latin-ASCII; [\u0080-\u7fff] remove';
37
38
    /**
39
     * Default HTML tags whose content must not be counted when truncating text.
40
     *
41
     * @var array
42
     */
43
    protected static $_defaultHtmlNoCount = [
44
        'style',
45
        'script',
46
    ];
47
48
    /**
     * Builds a random (version 4) UUID string.
     *
     * Warning: the result must not be used as a random seed for cryptographic operations,
     * nor for identifiers that have security implications (for example 'unguessable' URL
     * identifiers). Use `Security::randomBytes()` for those instead.
     *
     * @see https://www.ietf.org/rfc/rfc4122.txt
     * @return string RFC 4122 UUID
     * @copyright Matt Farina MIT License https://github.com/lootils/uuid/blob/master/LICENSE
     */
    public static function uuid()
    {
        // Prefer random_int() when the runtime provides it, otherwise fall back to mt_rand().
        $generator = function_exists('random_int') ? 'random_int' : 'mt_rand';

        // 32 bits for "time_low"
        $timeLow = [$generator(0, 65535), $generator(0, 65535)];
        // 16 bits for "time_mid"
        $timeMid = $generator(0, 65535);
        // 12 random bits combined with the 0100 (version 4) marker for "time_hi_and_version"
        $timeHiAndVersion = $generator(0, 4095) | 0x4000;
        // "clk_seq_hi_res" and "clk_seq_low"; the two most significant bits are
        // set to one and zero for variant DCE1.1
        $clockSequence = $generator(0, 0x3fff) | 0x8000;
        // 48 bits for "node"
        $node = [$generator(0, 65535), $generator(0, 65535), $generator(0, 65535)];

        return vsprintf(
            '%04x%04x-%04x-%04x-%04x-%04x%04x%04x',
            array_merge($timeLow, [$timeMid, $timeHiAndVersion, $clockSequence], $node)
        );
    }
84
85
    /**
     * Tokenizes a string using $separator, ignoring any instance of $separator that appears between
     * $leftBound and $rightBound.
     *
     * Each returned token is passed through trim(). When $leftBound and $rightBound are the
     * same string, occurrences alternate between opening and closing the protected region.
     *
     * @param string $data The data to tokenize.
     * @param string $separator The token to split the data on.
     * @param string $leftBound The left boundary to ignore separators in.
     * @param string $rightBound The right boundary to ignore separators in.
     * @return string[] Array of tokens in $data, or an empty array if $data is empty.
     */
    public static function tokenize($data, $separator = ',', $leftBound = '(', $rightBound = ')')
    {
        // empty() alone would also discard "0", which is a perfectly valid single token.
        if (empty($data) && !is_numeric($data)) {
            return [];
        }

        $depth = 0;
        $offset = 0;
        $buffer = '';
        $results = [];
        $length = mb_strlen($data);
        $open = false;

        while ($offset <= $length) {
            // Find the nearest upcoming separator or boundary, whichever comes first.
            $tmpOffset = -1;
            $offsets = [
                mb_strpos($data, $separator, $offset),
                mb_strpos($data, $leftBound, $offset),
                mb_strpos($data, $rightBound, $offset),
            ];
            foreach ($offsets as $candidate) {
                if ($candidate !== false && ($tmpOffset === -1 || $candidate < $tmpOffset)) {
                    $tmpOffset = $candidate;
                }
            }

            if ($tmpOffset === -1) {
                // Nothing special left: the remainder belongs to the last token.
                $results[] = $buffer . mb_substr($data, $offset);
                $offset = $length + 1;
                continue;
            }

            $buffer .= mb_substr($data, $offset, $tmpOffset - $offset);
            $char = mb_substr($data, $tmpOffset, 1);
            if (!$depth && $char === $separator) {
                // A separator outside of any boundary ends the current token.
                $results[] = $buffer;
                $buffer = '';
            } else {
                $buffer .= $char;
            }

            if ($leftBound !== $rightBound) {
                if ($char === $leftBound) {
                    $depth++;
                }
                if ($char === $rightBound) {
                    $depth--;
                }
            } elseif ($char === $leftBound) {
                // Identical boundaries (e.g. quotes) toggle between open and closed.
                if (!$open) {
                    $depth++;
                    $open = true;
                } else {
                    $depth--;
                    $open = false;
                }
            }
            $offset = $tmpOffset + 1;
        }

        if (empty($results) && !empty($buffer)) {
            $results[] = $buffer;
        }

        if (!empty($results)) {
            return array_map('trim', $results);
        }

        return [];
    }
164
165
    /**
     * Replaces variable placeholders inside a $str with any given $data. Each key in the $data array
     * corresponds to a variable placeholder name in $str.
     * Example:
     * ```
     * Text::insert(':name is :age years old.', ['name' => 'Bob', 'age' => '65']);
     * ```
     * Returns: Bob is 65 years old.
     *
     * When $str contains `?` and the first key of $data is numeric, each `?` is replaced
     * positionally with the next value of $data instead.
     *
     * Available $options are:
     *
     * - before: The character or string in front of the name of the variable placeholder (Defaults to `:`)
     * - after: The character or string after the name of the variable placeholder (Defaults to null)
     * - escape: The character or string used to escape the before character / string (Defaults to `\`)
     * - format: A regex to use for matching variable placeholders. Default is: `/(?<!\\)\:%s/`
     *   (Overwrites before, after, breaks escape / clean)
     * - clean: A boolean or array with instructions for Text::cleanInsert
     *
     * @param string $str A string containing variable placeholders
     * @param array $data A key => val array where each key stands for a placeholder variable name
     *     to be replaced with val
     * @param array $options An array of options, see description above
     * @return string
     */
    public static function insert($str, $data, array $options = [])
    {
        $options += [
            'before' => ':', 'after' => null, 'escape' => '\\', 'format' => null, 'clean' => false,
        ];
        $format = $options['format'];
        $data = (array)$data;
        if (empty($data)) {
            return $options['clean'] ? static::cleanInsert($str, $options) : $str;
        }

        if (!isset($format)) {
            // `after` defaults to null; cast so preg_quote() always receives a string
            // (passing null to it is deprecated as of PHP 8.1).
            $format = sprintf(
                '/(?<!%s)%s%%s%s/',
                preg_quote((string)$options['escape'], '/'),
                str_replace('%', '%%', preg_quote((string)$options['before'], '/')),
                str_replace('%', '%%', preg_quote((string)$options['after'], '/'))
            );
        }

        if (strpos($str, '?') !== false && is_numeric(key($data))) {
            $offset = 0;
            while (($pos = strpos($str, '?', $offset)) !== false) {
                // array_shift() yields null once $data is exhausted; treat that (and array
                // values, as in the named-placeholder path below) as an empty string.
                $val = array_shift($data);
                $val = is_array($val) ? '' : (string)$val;
                // Continue searching after the inserted value so a `?` inside it is left alone.
                $offset = $pos + strlen($val);
                $str = substr_replace($str, $val, $pos, 1);
            }

            return $options['clean'] ? static::cleanInsert($str, $options) : $str;
        }

        // Two-pass replacement: placeholders are first swapped for a hash of their key, and
        // the hashes are then swapped for the values.
        $dataKeys = array_keys($data);
        $hashKeys = array_map('crc32', $dataKeys);
        $tempData = array_combine($dataKeys, $hashKeys);
        krsort($tempData);

        foreach ($tempData as $key => $hashVal) {
            $key = sprintf($format, preg_quote($key, '/'));
            $str = preg_replace($key, $hashVal, $str);
        }
        $dataReplacements = array_combine($hashKeys, array_values($data));
        foreach ($dataReplacements as $tmpHash => $tmpValue) {
            $tmpValue = is_array($tmpValue) ? '' : $tmpValue;
            $str = str_replace($tmpHash, $tmpValue, $str);
        }

        // Unescape escaped placeholders, e.g. `\:name` becomes `:name`.
        if (!isset($options['format']) && isset($options['before'])) {
            $str = str_replace($options['escape'] . $options['before'], $options['before'], $str);
        }

        return $options['clean'] ? static::cleanInsert($str, $options) : $str;
    }
242
243
    /**
     * Cleans up a Text::insert() formatted string with given $options depending on the 'clean' key in
     * $options. The default method used is text but html is also available. The goal of this function
     * is to replace all whitespace and unneeded markup around placeholders that did not get replaced
     * by Text::insert().
     *
     * $options must carry the `clean`, `before` and `after` keys, as Text::insert() provides them.
     *
     * @param string $str String to clean.
     * @param array $options Options list.
     * @return string
     * @see \Cake\Utility\Text::insert()
     */
    public static function cleanInsert($str, array $options)
    {
        $clean = $options['clean'];
        if (!$clean) {
            return $str;
        }
        if ($clean === true) {
            $clean = ['method' => 'text'];
        }
        if (!is_array($clean)) {
            $clean = ['method' => $options['clean']];
        }
        switch ($clean['method']) {
            case 'html':
                $clean += [
                    'word' => '[\w,.]+',
                    'andText' => true,
                    'replacement' => '',
                ];
                // `after` may be null (the insert() default); cast so preg_quote() always
                // receives a string (passing null is deprecated as of PHP 8.1).
                $kleenex = sprintf(
                    '/[\s]*[a-z]+=(")(%s%s%s[\s]*)+\\1/i',
                    preg_quote((string)$options['before'], '/'),
                    $clean['word'],
                    preg_quote((string)$options['after'], '/')
                );
                $str = preg_replace($kleenex, $clean['replacement'], $str);
                if ($clean['andText']) {
                    // Follow up with the plain-text cleanup for leftover placeholders.
                    $options['clean'] = ['method' => 'text'];
                    $str = static::cleanInsert($str, $options);
                }
                break;
            case 'text':
                $clean += [
                    'word' => '[\w,.]+',
                    'gap' => '[\s]*(?:(?:and|or)[\s]*)?',
                    'replacement' => '',
                ];

                $before = preg_quote((string)$options['before'], '/');
                $after = preg_quote((string)$options['after'], '/');
                // Match a placeholder together with the gap that follows or precedes it.
                $kleenex = sprintf(
                    '/(%s%s%s%s|%s%s%s%s)/',
                    $before,
                    $clean['word'],
                    $after,
                    $clean['gap'],
                    $clean['gap'],
                    $before,
                    $clean['word'],
                    $after
                );
                $str = preg_replace($kleenex, $clean['replacement'], $str);
                break;
        }

        return $str;
    }
309
310
    /**
     * Wraps text to a given width, optionally breaking only between words, and
     * optionally prefixing lines with an indent string.
     *
     * ### Options
     *
     * - `width` The width to wrap to. Defaults to 72.
     * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
     * - `indent` String to indent with. Defaults to null.
     * - `indentAt` 0 based index to start indenting at. Defaults to 0.
     *
     * @param string $text The text to format.
     * @param array|int $options Array of options to use, or an integer to wrap the text to.
     * @return string Formatted text.
     */
    public static function wrap($text, $options = [])
    {
        if (is_numeric($options)) {
            $options = ['width' => $options];
        }
        $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];

        $wrapped = $options['wordWrap']
            ? self::wordWrap($text, $options['width'], "\n")
            : trim(chunk_split($text, $options['width'] - 1, "\n"));

        if (empty($options['indent'])) {
            return $wrapped;
        }

        // Prefix every line from index `indentAt` onwards with the indent string.
        $lines = explode("\n", $wrapped);
        $total = count($lines);
        $index = $options['indentAt'];
        while ($index < $total) {
            $lines[$index] = $options['indent'] . $lines[$index];
            $index++;
        }

        return implode("\n", $lines);
    }
345
346
    /**
     * Wraps a complete block of text to a specific width, can optionally wrap
     * at word breaks.
     *
     * When an indent is configured and the first pass produces more than one line, the
     * lines from `indentAt` onwards are re-wrapped at `width` minus the indent length.
     *
     * ### Options
     *
     * - `width` The width to wrap to. Defaults to 72.
     * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
     * - `indent` String to indent with. Defaults to null.
     * - `indentAt` 0 based index to start indenting at. Defaults to 0.
     *
     * @param string $text The text to format.
     * @param array|int $options Array of options to use, or an integer to wrap the text to.
     * @return string Formatted text.
     */
    public static function wrapBlock($text, $options = [])
    {
        if (is_numeric($options)) {
            $options = ['width' => $options];
        }
        $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];

        // NOTE: a former shortcut guarded by `!empty($options['indentAt']) && $options['indentAt'] === 0`
        // could never run (the two conditions contradict each other) and has been removed.

        $wrapped = self::wrap($text, $options);
        if (empty($options['indent'])) {
            return $wrapped;
        }

        $indentationLength = mb_strlen($options['indent']);
        $chunks = explode("\n", $wrapped);
        $count = count($chunks);
        if ($count < 2) {
            return $wrapped;
        }

        // Strip the indent from the indented lines and glue them back into one string...
        $toRewrap = '';
        for ($i = $options['indentAt']; $i < $count; $i++) {
            $toRewrap .= mb_substr($chunks[$i], $indentationLength) . ' ';
            unset($chunks[$i]);
        }

        // ...then wrap that string again at the narrower width, indenting every line.
        $options['width'] -= $indentationLength;
        $options['indentAt'] = 0;
        $rewrapped = self::wrap($toRewrap, $options);
        $newChunks = explode("\n", $rewrapped);

        return implode("\n", array_merge($chunks, $newChunks));
    }
400
401
    /**
     * Unicode and newline aware version of wordwrap.
     *
     * The text is split on $break, each piece is wrapped on its own, and the pieces
     * are joined with $break again.
     *
     * @param string $text The text to format.
     * @param int $width The width to wrap to. Defaults to 72.
     * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
     * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
     * @return string Formatted text.
     */
    public static function wordWrap($text, $width = 72, $break = "\n", $cut = false)
    {
        $wrappedParagraphs = [];
        foreach (explode($break, $text) as $paragraph) {
            $wrappedParagraphs[] = static::_wordWrap($paragraph, $width, $break, $cut);
        }

        return implode($break, $wrappedParagraphs);
    }
419
420
    /**
     * Unicode aware version of wordwrap as helper method.
     *
     * Every produced line is trimmed. Without $cut, a word longer than $width is kept
     * intact on its own line.
     *
     * @param string $text The text to format.
     * @param int $width The width to wrap to. Defaults to 72.
     * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
     * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
     * @return string Formatted text.
     */
    protected static function _wordWrap($text, $width = 72, $break = "\n", $cut = false)
    {
        $parts = [];

        if ($cut) {
            while (mb_strlen($text) > 0) {
                $part = mb_substr($text, 0, $width);
                if ($part === '') {
                    // A width that consumes nothing (e.g. 0) would loop forever; emit the rest as-is.
                    $parts[] = trim($text);
                    break;
                }
                $parts[] = trim($part);
                $text = trim(mb_substr($text, mb_strlen($part)));
            }

            return implode($break, $parts);
        }

        while (mb_strlen($text) > 0) {
            if ($width >= mb_strlen($text)) {
                $parts[] = trim($text);
                break;
            }

            $part = mb_substr($text, 0, $width);
            $nextChar = mb_substr($text, $width, 1);
            if ($nextChar !== ' ') {
                // The cut would land inside a word: back up to the last space in the part,
                // or, failing that, extend to the next space after it.
                $breakAt = mb_strrpos($part, ' ');
                if ($breakAt === false) {
                    $breakAt = mb_strpos($text, ' ', $width);
                }
                if ($breakAt === false) {
                    $parts[] = trim($text);
                    break;
                }
                $part = mb_substr($text, 0, $breakAt);
            }

            // Measure the part BEFORE trimming it: cutting by the trimmed length would
            // re-read characters whenever the part starts with whitespace.
            $consumed = mb_strlen($part);
            $parts[] = trim($part);
            $text = trim(mb_substr($text, $consumed));
        }

        return implode($break, $parts);
    }
470
471
    /**
     * Highlights a given phrase in a text. You can specify any expression in highlighter that
     * may include the \1 expression to include the $phrase found.
     *
     * ### Options:
     *
     * - `format` The piece of HTML with that the phrase will be highlighted. May be an array
     *   keyed like $phrase when $phrase is an array.
     * - `html` If true, will ignore any HTML tags, ensuring that only the correct text is highlighted
     * - `regex` A custom regex rule that is used to match words, default is '|$tag|iu'
     * - `limit` A limit, optional, defaults to -1 (none)
     *
     * @param string $text Text to search the phrase in.
     * @param string|array $phrase The phrase or phrases that will be searched.
     * @param array $options An array of HTML attributes and options.
     * @return string The highlighted text
     * @link https://book.cakephp.org/3/en/core-libraries/text.html#highlighting-substrings
     */
    public static function highlight($text, $phrase, array $options = [])
    {
        if (empty($phrase)) {
            return $text;
        }

        $options += [
            'format' => '<span class="highlight">\1</span>',
            'html' => false,
            'regex' => '|%s|iu',
            'limit' => -1,
        ];

        // Read the options explicitly. extract() would let any extra key in $options
        // (e.g. 'text' or 'phrase') silently overwrite the local variables.
        $html = $options['html'];
        $format = $options['format'];
        $limit = $options['limit'];
        $regex = $options['regex'];

        if (is_array($phrase)) {
            $replace = [];
            $with = [];

            foreach ($phrase as $key => $segment) {
                $segment = '(' . preg_quote($segment, '|') . ')';
                if ($html) {
                    // Refuse matches that sit inside an HTML tag.
                    $segment = "(?![^<]+>)$segment(?![^<]+>)";
                }

                $with[] = is_array($format) ? $format[$key] : $format;
                $replace[] = sprintf($regex, $segment);
            }

            return preg_replace($replace, $with, $text, $limit);
        }

        $phrase = '(' . preg_quote($phrase, '|') . ')';
        if ($html) {
            // Refuse matches that sit inside an HTML tag.
            $phrase = "(?![^<]+>)$phrase(?![^<]+>)";
        }

        return preg_replace(sprintf($regex, $phrase), $format, $text, $limit);
    }
534
535
    /**
     * Strips given text of all links (<a href=....).
     *
     * *Warning* This method is not a robust solution in preventing XSS
     * or malicious HTML.
     *
     * @param string $text Text
     * @return string The text without links
     * @deprecated 3.2.12 This method will be removed in 4.0.0
     */
    public static function stripLinks($text)
    {
        deprecationWarning('This method will be removed in 4.0.0.');

        // Keep stripping until a pass removes nothing.
        $removed = 0;
        do {
            $text = preg_replace('#</?a([/\s][^>]*)?(>|$)#i', '', $text, -1, $removed);
        } while ($removed > 0);

        return $text;
    }
554
555
    /**
     * Truncates text starting from the end.
     *
     * Cuts a string to the length of $length and replaces the first characters
     * with the ellipsis if the text is longer than length.
     *
     * ### Options:
     *
     * - `ellipsis` Will be used as beginning and prepended to the trimmed string
     * - `exact` If false, $text will not be cut mid-word
     *
     * @param string $text String to truncate.
     * @param int $length Length of returned string, including ellipsis.
     * @param array $options An array of options.
     * @return string Trimmed string.
     */
    public static function tail($text, $length = 100, array $options = [])
    {
        $options += [
            'ellipsis' => '...', 'exact' => true,
        ];

        // Read the options explicitly. extract() would let any extra key in $options
        // (e.g. 'text' or 'length') silently overwrite the local variables.
        $ellipsis = $options['ellipsis'];
        $exact = $options['exact'];

        $textLength = mb_strlen($text);
        if ($textLength <= $length) {
            return $text;
        }

        // Keep the last ($length - ellipsis length) characters.
        $truncate = mb_substr($text, $textLength - $length + mb_strlen($ellipsis));
        if (!$exact) {
            // Drop the leading partial word; with no space at all nothing is kept.
            $spacepos = mb_strpos($truncate, ' ');
            $truncate = $spacepos === false ? '' : trim(mb_substr($truncate, $spacepos));
        }

        return $ellipsis . $truncate;
    }
596
597
    /**
     * Truncates text.
     *
     * Cuts a string to the length of $length and replaces the last characters
     * with the ellipsis if the text is longer than length.
     *
     * ### Options:
     *
     * - `ellipsis` Will be used as ending and appended to the trimmed string
     * - `exact` If false, $text will not be cut mid-word
     * - `html` If true, HTML tags would be handled correctly
     * - `trimWidth` If true, $text will be truncated with the width
     *
     * In `html` mode the text inside `style` and `script` elements is not counted, and
     * every element still open at the cut point is closed after the ellipsis.
     *
     * @param string $text String to truncate.
     * @param int $length Length of returned string, including ellipsis.
     * @param array $options An array of HTML attributes and options.
     * @return string Trimmed string.
     * @link https://book.cakephp.org/3/en/core-libraries/text.html#truncating-text
     */
    public static function truncate($text, $length = 100, array $options = [])
    {
        $default = [
            'ellipsis' => '...', 'exact' => true, 'html' => false, 'trimWidth' => false,
        ];
        if (!empty($options['html']) && strtolower(mb_internal_encoding()) === 'utf-8') {
            // UTF-8 encoded horizontal ellipsis character.
            $default['ellipsis'] = "\xe2\x80\xa6";
        }
        $options += $default;

        $prefix = '';
        $suffix = $options['ellipsis'];

        if ($options['html']) {
            $ellipsisLength = self::_strlen(strip_tags($options['ellipsis']), $options);

            $truncateLength = 0;
            $totalLength = 0;
            $openTags = [];
            $truncate = '';

            // Elements that never take a closing tag. The pattern is anchored so that names
            // merely containing one of them (abbr, iframe, colgroup, frameset) still get
            // tracked and closed.
            $voidTags = '/^(?:img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)$/i';

            // Each match is one optional tag ($tag[1], name in $tag[2]) followed by the
            // text up to the next tag ($tag[3]).
            $tags = [];
            preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $text, $tags, PREG_SET_ORDER);
            foreach ($tags as $tag) {
                $contentLength = 0;
                if (!in_array($tag[2], static::$_defaultHtmlNoCount, true)) {
                    $contentLength = self::_strlen($tag[3], $options);
                }

                if ($truncate === '') {
                    if (!preg_match($voidTags, $tag[2])) {
                        if (preg_match('/<[\w]+[^>]*>/', $tag[0])) {
                            // Opening tag: remember it, most recent first.
                            array_unshift($openTags, $tag[2]);
                        } elseif (preg_match('/<\/([\w]+)[^>]*>/', $tag[0], $closeTag)) {
                            // Closing tag: forget the matching open tag, if any.
                            $pos = array_search($closeTag[1], $openTags, true);
                            if ($pos !== false) {
                                array_splice($openTags, $pos, 1);
                            }
                        }
                    }

                    $prefix .= $tag[1];

                    if ($totalLength + $contentLength + $ellipsisLength > $length) {
                        // This text segment is the one that has to be cut.
                        $truncate = $tag[3];
                        $truncateLength = $length - $totalLength;
                    } else {
                        $prefix .= $tag[3];
                    }
                }

                $totalLength += $contentLength;
                if ($totalLength > $length) {
                    break;
                }
            }

            if ($totalLength <= $length) {
                return $text;
            }

            $text = $truncate;
            $length = $truncateLength;

            foreach ($openTags as $tag) {
                $suffix .= '</' . $tag . '>';
            }
        } else {
            if (self::_strlen($text, $options) <= $length) {
                return $text;
            }
            $ellipsisLength = self::_strlen($options['ellipsis'], $options);
        }

        $result = self::_substr($text, 0, $length - $ellipsisLength, $options);

        if (!$options['exact']) {
            if (self::_substr($text, $length - $ellipsisLength, 1, $options) !== ' ') {
                $result = self::_removeLastWord($result);
            }

            // If result is empty, then we don't need to count ellipsis in the cut.
            if (!strlen($result)) {
                $result = self::_substr($text, 0, $length, $options);
            }
        }

        return $prefix . $result . $suffix;
    }
704
705
    /**
     * Truncates text by display width: delegates to truncate() with the
     * `trimWidth` option forced to true.
     *
     * @param string $text String to truncate.
     * @param int $length Length of returned string, including ellipsis.
     * @param array $options An array of HTML attributes and options.
     * @return string Trimmed string.
     * @see \Cake\Utility\Text::truncate()
     */
    public static function truncateByWidth($text, $length = 100, array $options = [])
    {
        // Any caller-supplied `trimWidth` value is overridden.
        $options['trimWidth'] = true;

        return static::truncate($text, $length, $options);
    }
718
719
    /**
     * Measures a string, either in characters or in display width.
     *
     * ### Options:
     *
     * - `html` If true, HTML entities will be handled as decoded characters.
     * - `trimWidth` If true, the width will return.
     *
     * @param string $text The string being checked for length
     * @param array $options An array of options.
     * @return int
     */
    protected static function _strlen($text, array $options)
    {
        $measure = empty($options['trimWidth']) ? 'mb_strlen' : 'mb_strwidth';

        if (!empty($options['html'])) {
            // Swap each HTML entity for as many spaces as its decoded form measures, so
            // the entity contributes its decoded size rather than its source length.
            $text = preg_replace_callback(
                '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i',
                function ($entity) use ($measure) {
                    $decoded = html_entity_decode($entity[0], ENT_HTML5 | ENT_QUOTES, 'UTF-8');

                    return str_repeat(' ', $measure($decoded, 'UTF-8'));
                },
                $text
            );
        }

        return $measure($text);
    }
756
757
    /**
     * Extract a slice of a string.
     *
     * ### Options:
     *
     * - `html` If true, HTML entities will be handled as decoded characters.
     * - `trimWidth` If true, the slice is cut by display width (`mb_strimwidth`)
     *   instead of by character count (`mb_substr`).
     *
     * @param string $text The input string.
     * @param int $start The position to begin extracting. Negative values count from the end.
     * @param int|null $length The desired length. `null` means "up to the end of the string",
     *   a negative value leaves that many units off the end.
     * @param array $options An array of options.
     * @return string
     */
    protected static function _substr($text, $start, $length, array $options)
    {
        $slicer = empty($options['trimWidth']) ? 'mb_substr' : 'mb_strimwidth';

        // Start positions are always expressed in characters, never in width.
        $charCount = self::_strlen($text, ['trimWidth' => false] + $options);
        if ($start < 0) {
            $start += $charCount;
            $start = $start < 0 ? 0 : $start;
        }
        if ($start >= $charCount) {
            return '';
        }

        if ($length === null) {
            $length = self::_strlen($text, $options);
        }

        if ($length < 0) {
            // Reduce to the tail starting at $start, then turn the negative
            // length into a positive one relative to that tail.
            $text = self::_substr($text, $start, null, $options);
            $start = 0;
            $length += self::_strlen($text, $options);
        }

        if ($length <= 0) {
            return '';
        }

        if (empty($options['html'])) {
            return (string)$slicer($text, $start, $length);
        }

        $consumed = 0;
        $emitted = 0;
        $output = '';

        // Split into alternating runs of plain text and single entities.
        $pattern = '/(&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};)/i';
        $segments = preg_split($pattern, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
        foreach ($segments as $segment) {
            $skip = 0;

            if ($consumed < $start) {
                $charLength = self::_strlen($segment, ['trimWidth' => false] + $options);
                if ($consumed + $charLength <= $start) {
                    // Segment lies entirely before the requested start.
                    $consumed += $charLength;
                    continue;
                }

                $skip = $start - $consumed;
                $consumed = $start;
            }

            $segmentLength = self::_strlen($segment, $options);
            $needsCut = $skip !== 0 || $emitted + $segmentLength > $length;
            if ($needsCut) {
                $isEntity = strpos($segment, '&') === 0 && preg_match($pattern, $segment)
                    && $segment !== html_entity_decode($segment, ENT_HTML5 | ENT_QUOTES, 'UTF-8');
                if ($isEntity) {
                    // Entities cannot be passed substr.
                    continue;
                }

                $segment = $slicer($segment, $skip, $length - $emitted);
                $segmentLength = self::_strlen($segment, $options);
            }

            $output .= $segment;
            $emitted += $segmentLength;
            if ($emitted >= $length) {
                break;
            }
        }

        return $output;
    }
851
852
    /**
     * Removes the last word from the input text.
     *
     * The text is cut at its last space. If the part after that space contains
     * full-width characters it is not treated as a single word and the text is
     * returned unchanged. Text without any space yields an empty string.
     *
     * @param string $text The input text
     * @return string
     */
    protected static function _removeLastWord($text)
    {
        $spacepos = mb_strrpos($text, ' ');

        if ($spacepos === false) {
            return '';
        }

        // Everything from the last space onwards, i.e. the candidate "last word".
        $lastWord = mb_substr($text, $spacepos);

        // Some languages are written without word separation.
        // We recognize a string as a word if it doesn't contain any full-width characters.
        if (mb_strwidth($lastWord) === mb_strlen($lastWord)) {
            $text = mb_substr($text, 0, $spacepos);
        }

        return $text;
    }
876
877
    /**
     * Cuts a window out of the text around the first (case-insensitive) occurrence of a phrase.
     *
     * Up to `$radius` characters are kept on each side of the phrase. The ellipsis is added on
     * every side where the window does not reach the end of the text. When the phrase is not
     * found, the first `$radius` characters followed by the ellipsis are returned. When the
     * text or the phrase is empty, the text is truncated to twice the radius instead.
     *
     * @param string $text String to search the phrase in
     * @param string $phrase Phrase that will be searched for
     * @param int $radius The amount of characters that will be returned on each side of the found phrase
     * @param string $ellipsis Marker added on each side that was cut
     * @return string Modified string
     * @link https://book.cakephp.org/3/en/core-libraries/text.html#extracting-an-excerpt
     */
    public static function excerpt($text, $phrase, $radius = 100, $ellipsis = '...')
    {
        if (empty($text) || empty($phrase)) {
            return static::truncate($text, $radius * 2, ['ellipsis' => $ellipsis]);
        }

        $matchAt = mb_stripos($text, $phrase);
        if ($matchAt === false) {
            return mb_substr($text, 0, $radius) . $ellipsis;
        }

        $windowStart = $matchAt - $radius;
        $reachesStart = $windowStart <= 0;
        if ($reachesStart) {
            $windowStart = 0;
        }

        $textLength = mb_strlen($text);
        $windowEnd = $matchAt + mb_strlen($phrase) + $radius;
        $reachesEnd = $windowEnd >= $textLength;
        if ($reachesEnd) {
            $windowEnd = $textLength;
        }

        $leading = $reachesStart ? '' : $ellipsis;
        $trailing = $reachesEnd ? '' : $ellipsis;

        return $leading . mb_substr($text, $windowStart, $windowEnd - $windowStart) . $trailing;
    }
921
922
    /**
     * Creates a comma separated list where the last two items are joined with 'and', forming natural language.
     *
     * A single-item list yields that item; an empty list yields an empty string.
     *
     * @param string[] $list The list to be joined.
     * @param string|null $and The word used to join the last and second last items together with. Defaults to 'and'.
     * @param string $separator The separator used to join all the other items together. Defaults to ', '.
     * @return string The glued together string.
     * @link https://book.cakephp.org/3/en/core-libraries/text.html#converting-an-array-to-sentence-form
     */
    public static function toList(array $list, $and = null, $separator = ', ')
    {
        if ($and === null) {
            $and = __d('cake', 'and');
        }

        if (count($list) > 1) {
            // Detach the final item first so the remainder can be joined directly,
            // without handing a null offset to array_slice().
            $last = array_pop($list);

            return implode($separator, $list) . ' ' . $and . ' ' . $last;
        }

        // array_pop() gives null for an empty list; keep the documented string return type.
        return (string)array_pop($list);
    }
942
943
    /**
     * Tells whether a string contains any byte above the 7-bit range,
     * which is taken as a sign of multibyte characters.
     *
     * @param string $string value to test
     * @return bool
     */
    public static function isMultibyte($string)
    {
        // Scan order does not matter; any single qualifying byte is enough.
        // NOTE(review): the threshold is "greater than 128", so a byte of exactly
        // 128 (0x80) is not flagged by itself - confirm whether that is intended.
        $index = strlen($string);
        while ($index-- > 0) {
            if (ord($string[$index]) > 128) {
                return true;
            }
        }

        return false;
    }
962
963
    /**
     * Converts a UTF-8 encoded string into the list of its
     * decimal Unicode code points.
     *
     * One-, two-, three- and four-byte sequences are decoded. The input is not
     * validated: an incomplete sequence at the end of the string is dropped.
     *
     * @param string $string String to convert.
     * @return array List of integer code points.
     */
    public static function utf8($string)
    {
        $map = [];

        $values = [];
        $find = 1;
        $length = strlen($string);

        for ($i = 0; $i < $length; $i++) {
            $value = ord($string[$i]);

            if ($value < 128) {
                // Plain ASCII maps to itself.
                $map[] = $value;
                continue;
            }

            if (empty($values)) {
                // The lead byte tells how many bytes make up the sequence.
                if ($value < 224) {
                    $find = 2;
                } elseif ($value < 240) {
                    $find = 3;
                } else {
                    $find = 4;
                }
            }
            $values[] = $value;

            if (count($values) !== $find) {
                continue;
            }

            if ($find === 4) {
                $map[] = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096)
                    + (($values[2] % 64) * 64) + ($values[3] % 64);
            } elseif ($find === 3) {
                $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
            } else {
                $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
            }
            $values = [];
            $find = 1;
        }

        return $map;
    }
1003
1004
    /**
     * Converts a list of decimal Unicode code points into
     * the corresponding UTF-8 encoded string.
     *
     * Code points are encoded as one, two, three or four bytes depending on
     * their magnitude.
     *
     * @param array $array List of integer code points.
     * @return string
     */
    public static function ascii(array $array)
    {
        $ascii = '';

        foreach ($array as $utf8) {
            if ($utf8 < 128) {
                $ascii .= chr($utf8);
            } elseif ($utf8 < 2048) {
                $ascii .= chr(192 + (($utf8 - ($utf8 % 64)) / 64));
                $ascii .= chr(128 + ($utf8 % 64));
            } elseif ($utf8 < 65536) {
                $ascii .= chr(224 + (($utf8 - ($utf8 % 4096)) / 4096));
                $ascii .= chr(128 + ((($utf8 % 4096) - ($utf8 % 64)) / 64));
                $ascii .= chr(128 + ($utf8 % 64));
            } else {
                // Code points beyond the Basic Multilingual Plane need four bytes.
                $ascii .= chr(240 + (($utf8 - ($utf8 % 262144)) / 262144));
                $ascii .= chr(128 + ((($utf8 % 262144) - ($utf8 % 4096)) / 4096));
                $ascii .= chr(128 + ((($utf8 % 4096) - ($utf8 % 64)) / 64));
                $ascii .= chr(128 + ($utf8 % 64));
            }
        }

        return $ascii;
    }
1030
1031
    /**
     * Turns a human readable file size into a number of bytes.
     *
     * A purely numeric string, or digits followed by `B`, is returned as an integer.
     * A value with a `K`/`M`/`G`/`T`/`P` unit (optionally followed by `B`, case-insensitive)
     * is multiplied by the matching power of 1024.
     *
     * @param string $size Size in human readable string like '5MB', '5M', '500B', '50kb' etc.
     * @param mixed $default Value to be returned when invalid size was used, for example 'Unknown type'
     * @return mixed Number of bytes on success, `$default` on failure if not false
     * @throws \InvalidArgumentException On invalid Unit type.
     * @link https://book.cakephp.org/3/en/core-libraries/text.html#Cake\Utility\Text::parseFileSize
     */
    public static function parseFileSize($size, $default = false)
    {
        if (ctype_digit($size)) {
            return (int)$size;
        }

        $normalized = strtoupper($size);

        // Look for a two-letter unit first, then fall back to the one-letter form.
        $suffixLength = 2;
        $power = array_search(substr($normalized, -2), ['KB', 'MB', 'GB', 'TB', 'PB'], true);
        if ($power === false) {
            $suffixLength = 1;
            $power = array_search(substr($normalized, -1), ['K', 'M', 'G', 'T', 'P'], true);
        }

        if ($power !== false) {
            $amount = (float)substr($normalized, 0, -$suffixLength);

            return $amount * pow(1024, $power + 1);
        }

        // Plain byte counts such as "500B".
        $isByteCount = substr($normalized, -1) === 'B' && ctype_digit(substr($normalized, 0, -1));
        if ($isByteCount) {
            return (int)substr($normalized, 0, -1);
        }

        if ($default === false) {
            throw new InvalidArgumentException('No unit type.');
        }

        return $default;
    }
1070
1071
    /**
     * Returns the transliterator instance currently stored as the default.
     *
     * @return \Transliterator|null The stored `Transliterator` instance, or `null`
     *   when none has been set yet.
     * @since 3.7.0
     */
    public static function getTransliterator()
    {
        return static::$_defaultTransliterator;
    }
1082
1083
    /**
     * Stores the given transliterator instance as the default.
     *
     * @param \Transliterator $transliterator The `Transliterator` instance to store.
     * @return void
     * @since 3.7.0
     */
    public static function setTransliterator(\Transliterator $transliterator)
    {
        static::$_defaultTransliterator = $transliterator;
    }
1094
1095
    /**
     * Returns the transliterator identifier string currently stored as the default.
     *
     * @return string Transliterator identifier.
     */
    public static function getTransliteratorId()
    {
        return static::$_defaultTransliteratorId;
    }
1104
1105
    /**
     * Set default transliterator identifier string.
     *
     * A transliterator is created from the identifier and stored as the default
     * instance as well. Nothing is changed when the identifier cannot be turned
     * into a transliterator.
     *
     * @param string $transliteratorId Transliterator identifier.
     * @return void
     * @throws \InvalidArgumentException When no transliterator can be created for the identifier.
     */
    public static function setTransliteratorId($transliteratorId)
    {
        // transliterator_create() signals failure through its return value; check it
        // here instead of letting the type-hinted setter fail on a non-instance.
        $transliterator = transliterator_create($transliteratorId);
        if (!$transliterator instanceof \Transliterator) {
            throw new InvalidArgumentException(
                'Unable to create transliterator for id: ' . $transliteratorId
            );
        }

        static::setTransliterator($transliterator);
        static::$_defaultTransliteratorId = $transliteratorId;
    }
1116
1117
    /**
     * Runs a string through a transliterator.
     *
     * @param string $string String to transliterate.
     * @param \Transliterator|string|null $transliterator Either a Transliterator
     *   instance, or a transliterator identifier string. If empty, the stored
     *   default instance is used, or the default identifier string when no
     *   instance has been stored.
     * @return string
     * @see https://secure.php.net/manual/en/transliterator.transliterate.php
     */
    public static function transliterate($string, $transliterator = null)
    {
        $engine = $transliterator;
        if (!$engine) {
            // Prefer the stored instance, fall back to the identifier string.
            $engine = static::$_defaultTransliterator ?: static::$_defaultTransliteratorId;
        }

        return transliterator_transliterate($engine, $string);
    }
1136
1137
    /**
     * Returns a string with all spaces converted to dashes (by default),
     * characters transliterated to ASCII characters, and non word characters removed.
     *
     * Every character that is not a letter or a decimal digit is turned into the
     * replacement, replacements at both ends are stripped and runs of
     * replacement characters are collapsed into a single replacement.
     *
     * ### Options:
     *
     * - `replacement`: Replacement string. Default '-'.
     * - `transliteratorId`: A valid transliterator id string.
     *   If `null` (default) the transliterator (identifier) set via
     *   `setTransliteratorId()` or `setTransliterator()` will be used.
     *   If `false` no transliteration will be done, only non words will be removed.
     * - `preserve`: Specific non-word character to preserve. Default `null`.
     *   For e.g. this option can be set to '.' to generate clean file names.
     *
     * @param string $string the string you want to slug
     * @param string|array $options If string it will be used as replacement character,
     *   otherwise an array of options.
     * @return string
     * @see setTransliterator()
     * @see setTransliteratorId()
     */
    public static function slug($string, $options = [])
    {
        if (is_string($options)) {
            $options = ['replacement' => $options];
        }
        $options += [
            'replacement' => '-',
            'transliteratorId' => null,
            'preserve' => null,
        ];

        if ($options['transliteratorId'] !== false) {
            $string = static::transliterate($string, $options['transliteratorId']);
        }

        // Negated class: anything that is neither a letter nor a decimal digit.
        $regex = '^\p{Ll}\p{Lm}\p{Lo}\p{Lt}\p{Lu}\p{Nd}';
        if ($options['preserve']) {
            $regex .= preg_quote($options['preserve'], '/');
        }
        $quotedReplacement = preg_quote($options['replacement'], '/');
        $map = [
            '/[' . $regex . ']/mu' => $options['replacement'],
            // The `u` modifier is required here as well: without it a multibyte
            // replacement is matched byte by byte and can cut a neighbouring
            // UTF-8 character in half.
            sprintf('/^[%s]+|[%s]+$/u', $quotedReplacement, $quotedReplacement) => '',
        ];
        if (is_string($options['replacement']) && strlen($options['replacement']) > 0) {
            // Collapse runs of replacement characters into a single replacement.
            $map[sprintf('/[%s]+/mu', $quotedReplacement)] = $options['replacement'];
        }

        return preg_replace(array_keys($map), $map, $string);
    }
1189
}
1190