mirror of
https://github.com/fzaninotto/Faker.git
synced 2025-04-20 23:41:50 +02:00
Merge pull request #254 from TimWolla/advancedTextProvider
Add an advanced text generator based on markov chains.
This commit is contained in:
commit
680b36daa4
@ -117,6 +117,10 @@ Each of the generator properties (like `name`, `address`, and `lorem`) are calle
|
||||
paragraphs($nb = 3) // array('Quidem ut sunt et quidem est accusamus aut. Fuga est placeat rerum ut. Enim ex eveniet facere sunt.', 'Aut nam et eum architecto fugit repellendus illo. Qui ex esse veritatis.', 'Possimus omnis aut incidunt sunt. Asperiores incidunt iure sequi cum culpa rem. Rerum exercitationem est rem.')
|
||||
text($maxNbChars = 200) // 'Fuga totam reiciendis qui architecto fugiat nemo. Consequatur recusandae qui cupiditate eos quod.'
|
||||
|
||||
### `Faker\Provider\Text`
|
||||
|
||||
realText($maxNbChars = 200, $indexSize = 2) // 'At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur.'
|
||||
|
||||
### `Faker\Provider\Internet`
|
||||
|
||||
email // 'tkshlerin@collins.com'
|
||||
|
@ -6,7 +6,7 @@ class Factory
|
||||
{
|
||||
const DEFAULT_LOCALE = 'en_US';
|
||||
|
||||
protected static $defaultProviders = array('Address', 'Color', 'Company', 'DateTime', 'File', 'Image', 'Internet', 'Lorem', 'Miscellaneous', 'Payment', 'Person', 'PhoneNumber', 'UserAgent', 'Uuid');
|
||||
protected static $defaultProviders = array('Address', 'Color', 'Company', 'DateTime', 'File', 'Image', 'Internet', 'Lorem', 'Miscellaneous', 'Payment', 'Person', 'PhoneNumber', 'Text', 'UserAgent', 'Uuid');
|
||||
|
||||
public static function create($locale = self::DEFAULT_LOCALE)
|
||||
{
|
||||
|
@ -39,6 +39,8 @@ namespace Faker;
|
||||
* @method string paragraphs()
|
||||
* @method string text()
|
||||
*
|
||||
* @method string realText()
|
||||
*
|
||||
* @property string email
|
||||
* @property string safeEmail
|
||||
* @property string freeEmail
|
||||
|
97
src/Faker/Provider/Text.php
Normal file
97
src/Faker/Provider/Text.php
Normal file
@ -0,0 +1,97 @@
|
||||
<?php
|
||||
|
||||
namespace Faker\Provider;
|
||||
|
||||
abstract class Text extends \Faker\Provider\Base
{
    /**
     * Source text the Markov chain is built from. Read through late static
     * binding (static::$baseText), so a subclass can supply its own corpus.
     *
     * @var string
     */
    protected static $baseText = '';

    /**
     * Lazily populated list of whitespace-separated words of the base text.
     *
     * @var array|null
     */
    protected $explodedText = null;

    /**
     * Cache of look up tables, keyed by index size.
     *
     * @var array
     */
    protected $consecutiveWords = array();

    /**
     * Generate a text string by the Markov chain algorithm.
     *
     * Depending on the $maxNbChars, returns a random valid looking text. The algorithm
     * generates a weighted table with the specified number of words as the index and the
     * possible following words as the value.
     *
     * The returned text, including the separating spaces and the trailing period,
     * never contains more than $maxNbChars bytes. Lengths are measured with strlen(),
     * so multi-byte characters count as more than one.
     *
     * @example 'Lorem ipsum dolor sit amet'
     * @param integer $maxNbChars Maximum number of characters the text should contain (minimum: 10)
     * @param integer $indexSize  Determines how many words are considered for the generation of the next word.
     *                            The minimum is 1, and it produces the highest level of randomness, although the
     *                            generated text usually doesn't make sense. Higher index sizes (up to 10)
     *                            produce more correct text, at the price of less randomness.
     * @return string
     * @throws \InvalidArgumentException When $maxNbChars is below 10 or $indexSize is outside 1..10
     */
    public function realText($maxNbChars = 200, $indexSize = 2)
    {
        if ($maxNbChars < 10) {
            throw new \InvalidArgumentException('maxNbChars must be at least 10');
        }

        if ($indexSize < 1) {
            throw new \InvalidArgumentException('indexSize must be at least 1');
        }

        if ($indexSize > 10) {
            throw new \InvalidArgumentException('indexSize must be at most 10');
        }

        $table = $this->getConsecutiveWords($indexSize);
        $words = array();
        $resultLength = 0;

        // take a random starting point
        $next = static::randomKey($table);
        while ($resultLength < $maxNbChars && isset($table[$next])) {
            // fetch a random element to append
            $append = static::randomElement($table[$next]);

            // calculate next index: drop the oldest word, add the new one
            $currentWords = explode(' ', $next);
            $currentWords[] = $append;
            array_shift($currentWords);
            $next = implode(' ', $currentWords);

            // ensure text starts with an uppercase letter
            if ($resultLength === 0 && !preg_match('/^\p{Lu}/u', $append)) {
                continue;
            }

            // append the element; the extra character accounts for the space
            // following the word (or the final period after the last word)
            $words[] = $append;
            $resultLength += strlen($append) + 1;
        }

        // remove the last element only if it actually caused the text to overflow;
        // the loop may also stop because the chain reached a dead end
        if ($resultLength > $maxNbChars) {
            array_pop($words);
        }

        return implode(' ', $words) . '.';
    }

    /**
     * Return the look up table for the given index size, building and caching it on first use.
     *
     * Each key is a run of $indexSize consecutive words of the base text joined by a
     * single space; each value is the list of words that follow that run in the text.
     * A follower appears once per occurrence, which is what weights the random choice.
     *
     * @param integer $indexSize Number of words forming a table key
     * @return array
     */
    protected function getConsecutiveWords($indexSize)
    {
        if (!isset($this->consecutiveWords[$indexSize])) {
            $parts = $this->getExplodedText();

            // generate look up table
            $table = array();
            for ($i = $indexSize, $max = count($parts) - 1; $i < $max; $i++) {
                // calculate index
                $index = implode(' ', array_slice($parts, $i - $indexSize, $indexSize));
                if (!isset($table[$index])) {
                    $table[$index] = array();
                }

                // value: next part
                $table[$index][] = $parts[$i];
            }

            // cache look up table for performance
            $this->consecutiveWords[$indexSize] = $table;
        }

        return $this->consecutiveWords[$indexSize];
    }

    /**
     * Return the base text split into words, computing the split only once per instance.
     *
     * Runs of whitespace are collapsed to a single space before splitting.
     *
     * @return array
     */
    protected function getExplodedText()
    {
        if ($this->explodedText === null) {
            $this->explodedText = explode(' ', preg_replace('/\s+/', ' ', static::$baseText));
        }

        return $this->explodedText;
    }
}
|
2045
src/Faker/Provider/de_DE/Text.php
Normal file
2045
src/Faker/Provider/de_DE/Text.php
Normal file
File diff suppressed because it is too large
Load Diff
3734
src/Faker/Provider/en_US/Text.php
Normal file
3734
src/Faker/Provider/en_US/Text.php
Normal file
File diff suppressed because it is too large
Load Diff
54
test/Faker/Provider/TextTest.php
Normal file
54
test/Faker/Provider/TextTest.php
Normal file
@ -0,0 +1,54 @@
|
||||
<?php
|
||||
namespace Faker\Test\Provider;
|
||||
|
||||
use Faker\Provider\en_US\Text;
|
||||
use Faker\Generator;
|
||||
|
||||
/**
 * Tests for the realText() generator, exercised through the en_US Text provider.
 */
class TextTest extends \PHPUnit_Framework_TestCase
{
    /**
     * The generated text must never be longer than the requested maximum.
     */
    public function testTextMaxLength()
    {
        $generator = new Generator();
        $generator->addProvider(new Text($generator));
        $generator->seed(0);

        $lengths = array(10, 20, 50, 70, 90, 120, 150, 200, 500);

        foreach ($lengths as $length) {
            // compare the LENGTH of the text with the limit; comparing the string
            // itself with an integer never checks the length contract
            $this->assertLessThanOrEqual($length, strlen($generator->realText($length)));
        }
    }

    /**
     * An index size above the supported maximum of 10 is rejected.
     *
     * @expectedException \InvalidArgumentException
     */
    public function testTextMaxIndex()
    {
        $generator = new Generator();
        $generator->addProvider(new Text($generator));
        $generator->seed(0);
        $generator->realText(200, 11);
    }

    /**
     * An index size below the supported minimum of 1 is rejected.
     *
     * @expectedException \InvalidArgumentException
     */
    public function testTextMinIndex()
    {
        $generator = new Generator();
        $generator->addProvider(new Text($generator));
        $generator->seed(0);
        $generator->realText(200, 0);
    }

    /**
     * A maximum length below the supported minimum of 10 is rejected.
     *
     * @expectedException \InvalidArgumentException
     */
    public function testTextMinLength()
    {
        $generator = new Generator();
        $generator->addProvider(new Text($generator));
        $generator->seed(0);
        $generator->realText(9);
    }
}
|
Loading…
x
Reference in New Issue
Block a user