1
0
mirror of https://github.com/fzaninotto/Faker.git synced 2025-04-20 23:41:50 +02:00

Merge pull request #254 from TimWolla/advancedTextProvider

Add an advanced text generator based on markov chains.
This commit is contained in:
Francois Zaninotto 2014-03-03 16:49:37 +01:00
commit 680b36daa4
7 changed files with 5937 additions and 1 deletions

View File

@ -117,6 +117,10 @@ Each of the generator properties (like `name`, `address`, and `lorem`) are calle
paragraphs($nb = 3) // array('Quidem ut sunt et quidem est accusamus aut. Fuga est placeat rerum ut. Enim ex eveniet facere sunt.', 'Aut nam et eum architecto fugit repellendus illo. Qui ex esse veritatis.', 'Possimus omnis aut incidunt sunt. Asperiores incidunt iure sequi cum culpa rem. Rerum exercitationem est rem.')
text($maxNbChars = 200) // 'Fuga totam reiciendis qui architecto fugiat nemo. Consequatur recusandae qui cupiditate eos quod.'
### `Faker\Provider\Text`
realText($maxNbChars = 200, $indexSize = 2) // 'At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur.'
### `Faker\Provider\Internet`
email // 'tkshlerin@collins.com'

View File

@ -6,7 +6,7 @@ class Factory
{
const DEFAULT_LOCALE = 'en_US';
protected static $defaultProviders = array('Address', 'Color', 'Company', 'DateTime', 'File', 'Image', 'Internet', 'Lorem', 'Miscellaneous', 'Payment', 'Person', 'PhoneNumber', 'UserAgent', 'Uuid');
protected static $defaultProviders = array('Address', 'Color', 'Company', 'DateTime', 'File', 'Image', 'Internet', 'Lorem', 'Miscellaneous', 'Payment', 'Person', 'PhoneNumber', 'Text', 'UserAgent', 'Uuid');
public static function create($locale = self::DEFAULT_LOCALE)
{

View File

@ -39,6 +39,8 @@ namespace Faker;
* @method string paragraphs()
* @method string text()
*
* @method string realText()
*
* @property string email
* @property string safeEmail
* @property string freeEmail

View File

@ -0,0 +1,97 @@
<?php
namespace Faker\Provider;
abstract class Text extends \Faker\Provider\Base
{
    /**
     * Corpus the Markov chain is built from. It is empty in this abstract
     * class, so a concrete subclass has to override it with real text.
     *
     * @var string
     */
    protected static $baseText = '';

    /**
     * Parts of static::$baseText, filled lazily by getExplodedText().
     *
     * @var array|null
     */
    protected $explodedText = null;

    /**
     * Cache of look up tables, keyed by the index size they were built for.
     *
     * @var array
     */
    protected $consecutiveWords = array();

    /**
     * Generate a text string by the Markov chain algorithm.
     *
     * Depending on $maxNbChars, returns a random, valid-looking text. The algorithm
     * generates a weighted table with the specified number of words as the index and
     * the possible following words as the value.
     *
     * The returned string, including the spaces between the words and the closing
     * period, is always shorter than $maxNbChars. The length is measured with
     * strlen(), i.e. in bytes.
     *
     * @example 'Lorem ipsum dolor sit amet'
     * @param integer $maxNbChars Maximum number of characters the text should contain (minimum: 10)
     * @param integer $indexSize  Determines how many words are considered for the generation of the next word.
     *                            The minimum is 1, which produces the highest level of randomness, although the
     *                            generated text usually doesn't make sense. Higher index sizes (up to 10) produce
     *                            more correct text, at the price of less randomness.
     * @return string
     * @throws \InvalidArgumentException when $maxNbChars is below 10 or $indexSize is outside 1..10
     */
    public function realText($maxNbChars = 200, $indexSize = 2)
    {
        if ($maxNbChars < 10) {
            throw new \InvalidArgumentException('maxNbChars must be at least 10');
        }

        if ($indexSize < 1) {
            throw new \InvalidArgumentException('indexSize must be at least 1');
        }

        if ($indexSize > 10) {
            throw new \InvalidArgumentException('indexSize must be at most 10');
        }

        $table = $this->getConsecutiveWords($indexSize);

        $result = array();
        // length the finished string would have if it were closed right now
        $resultLength = 0;

        // take a random starting point
        $next = static::randomKey($table);
        while (isset($table[$next])) {
            // fetch a random element to append
            $append = static::randomElement($table[$next]);

            // calculate next index: drop the oldest word, add the new one
            $window = explode(' ', $next);
            $window[] = $append;
            array_shift($window);
            $next = implode(' ', $window);

            // ensure text starts with an uppercase letter
            if (count($result) === 0 && !preg_match('/^\p{Lu}/u', $append)) {
                continue;
            }

            // Every word costs its own length plus one more character: the
            // space that follows it or, for the last word, the closing period.
            $lengthWithWord = $resultLength + strlen($append) + 1;
            if ($lengthWithWord >= $maxNbChars) {
                // this word would overflow the limit, so it is not added
                break;
            }

            // append the element
            $result[] = $append;
            $resultLength = $lengthWithWord;
        }

        // build result
        return implode(' ', $result) . '.';
    }

    /**
     * Return the look up table for the given index size, building and caching
     * it on first use.
     *
     * Each key is $indexSize consecutive parts of the base text joined by a
     * single space; each value is the list of parts seen right after that key.
     * A part that follows the same key several times is listed several times,
     * which is what weights the random choice.
     *
     * @param integer $indexSize Number of consecutive words forming a key
     * @return array
     */
    protected function getConsecutiveWords($indexSize)
    {
        if (!isset($this->consecutiveWords[$indexSize])) {
            $parts = $this->getExplodedText();

            // generate look up table
            $table = array();
            // The loop stops one short of count($parts), so the very last part
            // is never stored as a value.
            // NOTE(review): presumably the base text ends with whitespace, which
            // makes the last part an empty string - confirm.
            for ($i = $indexSize, $max = count($parts) - 1; $i < $max; $i++) {
                // calculate index
                $index = implode(' ', array_slice($parts, $i - $indexSize, $indexSize));
                if (!isset($table[$index])) {
                    $table[$index] = array();
                }

                // value: next part
                $table[$index][] = $parts[$i];
            }

            // cache look up table for performance
            $this->consecutiveWords[$indexSize] = $table;
        }

        return $this->consecutiveWords[$indexSize];
    }

    /**
     * Return static::$baseText split on single spaces, after every run of
     * whitespace has been collapsed into one space. The result is computed
     * once and then kept in $this->explodedText.
     *
     * @return array
     */
    protected function getExplodedText()
    {
        if ($this->explodedText === null) {
            $normalized = preg_replace('/\s+/', ' ', static::$baseText);
            $this->explodedText = explode(' ', $normalized);
        }

        return $this->explodedText;
    }
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,54 @@
<?php
namespace Faker\Test\Provider;
use Faker\Provider\en_US\Text;
use Faker\Generator;
/**
 * Tests for the realText() formatter, exercised through a Generator that has
 * the en_US Text provider registered.
 */
class TextTest extends \PHPUnit_Framework_TestCase
{
    /**
     * The generated text must be shorter than the requested maximum for a
     * range of limits.
     */
    public function testTextMaxLength()
    {
        $generator = new Generator();
        $generator->addProvider(new Text($generator));
        $generator->seed(0);

        $lengths = array(10, 20, 50, 70, 90, 120, 150, 200, 500);

        foreach ($lengths as $length) {
            // Compare the length of the text, not the text itself: passing the
            // string would make the assertion compare an integer with a string
            // and never actually check the limit.
            $this->assertLessThan($length, strlen($generator->realText($length)));
        }
    }

    /**
     * An index size of 11 is rejected.
     *
     * @expectedException \InvalidArgumentException
     */
    public function testTextMaxIndex()
    {
        $generator = new Generator();
        $generator->addProvider(new Text($generator));
        $generator->seed(0);
        $generator->realText(200, 11);
    }

    /**
     * An index size of 0 is rejected.
     *
     * @expectedException \InvalidArgumentException
     */
    public function testTextMinIndex()
    {
        $generator = new Generator();
        $generator->addProvider(new Text($generator));
        $generator->seed(0);
        $generator->realText(200, 0);
    }

    /**
     * A maximum length of 9 is rejected.
     *
     * @expectedException \InvalidArgumentException
     */
    public function testTextMinLength()
    {
        $generator = new Generator();
        $generator->addProvider(new Text($generator));
        $generator->seed(0);
        $generator->realText(9);
    }
}