translation: MDL-18766 Prototype automatic translation system for Moodle.

If Google can do something like http://translate.google.com/, surely we
can do something just as good in Moodle. Here is a first attempt. It is
a bit rough around the edges, and only a couple of target languages
have been implemented so far. (More and better attempts welcome!)

In order to ensure that this gets adequate testing, I have made sure that
it gets turned on by default. However, if you really don't like it, you can
turn it off under Administration > Development > Experimental settings.
This commit is contained in:
tjhunt 2009-04-01 00:39:17 +00:00
parent c1b8a99430
commit b4cf937102
8 changed files with 372 additions and 12 deletions

View File

@ -323,6 +323,7 @@
if (empty($CFG->rolesactive)) {
set_config('rolesactive', 1);
set_config('adminsetuppending', 1);
// Drop any stored auto-translation target language; auto_translate_content()
// chooses a language again whenever it finds this setting empty.
unset_config('autotranslatetolang');
// we need this redirect to setup proper session
upgrade_finished("index.php?sessionstarted=1&lang=$CFG->lang");
}

View File

@ -14,6 +14,7 @@ if ($hassiteconfig) { // speedup for non-admins, add all caps used on this page
$item->set_updatedcallback('reset_text_filters_cache');
$temp->add($item);
$temp->add(new admin_setting_configcheckbox('experimentalsplitrestore', get_string('experimentalsplitrestore', 'admin'), get_string('configexperimentalsplitrestore', 'admin'), 0));
// Target language for the prototype automatic translation system; the choices
// come from auto_translate_target_languages() and the default is 'null'.
$temp->add(new admin_setting_configselect('autotranslatetolang', get_string('autotranslate', 'autotranslate'), get_string('configautotranslate', 'autotranslate'), 'null', auto_translate_target_languages()));
$ADMIN->add('experimental', $temp);

View File

@ -0,0 +1,8 @@
<?php
// Language strings for the prototype automatic translation system.
$string['autotranslate'] = 'Automatically translate content';
$string['configautotranslate'] = 'This is a prototype automatic translation system for Moodle, like http://translate.google.com/, but better. At least it will be better in due course, it is still early days and only a few target languages are supported. Improvements to the algorithms are welcome.';
// The targetlang_* names are stored base64-encoded;
// auto_translate_target_languages() decodes them when it builds its list.
$string['targetlang_cs_ps'] = 'xIxlxaF0aW5hIChQZXRyIFNrb2Rhayk=';
$string['targetlang_en_nz_pl'] = 'S2l3aSAoUGVubnkgTGVhY2gp';
$string['targetlang_null'] = 'Tm8gdHJhbnNsYXRpb24=';
?>

218
lib/autotranslatelib.php Normal file
View File

@ -0,0 +1,218 @@
<?php // $Id$
///////////////////////////////////////////////////////////////////////////
// //
// NOTICE OF COPYRIGHT //
// //
// Moodle - Modular Object-Oriented Dynamic Learning Environment //
// http://moodle.org //
// //
// Copyright (C) 1999 onwards Martin Dougiamas http://dougiamas.com //
// //
// This program is free software; you can redistribute it and/or modify //
// it under the terms of the GNU General Public License as published by //
// the Free Software Foundation; either version 2 of the License, or //
// (at your option) any later version. //
// //
// This program is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU General Public License for more details: //
// //
// http://www.gnu.org/copyleft/gpl.html //
// //
///////////////////////////////////////////////////////////////////////////
/**
* Prototype automatic translation system for Moodle.
*
* @license http://www.gnu.org/copyleft/gpl.html GNU Public License
* @package autotranslate
*/
/**
 * Run some content through the translator for the configured target language.
 *
 * When no target language is configured yet, one is settled first: 'null'
 * (held in $CFG only) while roles are not yet active, otherwise a random entry
 * from auto_translate_target_languages() other than the first one, which is
 * then saved with set_config().
 *
 * @param string $content some (HTML) content.
 * @return string the content, translated to the configured language.
 */
function auto_translate_content($content) {
    global $CFG;

    if (empty($CFG->autotranslatetolang) && empty($CFG->rolesactive)) {
        $CFG->autotranslatetolang = 'null';
    } else if (empty($CFG->autotranslatetolang)) {
        $candidates = array_keys(auto_translate_target_languages());
        // The first entry is dropped before the random pick is made.
        array_shift($candidates);
        $pick = mt_rand(0, count($candidates) - 1);
        set_config('autotranslatetolang', $candidates[$pick]);
    }

    return translator_factory::instance()
            ->get_translator($CFG->autotranslatetolang)
            ->translate_content($content);
}
/**
 * Decide whether a language string must be left out of automatic translation.
 *
 * @param string $identifier the string identifier (not used in the decision).
 * @param string $module the language file the string comes from.
 * @return bool true if roles are not active yet, or if the string belongs to
 *      the 'autotranslate' or 'langconfig' module.
 */
function is_untranslatable_string($identifier, $module) {
    global $CFG;

    if (empty($CFG->rolesactive)) {
        return true;
    }
    // Loose comparison on purpose, matching a plain == test against each name.
    return in_array($module, array('autotranslate', 'langconfig'));
}
/**
 * List the languages that content can be automatically translated into.
 *
 * The list is built once per request and then served from a static cache.
 * Names are looked up as 'targetlang_<code>' strings in the 'autotranslate'
 * language file and base64-decoded, unless the lookup result starts with '[['.
 *
 * @return array language code => human readable name.
 */
function auto_translate_target_languages() {
    static $cache = null;

    if (is_null($cache)) {
        $cache = array();
        foreach (array('null', 'cs_ps', 'en_nz_pl') as $langcode) {
            $label = get_string('targetlang_' . $langcode, 'autotranslate');
            // Results starting with '[[' are kept as they are rather than decoded.
            $cache[$langcode] = (substr($label, 0, 2) != '[[') ? base64_decode($label) : $label;
        }
    }

    return $cache;
}
/**
 * Singleton class that gets the right auto_translator for a target language.
 */
class translator_factory {
    /** @var translator_factory|null the shared instance, created on first use. */
    private static $instance = null;
    /** @var array translators that have already been built, keyed by class name. */
    private $translators = array();

    /**
     * Protected so that the factory can only be obtained through instance().
     */
    protected function __construct() {
    }

    /**
     * Get the shared factory, creating it on the first call.
     * @return translator_factory the singleton instance.
     */
    public static function instance() {
        if (is_null(self::$instance)) {
            self::$instance = new translator_factory();
        }
        return self::$instance;
    }

    /**
     * Get the translator for a target language.
     *
     * The translator class is named '<lang>_auto_translator'. If the current
     * backtrace mentions 'database' or 'print_error', the null translator is
     * used instead, whatever language was asked for.
     *
     * @param string $lang target language code; an empty value means 'null'.
     * @return auto_translator the translator, reused across calls.
     * @throws moodle_exception if no translator class exists for the language.
     */
    public static function get_translator($lang) {
        if (empty($lang)) {
            $lang = 'null';
        }

        $classname = $lang . '_auto_translator';
        // Render the backtrace once and search it for both markers.
        $trace = print_backtrace(debug_backtrace(), true);
        if (strpos($trace, 'database') !== false || strpos($trace, 'print_error') !== false) {
            $classname = 'null_auto_translator';
        }

        // This method is static, so the cache is reached through the singleton.
        // Keying by the resolved class name stops the fallback above from
        // being remembered as the translator for $lang.
        $factory = self::instance();
        if (isset($factory->translators[$classname])) {
            return $factory->translators[$classname];
        }

        if (!class_exists($classname)) {
            // NOTE(review): moodle_exception is declared outside this file; confirm
            // that its constructor can really be called without arguments.
            throw new moodle_exception();
        }

        $factory->translators[$classname] = new $classname;
        return $factory->translators[$classname];
    }
}
/**
 * Contract implemented by every translator that translator_factory hands out.
 */
interface auto_translator {
/**
 * Translate a piece of content.
 * @param string $content the content to translate.
 * @return string the translated content.
 */
public function translate_content($content);
}
/**
 * Translator that performs no translation at all.
 */
class null_auto_translator implements auto_translator {
/**
 * @param string $content the content to translate.
 * @return string the same content, unchanged.
 */
public function translate_content($content) {
return $content;
}
}
/**
 * Base class for translators that work one word at a time.
 *
 * The content is cut into runs of markup and runs of text, every word of the
 * text is passed to translate_word(), and the pieces are then joined back
 * together in their original order.
 */
abstract class word_by_word_translator implements auto_translator {
    /**
     * Translate the text parts of some content, leaving tags and entities alone.
     * @param string $content some (HTML) content.
     * @return string the content with each word of its text translated.
     */
    public function translate_content($content) {
        $pieces = $this->split_text_and_tags($content);
        foreach ($pieces as $piece) {
            if ($piece->type != 'text') {
                continue;
            }
            // $piece is the same object that is stored in $pieces, so this
            // assignment updates the list that gets joined below.
            $piece->content = $this->translate_text($piece->content);
        }
        return $this->join_content($pieces);
    }

    /**
     * Cut content into alternating text and markup pieces.
     * @param string $content some (HTML) content.
     * @return array of objects with ->content (string) and ->type ('text' or
     *      'tag'). Runs of adjacent tags and entities form a single 'tag' piece.
     */
    protected function split_text_and_tags($content) {
        $pattern = '/((?:<[^#%*>][^>]*>|&\w+;|&#\d+;|&#[xX][0-9a-fA-F]+;)+)/';
        $chunks = preg_split($pattern, $content, -1, PREG_SPLIT_DELIM_CAPTURE);

        $parsed = array();
        foreach ($chunks as $position => $chunk) {
            $entry = new stdClass;
            $entry->content = $chunk;
            // Because the delimiters are captured, odd positions hold the markup.
            $entry->type = ($position % 2) ? 'tag' : 'text';
            $parsed[] = $entry;
        }
        return $parsed;
    }

    /**
     * Translate every word in a run of plain text.
     * @param string $text text without any tags or entities.
     * @return string the text with each word replaced by its translation;
     *      everything between the words is kept as it was.
     */
    protected function translate_text($text) {
        $translated = '';
        foreach (preg_split('/\b/', $text) as $fragment) {
            // Splitting on word boundaries yields words and the gaps between
            // them; only fragments that contain word characters are translated.
            if (preg_match('/\w+/', $fragment)) {
                $translated .= $this->translate_word($fragment);
            } else {
                $translated .= $fragment;
            }
        }
        return $translated;
    }

    /**
     * Glue parsed pieces back into a single string.
     * @param array $content objects with a ->content property.
     * @return string the concatenated contents, in order.
     */
    protected function join_content($content) {
        $contents = array();
        foreach ($content as $piece) {
            $contents[] = $piece->content;
        }
        return implode('', $contents);
    }

    /**
     * Translate a single word.
     * @param string $word the word to translate.
     * @return string the translated word.
     */
    abstract protected function translate_word($word);
}
/**
 * Word-by-word translator that writes every word backwards.
 */
class reverse_auto_translator extends word_by_word_translator {
/**
 * @param string $word a single word.
 * @return string the word as reversed by strrev().
 */
protected function translate_word($word) {
return strrev($word);
}
}
/**
 * Word-by-word translator for the 'cs_ps' target language.
 */
class cs_ps_auto_translator extends word_by_word_translator {
    /**
     * Replace a word with a growl of the same length: a 'G', then a run of
     * 'r', then a run of '!'.
     * @param string $word a single word.
     * @return string the replacement, or '' for an empty word.
     */
    protected function translate_word($word) {
        $length = strlen($word);
        if ($length < 2) {
            // One-letter words become a bare 'G'; empty input stays empty.
            return $length ? 'G' : '';
        }
        // About one '!' for every five letters beyond the first two.
        $bangs = round(($length - 2) / 5);
        return 'G' . str_repeat('r', $length - $bangs - 1) . str_repeat('!', $bangs);
    }
}
/**
 * Word-by-word translator for the 'en_nz_pl' target language.
 *
 * Each word is replaced by an entry from a small built-in word list, chosen by
 * the length of the word.
 */
class en_nz_pl_auto_translator extends word_by_word_translator {
    /** @var array|null the word list, or null until it is first needed. */
    private $library = null;
    /** @var int number of entries in the word list. */
    private $librarylen;

    /**
     * Decode the built-in word list the first time it is needed.
     */
    private function ensure_library_loaded() {
        if (!is_null($this->library)) {
            return;
        }
        // A base64-encoded, serialized PHP array that is hard-coded here.
        $encoded = 'YTo5OntpOjA7czozOiJjYXQiO2k6MTtzOjQ6InBvbnkiO2k6MjtzOjQ6InJh' .
                'Z2UiO2k6MztzOjU6Im5pbmphIjtpOjQ7czo1OiJhbmdyeSI7aTo1O3M6Njoi' .
                'ZmllcmNlIjtpOjY7czo2OiJjb2ZmZWUiO2k6NztzOjc6ImNhZmZpbmUiO2k6' .
                'ODtzOjY6Im1haGFyYSI7fQ==';
        $this->library = unserialize(base64_decode($encoded));
        $this->librarylen = count($this->library);
    }

    /**
     * @param string $word a single word.
     * @return string the word list entry selected by the length of the word,
     *      wrapping around the list; '' for an empty word.
     */
    public function translate_word($word) {
        $length = strlen($word);
        if ($length == 0) {
            return '';
        }
        $this->ensure_library_loaded();
        $slot = ($length - 1) % $this->librarylen;
        return $this->library[$slot];
    }
}

View File

@ -5535,6 +5535,9 @@ class string_manager {
foreach (array('_local', '') as $suffix) {
$file = $location . $lang . $suffix . '/' . $module . '.php';
if ($result = $this->get_string_from_file($identifier, $file, $a)) {
// Run the string through the automatic translator, except where
// is_untranslatable_string() says it must be left as written.
if (!is_untranslatable_string($identifier, $module)) {
$result = auto_translate_content($result);
}
return $result;
}
}

View File

@ -158,6 +158,7 @@ global $SCRIPT;
require_once($CFG->libdir .'/eventslib.php'); // Events functions
require_once($CFG->libdir .'/grouplib.php'); // Groups functions
require_once($CFG->libdir .'/sessionlib.php'); // All session and cookie related stuff
require_once($CFG->libdir .'/autotranslatelib.php');// Prototype automatic translation system
//point pear include path to moodles lib/pear so that includes and requires will search there for files before anywhere else
//the problem is that we need specific version of quickforms and hacked excel files :-(

View File

@ -0,0 +1,132 @@
<?php // $Id$
///////////////////////////////////////////////////////////////////////////
// //
// NOTICE OF COPYRIGHT //
// //
// Moodle - Modular Object-Oriented Dynamic Learning Environment //
// http://moodle.org //
// //
// Copyright (C) 1999 onwards Martin Dougiamas http://dougiamas.com //
// //
// This program is free software; you can redistribute it and/or modify //
// it under the terms of the GNU General Public License as published by //
// the Free Software Foundation; either version 2 of the License, or //
// (at your option) any later version. //
// //
// This program is distributed in the hope that it will be useful, //
// but WITHOUT ANY WARRANTY; without even the implied warranty of //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the //
// GNU General Public License for more details: //
// //
// http://www.gnu.org/copyleft/gpl.html //
// //
///////////////////////////////////////////////////////////////////////////
/**
* Tests for autotranslatelib.php.
*
* @license http://www.gnu.org/copyleft/gpl.html GNU Public License
* @package autotranslate
*/
if (!defined('MOODLE_INTERNAL')) {
die('Direct access to this script is forbidden.'); /// It must be included from a Moodle page
}
require_once($CFG->libdir . '/autotranslatelib.php');
/**
 * Tests for null_auto_translator.
 */
class test_null_auto_translator extends UnitTestCase {
    /**
     * The null translator must hand back exactly what it was given.
     */
    public function test_translate_content() {
        $input = 'some content';
        $subject = new null_auto_translator;
        $output = $subject->translate_content($input);
        $this->assertEqual($output, $input);
    }
}
/**
 * Test subclass of word_by_word_translator. It re-declares the protected
 * methods as public so the tests can call them directly, and translates
 * every word to the fixed string 'word'.
 */
class testable_word_by_word_translator extends word_by_word_translator {
// Public pass-through to the parent implementation.
public function split_text_and_tags($content) {
return parent::split_text_and_tags($content);
}
// Public pass-through to the parent implementation.
public function translate_text($text) {
return parent::translate_text($text);
}
// Public pass-through to the parent implementation.
public function join_content($content) {
return parent::join_content($content);
}
// Fixed translation, so expected test output does not depend on the input words.
public function translate_word($word) {
return 'word';
}
}
/**
 * Tests for the splitting, translating and joining steps of
 * word_by_word_translator, run through testable_word_by_word_translator.
 */
class test_word_by_word_translator extends UnitTestCase {
    /** @var testable_word_by_word_translator the translator under test. */
    private $wwt;

    public function setUp() {
        $this->wwt = new testable_word_by_word_translator();
    }

    /**
     * Build the list of parsed pieces that split_text_and_tags() should return.
     * @param array $pairs list of array(content, type).
     * @return array of objects with ->content and ->type.
     */
    private function parsed_items($pairs) {
        $items = array();
        foreach ($pairs as $pair) {
            $items[] = (object) array('content' => $pair[0], 'type' => $pair[1]);
        }
        return $items;
    }

    public function test_split_text_and_tags_simple() {
        $expected = $this->parsed_items(array(
            array('Some text.', 'text'),
        ));
        $actual = $this->wwt->split_text_and_tags('Some text.');
        $this->assertEqual($expected, $actual);
    }

    public function test_split_text_and_tags_entity_uc() {
        $expected = $this->parsed_items(array(
            array('Hi', 'text'),
            array('&#XAa0;', 'tag'),
            array('world!', 'text'),
        ));
        $actual = $this->wwt->split_text_and_tags('Hi&#XAa0;world!');
        $this->assertEqual($expected, $actual);
    }

    public function test_split_text_and_tags_complex_html() {
        $expected = $this->parsed_items(array(
            array('', 'text'),
            array('<div class="frog">', 'tag'),
            array('This ', 'text'),
            array('&amp;', 'tag'),
            array(' ', 'text'),
            array('<b>', 'tag'),
            array('that', 'text'),
            array('</b></span>&#xa0;', 'tag'),
            array('', 'text'),
        ));
        $actual = $this->wwt->split_text_and_tags('<div class="frog">This &amp; <b>that</b></span>&#xa0;');
        $this->assertEqual($expected, $actual);
    }

    public function test_translate_text() {
        $actual = $this->wwt->translate_text('This *is* some text (rough) content!');
        $this->assertEqual('word *word* word word (word) word!', $actual);
    }

    public function test_translate_text_empty() {
        $actual = $this->wwt->translate_text('');
        $this->assertEqual('', $actual);
    }

    public function test_join_content() {
        // The pieces are cut at odd places on purpose: joining is plain concatenation.
        $pieces = array();
        foreach (array('Tes', 't <', '->') as $fragment) {
            $pieces[] = (object) array('content' => $fragment);
        }
        $this->assertEqual('Test <->', $this->wwt->join_content($pieces));
    }
}
/**
 * End-to-end test of reverse_auto_translator on a piece of HTML.
 */
class test_reverse_auto_translator extends UnitTestCase {
    /** @var reverse_auto_translator the translator under test. */
    private $translator;

    public function setUp() {
        $this->translator = new reverse_auto_translator();
    }

    /**
     * Words are reversed; tags and entities come through untouched.
     */
    public function test_translate_content() {
        $html = '<div class="frog">This &amp; <b>that</b></span>&#xa0;';
        $expected = '<div class="frog">sihT &amp; <b>taht</b></span>&#xa0;';
        $this->assertEqual($expected, $this->translator->translate_content($html));
    }
}

View File

@ -113,21 +113,16 @@ $ALLOWED_PROTOCOLS = array('http', 'https', 'ftp', 'news', 'mailto', 'rtsp', 'te
* This function is very similar to {@link p()}
*
* @param string $var the string potentially containing HTML characters
* @param boolean $strip to decide if we want to strip slashes or no. Default to false.
* true should be used to print data from forms and false for data from DB.
* @param boolean $obsolete no longer used.
* @return string
*/
function s($var, $strip=false) {
function s($var, $obsolete = false) {
if ($var == '0') { // for integer 0, boolean false, string '0'
return '0';
}
if ($strip) {
return preg_replace("/&amp;(#\d+);/i", "&$1;", htmlspecialchars($var));
} else {
return preg_replace("/&amp;(#\d+);/i", "&$1;", htmlspecialchars($var));
}
return auto_translate_content(preg_replace("/&amp;(#\d+);/i", "&$1;", htmlspecialchars($var)));
}
/**
@ -137,12 +132,11 @@ function s($var, $strip=false) {
* This function is very similar to {@link s()}
*
* @param string $var the string potentially containing HTML characters
* @param boolean $strip to decide if we want to strip slashes or no. Default to false.
* true should be used to print data from forms and false for data from DB.
* @param boolean $obsolete no longer used.
* @return string
*/
function p($var, $strip=false) {
echo s($var, $strip);
function p($var, $obsolete = false) {
echo s($var, $obsolete);
}
/**
@ -1378,6 +1372,7 @@ function format_text($text, $format=FORMAT_MOODLE, $options=NULL, $courseid=NULL
}
break;
}
$text = auto_translate_content($text);
if (empty($options->nocache) and !empty($CFG->cachetext) and $CFG->currenttextiscacheable) {
if (CLI_SCRIPT) {
@ -1506,6 +1501,7 @@ function format_string ($string, $striplinks=true, $courseid=NULL ) {
}
$string = clean_text($string);
}
$string = auto_translate_content($string);
//Store to cache
$strcache[$md5] = $string;