mirror of
https://github.com/moodle/moodle.git
synced 2025-04-22 08:55:15 +02:00
Initial commit
This commit is contained in:
parent
1cd1252752
commit
682d403259
70
blocks/search/block_search.php
Normal file
70
blocks/search/block_search.php
Normal file
@ -0,0 +1,70 @@
|
||||
<?php
|
||||
|
||||
/* This is the global search shortcut block - a single query can be entered, and
|
||||
the user will be redirected to the query page where they can enter more
|
||||
advanced queries, and view the results of their search. When searching from
|
||||
this block, the broadest possible selection of documents is searched.
|
||||
|
||||
Author: Michael Champanis (mchampan)
|
||||
Date: 2006 06 23
|
||||
|
||||
Todo: make strings -> get_string()
|
||||
*/
|
||||
|
||||
class block_search extends block_base {

    //sets the title shown for the block and the block version number
    function init() {
        $this->title = "Global Search"; //get_string()
        $this->version = 20060625;
    } //init

    // only one instance of this block is required
    function instance_allow_multiple() {
        return false;
    } //instance_allow_multiple

    // label and button values can be set in admin
    function has_config() {
        return true;
    } //has_config

    //builds the block content (a one-field search form posting to the query
    //page) the first time it is requested and returns the cached object on
    //every later call; on PHP older than 5.0.0 a notice is shown instead
    function get_content() {
        global $CFG;

        //cache block contents
        if ($this->content !== NULL) {
            return $this->content;
        } //if

        $this->content = new stdClass;

        //lazy check for the moment
        if (check_php_version("5.0.0")) {
            //fetch values if defined in admin, otherwise use defaults
            $label  = (isset($CFG->block_search_text)) ? $CFG->block_search_text : "Search Moodle";
            $button = (isset($CFG->block_search_button)) ? $CFG->block_search_button : "Go";

            //both values are free text typed into the admin form, so they are
            //escaped before being placed inside markup and attribute values
            $label  = htmlspecialchars($label);
            $button = htmlspecialchars($button);

            //the block is rendered on pages in many directories, so the form
            //target has to be absolute; a relative "search/query.php" would
            //resolve against the directory of the current page
            $action = $CFG->wwwroot.'/search/query.php';

            //basic search form; the label is tied to the text field by id
            //(the former length="50" attribute is not valid on <input> and
            //was ignored by browsers, so it has been dropped)
            $this->content->text =
                '<form name="query" method="post" action="'.$action.'">'
              . '<label for="block_search_query">'.$label.'</label>'
              . '<input type="text" id="block_search_query" name="query_string" value=""/>'
              . '<input type="submit" value="'.$button.'"/>'
              . '</form>';
        } else {
            $this->content->text = "Sorry folks, PHP 5 is needed for the new search module.";
        } //else

        //no footer, thanks
        $this->content->footer = '';

        return $this->content;
    } //get_content

    //nothing to specialise for this block
    function specialisation() {
        //empty!
    } //specialisation

} //block_search
|
||||
|
||||
?>
|
19
blocks/search/config_global.html
Normal file
19
blocks/search/config_global.html
Normal file
@ -0,0 +1,19 @@
|
||||
<?php
// Global settings form for the search block: lets an admin change the label
// shown above the search field and the caption of the submit button. Each
// field is pre-filled with the stored $CFG value, or with the same default the
// block itself falls back to. The inputs carry ids so that the <label for=...>
// attributes actually point at them.
?>
<div style="text-align:center;">
  <label for="block_search_text">Search label</label>
  <input type="text" id="block_search_text" name="block_search_text" value="<?php
    p(isset($CFG->block_search_text) ? $CFG->block_search_text : "Search Moodle");
  ?>"/><br>

  <label for="block_search_button">Button label</label>
  <input type="text" id="block_search_button" name="block_search_button" value="<?php
    p(isset($CFG->block_search_button) ? $CFG->block_search_button : "Go");
  ?>"/><br><br>

  <input type="submit" value="<?php print_string('savechanges'); ?>" />
</div>
|
120
mod/wiki/lib.php
120
mod/wiki/lib.php
@ -352,6 +352,126 @@ function wiki_get_entries(&$wiki, $byindex=NULL) {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*==== Global search modifications
|
||||
* Author: Michael Champanis (mchampan)
|
||||
* Last date: 2006 06 25
|
||||
* These modifications allow wiki documents to be indexed in the new
|
||||
* search engine module - they are probably not final, and as such
|
||||
* shouldn't be used by other stuff for the time being
|
||||
**/
|
||||
|
||||
//rescued and converted from ewikimoodlelib.php
//returns one row of wiki_pages for $pagename inside the wiki entry $entry:
//the highest stored version, or exactly $version when a positive integer is
//passed; the row's meta field is unserialized before it is returned;
//returns false when no matching row is found
function wiki_get_latest_page(&$entry, $pagename, $version=0) {
    global $CFG;

    //need something like this in datalib.php?
    //choose the escaping routine that matches the configured database type
    if ($CFG->dbtype == 'mysql') {
        $escape = 'mysql_real_escape_string';
    } else if ($CFG->dbtype == 'postgres7') {
        $escape = 'pg_escape_string';
    } else {
        $escape = 'addslashes';
    } //else

    $quotedname = "'".$escape($pagename)."'";

    //only a real, positive integer is allowed to narrow the version
    $versionclause = '';
    if ($version > 0 and is_int($version)) {
        $versionclause = "AND (version=$version)";
    } //if

    $select = "(pagename=$quotedname) AND wiki=".$entry->id." $versionclause ";

    //change this to recordset_select, as per http://docs.moodle.org/en/Datalib_Notes
    //newest version first, limited to a single row
    $records = get_records_select('wiki_pages', $select, 'version DESC', '*', 0, 1);

    $latest = null;
    if ($records) {
        foreach ($records as $record) {
            $latest = $record;
        } //foreach
    } //if

    if (is_null($latest)) {
        return false;
    } //if

    //NOTE(review): unserialize errors are silenced here, so meta may end up false
    $latest->meta = @unserialize($latest->meta);
    return $latest;
} //wiki_get_latest_page
|
||||
|
||||
//returns every wiki_pages row that belongs to the wiki entry $entry,
//old versions included (whatever get_records() yields for that lookup)
function wiki_get_pages(&$entry) {
    $allpages = get_records('wiki_pages', 'wiki', $entry->id);
    return $allpages;
} //wiki_get_pages
|
||||
|
||||
//fetches the latest version of every page in the wiki entry $entry
//returns an array of page objects (see wiki_get_latest_page), or false when
//the list of page names could not be fetched; page names whose lookup fails
//are left out, so callers never receive false inside the array
function wiki_get_latest_pages(&$entry) {
    //== (My)SQL for this
    /* select * from wiki_pages
       inner join
      (select wiki_pages.pagename, max(wiki_pages.version) as ver
      from wiki_pages group by pagename) as a
      on ((wiki_pages.version = a.ver) and
      (wiki_pages.pagename like a.pagename)) */

    $pages = array();

    //get_recordset() is used instead of get_records() because of
    //http://moodle.org/bugs/bug.php?op=show&bugid=5877&pos=0
    $rs = get_recordset('wiki_pages', 'wiki', $entry->id, '', 'distinct pagename');
    if (!$rs) {
        return false;
    } //if

    //all rows are copied out first, so the recordset can be released
    //before the per-page queries run
    $ids = $rs->GetRows();
    $rs->Close();

    if (!is_array($ids)) {
        return $pages;
    } //if

    foreach ($ids as $id) {
        //NOTE(review): $id[0] assumes rows are indexed numerically - confirm fetch mode
        $page = wiki_get_latest_page($entry, $id[0]);
        if ($page !== false) {
            $pages[] = $page;
        } //if
    } //foreach

    return $pages;
} //wiki_get_latest_pages
|
||||
|
||||
//returns the wiki instances found in the courses reported by get_courses()
function wiki_iterator() {
    $courses = get_courses();
    return get_all_instances_in_courses("wiki", $courses);
} //wiki_iterator
|
||||
|
||||
//builds the list of documents to index for one wiki: for every entry of
//$wiki, the latest version of each page with non-empty content is wrapped
//in a WikiSearchDocument; returns an array (empty when nothing is found)
function wiki_get_content_for_index(&$wiki) {
    $documents = array();

    $entries = wiki_get_entries($wiki);
    if (empty($entries)) {
        //no entries (or a failed lookup): nothing to index
        return $documents;
    } //if

    foreach($entries as $entry) {
        //all pages
        //$pages = wiki_get_pages($entry);

        //latest pages
        $pages = wiki_get_latest_pages($entry);

        //wiki_get_latest_pages() returns false when the lookup fails
        if (!is_array($pages)) {
            continue;
        } //if

        foreach($pages as $page) {
            //skip failed page lookups and pages without any text
            if (is_object($page) && strlen($page->content) > 0) {
                $documents[] = new WikiSearchDocument($page, $entry->wikiid, $entry->course, $entry->userid, $entry->groupid);
            } //if
        } //foreach
    } //foreach

    return $documents;
} //wiki_get_content_for_index
|
||||
|
||||
/*==== Global search modifications end */
|
||||
|
||||
|
||||
function wiki_get_default_entry(&$wiki, &$course, $userid=0, $groupid=0) {
|
||||
/// Returns the wiki entry according to the wiki type.
|
||||
/// Optionally, will return wiki entry for $userid student wiki, or
|
||||
|
22
search/README.txt
Normal file
22
search/README.txt
Normal file
@ -0,0 +1,22 @@
|
||||
This is the initial release (prototype) of Moodle's new search module -
|
||||
so basically watch out for sharp edges.
|
||||
|
||||
The structure has not been finalised, but this is what is working at the
|
||||
moment. When I start looking at other content to index, it will most likely
|
||||
change. I don't recommend trying to make your own content modules indexable,
|
||||
at least not until the whole flow is finalised. I will be implementing the
|
||||
functions needed to index all of the default content modules on Moodle, so
|
||||
expect that around mid-August.
|
||||
|
||||
Wiki pages were my goal for this release; they can be indexed and searched,
|
||||
but not updated or deleted at this stage (was waiting for ZF 0.14 actually).
|
||||
|
||||
I still need to check the PostgreSQL SQL file; I don't have a PG7 install lying
|
||||
around to test on, so the script is untested.
|
||||
|
||||
To index for the first time, login as an admin user and browse to /search/index.php
|
||||
or /search/stats.php - there will be a message and a link telling you to go index.
|
||||
|
||||
-- Michael Champanis (mchampan)
|
||||
cynnical@gmail.com
|
||||
Summer of Code 2006
|
30
search/Zend/Exception.php
Executable file
30
search/Zend/Exception.php
Executable file
@ -0,0 +1,30 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
 * Root exception class of the bundled Zend Framework code.
 *
 * It adds nothing to PHP's built-in Exception; it only provides a common
 * type that the framework's other exception classes can extend.
 *
 * @category   Zend
 * @package    Zend
 * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Exception extends Exception
{
}
|
||||
|
15
search/Zend/IMPORTANT.txt
Normal file
15
search/Zend/IMPORTANT.txt
Normal file
@ -0,0 +1,15 @@
|
||||
We are running cutting-edge (i.e. HEAD) Zend Framework:
|
||||
URL: http://framework.zend.com/svn/framework/trunk
|
||||
Revision: 696
|
||||
Last Changed Rev: 696
|
||||
Last Changed Date: 2006-06-23 02:14:54 +0200 (Fri, 23 Jun 2006)
|
||||
|
||||
The copy of Zend Framework present in this directory only contains the minimum
|
||||
to run Zend_Search_Lucene - I don't foresee any problems, since the license
|
||||
is new BSD...
|
||||
|
||||
To obtain a full Zend Framework package, please visit:
|
||||
http://framework.zend.com/
|
||||
|
||||
Or alternatively check it out from SVN:
|
||||
svn checkout http://framework.zend.com/svn/framework/trunk
|
27
search/Zend/LICENSE.txt
Normal file
27
search/Zend/LICENSE.txt
Normal file
@ -0,0 +1,27 @@
|
||||
Copyright (c) 2006, Zend Technologies USA, Inc.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification,
|
||||
are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Zend Technologies USA, Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from this
|
||||
software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
36
search/Zend/Search/Exception.php
Normal file
36
search/Zend/Search/Exception.php
Normal file
@ -0,0 +1,36 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Framework base exception
|
||||
*/
|
||||
require_once 'Zend/Exception.php';
|
||||
|
||||
|
||||
/**
 * Exception type for the Zend_Search package.
 *
 * Extends Zend_Exception without adding any behaviour of its own.
 *
 * @category   Zend
 * @package    Zend_Search
 * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Exception extends Zend_Exception
{
}
|
||||
|
614
search/Zend/Search/Lucene.php
Normal file
614
search/Zend/Search/Lucene.php
Normal file
@ -0,0 +1,614 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
/** Zend_Search_Lucene_Document */
|
||||
require_once 'Zend/Search/Lucene/Document.php';
|
||||
|
||||
/** Zend_Search_Lucene_Storage_Directory */
|
||||
require_once 'Zend/Search/Lucene/Storage/Directory/Filesystem.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_Term */
|
||||
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_TermInfo */
|
||||
require_once 'Zend/Search/Lucene/Index/TermInfo.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_SegmentInfo */
|
||||
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_FieldInfo */
|
||||
require_once 'Zend/Search/Lucene/Index/FieldInfo.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_Writer */
|
||||
require_once 'Zend/Search/Lucene/Index/Writer.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryParser */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryParser.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryHit */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryHit.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Similarity */
|
||||
require_once 'Zend/Search/Lucene/Search/Similarity.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene
|
||||
{
|
||||
/**
|
||||
* File system adapter.
|
||||
*
|
||||
* @var Zend_Search_Lucene_Storage_Directory
|
||||
*/
|
||||
private $_directory = null;
|
||||
|
||||
/**
|
||||
* File system adapter closing option
|
||||
*
|
||||
* @var boolean
|
||||
*/
|
||||
private $_closeDirOnExit = true;
|
||||
|
||||
/**
|
||||
* Writer for this index, not instantiated unless required.
|
||||
*
|
||||
* @var Zend_Search_Lucene_Index_Writer
|
||||
*/
|
||||
private $_writer = null;
|
||||
|
||||
/**
|
||||
* Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
|
||||
*
|
||||
* @var array Zend_Search_Lucene_Index_SegmentInfo
|
||||
*/
|
||||
private $_segmentInfos = array();
|
||||
|
||||
/**
|
||||
* Number of documents in this index.
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private $_docCount = 0;
|
||||
|
||||
/**
|
||||
* Flag for index changes
|
||||
*
|
||||
* @var boolean
|
||||
*/
|
||||
private $_hasChanges = false;
|
||||
|
||||
/**
 * Opens the index and loads its list of segments.
 *
 * $directory is either a path to the index folder or an already created
 * Zend_Search_Lucene_Storage_Directory_Filesystem object. A directory
 * object created here from a path is closed again by the destructor; one
 * passed in by the caller is left open.
 *
 * When $create is true a Zend_Search_Lucene_Index_Writer is instantiated
 * straight away with its second argument set to true (presumably so that a
 * fresh index is written - confirm against the writer class).
 *
 * @param mixed   $directory
 * @param boolean $create
 * @throws Zend_Search_Exception         if no directory is given
 * @throws Zend_Search_Lucene_Exception  if the segments file does not start with the expected marker
 */
public function __construct($directory = null, $create = false)
{
    if ($directory === null) {
        throw new Zend_Search_Exception('No index directory specified');
    }

    $createdHere = !($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem);
    if ($createdHere) {
        $directory = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
    }
    $this->_directory      = $directory;
    $this->_closeDirOnExit = $createdHere;

    $this->_writer = $create
                   ? new Zend_Search_Lucene_Index_Writer($this->_directory, true)
                   : null;

    $this->_segmentInfos = array();

    $segmentsFile = $this->_directory->getFileObject('segments');

    if ($segmentsFile->readInt() != (int)0xFFFFFFFF) {
        throw new Zend_Search_Lucene_Exception('Wrong segments file format');
    }

    // version and counter are read only to advance the file position
    $segmentsFile->readLong();
    $segmentsFile->readInt();

    $segmentTotal = $segmentsFile->readInt();

    $this->_docCount = 0;

    // one (name, size) pair per segment
    for ($index = 0; $index < $segmentTotal; $index++) {
        $name = $segmentsFile->readString();
        $size = $segmentsFile->readInt();
        $this->_docCount += $size;

        $this->_segmentInfos[$index] =
            new Zend_Search_Lucene_Index_SegmentInfo($name, $size, $this->_directory);
    }
}
|
||||
|
||||
|
||||
/**
 * Object destructor.
 *
 * Commits pending changes, then closes the directory object if it was
 * created by the constructor.
 */
public function __destruct()
{
    $this->commit();

    if (!$this->_closeDirOnExit) {
        return;
    }

    $this->_directory->close();
}
|
||||
|
||||
/**
 * Returns the writer for this index, creating it on first use.
 *
 * @return Zend_Search_Lucene_Index_Writer
 */
public function getIndexWriter()
{
    if ($this->_writer instanceof Zend_Search_Lucene_Index_Writer) {
        return $this->_writer;
    }

    $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory);

    return $this->_writer;
}
|
||||
|
||||
|
||||
/**
 * Returns the directory object this index reads from and writes to.
 *
 * @return Zend_Search_Lucene_Storage_Directory
 */
public function getDirectory()
{
    $directory = $this->_directory;

    return $directory;
}
|
||||
|
||||
|
||||
/**
 * Returns the document count kept for this index (the sum of the segment
 * sizes read at construction time plus segments added by commit()).
 *
 * @return integer
 */
public function count()
{
    $total = $this->_docCount;

    return $total;
}
|
||||
|
||||
|
||||
/**
 * Performs a query against the index and returns an array of
 * Zend_Search_Lucene_Search_QueryHit objects, highest score first.
 * Hits with equal scores are returned in ascending document id order.
 *
 * Input is a string (parsed with Zend_Search_Lucene_Search_QueryParser)
 * or a Zend_Search_Lucene_Search_Query object. Pending changes are
 * committed before the search runs.
 *
 * @param mixed $query
 * @return array Zend_Search_Lucene_Search_QueryHit
 * @throws Zend_Search_Lucene_Exception  if $query is neither a string nor a query object
 */
public function find($query)
{
    if (is_string($query)) {
        $query = Zend_Search_Lucene_Search_QueryParser::parse($query);
    }

    if (!$query instanceof Zend_Search_Lucene_Search_Query) {
        throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object');
    }

    $this->commit();

    $hits   = array();
    $scores = array();
    $ids    = array();

    $docNum = $this->count();
    for ($count = 0; $count < $docNum; $count++) {
        $docScore = $query->score($count, $this);
        if ($docScore != 0) {
            $hit = new Zend_Search_Lucene_Search_QueryHit($this);
            $hit->id    = $count;
            $hit->score = $docScore;

            $hits[]   = $hit;
            $scores[] = $docScore;
            $ids[]    = $count;
        }
    }

    // The document ids are unique, so they settle every tie between equal
    // scores. Without them array_multisort() would fall back to comparing
    // the hit objects themselves, each of which refers back to this index.
    array_multisort($scores, SORT_DESC, SORT_REGULAR,
                    $ids,    SORT_ASC,  SORT_NUMERIC,
                    $hits);

    return $hits;
}
|
||||
|
||||
|
||||
/**
 * Collects the field names reported by every segment of this index.
 *
 * The per-segment results of getFields($indexed) are combined with
 * array_merge(), in segment order.
 *
 * @param boolean $indexed  passed through to each segment's getFields()
 * @return array
 */
public function getFieldNames($indexed = false)
{
    $names = array();

    foreach ($this->_segmentInfos as $segment) {
        $segmentFields = $segment->getFields($indexed);
        $names = array_merge($names, $segmentFields);
    }

    return $names;
}
|
||||
|
||||
|
||||
/**
 * Returns a Zend_Search_Lucene_Document object for the document
 * number $id in this index.
 *
 * The segment holding the document is located by walking the segment
 * sizes; the document's offset is then read from the segment's '.fdx'
 * file and its fields are read from the '.fdt' file.
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @return Zend_Search_Lucene_Document
 * @throws Zend_Search_Lucene_Exception  if $id is not below the document count
 */
public function getDocument($id)
{
    if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
        /* @var $id Zend_Search_Lucene_Search_QueryHit */
        $id = $id->id;
    }

    if ($id >= $this->_docCount) {
        throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
    }

    // find the segment whose id range [start, end) contains $id
    $segIndex = 0;
    $segStart = 0;
    $segEnd   = $this->_segmentInfos[0]->count();
    while ($segEnd <= $id) {
        $segIndex++;
        $segStart = $segEnd;
        $segEnd  += $this->_segmentInfos[$segIndex]->count();
    }
    $segment = $this->_segmentInfos[$segIndex];

    // '.fdx' is addressed in 8-byte steps, one per document of the segment
    $fdxFile = $segment->openCompoundFile('.fdx');
    $fdxFile->seek(($id - $segStart) * 8, SEEK_CUR);
    $fieldValuesPosition = $fdxFile->readLong();

    $fdtFile = $segment->openCompoundFile('.fdt');
    $fdtFile->seek($fieldValuesPosition, SEEK_CUR);
    $fieldCount = $fdtFile->readVInt();

    $doc = new Zend_Search_Lucene_Document();
    for ($fieldIndex = 0; $fieldIndex < $fieldCount; $fieldIndex++) {
        $fieldNum = $fdtFile->readVInt();
        $bits     = $fdtFile->readByte();

        $fieldInfo = $segment->getField($fieldNum);

        // bit 2 clear: text data, bit 2 set: binary data
        if ($bits & 2) {
            $value = $fdtFile->readBinary();
        } else {
            $value = $fdtFile->readString();
        }

        $doc->addField(new Zend_Search_Lucene_Field($fieldInfo->name,
                                                    $value,
                                                    true,
                                                    $fieldInfo->isIndexed,
                                                    $bits & 1));
    }

    return $doc;
}
|
||||
|
||||
|
||||
/**
 * Returns an array with the ids of all documents which contain $term.
 *
 * For every segment that knows the term, the document numbers are decoded
 * from the segment's '.frq' file. Each entry is a delta to the previous
 * document number: an odd value stands for a delta of (value - 1) / 2 with
 * nothing following it, an even value for a delta of value / 2 followed by
 * a frequency value, which is read and discarded here. Segment-local
 * numbers are shifted by the number of documents in preceding segments.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return array
 */
public function termDocs(Zend_Search_Lucene_Index_Term $term)
{
    $result            = array();
    $segmentStartDocId = 0;

    foreach ($this->_segmentInfos as $segInfo) {
        $termInfo = $segInfo->getTermInfo($term);

        if ($termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
            $frqFile = $segInfo->openCompoundFile('.frq');
            $frqFile->seek($termInfo->freqPointer, SEEK_CUR);

            $localDocId = 0;
            for ($entry = 0; $entry < $termInfo->docFreq; $entry++) {
                $docDelta = $frqFile->readVInt();

                if ($docDelta % 2 == 1) {
                    $localDocId += ($docDelta - 1) / 2;
                } else {
                    $localDocId += $docDelta / 2;
                    // skip the frequency value
                    $frqFile->readVInt();
                }

                $result[] = $segmentStartDocId + $localDocId;
            }
        }

        // the next segment's documents are numbered after this one's
        $segmentStartDocId += $segInfo->count();
    }

    return $result;
}
|
||||
|
||||
|
||||
/**
 * Returns an array of all positions of $term in the documents.
 * Return array structure: array( docId => array( pos1, pos2, ...), ...)
 *
 * Per segment, the document numbers and term frequencies are decoded from
 * the '.frq' file first (an odd delta entry means a frequency of 1, an
 * even one is followed by the frequency). The '.prx' file is then read
 * with that many position values per document; each value is a delta to
 * the previous position within the same document.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return array
 */
public function termPositions(Zend_Search_Lucene_Index_Term $term)
{
    $result            = array();
    $segmentStartDocId = 0;

    foreach ($this->_segmentInfos as $segInfo) {
        $termInfo = $segInfo->getTermInfo($term);

        if ($termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
            $frqFile = $segInfo->openCompoundFile('.frq');
            $frqFile->seek($termInfo->freqPointer, SEEK_CUR);

            // segment-local document number => number of occurrences
            $freqByDoc  = array();
            $localDocId = 0;
            for ($entry = 0; $entry < $termInfo->docFreq; $entry++) {
                $docDelta = $frqFile->readVInt();

                if ($docDelta % 2 == 1) {
                    $localDocId += ($docDelta - 1) / 2;
                    $freqByDoc[$localDocId] = 1;
                } else {
                    $localDocId += $docDelta / 2;
                    $freqByDoc[$localDocId] = $frqFile->readVInt();
                }
            }

            $prxFile = $segInfo->openCompoundFile('.prx');
            $prxFile->seek($termInfo->proxPointer, SEEK_CUR);

            foreach ($freqByDoc as $docKey => $freq) {
                $position  = 0;
                $positions = array();

                for ($occurrence = 0; $occurrence < $freq; $occurrence++) {
                    $position   += $prxFile->readVInt();
                    $positions[] = $position;
                }

                $result[$segmentStartDocId + $docKey] = $positions;
            }
        }

        // the next segment's documents are numbered after this one's
        $segmentStartDocId += $segInfo->count();
    }

    return $result;
}
|
||||
|
||||
|
||||
/**
 * Returns the number of documents in this index containing the $term.
 *
 * The per-segment document frequencies are summed. A segment contributes
 * only when its getTermInfo() yields a Zend_Search_Lucene_Index_TermInfo
 * object, the same test termDocs() and termPositions() apply.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return integer
 */
public function docFreq(Zend_Search_Lucene_Index_Term $term)
{
    $result = 0;

    foreach ($this->_segmentInfos as $segInfo) {
        $termInfo = $segInfo->getTermInfo($term);

        if ($termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
            $result += $termInfo->docFreq;
        }
    }

    return $result;
}
|
||||
|
||||
|
||||
/**
 * Retrieves the similarity object used by this index, which is the
 * default provided by Zend_Search_Lucene_Search_Similarity.
 *
 * @return Zend_Search_Lucene_Search_Similarity
 */
public function getSimilarity()
{
    $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

    return $similarity;
}
|
||||
|
||||
|
||||
/**
 * Returns a normalization factor for "field, document" pair.
 *
 * @param integer $id
 * @param string  $fieldName
 * @return mixed  null if $id is not below the document count, 0 if the
 *                document is marked as deleted in its segment, otherwise
 *                the value returned by the segment's norm() method
 */
public function norm( $id, $fieldName )
{
    if ($id >= $this->_docCount) {
        return null;
    }

    // stop at the first segment whose id range reaches past $id
    $segmentStartId = 0;
    foreach ($this->_segmentInfos as $segInfo) {
        $segmentEndId = $segmentStartId + $segInfo->count();
        if ($segmentEndId > $id) {
            break;
        }

        $segmentStartId = $segmentEndId;
    }

    // document number relative to the start of its segment
    $localId = $id - $segmentStartId;

    if ($segInfo->isDeleted($localId)) {
        return 0;
    }

    return $segInfo->norm($localId, $fieldName);
}
|
||||
|
||||
/**
 * Returns true if at least one segment of this index reports deletions.
 *
 * @return boolean
 */
public function hasDeletions()
{
    $found = false;

    foreach ($this->_segmentInfos as $segmentInfo) {
        if ($segmentInfo->hasDeletions()) {
            $found = true;
            break;
        }
    }

    return $found;
}
|
||||
|
||||
|
||||
/**
 * Deletes a document from the index.
 * $id is an internal document id
 *
 * The document is looked up in the segment that contains it and deleted
 * there under its segment-relative number. The index is flagged as
 * changed, so the next commit() writes the segment changes out.
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @throws Zend_Search_Lucene_Exception  if $id is not below the document count
 */
public function delete($id)
{
    if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
        /* @var $id Zend_Search_Lucene_Search_QueryHit */
        $id = $id->id;
    }

    if ($id >= $this->_docCount) {
        throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
    }

    // find the segment whose id range [start, end) contains $id
    $segIndex = 0;
    $segStart = 0;
    $segEnd   = $this->_segmentInfos[0]->count();
    while ($segEnd <= $id) {
        $segIndex++;
        $segStart = $segEnd;
        $segEnd  += $this->_segmentInfos[$segIndex]->count();
    }

    $this->_hasChanges = true;
    $this->_segmentInfos[$segIndex]->delete($id - $segStart);
}
|
||||
|
||||
|
||||
|
||||
/**
 * Adds a document to this index.
 *
 * The document is handed to the index writer, which is created on first
 * use.
 *
 * @param Zend_Search_Lucene_Document $document
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    $writer = $this->getIndexWriter();
    $writer->addDocument($document);
}
|
||||
|
||||
|
||||
/**
 * Commits pending changes to the index.
 *
 * Two things happen:
 *  - if documents were deleted since the last commit, every segment is
 *    asked to write its changes;
 *  - if a writer exists, it is committed and its result is applied to the
 *    segment list: an entry carrying a segment object is appended, an
 *    entry carrying null removes the segment with that name.
 *
 * The document count is kept equal to the sum of the listed segments'
 * sizes, and the segment list is re-indexed from 0 after removals because
 * getDocument() and delete() address it by consecutive position.
 *
 * @todo delete() and undeleteAll processing.
 */
public function commit()
{
    if ($this->_hasChanges) {
        foreach ($this->_segmentInfos as $segInfo) {
            $segInfo->writeChanges();
        }

        $this->_hasChanges = false;
    }

    if ($this->_writer === null) {
        return;
    }

    $segmentsRemoved = false;

    foreach ($this->_writer->commit() as $segmentName => $segmentInfo) {
        if ($segmentInfo !== null) {
            $this->_segmentInfos[] = $segmentInfo;
            $this->_docCount += $segmentInfo->count();
            continue;
        }

        foreach ($this->_segmentInfos as $segId => $segInfo) {
            if ($segInfo->getName() == $segmentName) {
                // its documents were counted when the segment was listed
                $this->_docCount -= $segInfo->count();
                unset($this->_segmentInfos[$segId]);
                $segmentsRemoved = true;
            }
        }
    }

    if ($segmentsRemoved) {
        // close the gaps left by unset() so positions run 0..n-1 again
        $this->_segmentInfos = array_values($this->_segmentInfos);
    }
}
|
||||
|
||||
|
||||
/*************************************************************************
|
||||
@todo UNIMPLEMENTED
|
||||
*************************************************************************/
|
||||
|
||||
/**
 * Returns an array of all terms in this index.
 *
 * Not implemented yet: an empty array is always returned.
 *
 * @todo Implementation
 * @return array
 */
public function terms()
{
    $terms = array();

    return $terms;
}
|
||||
|
||||
|
||||
/**
 * Undeletes all documents currently marked as deleted in this index.
 *
 * Not implemented yet: calling it has no effect.
 *
 * @todo Implementation
 */
public function undeleteAll()
{
    // intentionally empty until implemented
}
|
||||
}
|
96
search/Zend/Search/Lucene/Analysis/Analyzer.php
Normal file
96
search/Zend/Search/Lucene/Analysis/Analyzer.php
Normal file
@ -0,0 +1,96 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Token */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Token.php';
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php';
|
||||
|
||||
|
||||
|
||||
/**
 * Base class for text analyzers.
 * An analyzer represents a policy for extracting index terms from text.
 *
 * Note:
 * The Java implementation of Lucene is oriented to streams, which lets it work
 * effectively with huge documents (more than 20Mb).
 * This engine itself is not oriented to such documents, so the
 * Zend_Search_Lucene analysis API works with data strings and sets (arrays).
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Analysis_Analyzer
{
    /**
     * Analyzer instance returned by getDefault().
     * Stays unset until setDefault() or getDefault() is called.
     *
     * @var Zend_Search_Lucene_Analysis_Analyzer
     */
    private static $_defaultImpl;

    /**
     * Split text into terms.
     * Implementations return an array of Zend_Search_Lucene_Analysis_Token objects.
     *
     * @param string $data
     * @return array
     */
    abstract public function tokenize($data);

    /**
     * Replace the default Analyzer implementation used by indexing code.
     *
     * @param Zend_Search_Lucene_Analysis_Analyzer $analyzer
     */
    public static function setDefault(Zend_Search_Lucene_Analysis_Analyzer $analyzer)
    {
        self::$_defaultImpl = $analyzer;
    }

    /**
     * Return the default Analyzer implementation used by indexing code.
     * When none has been set, a case insensitive text analyzer is created,
     * remembered and returned.
     *
     * @return Zend_Search_Lucene_Analysis_Analyzer
     */
    public static function getDefault()
    {
        if (self::$_defaultImpl instanceof Zend_Search_Lucene_Analysis_Analyzer) {
            return self::$_defaultImpl;
        }

        self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive();

        return self::$_defaultImpl;
    }
}
|
||||
|
75
search/Zend/Search/Lucene/Analysis/Analyzer/Common.php
Normal file
75
search/Zend/Search/Lucene/Analysis/Analyzer/Common.php
Normal file
@ -0,0 +1,75 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
|
||||
|
||||
|
||||
/**
 * Common implementation of the Zend_Search_Lucene_Analysis_Analyzer interface.
 * There are several standard subclasses provided by the Zend_Search_Lucene/Analysis
 * subpackage: Zend_Search_Lucene_Analysis_Analyzer_Common_Text, ZSearchHTMLAnalyzer, ZSearchXMLAnalyzer.
 *
 * This class adds an ordered chain of token filters on top of the base
 * analyzer; subclasses are expected to pass every token through normalize().
 *
 * @todo ZSearchHTMLAnalyzer and ZSearchXMLAnalyzer implementation
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Analysis_Analyzer_Common extends Zend_Search_Lucene_Analysis_Analyzer
{
    /**
     * The set of Token filters applied to the Token stream.
     * Array of Zend_Search_Lucene_Analysis_TokenFilter objects, kept in the
     * order in which they were added.
     *
     * @var array
     */
    private $_filters = array();

    /**
     * Add Token filter to the Analyzer
     *
     * @param Zend_Search_Lucene_Analysis_TokenFilter $filter
     */
    public function addFilter(Zend_Search_Lucene_Analysis_TokenFilter $filter)
    {
        $this->_filters[] = $filter;
    }

    /**
     * Apply filters to the token, in the order they were added.
     *
     * A filter may return null to remove the token from the stream. In that
     * case the remaining filters are skipped and null is returned.
     *
     * @param Zend_Search_Lucene_Analysis_Token $token
     * @return Zend_Search_Lucene_Analysis_Token|null
     */
    public function normalize(Zend_Search_Lucene_Analysis_Token $token)
    {
        foreach ($this->_filters as $filter) {
            $token = $filter->normalize($token);

            // The next filter's normalize() is type-hinted and cannot accept
            // null, so stop as soon as a filter has removed the token.
            if ($token === null) {
                return null;
            }
        }

        return $token;
    }
}
|
||||
|
78
search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php
Normal file
78
search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php
Normal file
@ -0,0 +1,78 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer_Common */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
|
||||
|
||||
|
||||
/**
 * Plain text analyzer: every maximal run of alphabetic characters (as
 * reported by ctype_alpha()) becomes one token; everything else is treated
 * as a separator.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Analysis_Analyzer_Common_Text extends Zend_Search_Lucene_Analysis_Analyzer_Common
{
    /**
     * Tokenize text to a terms
     * Returns array of Zend_Search_Lucene_Analysis_Token objects
     *
     * Token offsets are byte offsets into $data; the end offset is one past
     * the last character of the term. Each token is passed through
     * normalize() before it is added to the result; tokens removed by a
     * filter (normalize() returned null) are left out of the result.
     *
     * @param string $data
     * @return array
     */
    public function tokenize($data)
    {
        $tokenStream = array();

        // The length does not change while scanning, so compute it once.
        $length   = strlen($data);
        $position = 0;

        while ($position < $length) {
            // Skip separators: anything that is not a letter
            // (white space, digits, punctuation, ...).
            // Square brackets are used for the string offset because the
            // curly brace form is no longer accepted by current PHP versions.
            while ($position < $length && !ctype_alpha($data[$position])) {
                $position++;
            }

            $termStartPosition = $position;

            // Read the token: consume letters up to the next separator.
            while ($position < $length && ctype_alpha($data[$position])) {
                $position++;
            }

            // Empty token, end of stream.
            if ($position == $termStartPosition) {
                break;
            }

            $token = new Zend_Search_Lucene_Analysis_Token(
                substr($data, $termStartPosition, $position - $termStartPosition),
                $termStartPosition,
                $position
            );

            $token = $this->normalize($token);

            // A filter signals "remove this token" by returning null.
            if ($token !== null) {
                $tokenStream[] = $token;
            }
        }

        return $tokenStream;
    }
}
|
||||
|
@ -0,0 +1,46 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_TokenFilter_LowerCase */
|
||||
require_once 'Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php';
|
||||
|
||||
|
||||
/**
 * Text analyzer that registers a lower case token filter on construction.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive extends Zend_Search_Lucene_Analysis_Analyzer_Common_Text
{
    /**
     * Set up the filter chain with a single lower case filter.
     */
    public function __construct()
    {
        $lowerCaseFilter = new Zend_Search_Lucene_Analysis_TokenFilter_LowerCase();

        $this->addFilter($lowerCaseFilter);
    }
}
|
||||
|
171
search/Zend/Search/Lucene/Analysis/Token.php
Normal file
171
search/Zend/Search/Lucene/Analysis/Token.php
Normal file
@ -0,0 +1,171 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
 * A single term produced by an analyzer, together with its location in the
 * source text, its lexical type and its position increment.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Analysis_Token
{
    /**
     * Term text.
     *
     * @var string
     */
    private $_termText;

    /**
     * Offset in the source text at which the term starts.
     *
     * @var integer
     */
    private $_startOffset;

    /**
     * Offset in the source text at which the term ends.
     *
     * @var integer
     */
    private $_endOffset;

    /**
     * Lexical type of the token.
     *
     * @var string
     */
    private $_type;

    /**
     * Position of this token relative to the previous one. Defaults to one.
     *
     * Typical uses:
     *
     * Zero places several terms at the same position, e.g. when a word has
     * multiple stems, so that phrase searches containing either stem match.
     * Only the first stem keeps an increment of one; the others use zero.
     * Repeating a token with a zero increment can also be used to boost the
     * score of matches on that token.
     *
     * Values greater than one inhibit exact phrase matches. For example, a
     * stop word filter may remove stop words and set the increment of the
     * following non-stop word to the number of words removed before it, so
     * that exact phrase queries only match when the terms occur with no
     * intervening stop words.
     *
     * @var integer
     */
    private $_positionIncrement;

    /**
     * Object constructor. The position increment starts at one.
     *
     * @param string  $text
     * @param integer $start
     * @param integer $end
     * @param string  $type
     */
    public function __construct($text, $start, $end, $type = 'word')
    {
        $this->_termText    = $text;
        $this->_startOffset = $start;
        $this->_endOffset   = $end;
        $this->_type        = $type;

        $this->_positionIncrement = 1;
    }

    /**
     * Change the position increment of this Token.
     *
     * @param integer $positionIncrement
     */
    public function setPositionIncrement($positionIncrement)
    {
        $this->_positionIncrement = $positionIncrement;
    }

    /**
     * Position increment of this Token.
     *
     * @return integer
     */
    public function getPositionIncrement()
    {
        return $this->_positionIncrement;
    }

    /**
     * Term text of this Token.
     *
     * @return string
     */
    public function getTermText()
    {
        return $this->_termText;
    }

    /**
     * Starting offset of this Token: the position of the first character
     * corresponding to it in the source text.
     *
     * Note:
     * getEndOffset() - getStartOffset() is not necessarily equal to
     * strlen(getTermText()), because a stemmer or another filter may have
     * altered the term text.
     *
     * @return integer
     */
    public function getStartOffset()
    {
        return $this->_startOffset;
    }

    /**
     * Ending offset of this Token: one greater than the position of the last
     * character corresponding to it in the source text.
     *
     * @return integer
     */
    public function getEndOffset()
    {
        return $this->_endOffset;
    }

    /**
     * Lexical type of this Token. Defaults to 'word'.
     *
     * @return string
     */
    public function getType()
    {
        return $this->_type;
    }
}
|
||||
|
47
search/Zend/Search/Lucene/Analysis/TokenFilter.php
Normal file
47
search/Zend/Search/Lucene/Analysis/TokenFilter.php
Normal file
@ -0,0 +1,47 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Token */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Token.php';
|
||||
|
||||
|
||||
/**
 * A token filter converts (normalizes) a Token or removes it from a token stream.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Analysis_TokenFilter
{
    /**
     * Normalize the given Token.
     * Returning null means the Token is to be removed.
     *
     * @param Zend_Search_Lucene_Analysis_Token $srcToken
     * @return Zend_Search_Lucene_Analysis_Token
     */
    abstract public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken);
}
|
||||
|
57
search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php
Normal file
57
search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php
Normal file
@ -0,0 +1,57 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Analysis
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_TokenFilter */
|
||||
require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php';
|
||||
|
||||
|
||||
/**
 * Lower case Token filter.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Analysis_TokenFilter_LowerCase extends Zend_Search_Lucene_Analysis_TokenFilter
{
    /**
     * Build a new Token whose term text is the strtolower() form of the
     * source Token's text. Offsets, type and position increment are copied
     * unchanged; the source Token itself is not modified.
     *
     * @param Zend_Search_Lucene_Analysis_Token $srcToken
     * @return Zend_Search_Lucene_Analysis_Token
     */
    public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
    {
        $loweredText = strtolower($srcToken->getTermText());

        $lowered = new Zend_Search_Lucene_Analysis_Token(
            $loweredText,
            $srcToken->getStartOffset(),
            $srcToken->getEndOffset(),
            $srcToken->getType()
        );

        // The constructor resets the increment to one, so copy it explicitly.
        $lowered->setPositionIncrement($srcToken->getPositionIncrement());

        return $lowered;
    }
}
|
||||
|
111
search/Zend/Search/Lucene/Document.php
Normal file
111
search/Zend/Search/Lucene/Document.php
Normal file
@ -0,0 +1,111 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Document
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Field */
|
||||
require_once 'Zend/Search/Lucene/Field.php';
|
||||
|
||||
|
||||
/**
 * A Document is a set of fields. Each field has a name and a textual value.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Document
 * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Document
{
    /**
     * Zend_Search_Lucene_Field objects of this document, keyed by field name.
     *
     * @var array
     */
    protected $_fields = array();

    /**
     * Boost value of the document.
     *
     * @var float
     */
    public $boost = 1.0;

    /**
     * Magic accessor: reading an undefined property yields the string value
     * of the field with that name (see getFieldValue()).
     *
     * @param $offset
     * @return string
     */
    public function __get($offset)
    {
        return $this->getFieldValue($offset);
    }

    /**
     * Add a field object to this document.
     * A field already stored under the same name is replaced.
     *
     * @param Zend_Search_Lucene_Field $field
     */
    public function addField(Zend_Search_Lucene_Field $field)
    {
        $fieldName = $field->name;

        $this->_fields[$fieldName] = $field;
    }

    /**
     * Names of all fields in this document.
     *
     * @return array
     */
    public function getFieldNames()
    {
        return array_keys($this->_fields);
    }

    /**
     * Look up the Zend_Search_Lucene_Field object stored under a name.
     *
     * @param string $fieldName
     * @return Zend_Search_Lucene_Field
     * @throws Zend_Search_Lucene_Exception when the document has no such field
     */
    public function getField($fieldName)
    {
        if (array_key_exists($fieldName, $this->_fields)) {
            return $this->_fields[$fieldName];
        }

        throw new Zend_Search_Lucene_Exception("Field name \"$fieldName\" not found in document.");
    }

    /**
     * String value of a named field in this document.
     *
     * @see __get()
     * @param string $fieldName
     * @return string
     */
    public function getFieldValue($fieldName)
    {
        $field = $this->getField($fieldName);

        return $field->stringValue;
    }
}
|
32
search/Zend/Search/Lucene/EncodingConverter.php
Normal file
32
search/Zend/Search/Lucene/EncodingConverter.php
Normal file
@ -0,0 +1,32 @@
|
||||
<?php
|
||||
|
||||
/**
 * Small wrapper around iconv() that converts strings between two fixed
 * encodings and records the message of any PHP error raised meanwhile.
 */
class EncodingConverter {
    /** Message of the last error seen during convert(), or FALSE when there was none. */
    private $last_error;

    /** Encoding of the strings passed to convert(). */
    private $in_encoding;

    /** Encoding of the strings returned by convert(). */
    private $out_encoding;

    /**
     * @param string $in_encoding  source encoding name, passed to iconv() as is
     * @param string $out_encoding target encoding name, passed to iconv() as is
     */
    public function __construct($in_encoding, $out_encoding) {
        $this->in_encoding  = $in_encoding;
        $this->out_encoding = $out_encoding;
    }

    /**
     * Error handler callback installed for the duration of convert().
     * Only the message is kept; the error number is ignored.
     *
     * @param integer $err error level
     * @param string  $msg error message
     */
    public function handleError($err, $msg) {
        $this->last_error = $msg;
    }

    /**
     * Convert a string from the input encoding to the output encoding.
     *
     * The previous error state is cleared first. Errors raised by iconv()
     * are routed to handleError() and can be read with getLastError().
     *
     * @param string $str
     * @return mixed whatever iconv() returned for this input
     */
    public function convert($str) {
        $this->last_error = FALSE;

        // Capture iconv() errors instead of letting the active handler see them.
        set_error_handler(array($this, 'handleError'));
        $converted = iconv($this->in_encoding, $this->out_encoding, $str);
        restore_error_handler();

        return $converted;
    }

    /**
     * @return mixed message recorded by the last convert() call, or FALSE
     */
    public function getLastError() {
        return $this->last_error;
    }
}
|
||||
|
||||
?>
|
36
search/Zend/Search/Lucene/Exception.php
Normal file
36
search/Zend/Search/Lucene/Exception.php
Normal file
@ -0,0 +1,36 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Framework base exception
|
||||
*/
|
||||
require_once 'Zend/Search/Exception.php';
|
||||
|
||||
|
||||
/**
 * Exception type thrown by Zend_Search_Lucene components.
 * Adds nothing to Zend_Search_Exception besides its own class name.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Exception extends Zend_Search_Exception
{
}
|
||||
|
161
search/Zend/Search/Lucene/Field.php
Normal file
161
search/Zend/Search/Lucene/Field.php
Normal file
@ -0,0 +1,161 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Document
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
 * A field is a section of a Document. Each field has two parts,
 * a name and a value. Values may be free text or they may be atomic
 * keywords, which are not further processed. Such keywords may
 * be used to represent dates, urls, etc. Fields are optionally
 * stored in the index, so that they may be returned with hits
 * on the document.
 *
 * Instances are normally created through the static factories
 * Keyword(), UnIndexed(), Binary(), Text() and UnStored(), which differ
 * only in the stored / indexed / tokenized / binary flags they pass on.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Document
 * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Field
{
    /** Not assigned anywhere in this class. */
    public $kind;

    /** Field name. */
    public $name        = 'body';
    /** Field value; normalized to ASCII unless the field is binary. */
    public $stringValue = null;
    /** Whether the value is stored in the index. */
    public $isStored    = false;
    /** Whether the field is indexed. */
    public $isIndexed   = true;
    /** Whether the value is tokenized. */
    public $isTokenized = true;
    /** Whether the value is binary data. */
    public $isBinary    = false;

    /** Always reset to false by the constructor. */
    public $storeTermVector = false;

    /** Always reset to 1.0 by the constructor. */
    public $boost = 1.0;

    /**
     * Object constructor.
     *
     * @param string  $name
     * @param string  $stringValue
     * @param boolean $isStored
     * @param boolean $isIndexed
     * @param boolean $isTokenized
     * @param boolean $isBinary
     */
    public function __construct($name, $stringValue, $isStored, $isIndexed, $isTokenized, $isBinary = false)
    {
        $this->name = $name;

        /**
         * @todo Correct UTF-8 string should be required in future
         * Until full UTF-8 support is completed, non-binary strings are
         * normalized to ASCII encoding. The input is treated as ISO-8859-1.
         *
         * An earlier variant detected the input encoding with
         * mb_detect_encoding() (through the EncodingConverter helper);
         * it is currently disabled.
         */
        $this->stringValue = $isBinary
                           ? $stringValue
                           : iconv('ISO-8859-1', 'ASCII//TRANSLIT', $stringValue);

        $this->isStored    = $isStored;
        $this->isIndexed   = $isIndexed;
        $this->isTokenized = $isTokenized;
        $this->isBinary    = $isBinary;

        $this->storeTermVector = false;
        $this->boost           = 1.0;
    }

    /**
     * Constructs a String-valued Field that is not tokenized, but is indexed
     * and stored. Useful for non-text fields, e.g. date or url.
     *
     * @param string $name
     * @param string $value
     * @return Zend_Search_Lucene_Field
     */
    public static function Keyword($name, $value)
    {
        $isStored    = true;
        $isIndexed   = true;
        $isTokenized = false;

        return new self($name, $value, $isStored, $isIndexed, $isTokenized);
    }

    /**
     * Constructs a String-valued Field that is not tokenized nor indexed,
     * but is stored in the index, for return with hits.
     *
     * @param string $name
     * @param string $value
     * @return Zend_Search_Lucene_Field
     */
    public static function UnIndexed($name, $value)
    {
        $isStored    = true;
        $isIndexed   = false;
        $isTokenized = false;

        return new self($name, $value, $isStored, $isIndexed, $isTokenized);
    }

    /**
     * Constructs a Binary String valued Field that is not tokenized nor indexed,
     * but is stored in the index, for return with hits.
     * The value is kept as is, without encoding normalization.
     *
     * @param string $name
     * @param string $value
     * @return Zend_Search_Lucene_Field
     */
    public static function Binary($name, $value)
    {
        $isStored    = true;
        $isIndexed   = false;
        $isTokenized = false;
        $isBinary    = true;

        return new self($name, $value, $isStored, $isIndexed, $isTokenized, $isBinary);
    }

    /**
     * Constructs a String-valued Field that is tokenized and indexed,
     * and is stored in the index, for return with hits. Useful for short text
     * fields, like "title" or "subject". Term vector will not be stored for this field.
     *
     * @param string $name
     * @param string $value
     * @return Zend_Search_Lucene_Field
     */
    public static function Text($name, $value)
    {
        $isStored    = true;
        $isIndexed   = true;
        $isTokenized = true;

        return new self($name, $value, $isStored, $isIndexed, $isTokenized);
    }

    /**
     * Constructs a String-valued Field that is tokenized and indexed,
     * but that is not stored in the index.
     *
     * @param string $name
     * @param string $value
     * @return Zend_Search_Lucene_Field
     */
    public static function UnStored($name, $value)
    {
        $isStored    = false;
        $isIndexed   = true;
        $isTokenized = true;

        return new self($name, $value, $isStored, $isIndexed, $isTokenized);
    }
}
|
||||
|
45
search/Zend/Search/Lucene/Index/FieldInfo.php
Normal file
45
search/Zend/Search/Lucene/Index/FieldInfo.php
Normal file
@ -0,0 +1,45 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
 * Plain value holder describing one field of an index:
 * its name, its number, and its indexed / term vector flags.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Index_FieldInfo
{
    /** Field name. */
    public $name;
    /** Whether the field is indexed. */
    public $isIndexed;
    /** Field number. */
    public $number;
    /** Whether a term vector is stored for the field. */
    public $storeTermVector;

    /**
     * Object constructor: copies the arguments into the public properties
     * of the same name.
     *
     * @param string  $name
     * @param boolean $isIndexed
     * @param integer $number
     * @param boolean $storeTermVector
     */
    public function __construct($name, $isIndexed, $number, $storeTermVector)
    {
        $this->storeTermVector = $storeTermVector;
        $this->number          = $number;
        $this->isIndexed       = $isIndexed;
        $this->name            = $name;
    }
}
|
||||
|
575
search/Zend/Search/Lucene/Index/SegmentInfo.php
Normal file
575
search/Zend/Search/Lucene/Index/SegmentInfo.php
Normal file
@ -0,0 +1,575 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Index_SegmentInfo
|
||||
{
|
||||
/**
|
||||
* Number of docs in a segment
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private $_docCount;
|
||||
|
||||
/**
|
||||
* Segment name
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_name;
|
||||
|
||||
/**
|
||||
* Term Dictionary Index
|
||||
* Array of the Zend_Search_Lucene_Index_Term objects
|
||||
* Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_termDictionary;
|
||||
|
||||
/**
|
||||
* Term Dictionary Index TermInfos
|
||||
* Array of the Zend_Search_Lucene_Index_TermInfo objects
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_termDictionaryInfos;
|
||||
|
||||
/**
|
||||
* Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_fields;
|
||||
|
||||
/**
|
||||
* Field positions in a dictionary.
|
||||
* (Term dictionary contains fields ordered by names)
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_fieldsDicPositions;
|
||||
|
||||
|
||||
/**
|
||||
* Associative array where the key is the file name and the value is data offset
|
||||
* in a compound segment file (.cfs).
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_segFiles;
|
||||
|
||||
/**
|
||||
* File system adapter.
|
||||
*
|
||||
* @var Zend_Search_Lucene_Storage_Directory_Filesystem
|
||||
*/
|
||||
private $_directory;
|
||||
|
||||
/**
|
||||
* Normalization factors.
|
||||
* An array fieldName => normVector
|
||||
* normVector is a binary string.
|
||||
* Each byte corresponds to an indexed document in a segment and
|
||||
* encodes normalization factor (float value, encoded by
|
||||
* Zend_Search_Lucene_Search_Similarity::encodeNorm())
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_norms = array();
|
||||
|
||||
/**
|
||||
* List of deleted documents.
|
||||
* bitset if bitset extension is loaded or array otherwise.
|
||||
*
|
||||
* @var mixed
|
||||
*/
|
||||
private $_deleted;
|
||||
|
||||
/**
|
||||
* $this->_deleted update flag
|
||||
*
|
||||
* @var boolean
|
||||
*/
|
||||
private $_deletedDirty = false;
|
||||
|
||||
/**
 * Zend_Search_Lucene_Index_SegmentInfo constructor needs Segmentname,
 * Documents count and Directory as a parameter.
 *
 * Loads, in this order:
 *  - the file table of the compound file (<name>.cfs), when that file exists;
 *  - the field infos (.fnm);
 *  - the deleted documents list (.del), when the segment has one.
 *
 * @param string $name
 * @param integer $docCount
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @throws Zend_Search_Exception
 */
public function __construct($name, $docCount, $directory)
{
    $this->_name           = $name;
    $this->_docCount       = $docCount;
    $this->_directory      = $directory;
    $this->_termDictionary = null;

    // Compound file table: file name => data offset inside the .cfs file
    $this->_segFiles = array();
    if ($this->_directory->fileExists($name . '.cfs')) {
        $cfsFile = $this->_directory->getFileObject($name . '.cfs');
        $segFilesCount = $cfsFile->readVInt();

        for ($count = 0; $count < $segFilesCount; $count++) {
            $dataOffset = $cfsFile->readLong();
            $fileName   = $cfsFile->readString();
            $this->_segFiles[$fileName] = $dataOffset;
        }
    }

    // Field infos
    $fnmFile = $this->openCompoundFile('.fnm');
    $fieldsCount = $fnmFile->readVInt();
    $fieldNames = array();
    $fieldNums  = array();
    $this->_fields = array();
    for ($count = 0; $count < $fieldsCount; $count++) {
        $fieldName = $fnmFile->readString();
        $fieldBits = $fnmFile->readByte();
        $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
                                                                        $fieldBits & 1,
                                                                        $count,
                                                                        $fieldBits & 2 );
        if ($fieldBits & 0x10) {
            // norms are omitted for the indexed field
            $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
        }

        $fieldNums[$count]  = $count;
        $fieldNames[$count] = $fieldName;
    }
    // Sort field numbers by field name, then invert: field number => position
    array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
    $this->_fieldsDicPositions = array_flip($fieldNums);

    // Deleted documents
    try {
        $delFile = $this->openCompoundFile('.del');

        // First int is a bit count; convert it to the number of bytes to read
        $byteCount = (int) ceil($delFile->readInt() / 8);
        $bitCount  = $delFile->readInt();

        if ($bitCount == 0) {
            $delBytes = '';
        } else {
            $delBytes = $delFile->readBytes($byteCount);
        }

        if (extension_loaded('bitset')) {
            $this->_deleted = $delBytes;
        } else {
            $this->_deleted = array();
            // Only walk bytes that were really read: $delBytes is empty when
            // no bits are set, and indexing past its end raises warnings.
            $available = min($byteCount, strlen($delBytes));
            for ($count = 0; $count < $available; $count++) {
                $byte = ord($delBytes[$count]);
                for ($bit = 0; $bit < 8; $bit++) {
                    if ($byte & (1<<$bit)) {
                        $this->_deleted[$count*8 + $bit] = 1;
                    }
                }
            }
        }
    } catch (Zend_Search_Exception $e) {
        // A missing .del file just means that nothing has been deleted
        if (strpos($e->getMessage(), 'compound file doesn\'t contain') !== false ) {
            $this->_deleted = null;
        } else {
            throw $e;
        }
    }
}
|
||||
|
||||
/**
 * Opens an index file which may be stored within the compound index file.
 *
 * A stand-alone file named <segment name><extension> takes precedence.
 * Otherwise the segment's .cfs file is opened and positioned at the data
 * offset recorded for the requested file.
 *
 * @param string $extension
 * @throws Zend_Search_Lucene_Exception
 * @return Zend_Search_Lucene_Storage_File
 */
public function openCompoundFile($extension)
{
    $target = $this->_name . $extension;

    // Try to open common file first
    if ($this->_directory->fileExists($target)) {
        return $this->_directory->getFileObject($target);
    }

    if (isset($this->_segFiles[$target])) {
        $compound = $this->_directory->getFileObject($this->_name . '.cfs');
        $compound->seek($this->_segFiles[$target]);

        return $compound;
    }

    throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
                                         . $target . ' file.' );
}
|
||||
|
||||
/**
 * Looks a field up by name and returns its number,
 * or -1 when the segment has no field with that name.
 *
 * @param string $fieldName
 * @return integer
 */
public function getFieldNum($fieldName)
{
    $fieldNum = -1;

    foreach ($this->_fields as $info) {
        if ($info->name == $fieldName) {
            // First match wins
            $fieldNum = $info->number;
            break;
        }
    }

    return $fieldNum;
}
|
||||
|
||||
/**
 * Returns the field info stored under the given field number.
 *
 * @param integer $fieldNum
 * @return Zend_Search_Lucene_Index_FieldInfo
 */
public function getField($fieldNum)
{
    $info = $this->_fields[$fieldNum];

    return $info;
}
|
||||
|
||||
/**
 * Returns the field names of this segment as an array (name => name).
 * When $indexed is true, only indexed fields are included.
 *
 * @param boolean $indexed
 * @return array
 */
public function getFields($indexed = false)
{
    $names = array();

    foreach ($this->_fields as $info) {
        if ($indexed && !$info->isIndexed) {
            // Caller asked for indexed fields only
            continue;
        }
        $names[$info->name] = $info->name;
    }

    return $names;
}
|
||||
|
||||
/**
 * Returns the document count this segment was constructed with.
 *
 * @return integer
 */
public function count()
{
    $total = $this->_docCount;

    return $total;
}
|
||||
|
||||
/**
 * Translates a field number into its position in the fields dictionary.
 *
 * @param integer $fieldNum
 * @return integer
 */
private function _getFieldPosition($fieldNum)
{
    if (isset($this->_fieldsDicPositions[$fieldNum])) {
        return $this->_fieldsDicPositions[$fieldNum];
    }

    // Treat values which are not in a translation table as a 'direct value'
    return $fieldNum;
}
|
||||
|
||||
/**
 * Loads Term dictionary from TermInfoIndex file (.tii).
 *
 * Does nothing when the dictionary has already been loaded. Fills
 * $_termDictionary and $_termDictionaryInfos with one entry per index term.
 *
 * @throws Zend_Search_Lucene_Exception
 */
protected function _loadDictionary()
{
    if ($this->_termDictionary !== null) {
        return;
    }

    $this->_termDictionary      = array();
    $this->_termDictionaryInfos = array();

    $tiiFile = $this->openCompoundFile('.tii');
    if ($tiiFile->readInt() != (int)0xFFFFFFFE) {
        throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
    }

    $entryCount = $tiiFile->readLong();
    $tiiFile->readInt(); // IndexInterval (read to advance the file, not used)
    $skipInterval = $tiiFile->readInt();

    // Texts are prefix-compressed against the previous entry and the
    // pointers are stored as deltas, so running values are kept here.
    $lastText = '';
    $freqPos  = 0;
    $proxPos  = 0;
    $indexPos = 0;
    for ($entry = 0; $entry < $entryCount; $entry++) {
        $sharedLength = $tiiFile->readVInt();
        $suffix       = $tiiFile->readString();
        $text         = substr($lastText, 0, $sharedLength) . $suffix;

        $fieldNum = $tiiFile->readVInt();
        $docFreq  = $tiiFile->readVInt();
        $freqPos += $tiiFile->readVInt();
        $proxPos += $tiiFile->readVInt();

        // The skip delta is only present for sufficiently frequent terms
        $skipDelta = ($docFreq >= $skipInterval) ? $tiiFile->readVInt() : 0;

        $indexPos += $tiiFile->readVInt();

        $this->_termDictionary[]      = new Zend_Search_Lucene_Index_Term($text, $fieldNum);
        $this->_termDictionaryInfos[] =
            new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPos, $proxPos, $skipDelta, $indexPos);

        $lastText = $text;
    }
}
|
||||
|
||||
|
||||
/**
 * Returns the name this segment was constructed with.
 *
 * @return string
 */
public function getName()
{
    $segmentName = $this->_name;

    return $segmentName;
}
|
||||
|
||||
|
||||
/**
 * Scans terms dictionary and returns term info
 *
 * The loaded dictionary index is binary-searched first. When the term is not
 * one of the index entries, the term dictionary file (.tis) is scanned
 * sequentially, starting right after the closest preceding index entry.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @throws Zend_Search_Lucene_Exception
 * @return Zend_Search_Lucene_Index_TermInfo|null  null if the term is not found
 */
public function getTermInfo($term)
{
    $this->_loadDictionary();

    $searchField = $this->getFieldNum($term->field);

    if ($searchField == -1) {
        return null;
    }
    $searchDicField = $this->_getFieldPosition($searchField);

    // search for appropriate value in dictionary
    $lowIndex  = 0;
    $highIndex = count($this->_termDictionary)-1;
    while ($highIndex >= $lowIndex) {
        $mid     = ($highIndex + $lowIndex) >> 1;
        $midTerm = $this->_termDictionary[$mid];

        // Order by field dictionary position first, then by term text
        $fieldNum = $this->_getFieldPosition($midTerm->field);
        $delta    = $searchDicField - $fieldNum;
        if ($delta == 0) {
            $delta = strcmp($term->text, $midTerm->text);
        }

        if ($delta < 0) {
            $highIndex = $mid-1;
        } elseif ($delta > 0) {
            $lowIndex  = $mid+1;
        } else {
            return $this->_termDictionaryInfos[$mid]; // We got it!
        }
    }

    if ($highIndex == -1) {
        // Term is out of the dictionary range
        return null;
    }

    // $highIndex now points at the last index entry that sorts before $term
    $prevPosition = $highIndex;
    $prevTerm     = $this->_termDictionary[$prevPosition];
    $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

    $tisFile   = $this->openCompoundFile('.tis');
    $tiVersion = $tisFile->readInt();
    if ($tiVersion != (int)0xFFFFFFFE) {
        throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
    }

    $termCount     = $tisFile->readLong();
    $indexInterval = $tisFile->readInt();
    $skipInterval  = $tisFile->readInt();

    // The 20 header bytes were just consumed, hence the relative seek
    $tisFile->seek($prevTermInfo->indexPointer - 20 /* header size*/, SEEK_CUR);

    $termValue    = $prevTerm->text;
    $termFieldNum = $prevTerm->field;
    $freqPointer  = $prevTermInfo->freqPointer;
    $proxPointer  = $prevTermInfo->proxPointer;

    // $count is the 1-based ordinal of the term about to be read, so the
    // bound has to be inclusive; otherwise the last term of the dictionary
    // could never be reached.
    for ($count = $prevPosition*$indexInterval + 1;
         $count <= $termCount &&
         ( $this->_getFieldPosition($termFieldNum) < $searchDicField ||
          ($this->_getFieldPosition($termFieldNum) == $searchDicField &&
           strcmp($termValue, $term->text) < 0) );
         $count++) {
        $termPrefixLength = $tisFile->readVInt();
        $termSuffix       = $tisFile->readString();
        $termFieldNum     = $tisFile->readVInt();
        $termValue        = substr( $termValue, 0, $termPrefixLength ) . $termSuffix;

        $docFreq      = $tisFile->readVInt();
        $freqPointer += $tisFile->readVInt();
        $proxPointer += $tisFile->readVInt();
        if( $docFreq >= $skipInterval ) {
            $skipOffset = $tisFile->readVInt();
        } else {
            $skipOffset = 0;
        }
    }

    // Byte-wise comparison, consistent with the scan above. A loose '=='
    // would treat numeric-looking terms such as '1e3' and '1000' as equal.
    if ($termFieldNum == $searchField && strcmp($termValue, $term->text) === 0) {
        return new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
    } else {
        return null;
    }
}
|
||||
|
||||
/**
 * Returns normalization factor for specified documents
 *
 * The norm vector of a field is read from the segment's '.f<fieldNum>' file
 * on first use and kept in $_norms afterwards.
 *
 * @param integer $id
 * @param string $fieldName
 * @return float|null  decoded norm (result of Zend_Search_Lucene_Search_Similarity::decodeNorm()),
 *                     or null when the field is unknown or not indexed
 */
public function norm($id, $fieldName)
{
    $fieldNum = $this->getFieldNum($fieldName);

    // getFieldNum() reports an unknown field as -1, which must not be used
    // as an index into $_fields.
    if ($fieldNum == -1 || !($this->_fields[$fieldNum]->isIndexed)) {
        return null;
    }

    if (!isset($this->_norms[$fieldNum])) {
        $fFile = $this->openCompoundFile('.f' . $fieldNum);
        $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);
    }

    // One byte per document; square brackets instead of the removed {} offset syntax
    return Zend_Search_Lucene_Search_Similarity::decodeNorm( ord($this->_norms[$fieldNum][$id]) );
}
|
||||
|
||||
|
||||
/**
 * Tells whether this segment carries a deleted documents list.
 *
 * @return boolean
 */
public function hasDeletions()
{
    $noList = ($this->_deleted === null);

    return !$noList;
}
|
||||
|
||||
|
||||
/**
 * Marks a document of this segment as deleted.
 * $id is an internal document id
 *
 * The change is kept in memory only and flagged as dirty.
 *
 * @param integer
 */
public function delete($id)
{
    $this->_deletedDirty = true;

    if (!extension_loaded('bitset')) {
        // Plain array fallback: deleted ids are the array keys
        if ($this->_deleted === null) {
            $this->_deleted = array();
        }
        $this->_deleted[$id] = 1;

        return;
    }

    if ($this->_deleted === null) {
        $this->_deleted = bitset_empty($id);
    }
    bitset_incl($this->_deleted, $id);
}
|
||||
|
||||
/**
 * Checks whether a document is marked as deleted.
 *
 * @param integer
 * @return boolean
 */
public function isDeleted($id)
{
    if ($this->_deleted === null) {
        // No deleted documents list at all
        return false;
    }

    return extension_loaded('bitset')
         ? bitset_in($this->_deleted, $id)
         : isset($this->_deleted[$id]);
}
|
||||
|
||||
|
||||
/**
 * Write changes if it's necessary.
 *
 * Only the deleted documents list is persisted: when it has been modified,
 * a '<segment name>.del' file is (re)created containing the document count,
 * the number of deleted documents and the deletion bit vector.
 */
public function writeChanges()
{
    if (!$this->_deletedDirty) {
        return;
    }

    if (extension_loaded('bitset')) {
        $delBytes = $this->_deleted;
        $bitCount = count(bitset_to_array($delBytes));
    } else {
        $byteCount = (int) floor($this->_docCount/8) + 1;

        // Build the vector by appending bytes; this avoids the {} string
        // offset syntax, which no longer exists in current PHP versions.
        $delBytes = '';
        for ($count = 0; $count < $byteCount; $count++) {
            $byte = 0;
            for ($bit = 0; $bit < 8; $bit++) {
                if (isset($this->_deleted[$count*8 + $bit])) {
                    $byte |= (1<<$bit);
                }
            }
            $delBytes .= chr($byte);
        }
        $bitCount = count($this->_deleted);
    }

    $delFile = $this->_directory->createFile($this->_name . '.del');
    $delFile->writeInt($this->_docCount);
    $delFile->writeInt($bitCount);
    $delFile->writeBytes($delBytes);

    $this->_deletedDirty = false;
}
|
||||
}
|
||||
|
519
search/Zend/Search/Lucene/Index/SegmentWriter.php
Normal file
519
search/Zend/Search/Lucene/Index/SegmentWriter.php
Normal file
@ -0,0 +1,519 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
/** Zend_Search_Lucene_Analysis_Analyzer */
|
||||
require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_SegmentInfo */
|
||||
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Index_SegmentWriter
|
||||
{
|
||||
/**
|
||||
* Expert: The fraction of terms in the "dictionary" which should be stored
|
||||
* in RAM. Smaller values use more memory, but make searching slightly
|
||||
* faster, while larger values use less memory and make searching slightly
|
||||
* slower. Searching is typically not dominated by dictionary lookup, so
|
||||
* tweaking this is rarely useful.
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
static public $indexInterval = 128;
|
||||
|
||||
/** Expert: The fraction of TermDocs entries stored in skip tables.
|
||||
* Larger values result in smaller indexes, greater acceleration, but fewer
|
||||
* accelerable cases, while smaller values result in bigger indexes,
|
||||
* less acceleration and more
|
||||
* accelerable cases. More detailed experiments would be useful here.
|
||||
*
|
||||
* 0x7FFFFFFF indicates that we don't use skip data
|
||||
* Default value is 16
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
static public $skipInterval = 0x7FFFFFFF;
|
||||
|
||||
/**
|
||||
* Number of docs in a segment
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
private $_docCount;
|
||||
|
||||
/**
|
||||
* Segment name
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $_name;
|
||||
|
||||
/**
|
||||
* File system adapter.
|
||||
*
|
||||
* @var Zend_Search_Lucene_Storage_Directory
|
||||
*/
|
||||
private $_directory;
|
||||
|
||||
/**
|
||||
* List of the index files.
|
||||
* Used for automatic compound file generation
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_files;
|
||||
|
||||
/**
|
||||
* Term Dictionary
|
||||
* Array of the Zend_Search_Lucene_Index_Term objects
|
||||
* Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_termDictionary;
|
||||
|
||||
/**
|
||||
* Documents, which contain the term
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_termDocs;
|
||||
|
||||
/**
|
||||
* Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_fields;
|
||||
|
||||
/**
|
||||
* Sizes of the indexed fields.
|
||||
* Used for normalization factors calculation.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
private $_fieldLengths;
|
||||
|
||||
/**
|
||||
* '.fdx' file - Stored Fields, the field index.
|
||||
*
|
||||
* @var Zend_Search_Lucene_Storage_File
|
||||
*/
|
||||
private $_fdxFile;
|
||||
|
||||
/**
|
||||
* '.fdt' file - Stored Fields, the field data.
|
||||
*
|
||||
* @var Zend_Search_Lucene_Storage_File
|
||||
*/
|
||||
private $_fdtFile;
|
||||
|
||||
|
||||
/**
 * Object constructor.
 *
 * Remembers the target directory and segment name and resets all
 * in-memory collections; no file is created here.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @param string $name
 */
public function __construct($directory, $name)
{
    // Where and under which name the segment is going to be written
    $this->_name      = $name;
    $this->_directory = $directory;
    $this->_docCount  = 0;

    // In-memory state collected while documents are added
    $this->_fields         = array();
    $this->_termDocs       = array();
    $this->_files          = array();
    $this->_norms          = array();
    $this->_fieldLengths   = array();
    $this->_termDictionary = array();

    // Stored fields files are created on demand
    $this->_fdxFile = null;
    $this->_fdtFile = null;
}
|
||||
|
||||
|
||||
/**
 * Registers a document field with the segment.
 *
 * A field name seen for the first time gets the next free field number.
 * For an already known name the 'indexed' and 'store term vector' flags
 * are OR-ed with those of the given field.
 *
 * @param Zend_Search_Lucene_Field $field
 */
private function _addFieldInfo(Zend_Search_Lucene_Field $field)
{
    if (isset($this->_fields[$field->name])) {
        $known = $this->_fields[$field->name];
        $known->isIndexed       |= $field->isIndexed;
        $known->storeTermVector |= $field->storeTermVector;

        return;
    }

    $this->_fields[$field->name] =
        new Zend_Search_Lucene_Index_FieldInfo($field->name,
                                               $field->isIndexed,
                                               count($this->_fields),
                                               $field->storeTermVector);
}
|
||||
|
||||
|
||||
/**
 * Adds a document to this segment.
 *
 * Indexed fields contribute terms and positions to the in-memory
 * dictionary; stored fields are appended to the .fdx/.fdt files, which are
 * created when the first document with stored fields arrives.
 *
 * @param Zend_Search_Lucene_Document $document
 * @throws Zend_Search_Lucene_Exception
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    // Id of the document being added; the counter is advanced at the very end
    $docId   = $this->_docCount;
    $toStore = array();

    foreach ($document->getFieldNames() as $fieldName) {
        $field = $document->getField($fieldName);
        $this->_addFieldInfo($field);

        if ($field->storeTermVector) {
            /**
             * @todo term vector storing support
             */
            throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
        }

        if ($field->isIndexed) {
            if ($field->isTokenized) {
                $tokens = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($field->stringValue);
            } else {
                // The whole value is a single token
                $tokens = array(
                    new Zend_Search_Lucene_Analysis_Token($field->stringValue, 0, strlen($field->stringValue))
                );
            }
            $this->_fieldLengths[$field->name][$docId] = count($tokens);

            $position = 0;
            foreach ($tokens as $token) {
                $term    = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
                $termKey = $term->key();

                if (!isset($this->_termDictionary[$termKey])) {
                    // New term
                    $this->_termDictionary[$termKey] = $term;
                    $this->_termDocs[$termKey]       = array($docId => array());
                } elseif (!isset($this->_termDocs[$termKey][$docId])) {
                    // Existing term, but new term entry
                    $this->_termDocs[$termKey][$docId] = array();
                }

                $position += $token->getPositionIncrement();
                $this->_termDocs[$termKey][$docId][] = $position;
            }
        }

        if ($field->isStored) {
            $toStore[] = $field;
        }
    }

    if (!empty($toStore)) {
        if ($this->_fdxFile === null) {
            $this->_fdxFile = $this->_directory->createFile($this->_name . '.fdx');
            $this->_fdtFile = $this->_directory->createFile($this->_name . '.fdt');

            $this->_files[] = $this->_name . '.fdx';
            $this->_files[] = $this->_name . '.fdt';
        }

        // .fdx gets the position at which this document's data starts in .fdt
        $this->_fdxFile->writeLong($this->_fdtFile->tell());
        $this->_fdtFile->writeVInt(count($toStore));

        foreach ($toStore as $field) {
            $this->_fdtFile->writeVInt($this->_fields[$field->name]->number);

            $fieldBits = 0x00;
            if ($field->isTokenized) {
                $fieldBits |= 0x01;
            }
            if ($field->isBinary) {
                $fieldBits |= 0x02;
            }
            /* 0x04 - third bit, compressed (ZLIB) - is never set */
            $this->_fdtFile->writeByte($fieldBits);

            if ($field->isBinary) {
                $this->_fdtFile->writeVInt(strlen($field->stringValue));
                $this->_fdtFile->writeBytes($field->stringValue);
            } else {
                $this->_fdtFile->writeString($field->stringValue);
            }
        }
    }

    $this->_docCount++;
}
|
||||
|
||||
|
||||
/**
 * Dump Field Info (.fnm) segment file
 *
 * For every indexed field a norm file ('.f<field number>') holding one
 * encoded norm byte per document is written as well.
 */
private function _dumpFNM()
{
    $fnmFile = $this->_directory->createFile($this->_name . '.fnm');
    $fnmFile->writeVInt(count($this->_fields));

    foreach ($this->_fields as $field) {
        $fnmFile->writeString($field->name);

        $flags = 0x00;
        if ($field->isIndexed) {
            $flags |= 0x01;
        }
        if ($field->storeTermVector) {
            $flags |= 0x02;
        }
        // not supported yet 0x04 /* term positions are stored with the term vectors */
        // not supported yet 0x08 /* term offsets are stored with the term vectors */
        $fnmFile->writeByte($flags);

        if (!$field->isIndexed) {
            continue;
        }

        $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

        $norm = '';
        for ($docId = 0; $docId < $this->_docCount; $docId++) {
            // Documents without this field count as zero tokens
            $numTokens = 0;
            if (isset($this->_fieldLengths[$field->name][$docId])) {
                $numTokens = $this->_fieldLengths[$field->name][$docId];
            }
            $norm .= chr($similarity->encodeNorm($similarity->lengthNorm($field->name, $numTokens)));
        }

        $normFileName = $this->_name . '.f' . $this->_fields[$field->name]->number;
        $fFile = $this->_directory->createFile($normFileName);
        $fFile->writeBytes($norm);
        $this->_files[] = $normFileName;
    }

    $this->_files[] = $this->_name . '.fnm';
}
|
||||
|
||||
|
||||
/**
 * Dump Term Dictionary segment file entry.
 * Used to write entry to .tis or .tii files
 *
 * The term text is written as (shared prefix length, suffix) relative to the
 * previous term of the same field, and the freq/prox pointers as deltas to
 * the previous term info. $prevTerm and $prevTermInfo are passed by
 * reference and are updated to $term and $termInfo.
 *
 * @param Zend_Search_Lucene_Storage_File $dicFile
 * @param Zend_Search_Lucene_Index_Term $prevTerm
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_TermInfo $prevTermInfo
 * @param Zend_Search_Lucene_Index_TermInfo $termInfo
 */
private function _dumpTermDictEntry(Zend_Search_Lucene_Storage_File $dicFile,
                                    &$prevTerm,     Zend_Search_Lucene_Index_Term     $term,
                                    &$prevTermInfo, Zend_Search_Lucene_Index_TermInfo $termInfo)
{
    if (isset($prevTerm) && $prevTerm->field == $term->field) {
        // Length of the common prefix, compared byte by byte. Square
        // brackets are used because the {} string offset syntax no longer
        // exists in current PHP versions.
        $prefixLength = 0;
        while ($prefixLength < strlen($prevTerm->text) &&
               $prefixLength < strlen($term->text) &&
               $prevTerm->text[$prefixLength] == $term->text[$prefixLength]
              ) {
            $prefixLength++;
        }
        // Write prefix length
        $dicFile->writeVInt($prefixLength);
        // Write suffix
        $dicFile->writeString( substr($term->text, $prefixLength) );
    } else {
        // Write prefix length
        $dicFile->writeVInt(0);
        // Write suffix
        $dicFile->writeString($term->text);
    }
    // Write field number
    $dicFile->writeVInt($term->field);
    // DocFreq (the count of documents which contain the term)
    $dicFile->writeVInt($termInfo->docFreq);

    $prevTerm = $term;

    if (!isset($prevTermInfo)) {
        // Write FreqDelta
        $dicFile->writeVInt($termInfo->freqPointer);
        // Write ProxDelta
        $dicFile->writeVInt($termInfo->proxPointer);
    } else {
        // Write FreqDelta
        $dicFile->writeVInt($termInfo->freqPointer - $prevTermInfo->freqPointer);
        // Write ProxDelta
        $dicFile->writeVInt($termInfo->proxPointer - $prevTermInfo->proxPointer);
    }
    // Write SkipOffset - only present when it is not 0
    if ($termInfo->skipOffset != 0) {
        $dicFile->writeVInt($termInfo->skipOffset);
    }

    $prevTermInfo = $termInfo;
}
|
||||
|
||||
/**
 * Dump Term Dictionary (.tis) and Term Dictionary Index (.tii) segment files
 *
 * Also writes the frequencies (.frq) and positions (.prx) files. Terms are
 * written in the string order of their keys. Every self::$indexInterval-th
 * term is additionally written to the .tii file, after a fixed header entry.
 */
private function _dumpDictionary()
{
    $termKeys = array_keys($this->_termDictionary);
    sort($termKeys, SORT_STRING);
    $termsTotal = count($termKeys);

    $tisFile = $this->_directory->createFile($this->_name . '.tis');
    $tisFile->writeInt((int)0xFFFFFFFE);
    $tisFile->writeLong($termsTotal);
    $tisFile->writeInt(self::$indexInterval);
    $tisFile->writeInt(self::$skipInterval);

    $tiiFile = $this->_directory->createFile($this->_name . '.tii');
    $tiiFile->writeInt((int)0xFFFFFFFE);
    // Number of entries really written below: the header entry plus one per
    // completed interval. (The previous ceil((N + 2)/interval) formula
    // over-counted by one whenever N % interval == interval - 1.)
    $tiiFile->writeLong(($termsTotal - $termsTotal % self::$indexInterval)/self::$indexInterval + 1);
    $tiiFile->writeInt(self::$indexInterval);
    $tiiFile->writeInt(self::$skipInterval);

    /** Dump dictionary header */
    $tiiFile->writeVInt(0);                    // prefix length
    $tiiFile->writeString('');                 // suffix
    $tiiFile->writeInt((int)0xFFFFFFFF);       // field number
    $tiiFile->writeByte((int)0x0F);
    $tiiFile->writeVInt(0);                    // DocFreq
    $tiiFile->writeVInt(0);                    // FreqDelta
    $tiiFile->writeVInt(0);                    // ProxDelta
    $tiiFile->writeVInt(20);                   // IndexDelta

    $frqFile = $this->_directory->createFile($this->_name . '.frq');
    $prxFile = $this->_directory->createFile($this->_name . '.prx');

    $termCount = 1;

    $prevTerm          = null;
    $prevTermInfo      = null;
    $prevIndexTerm     = null;
    $prevIndexTermInfo = null;
    $prevIndexPosition = 20;

    foreach ($termKeys as $termId) {
        $freqPointer = $frqFile->tell();
        $proxPointer = $prxFile->tell();

        $prevDoc = 0;
        foreach ($this->_termDocs[$termId] as $docId => $termPositions) {
            // Doc delta is shifted left by one; the low bit set means
            // "frequency is 1", otherwise the frequency follows.
            $docDelta = ($docId - $prevDoc)*2;
            $prevDoc = $docId;
            if (count($termPositions) > 1) {
                $frqFile->writeVInt($docDelta);
                $frqFile->writeVInt(count($termPositions));
            } else {
                $frqFile->writeVInt($docDelta + 1);
            }

            $prevPosition = 0;
            foreach ($termPositions as $position) {
                $prxFile->writeVInt($position - $prevPosition);
                $prevPosition = $position;
            }
        }

        if (count($this->_termDocs[$termId]) >= self::$skipInterval) {
            /**
             * @todo Write Skip Data to a freq file.
             * It's not used now, but make index more optimal
             */
            $skipOffset = $frqFile->tell() - $freqPointer;
        } else {
            $skipOffset = 0;
        }

        // The in-memory term carries the field name; the written one the field number
        $term = new Zend_Search_Lucene_Index_Term($this->_termDictionary[$termId]->text,
                                                  $this->_fields[$this->_termDictionary[$termId]->field]->number);
        $termInfo = new Zend_Search_Lucene_Index_TermInfo(count($this->_termDocs[$termId]),
                                                          $freqPointer, $proxPointer, $skipOffset);

        $this->_dumpTermDictEntry($tisFile, $prevTerm, $term, $prevTermInfo, $termInfo);

        if ($termCount % self::$indexInterval == 0) {
            $this->_dumpTermDictEntry($tiiFile, $prevIndexTerm, $term, $prevIndexTermInfo, $termInfo);

            $indexPosition = $tisFile->tell();
            $tiiFile->writeVInt($indexPosition - $prevIndexPosition);
            $prevIndexPosition = $indexPosition;
        }
        $termCount++;
    }

    $this->_files[] = $this->_name . '.tis';
    $this->_files[] = $this->_name . '.tii';
    $this->_files[] = $this->_name . '.frq';
    $this->_files[] = $this->_name . '.prx';
}
|
||||
|
||||
|
||||
/**
|
||||
* Generate compound index file
|
||||
*/
|
||||
private function _generateCFS()
{
    // Builds "<segment name>.cfs" out of every file listed in $this->_files.
    // Layout written below: a VInt file count, then one (Long offset, String
    // name) entry per file, then the raw contents of each file in the same
    // order. Each source file is deleted once it has been copied.
    $compound = $this->_directory->createFile($this->_name . '.cfs');
    $compound->writeVInt(count($this->_files));

    // Pass 1: emit the entry table with placeholder offsets, remembering
    // where each placeholder lives so it can be patched in pass 2.
    $offsetSlots = array();
    foreach ($this->_files as $memberName) {
        $offsetSlots[$memberName] = $compound->tell();
        $compound->writeLong(0); // placeholder, patched in pass 2
        $compound->writeString($memberName);
    }

    // Pass 2: append each member's data and back-patch its offset.
    foreach ($this->_files as $memberName) {
        // The current end of the compound file is where this member starts.
        $payloadStart = $compound->tell();

        // Jump back to the placeholder, store the real offset, return to the end.
        $compound->seek($offsetSlots[$memberName]);
        $compound->writeLong($payloadStart);
        $compound->seek($payloadStart);

        // Copy the whole member file in one read.
        $member       = $this->_directory->getFileObject($memberName);
        $memberLength = $this->_directory->fileLength($memberName);
        $compound->writeBytes($member->readBytes($memberLength));

        // The stand-alone file is no longer needed.
        $this->_directory->deleteFile($memberName);
    }
}
|
||||
|
||||
|
||||
/**
|
||||
* Close segment, write it to disk and return segment info
|
||||
*
|
||||
* @return Zend_Search_Lucene_Index_SegmentInfo
|
||||
*/
|
||||
public function close()
{
    // A segment with no documents is never written; signal that with null.
    $hasDocuments = ($this->_docCount != 0);
    if (!$hasDocuments) {
        return null;
    }

    // Write the remaining per-segment files, then pack everything listed in
    // $this->_files into the compound (.cfs) file.
    $this->_dumpFNM();
    $this->_dumpDictionary();
    $this->_generateCFS();

    // Describe the freshly written segment for the caller.
    $segmentInfo = new Zend_Search_Lucene_Index_SegmentInfo(
        $this->_name,
        $this->_docCount,
        $this->_directory
    );

    return $segmentInfo;
}
|
||||
|
||||
}
|
||||
|
72
search/Zend/Search/Lucene/Index/Term.php
Normal file
72
search/Zend/Search/Lucene/Index/Term.php
Normal file
@ -0,0 +1,72 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* A Term represents a word from text. This is the unit of search. It is
|
||||
* composed of two elements, the text of the word, as a string, and the name of
|
||||
* the field that the text occurred in, an interned string.
|
||||
*
|
||||
* Note that terms may represent more than words from text fields, but also
|
||||
* things like dates, email addresses, urls, etc.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Index_Term
{
    /**
     * Field the term belongs to: a field name or a field number,
     * depending on the context the term is used in.
     *
     * @var mixed
     */
    public $field;

    /**
     * The term's text.
     *
     * @var string
     */
    public $text;


    /**
     * Create a term.
     *
     * Note the argument order: the text comes first, the field second.
     *
     * @param string $text  Term value
     * @param mixed  $field Field name or number; defaults to 'contents'
     */
    public function __construct( $text, $field = 'contents' )
    {
        $this->text  = $text;
        $this->field = $field;
    }


    /**
     * Build a single string identifying this term: the field and the text
     * joined by a NUL byte.
     *
     * @return string
     */
    public function key()
    {
        return implode("\0", array($this->field, $this->text));
    }
}
|
||||
|
79
search/Zend/Search/Lucene/Index/TermInfo.php
Normal file
79
search/Zend/Search/Lucene/Index/TermInfo.php
Normal file
@ -0,0 +1,79 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* A Zend_Search_Lucene_Index_TermInfo represents a record of information stored for a term.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Index_TermInfo
{
    /**
     * Number of documents containing the term.
     *
     * @var integer
     */
    public $docFreq;

    /**
     * Offset of the term's data in the frequencies file.
     *
     * @var integer
     */
    public $freqPointer;

    /**
     * Offset of the term's data in the positions file.
     *
     * @var integer
     */
    public $proxPointer;

    /**
     * Offset of the term's skip data in the frequencies file.
     *
     * @var integer
     */
    public $skipOffset;

    /**
     * Offset of the _next_ term in the term dictionary file.
     * Only meaningful for term index entries.
     *
     * @var integer
     */
    public $indexPointer;

    /**
     * Populate the record; every argument is stored unchanged in the
     * public property of the same name.
     *
     * @param integer      $docFreq
     * @param integer      $freqPointer
     * @param integer      $proxPointer
     * @param integer      $skipOffset
     * @param integer|null $indexPointer Optional; null when not supplied
     */
    public function __construct($docFreq, $freqPointer, $proxPointer, $skipOffset, $indexPointer = null)
    {
        $this->indexPointer = $indexPointer;
        $this->skipOffset   = $skipOffset;
        $this->proxPointer  = $proxPointer;
        $this->freqPointer  = $freqPointer;
        $this->docFreq      = $docFreq;
    }
}
|
||||
|
331
search/Zend/Search/Lucene/Index/Writer.php
Normal file
331
search/Zend/Search/Lucene/Index/Writer.php
Normal file
@ -0,0 +1,331 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Index_SegmentWriter */
|
||||
require_once 'Zend/Search/Lucene/Index/SegmentWriter.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_SegmentInfo */
|
||||
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Index
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Index_Writer
{
    /**
     * @todo Implement segment merger
     * @todo Implement mergeFactor, minMergeDocs, maxMergeDocs usage.
     * @todo Implement Analyzer substitution
     * @todo Implement Zend_Search_Lucene_Storage_DirectoryRAM and Zend_Search_Lucene_Storage_FileRAM to use it for
     *       temporary index files
     * @todo Directory lock processing
     */

    /**
     * File system adapter.
     *
     * @var Zend_Search_Lucene_Storage_Directory
     */
    private $_directory = null;


    /**
     * Index version.
     * Incremented by addDocument() and stored in the 'segments' file.
     *
     * @var integer
     */
    private $_version;

    /**
     * Segment name counter.
     * Source of the numeric part of new segment names.
     *
     * @var integer
     */
    private $_segmentNameCounter;

    /**
     * Number of segments in the index.
     *
     * @var integer
     */
    private $_segments;

    /**
     * Determines how often segment indices
     * are merged by addDocument().
     * (Not read anywhere in this class yet.)
     *
     * @var integer
     */
    public $mergeFactor;

    /**
     * Determines the minimal number of documents required before
     * the buffered in-memory documents are merged and a new segment
     * is created.
     * (Not read anywhere in this class yet.)
     *
     * @var integer
     */
    public $minMergeDocs;

    /**
     * Determines the largest number of documents ever merged by addDocument().
     * (Not read anywhere in this class yet.)
     *
     * @var integer
     */
    public $maxMergeDocs;

    /**
     * Segments created by this writer since the last commit().
     * Array of Zend_Search_Lucene_Index_SegmentInfo objects keyed by segment name.
     *
     * @var array
     */
    private $_newSegments;

    /**
     * Segment currently receiving documents, or null if none is open.
     *
     * @var Zend_Search_Lucene_Index_SegmentWriter
     */
    private $_currentSegment;

    /**
     * Known index file extensions (each extension maps to itself so that
     * membership can be tested with isset()).
     *
     * @var array
     */
    private static $_indexExtensions = array('.cfs' => '.cfs',
                                             '.fnm' => '.fnm',
                                             '.fdx' => '.fdx',
                                             '.fdt' => '.fdt',
                                             '.tis' => '.tis',
                                             '.tii' => '.tii',
                                             '.frq' => '.frq',
                                             '.prx' => '.prx',
                                             '.tvx' => '.tvx',
                                             '.tvd' => '.tvd',
                                             '.tvf' => '.tvf',
                                             '.del' => '.del' );

    /**
     * Opens the index for writing.
     *
     * With $create set, every index-related file found in the directory is
     * deleted and fresh 'segments' and 'deletable' files are written.
     * Otherwise the header of the existing 'segments' file is read.
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @param boolean $create  true to create the index or overwrite an existing one
     * @throws Zend_Search_Lucene_Exception when the existing 'segments' file
     *         does not start with the expected format marker
     */
    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $create = false)
    {
        $this->_directory      = $directory;
        $this->_newSegments    = array();
        $this->_currentSegment = null;

        if (!$create) {
            // Existing index: read format marker, version, name counter, segment count.
            $segmentsFile = $this->_directory->getFileObject('segments');

            $format = $segmentsFile->readInt();
            if ($format != (int)0xFFFFFFFF) {
                throw new Zend_Search_Lucene_Exception('Wrong segments file format');
            }

            $this->_version            = $segmentsFile->readLong();
            $this->_segmentNameCounter = $segmentsFile->readInt();
            $this->_segments           = $segmentsFile->readInt();

            return;
        }

        // New index: remove whatever index files are already in the directory.
        foreach ($this->_directory->fileList() as $fileName) {
            $isIndexFile = $fileName == 'deletable'
                        || $fileName == 'segments'
                        || isset(self::$_indexExtensions[ substr($fileName, strlen($fileName)-4)])
                        || preg_match('/\.f\d+$/i', $fileName); // <segment_name>.f<decimal_number> files

            if ($isIndexFile) {
                $this->_directory->deleteFile($fileName);
            }
        }

        // Empty 'segments' file: format marker, version, name counter, segment count.
        $segmentsFile = $this->_directory->createFile('segments');
        $segmentsFile->writeInt((int)0xFFFFFFFF);
        $segmentsFile->writeLong(0);
        $segmentsFile->writeInt(0);
        $segmentsFile->writeInt(0);

        // Empty 'deletable' file: just a zero counter.
        $deletableFile = $this->_directory->createFile('deletable');
        $deletableFile->writeInt(0);

        $this->_version            = 0;
        $this->_segmentNameCounter = 0;
        $this->_segments           = 0;
    }

    /**
     * Adds a document to this index.
     *
     * A segment writer is opened on first use; the document is handed to it
     * and the index version is incremented.
     *
     * @param Zend_Search_Lucene_Document $document
     */
    public function addDocument(Zend_Search_Lucene_Document $document)
    {
        if ($this->_currentSegment === null) {
            $segmentName = $this->_newSegmentName();
            $this->_currentSegment =
                new Zend_Search_Lucene_Index_SegmentWriter($this->_directory, $segmentName);
        }

        $this->_currentSegment->addDocument($document);
        $this->_version++;
    }



    /**
     * Rewrite the 'segments' file so that it also lists the new segments.
     *
     * The new content is written to 'segments.new', which is then renamed
     * to 'segments'.
     *
     * @todo !!!!!Finish the implementation
     *
     * @throws Zend_Search_Lucene_Exception
     */
    private function _updateSegments()
    {
        // Bytes taken by the header: Int marker + Long version + Int counter + Int count.
        $headerSize = 20;

        $oldFile = $this->_directory->getFileObject('segments');
        $newFile = $this->_directory->createFile('segments.new');

        // Fresh header.
        $newFile->writeInt((int)0xFFFFFFFF);
        $newFile->writeLong($this->_version);
        $newFile->writeInt($this->_segmentNameCounter);

        $this->_segments += count($this->_newSegments);
        $newFile->writeInt($this->_segments);

        // Carry over the existing segment entries unchanged.
        $oldFile->seek($headerSize);
        $existingLength  = $this->_directory->fileLength('segments') - $headerSize;
        $existingEntries = $oldFile->readBytes($existingLength);
        $newFile->writeBytes($existingEntries);

        // Append one (name, document count) entry per new segment.
        foreach ($this->_newSegments as $name => $info) {
            $newFile->writeString($name);
            $newFile->writeInt($info->count());
        }

        $this->_directory->renameFile('segments.new', 'segments');
    }


    /**
     * Commit current changes.
     *
     * Closes the open segment (if any), records it, updates the 'segments'
     * file when there is something new, and returns the new segments while
     * clearing the internal list.
     *
     * @return array  Zend_Search_Lucene_Index_SegmentInfo objects keyed by segment name
     */
    public function commit()
    {
        if ($this->_currentSegment !== null) {
            $closedSegment = $this->_currentSegment->close();
            $this->_currentSegment = null;

            // close() yields null for a segment without documents.
            if ($closedSegment !== null) {
                $this->_newSegments[$closedSegment->getName()] = $closedSegment;
            }
        }

        if (count($this->_newSegments) > 0) {
            $this->_updateSegments();
        }

        $committed = $this->_newSegments;
        $this->_newSegments = array();

        return $committed;
    }


    /**
     * Merges the provided indexes into this index.
     * Not implemented yet: the body is empty.
     *
     * @param array $readers
     * @return void
     */
    public function addIndexes($readers)
    {
        /**
         * @todo implementation
         */
    }


    /**
     * Returns the number of documents currently in this index.
     * Not implemented yet: the body is empty, so nothing is returned.
     *
     * @param mixed $readers  currently unused
     * @return integer
     */
    public function docCount($readers)
    {
        /**
         * @todo implementation
         */
    }


    /**
     * Flushes all changes to an index and closes all associated files.
     * Not implemented yet: the body is empty.
     */
    public function close()
    {
        /**
         * @todo implementation
         */
    }


    /**
     * Merges all segments together into a single segment, optimizing
     * an index for search.
     * Not implemented yet: the body is empty.
     *
     * @return void
     */
    public function optimize()
    {
        /**
         * @todo implementation
         */
    }

    /**
     * Get a name for a new segment: '_' followed by the current counter
     * value in base 36. The counter is incremented afterwards.
     *
     * @return string
     */
    private function _newSegmentName()
    {
        $number = $this->_segmentNameCounter++;

        return '_' . base_convert($number, 10, 36);
    }

}
|
100
search/Zend/Search/Lucene/Search/Query.php
Normal file
100
search/Zend/Search/Lucene/Search/Query.php
Normal file
@ -0,0 +1,100 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
abstract class Zend_Search_Lucene_Search_Query
{

    /**
     * Boost factor of this query (1.0 unless changed via setBoost()).
     *
     * @var float
     */
    private $_boost = 1.0;

    /**
     * Weight of this query; assigned by _initWeight().
     *
     * @var Zend_Search_Lucene_Search_Weight
     */
    protected $_weight;


    /**
     * Gets the boost for this clause. Documents matching
     * this clause will (in addition to the normal weightings) have their score
     * multiplied by boost. The boost is 1.0 by default.
     *
     * @return float
     */
    public function getBoost()
    {
        return $this->_boost;
    }

    /**
     * Sets the boost for this query clause to $boost.
     *
     * @param float $boost
     */
    public function setBoost($boost)
    {
        $this->_boost = $boost;
    }

    /**
     * Score the specified document.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene $reader
     * @return float
     */
    abstract public function score($docId, $reader);

    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * @param Zend_Search_Lucene $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    abstract protected function _createWeight($reader);

    /**
     * Constructs and initializes the Weight for this query.
     *
     * The weight built by _createWeight() is stored in $_weight and then
     * normalized with the query norm that the reader's similarity derives
     * from the weight's sum of squared weights.
     *
     * @param Zend_Search_Lucene $reader
     */
    protected function _initWeight($reader)
    {
        $weight = $this->_createWeight($reader);
        $this->_weight = $weight;

        $squaredSum = $weight->sumOfSquaredWeights();
        $norm       = $reader->getSimilarity()->queryNorm($squaredSum);

        $weight->normalize($norm);
    }

}
|
439
search/Zend/Search/Lucene/Search/Query/MultiTerm.php
Normal file
439
search/Zend/Search/Lucene/Search/Query/MultiTerm.php
Normal file
@ -0,0 +1,439 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query */
|
||||
require_once 'Zend/Search/Lucene/Search/Query.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Weight_MultiTerm */
|
||||
require_once 'Zend/Search/Lucene/Search/Weight/MultiTerm.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Search_Query
{

    /**
     * Terms to find.
     * Array of Zend_Search_Lucene_Index_Term
     *
     * @var array
     */
    private $_terms = array();

    /**
     * Term signs, indexed like $_terms.
     * If true then term is required.
     * If false then term is prohibited.
     * If null then term is neither prohibited, nor required
     *
     * If the property itself is null then all terms are required
     * (the query is a pure conjunction).
     *
     * @var array|null
     */
    private $_signs = array();

    /**
     * Result vector.
     * Bitset, or array keyed by document ID
     * (depending on Bitset extension availability).
     *
     * @var mixed
     */
    private $_resVector = null;

    /**
     * Terms positions vectors.
     * Array of Arrays:
     * term1Id => (docId => array( pos1, pos2, ... ), ...)
     * term2Id => (docId => array( pos1, pos2, ... ), ...)
     *
     * @var array
     */
    private $_termsPositions = array();


    /**
     * A score factor based on the fraction of all query terms
     * that a document contains.
     * float for conjunction queries
     * array of float for non conjunction queries
     *
     * @var mixed
     */
    private $_coord = null;


    /**
     * Terms weights
     * array of Zend_Search_Lucene_Search_Weight
     *
     * @var array
     */
    private $_weights = array();


    /**
     * Class constructor. Create a new multi-term query object.
     *
     * When $terms is an array, the sign list is kept only if at least one
     * sign differs from true; otherwise it is stored as null, which marks
     * the query as "all terms required". When $terms is not an array both
     * arguments are ignored.
     *
     * @param array $terms    Array of Zend_Search_Lucene_Index_Term objects
     * @param array $signs    Array of signs.  Sign is boolean|null.
     * @return void
     */
    public function __construct($terms = null, $signs = null)
    {
        /**
         * @todo Check contents of $terms and $signs before adding them.
         */
        if (is_array($terms)) {
            $this->_terms = $terms;

            $this->_signs = null;
            // Check if all terms are required
            if (is_array($signs)) {
                foreach ($signs as $sign ) {
                    if ($sign !== true) {
                        $this->_signs = $signs;
                        // One non-required sign settles it; no need to look further.
                        break;
                    }
                }
            }
        }
    }


    /**
     * Add a $term (Zend_Search_Lucene_Index_Term) to this query.
     *
     * The sign is specified as:
     *     TRUE  - term is required
     *     FALSE - term is prohibited
     *     NULL  - term is neither prohibited, nor required
     *
     * While the sign list is null (all terms required), adding another
     * required term keeps it null. Adding an optional or prohibited term
     * materialises the list first, marking every previously added term as
     * required, so that existing terms keep their meaning and the new sign
     * is stored under the new term's own index.
     *
     * @param  Zend_Search_Lucene_Index_Term $term
     * @param  boolean|null $sign
     * @return void
     */
    public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign=null) {
        if ($this->_signs === null && $sign !== true) {
            // The query stops being a pure conjunction: spell out the
            // implicit "required" sign of every term added so far.
            $this->_signs = array();
            foreach ($this->_terms as $termId => $existingTerm) {
                $this->_signs[$termId] = true;
            }
        }

        $this->_terms[] = $term;

        if ($this->_signs !== null) {
            // Store the sign under the key the new term was just given.
            end($this->_terms);
            $this->_signs[key($this->_terms)] = $sign;
        }
    }


    /**
     * Returns query terms
     *
     * @return array
     */
    public function getTerms()
    {
        return $this->_terms;
    }


    /**
     * Return terms signs
     *
     * @return array|null  null means that all terms are required
     */
    public function getSigns()
    {
        return $this->_signs;
    }


    /**
     * Set weight for specified term
     *
     * @param integer $num  index of the term in the terms array
     * @param Zend_Search_Lucene_Search_Weight_Term $weight
     */
    public function setWeight($num, $weight)
    {
        $this->_weights[$num] = $weight;
    }


    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * @param Zend_Search_Lucene $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    protected function _createWeight($reader)
    {
        return new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader);
    }


    /**
     * Calculate result vector for Conjunction query
     * (like '+something +another')
     *
     * The result is the intersection of the document sets of all terms.
     * Term positions are collected for every term along the way.
     *
     * @param Zend_Search_Lucene $reader
     */
    private function _calculateConjunctionResult($reader)
    {
        if (extension_loaded('bitset')) {
            foreach( $this->_terms as $termId=>$term ) {
                if($this->_resVector === null) {
                    $this->_resVector = bitset_from_array($reader->termDocs($term));
                } else {
                    $this->_resVector = bitset_intersection(
                                $this->_resVector,
                                bitset_from_array($reader->termDocs($term)) );
                }

                $this->_termsPositions[$termId] = $reader->termPositions($term);
            }

            if ($this->_resVector === null) {
                // No terms at all: nothing matches. Storing an empty set
                // keeps score() from recalculating on every call.
                $this->_resVector = bitset_empty();
            }
        } else {
            foreach( $this->_terms as $termId=>$term ) {
                if($this->_resVector === null) {
                    // Flip so that document IDs become keys (cheap isset() lookups).
                    $this->_resVector = array_flip($reader->termDocs($term));
                } else {
                    $termDocs = array_flip($reader->termDocs($term));
                    // Intersection: drop documents missing from this term.
                    foreach($this->_resVector as $key=>$value) {
                        if (!isset( $termDocs[$key] )) {
                            unset( $this->_resVector[$key] );
                        }
                    }
                }

                $this->_termsPositions[$termId] = $reader->termPositions($term);
            }

            if ($this->_resVector === null) {
                // No terms at all: nothing matches (see above).
                $this->_resVector = array();
            }
        }
    }


    /**
     * Calculate result vector for non Conjunction query
     * (like '+something -another')
     *
     * Result = (intersection of required terms' documents, or the union of
     * optional terms' documents when nothing is required) minus the union
     * of prohibited terms' documents.
     *
     * @param Zend_Search_Lucene $reader
     */
    private function _calculateNonConjunctionResult($reader)
    {
        if (extension_loaded('bitset')) {
            $required   = null;
            $neither    = bitset_empty();
            $prohibited = bitset_empty();

            foreach ($this->_terms as $termId => $term) {
                $termDocs = bitset_from_array($reader->termDocs($term));

                if ($this->_signs[$termId] === true) {
                    // required
                    if ($required !== null) {
                        $required = bitset_intersection($required, $termDocs);
                    } else {
                        $required = $termDocs;
                    }
                } elseif ($this->_signs[$termId] === false) {
                    // prohibited
                    $prohibited = bitset_union($prohibited, $termDocs);
                } else {
                    // neither required, nor prohibited
                    $neither = bitset_union($neither, $termDocs);
                }

                $this->_termsPositions[$termId] = $reader->termPositions($term);
            }

            if ($required === null) {
                $required = $neither;
            }
            $this->_resVector = bitset_intersection( $required,
                                                     bitset_invert($prohibited, $reader->count()) );
        } else {
            $required   = null;
            $neither    = array();
            $prohibited = array();

            foreach ($this->_terms as $termId => $term) {
                // Flip so that document IDs become keys.
                $termDocs = array_flip($reader->termDocs($term));

                if ($this->_signs[$termId] === true) {
                    // required
                    if ($required !== null) {
                        // substitute for bitset_intersection
                        foreach ($required as $key => $value) {
                            if (!isset( $termDocs[$key] )) {
                                unset($required[$key]);
                            }
                        }
                    } else {
                        $required = $termDocs;
                    }
                } elseif ($this->_signs[$termId] === false) {
                    // prohibited
                    // substitute for bitset_union
                    foreach ($termDocs as $key => $value) {
                        $prohibited[$key] = $value;
                    }
                } else {
                    // neither required, nor prohibited
                    // substitute for bitset_union
                    foreach ($termDocs as $key => $value) {
                        $neither[$key] = $value;
                    }
                }

                $this->_termsPositions[$termId] = $reader->termPositions($term);
            }

            if ($required === null) {
                $required = $neither;
            }

            // Remove every prohibited document from the candidates.
            foreach ($required as $key=>$value) {
                if (isset( $prohibited[$key] )) {
                    unset($required[$key]);
                }
            }
            $this->_resVector = $required;
        }
    }


    /**
     * Score calculator for conjunction queries (all terms are required)
     *
     * Sums tf * term weight * field norm over all terms and multiplies the
     * sum by the coordination factor (computed once and cached).
     *
     * @param integer $docId
     * @param Zend_Search_Lucene $reader
     * @return float
     */
    public function _conjunctionScore($docId, $reader)
    {
        if ($this->_coord === null) {
            $this->_coord = $reader->getSimilarity()->coord(count($this->_terms),
                                                            count($this->_terms) );
        }

        $score = 0.0;

        foreach ($this->_terms as $termId=>$term) {
            $score += $reader->getSimilarity()->tf(count($this->_termsPositions[$termId][$docId]) ) *
                      $this->_weights[$termId]->getValue() *
                      $reader->norm($docId, $term->field);
        }

        return $score * $this->_coord;
    }


    /**
     * Score calculator for non conjunction queries (not all terms are required)
     *
     * Only non-prohibited terms that occur in the document contribute. The
     * sum is multiplied by the coordination factor for the number of
     * matched terms; the factors are computed once and cached.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene $reader
     * @return float
     */
    public function _nonConjunctionScore($docId, $reader)
    {
        if ($this->_coord === null) {
            $this->_coord = array();

            // Largest possible number of matching terms: all non-prohibited ones.
            $maxCoord = 0;
            foreach ($this->_signs as $sign) {
                if ($sign !== false /* not prohibited */) {
                    $maxCoord++;
                }
            }

            for ($count = 0; $count <= $maxCoord; $count++) {
                $this->_coord[$count] = $reader->getSimilarity()->coord($count, $maxCoord);
            }
        }

        $score = 0.0;
        $matchedTerms = 0;
        foreach ($this->_terms as $termId=>$term) {
            // Check if term is
            if ($this->_signs[$termId] !== false &&            // not prohibited
                isset($this->_termsPositions[$termId][$docId]) // matched
               ) {
                $matchedTerms++;
                $score +=
                      $reader->getSimilarity()->tf(count($this->_termsPositions[$termId][$docId]) ) *
                      $this->_weights[$termId]->getValue() *
                      $reader->norm($docId, $term->field);
            }
        }

        return $score * $this->_coord[$matchedTerms];
    }

    /**
     * Score specified document
     *
     * The result vector and the query weight are computed on the first
     * call. Documents outside the result vector score 0.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene $reader
     * @return float
     */
    public function score($docId, $reader)
    {
        if($this->_resVector === null) {
            if ($this->_signs === null) {
                $this->_calculateConjunctionResult($reader);
            } else {
                $this->_calculateNonConjunctionResult($reader);
            }

            $this->_initWeight($reader);
        }

        if ( (extension_loaded('bitset')) ?
                bitset_in($this->_resVector, $docId) :
                isset($this->_resVector[$docId])  ) {
            if ($this->_signs === null) {
                return $this->_conjunctionScore($docId, $reader);
            } else {
                return $this->_nonConjunctionScore($docId, $reader);
            }
        } else {
            return 0;
        }
    }
}
|
||||
|
426
search/Zend/Search/Lucene/Search/Query/Phrase.php
Normal file
426
search/Zend/Search/Lucene/Search/Query/Phrase.php
Normal file
@ -0,0 +1,426 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Zend_Search_Lucene_Search_Query
|
||||
*/
|
||||
require_once 'Zend/Search/Lucene/Search/Query.php';
|
||||
|
||||
/**
|
||||
* Zend_Search_Lucene_Search_Weight_Phrase
|
||||
*/
|
||||
require_once 'Zend/Search/Lucene/Search/Weight/Phrase.php';
|
||||
|
||||
|
||||
/**
|
||||
* A Query that matches documents containing a particular sequence of terms.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Query
{
    /**
     * Terms to find.
     * Array of Zend_Search_Lucene_Index_Term objects.
     *
     * @var array
     */
    private $_terms;

    /**
     * Term positions (relative positions of terms within the phrase).
     * Array of integers, looked up with the same keys as $_terms.
     *
     * @var array
     */
    private $_offsets;

    /**
     * Sets the number of other words permitted between words in query phrase.
     * If zero, then this is an exact phrase search.  For larger values this works
     * like a WITHIN or NEAR operator.
     *
     * The slop is in fact an edit-distance, where the units correspond to
     * moves of terms in the query phrase out of position.  For example, to switch
     * the order of two words requires two moves (the first move places the words
     * atop one another), so to permit re-orderings of phrases, the slop must be
     * at least two.
     * More exact matches are scored higher than sloppier matches, thus search
     * results are sorted by exactness.
     *
     * The slop is zero by default, requiring exact matches.
     *
     * @var integer
     */
    private $_slop;

    /**
     * Result vector.
     * Bitset or array of document IDs
     * (depending on Bitset extension availability).
     *
     * @var mixed
     */
    private $_resVector = null;

    /**
     * Terms positions vectors.
     * Array of Arrays:
     * term1Id => (docId => array( pos1, pos2, ... ), ...)
     * term2Id => (docId => array( pos1, pos2, ... ), ...)
     *
     * @var array
     */
    private $_termsPositions = array();

    /**
     * Class constructor.  Create a new phrase query.
     *
     * @param array  $terms   Terms to search. Array of strings, or null for an empty phrase.
     * @param array  $offsets Relative term positions. Array of integers, or null to
     *                        number the terms 0, 1, 2, ... in the order they were given.
     * @param string $field   Field to search. When null, terms are created without
     *                        a field argument.
     * @throws Zend_Search_Lucene_Exception when $terms or $offsets is neither an array
     *                                      nor null, or when their sizes differ.
     */
    public function __construct($terms = null, $offsets = null, $field = null)
    {
        $this->_slop = 0;

        if (is_array($terms)) {
            $this->_terms = array();
            foreach ($terms as $termId => $termText) {
                $this->_terms[$termId] = ($field !== null)? new Zend_Search_Lucene_Index_Term($termText, $field):
                                                            new Zend_Search_Lucene_Index_Term($termText);
            }
        } else if ($terms === null) {
            $this->_terms = array();
        } else {
            throw new Zend_Search_Lucene_Exception('terms argument must be array of strings or null');
        }

        if (is_array($offsets)) {
            if (count($this->_terms) != count($offsets)) {
                throw new Zend_Search_Lucene_Exception('terms and offsets arguments must have the same size.');
            }
            $this->_offsets = $offsets;
        } else if ($offsets === null) {
            // Default layout: consecutive positions, keyed like the terms.
            $this->_offsets = array();
            foreach ($this->_terms as $termId => $term) {
                $position = count($this->_offsets);
                $this->_offsets[$termId] = $position;
            }
        } else {
            throw new Zend_Search_Lucene_Exception('offsets argument must be array of integers or null');
        }
    }

    /**
     * Set slop
     *
     * @param integer $slop
     */
    public function setSlop($slop)
    {
        $this->_slop = $slop;
    }

    /**
     * Get slop
     *
     * @return integer
     */
    public function getSlop()
    {
        return $this->_slop;
    }

    /**
     * Adds a term to the end of the query phrase.
     * The relative position of the term is specified explicitly or the one immediately
     * after the last term added.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @param integer $position
     * @throws Zend_Search_Lucene_Exception when the term's field differs from the
     *                                      field of the last term already in the phrase.
     */
    public function addTerm(Zend_Search_Lucene_Index_Term $term, $position = null) {
        if ((count($this->_terms) != 0)&&(end($this->_terms)->field != $term->field)) {
            throw new Zend_Search_Lucene_Exception('All phrase terms must be in the same field: ' .
                                                   $term->field . ':' . $term->text);
        }

        $this->_terms[] = $term;
        if ($position !== null) {
            $this->_offsets[] = $position;
        } else if (count($this->_offsets) != 0) {
            // Continue right after the last offset already recorded.
            $this->_offsets[] = end($this->_offsets) + 1;
        } else {
            $this->_offsets[] = 0;
        }
    }

    /**
     * Returns the query terms
     *
     * @return array
     */
    public function getTerms()
    {
        return $this->_terms;
    }

    /**
     * Set weight for specified term
     *
     * NOTE(review): $_weights is not declared in this class; presumably it is
     * declared by the parent class - confirm.
     *
     * @param integer $num
     * @param Zend_Search_Lucene_Search_Weight_Term $weight
     */
    public function setWeight($num, $weight)
    {
        $this->_weights[$num] = $weight;
    }

    /**
     * Constructs an appropriate Weight implementation for this query.
     *
     * @param Zend_Search_Lucene $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    protected function _createWeight($reader)
    {
        return new Zend_Search_Lucene_Search_Weight_Phrase($this, $reader);
    }

    /**
     * Calculate result vector
     *
     * Intersects the document sets of all terms into $_resVector (a bitset when
     * the bitset extension is loaded, otherwise an array keyed by document id)
     * and stores each term's positions in $_termsPositions.
     *
     * @param Zend_Search_Lucene $reader
     */
    private function _calculateResult($reader)
    {
        $useBitset = extension_loaded('bitset');

        foreach ($this->_terms as $termId => $term) {
            if ($useBitset) {
                $termDocs = bitset_from_array($reader->termDocs($term));
                $this->_resVector = ($this->_resVector === null)
                    ? $termDocs
                    : bitset_intersection($this->_resVector, $termDocs);
            } else {
                // Flip so that document ids become keys and isset() lookups work.
                $termDocs = array_flip($reader->termDocs($term));
                $this->_resVector = ($this->_resVector === null)
                    ? $termDocs
                    : array_intersect_key($this->_resVector, $termDocs);
            }

            $this->_termsPositions[$termId] = $reader->termPositions($term);
        }
    }

    /**
     * Score calculator for exact phrase queries (terms sequence is fixed)
     *
     * Counts how many positions of the least frequent term have every other
     * term at exactly the expected relative offset.
     *
     * @param integer $docId
     * @return integer number of exact phrase occurrences in the document
     */
    public function _exactPhraseFreq($docId)
    {
        $freq = 0;

        // Term Id with lowest cardinality
        $lowCardTermId = null;

        // Calculate $lowCardTermId
        foreach ($this->_terms as $termId => $term) {
            if ($lowCardTermId === null ||
                count($this->_termsPositions[$termId][$docId]) <
                count($this->_termsPositions[$lowCardTermId][$docId]) ) {
                $lowCardTermId = $termId;
            }
        }

        // Walk through positions of the term with lowest cardinality
        foreach ($this->_termsPositions[$lowCardTermId][$docId] as $lowCardPos) {
            $phraseFound = true;

            // Every other term must sit at its expected position
            foreach ($this->_terms as $termId => $term) {
                if ($termId == $lowCardTermId) {
                    continue;
                }

                $expectedPosition = $lowCardPos +
                                        ($this->_offsets[$termId] -
                                         $this->_offsets[$lowCardTermId]);

                if (!in_array($expectedPosition, $this->_termsPositions[$termId][$docId])) {
                    $phraseFound = false;
                    break;
                }
            }

            if ($phraseFound) {
                $freq++;
            }
        }

        return $freq;
    }

    /**
     * Score calculator for sloppy phrase queries (terms sequence is fixed)
     *
     * Builds candidate phrases (one position per term), then for each candidate
     * finds the smallest total displacement over all start shifts in
     * [-slop, slop]. Candidates within the slop contribute
     * sloppyFreq(minDistance) to the result.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene $reader
     * @return float
     */
    public function _sloppyPhraseFreq($docId, Zend_Search_Lucene $reader)
    {
        $freq = 0;

        $phraseQueue = array();
        $phraseQueue[0] = array(); // empty phrase
        $lastTerm = null;

        // Walk through the terms to create phrases.
        foreach ($this->_terms as $termId => $term) {
            $queueSize = count($phraseQueue);
            $firstPass = true;

            // Walk through the term positions.
            // Each term position produces a set of phrases.
            foreach ($this->_termsPositions[$termId][$docId] as $termPosition ) {
                if ($firstPass) {
                    // The first position extends every existing candidate in place.
                    for ($count = 0; $count < $queueSize; $count++) {
                        $phraseQueue[$count][$termId] = $termPosition;
                    }
                } else {
                    // Further positions clone the candidates whose previous term
                    // is close enough to this position (within the slop).
                    for ($count = 0; $count < $queueSize; $count++) {
                        if ($lastTerm !== null &&
                            abs( $termPosition - $phraseQueue[$count][$lastTerm] -
                                 ($this->_offsets[$termId] - $this->_offsets[$lastTerm])) > $this->_slop) {
                            continue;
                        }

                        $newPhraseId = count($phraseQueue);
                        $phraseQueue[$newPhraseId]          = $phraseQueue[$count];
                        $phraseQueue[$newPhraseId][$termId] = $termPosition;
                    }
                }

                $firstPass = false;
            }
            $lastTerm = $termId;
        }

        foreach ($phraseQueue as $phrasePos) {
            $minDistance = null;

            for ($shift = -$this->_slop; $shift <= $this->_slop; $shift++) {
                $distance = 0;
                $start = reset($phrasePos) - reset($this->_offsets) + $shift;

                foreach ($this->_terms as $termId => $term) {
                    $distance += abs($phrasePos[$termId] - $this->_offsets[$termId] - $start);

                    if ($distance > $this->_slop) {
                        break; // already too far, no need to add the rest
                    }
                }

                if ($minDistance === null || $distance < $minDistance) {
                    $minDistance = $distance;
                }
            }

            if ($minDistance <= $this->_slop) {
                $freq += $reader->getSimilarity()->sloppyFreq($minDistance);
            }
        }

        return $freq;
    }

    /**
     * Score specified document
     *
     * Returns 0 for an empty phrase, for documents that do not contain all of
     * the terms, and for documents that contain the terms but not the phrase.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene $reader
     * @return float
     */
    public function score($docId, $reader)
    {
        // optimize zero-term case
        if (count($this->_terms) == 0) {
            return 0;
        }

        if ($this->_resVector === null) {
            $this->_calculateResult($reader);
            $this->_initWeight($reader);
        }

        $matched = extension_loaded('bitset')
            ? bitset_in($this->_resVector, $docId)
            : isset($this->_resVector[$docId]);

        if (!$matched) {
            return 0;
        }

        if ($this->_slop == 0) {
            $freq = $this->_exactPhraseFreq($docId);
        } else {
            $freq = $this->_sloppyPhraseFreq($docId, $reader);
        }

        if ($freq == 0) {
            // All terms occur in the document but never as the requested phrase.
            // Return an explicit 0 instead of falling off the end (null).
            return 0;
        }

        $tf     = $reader->getSimilarity()->tf($freq);
        $weight = $this->_weight->getValue();
        $norm   = $reader->norm($docId, reset($this->_terms)->field);

        return $tf*$weight*$norm;
    }
}
|
||||
|
128
search/Zend/Search/Lucene/Search/Query/Term.php
Normal file
128
search/Zend/Search/Lucene/Search/Query/Term.php
Normal file
@ -0,0 +1,128 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query */
|
||||
require_once 'Zend/Search/Lucene/Search/Query.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Weight_Term */
|
||||
require_once 'Zend/Search/Lucene/Search/Weight/Term.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Query_Term extends Zend_Search_Lucene_Search_Query
{
    /**
     * The term this query looks for.
     *
     * @var Zend_Search_Lucene_Index_Term
     */
    private $_term;

    /**
     * Term sign.
     * True: the term is required. False: the term is prohibited.
     *
     * @var bool
     */
    private $_sign;

    /**
     * Documents containing the term.
     * A bitset when the bitset extension is loaded, otherwise an array
     * keyed by document id. Null until the first call to score().
     *
     * @var mixed
     */
    private $_docVector = null;

    /**
     * Positions of the term per document.
     * Array: docId => array( pos1, pos2, ... )
     *
     * @var array
     */
    private $_termPositions;


    /**
     * Create a single-term query.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @param boolean $sign
     */
    public function __construct( $term, $sign = true )
    {
        $this->_term = $term;
        $this->_sign = $sign;
    }


    /**
     * Build the Weight object used to score this query.
     *
     * @param Zend_Search_Lucene $reader
     * @return Zend_Search_Lucene_Search_Weight
     */
    protected function _createWeight($reader)
    {
        $weight = new Zend_Search_Lucene_Search_Weight_Term($this->_term, $this, $reader);

        return $weight;
    }

    /**
     * Score the given document.
     *
     * The document vector, term positions and weight are loaded from the
     * reader on the first call. A document scores 0 when the term is
     * prohibited or does not occur in it.
     *
     * @param integer $docId
     * @param Zend_Search_Lucene $reader
     * @return float
     */
    public function score( $docId, $reader )
    {
        if ($this->_docVector === null) {
            $docIds = $reader->termDocs($this->_term);
            if (extension_loaded('bitset')) {
                $this->_docVector = bitset_from_array($docIds);
            } else {
                // flip so document ids become keys for isset() lookups
                $this->_docVector = array_flip($docIds);
            }

            $this->_termPositions = $reader->termPositions($this->_term);
            $this->_initWeight($reader);
        }

        if (extension_loaded('bitset')) {
            $match = bitset_in($this->_docVector, $docId);
        } else {
            $match = isset($this->_docVector[$docId]);
        }

        if (!$this->_sign || !$match) {
            return 0;
        }

        // Factors are evaluated and multiplied left to right: tf, weight, norm.
        $tf     = $reader->getSimilarity()->tf(count($this->_termPositions[$docId]));
        $weight = $this->_weight->getValue();
        $norm   = $reader->norm($docId, $this->_term->field);

        return $tf * $weight * $norm;
    }
}
|
||||
|
108
search/Zend/Search/Lucene/Search/QueryHit.php
Normal file
108
search/Zend/Search/Lucene/Search/QueryHit.php
Normal file
@ -0,0 +1,108 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_QueryHit
{
    /**
     * Index that produced this hit
     * @var Zend_Search_Lucene
     */
    protected $_index = null;

    /**
     * Document associated with this hit; loaded on first access
     * @var Zend_Search_Lucene_Document
     */
    protected $_document = null;

    /**
     * Number of the document in the index
     * @var integer
     */
    public $id;

    /**
     * Score of the hit
     * @var float
     */
    public $score;


    /**
     * Constructor - keeps a handle on the Zend_Search_Lucene index that
     * produced the hit so the document can be fetched from it later.
     *
     * @param Zend_Search_Lucene $index
     */
    public function __construct(Zend_Search_Lucene $index)
    {
        $this->_index = $index;
    }


    /**
     * Magic accessor: reads the named field from the document
     * associated with this hit.
     *
     * @param string $offset  field name
     * @return string
     */
    public function __get($offset)
    {
        $document = $this->getDocument();

        return $document->getFieldValue($offset);
    }


    /**
     * Return the document object for this hit, fetching it from the index
     * (by $id) the first time it is requested.
     *
     * @return Zend_Search_Lucene_Document
     */
    public function getDocument()
    {
        if ($this->_document instanceof Zend_Search_Lucene_Document) {
            return $this->_document;
        }

        $this->_document = $this->_index->getDocument($this->id);

        return $this->_document;
    }


    /**
     * Return the index object for this hit
     *
     * @return Zend_Search_Lucene
     */
    public function getIndex()
    {
        return $this->_index;
    }
}
|
||||
|
142
search/Zend/Search/Lucene/Search/QueryParser.php
Normal file
142
search/Zend/Search/Lucene/Search/QueryParser.php
Normal file
@ -0,0 +1,142 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryTokenizer */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryTokenizer.php';
|
||||
|
||||
/** Zend_Search_Lucene_Index_Term */
|
||||
require_once 'Zend/Search/Lucene/Index/Term.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Term */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Term.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_MultiTerm */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
|
||||
|
||||
/** Zend_Search_Lucene_Search_Query_Phrase */
|
||||
require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_QueryParser
{

    /**
     * Parses a query string, returning a Zend_Search_Lucene_Search_Query
     *
     * A single word yields a Term query on the 'contents' field. Otherwise the
     * tokens are walked in order: each word becomes a term in the field named
     * by a preceding 'field:' token (default 'contents'), and a preceding
     * '+' / '-' sign marks it required (true) / prohibited (false); unsigned
     * words get a null sign.
     *
     * @param string $strQuery
     * @return Zend_Search_Lucene_Search_Query
     * @throws Zend_Search_Lucene_Exception on an empty query, a sign that is not
     *         followed by a word, unbalanced bracket counts, or a query without words.
     */
    static public function parse($strQuery)
    {
        $tokens = new Zend_Search_Lucene_Search_QueryTokenizer($strQuery);

        // Empty query
        if (!$tokens->count()) {
            throw new Zend_Search_Lucene_Exception('Syntax error: query string cannot be empty.');
        }

        // Term query
        if ($tokens->count() == 1) {
            if ($tokens->current()->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD) {
                return new Zend_Search_Lucene_Search_Query_Term(new Zend_Search_Lucene_Index_Term($tokens->current()->text, 'contents'));
            } else {
                throw new Zend_Search_Lucene_Exception('Syntax error: query string must contain at least one word.');
            }
        }


        /**
         * MultiTerm Query
         *
         * Process each token that was returned by the tokenizer.
         */
        $terms = array();
        $signs = array();
        $prevToken = null;
        $openBrackets = 0;
        $field = 'contents';
        foreach ($tokens as $token) {
            switch ($token->type) {
                case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD:
                    $terms[] = new Zend_Search_Lucene_Index_Term($token->text, $field);
                    // A field specifier applies to the next word only.
                    $field = 'contents';
                    if ($prevToken !== null &&
                        $prevToken->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN) {
                            if ($prevToken->text == "+") {
                                $signs[] = true;
                            } else {
                                $signs[] = false;
                            }
                    } else {
                        $signs[] = null;
                    }
                    break;
                case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN:
                    if ($prevToken !== null &&
                        $prevToken->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN) {
                            throw new Zend_Search_Lucene_Exception('Syntax error: sign operator must be followed by a word.');
                    }
                    break;
                case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_FIELD:
                    $field = $token->text;
                    // let previous token to be signed as next $prevToken
                    $token = $prevToken;
                    break;
                case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_BRACKET:
                    if ($token->text == '(') {
                        $openBrackets++;
                    } else {
                        $openBrackets--;
                    }
                    break;
            }
            $prevToken = $token;
        }

        // Finish up parsing: check the last token in the query for an opening sign or parenthesis.
        // $prevToken is still null when the query held nothing but field
        // specifiers; that case is reported by the term-count check below.
        if ($prevToken !== null &&
            $prevToken->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN) {
            throw new Zend_Search_Lucene_Exception('Syntax Error: sign operator must be followed by a word.');
        }

        // Finish up parsing: check that every opening bracket has a matching closing bracket.
        if ($openBrackets != 0) {
            throw new Zend_Search_Lucene_Exception('Syntax Error: mismatched parentheses, every opening must have closing.');
        }

        switch (count($terms)) {
            case 0:
                throw new Zend_Search_Lucene_Exception('Syntax error: bad term count.');
            case 1:
                return new Zend_Search_Lucene_Search_Query_Term($terms[0],$signs[0] !== false);
            default:
                return new Zend_Search_Lucene_Search_Query_MultiTerm($terms,$signs);
        }
    }

}
|
||||
|
104
search/Zend/Search/Lucene/Search/QueryToken.php
Normal file
104
search/Zend/Search/Lucene/Search/QueryToken.php
Normal file
@ -0,0 +1,104 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_QueryToken
{
    /**
     * Token type Word.
     */
    const TOKTYPE_WORD = 0;

    /**
     * Token type Field.
     * Field indicator in 'field:word' pair
     */
    const TOKTYPE_FIELD = 1;

    /**
     * Token type Sign.
     * '+' (required) or '-' (absentee) sign
     */
    const TOKTYPE_SIGN = 2;

    /**
     * Token type Bracket.
     * '(' or ')'
     */
    const TOKTYPE_BRACKET = 3;


    /**
     * Token type (one of the TOKTYPE_* constants).
     *
     * @var integer
     */
    public $type;

    /**
     * Token text.
     *
     * @var string
     */
    public $text;


    /**
     * Create a query token from its type and text.
     *
     * @param integer $tokType one of the TOKTYPE_* constants
     * @param string  $tokText non-empty token text
     * @throws Zend_Search_Lucene_Exception for an unknown type or empty text
     */
    public function __construct($tokType, $tokText)
    {
        $knownTypes = array(
            self::TOKTYPE_BRACKET,
            self::TOKTYPE_FIELD,
            self::TOKTYPE_SIGN,
            self::TOKTYPE_WORD,
        );

        // Deliberately a loose (==) membership test.
        if (!in_array($tokType, $knownTypes)) {
            throw new Zend_Search_Lucene_Exception("Unrecognized token type \"$tokType\".");
        }

        if (strlen($tokText) == 0) {
            throw new Zend_Search_Lucene_Exception('Token text must be supplied.');
        }

        $this->type = $tokType;
        $this->text = $tokText;
    }
}
|
||||
|
164
search/Zend/Search/Lucene/Search/QueryTokenizer.php
Normal file
164
search/Zend/Search/Lucene/Search/QueryTokenizer.php
Normal file
@ -0,0 +1,164 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_QueryToken */
|
||||
require_once 'Zend/Search/Lucene/Search/QueryToken.php';
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_QueryTokenizer implements Iterator
|
||||
{
|
||||
/**
|
||||
* inputString tokens.
|
||||
*
|
||||
* @var array
|
||||
*/
|
||||
protected $_tokens = array();
|
||||
|
||||
/**
|
||||
* tokens pointer.
|
||||
*
|
||||
* @var integer
|
||||
*/
|
||||
protected $_currToken = 0;
|
||||
|
||||
|
||||
/**
|
||||
* QueryTokenize constructor needs query string as a parameter.
|
||||
*
|
||||
* @param string $inputString
|
||||
*/
|
||||
public function __construct($inputString)
|
||||
{
|
||||
if (!strlen($inputString)) {
|
||||
throw new Zend_Search_Lucene_Exception('Cannot tokenize empty query string.');
|
||||
}
|
||||
|
||||
$currentToken = '';
|
||||
for ($count = 0; $count < strlen($inputString); $count++) {
|
||||
if (ctype_alnum( $inputString{$count} )) {
|
||||
$currentToken .= $inputString{$count};
|
||||
} else {
|
||||
// Previous token is finished
|
||||
if (strlen($currentToken)) {
|
||||
$this->_tokens[] = new Zend_Search_Lucene_Search_QueryToken(Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD,
|
||||
$currentToken);
|
||||
$currentToken = '';
|
||||
}
|
||||
|
||||
if ($inputString{$count} == '+' || $inputString{$count} == '-') {
|
||||
$this->_tokens[] = new Zend_Search_Lucene_Search_QueryToken(Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN,
|
||||
$inputString{$count});
|
||||
} elseif ($inputString{$count} == '(' || $inputString{$count} == ')') {
|
||||
$this->_tokens[] = new Zend_Search_Lucene_Search_QueryToken(Zend_Search_Lucene_Search_QueryToken::TOKTYPE_BRACKET,
|
||||
$inputString{$count});
|
||||
} elseif ($inputString{$count} == ':' && $this->count()) {
|
||||
if ($this->_tokens[count($this->_tokens)-1]->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD) {
|
||||
$this->_tokens[count($this->_tokens)-1]->type = Zend_Search_Lucene_Search_QueryToken::TOKTYPE_FIELD;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (strlen($currentToken)) {
|
||||
$this->_tokens[] = new Zend_Search_Lucene_Search_QueryToken(Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD, $currentToken);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns number of tokens
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function count()
|
||||
{
|
||||
return count($this->_tokens);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns TRUE if a token exists at the current position.
|
||||
*
|
||||
* @return boolean
|
||||
*/
|
||||
public function valid()
{
    // The cursor is valid for as long as it still points inside the token list.
    return $this->count() > $this->_currToken;
}
|
||||
|
||||
|
||||
/**
|
||||
* Resets token stream.
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
public function rewind()
{
    // Move the cursor back to the first token; nothing is returned.
    $this->_currToken = 0;
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the token at the current position or FALSE if
|
||||
* the position does not contain a valid token.
|
||||
*
|
||||
* @return mixed
|
||||
*/
|
||||
public function current()
{
    // Past the end of the stream there is no token to hand back.
    if (!$this->valid()) {
        return false;
    }

    return $this->_tokens[$this->_currToken];
}
|
||||
|
||||
|
||||
/**
|
||||
* Advances to the next token and returns the new position
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function next()
{
    // Step the cursor forward, then report the position it now holds.
    ++$this->_currToken;

    return $this->_currToken;
}
|
||||
|
||||
|
||||
/**
|
||||
* Return the position of the current token.
|
||||
*
|
||||
* @return integer
|
||||
*/
|
||||
public function key()
{
    // A token's key is its zero-based position in the stream.
    $position = $this->_currToken;

    return $position;
}
|
||||
|
||||
}
|
||||
|
553
search/Zend/Search/Lucene/Search/Similarity.php
Normal file
553
search/Zend/Search/Lucene/Search/Similarity.php
Normal file
@ -0,0 +1,553 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Similarity_Default */
|
||||
require_once 'Zend/Search/Lucene/Search/Similarity/Default.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
abstract class Zend_Search_Lucene_Search_Similarity
{
    /**
     * The Similarity implementation used by default.
     *
     * @var Zend_Search_Lucene_Search_Similarity
     */
    static private $_defaultImpl;

    /**
     * Cache of decoded bytes.
     * Maps every encoded norm byte (0..255) to the float it stands for.
     * The values are strictly increasing, which _floatToByte() relies on
     * for its binary search.
     *
     * @var array
     */
    static private $_normTable = array( 0   => 0.0,
                                        1   => 5.820766E-10,
                                        2   => 6.9849193E-10,
                                        3   => 8.1490725E-10,
                                        4   => 9.313226E-10,
                                        5   => 1.1641532E-9,
                                        6   => 1.3969839E-9,
                                        7   => 1.6298145E-9,
                                        8   => 1.8626451E-9,
                                        9   => 2.3283064E-9,
                                        10  => 2.7939677E-9,
                                        11  => 3.259629E-9,
                                        12  => 3.7252903E-9,
                                        13  => 4.656613E-9,
                                        14  => 5.5879354E-9,
                                        15  => 6.519258E-9,
                                        16  => 7.4505806E-9,
                                        17  => 9.313226E-9,
                                        18  => 1.1175871E-8,
                                        19  => 1.3038516E-8,
                                        20  => 1.4901161E-8,
                                        21  => 1.8626451E-8,
                                        22  => 2.2351742E-8,
                                        23  => 2.6077032E-8,
                                        24  => 2.9802322E-8,
                                        25  => 3.7252903E-8,
                                        26  => 4.4703484E-8,
                                        27  => 5.2154064E-8,
                                        28  => 5.9604645E-8,
                                        29  => 7.4505806E-8,
                                        30  => 8.940697E-8,
                                        31  => 1.0430813E-7,
                                        32  => 1.1920929E-7,
                                        33  => 1.4901161E-7,
                                        34  => 1.7881393E-7,
                                        35  => 2.0861626E-7,
                                        36  => 2.3841858E-7,
                                        37  => 2.9802322E-7,
                                        38  => 3.5762787E-7,
                                        39  => 4.172325E-7,
                                        40  => 4.7683716E-7,
                                        41  => 5.9604645E-7,
                                        42  => 7.1525574E-7,
                                        43  => 8.34465E-7,
                                        44  => 9.536743E-7,
                                        45  => 1.1920929E-6,
                                        46  => 1.4305115E-6,
                                        47  => 1.66893E-6,
                                        48  => 1.9073486E-6,
                                        49  => 2.3841858E-6,
                                        50  => 2.861023E-6,
                                        51  => 3.33786E-6,
                                        52  => 3.8146973E-6,
                                        53  => 4.7683716E-6,
                                        54  => 5.722046E-6,
                                        55  => 6.67572E-6,
                                        56  => 7.6293945E-6,
                                        57  => 9.536743E-6,
                                        58  => 1.1444092E-5,
                                        59  => 1.335144E-5,
                                        60  => 1.5258789E-5,
                                        61  => 1.9073486E-5,
                                        62  => 2.2888184E-5,
                                        63  => 2.670288E-5,
                                        64  => 3.0517578E-5,
                                        65  => 3.8146973E-5,
                                        66  => 4.5776367E-5,
                                        67  => 5.340576E-5,
                                        68  => 6.1035156E-5,
                                        69  => 7.6293945E-5,
                                        70  => 9.1552734E-5,
                                        71  => 1.0681152E-4,
                                        72  => 1.2207031E-4,
                                        73  => 1.5258789E-4,
                                        74  => 1.8310547E-4,
                                        75  => 2.1362305E-4,
                                        76  => 2.4414062E-4,
                                        77  => 3.0517578E-4,
                                        78  => 3.6621094E-4,
                                        79  => 4.272461E-4,
                                        80  => 4.8828125E-4,
                                        81  => 6.1035156E-4,
                                        82  => 7.324219E-4,
                                        83  => 8.544922E-4,
                                        84  => 9.765625E-4,
                                        85  => 0.0012207031,
                                        86  => 0.0014648438,
                                        87  => 0.0017089844,
                                        88  => 0.001953125,
                                        89  => 0.0024414062,
                                        90  => 0.0029296875,
                                        91  => 0.0034179688,
                                        92  => 0.00390625,
                                        93  => 0.0048828125,
                                        94  => 0.005859375,
                                        95  => 0.0068359375,
                                        96  => 0.0078125,
                                        97  => 0.009765625,
                                        98  => 0.01171875,
                                        99  => 0.013671875,
                                        100 => 0.015625,
                                        101 => 0.01953125,
                                        102 => 0.0234375,
                                        103 => 0.02734375,
                                        104 => 0.03125,
                                        105 => 0.0390625,
                                        106 => 0.046875,
                                        107 => 0.0546875,
                                        108 => 0.0625,
                                        109 => 0.078125,
                                        110 => 0.09375,
                                        111 => 0.109375,
                                        112 => 0.125,
                                        113 => 0.15625,
                                        114 => 0.1875,
                                        115 => 0.21875,
                                        116 => 0.25,
                                        117 => 0.3125,
                                        118 => 0.375,
                                        119 => 0.4375,
                                        120 => 0.5,
                                        121 => 0.625,
                                        122 => 0.75,
                                        123 => 0.875,
                                        124 => 1.0,
                                        125 => 1.25,
                                        126 => 1.5,
                                        127 => 1.75,
                                        128 => 2.0,
                                        129 => 2.5,
                                        130 => 3.0,
                                        131 => 3.5,
                                        132 => 4.0,
                                        133 => 5.0,
                                        134 => 6.0,
                                        135 => 7.0,
                                        136 => 8.0,
                                        137 => 10.0,
                                        138 => 12.0,
                                        139 => 14.0,
                                        140 => 16.0,
                                        141 => 20.0,
                                        142 => 24.0,
                                        143 => 28.0,
                                        144 => 32.0,
                                        145 => 40.0,
                                        146 => 48.0,
                                        147 => 56.0,
                                        148 => 64.0,
                                        149 => 80.0,
                                        150 => 96.0,
                                        151 => 112.0,
                                        152 => 128.0,
                                        153 => 160.0,
                                        154 => 192.0,
                                        155 => 224.0,
                                        156 => 256.0,
                                        157 => 320.0,
                                        158 => 384.0,
                                        159 => 448.0,
                                        160 => 512.0,
                                        161 => 640.0,
                                        162 => 768.0,
                                        163 => 896.0,
                                        164 => 1024.0,
                                        165 => 1280.0,
                                        166 => 1536.0,
                                        167 => 1792.0,
                                        168 => 2048.0,
                                        169 => 2560.0,
                                        170 => 3072.0,
                                        171 => 3584.0,
                                        172 => 4096.0,
                                        173 => 5120.0,
                                        174 => 6144.0,
                                        175 => 7168.0,
                                        176 => 8192.0,
                                        177 => 10240.0,
                                        178 => 12288.0,
                                        179 => 14336.0,
                                        180 => 16384.0,
                                        181 => 20480.0,
                                        182 => 24576.0,
                                        183 => 28672.0,
                                        184 => 32768.0,
                                        185 => 40960.0,
                                        186 => 49152.0,
                                        187 => 57344.0,
                                        188 => 65536.0,
                                        189 => 81920.0,
                                        190 => 98304.0,
                                        191 => 114688.0,
                                        192 => 131072.0,
                                        193 => 163840.0,
                                        194 => 196608.0,
                                        195 => 229376.0,
                                        196 => 262144.0,
                                        197 => 327680.0,
                                        198 => 393216.0,
                                        199 => 458752.0,
                                        200 => 524288.0,
                                        201 => 655360.0,
                                        202 => 786432.0,
                                        203 => 917504.0,
                                        204 => 1048576.0,
                                        205 => 1310720.0,
                                        206 => 1572864.0,
                                        207 => 1835008.0,
                                        208 => 2097152.0,
                                        209 => 2621440.0,
                                        210 => 3145728.0,
                                        211 => 3670016.0,
                                        212 => 4194304.0,
                                        213 => 5242880.0,
                                        214 => 6291456.0,
                                        215 => 7340032.0,
                                        216 => 8388608.0,
                                        217 => 1.048576E7,
                                        218 => 1.2582912E7,
                                        219 => 1.4680064E7,
                                        220 => 1.6777216E7,
                                        221 => 2.097152E7,
                                        222 => 2.5165824E7,
                                        223 => 2.9360128E7,
                                        224 => 3.3554432E7,
                                        225 => 4.194304E7,
                                        226 => 5.0331648E7,
                                        227 => 5.8720256E7,
                                        228 => 6.7108864E7,
                                        229 => 8.388608E7,
                                        230 => 1.00663296E8,
                                        231 => 1.17440512E8,
                                        232 => 1.34217728E8,
                                        233 => 1.6777216E8,
                                        234 => 2.01326592E8,
                                        235 => 2.34881024E8,
                                        236 => 2.68435456E8,
                                        237 => 3.3554432E8,
                                        238 => 4.02653184E8,
                                        239 => 4.69762048E8,
                                        240 => 5.3687091E8,
                                        241 => 6.7108864E8,
                                        242 => 8.0530637E8,
                                        243 => 9.395241E8,
                                        244 => 1.07374182E9,
                                        245 => 1.34217728E9,
                                        246 => 1.61061274E9,
                                        247 => 1.87904819E9,
                                        248 => 2.14748365E9,
                                        249 => 2.68435456E9,
                                        250 => 3.22122547E9,
                                        251 => 3.75809638E9,
                                        252 => 4.2949673E9,
                                        253 => 5.3687091E9,
                                        254 => 6.4424509E9,
                                        255 => 7.5161928E9 );


    /**
     * Set the default Similarity implementation used by indexing and search
     * code.
     *
     * @param Zend_Search_Lucene_Search_Similarity $similarity
     */
    static public function setDefault(Zend_Search_Lucene_Search_Similarity $similarity)
    {
        self::$_defaultImpl = $similarity;
    }


    /**
     * Return the default Similarity implementation used by indexing and search
     * code. A Zend_Search_Lucene_Search_Similarity_Default instance is created
     * on first use when no implementation has been set.
     *
     * @return Zend_Search_Lucene_Search_Similarity
     */
    static public function getDefault()
    {
        if (!self::$_defaultImpl instanceof Zend_Search_Lucene_Search_Similarity) {
            self::$_defaultImpl = new Zend_Search_Lucene_Search_Similarity_Default();
        }

        return self::$_defaultImpl;
    }


    /**
     * Computes the normalization value for a field given the total number of
     * terms contained in a field.  These values, together with field boosts, are
     * stored in an index and multiplied into scores for hits on each field by the
     * search code.
     *
     * Matches in longer fields are less precise, so implementations of this
     * method usually return smaller values when 'numTokens' is large,
     * and larger values when 'numTokens' is small.
     *
     * Note that these values are computed under
     * IndexWriter::addDocument(Document) and stored then using
     * encodeNorm(float).  Thus they have limited precision, and documents
     * must be re-indexed if this method is altered.
     *
     * fieldName - name of field
     * numTokens - the total number of tokens contained in fields named
     *             'fieldName' of 'doc'.
     * Returns a normalization factor for hits on this field of this document
     *
     * @param string $fieldName
     * @param integer $numTokens
     * @return float
     */
    abstract public function lengthNorm($fieldName, $numTokens);

    /**
     * Computes the normalization value for a query given the sum of the squared
     * weights of each of the query terms.  This value is then multiplied into the
     * weight of each query term.
     *
     * This does not affect ranking, but rather just attempts to make scores
     * from different queries comparable.
     *
     * sumOfSquaredWeights - the sum of the squares of query term weights
     * Returns a normalization factor for query weights
     *
     * @param float $sumOfSquaredWeights
     * @return float
     */
    abstract public function queryNorm($sumOfSquaredWeights);


    /**
     * Decodes a normalization factor stored in an index.
     * Only the low eight bits of $byte are used for the table lookup.
     *
     * @param integer $byte
     * @return float
     */
    static public function decodeNorm($byte)
    {
        return self::$_normTable[$byte & 0xFF];
    }


    /**
     * Encodes a normalization factor for storage in an index.
     *
     * The encoding uses a five-bit exponent and three-bit mantissa, thus
     * representing values from around 7x10^9 to 2x10^-9 with about one
     * significant decimal digit of accuracy.  Zero is also represented.
     * Negative numbers are rounded up to zero.  Values too large to represent
     * are rounded down to the largest representable value.  Positive values too
     * small to represent are rounded up to the smallest positive representable
     * value.
     *
     * @param float $f
     * @return integer
     */
    static public function encodeNorm($f)
    {
        return self::_floatToByte($f);
    }

    /**
     * Float to byte conversion.
     * Finds the norm table index whose value is closest to $f, following the
     * rounding rules documented on encodeNorm().
     *
     * @param float $f
     * @return integer
     */
    static private function _floatToByte($f)
    {
        // round negatives up to zero
        if ($f <= 0.0) {
            return 0;
        }

        // binary search over the strictly increasing norm table
        $lowIndex  = 0;
        $highIndex = 255;
        while ($highIndex >= $lowIndex) {
            $mid   = ($highIndex + $lowIndex) >> 1;
            $delta = $f - self::$_normTable[$mid];

            if ($delta < 0) {
                $highIndex = $mid - 1;
            } elseif ($delta > 0) {
                $lowIndex = $mid + 1;
            } else {
                return $mid; // We got it!
            }
        }

        // No exact match: $highIndex is the largest table entry below $f.
        // It cannot be negative here, because $f > 0 and entry 0 is 0.0.
        if ($highIndex == 0) {
            // Positive value below the smallest positive entry: round up to
            // that entry rather than letting a non-zero norm collapse to zero.
            return 1;
        }

        if ($highIndex == 255) {
            // Larger than anything representable: clamp to the largest entry.
            return 255;
        }

        // round to closest value
        $distanceBelow = $f - self::$_normTable[$highIndex];
        $distanceAbove = self::$_normTable[$highIndex + 1] - $f;

        return ($distanceBelow > $distanceAbove) ? $highIndex + 1 : $highIndex;
    }


    /**
     * Computes a score factor based on a term or phrase's frequency in a
     * document.  This value is multiplied by the idf(Term, Searcher)
     * factor for each term in the query and these products are then summed to
     * form the initial score for a document.
     *
     * Terms and phrases repeated in a document indicate the topic of the
     * document, so implementations of this method usually return larger values
     * when 'freq' is large, and smaller values when 'freq'
     * is small.
     *
     * freq - the frequency of a term within a document
     * Returns a score factor based on a term's within-document frequency
     *
     * @param float $freq
     * @return float
     */
    abstract public function tf($freq);

    /**
     * Computes the amount of a sloppy phrase match, based on an edit distance.
     * This value is summed for each sloppy phrase match in a document to form
     * the frequency that is passed to tf(float).
     *
     * A phrase match with a small edit distance to a document passage more
     * closely matches the document, so implementations of this method usually
     * return larger values when the edit distance is small and smaller values
     * when it is large.
     *
     * distance - the edit distance of this sloppy phrase match
     * Returns the frequency increment for this match
     *
     * @param integer $distance
     * @return float
     */
    abstract public function sloppyFreq($distance);


    /**
     * Computes a score factor for a simple term or a phrase.
     *
     * For a single term this is idfFreq(reader->docFreq(term), reader->count());
     * for an array of terms it is the sum of that value over all terms.
     *
     * input - the term in question or array of terms
     * reader - reader the document collection being searched
     * Returns a score factor for the term
     *
     * @param mixed $input
     * @param Zend_Search_Lucene $reader
     * @return float
     */
    public function idf($input, $reader)
    {
        if (!is_array($input)) {
            return $this->idfFreq($reader->docFreq($input), $reader->count());
        } else {
            $idf = 0.0;
            foreach ($input as $term) {
                $idf += $this->idfFreq($reader->docFreq($term), $reader->count());
            }
            return $idf;
        }
    }

    /**
     * Computes a score factor based on a term's document frequency (the number
     * of documents which contain the term).  This value is multiplied by the
     * tf(int) factor for each term in the query and these products are
     * then summed to form the initial score for a document.
     *
     * Terms that occur in fewer documents are better indicators of topic, so
     * implementations of this method usually return larger values for rare terms,
     * and smaller values for common terms.
     *
     * docFreq - the number of documents which contain the term
     * numDocs - the total number of documents in the collection
     * Returns a score factor based on the term's document frequency
     *
     * @param integer $docFreq
     * @param integer $numDocs
     * @return float
     */
    abstract public function idfFreq($docFreq, $numDocs);

    /**
     * Computes a score factor based on the fraction of all query terms that a
     * document contains.  This value is multiplied into scores.
     *
     * The presence of a large portion of the query terms indicates a better
     * match with the query, so implementations of this method usually return
     * larger values when the ratio between these parameters is large and smaller
     * values when the ratio between them is small.
     *
     * overlap - the number of query terms matched in the document
     * maxOverlap - the total number of terms in the query
     * Returns a score factor based on term overlap with the query
     *
     * @param integer $overlap
     * @param integer $maxOverlap
     * @return float
     */
    abstract public function coord($overlap, $maxOverlap);
}
|
||||
|
105
search/Zend/Search/Lucene/Search/Similarity/Default.php
Normal file
105
search/Zend/Search/Lucene/Search/Similarity/Default.php
Normal file
@ -0,0 +1,105 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Similarity_Default extends Zend_Search_Lucene_Search_Similarity
{

    /**
     * Field length normalization: '1/sqrt(numTerms)'.
     * A field with no terms yields the fixed value 1E10 instead of dividing by zero.
     * The field name is not used by this implementation.
     *
     * @param string $fieldName
     * @param integer $numTerms
     * @return float
     */
    public function lengthNorm($fieldName, $numTerms)
    {
        return ($numTerms == 0) ? 1E10 : 1.0/sqrt($numTerms);
    }

    /**
     * Query normalization: '1/sqrt(sumOfSquaredWeights)'.
     *
     * @param float $sumOfSquaredWeights
     * @return float
     */
    public function queryNorm($sumOfSquaredWeights)
    {
        $root = sqrt($sumOfSquaredWeights);

        return 1.0/$root;
    }

    /**
     * Term frequency factor: 'sqrt(freq)'.
     *
     * @param float $freq
     * @return float
     */
    public function tf($freq)
    {
        $factor = sqrt($freq);

        return $factor;
    }

    /**
     * Sloppy phrase frequency: '1/(distance + 1)'.
     *
     * @param integer $distance
     * @return float
     */
    public function sloppyFreq($distance)
    {
        $divisor = $distance + 1;

        return 1.0/$divisor;
    }

    /**
     * Inverse document frequency: 'log(numDocs/(docFreq+1)) + 1'.
     *
     * @param integer $docFreq
     * @param integer $numDocs
     * @return float
     */
    public function idfFreq($docFreq, $numDocs)
    {
        // The cast keeps the division in floating point.
        $ratio = $numDocs/(float)($docFreq+1);

        return log($ratio) + 1.0;
    }

    /**
     * Coordination factor: 'overlap/maxOverlap'.
     *
     * @param integer $overlap
     * @param integer $maxOverlap
     * @return float
     */
    public function coord($overlap, $maxOverlap)
    {
        $total = (float)$maxOverlap;

        return $overlap/$total;
    }
}
|
61
search/Zend/Search/Lucene/Search/Weight.php
Normal file
61
search/Zend/Search/Lucene/Search/Weight.php
Normal file
@ -0,0 +1,61 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Calculate query weights and build query scorers.
|
||||
*
|
||||
* A Weight is constructed by a query Query->createWeight().
|
||||
* The sumOfSquaredWeights() method is then called on the top-level
|
||||
* query to compute the query normalization factor Similarity->queryNorm(float).
|
||||
* This factor is then passed to normalize(float). At this point the weighting
|
||||
* is complete.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
abstract class Zend_Search_Lucene_Search_Weight
{
    /**
     * Returns the weight computed for this query.
     *
     * @return float
     */
    abstract public function getValue();

    /**
     * Returns the sum of the squared weights of the query clauses this
     * weight covers.
     *
     * @return float
     */
    abstract public function sumOfSquaredWeights();

    /**
     * Applies the query normalization factor to this weight.
     *
     * @param float $norm
     */
    abstract public function normalize($norm);
}
|
||||
|
135
search/Zend/Search/Lucene/Search/Weight/MultiTerm.php
Normal file
135
search/Zend/Search/Lucene/Search/Weight/MultiTerm.php
Normal file
@ -0,0 +1,135 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Weight */
|
||||
require_once 'Zend/Search/Lucene/Search/Weight.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Weight_MultiTerm extends Zend_Search_Lucene_Search_Weight
{
    /**
     * IndexReader.
     *
     * @var Zend_Search_Lucene
     */
    private $_reader;

    /**
     * The query that this concerns.
     *
     * @var Zend_Search_Lucene_Search_Query_MultiTerm
     */
    private $_query;

    /**
     * Per-term weights, keyed by the term's index within the query.
     * Array of Zend_Search_Lucene_Search_Weight_Term
     *
     * @var array
     */
    private $_weights;


    /**
     * Zend_Search_Lucene_Search_Weight_MultiTerm constructor.
     * Creates a term weight for every query term that is not excluded by its
     * sign and registers that weight on the query.
     *
     * @param Zend_Search_Lucene_Search_Query_MultiTerm $query   the query that this concerns
     * @param Zend_Search_Lucene $reader                         index reader
     */
    public function __construct($query, $reader)
    {
        $this->_weights = array();
        $this->_reader  = $reader;
        $this->_query   = $query;

        $signs = $query->getSigns();

        foreach ($query->getTerms() as $num => $term) {
            // Skip terms whose sign is set to a false value; terms with no
            // sign information at all, or a null sign, still get a weight.
            if ($signs !== null && $signs[$num] !== null && !$signs[$num]) {
                continue;
            }

            $termWeight = new Zend_Search_Lucene_Search_Weight_Term($term, $query, $reader);
            $this->_weights[$num] = $termWeight;
            $query->setWeight($num, $termWeight);
        }
    }


    /**
     * The weight for this query, which is the query's boost.
     *
     * @return float
     */
    public function getValue()
    {
        $boost = $this->_query->getBoost();

        return $boost;
    }


    /**
     * The sum of squared weights of contained query clauses, scaled by the
     * squared query boost. A total of zero is reported as 1.0.
     *
     * @return float
     */
    public function sumOfSquaredWeights()
    {
        // add up the sub weights
        $total = 0;
        foreach ($this->_weights as $termWeight) {
            $total += $termWeight->sumOfSquaredWeights();
        }

        // boost each sub-weight
        $total *= $this->_query->getBoost() * $this->_query->getBoost();

        // an empty query (like '-something -another') contributes no weights
        return ($total == 0) ? 1.0 : $total;
    }


    /**
     * Assigns the query normalization factor to this.
     * The factor is multiplied by the query boost before it is handed to
     * each term weight.
     *
     * @param float $queryNorm
     */
    public function normalize($queryNorm)
    {
        // incorporate boost
        $boostedNorm = $queryNorm * $this->_query->getBoost();

        foreach ($this->_weights as $termWeight) {
            $termWeight->normalize($boostedNorm);
        }
    }
}
|
||||
|
||||
|
141
search/Zend/Search/Lucene/Search/Weight/Phrase.php
Normal file
141
search/Zend/Search/Lucene/Search/Weight/Phrase.php
Normal file
@ -0,0 +1,141 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Zend_Search_Lucene_Search_Weight
|
||||
*/
|
||||
require_once 'Zend/Search/Lucene/Search/Weight.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Weight_Phrase extends Zend_Search_Lucene_Search_Weight
{
    /**
     * IndexReader.
     *
     * @var Zend_Search_Lucene
     */
    private $_reader;

    /**
     * The query that this concerns.
     *
     * @var Zend_Search_Lucene_Search_Query_Phrase
     */
    private $_query;

    /**
     * Weight value; set by normalize().
     *
     * @var float
     */
    private $_value;

    /**
     * Score factor; set by sumOfSquaredWeights().
     *
     * @var float
     */
    private $_idf;

    /**
     * Normalization factor; set by normalize().
     *
     * @var float
     */
    private $_queryNorm;


    /**
     * Query weight; set by sumOfSquaredWeights() and scaled by normalize().
     *
     * @var float
     */
    private $_queryWeight;


    /**
     * Zend_Search_Lucene_Search_Weight_Phrase constructor
     *
     * @param Zend_Search_Lucene_Search_Query_Phrase $query
     * @param Zend_Search_Lucene                     $reader
     */
    public function __construct(Zend_Search_Lucene_Search_Query_Phrase $query, Zend_Search_Lucene $reader)
    {
        $this->_reader = $reader;
        $this->_query  = $query;
    }


    /**
     * The weight for this query
     *
     * @return float
     */
    public function getValue()
    {
        $value = $this->_value;

        return $value;
    }


    /**
     * The sum of squared weights of contained query clauses.
     * Stores the idf and the boosted query weight on the way, for later use
     * by normalize().
     *
     * @return float
     */
    public function sumOfSquaredWeights()
    {
        // idf over all terms of the phrase
        $similarity = $this->_reader->getSimilarity();
        $this->_idf = $similarity->idf($this->_query->getTerms(), $this->_reader);

        // query weight = idf * boost
        $weight = $this->_idf * $this->_query->getBoost();
        $this->_queryWeight = $weight;

        // squared
        return $weight * $weight;
    }


    /**
     * Assigns the query normalization factor to this.
     *
     * @param float $queryNorm
     */
    public function normalize($queryNorm)
    {
        $this->_queryNorm = $queryNorm;

        // normalize query weight
        $this->_queryWeight = $this->_queryWeight * $queryNorm;

        // idf for documents
        $this->_value = $this->_queryWeight * $this->_idf;
    }
}
|
||||
|
||||
|
146
search/Zend/Search/Lucene/Search/Weight/Term.php
Normal file
146
search/Zend/Search/Lucene/Search/Weight/Term.php
Normal file
@ -0,0 +1,146 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Search_Weight */
|
||||
require_once 'Zend/Search/Lucene/Search/Weight.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Search
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Search_Weight_Term extends Zend_Search_Lucene_Search_Weight
{
    /**
     * Index reader used to obtain the similarity object.
     *
     * @var Zend_Search_Lucene
     */
    private $_reader;

    /**
     * Term this weight is computed for.
     *
     * @var Zend_Search_Lucene_Index_Term
     */
    private $_term;

    /**
     * Query that owns this weight (source of the boost factor).
     *
     * @var Zend_Search_Lucene_Search_Query
     */
    private $_query;

    /**
     * Final weight value, available after normalize() has run.
     *
     * @var float
     */
    private $_value;

    /**
     * idf score factor, computed in sumOfSquaredWeights().
     *
     * @var float
     */
    private $_idf;

    /**
     * Normalization factor handed to normalize().
     *
     * @var float
     */
    private $_queryNorm;

    /**
     * Query weight: idf * boost, later multiplied by the query norm.
     *
     * @var float
     */
    private $_queryWeight;


    /**
     * Stores the collaborators; no computation happens here.
     *
     * @param Zend_Search_Lucene_Index_Term   $term
     * @param Zend_Search_Lucene_Search_Query $query
     * @param Zend_Search_Lucene              $reader
     */
    public function __construct($term, $query, $reader)
    {
        $this->_reader = $reader;
        $this->_query  = $query;
        $this->_term   = $term;
    }


    /**
     * Returns the weight value set by the last normalize() call.
     *
     * @return float
     */
    public function getValue()
    {
        return $this->_value;
    }


    /**
     * Computes idf and the (not yet normalized) query weight, and returns
     * the square of that query weight.
     *
     * @return float
     */
    public function sumOfSquaredWeights()
    {
        // idf comes from the reader's similarity implementation
        $similarity = $this->_reader->getSimilarity();
        $this->_idf = $similarity->idf($this->_term, $this->_reader);

        // query weight = idf * boost of the owning query
        $weight = $this->_idf * $this->_query->getBoost();
        $this->_queryWeight = $weight;

        return $weight * $weight;
    }


    /**
     * Applies the query normalization factor and derives the final value.
     *
     * @param float $queryNorm
     */
    public function normalize($queryNorm)
    {
        $this->_queryNorm = $queryNorm;

        // scale the query weight by the normalization factor
        $this->_queryWeight = $this->_queryWeight * $queryNorm;

        // multiply by idf once more to get the per-document weight value
        $this->_value = $this->_queryWeight * $this->_idf;
    }
}
|
||||
|
120
search/Zend/Search/Lucene/Storage/Directory.php
Normal file
120
search/Zend/Search/Lucene/Storage/Directory.php
Normal file
@ -0,0 +1,120 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Storage
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Storage
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
abstract class Zend_Search_Lucene_Storage_Directory
{
    /**
     * Close the store.
     *
     * @return void
     */
    abstract public function close();

    /**
     * List the files held in the directory.
     *
     * @return array  file names, one string per file
     */
    abstract public function fileList();

    /**
     * Create a new, empty file called $filename in the directory.
     *
     * @param string $filename
     * @return Zend_Search_Lucene_Storage_File
     */
    abstract public function createFile($filename);

    /**
     * Remove the existing file $filename from the directory.
     *
     * @param string $filename
     * @return void
     */
    abstract public function deleteFile($filename);

    /**
     * Tell whether a file called $filename exists.
     *
     * @param string $filename
     * @return boolean
     */
    abstract public function fileExists($filename);

    /**
     * Length of the file $filename.
     *
     * @param string $filename
     * @return integer
     */
    abstract public function fileLength($filename);

    /**
     * UNIX timestamp of the last modification of $filename.
     *
     * @param string $filename
     * @return integer
     */
    abstract public function fileModified($filename);

    /**
     * Rename the existing file $from to $to.
     *
     * @param string $from
     * @param string $to
     * @return void
     */
    abstract public function renameFile($from, $to);

    /**
     * Set the modification time of $filename to now.
     *
     * @param string $filename
     * @return void
     */
    abstract public function touchFile($filename);

    /**
     * Obtain the Zend_Search_Lucene_Storage_File object for $filename.
     *
     * @param string $filename
     * @return Zend_Search_Lucene_Storage_File
     */
    abstract public function getFileObject($filename);
}
|
||||
|
272
search/Zend/Search/Lucene/Storage/Directory/Filesystem.php
Normal file
272
search/Zend/Search/Lucene/Storage/Directory/Filesystem.php
Normal file
@ -0,0 +1,272 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Storage
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Storage_Directory */
|
||||
require_once 'Zend/Search/Lucene/Storage/Directory.php';
|
||||
|
||||
/** Zend_Search_Lucene_Storage_File_Filesystem */
|
||||
require_once 'Zend/Search/Lucene/Storage/File/Filesystem.php';
|
||||
|
||||
|
||||
/**
|
||||
* FileSystem implementation of Directory abstraction.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Storage
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Storage_Directory_Filesystem extends Zend_Search_Lucene_Storage_Directory
{
    /**
     * Filesystem path to the directory
     *
     * @var string
     */
    private $_dirPath = null;

    /**
     * Cache for Zend_Search_Lucene_Storage_File_Filesystem objects
     * Array: filename => Zend_Search_Lucene_Storage_File object
     *
     * @var array
     */
    private $_fileHandlers;


    /**
     * Utility function for recursive directory creation.
     *
     * Creates $dir and every missing parent directory. $recursive is
     * accepted and passed down the recursion, but it is not consulted:
     * parents are always created.
     *
     * @param string  $dir
     * @param integer $mode
     * @param boolean $recursive
     * @return boolean  true if the directory exists (or was created)
     */
    static public function mkdirs($dir, $mode = 0777, $recursive = true)
    {
        if (is_null($dir) || $dir === '') {
            return false;
        }
        if (is_dir($dir) || $dir === '/') {
            return true;
        }

        // dirname() reaches a fixed point at a path root; if that root is
        // not a directory we would otherwise recurse forever.
        $parent = dirname($dir);
        if ($parent === $dir) {
            return false;
        }

        if (self::mkdirs($parent, $mode, $recursive)) {
            return mkdir($dir, $mode);
        }
        return false;
    }


    /**
     * Object constructor
     * Checks if $path is a directory or tries to create it.
     *
     * @param string $path
     * @throws Zend_Search_Lucene_Exception  if $path exists but is not a
     *         directory, or if it cannot be created
     */
    public function __construct($path)
    {
        if (!is_dir($path)) {
            if (file_exists($path)) {
                throw new Zend_Search_Lucene_Exception('Path exists, but it\'s not a directory');
            } else {
                if (!self::mkdirs($path)) {
                    throw new Zend_Search_Lucene_Exception("Can't create directory '$path'.");
                }
            }
        }
        $this->_dirPath      = $path;
        $this->_fileHandlers = array();
    }


    /**
     * Closes the store: closes every cached file object and empties the cache.
     *
     * @return void
     */
    public function close()
    {
        foreach ($this->_fileHandlers as $fileObject) {
            $fileObject->close();
        }

        // Reset rather than unset() so the property stays a valid array and
        // a repeated close() (or any later use) does not raise warnings.
        $this->_fileHandlers = array();
    }


    /**
     * Returns an array of strings, one for each file in the directory.
     * Sub-directories are not listed. An empty array is returned when the
     * directory cannot be opened.
     *
     * @return array
     */
    public function fileList()
    {
        $result = array();

        $dirContent = opendir($this->_dirPath);
        if ($dirContent === false) {
            return $result;
        }

        // Strict comparison: an entry named "0" is falsy but is a valid name.
        while (($file = readdir($dirContent)) !== false) {
            if ($file === '..' || $file === '.') {
                continue;
            }

            if (!is_dir($this->_dirPath . '/' . $file)) {
                $result[] = $file;
            }
        }

        closedir($dirContent);

        return $result;
    }


    /**
     * Creates a new, empty file in the directory with the given $filename.
     * A cached handler for the same name is closed and replaced.
     *
     * @param string $filename
     * @return Zend_Search_Lucene_Storage_File
     */
    public function createFile($filename)
    {
        if (isset($this->_fileHandlers[$filename])) {
            $this->_fileHandlers[$filename]->close();
        }
        unset($this->_fileHandlers[$filename]);
        $this->_fileHandlers[$filename] = new Zend_Search_Lucene_Storage_File_Filesystem($this->_dirPath . '/' . $filename, 'w+b');
        return $this->_fileHandlers[$filename];
    }


    /**
     * Removes an existing $filename in the directory.
     *
     * @param string $filename
     * @return void
     */
    public function deleteFile($filename)
    {
        if (isset($this->_fileHandlers[$filename])) {
            $this->_fileHandlers[$filename]->close();
        }
        unset($this->_fileHandlers[$filename]);
        unlink($this->_dirPath .'/'. $filename);
    }


    /**
     * Returns true if a file with the given $filename exists
     * (either as a cached handler or on disk).
     *
     * @param string $filename
     * @return boolean
     */
    public function fileExists($filename)
    {
        return isset($this->_fileHandlers[$filename]) ||
               file_exists($this->_dirPath . '/' . $filename);
    }


    /**
     * Returns the length of a $filename in the directory.
     * An already opened file is asked for its size directly.
     *
     * @param string $filename
     * @return integer
     */
    public function fileLength($filename)
    {
        if (isset($this->_fileHandlers[$filename])) {
            return $this->_fileHandlers[$filename]->size();
        }
        return filesize($this->_dirPath .'/'. $filename);
    }


    /**
     * Returns the UNIX timestamp $filename was last modified.
     *
     * @param string $filename
     * @return integer
     */
    public function fileModified($filename)
    {
        return filemtime($this->_dirPath .'/'. $filename);
    }


    /**
     * Renames an existing file in the directory.
     * Cached handlers for both names are closed and dropped, and an existing
     * target file is deleted first.
     *
     * @param string $from
     * @param string $to
     * @return boolean  result of rename(); warnings from rename() are suppressed
     */
    public function renameFile($from, $to)
    {
        // isset() instead of "!== null": reading a missing key raises a notice.
        if (isset($this->_fileHandlers[$from])) {
            $this->_fileHandlers[$from]->close();
        }
        unset($this->_fileHandlers[$from]);

        if (isset($this->_fileHandlers[$to])) {
            $this->_fileHandlers[$to]->close();
        }
        unset($this->_fileHandlers[$to]);

        if (file_exists($this->_dirPath . '/' . $to)) {
            unlink($this->_dirPath . '/' . $to);
        }

        return @rename($this->_dirPath . '/' . $from, $this->_dirPath . '/' . $to);
    }


    /**
     * Sets the modified time of $filename to now.
     *
     * @param string $filename
     * @return boolean  result of touch()
     */
    public function touchFile($filename)
    {
        return touch($this->_dirPath .'/'. $filename);
    }


    /**
     * Returns a Zend_Search_Lucene_Storage_File object for a given $filename in the directory.
     * A cached object is rewound to offset 0 before it is returned.
     *
     * @param string $filename
     * @return Zend_Search_Lucene_Storage_File
     */
    public function getFileObject($filename)
    {
        if (isset($this->_fileHandlers[$filename])) {
            $this->_fileHandlers[$filename]->seek(0);
            return $this->_fileHandlers[$filename];
        }

        $this->_fileHandlers[$filename] = new Zend_Search_Lucene_Storage_File_Filesystem($this->_dirPath . '/' . $filename);
        return $this->_fileHandlers[$filename];
    }
}
|
||||
|
371
search/Zend/Search/Lucene/Storage/File.php
Normal file
371
search/Zend/Search/Lucene/Storage/File.php
Normal file
@ -0,0 +1,371 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Storage
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Storage
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
abstract class Zend_Search_Lucene_Storage_File
{
    /**
     * Reads $length number of bytes at the current position in the
     * file and advances the file pointer.
     *
     * @param integer $length
     * @return string
     */
    abstract protected function _fread($length=1);


    /**
     * Sets the file position indicator and advances the file pointer.
     * The new position, measured in bytes from the beginning of the file,
     * is obtained by adding offset to the position specified by whence,
     * whose values are defined as follows:
     * SEEK_SET - Set position equal to offset bytes.
     * SEEK_CUR - Set position to current location plus offset.
     * SEEK_END - Set position to end-of-file plus offset. (To move to
     * a position before the end-of-file, you need to pass a negative value
     * in offset.)
     * Upon success, returns 0; otherwise, returns -1
     *
     * @param integer $offset
     * @param integer $whence
     * @return integer
     */
    abstract public function seek($offset, $whence=SEEK_SET);

    /**
     * Get file position.
     *
     * @return integer
     */
    abstract public function tell();

    /**
     * Writes $length number of bytes (all, if $length===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $length
     */
    abstract protected function _fwrite($data, $length=null);


    /**
     * Reads a byte from the current position in the file
     * and advances the file pointer.
     *
     * @return integer
     */
    public function readByte()
    {
        return ord($this->_fread(1));
    }

    /**
     * Writes a byte to the end of the file.
     *
     * @param integer $byte
     */
    public function writeByte($byte)
    {
        return $this->_fwrite(chr($byte), 1);
    }

    /**
     * Read num bytes from the current position in the file
     * and advances the file pointer.
     *
     * @param integer $num
     * @return string
     */
    public function readBytes($num)
    {
        return $this->_fread($num);
    }

    /**
     * Writes num bytes of data (all, if $num===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $num
     */
    public function writeBytes($data, $num=null)
    {
        $this->_fwrite($data, $num);
    }


    /**
     * Reads a 4-byte big-endian integer from the current position in the
     * file and advances the file pointer.
     *
     * @return integer
     */
    public function readInt()
    {
        $str = $this->_fread(4);

        return  ord($str[0]) << 24 |
                ord($str[1]) << 16 |
                ord($str[2]) << 8  |
                ord($str[3]);
    }


    /**
     * Writes an integer to the end of file as 4 big-endian bytes.
     *
     * @param integer $value
     */
    public function writeInt($value)
    {
        settype($value, 'integer');
        $this->_fwrite( chr($value>>24 & 0xFF) .
                        chr($value>>16 & 0xFF) .
                        chr($value>>8  & 0xFF) .
                        chr($value     & 0xFF),   4  );
    }


    /**
     * Returns a long integer from the current position in the file
     * and advances the file pointer.
     *
     * Eight bytes are consumed but only the low four are decoded.
     *
     * @return integer
     */
    public function readLong()
    {
        $str = $this->_fread(8);

        /**
         * PHP uses long as largest integer. fseek() uses long for offset.
         * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent
         * conversion to float.
         * So, largest index segment file is 2Gb
         */
        return  ord($str[4]) << 24 |
                ord($str[5]) << 16 |
                ord($str[6]) << 8  |
                ord($str[7]);
    }

    /**
     * Writes long integer to the end of file.
     *
     * The high four bytes are always written as zero (see readLong()).
     *
     * @param integer $value
     */
    public function writeLong($value)
    {
        /**
         * PHP uses long as largest integer. fseek() uses long for offset.
         * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent
         * conversion to float.
         * So, largest index segment file is 2Gb
         */
        settype($value, 'integer');
        $this->_fwrite( "\x00\x00\x00\x00"     .
                        chr($value>>24 & 0xFF) .
                        chr($value>>16 & 0xFF) .
                        chr($value>>8  & 0xFF) .
                        chr($value     & 0xFF),   8  );
    }


    /**
     * Returns a variable-length integer from the current
     * position in the file and advances the file pointer.
     *
     * Each byte carries 7 payload bits, least significant group first;
     * the high bit of a byte signals that another byte follows.
     *
     * @return integer
     */
    public function readVInt()
    {
        $nextByte = ord($this->_fread(1));
        $val = $nextByte & 0x7F;

        for ($shift=7; ($nextByte & 0x80) != 0; $shift += 7) {
            $nextByte = ord($this->_fread(1));
            $val |= ($nextByte & 0x7F) << $shift;
        }
        return $val;
    }

    /**
     * Writes a variable-length integer to the end of file.
     *
     * @param integer $value
     */
    public function writeVInt($value)
    {
        settype($value, 'integer');
        while ($value > 0x7F) {
            $this->_fwrite(chr( ($value & 0x7F)|0x80 ));
            $value >>= 7;
        }
        $this->_fwrite(chr($value));
    }


    /**
     * Reads a string from the current position in the file
     * and advances the file pointer.
     *
     * The stored length prefix counts characters, not bytes, so extra
     * bytes are pulled in for every multi-byte lead byte encountered.
     * The two-byte NUL form (0xC0 0x80) is collapsed to a single "\x00".
     *
     * @return string
     */
    public function readString()
    {
        $strlen = $this->readVInt();
        if ($strlen == 0) {
            return '';
        }

        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        $str_val = $this->_fread($strlen);

        // $strlen tracks the number of bytes that belong to the string so far
        for ($count = 0; $count < $strlen; $count++) {
            $byte = ord($str_val[$count]);
            if (($byte & 0xC0) != 0xC0) {
                continue;
            }

            $addBytes = 1;
            if ($byte & 0x20) {
                $addBytes++;

                // Never used. Java2 doesn't encode strings in four bytes
                if ($byte & 0x10) {
                    $addBytes++;
                }
            }
            $str_val .= $this->_fread($addBytes);
            $strlen  += $addBytes;

            // Check for null character. Java2 encodes null character
            // in two bytes.
            if ($byte == 0xC0 &&
                isset($str_val[$count + 1]) &&
                ord($str_val[$count + 1]) == 0x80) {
                // Replace the pair by a real NUL byte. The string shrinks by
                // one byte, so the byte budget shrinks too and the index must
                // not jump over the byte that now follows the NUL.
                $str_val = substr($str_val, 0, $count)
                         . "\x00"
                         . substr($str_val, $count + 2);
                $strlen--;
            } else {
                $count += $addBytes;
            }
        }

        return $str_val;
    }

    /**
     * Writes a string to the end of file.
     *
     * The length prefix is the number of characters; NUL bytes are written
     * in the two-byte form 0xC0 0x80.
     *
     * @param string $str
     * @throws Zend_Search_Lucene_Exception  if the computed character count
     *         is negative (truncated multi-byte sequence)
     */
    public function writeString($str)
    {
        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // convert input to a string before iterating string characters
        settype($str, 'string');

        $chars = $strlen = strlen($str);
        $containNullChars = false;

        for ($count = 0; $count < $strlen; $count++) {
            /**
             * String is already in Java 2 representation.
             * We should only calculate actual string length and replace
             * \x00 by \xC0\x80
             */
            $byte = ord($str[$count]);
            if (($byte & 0xC0) == 0xC0) {
                $addBytes = 1;
                if ($byte & 0x20) {
                    $addBytes++;

                    // Never used. Java2 doesn't encode strings in four bytes
                    // and we don't support non-BMP characters
                    if ($byte & 0x10) {
                        $addBytes++;
                    }
                }
                $chars -= $addBytes;
                $count += $addBytes;
            } elseif ($byte == 0) {
                // A NUL is a single character either way, so $chars is
                // unchanged; only the byte form written below differs.
                $containNullChars = true;
            }
        }

        if ($chars < 0) {
            throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
        }

        $this->writeVInt($chars);
        if ($containNullChars) {
            // str_replace(search, replace, subject)
            $this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
        } else {
            $this->_fwrite($str);
        }
    }


    /**
     * Reads binary data from the current position in the file
     * and advances the file pointer.
     *
     * The data is prefixed by its byte length stored as a VInt.
     *
     * @return string
     */
    public function readBinary()
    {
        return $this->_fread($this->readVInt());
    }
}
|
171
search/Zend/Search/Lucene/Storage/File/Filesystem.php
Normal file
171
search/Zend/Search/Lucene/Storage/File/Filesystem.php
Normal file
@ -0,0 +1,171 @@
|
||||
<?php
|
||||
/**
|
||||
* Zend Framework
|
||||
*
|
||||
* LICENSE
|
||||
*
|
||||
* This source file is subject to the new BSD license that is bundled
|
||||
* with this package in the file LICENSE.txt.
|
||||
* It is also available through the world-wide-web at this URL:
|
||||
* http://framework.zend.com/license/new-bsd
|
||||
* If you did not receive a copy of the license and are unable to
|
||||
* obtain it through the world-wide-web, please send an email
|
||||
* to license@zend.com so we can send you a copy immediately.
|
||||
*
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Storage
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
|
||||
|
||||
/** Zend_Search_Lucene_Storage_File */
|
||||
require_once 'Zend/Search/Lucene/Storage/File.php';
|
||||
|
||||
/** Zend_Search_Lucene_Exception */
|
||||
require_once 'Zend/Search/Lucene/Exception.php';
|
||||
|
||||
|
||||
/**
|
||||
* @category Zend
|
||||
* @package Zend_Search_Lucene
|
||||
* @subpackage Storage
|
||||
* @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
|
||||
* @license http://framework.zend.com/license/new-bsd New BSD License
|
||||
*/
|
||||
class Zend_Search_Lucene_Storage_File_Filesystem extends Zend_Search_Lucene_Storage_File
{
    /**
     * Resource of the open file
     *
     * @var resource
     */
    private $_fileHandle;

    /**
     * Class constructor. Open the file.
     *
     * @param string $filename
     * @param string $mode      fopen() mode, read/write binary by default
     * @throws Zend_Search_Lucene_Exception  if the file cannot be opened
     */
    public function __construct($filename, $mode='r+b')
    {
        global $php_errormsg;

        // track_errors makes PHP record the suppressed fopen() warning text
        $trackErrors = ini_get('track_errors');
        ini_set('track_errors', '1');

        $this->_fileHandle = @fopen($filename, $mode);

        // restore the setting before a possible throw
        ini_set('track_errors', $trackErrors);

        if ($this->_fileHandle === false) {
            // $php_errormsg may be unset; never throw an empty message
            $message = isset($php_errormsg)
                     ? $php_errormsg
                     : "Can't open file '$filename'.";
            throw new Zend_Search_Lucene_Exception($message);
        }
    }

    /**
     * Sets the file position indicator and advances the file pointer.
     * The new position, measured in bytes from the beginning of the file,
     * is obtained by adding offset to the position specified by whence,
     * whose values are defined as follows:
     * SEEK_SET - Set position equal to offset bytes.
     * SEEK_CUR - Set position to current location plus offset.
     * SEEK_END - Set position to end-of-file plus offset. (To move to
     * a position before the end-of-file, you need to pass a negative value
     * in offset.)
     * SEEK_CUR is the only supported offset type for compound files
     *
     * Upon success, returns 0; otherwise, returns -1
     *
     * @param integer $offset
     * @param integer $whence
     * @return integer
     */
    public function seek($offset, $whence=SEEK_SET)
    {
        return fseek($this->_fileHandle, $offset, $whence);
    }


    /**
     * Get file position.
     *
     * @return integer
     */
    public function tell()
    {
        return ftell($this->_fileHandle);
    }


    /**
     * Close File object. Safe to call more than once.
     */
    public function close()
    {
        if ($this->_fileHandle !== null) {
            @fclose($this->_fileHandle);
            $this->_fileHandle = null;
        }
    }

    /**
     * Get the size of the already opened file.
     * The current file position is preserved.
     *
     * @return integer
     */
    public function size()
    {
        $position = ftell($this->_fileHandle);
        fseek($this->_fileHandle, 0, SEEK_END);
        $size = ftell($this->_fileHandle);
        fseek($this->_fileHandle, $position);

        return $size;
    }

    /**
     * Read a $length bytes from the file and advance the file pointer.
     *
     * Requests of 1024 bytes or more are read in a loop because a single
     * fread() call may return fewer bytes than requested. The loop ends
     * early on a read error or at end of file, in which case the data
     * read so far is returned.
     *
     * @param integer $length
     * @return string
     */
    protected function _fread($length=1)
    {
        if ($length == 0) {
            return '';
        }

        if ($length < 1024) {
            return fread($this->_fileHandle, $length);
        }

        $data = '';
        while ($length > 0) {
            $nextBlock = fread($this->_fileHandle, $length);

            // Strict checks: a chunk consisting of the single character "0"
            // is valid data, although it compares loosely equal to false.
            if ($nextBlock === false || $nextBlock === '') {
                break;
            }

            $data   .= $nextBlock;
            $length -= strlen($nextBlock);
        }
        return $data;
    }


    /**
     * Writes $length number of bytes (all, if $length===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $length
     */
    protected function _fwrite($data, $length=null)
    {
        if ($length === null) {
            fwrite($this->_fileHandle, $data);
        } else {
            fwrite($this->_fileHandle, $data, $length);
        }
    }
}
|
||||
|
14
search/Zend/Search/TODO.txt
Normal file
14
search/Zend/Search/TODO.txt
Normal file
@ -0,0 +1,14 @@
|
||||
@todo
|
||||
|
||||
- Improve API: fix ZSearchMultiTermQuery($terms, $signs);
|
||||
|
||||
- Analysis and indexing engine
|
||||
|
||||
- Additional queries: phrase, wildcard, proximity, and range
|
||||
|
||||
- Better class-level docblocks (most functions okay)
|
||||
|
||||
- Some Windows issues(?) during indexing
|
||||
|
||||
- Finish renaming classes to PEAR-like conventions
|
||||
|
15
search/db/mysql.sql
Normal file
15
search/db/mysql.sql
Normal file
@ -0,0 +1,15 @@
|
||||
-- Registry of the documents known to the global search index (MySQL).
CREATE TABLE IF NOT EXISTS `search_documents` (
  `id`       INT(11)      NOT NULL AUTO_INCREMENT,
  `type`     VARCHAR(12)  NOT NULL DEFAULT 'none',
  `title`    VARCHAR(100) NOT NULL DEFAULT '',
  `url`      VARCHAR(100) NOT NULL DEFAULT '',
  `updated`  TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP,
  `courseid` INT(11)      NOT NULL DEFAULT '0',
  `userid`   INT(11)      NOT NULL DEFAULT '0',
  `groupid`  INT(11)      NOT NULL DEFAULT '0',
  PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=1;

-- The table may already exist (IF NOT EXISTS above): empty it and
-- restart id numbering from 1.
DELETE FROM `search_documents` WHERE 1;
ALTER TABLE `search_documents` AUTO_INCREMENT = 1;
|
||||
|
21
search/db/postgres7.sql
Normal file
21
search/db/postgres7.sql
Normal file
@ -0,0 +1,21 @@
|
||||
-- Registry of the documents known to the global search index (PostgreSQL).
--
-- NOTE: the id sequence is created implicitly by the serial column, so the
-- setval() call below depends on PostgreSQL's default sequence naming
-- (search_documents_id_seq). Creating the sequence explicitly would remove
-- that dependency.
-- NOTE: the table owner is not set here either (see the commented ALTER).

CREATE TABLE search_documents
(
  id serial,
  "type" varchar(12) NOT NULL DEFAULT 'none',
  title varchar(100) NOT NULL DEFAULT '',
  url varchar(100) NOT NULL DEFAULT '',
  updated timestamp NOT NULL DEFAULT NOW(),
  -- NOT NULL DEFAULT 0 keeps these columns consistent with the MySQL schema
  courseid int4 NOT NULL DEFAULT 0,
  userid int4 NOT NULL DEFAULT 0,
  groupid int4 NOT NULL DEFAULT 0,
  CONSTRAINT id_pkey PRIMARY KEY (id)
) WITHOUT OIDS;

--ALTER TABLE search_documents OWNER TO postgres;

DELETE FROM search_documents;

-- is_called = false: the next nextval() returns 1 (not 2), matching the
-- MySQL script, which restarts AUTO_INCREMENT at 1.
SELECT setval('public.search_documents_id_seq', 1, false);
|
12
search/documents/document.php
Normal file
12
search/documents/document.php
Normal file
@ -0,0 +1,12 @@
|
||||
<?php
|
||||
|
||||
class SearchDocument extends Zend_Search_Lucene_Document {
    //Common base for indexed documents: records the document type together
    //with the course, user and group ids as Keyword fields.
    public function __construct($document_type, $cid, $uid, $gid) {
        $keywords = array(
            'type'     => $document_type,
            'courseid' => $cid,
            'userid'   => $uid,
            'groupid'  => $gid,
        );

        //fields are added in the order they are listed above
        foreach ($keywords as $field_name => $field_value) {
            $this->addField(Zend_Search_Lucene_Field::Keyword($field_name, $field_value));
        } //foreach
    } //constructor
} //SearchDocument
|
||||
|
||||
?>
|
28
search/documents/wiki_document.php
Normal file
28
search/documents/wiki_document.php
Normal file
@ -0,0 +1,28 @@
|
||||
<?php
|
||||
|
||||
require_once("$CFG->dirroot/search/documents/document.php");
|
||||
|
||||
class WikiSearchDocument extends SearchDocument {
    //Builds the index document for one wiki page: title, author and contents
    //first, then the page id, version and owning wiki, and finally the shared
    //type/course/user/group fields added by the parent constructor.
    public function __construct(&$page, $wiki_id, $cid, $uid, $gid) {
        $fields = array(
            Zend_Search_Lucene_Field::Text('title', $page->pagename),
            Zend_Search_Lucene_Field::Text('author', $page->author),
            Zend_Search_Lucene_Field::UnStored('contents', $page->content),
            Zend_Search_Lucene_Field::Keyword('id', $page->id),
            Zend_Search_Lucene_Field::Keyword('version', $page->version),
            Zend_Search_Lucene_Field::Keyword('wiki', $wiki_id),
        );

        foreach ($fields as $field) {
            $this->addField($field);
        } //foreach

        parent::__construct(SEARCH_WIKI_TYPE, $cid, $uid, $gid);
    } //constructor
} //WikiSearchDocument
|
||||
|
||||
//turns every space in a wiki page name into a '+'
function wiki_name_convert($str) {
    $converted = strtr($str, ' ', '+');

    return $converted;
} //wiki_name_convert
|
||||
|
||||
//builds the mod/wiki/view.php url for a search hit from its wiki, title and
//version fields
function wiki_make_link(&$doc) {
    global $CFG;

    $query = 'wid='.$doc->wiki
            .'&page='.wiki_name_convert($doc->title)
            .'&version='.$doc->version;

    return $CFG->wwwroot.'/mod/wiki/view.php?'.$query;
} //wiki_make_link
|
||||
|
||||
?>
|
10
search/index.php
Normal file
10
search/index.php
Normal file
@ -0,0 +1,10 @@
|
||||
<?php
|
||||
/*$id = required_param('id', PARAM_INT); // course
|
||||
if (! $course = get_record("course", "id", $id)) {
|
||||
error("Course ID is incorrect");
|
||||
}
|
||||
require_course_login($course);
|
||||
add_to_log($course->id, "wiki", "view all", "index.php?id=$course->id", "");*/
|
||||
|
||||
header("Location: query.php");
|
||||
?>
|
152
search/indexer.php
Normal file
152
search/indexer.php
Normal file
@ -0,0 +1,152 @@
|
||||
<?php
|
||||
//this'll take some time, set up the environment
|
||||
@set_time_limit(0);
|
||||
@ob_implicit_flush(true);
|
||||
@ob_end_flush();
|
||||
|
||||
require_once('../config.php');
|
||||
require_once("$CFG->dirroot/search/lib.php");
|
||||
|
||||
require_login();
|
||||
|
||||
if (!isadmin()) {
|
||||
error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php");
|
||||
} //if
|
||||
|
||||
$sure = strtolower(optional_param('areyousure', '', PARAM_ALPHA));
|
||||
|
||||
if ($sure != 'yes') {
|
||||
mtrace("Sorry, you weren't sure enough (<a href='index.php'>back to query page</a>).");
|
||||
exit(0);
|
||||
} //if
|
||||
|
||||
//check for php5 (lib.php)
|
||||
if (!search_check_php5()) {
|
||||
$phpversion = phpversion();
|
||||
mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)");
|
||||
exit(0);
|
||||
} //if
|
||||
|
||||
require_once("$CFG->dirroot/search/Zend/Search/Lucene.php");
|
||||
|
||||
//begin timer
|
||||
search_stopwatch();
|
||||
mtrace('<pre>Server Time: '.date('r',time())."\n");
|
||||
|
||||
//paths
|
||||
$index_path = $CFG->dataroot.'/search';
|
||||
$index_db_file = "$CFG->dirroot/search/db/$CFG->dbtype.sql";
|
||||
|
||||
if (!file_exists($index_path)) {
|
||||
mtrace("Data directory ($index_path) does not exist, attempting to create.");
|
||||
if (!mkdir($index_path)) {
|
||||
search_pexit("Error creating data directory at: $index_path. Please correct.");
|
||||
} else {
|
||||
mtrace("Directory successfully created.");
|
||||
} //else
|
||||
} else {
|
||||
mtrace("Using $index_path as data directory.");
|
||||
} //else
|
||||
|
||||
//stop accidental re-indexing (zzz)
|
||||
//search_pexit("Not indexing at this time.");
|
||||
|
||||
$index = new Zend_Search_Lucene($index_path, true);
|
||||
|
||||
//create the database tables
|
||||
ob_start(); //turn output buffering on - to hide modify_database() output
|
||||
modify_database($index_db_file, '', false);
|
||||
ob_end_clean(); //chuck the buffer and resume normal operation
|
||||
|
||||
//empty database table goes here
|
||||
// delete * from search_documents;
|
||||
// set auto_increment back to 1
|
||||
|
||||
//-------- debug stuff
|
||||
/*
|
||||
include_once("$CFG->dirroot/mod/wiki/lib.php");
|
||||
|
||||
$wikis = get_all_instances_in_courses("wiki", get_courses());
|
||||
#search_pexit($wikis[1]);
|
||||
$entries = wiki_get_entries($wikis[1]);
|
||||
#search_pexit($entries);
|
||||
|
||||
#$r = wiki_get_pages($entries[134]);
|
||||
$r = wiki_get_latest_pages($entries[95]);
|
||||
|
||||
search_pexit($r);
|
||||
//ignore me --------*/
|
||||
|
||||
mtrace('Starting activity modules');
|
||||
//Walk over every installed activity module and index the ones that provide
//both hooks in their lib.php: <mod>_iterator() (returns the items to walk
//over) and <mod>_get_content_for_index() (returns the documents for one item).
if ($mods = get_records_select('modules' /*'index this module?' where statement*/)) {
    foreach ($mods as $mod) {
        $libfile = "$CFG->dirroot/mod/$mod->name/lib.php";

        if (file_exists($libfile)) {
            include_once($libfile);

            $iter_function = $mod->name.'_iterator';
            $index_function = $mod->name.'_get_content_for_index';
            $include_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
            $c = 0; //documents indexed for this module

            if (function_exists($index_function) && function_exists($iter_function)) {
                include_once($include_file);

                mtrace("Processing module function $index_function ...");

                foreach ($iter_function() as $i) {
                    $documents = $index_function($i);

                    //begin transaction

                    foreach($documents as $document) {
                        $c++;

                        //db sync increases indexing time from 55 sec to 73 (64 on Saturday?), so ~30%
                        //therefore, let us make a custom insert function for this search module

                        //data object for db - a fresh object per document so
                        //nothing leaks over from the previous one; only the
                        //columns declared in search/db/*.sql are set, and
                        //`updated` is left to its database default
                        $doc = new stdClass;
                        $doc->type = $document->type;
                        //addslashes() instead of mysql_real_escape_string(),
                        //which only works on a MySQL connection although a
                        //postgres schema ships with this module
                        //NOTE(review): assumes insert_record() expects
                        //pre-slashed strings - confirm
                        $doc->title = addslashes($document->title);
                        $doc->url = 'none';
                        $doc->courseid = $document->courseid;
                        $doc->userid = $document->userid;
                        $doc->groupid = $document->groupid;

                        //insert summary into db
                        $id = insert_record($CFG->prefix.'search_documents', $doc);

                        //synchronise db with index
                        $document->addField(Zend_Search_Lucene_Field::Keyword('dbid', $id));
                        $index->addDocument($document);

                        //commit every 100 new documents, and print a status message
                        if (($c%100) == 0) {
                            $index->commit();
                            mtrace(".. $c");
                        } //if
                    } //foreach

                    //end transaction

                } //foreach

                //commit left over documents, and finish up
                $index->commit();
                mtrace("-- $c documents indexed");
                mtrace('done.');
            } //if
        } //if
    } //foreach
} //if
|
||||
|
||||
//done modules
|
||||
mtrace('Finished activity modules');
|
||||
search_stopwatch();
|
||||
mtrace(".<br><a href='index.php'>Back to query page</a>.");
|
||||
mtrace('</pre>');
|
||||
|
||||
?>
|
44
search/indexersplash.php
Normal file
44
search/indexersplash.php
Normal file
@ -0,0 +1,44 @@
|
||||
<?php
|
||||
require_once('../config.php');
|
||||
require_once("$CFG->dirroot/search/lib.php");
|
||||
|
||||
require_login();
|
||||
|
||||
if (!isadmin()) {
|
||||
error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php");
|
||||
} //if
|
||||
|
||||
//check for php5 (lib.php)
|
||||
if (!search_check_php5()) {
|
||||
$phpversion = phpversion();
|
||||
mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)");
|
||||
exit(0);
|
||||
} //if
|
||||
|
||||
$index_path = "$CFG->dataroot/search";
|
||||
$index_dir = get_directory_list($index_path, '', false, false);
|
||||
$index_filecount = count($index_dir);
|
||||
|
||||
//check if the table exists in the db, and count its records if it does
$tables = $db->MetaTables();

//in_array() rather than array_search(): array_search() returns the matching
//key, which is 0 (and therefore false in a condition) when the table happens
//to be the first entry of the list
if (is_array($tables) && in_array('search_documents', $tables)) {
    $db_count = count_records($CFG->prefix.'search_documents');
} else {
    $db_count = 0;
} //else
|
||||
|
||||
//elaborate on error messages, when db!=0 and index=0 -> corrupt, etc.
|
||||
if ($index_filecount != 0 or $db_count != 0) {
|
||||
mtrace("<pre>The data directory ($index_path) contains $index_filecount files, and "
|
||||
."there are $db_count records in the <em>search_documents</em> table.");
|
||||
mtrace('');
|
||||
mtrace("This indicates that you have already indexed this site - click the following "
|
||||
."link if you're sure you want to continue: <a href='indexer.php?areyousure=yes'>Go!</a>");
|
||||
mtrace('');
|
||||
mtrace("<a href='index.php'>Back to query page</a>.");
|
||||
mtrace("</pre>");
|
||||
} else {
|
||||
header('Location: indexer.php?areyousure=yes');
|
||||
} //else
|
||||
?>
|
59
search/lib.php
Normal file
59
search/lib.php
Normal file
@ -0,0 +1,59 @@
|
||||
<?php
|
||||
|
||||
//Move this stuff to lib/searchlib.php?
|
||||
// Author: Michael Champanis
|
||||
|
||||
//document types that can be searched
|
||||
define('SEARCH_NO_TYPE', 'none');
|
||||
define('SEARCH_WIKI_TYPE', 'wiki');
|
||||
|
||||
//lists every searchable document type constant (wiki first, then the
//'no type' placeholder)
function search_get_document_types() {
    return array(SEARCH_WIKI_TYPE, SEARCH_NO_TYPE);
} //search_get_document_types
|
||||
|
||||
//shortens a url so it can fit on the results page: a url of at most $length
//characters is returned unchanged, a longer one is cut to its first $length
//characters and suffixed with "..." to show that it was truncated
function search_shorten_url($url, $length=30) {
    if (strlen($url) <= $length) {
        return $url;
    } //if

    return substr($url, 0, $length)."...";
} //search_shorten_url
|
||||
|
||||
//reports whether we are running on php 5 (5.0.0 is used as an arbitrary
//baseline): returns true if so, false otherwise - with $feedback set, a
//heading explaining the requirement is printed before false is returned
function search_check_php5($feedback=false) {
    if (check_php_version("5.0.0")) {
        return true;
    } //if

    if ($feedback) {
        $running = phpversion();
        print_heading("Sorry, global search requires PHP 5.0.0 or later (currently using version $running)");
    } //if

    return false;
} //search_check_php5
|
||||
|
||||
//simple two-step timer: the first call stores the start time in
//$GLOBALS['search_script_start_time'] and prints nothing, the next call
//prints the elapsed seconds (wrapped in <em> unless $cli is set) and clears
//the stored start time again
function search_stopwatch($cli = false) {
    if (empty($GLOBALS['search_script_start_time'])) {
        $GLOBALS['search_script_start_time'] = microtime(true);
        return;
    } //if

    $elapsed = round(microtime(true) - $GLOBALS['search_script_start_time'], 6).' seconds';
    print $cli ? $elapsed : '<em>'.$elapsed.'</em>';

    unset($GLOBALS['search_script_start_time']);
} //search_stopwatch
|
||||
|
||||
//debugging aid: dumps $str and stops the script - arrays and objects go
//through print_r(), any other non-empty value is printed followed by <br>,
//and an empty value prints nothing before exiting
function search_pexit($str = "") {
    $is_structure = is_array($str) || is_object($str);

    if ($is_structure) {
        print_r($str);
    } elseif ($str) {
        print $str."<br>";
    } //if

    exit(0);
} //search_pexit
|
||||
|
||||
?>
|
116
search/query.php
Normal file
116
search/query.php
Normal file
@ -0,0 +1,116 @@
|
||||
<?php
|
||||
|
||||
require_once('../config.php');
|
||||
require_once("$CFG->dirroot/search/lib.php");
|
||||
|
||||
//check for php5, but don't die yet (see line 27)
|
||||
if ($check = search_check_php5()) {
|
||||
require_once("$CFG->dirroot/search/Zend/Search/Lucene.php");
|
||||
require_once("$CFG->dirroot/search/documents/wiki_document.php");
|
||||
|
||||
$query_string = optional_param('query_string', '', PARAM_CLEAN);
|
||||
$index_path = "$CFG->dataroot/search";
|
||||
$no_index = false; //optimism!
|
||||
|
||||
try {
|
||||
$index = new Zend_Search_Lucene($index_path, false);
|
||||
} catch(Exception $e) {
|
||||
//print $e;
|
||||
$no_index = true;
|
||||
} //catch
|
||||
} //if
|
||||
|
||||
if (!$site = get_site()) {
|
||||
redirect("index.php");
|
||||
} //if
|
||||
|
||||
$strsearch = "Search"; //get_string();
|
||||
$strquery = "Enter your search query"; //get_string();
|
||||
|
||||
print_header("$site->shortname: $strsearch: $strquery", "$site->fullname",
|
||||
"<a href=\"index.php\">$strsearch</a> -> $strquery");
|
||||
|
||||
//keep things pretty, even if php5 isn't available
|
||||
if (!$check) {
    //search_check_php5(true) prints the explanatory heading itself and returns
    //false; passing that result on to print_heading() would only add a second
    //heading with nothing in it
    search_check_php5(true);
    print_footer();
    exit(0);
} //if
|
||||
|
||||
print_simple_box_start('center', '100%', '', 20);
|
||||
print_heading($strquery);
|
||||
|
||||
print_simple_box_start('center', '', '', 20);
|
||||
?>
|
||||
|
||||
<form name="query" method="get" action="query.php">
<input type="text" name="query_string" size="50" value="<?php /* escaped so quotes in the submitted query cannot break out of the attribute */ print htmlspecialchars($query_string, ENT_QUOTES) ?>"/>
<input type="submit" value="Search"/> <a href="query.php?advanced=yes">Advanced search</a>
<a href="stats.php">Statistics</a>
</form>
|
||||
|
||||
<br>
|
||||
|
||||
<div align="center">
|
||||
<?php
|
||||
echo 'Searching: ';
|
||||
|
||||
if ($no_index) {
|
||||
print "0";
|
||||
} else {
|
||||
print $index->count();
|
||||
} //else
|
||||
|
||||
print ' documents.';
|
||||
|
||||
if ($no_index and isadmin()) {
|
||||
print "<br><br>Admin: There appears to be no index, click <a href='indexersplash.php'>here</a> to create one.";
|
||||
} //if
|
||||
?>
|
||||
</div>
|
||||
|
||||
<?php
|
||||
print_simple_box_end();
|
||||
|
||||
//Run the query against the index and print the hits as an ordered list. The
//stopwatch started here is read out by the "It took ..." block further down.
if (!empty($query_string) and !$no_index) {
    print_simple_box_start('center', '50%', 'white', 10);

    search_stopwatch();
    $hits = $index->find(strtolower($query_string));

    print "<br>";

    //the query comes straight from the request, so escape it before echoing
    print count($hits)." results returned for '".htmlspecialchars($query_string, ENT_QUOTES)."'.";
    print "<br><br>";

    print "<ol>";

    foreach ($hits as $listing) {
        //every hit can be of a different document type, so the link builder
        //(<type>_make_link) is looked up per hit instead of once from the
        //first hit
        $link_function = $listing->type.'_make_link';
        $title = htmlspecialchars($listing->title, ENT_QUOTES);

        print "<li>";

        if (function_exists($link_function)) {
            //build the link once and reuse it for the href and the short form
            $url = $link_function($listing);

            print "<a href='".htmlspecialchars($url, ENT_QUOTES)."'>$title</a><br>\n"
                 ."<em>".htmlspecialchars(search_shorten_url($url, 70), ENT_QUOTES)."</em><br>\n";
        } else {
            //no link builder is loaded for this type: show the title unlinked
            print "$title<br>\n";
        } //else

        print "Type: ".$listing->type.", score: ".round($listing->score, 3)."<br>\n"
             ."<br></li>\n";
    } //foreach

    print "</ol>";

    print_simple_box_end();
} //if
|
||||
|
||||
if (!empty($query_string) and !$no_index) {
|
||||
?>
|
||||
|
||||
<div align="center">
|
||||
It took <?php search_stopwatch(); ?> to fetch these results.
|
||||
</div>
|
||||
|
||||
<?php
|
||||
} //if
|
||||
|
||||
print_simple_box_end();
|
||||
print_footer();
|
||||
?>
|
91
search/stats.php
Normal file
91
search/stats.php
Normal file
@ -0,0 +1,91 @@
|
||||
<?php
|
||||
require_once('../config.php');
|
||||
require_once("$CFG->dirroot/search/lib.php");
|
||||
|
||||
//check for php5, but don't die yet
|
||||
//Collect the statistics shown further down: size and file count of the index
//directory, plus per-type record counts from the search_documents table.
if ($check = search_check_php5()) {
    //filesystem stats
    $index_path = "$CFG->dataroot/search";
    $index_size = display_size(get_directory_size($index_path));
    $index_dir = get_directory_list($index_path, '', false, false);
    $index_filecount = count($index_dir);

    //indexed documents stats
    $tables = $db->MetaTables();

    //in_array() rather than array_search(): array_search() returns the
    //matching key, which is 0 (and therefore false in a condition) when the
    //table happens to be the first entry of the list
    if (is_array($tables) && in_array('search_documents', $tables)) {
        $types = search_get_document_types();
        sort($types);

        //total documents
        $type_counts['Total'] = count_records($CFG->prefix.'search_documents');

        foreach($types as $type) {
            $c = count_records($CFG->prefix.'search_documents', 'type', $type);
            $type_counts[$type] = (int)$c;
        } //foreach
    } else {
        $type_counts['Total'] = 0;
    } //else
} //if
|
||||
|
||||
if (!$site = get_site()) {
|
||||
redirect("index.php");
|
||||
} //if
|
||||
|
||||
$strsearch = "Search"; //get_string();
|
||||
$strquery = "Search statistics"; //get_string();
|
||||
|
||||
print_header("$site->shortname: $strsearch: $strquery", "$site->fullname",
|
||||
"<a href=\"index.php\">$strsearch</a> -> $strquery");
|
||||
|
||||
//keep things pretty, even if php5 isn't available
|
||||
if (!$check) {
    //search_check_php5(true) prints the explanatory heading itself and returns
    //false; passing that result on to print_heading() would only add a second
    //heading with nothing in it
    search_check_php5(true);
    print_footer();
    exit(0);
} //if
|
||||
|
||||
print_simple_box_start('center', '100%', '', 20);
|
||||
print_heading($strquery);
|
||||
|
||||
print_simple_box_start('center', '', '', 20);
|
||||
|
||||
//table describing the index on disk (path, file count, total size); created
//explicitly instead of relying on php conjuring an object out of an
//undefined variable
$table = new stdClass;
$table->tablealign = "center";
$table->align = array ("right", "left");
$table->wrap = array ("nowrap", "nowrap");
$table->cellpadding = 5;
$table->cellspacing = 0;
$table->width = '500';

$table->data[] = array('<strong>Data directory</strong>', '<em><strong>'.$index_path.'</strong></em>');
$table->data[] = array('Files in index directory', $index_filecount);
$table->data[] = array('Total size', $index_size);

//nothing on disk yet: offer a link to the indexer
if ($index_filecount == 0) {
    $table->data[] = array('Click to create index', "<a href='indexersplash.php'>Indexer</a>");
} //if

//table describing the database side: one row per entry of $type_counts
$return_of_table = new stdClass;
$return_of_table->tablealign = "center";
$return_of_table->align = array ("right", "left");
$return_of_table->wrap = array ("nowrap", "nowrap");
$return_of_table->cellpadding = 5;
$return_of_table->cellspacing = 0;
$return_of_table->width = '500';

//the header cell previously opened a second <strong> instead of closing the first
$return_of_table->data[] = array('<strong>Database</strong>', '<em><strong>search_documents</strong></em>');
foreach($type_counts as $key => $value) {
    $return_of_table->data[] = array($key, $value);
} //foreach
|
||||
|
||||
if (isadmin()) {
|
||||
print_table($table);
|
||||
print_spacer(20);
|
||||
} //if
|
||||
|
||||
print_table($return_of_table);
|
||||
|
||||
print_simple_box_end();
|
||||
print_simple_box_end();
|
||||
print_footer();
|
||||
?>
|
Loading…
x
Reference in New Issue
Block a user