1
0
mirror of https://github.com/tomahawk-player/tomahawk.git synced 2025-07-31 19:30:21 +02:00

* Ported to lucene++.

This commit is contained in:
Christian Muehlhaeuser
2014-09-16 01:02:59 +02:00
parent 88a5b6d97c
commit f837e9e7e5
8 changed files with 110 additions and 137 deletions

View File

@@ -250,6 +250,9 @@ macro_log_feature(ECHONEST_FOUND "Echonest" "Qt library for communicating with T
macro_optional_find_package(CLucene 0.9.23)
macro_log_feature(CLucene_FOUND "CLucene" "The open-source, C++ search engine" "http://clucene.sf.net" TRUE "" "CLucene is used for indexing the collection")
macro_optional_find_package(Lucene++ 3.0.0)
macro_log_feature(LUCENEPP_FOUND "Lucene++" "The open-source, C++ search engine" "https://github.com/luceneplusplus/LucenePlusPlus/" TRUE "" "Lucene++ is used for indexing the collection")
if( NOT TOMAHAWK_QT5 )
macro_optional_find_package(QJSON 0.8.1)
macro_log_feature(QJSON_FOUND "QJson" "Qt library that maps JSON data to QVariant objects" "http://qjson.sf.net" TRUE "" "libqjson is used for encoding communication between Tomahawk instances")

View File

@@ -71,7 +71,7 @@ ENDIF(WIN32)
SET(LUCENEPP_GOOD_VERSION TRUE)
FIND_PATH(LUCENEPP_LIBRARY_DIR
NAMES liblucene++.so
NAMES liblucene++.dylib
PATHS ${TRIAL_LIBRARY_PATHS} ${TRIAL_INCLUDE_PATHS} NO_DEFAULT_PATH)
IF (LUCENEPP_LIBRARY_DIR)
MESSAGE(STATUS "Found Lucene++ library dir: ${LUCENEPP_LIBRARY_DIR}")

View File

@@ -406,7 +406,7 @@ include_directories(
${QT_INCLUDE_DIR}
${QJSON_INCLUDE_DIR}
${ECHONEST_INCLUDE_DIR}
${CLUCENE_INCLUDE_DIRS}
${LUCENEPP_INCLUDE_DIRS}
${PHONON_INCLUDES}
${Boost_INCLUDE_DIR}
@@ -525,12 +525,13 @@ TARGET_LINK_LIBRARIES( tomahawklib
${LIBPORTFWD_LIBRARIES}
${QTKEYCHAIN_LIBRARIES}
boost_system
LINK_PUBLIC
# External deps
${QJSON_LIBRARIES}
${TAGLIB_LIBRARIES}
${CLUCENE_LIBRARIES}
${LUCENEPP_LIBRARIES}
${ECHONEST_LIBRARIES}
${QT_QTSQL_LIBRARY}
${QT_QTUITOOLS_LIBRARY}

View File

@@ -19,8 +19,8 @@
*/
#pragma once
#ifndef QUERY_H
#define QUERY_H
#ifndef TOMAHAWK_QUERY_H
#define TOMAHAWK_QUERY_H
#include <QObject>
#include <QList>
@@ -154,4 +154,4 @@ private:
Q_DECLARE_METATYPE( Tomahawk::query_ptr )
#endif // QUERY_H
#endif // TOMAHAWK_QUERY_H

View File

@@ -429,7 +429,6 @@ TomahawkSettings::doUpgrade( int oldVersion, int newVersion )
setValue( "configuration", configuration );
endGroup();
}
// Add a Last.Fm account since we now moved the infoplugin into the account
@@ -674,6 +673,11 @@ TomahawkSettings::doUpgrade( int oldVersion, int newVersion )
}
#endif //Q_OS_MAC
}
else if ( oldVersion == 15 )
{
// 0.8.0 switches to Lucene++. Force a reindex.
QTimer::singleShot( 0, this, SLOT( updateIndex() ) );
}
}

View File

@@ -31,7 +31,7 @@
#include <QNetworkProxy>
#include <QStringList>
#define TOMAHAWK_SETTINGS_VERSION 15
#define TOMAHAWK_SETTINGS_VERSION 16
/**
* Convenience wrapper around QSettings for tomahawk-specific config

View File

@@ -29,47 +29,36 @@
#include <QTime>
#include <QTimer>
#include <CLucene.h>
#include <CLucene/queryParser/MultiFieldQueryParser.h>
#include <lucene++/FuzzyQuery.h>
using namespace lucene::analysis;
using namespace lucene::analysis::standard;
using namespace lucene::document;
using namespace lucene::store;
using namespace lucene::index;
using namespace lucene::queryParser;
using namespace lucene::search;
using namespace Lucene;
FuzzyIndex::FuzzyIndex( QObject* parent, const QString& filename, bool wipe )
: QObject( parent )
, m_luceneReader( 0 )
, m_luceneSearcher( 0 )
{
m_lucenePath = TomahawkUtils::appDataDir().absoluteFilePath( filename );
QByteArray path = m_lucenePath.toUtf8();
const char* cPath = path.constData();
bool failed = false;
tDebug() << "Opening Lucene directory:" << path;
tDebug() << "Opening Lucene directory:" << m_lucenePath;
try
{
m_analyzer = _CLNEW SimpleAnalyzer();
m_luceneDir = FSDirectory::getDirectory( cPath );
m_analyzer = newLucene<StandardAnalyzer>(LuceneVersion::LUCENE_CURRENT);
m_luceneDir = FSDirectory::open( m_lucenePath.toStdWString() );
}
catch ( CLuceneError& error )
catch ( LuceneException& error )
{
tDebug() << "Caught CLucene error:" << error.what();
tDebug() << "Caught Lucene error:" << error.what();
failed = true;
}
if ( failed )
/* if ( failed )
{
tDebug() << "Initializing RAM directory instead.";
m_luceneDir = _CLNEW RAMDirectory();
wipe = true;
}
}*/
if ( wipe )
wipeIndex();
@@ -78,10 +67,10 @@ FuzzyIndex::FuzzyIndex( QObject* parent, const QString& filename, bool wipe )
FuzzyIndex::~FuzzyIndex()
{
delete m_luceneSearcher;
/* delete m_luceneSearcher;
delete m_luceneReader;
delete m_analyzer;
delete m_luceneDir;
delete m_luceneDir;*/
}
@@ -113,24 +102,22 @@ FuzzyIndex::beginIndexing()
try
{
tDebug( LOGVERBOSE ) << Q_FUNC_INFO << "Starting indexing:" << m_lucenePath;
if ( m_luceneReader != 0 )
if ( m_luceneReader )
{
tDebug( LOGVERBOSE ) << "Deleting old lucene stuff.";
m_luceneSearcher->close();
m_luceneReader->close();
delete m_luceneSearcher;
delete m_luceneReader;
m_luceneSearcher = 0;
m_luceneReader = 0;
m_luceneSearcher.reset();
m_luceneReader.reset();
}
tDebug( LOGVERBOSE ) << "Creating new index writer.";
m_luceneWriter = new IndexWriter( m_luceneDir, m_analyzer, true );
m_luceneWriter = newLucene<IndexWriter>( m_luceneDir, m_analyzer, true, IndexWriter::MaxFieldLengthLIMITED );
}
catch( CLuceneError& error )
catch( LuceneException& error )
{
tDebug() << "Caught CLucene error:" << error.what();
tDebug() << "Caught Lucene error:" << error.what();
Q_ASSERT( false );
}
}
@@ -142,8 +129,7 @@ FuzzyIndex::endIndexing()
tDebug( LOGVERBOSE ) << Q_FUNC_INFO << "Finishing indexing:" << m_lucenePath;
// m_luceneWriter->optimize();
m_luceneWriter->close();
delete m_luceneWriter;
m_luceneWriter = 0;
m_luceneWriter.reset();
m_mutex.unlock();
emit indexReady();
@@ -155,41 +141,41 @@ FuzzyIndex::appendFields( const Tomahawk::IndexData& data )
{
try
{
Document doc;
DocumentPtr doc = newLucene<Document>();
if ( !data.track.isEmpty() )
{
doc.add( *( _CLNEW Field( _T( "fulltext" ), Tomahawk::DatabaseImpl::sortname( QString( "%1 %2" ).arg( data.artist ).arg( data.track ) ).toStdWString().c_str(),
Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) );
doc->add(newLucene<Field>(L"fulltext", Tomahawk::DatabaseImpl::sortname( QString( "%1 %2" ).arg( data.artist ).arg( data.track ) ).toStdWString(),
Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) );
doc.add( *( _CLNEW Field( _T( "track" ), Tomahawk::DatabaseImpl::sortname( data.track ).toStdWString().c_str(),
Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) );
doc->add(newLucene<Field>(L"track", Tomahawk::DatabaseImpl::sortname( data.track ).toStdWString(),
Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) );
doc.add( *( _CLNEW Field( _T( "artist" ), Tomahawk::DatabaseImpl::sortname( data.artist ).toStdWString().c_str(),
Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) );
doc->add(newLucene<Field>(L"artist", Tomahawk::DatabaseImpl::sortname( data.artist ).toStdWString(),
Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) );
doc.add( *( _CLNEW Field( _T( "artistid" ), QString::number( data.artistId ).toStdWString().c_str(),
Field::STORE_YES | Field::INDEX_NO ) ) );
doc->add(newLucene<Field>(L"artistid", QString::number( data.artistId ).toStdWString(),
Field::STORE_YES, Field::INDEX_NO ) );
doc.add( *( _CLNEW Field( _T( "trackid" ), QString::number( data.id ).toStdWString().c_str(),
Field::STORE_YES | Field::INDEX_NO ) ) );
doc->add(newLucene<Field>(L"trackid", QString::number( data.id ).toStdWString(),
Field::STORE_YES, Field::INDEX_NO ) );
}
else if ( !data.album.isEmpty() )
{
doc.add( *( _CLNEW Field( _T( "album" ), Tomahawk::DatabaseImpl::sortname( data.album ).toStdWString().c_str(),
Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) );
doc->add(newLucene<Field>(L"album", Tomahawk::DatabaseImpl::sortname( data.album ).toStdWString(),
Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) );
doc.add( *( _CLNEW Field( _T( "albumid" ), QString::number( data.id ).toStdWString().c_str(),
Field::STORE_YES | Field::INDEX_NO ) ) );
doc->add(newLucene<Field>(L"albumid", QString::number( data.id ).toStdWString(),
Field::STORE_YES, Field::INDEX_NO ) );
}
else
return;
m_luceneWriter->addDocument( &doc );
m_luceneWriter->addDocument( doc );
}
catch( CLuceneError& error )
catch( LuceneException& error )
{
tDebug() << "Caught CLucene error:" << error.what();
tDebug() << "Caught Lucene error:" << error.what();
QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
}
@@ -199,21 +185,20 @@ FuzzyIndex::appendFields( const Tomahawk::IndexData& data )
void
FuzzyIndex::deleteIndex()
{
if ( m_luceneReader != 0 )
if ( m_luceneReader )
{
tDebug( LOGVERBOSE ) << "Deleting old lucene stuff.";
m_luceneSearcher->close();
m_luceneReader->close();
delete m_luceneSearcher;
delete m_luceneReader;
m_luceneSearcher = 0;
m_luceneReader = 0;
m_luceneSearcher.reset();
m_luceneReader.reset();
}
TomahawkUtils::removeDirectory( m_lucenePath );
}
void
FuzzyIndex::updateIndex()
{
@@ -238,63 +223,60 @@ FuzzyIndex::search( const Tomahawk::query_ptr& query )
{
if ( !m_luceneReader )
{
if ( !IndexReader::indexExists( m_lucenePath.toStdString().c_str() ) )
if ( !IndexReader::indexExists( m_luceneDir ) )
{
tDebug( LOGVERBOSE ) << Q_FUNC_INFO << "index didn't exist.";
return resultsmap;
}
m_luceneReader = IndexReader::open( m_luceneDir );
m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
m_luceneSearcher = newLucene<IndexSearcher>( m_luceneReader );
}
float minScore;
const TCHAR** fields = 0;
MultiFieldQueryParser parser( fields, m_analyzer );
BooleanQuery* qry = _CLNEW BooleanQuery();
Collection<String> fields;// = newCollection<String>();
MultiFieldQueryParserPtr parser = newLucene<MultiFieldQueryParser>(LuceneVersion::LUCENE_CURRENT, fields, m_analyzer );
BooleanQueryPtr qry = newLucene<BooleanQuery>();
if ( query->isFullTextQuery() )
{
QString escapedQuery = QString::fromWCharArray( parser.escape( Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() ) );
QString q = Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() );
Term* term = _CLNEW Term( _T( "track" ), escapedQuery.toStdWString().c_str() );
Query* fqry = _CLNEW FuzzyQuery( term );
qry->add( fqry, true, BooleanClause::SHOULD );
FuzzyQueryPtr fqry = newLucene<FuzzyQuery>( newLucene<Term>( L"track", q.toStdWString() ) );
qry->add( boost::dynamic_pointer_cast<Query>( fqry ), BooleanClause::SHOULD );
term = _CLNEW Term( _T( "artist" ), escapedQuery.toStdWString().c_str() );
fqry = _CLNEW FuzzyQuery( term );
qry->add( fqry, true, BooleanClause::SHOULD );
fqry = newLucene<FuzzyQuery>( newLucene<Term>( L"artist", q.toStdWString() ) );
qry->add( boost::dynamic_pointer_cast<Query>( fqry ), BooleanClause::SHOULD );
term = _CLNEW Term( _T( "fulltext" ), escapedQuery.toStdWString().c_str() );
fqry = _CLNEW FuzzyQuery( term );
qry->add( fqry, true, BooleanClause::SHOULD );
fqry = newLucene<FuzzyQuery>( newLucene<Term>( L"fulltext", q.toStdWString() ) );
qry->add( boost::dynamic_pointer_cast<Query>( fqry ), BooleanClause::SHOULD );
minScore = 0.00;
}
else
{
QString track = QString::fromWCharArray( parser.escape( Tomahawk::DatabaseImpl::sortname( query->queryTrack()->track() ).toStdWString().c_str() ) );
QString artist = QString::fromWCharArray( parser.escape( Tomahawk::DatabaseImpl::sortname( query->queryTrack()->artist() ).toStdWString().c_str() ) );
QString track = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->track() );
QString artist = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->artist() );
// QString album = QString::fromWCharArray( parser.escape( query->album().toStdWString().c_str() ) );
Term* term = _CLNEW Term( _T( "track" ), track.toStdWString().c_str() );
Query* fqry = _CLNEW FuzzyQuery( term );
qry->add( fqry, true, BooleanClause::MUST );
FuzzyQueryPtr fqry = newLucene<FuzzyQuery>( newLucene<Term>( L"track", track.toStdWString() ) );
qry->add( boost::dynamic_pointer_cast<Query>( fqry ), BooleanClause::MUST );
term = _CLNEW Term( _T( "artist" ), artist.toStdWString().c_str() );
fqry = _CLNEW FuzzyQuery( term );
qry->add( fqry, true, BooleanClause::MUST );
fqry = newLucene<FuzzyQuery>( newLucene<Term>( L"artist", artist.toStdWString() ) );
qry->add( boost::dynamic_pointer_cast<Query>( fqry ), BooleanClause::MUST );
minScore = 0.00;
}
Hits* hits = m_luceneSearcher->search( qry );
for ( uint i = 0; i < hits->length(); i++ )
{
Document* d = &hits->doc( i );
TopScoreDocCollectorPtr collector = TopScoreDocCollector::create( 99999, false );
m_luceneSearcher->search( qry, collector );
Collection<ScoreDocPtr> hits = collector->topDocs()->scoreDocs;
float score = hits->score( i );
int id = QString::fromWCharArray( d->get( _T( "trackid" ) ) ).toInt();
for ( uint i = 0; i < collector->getTotalHits(); i++ )
{
DocumentPtr d = m_luceneSearcher->doc( hits[i]->doc );
float score = hits[i]->score;
int id = QString::fromStdWString( d->get( L"trackid" ) ).toInt();
if ( score > minScore )
{
@@ -303,12 +285,12 @@ FuzzyIndex::search( const Tomahawk::query_ptr& query )
}
}
delete hits;
delete qry;
// delete hits;
// delete qry;
}
catch( CLuceneError& error )
catch( LuceneException& error )
{
tDebug() << "Caught CLucene error:" << error.what() << query->toString();
tDebug() << "Caught Lucene error:" << error.what() << query->toString();
QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
}
@@ -329,27 +311,29 @@ FuzzyIndex::searchAlbum( const Tomahawk::query_ptr& query )
{
if ( !m_luceneReader )
{
if ( !IndexReader::indexExists( m_lucenePath.toStdString().c_str() ) )
if ( !IndexReader::indexExists( m_luceneDir ) )
{
tDebug( LOGVERBOSE ) << Q_FUNC_INFO << "index didn't exist.";
return resultsmap;
}
m_luceneReader = IndexReader::open( m_luceneDir );
m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
m_luceneSearcher = newLucene<IndexSearcher>( m_luceneReader );
}
QueryParser parser( _T( "album" ), m_analyzer );
QString escapedName = QString::fromWCharArray( parser.escape( Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() ) );
QueryParserPtr parser = newLucene<QueryParser>( LuceneVersion::LUCENE_CURRENT, L"album", m_analyzer );
QString q = Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() );
Query* qry = _CLNEW FuzzyQuery( _CLNEW Term( _T( "album" ), escapedName.toStdWString().c_str() ) );
Hits* hits = m_luceneSearcher->search( qry );
for ( uint i = 0; i < hits->length(); i++ )
FuzzyQueryPtr qry = newLucene<FuzzyQuery>( newLucene<Term>( L"album", q.toStdWString() ) );
TopScoreDocCollectorPtr collector = TopScoreDocCollector::create( 99999, false );
m_luceneSearcher->search( boost::dynamic_pointer_cast<Query>( qry ), collector );
Collection<ScoreDocPtr> hits = collector->topDocs()->scoreDocs;
for ( uint i = 0; i < collector->getTotalHits(); i++ )
{
Document* d = &hits->doc( i );
float score = hits->score( i );
int id = QString::fromWCharArray( d->get( _T( "albumid" ) ) ).toInt();
DocumentPtr d = m_luceneSearcher->doc( hits[i]->doc );
float score = hits[i]->score;
int id = QString::fromStdWString( d->get( L"albumid" ) ).toInt();
if ( score > 0.30 )
{
@@ -358,12 +342,12 @@ FuzzyIndex::searchAlbum( const Tomahawk::query_ptr& query )
}
}
delete hits;
delete qry;
// delete hits;
// delete qry;
}
catch( CLuceneError& error )
catch( LuceneException& error )
{
tDebug() << "Caught CLucene error:" << error.what();
tDebug() << "Caught Lucene error:" << error.what();
QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
}

View File

@@ -25,30 +25,11 @@
#include <QString>
#include <QMutex>
#include <lucene++/LuceneHeaders.h>
#include "Query.h"
#include "database/DatabaseCommand_UpdateSearchIndex.h"
namespace lucene
{
namespace analysis
{
class SimpleAnalyzer;
}
namespace store
{
class Directory;
}
namespace index
{
class IndexReader;
class IndexWriter;
}
namespace search
{
class IndexSearcher;
}
}
class FuzzyIndex : public QObject
{
Q_OBJECT
@@ -87,11 +68,11 @@ private:
QMutex m_mutex;
QString m_lucenePath;
lucene::analysis::SimpleAnalyzer* m_analyzer;
lucene::store::Directory* m_luceneDir;
lucene::index::IndexReader* m_luceneReader;
lucene::index::IndexWriter* m_luceneWriter;
lucene::search::IndexSearcher* m_luceneSearcher;
boost::shared_ptr<Lucene::StandardAnalyzer> m_analyzer;
Lucene::IndexWriterPtr m_luceneWriter;
Lucene::IndexReaderPtr m_luceneReader;
Lucene::DirectoryPtr m_luceneDir;
Lucene::IndexSearcherPtr m_luceneSearcher;
};
#endif // FUZZYINDEX_H