1
0
mirror of https://github.com/tomahawk-player/tomahawk.git synced 2025-03-13 20:39:57 +01:00

* Revert to CLucene until we sort out the MinGW/Boost/Unicode mess :-(

This commit is contained in:
Christian Muehlhaeuser 2014-09-30 02:31:07 +02:00
parent 070a957b3d
commit 1e8bab26b2
10 changed files with 162 additions and 105 deletions

View File

@ -251,8 +251,8 @@ macro_log_feature(ECHONEST_FOUND "Echonest" "Qt library for communicating with T
find_package(Boost REQUIRED COMPONENTS system)
macro_log_feature(Boost_FOUND "Boost" "Provides free peer-reviewed portable C++ source libraries" "http://www.boost.org" TRUE "" "") #FIXME: give useful explanation
macro_optional_find_package(Lucene++ 3.0.0)
macro_log_feature(LUCENEPP_FOUND "Lucene++" "The open-source, C++ search engine" "https://github.com/luceneplusplus/LucenePlusPlus/" TRUE "" "Lucene++ is used for indexing the collection")
macro_optional_find_package(CLucene 0.9.23)
macro_log_feature(CLucene_FOUND "CLucene" "The open-source, C++ search engine" "http://clucene.sf.net" TRUE "" "CLucene is used for indexing the collection")
if( NOT TOMAHAWK_QT5 )
macro_optional_find_package(QJSON 0.8.1)

View File

@ -394,14 +394,8 @@ Section "Tomahawk Player" SEC_TOMAHAWK_PLAYER
File "${MING_BIN}\libssl-10.dll"
File "${MING_BIN}\libcrypto-10.dll"
; CLucene (replaces LucenePlusPlus)
File "${MING_BIN}\liblucene++.dll"
File "${MING_BIN}\libboost_system-mt.dll"
File "${MING_BIN}\libboost_filesystem-mt.dll"
File "${MING_BIN}\libboost_iostreams-mt.dll"
File "${MING_BIN}\libboost_regex-mt.dll"
File "${MING_BIN}\libboost_thread-mt.dll"
File "${MING_BIN}\bz2-1.dll"
File "${MING_BIN}\libclucene-core.dll"
File "${MING_BIN}\libclucene-shared.dll"
File "${MING_BIN}\libqtsparkle.dll"
File "${MING_BIN}\libattica.dll"

View File

@ -43,7 +43,7 @@ Required dependencies:
* SQLite 3.6.22 - http://www.sqlite.org/
* TagLib 1.8 - http://developer.kde.org/~wheeler/taglib.html
* Boost 1.3 - http://www.boost.org/
* Lucene++ 3.0.6 - https://github.com/luceneplusplus/LucenePlusPlus/
* CLucene 0.9.23 (0.9.21 will fail) - http://clucene.sourceforge.net/download.shtml
* libechonest 2.2.0 - http://projects.kde.org/projects/playground/libs/libechonest/
* Attica 0.4.0 - ftp://ftp.kde.org/pub/kde/stable/attica/
* QuaZip 0.4.3 - http://quazip.sourceforge.net/

View File

@ -405,7 +405,7 @@ include_directories(
${QT_INCLUDE_DIR}
${QJSON_INCLUDE_DIR}
${ECHONEST_INCLUDE_DIR}
${LUCENEPP_INCLUDE_DIRS}
${CLUCENE_INCLUDE_DIRS}
${PHONON_INCLUDES}
${Boost_INCLUDE_DIR}
@ -528,7 +528,7 @@ TARGET_LINK_LIBRARIES( tomahawklib
# External deps
${QJSON_LIBRARIES}
${TAGLIB_LIBRARIES}
${LUCENEPP_LIBRARIES}
${CLUCENE_LIBRARIES}
${ECHONEST_LIBRARIES}
${QT_QTSQL_LIBRARY}
${QT_QTUITOOLS_LIBRARY}

View File

@ -185,6 +185,13 @@ Database::loadIndex()
}
// Public entry point for wiping the search index: simply forwards the
// request to the implementation object (m_impl).
void
Database::wipeIndex()
{
m_impl->wipeIndex();
}
void
Database::enqueue( const QList< Tomahawk::dbcmd_ptr >& lc )
{

View File

@ -82,6 +82,7 @@ public:
explicit Database( const QString& dbname, QObject* parent = 0 );
~Database();
void wipeIndex();
void loadIndex();
bool isReady() const { return m_ready; }

View File

@ -196,6 +196,13 @@ Tomahawk::DatabaseImpl::dumpDatabase()
}
// Wipes the search index by calling deleteIndex() on m_fuzzyIndex
// (presumably the FuzzyIndex instance owned by this object - verify).
void
Tomahawk::DatabaseImpl::wipeIndex()
{
m_fuzzyIndex->deleteIndex();
}
void
Tomahawk::DatabaseImpl::loadIndex()
{

View File

@ -84,6 +84,7 @@ public:
QString dbid() const { return m_dbid; }
void loadIndex();
void wipeIndex();
signals:
void indexReady();

View File

@ -1,6 +1,6 @@
/* === This file is part of Tomahawk Player - <http://tomahawk-player.org> ===
*
* Copyright 2010-2014, Christian Muehlhaeuser <muesli@tomahawk-player.org>
* Copyright 2010-2013, Christian Muehlhaeuser <muesli@tomahawk-player.org>
*
* Tomahawk is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -29,32 +29,45 @@
#include <QTime>
#include <QTimer>
#include <lucene++/FuzzyQuery.h>
#include <CLucene.h>
#include <CLucene/queryParser/MultiFieldQueryParser.h>
using namespace Lucene;
using namespace lucene::analysis;
using namespace lucene::analysis::standard;
using namespace lucene::document;
using namespace lucene::store;
using namespace lucene::index;
using namespace lucene::queryParser;
using namespace lucene::search;
FuzzyIndex::FuzzyIndex( QObject* parent, const QString& filename, bool wipe )
: QObject( parent )
, m_luceneReader( 0 )
, m_luceneSearcher( 0 )
{
m_lucenePath = TomahawkUtils::appDataDir().absoluteFilePath( filename );
QByteArray path = m_lucenePath.toUtf8();
const char* cPath = path.constData();
bool failed = false;
tDebug() << "Opening Lucene directory:" << m_lucenePath;
tDebug() << "Opening Lucene directory:" << path;
try
{
m_analyzer = newLucene<SimpleAnalyzer>();
m_luceneDir = FSDirectory::open( m_lucenePath.toStdWString() );
m_analyzer = _CLNEW SimpleAnalyzer();
m_luceneDir = FSDirectory::getDirectory( cPath );
}
catch ( LuceneException& error )
catch ( CLuceneError& error )
{
tDebug() << "Caught Lucene error:" << error.what();
tDebug() << "Caught CLucene error:" << error.what();
failed = true;
}
if ( failed )
{
deleteIndex();
tDebug() << "Initializing RAM directory instead.";
m_luceneDir = _CLNEW RAMDirectory();
wipe = true;
}
@ -65,7 +78,10 @@ FuzzyIndex::FuzzyIndex( QObject* parent, const QString& filename, bool wipe )
// Logs the call and frees the CLucene objects held through raw pointers:
// searcher, reader, analyzer and directory.
// NOTE(review): m_luceneWriter is not freed here; endIndexing() deletes it,
// so confirm the index can never be destroyed while indexing is in progress.
// NOTE(review): m_luceneDir may have been obtained from
// FSDirectory::getDirectory(); confirm that a plain delete is the correct
// way to release it.
FuzzyIndex::~FuzzyIndex()
{
tLog( LOGVERBOSE ) << Q_FUNC_INFO;
delete m_luceneSearcher;
delete m_luceneReader;
delete m_analyzer;
delete m_luceneDir;
}
@ -97,22 +113,24 @@ FuzzyIndex::beginIndexing()
try
{
tDebug( LOGVERBOSE ) << Q_FUNC_INFO << "Starting indexing:" << m_lucenePath;
if ( m_luceneReader )
if ( m_luceneReader != 0 )
{
tDebug( LOGVERBOSE ) << "Deleting old lucene stuff.";
m_luceneSearcher->close();
m_luceneReader->close();
m_luceneSearcher.reset();
m_luceneReader.reset();
delete m_luceneSearcher;
delete m_luceneReader;
m_luceneSearcher = 0;
m_luceneReader = 0;
}
tDebug( LOGVERBOSE ) << "Creating new index writer.";
m_luceneWriter = newLucene<IndexWriter>( m_luceneDir, m_analyzer, true, IndexWriter::MaxFieldLengthLIMITED );
m_luceneWriter = new IndexWriter( m_luceneDir, m_analyzer, true );
}
catch( LuceneException& error )
catch( CLuceneError& error )
{
tDebug() << "Caught Lucene error:" << error.what();
tDebug() << "Caught CLucene error:" << error.what();
Q_ASSERT( false );
}
}
@ -124,7 +142,8 @@ FuzzyIndex::endIndexing()
tDebug( LOGVERBOSE ) << Q_FUNC_INFO << "Finishing indexing:" << m_lucenePath;
m_luceneWriter->optimize();
m_luceneWriter->close();
m_luceneWriter.reset();
delete m_luceneWriter;
m_luceneWriter = 0;
m_mutex.unlock();
emit indexReady();
@ -136,41 +155,41 @@ FuzzyIndex::appendFields( const Tomahawk::IndexData& data )
{
try
{
DocumentPtr doc = newLucene<Document>();
Document doc;
if ( !data.track.isEmpty() )
{
doc->add(newLucene<Field>( L"fulltext", Tomahawk::DatabaseImpl::sortname( QString( "%1 %2" ).arg( data.artist ).arg( data.track ) ).toStdWString(),
Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) );
doc.add( *( _CLNEW Field( _T( "fulltext" ), Tomahawk::DatabaseImpl::sortname( QString( "%1 %2" ).arg( data.artist ).arg( data.track ) ).toStdWString().c_str(),
Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) );
doc->add(newLucene<Field>( L"track", Tomahawk::DatabaseImpl::sortname( data.track ).toStdWString(),
Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) );
doc.add( *( _CLNEW Field( _T( "track" ), Tomahawk::DatabaseImpl::sortname( data.track ).toStdWString().c_str(),
Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) );
doc->add(newLucene<Field>( L"artist", Tomahawk::DatabaseImpl::sortname( data.artist ).toStdWString(),
Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) );
doc.add( *( _CLNEW Field( _T( "artist" ), Tomahawk::DatabaseImpl::sortname( data.artist ).toStdWString().c_str(),
Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) );
doc->add(newLucene<Field>( L"artistid", QString::number( data.artistId ).toStdWString(),
Field::STORE_YES, Field::INDEX_NO ) );
doc.add( *( _CLNEW Field( _T( "artistid" ), QString::number( data.artistId ).toStdWString().c_str(),
Field::STORE_YES | Field::INDEX_NO ) ) );
doc->add(newLucene<Field>( L"trackid", QString::number( data.id ).toStdWString(),
Field::STORE_YES, Field::INDEX_NO ) );
doc.add( *( _CLNEW Field( _T( "trackid" ), QString::number( data.id ).toStdWString().c_str(),
Field::STORE_YES | Field::INDEX_NO ) ) );
}
else if ( !data.album.isEmpty() )
{
doc->add(newLucene<Field>( L"album", Tomahawk::DatabaseImpl::sortname( data.album ).toStdWString(),
Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) );
doc.add( *( _CLNEW Field( _T( "album" ), Tomahawk::DatabaseImpl::sortname( data.album ).toStdWString().c_str(),
Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) );
doc->add(newLucene<Field>( L"albumid", QString::number( data.id ).toStdWString(),
Field::STORE_YES, Field::INDEX_NO ) );
doc.add( *( _CLNEW Field( _T( "albumid" ), QString::number( data.id ).toStdWString().c_str(),
Field::STORE_YES | Field::INDEX_NO ) ) );
}
else
return;
m_luceneWriter->addDocument( doc );
m_luceneWriter->addDocument( &doc );
}
catch( LuceneException& error )
catch( CLuceneError& error )
{
tDebug() << "Caught Lucene error:" << error.what();
tDebug() << "Caught CLucene error:" << error.what();
QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
}
@ -180,24 +199,26 @@ FuzzyIndex::appendFields( const Tomahawk::IndexData& data )
void
FuzzyIndex::deleteIndex()
{
if ( m_luceneReader )
tDebug() << Q_FUNC_INFO;
if ( m_luceneReader != 0 )
{
tDebug( LOGVERBOSE ) << "Deleting old lucene stuff.";
m_luceneSearcher->close();
m_luceneReader->close();
m_luceneSearcher.reset();
m_luceneReader.reset();
delete m_luceneSearcher;
delete m_luceneReader;
m_luceneSearcher = 0;
m_luceneReader = 0;
}
TomahawkUtils::removeDirectory( m_lucenePath );
}
void
FuzzyIndex::updateIndex()
{
// virtual NO-OP
// NO-OP
}
@ -218,60 +239,63 @@ FuzzyIndex::search( const Tomahawk::query_ptr& query )
{
if ( !m_luceneReader )
{
if ( !IndexReader::indexExists( m_luceneDir ) )
if ( !IndexReader::indexExists( m_lucenePath.toStdString().c_str() ) )
{
tDebug( LOGVERBOSE ) << Q_FUNC_INFO << "index didn't exist.";
return resultsmap;
}
m_luceneReader = IndexReader::open( m_luceneDir );
m_luceneSearcher = newLucene<IndexSearcher>( m_luceneReader );
m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
}
float minScore;
Collection<String> fields; // = newCollection<String>();
MultiFieldQueryParserPtr parser = newLucene<MultiFieldQueryParser>( LuceneVersion::LUCENE_CURRENT, fields, m_analyzer );
BooleanQueryPtr qry = newLucene<BooleanQuery>();
const TCHAR** fields = 0;
MultiFieldQueryParser parser( fields, m_analyzer );
BooleanQuery* qry = _CLNEW BooleanQuery();
if ( query->isFullTextQuery() )
{
QString q = Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() );
QString escapedQuery = QString::fromWCharArray( parser.escape( Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() ) );
FuzzyQueryPtr fqry = newLucene<FuzzyQuery>( newLucene<Term>( L"track", q.toStdWString() ) );
qry->add( boost::dynamic_pointer_cast<Query>( fqry ), BooleanClause::SHOULD );
Term* term = _CLNEW Term( _T( "track" ), escapedQuery.toStdWString().c_str() );
Query* fqry = _CLNEW FuzzyQuery( term );
qry->add( fqry, true, BooleanClause::SHOULD );
FuzzyQueryPtr fqry2 = newLucene<FuzzyQuery>( newLucene<Term>( L"artist", q.toStdWString() ) );
qry->add( boost::dynamic_pointer_cast<Query>( fqry2 ), BooleanClause::SHOULD );
term = _CLNEW Term( _T( "artist" ), escapedQuery.toStdWString().c_str() );
fqry = _CLNEW FuzzyQuery( term );
qry->add( fqry, true, BooleanClause::SHOULD );
FuzzyQueryPtr fqry3 = newLucene<FuzzyQuery>( newLucene<Term>( L"fulltext", q.toStdWString() ) );
qry->add( boost::dynamic_pointer_cast<Query>( fqry3 ), BooleanClause::SHOULD );
term = _CLNEW Term( _T( "fulltext" ), escapedQuery.toStdWString().c_str() );
fqry = _CLNEW FuzzyQuery( term );
qry->add( fqry, true, BooleanClause::SHOULD );
minScore = 0.00;
}
else
{
QString track = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->track() );
QString artist = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->artist() );
//QString album = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->album() );
QString track = QString::fromWCharArray( parser.escape( Tomahawk::DatabaseImpl::sortname( query->queryTrack()->track() ).toStdWString().c_str() ) );
QString artist = QString::fromWCharArray( parser.escape( Tomahawk::DatabaseImpl::sortname( query->queryTrack()->artist() ).toStdWString().c_str() ) );
// QString album = QString::fromWCharArray( parser.escape( query->album().toStdWString().c_str() ) );
FuzzyQueryPtr fqry = newLucene<FuzzyQuery>( newLucene<Term>( L"track", track.toStdWString() ) );
qry->add( boost::dynamic_pointer_cast<Query>( fqry ), BooleanClause::MUST );
Term* term = _CLNEW Term( _T( "track" ), track.toStdWString().c_str() );
Query* fqry = _CLNEW FuzzyQuery( term );
qry->add( fqry, true, BooleanClause::MUST );
FuzzyQueryPtr fqry2 = newLucene<FuzzyQuery>( newLucene<Term>( L"artist", artist.toStdWString() ) );
qry->add( boost::dynamic_pointer_cast<Query>( fqry2 ), BooleanClause::MUST );
term = _CLNEW Term( _T( "artist" ), artist.toStdWString().c_str() );
fqry = _CLNEW FuzzyQuery( term );
qry->add( fqry, true, BooleanClause::MUST );
minScore = 0.00;
}
TopScoreDocCollectorPtr collector = TopScoreDocCollector::create( 50, false );
m_luceneSearcher->search( qry, collector );
Collection<ScoreDocPtr> hits = collector->topDocs()->scoreDocs;
for ( int i = 0; i < collector->getTotalHits() && i < 50; i++ )
Hits* hits = m_luceneSearcher->search( qry );
for ( uint i = 0; i < hits->length(); i++ )
{
DocumentPtr d = m_luceneSearcher->doc( hits[i]->doc );
float score = hits[i]->score;
int id = QString::fromStdWString( d->get( L"trackid" ) ).toInt();
Document* d = &hits->doc( i );
float score = hits->score( i );
int id = QString::fromWCharArray( d->get( _T( "trackid" ) ) ).toInt();
if ( score > minScore )
{
@ -279,10 +303,13 @@ FuzzyIndex::search( const Tomahawk::query_ptr& query )
// tDebug() << "Index hit:" << id << score << QString::fromWCharArray( ((Query*)qry)->toString() );
}
}
delete hits;
delete qry;
}
catch( LuceneException& error )
catch( CLuceneError& error )
{
tDebug() << "Caught Lucene error:" << error.what() << query->toString();
tDebug() << "Caught CLucene error:" << error.what() << query->toString();
QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
}
@ -303,29 +330,27 @@ FuzzyIndex::searchAlbum( const Tomahawk::query_ptr& query )
{
if ( !m_luceneReader )
{
if ( !IndexReader::indexExists( m_luceneDir ) )
if ( !IndexReader::indexExists( m_lucenePath.toStdString().c_str() ) )
{
tDebug( LOGVERBOSE ) << Q_FUNC_INFO << "index didn't exist.";
return resultsmap;
}
m_luceneReader = IndexReader::open( m_luceneDir );
m_luceneSearcher = newLucene<IndexSearcher>( m_luceneReader );
m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader );
}
QueryParserPtr parser = newLucene<QueryParser>( LuceneVersion::LUCENE_CURRENT, L"album", m_analyzer );
QString q = Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() );
QueryParser parser( _T( "album" ), m_analyzer );
QString escapedName = QString::fromWCharArray( parser.escape( Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() ) );
FuzzyQueryPtr qry = newLucene<FuzzyQuery>( newLucene<Term>( L"album", q.toStdWString() ) );
TopScoreDocCollectorPtr collector = TopScoreDocCollector::create( 99999, false );
m_luceneSearcher->search( boost::dynamic_pointer_cast<Query>( qry ), collector );
Collection<ScoreDocPtr> hits = collector->topDocs()->scoreDocs;
for ( int i = 0; i < collector->getTotalHits(); i++ )
Query* qry = _CLNEW FuzzyQuery( _CLNEW Term( _T( "album" ), escapedName.toStdWString().c_str() ) );
Hits* hits = m_luceneSearcher->search( qry );
for ( uint i = 0; i < hits->length(); i++ )
{
DocumentPtr d = m_luceneSearcher->doc( hits[i]->doc );
float score = hits[i]->score;
int id = QString::fromStdWString( d->get( L"albumid" ) ).toInt();
Document* d = &hits->doc( i );
float score = hits->score( i );
int id = QString::fromWCharArray( d->get( _T( "albumid" ) ) ).toInt();
if ( score > 0.30 )
{
@ -333,10 +358,13 @@ FuzzyIndex::searchAlbum( const Tomahawk::query_ptr& query )
// tDebug() << "Index hit:" << id << score;
}
}
delete hits;
delete qry;
}
catch( LuceneException& error )
catch( CLuceneError& error )
{
tDebug() << "Caught Lucene error:" << error.what();
tDebug() << "Caught CLucene error:" << error.what();
QTimer::singleShot( 0, this, SLOT( wipeIndex() ) );
}

View File

@ -1,6 +1,6 @@
/* === This file is part of Tomahawk Player - <http://tomahawk-player.org> ===
*
* Copyright 2010-2014, Christian Muehlhaeuser <muesli@tomahawk-player.org>
* Copyright 2010-2013, Christian Muehlhaeuser <muesli@tomahawk-player.org>
*
* Tomahawk is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@ -25,11 +25,30 @@
#include <QString>
#include <QMutex>
#include <lucene++/LuceneHeaders.h>
#include "Query.h"
#include "database/DatabaseCommand_UpdateSearchIndex.h"
// Forward declarations of the CLucene classes that FuzzyIndex stores as
// pointer members, presumably so this header does not have to include the
// CLucene headers itself.
namespace lucene
{
// Text analysis
namespace analysis
{
class SimpleAnalyzer;
}
// Index storage
namespace store
{
class Directory;
}
// Index reading and writing
namespace index
{
class IndexReader;
class IndexWriter;
}
// Query execution
namespace search
{
class IndexSearcher;
}
}
class FuzzyIndex : public QObject
{
Q_OBJECT
@ -68,11 +87,11 @@ private:
QMutex m_mutex;
QString m_lucenePath;
boost::shared_ptr<Lucene::SimpleAnalyzer> m_analyzer;
Lucene::IndexWriterPtr m_luceneWriter;
Lucene::IndexReaderPtr m_luceneReader;
Lucene::FSDirectoryPtr m_luceneDir;
Lucene::IndexSearcherPtr m_luceneSearcher;
lucene::analysis::SimpleAnalyzer* m_analyzer;
lucene::store::Directory* m_luceneDir;
lucene::index::IndexReader* m_luceneReader;
lucene::index::IndexWriter* m_luceneWriter;
lucene::search::IndexSearcher* m_luceneSearcher;
};
#endif // FUZZYINDEX_H