From f837e9e7e5ecfb8fab7c71d81291cbb62182f8fe Mon Sep 17 00:00:00 2001 From: Christian Muehlhaeuser Date: Tue, 16 Sep 2014 01:02:59 +0200 Subject: [PATCH] * Ported to lucene++. --- CMakeLists.txt | 3 + CMakeModules/FindLucene++.cmake | 2 +- src/libtomahawk/CMakeLists.txt | 5 +- src/libtomahawk/Query.h | 6 +- src/libtomahawk/TomahawkSettings.cpp | 6 +- src/libtomahawk/TomahawkSettings.h | 2 +- .../database/fuzzyindex/FuzzyIndex.cpp | 190 ++++++++---------- .../database/fuzzyindex/FuzzyIndex.h | 33 +-- 8 files changed, 110 insertions(+), 137 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 523f66ce0..29916f8b3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -250,6 +250,9 @@ macro_log_feature(ECHONEST_FOUND "Echonest" "Qt library for communicating with T macro_optional_find_package(CLucene 0.9.23) macro_log_feature(CLucene_FOUND "CLucene" "The open-source, C++ search engine" "http://clucene.sf.net" TRUE "" "CLucene is used for indexing the collection") +macro_optional_find_package(Lucene++ 3.0.0) +macro_log_feature(LUCENEPP_FOUND "Lucene++" "The open-source, C++ search engine" "https://github.com/luceneplusplus/LucenePlusPlus/" TRUE "" "Lucene++ is used for indexing the collection") + if( NOT TOMAHAWK_QT5 ) macro_optional_find_package(QJSON 0.8.1) macro_log_feature(QJSON_FOUND "QJson" "Qt library that maps JSON data to QVariant objects" "http://qjson.sf.net" TRUE "" "libqjson is used for encoding communication between Tomahawk instances") diff --git a/CMakeModules/FindLucene++.cmake b/CMakeModules/FindLucene++.cmake index f30f9ac1a..1d7cdeabd 100644 --- a/CMakeModules/FindLucene++.cmake +++ b/CMakeModules/FindLucene++.cmake @@ -71,7 +71,7 @@ ENDIF(WIN32) SET(LUCENEPP_GOOD_VERSION TRUE) FIND_PATH(LUCENEPP_LIBRARY_DIR - NAMES liblucene++.so + NAMES liblucene++.dylib PATHS ${TRIAL_LIBRARY_PATHS} ${TRIAL_INCLUDE_PATHS} NO_DEFAULT_PATH) IF (LUCENEPP_LIBRARY_DIR) MESSAGE(STATUS "Found Lucene++ library dir: ${LUCENEPP_LIBRARY_DIR}") diff --git a/src/libtomahawk/CMakeLists.txt b/src/libtomahawk/CMakeLists.txt index 8224e8be2..d7e6dcd11 100644 --- a/src/libtomahawk/CMakeLists.txt +++ b/src/libtomahawk/CMakeLists.txt @@ -406,7 +406,7 @@ include_directories( ${QT_INCLUDE_DIR} ${QJSON_INCLUDE_DIR} ${ECHONEST_INCLUDE_DIR} - ${CLUCENE_INCLUDE_DIRS} + ${LUCENEPP_INCLUDE_DIRS} ${PHONON_INCLUDES} ${Boost_INCLUDE_DIR} @@ -525,12 +525,13 @@ TARGET_LINK_LIBRARIES( tomahawklib ${LIBPORTFWD_LIBRARIES} ${QTKEYCHAIN_LIBRARIES} + boost_system LINK_PUBLIC # External deps ${QJSON_LIBRARIES} ${TAGLIB_LIBRARIES} - ${CLUCENE_LIBRARIES} + ${LUCENEPP_LIBRARIES} ${ECHONEST_LIBRARIES} ${QT_QTSQL_LIBRARY} ${QT_QTUITOOLS_LIBRARY} diff --git a/src/libtomahawk/Query.h b/src/libtomahawk/Query.h index 2a4e4f701..9a9d02135 100644 --- a/src/libtomahawk/Query.h +++ b/src/libtomahawk/Query.h @@ -19,8 +19,8 @@ */ #pragma once -#ifndef QUERY_H -#define QUERY_H +#ifndef TOMAHAWK_QUERY_H +#define TOMAHAWK_QUERY_H #include #include @@ -154,4 +154,4 @@ private: Q_DECLARE_METATYPE( Tomahawk::query_ptr ) -#endif // QUERY_H +#endif // TOMAHAWK_QUERY_H diff --git a/src/libtomahawk/TomahawkSettings.cpp b/src/libtomahawk/TomahawkSettings.cpp index 05f848c84..631554d62 100644 --- a/src/libtomahawk/TomahawkSettings.cpp +++ b/src/libtomahawk/TomahawkSettings.cpp @@ -429,7 +429,6 @@ TomahawkSettings::doUpgrade( int oldVersion, int newVersion ) setValue( "configuration", configuration ); endGroup(); - } // Add a Last.Fm account since we now moved the infoplugin into the account @@ -674,6 +673,11 @@ TomahawkSettings::doUpgrade( int oldVersion, int newVersion ) } #endif //Q_OS_MAC } + else if ( oldVersion == 15 ) + { + // 0.8.0 switches to Lucene++. Force a reindex. + QTimer::singleShot( 0, this, SLOT( updateIndex() ) ); + } } diff --git a/src/libtomahawk/TomahawkSettings.h b/src/libtomahawk/TomahawkSettings.h index 491455557..3e9f800d7 100644 --- a/src/libtomahawk/TomahawkSettings.h +++ b/src/libtomahawk/TomahawkSettings.h @@ -31,7 +31,7 @@ #include #include -#define TOMAHAWK_SETTINGS_VERSION 15 +#define TOMAHAWK_SETTINGS_VERSION 16 /** * Convenience wrapper around QSettings for tomahawk-specific config diff --git a/src/libtomahawk/database/fuzzyindex/FuzzyIndex.cpp b/src/libtomahawk/database/fuzzyindex/FuzzyIndex.cpp index f51588be4..b389207af 100644 --- a/src/libtomahawk/database/fuzzyindex/FuzzyIndex.cpp +++ b/src/libtomahawk/database/fuzzyindex/FuzzyIndex.cpp @@ -29,47 +29,36 @@ #include #include -#include -#include +#include -using namespace lucene::analysis; -using namespace lucene::analysis::standard; -using namespace lucene::document; -using namespace lucene::store; -using namespace lucene::index; -using namespace lucene::queryParser; -using namespace lucene::search; +using namespace Lucene; FuzzyIndex::FuzzyIndex( QObject* parent, const QString& filename, bool wipe ) : QObject( parent ) - , m_luceneReader( 0 ) - , m_luceneSearcher( 0 ) { m_lucenePath = TomahawkUtils::appDataDir().absoluteFilePath( filename ); - QByteArray path = m_lucenePath.toUtf8(); - const char* cPath = path.constData(); bool failed = false; - tDebug() << "Opening Lucene directory:" << path; + tDebug() << "Opening Lucene directory:" << m_lucenePath; try { - m_analyzer = _CLNEW SimpleAnalyzer(); - m_luceneDir = FSDirectory::getDirectory( cPath ); + m_analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); + m_luceneDir = FSDirectory::open( m_lucenePath.toStdWString() ); } - catch ( CLuceneError& error ) + catch ( LuceneException& error ) { - tDebug() << "Caught CLucene error:" << error.what(); + tDebug() << "Caught Lucene error:" << error.what(); failed = true; } - if ( failed ) +/* if ( failed ) { tDebug() << "Initializing RAM directory instead."; m_luceneDir = _CLNEW RAMDirectory(); wipe = true; - } + }*/ if ( wipe ) wipeIndex(); @@ -78,10 +67,10 @@ FuzzyIndex::FuzzyIndex( QObject* parent, const QString& filename, bool wipe ) FuzzyIndex::~FuzzyIndex() { - delete m_luceneSearcher; +/* delete m_luceneSearcher; delete m_luceneReader; delete m_analyzer; - delete m_luceneDir; + delete m_luceneDir;*/ } @@ -113,24 +102,22 @@ FuzzyIndex::beginIndexing() try { tDebug( LOGVERBOSE ) << Q_FUNC_INFO << "Starting indexing:" << m_lucenePath; - if ( m_luceneReader != 0 ) + if ( m_luceneReader ) { tDebug( LOGVERBOSE ) << "Deleting old lucene stuff."; m_luceneSearcher->close(); m_luceneReader->close(); - delete m_luceneSearcher; - delete m_luceneReader; - m_luceneSearcher = 0; - m_luceneReader = 0; + m_luceneSearcher.reset(); + m_luceneReader.reset(); } tDebug( LOGVERBOSE ) << "Creating new index writer."; - m_luceneWriter = new IndexWriter( m_luceneDir, m_analyzer, true ); + m_luceneWriter = newLucene( m_luceneDir, m_analyzer, true, IndexWriter::MaxFieldLengthLIMITED ); } - catch( CLuceneError& error ) + catch( LuceneException& error ) { - tDebug() << "Caught CLucene error:" << error.what(); + tDebug() << "Caught Lucene error:" << error.what(); Q_ASSERT( false ); } } @@ -142,8 +129,7 @@ FuzzyIndex::endIndexing() tDebug( LOGVERBOSE ) << Q_FUNC_INFO << "Finishing indexing:" << m_lucenePath; // m_luceneWriter->optimize(); m_luceneWriter->close(); - delete m_luceneWriter; - m_luceneWriter = 0; + m_luceneWriter.reset(); m_mutex.unlock(); emit indexReady(); @@ -155,41 +141,41 @@ FuzzyIndex::appendFields( const Tomahawk::IndexData& data ) { try { - Document doc; + DocumentPtr doc = newLucene(); if ( !data.track.isEmpty() ) { - doc.add( *( _CLNEW Field( _T( "fulltext" ), Tomahawk::DatabaseImpl::sortname( QString( "%1 %2" ).arg( data.artist ).arg( data.track ) ).toStdWString().c_str(), - Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) ); + doc->add(newLucene(L"fulltext", Tomahawk::DatabaseImpl::sortname( QString( "%1 %2" ).arg( data.artist ).arg( data.track ) ).toStdWString(), + Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) ); - doc.add( *( _CLNEW Field( _T( "track" ), Tomahawk::DatabaseImpl::sortname( data.track ).toStdWString().c_str(), - Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) ); + doc->add(newLucene(L"track", Tomahawk::DatabaseImpl::sortname( data.track ).toStdWString(), + Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) ); - doc.add( *( _CLNEW Field( _T( "artist" ), Tomahawk::DatabaseImpl::sortname( data.artist ).toStdWString().c_str(), - Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) ); + doc->add(newLucene(L"artist", Tomahawk::DatabaseImpl::sortname( data.artist ).toStdWString(), + Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) ); - doc.add( *( _CLNEW Field( _T( "artistid" ), QString::number( data.artistId ).toStdWString().c_str(), - Field::STORE_YES | Field::INDEX_NO ) ) ); + doc->add(newLucene(L"artistid", QString::number( data.artistId ).toStdWString(), + Field::STORE_YES, Field::INDEX_NO ) ); - doc.add( *( _CLNEW Field( _T( "trackid" ), QString::number( data.id ).toStdWString().c_str(), - Field::STORE_YES | Field::INDEX_NO ) ) ); + doc->add(newLucene(L"trackid", QString::number( data.id ).toStdWString(), + Field::STORE_YES, Field::INDEX_NO ) ); } else if ( !data.album.isEmpty() ) { - doc.add( *( _CLNEW Field( _T( "album" ), Tomahawk::DatabaseImpl::sortname( data.album ).toStdWString().c_str(), - Field::STORE_NO | Field::INDEX_UNTOKENIZED ) ) ); + doc->add(newLucene(L"album", Tomahawk::DatabaseImpl::sortname( data.album ).toStdWString(), + Field::STORE_NO, Field::INDEX_NOT_ANALYZED ) ); - doc.add( *( _CLNEW Field( _T( "albumid" ), QString::number( data.id ).toStdWString().c_str(), - Field::STORE_YES | Field::INDEX_NO ) ) ); + doc->add(newLucene(L"albumid", QString::number( data.id ).toStdWString(), + Field::STORE_YES, Field::INDEX_NO ) ); } else return; - m_luceneWriter->addDocument( &doc ); + m_luceneWriter->addDocument( doc ); } - catch( CLuceneError& error ) + catch( LuceneException& error ) { - tDebug() << "Caught CLucene error:" << error.what(); + tDebug() << "Caught Lucene error:" << error.what(); QTimer::singleShot( 0, this, SLOT( wipeIndex() ) ); } @@ -199,21 +185,20 @@ FuzzyIndex::appendFields( const Tomahawk::IndexData& data ) void FuzzyIndex::deleteIndex() { - if ( m_luceneReader != 0 ) + if ( m_luceneReader ) { tDebug( LOGVERBOSE ) << "Deleting old lucene stuff."; m_luceneSearcher->close(); m_luceneReader->close(); - delete m_luceneSearcher; - delete m_luceneReader; - m_luceneSearcher = 0; - m_luceneReader = 0; + m_luceneSearcher.reset(); + m_luceneReader.reset(); } TomahawkUtils::removeDirectory( m_lucenePath ); } + void FuzzyIndex::updateIndex() { @@ -238,63 +223,60 @@ FuzzyIndex::search( const Tomahawk::query_ptr& query ) { if ( !m_luceneReader ) { - if ( !IndexReader::indexExists( m_lucenePath.toStdString().c_str() ) ) + if ( !IndexReader::indexExists( m_luceneDir ) ) { tDebug( LOGVERBOSE ) << Q_FUNC_INFO << "index didn't exist."; return resultsmap; } m_luceneReader = IndexReader::open( m_luceneDir ); - m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader ); + m_luceneSearcher = newLucene( m_luceneReader ); } float minScore; - const TCHAR** fields = 0; - MultiFieldQueryParser parser( fields, m_analyzer ); - BooleanQuery* qry = _CLNEW BooleanQuery(); + Collection fields;// = newCollection(); + MultiFieldQueryParserPtr parser = newLucene(LuceneVersion::LUCENE_CURRENT, fields, m_analyzer ); + BooleanQueryPtr qry = newLucene(); if ( query->isFullTextQuery() ) { - QString escapedQuery = QString::fromWCharArray( parser.escape( Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() ) ); + QString q = Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() ); - Term* term = _CLNEW Term( _T( "track" ), escapedQuery.toStdWString().c_str() ); - Query* fqry = _CLNEW FuzzyQuery( term ); - qry->add( fqry, true, BooleanClause::SHOULD ); + FuzzyQueryPtr fqry = newLucene( newLucene( L"track", q.toStdWString() ) ); + qry->add( boost::dynamic_pointer_cast( fqry ), BooleanClause::SHOULD ); - term = _CLNEW Term( _T( "artist" ), escapedQuery.toStdWString().c_str() ); - fqry = _CLNEW FuzzyQuery( term ); - qry->add( fqry, true, BooleanClause::SHOULD ); + fqry = newLucene( newLucene( L"artist", q.toStdWString() ) ); + qry->add( boost::dynamic_pointer_cast( fqry ), BooleanClause::SHOULD ); - term = _CLNEW Term( _T( "fulltext" ), escapedQuery.toStdWString().c_str() ); - fqry = _CLNEW FuzzyQuery( term ); - qry->add( fqry, true, BooleanClause::SHOULD ); + fqry = newLucene( newLucene( L"fulltext", q.toStdWString() ) ); + qry->add( boost::dynamic_pointer_cast( fqry ), BooleanClause::SHOULD ); minScore = 0.00; } else { - QString track = QString::fromWCharArray( parser.escape( Tomahawk::DatabaseImpl::sortname( query->queryTrack()->track() ).toStdWString().c_str() ) ); - QString artist = QString::fromWCharArray( parser.escape( Tomahawk::DatabaseImpl::sortname( query->queryTrack()->artist() ).toStdWString().c_str() ) ); + QString track = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->track() ); + QString artist = Tomahawk::DatabaseImpl::sortname( query->queryTrack()->artist() ); // QString album = QString::fromWCharArray( parser.escape( query->album().toStdWString().c_str() ) ); - Term* term = _CLNEW Term( _T( "track" ), track.toStdWString().c_str() ); - Query* fqry = _CLNEW FuzzyQuery( term ); - qry->add( fqry, true, BooleanClause::MUST ); + FuzzyQueryPtr fqry = newLucene( newLucene( L"track", track.toStdWString() ) ); + qry->add( boost::dynamic_pointer_cast( fqry ), BooleanClause::MUST ); - term = _CLNEW Term( _T( "artist" ), artist.toStdWString().c_str() ); - fqry = _CLNEW FuzzyQuery( term ); - qry->add( fqry, true, BooleanClause::MUST ); + fqry = newLucene( newLucene( L"artist", artist.toStdWString() ) ); + qry->add( boost::dynamic_pointer_cast( fqry ), BooleanClause::MUST ); minScore = 0.00; } - Hits* hits = m_luceneSearcher->search( qry ); - for ( uint i = 0; i < hits->length(); i++ ) - { - Document* d = &hits->doc( i ); + TopScoreDocCollectorPtr collector = TopScoreDocCollector::create( 99999, false ); + m_luceneSearcher->search( qry, collector ); + Collection hits = collector->topDocs()->scoreDocs; - float score = hits->score( i ); - int id = QString::fromWCharArray( d->get( _T( "trackid" ) ) ).toInt(); + for ( uint i = 0; i < collector->getTotalHits(); i++ ) + { + DocumentPtr d = m_luceneSearcher->doc( hits[i]->doc ); + float score = hits[i]->score; + int id = QString::fromStdWString( d->get( L"trackid" ) ).toInt(); if ( score > minScore ) { @@ -303,12 +285,12 @@ FuzzyIndex::search( const Tomahawk::query_ptr& query ) } } - delete hits; - delete qry; +// delete hits; +// delete qry; } - catch( CLuceneError& error ) + catch( LuceneException& error ) { - tDebug() << "Caught CLucene error:" << error.what() << query->toString(); + tDebug() << "Caught Lucene error:" << error.what() << query->toString(); QTimer::singleShot( 0, this, SLOT( wipeIndex() ) ); } @@ -329,27 +311,29 @@ FuzzyIndex::searchAlbum( const Tomahawk::query_ptr& query ) { if ( !m_luceneReader ) { - if ( !IndexReader::indexExists( m_lucenePath.toStdString().c_str() ) ) + if ( !IndexReader::indexExists( m_luceneDir ) ) { tDebug( LOGVERBOSE ) << Q_FUNC_INFO << "index didn't exist."; return resultsmap; } m_luceneReader = IndexReader::open( m_luceneDir ); - m_luceneSearcher = _CLNEW IndexSearcher( m_luceneReader ); + m_luceneSearcher = newLucene( m_luceneReader ); } - QueryParser parser( _T( "album" ), m_analyzer ); - QString escapedName = QString::fromWCharArray( parser.escape( Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() ).toStdWString().c_str() ) ); + QueryParserPtr parser = newLucene( LuceneVersion::LUCENE_CURRENT, L"album", m_analyzer ); + QString q = Tomahawk::DatabaseImpl::sortname( query->fullTextQuery() ); - Query* qry = _CLNEW FuzzyQuery( _CLNEW Term( _T( "album" ), escapedName.toStdWString().c_str() ) ); - Hits* hits = m_luceneSearcher->search( qry ); - for ( uint i = 0; i < hits->length(); i++ ) + FuzzyQueryPtr qry = newLucene( newLucene( L"album", q.toStdWString() ) ); + TopScoreDocCollectorPtr collector = TopScoreDocCollector::create( 99999, false ); + m_luceneSearcher->search( boost::dynamic_pointer_cast( qry ), collector ); + Collection hits = collector->topDocs()->scoreDocs; + + for ( uint i = 0; i < collector->getTotalHits(); i++ ) { - Document* d = &hits->doc( i ); - - float score = hits->score( i ); - int id = QString::fromWCharArray( d->get( _T( "albumid" ) ) ).toInt(); + DocumentPtr d = m_luceneSearcher->doc( hits[i]->doc ); + float score = hits[i]->score; + int id = QString::fromStdWString( d->get( L"albumid" ) ).toInt(); if ( score > 0.30 ) { @@ -358,12 +342,12 @@ FuzzyIndex::searchAlbum( const Tomahawk::query_ptr& query ) } } - delete hits; - delete qry; +// delete hits; +// delete qry; } - catch( CLuceneError& error ) + catch( LuceneException& error ) { - tDebug() << "Caught CLucene error:" << error.what(); + tDebug() << "Caught Lucene error:" << error.what(); QTimer::singleShot( 0, this, SLOT( wipeIndex() ) ); } diff --git a/src/libtomahawk/database/fuzzyindex/FuzzyIndex.h b/src/libtomahawk/database/fuzzyindex/FuzzyIndex.h index e01f07b8b..e09b35c1f 100644 --- a/src/libtomahawk/database/fuzzyindex/FuzzyIndex.h +++ b/src/libtomahawk/database/fuzzyindex/FuzzyIndex.h @@ -25,30 +25,11 @@ #include #include +#include + #include "Query.h" #include "database/DatabaseCommand_UpdateSearchIndex.h" -namespace lucene -{ - namespace analysis - { - class SimpleAnalyzer; - } - namespace store - { - class Directory; - } - namespace index - { - class IndexReader; - class IndexWriter; - } - namespace search - { - class IndexSearcher; - } -} - class FuzzyIndex : public QObject { Q_OBJECT @@ -87,11 +68,11 @@ private: QMutex m_mutex; QString m_lucenePath; - lucene::analysis::SimpleAnalyzer* m_analyzer; - lucene::store::Directory* m_luceneDir; - lucene::index::IndexReader* m_luceneReader; - lucene::index::IndexWriter* m_luceneWriter; - lucene::search::IndexSearcher* m_luceneSearcher; + boost::shared_ptr m_analyzer; + Lucene::IndexWriterPtr m_luceneWriter; + Lucene::IndexReaderPtr m_luceneReader; + Lucene::DirectoryPtr m_luceneDir; + Lucene::IndexSearcherPtr m_luceneSearcher; }; #endif // FUZZYINDEX_H