diff --git a/src/libtomahawk/database/databasecommand_resolve.cpp b/src/libtomahawk/database/databasecommand_resolve.cpp index ae6e92910..dd8a16500 100644 --- a/src/libtomahawk/database/databasecommand_resolve.cpp +++ b/src/libtomahawk/database/databasecommand_resolve.cpp @@ -81,9 +81,9 @@ DatabaseCommand_Resolve::resolve( DatabaseImpl* lib ) typedef QPair scorepair_t; // STEP 1 - QList< QPair > artists = lib->searchTable( "artist", m_query->artist() ); - QList< QPair > tracks = lib->searchTable( "track", m_query->track() ); - QList< QPair > albums = lib->searchTable( "album", m_query->album() ); + QList< QPair > artists = lib->searchTable( "artist", m_query->artist(), false ); + QList< QPair > tracks = lib->searchTable( "track", m_query->track(), false ); + QList< QPair > albums = lib->searchTable( "album", m_query->album(), false ); if ( artists.length() == 0 || tracks.length() == 0 ) { @@ -201,9 +201,16 @@ DatabaseCommand_Resolve::fullTextResolve( DatabaseImpl* lib ) typedef QPair scorepair_t; // STEP 1 - QList< QPair > artistPairs = lib->searchTable( "artist", m_query->fullTextQuery(), 20 ); - QList< QPair > trackPairs = lib->searchTable( "track", m_query->fullTextQuery(), 20 ); - QList< QPair > albumPairs = lib->searchTable( "album", m_query->fullTextQuery(), 20 ); + QList< QPair > artistPairs = lib->searchTable( "artist", m_query->fullTextQuery(), false, 20 ); + QList< QPair > albumPairs = lib->searchTable( "album", m_query->fullTextQuery(), false, 20 ); + QList< QPair > trackArtistPairs = lib->searchTable( "trackartist", m_query->fullTextQuery(), true, 20 ); + + if ( artistPairs.length() == 0 && albumPairs.length() == 0 && trackArtistPairs.length() == 0 ) + { + qDebug() << "No candidates found in first pass, aborting resolve" << m_query->artist() << m_query->track(); + emit results( m_query->id(), res ); + return; + } foreach ( const scorepair_t& artistPair, artistPairs ) { @@ -241,28 +248,14 @@ DatabaseCommand_Resolve::fullTextResolve( DatabaseImpl* lib ) emit albums( m_query->id(), albumList ); } - if ( artistPairs.length() == 0 && trackPairs.length() == 0 && albumPairs.length() == 0 ) - { - qDebug() << "No candidates found in first pass, aborting resolve" << m_query->artist() << m_query->track(); - emit results( m_query->id(), res ); - return; - } - // STEP 2 TomahawkSqlQuery files_query = lib->newquery(); - QStringList artsl, trksl, albsl; - for ( int k = 0; k < artistPairs.count(); k++ ) - artsl.append( QString::number( artistPairs.at( k ).first ) ); - for ( int k = 0; k < trackPairs.count(); k++ ) - trksl.append( QString::number( trackPairs.at( k ).first ) ); - for ( int k = 0; k < albumPairs.count(); k++ ) - albsl.append( QString::number( albumPairs.at( k ).first ) ); + QStringList trksl; + for ( int k = 0; k < trackArtistPairs.count(); k++ ) + trksl.append( QString::number( trackArtistPairs.at( k ).first ) ); - QString artsToken = QString( "file_join.artist IN (%1)" ).arg( artsl.join( "," ) ); QString trksToken = QString( "file_join.track IN (%1)" ).arg( trksl.join( "," ) ); - QString albsToken = QString( "file_join.album IN (%1)" ).arg( albsl.join( "," ) ); - QString sql = QString( "SELECT " "url, mtime, size, md5, mimetype, duration, bitrate, " //0 "file_join.artist, file_join.album, file_join.track, " //7 @@ -284,7 +277,7 @@ DatabaseCommand_Resolve::fullTextResolve( DatabaseImpl* lib ) "track.id = file_join.track AND " "file.id = file_join.file AND " "%1" ) - .arg( trackPairs.length() > 0 ? trksToken : QString( "0" ) ); + .arg( trksl.length() > 0 ? trksToken : QString( "0" ) ); files_query.prepare( sql ); files_query.exec(); @@ -332,11 +325,11 @@ DatabaseCommand_Resolve::fullTextResolve( DatabaseImpl* lib ) result->setAlbumPos( files_query.value( 17 ).toUInt() ); result->setTrackId( files_query.value( 9 ).toUInt() ); - for ( int k = 0; k < trackPairs.count(); k++ ) + for ( int k = 0; k < trackArtistPairs.count(); k++ ) { - if ( trackPairs.at( k ).first == (int)result->trackId() ) + if ( trackArtistPairs.at( k ).first == (int)result->trackId() ) { - result->setScore( trackPairs.at( k ).second ); + result->setScore( trackArtistPairs.at( k ).second ); break; } } diff --git a/src/libtomahawk/database/databasecommand_updatesearchindex.cpp b/src/libtomahawk/database/databasecommand_updatesearchindex.cpp index 6fe5e18a4..bd8c81f76 100644 --- a/src/libtomahawk/database/databasecommand_updatesearchindex.cpp +++ b/src/libtomahawk/database/databasecommand_updatesearchindex.cpp @@ -22,6 +22,8 @@ #include "tomahawksqlquery.h" #include "utils/logger.h" +#include + DatabaseCommand_UpdateSearchIndex::DatabaseCommand_UpdateSearchIndex() : DatabaseCommand() @@ -31,18 +33,23 @@ DatabaseCommand_UpdateSearchIndex::DatabaseCommand_UpdateSearchIndex() void -DatabaseCommand_UpdateSearchIndex::indexTable( DatabaseImpl* db, const QString& table ) +DatabaseCommand_UpdateSearchIndex::indexTable( DatabaseImpl* db, const QString& table, const QString& query ) { qDebug() << Q_FUNC_INFO; - TomahawkSqlQuery query = db->newquery(); + TomahawkSqlQuery q = db->newquery(); qDebug() << "Building index for" << table; - query.exec( QString( "SELECT id, name FROM %1" ).arg( table ) ); + q.exec( QString( "SELECT %1" ).arg( query ) ); QMap< unsigned int, QString > fields; - while ( query.next() ) + QString value; + while ( q.next() ) { - fields.insert( query.value( 0 ).toUInt(), query.value( 1 ).toString() ); + value = ""; + for ( int v = 1; v < q.record().count(); v++ ) + value += q.value( v ).toString() + " "; + + fields.insert( q.value( 0 ).toUInt(), value.trimmed() ); } db->m_fuzzyIndex->appendFields( table, fields ); @@ -55,9 +62,10 @@ DatabaseCommand_UpdateSearchIndex::exec( DatabaseImpl* db ) { db->m_fuzzyIndex->beginIndexing(); - indexTable( db, "artist" ); - indexTable( db, "album" ); - indexTable( db, "track" ); + indexTable( db, "artist", "id, name FROM artist" ); + indexTable( db, "album", "id, name FROM album" ); + indexTable( db, "track", "id, name FROM track" ); + indexTable( db, "trackartist", "track.id, artist.name, track.name FROM track, artist WHERE artist.id = track.artist" ); db->m_fuzzyIndex->endIndexing(); } diff --git a/src/libtomahawk/database/databasecommand_updatesearchindex.h b/src/libtomahawk/database/databasecommand_updatesearchindex.h index 91c2203d1..daa5abbf2 100644 --- a/src/libtomahawk/database/databasecommand_updatesearchindex.h +++ b/src/libtomahawk/database/databasecommand_updatesearchindex.h @@ -36,7 +36,7 @@ signals: void indexUpdated(); private: - void indexTable( DatabaseImpl* db, const QString& table ); + void indexTable( DatabaseImpl* db, const QString& table, const QString& query ); QString table; }; diff --git a/src/libtomahawk/database/databaseimpl.cpp b/src/libtomahawk/database/databaseimpl.cpp index 847685457..c56714b58 100644 --- a/src/libtomahawk/database/databaseimpl.cpp +++ b/src/libtomahawk/database/databaseimpl.cpp @@ -78,7 +78,11 @@ DatabaseImpl::DatabaseImpl( const QString& dbname, Database* parent ) // in case of unclean shutdown last time: query.exec( "UPDATE source SET isonline = 'false'" ); +// schemaUpdated = true; // REMOVE ME m_fuzzyIndex = new FuzzyIndex( *this, schemaUpdated ); + if ( schemaUpdated ) + QTimer::singleShot( 0, this, SLOT( updateIndex() ) ); + tDebug( LOGVERBOSE ) << "Loaded index:" << t.elapsed(); if ( qApp->arguments().contains( "--dumpdb" ) ) @@ -405,13 +409,13 @@ DatabaseImpl::albumId( int artistid, const QString& name_orig, bool autoCreate ) QList< QPair > -DatabaseImpl::searchTable( const QString& table, const QString& name, uint limit ) +DatabaseImpl::searchTable( const QString& table, const QString& name, bool fulltext, uint limit ) { QList< QPair > resultslist; - if ( table != "artist" && table != "track" && table != "album" ) + if ( table != "artist" && table != "track" && table != "album" && table != "trackartist" ) return resultslist; - QMap< int, float > resultsmap = m_fuzzyIndex->search( table, name ); + QMap< int, float > resultsmap = m_fuzzyIndex->search( table, name, fulltext ); foreach ( int i, resultsmap.keys() ) { resultslist << QPair( i, (float)resultsmap.value( i ) ); @@ -696,3 +700,11 @@ DatabaseImpl::openDatabase( const QString& dbname ) return schemaUpdated; } + + +void +DatabaseImpl::updateIndex() +{ + DatabaseCommand* cmd = new DatabaseCommand_UpdateSearchIndex(); + Database::instance()->enqueue( QSharedPointer( cmd ) ); +} diff --git a/src/libtomahawk/database/databaseimpl.h b/src/libtomahawk/database/databaseimpl.h index 805dbab17..878e8194f 100644 --- a/src/libtomahawk/database/databaseimpl.h +++ b/src/libtomahawk/database/databaseimpl.h @@ -56,7 +56,7 @@ public: int trackId( int artistid, const QString& name_orig, bool autoCreate ); int albumId( int artistid, const QString& name_orig, bool autoCreate ); - QList< QPair > searchTable( const QString& table, const QString& name, uint limit = 0 ); + QList< QPair > searchTable( const QString& table, const QString& name, bool fulltext, uint limit = 0 ); QList< int > getTrackFids( int tid ); static QString sortname( const QString& str, bool replaceArticle = false ); @@ -81,6 +81,9 @@ signals: public slots: +private slots: + void updateIndex(); + private: QString cleanSql( const QString& sql ); bool updateSchema( int oldVersion ); diff --git a/src/libtomahawk/database/fuzzyindex.cpp b/src/libtomahawk/database/fuzzyindex.cpp index b5d4b15a9..1770854f2 100644 --- a/src/libtomahawk/database/fuzzyindex.cpp +++ b/src/libtomahawk/database/fuzzyindex.cpp @@ -28,6 +28,7 @@ #include "utils/logger.h" using namespace lucene::analysis; +using namespace lucene::analysis::standard; using namespace lucene::document; using namespace lucene::store; using namespace lucene::index; @@ -106,7 +107,7 @@ FuzzyIndex::appendFields( const QString& table, const QMap< unsigned int, QStrin { try { - qDebug() << "Appending to index:" << fields.count(); + tDebug() << "Appending to index:" << table << fields.count(); bool create = !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() ); IndexWriter luceneWriter = IndexWriter( m_luceneDir, m_analyzer, create ); Document doc; @@ -117,16 +118,14 @@ FuzzyIndex::appendFields( const QString& table, const QMap< unsigned int, QStrin it.next(); unsigned int id = it.key(); QString name = it.value(); - { Field* field = _CLNEW Field( table.toStdWString().c_str(), DatabaseImpl::sortname( name ).toStdWString().c_str(), - Field::STORE_YES | Field::INDEX_UNTOKENIZED ); + Field::STORE_YES | Field::INDEX_TOKENIZED ); doc.add( *field ); } { - Field* field = _CLNEW Field( _T( "id" ), QString::number( id ).toStdWString().c_str(), - Field::STORE_YES | Field::INDEX_NO ); + Field* field = _CLNEW Field( _T( "id" ), QString::number( id ).toStdWString().c_str(), Field::STORE_YES | Field::INDEX_NO ); doc.add( *field ); } @@ -134,6 +133,7 @@ FuzzyIndex::appendFields( const QString& table, const QMap< unsigned int, QStrin doc.clear(); } + luceneWriter.optimize(); luceneWriter.close(); } catch( CLuceneError& error ) @@ -152,7 +152,7 @@ FuzzyIndex::loadLuceneIndex() QMap< int, float > -FuzzyIndex::search( const QString& table, const QString& name ) +FuzzyIndex::search( const QString& table, const QString& name, bool fulltext ) { QMutexLocker lock( &m_mutex ); @@ -174,13 +174,27 @@ FuzzyIndex::search( const QString& table, const QString& name ) if ( name.isEmpty() ) return resultsmap; - SimpleAnalyzer analyzer; - QueryParser parser( table.toStdWString().c_str(), m_analyzer ); Hits* hits = 0; + Query* qry = 0; + QueryParser parser( table.toStdWString().c_str(), m_analyzer ); + + if ( fulltext ) + { + QString escapedName = QString::fromWCharArray( parser.escape( name.toStdWString().c_str() ) ); + + QStringList sl = DatabaseImpl::sortname( escapedName ).split( " ", QString::SkipEmptyParts ); + qry = parser.parse( QString( "%1:%2~" ).arg( table ).arg( sl.join( "~ " ) ).toStdWString().c_str() ); + } + else + { +// qry = _CLNEW FuzzyQuery( _CLNEW Term( table.toStdWString().c_str(), DatabaseImpl::sortname( name ).toStdWString().c_str() ) ); + QString escapedName = QString::fromWCharArray( parser.escape( name.toStdWString().c_str() ) ); + + QStringList sl = DatabaseImpl::sortname( escapedName ).split( " ", QString::SkipEmptyParts ); + qry = parser.parse( QString( "%1:\"%2\"~" ).arg( table ).arg( sl.join( " " ) ).toStdWString().c_str() ); + } - FuzzyQuery* qry = _CLNEW FuzzyQuery( _CLNEW Term( table.toStdWString().c_str(), DatabaseImpl::sortname( name ).toStdWString().c_str() ) ); hits = m_luceneSearcher->search( qry ); - for ( uint i = 0; i < hits->length(); i++ ) { Document* d = &hits->doc( i ); @@ -194,7 +208,7 @@ FuzzyIndex::search( const QString& table, const QString& name ) else score = qMin( score, (float)0.99 ); - if ( score > 0.05 ) + if ( score > 0.20 ) { resultsmap.insert( id, score ); // qDebug() << "Hitres:" << result << id << score << table << name; diff --git a/src/libtomahawk/database/fuzzyindex.h b/src/libtomahawk/database/fuzzyindex.h index 3e93fbb23..12929c8ae 100644 --- a/src/libtomahawk/database/fuzzyindex.h +++ b/src/libtomahawk/database/fuzzyindex.h @@ -66,7 +66,7 @@ signals: public slots: void loadLuceneIndex(); - QMap< int, float > search( const QString& table, const QString& name ); + QMap< int, float > search( const QString& table, const QString& name, bool fulltext ); private: DatabaseImpl& m_db; diff --git a/src/libtomahawk/query.cpp b/src/libtomahawk/query.cpp index 40eb1d515..ca032ae51 100644 --- a/src/libtomahawk/query.cpp +++ b/src/libtomahawk/query.cpp @@ -459,7 +459,15 @@ Query::howSimilar( const Tomahawk::result_ptr& r ) if ( isFullTextQuery() ) { + const QString artistTrackname = DatabaseImpl::sortname( fullTextQuery() ); + const QString rArtistTrackname = DatabaseImpl::sortname( r->artist()->name() + " " + r->track() ); + + int atrdist = levenshtein( artistTrackname, rArtistTrackname ); + int mlatr = qMax( artistTrackname.length(), rArtistTrackname.length() ); + float dcatr = (float)( mlatr - atrdist ) / mlatr; + float res = qMax( dcart, dcalb ); + res = qMax( res, dcatr ); return qMax( res, dctrk ); } else diff --git a/src/libtomahawk/tomahawksettings.cpp b/src/libtomahawk/tomahawksettings.cpp index 9009b3d9a..d278299d5 100644 --- a/src/libtomahawk/tomahawksettings.cpp +++ b/src/libtomahawk/tomahawksettings.cpp @@ -29,7 +29,7 @@ #include "database/databasecommand_updatesearchindex.h" #include "database/database.h" -#define VERSION 5 +#define VERSION 6 using namespace Tomahawk; @@ -190,7 +190,7 @@ TomahawkSettings::doUpgrade( int oldVersion, int newVersion ) TomahawkUtils::removeDirectory( resolverDir.absolutePath() ); } } - else if ( oldVersion == 4 ) + else if ( oldVersion == 4 || oldVersion == 5 ) { // 0.3.0 contained a bug which prevent indexing local files. Force a reindex. QTimer::singleShot( 0, this, SLOT( updateIndex() ) );