From 7fcdbe9c9e1528f60437af6abf9d8c481efafb94 Mon Sep 17 00:00:00 2001 From: Christian Muehlhaeuser Date: Tue, 6 Sep 2011 04:40:08 +0200 Subject: [PATCH] * Improved score calculation and handling. Tomahawk now calculates all the scores internally and doesn't rely on resolvers providing it anymore. --- .../database/databasecommand_resolve.cpp | 127 ------------- .../database/databasecommand_resolve.h | 3 - src/libtomahawk/pipeline.cpp | 20 ++- src/libtomahawk/query.cpp | 169 ++++++++++++++++-- src/libtomahawk/query.h | 17 +- src/resolvers/qtscriptresolver.cpp | 1 - src/resolvers/scriptresolver.cpp | 53 +++--- 7 files changed, 214 insertions(+), 176 deletions(-) diff --git a/src/libtomahawk/database/databasecommand_resolve.cpp b/src/libtomahawk/database/databasecommand_resolve.cpp index 2585f4443..b2233fc45 100644 --- a/src/libtomahawk/database/databasecommand_resolve.cpp +++ b/src/libtomahawk/database/databasecommand_resolve.cpp @@ -22,8 +22,6 @@ #include "sourcelist.h" #include "utils/logger.h" -#define MINSCORE 0.5 - using namespace Tomahawk; @@ -171,12 +169,6 @@ DatabaseCommand_Resolve::resolve( DatabaseImpl* lib ) } result->setAttributes( attr ); - - float score = how_similar( m_query, result ); - result->setScore( score ); - if ( score < MINSCORE ) - continue; - result->setCollection( s->collection() ); res << result; } @@ -306,122 +298,3 @@ DatabaseCommand_Resolve::fullTextResolve( DatabaseImpl* lib ) emit results( m_query->id(), res ); } - - -// TODO make clever (ft. featuring live (stuff) etc) -float -DatabaseCommand_Resolve::how_similar( const Tomahawk::query_ptr& q, const Tomahawk::result_ptr& r ) -{ - // query values - const QString qArtistname = DatabaseImpl::sortname( q->artist() ); - const QString qAlbumname = DatabaseImpl::sortname( q->album() ); - const QString qTrackname = DatabaseImpl::sortname( q->track() ); - - // result values - const QString rArtistname = DatabaseImpl::sortname( r->artist()->name() ); - const QString rAlbumname = DatabaseImpl::sortname( r->album()->name() ); - const QString rTrackname = DatabaseImpl::sortname( r->track() ); - - // normal edit distance - int artdist = levenshtein( qArtistname, rArtistname ); - int albdist = levenshtein( qAlbumname, rAlbumname ); - int trkdist = levenshtein( qTrackname, rTrackname ); - - // max length of name - int mlart = qMax( qArtistname.length(), rArtistname.length() ); - int mlalb = qMax( qAlbumname.length(), rAlbumname.length() ); - int mltrk = qMax( qTrackname.length(), rTrackname.length() ); - - // distance scores - float dcart = (float)( mlart - artdist ) / mlart; - float dcalb = (float)( mlalb - albdist ) / mlalb; - float dctrk = (float)( mltrk - trkdist ) / mltrk; - - // don't penalize for missing album name - if( qAlbumname.length() == 0 ) - dcalb = 1.0; - - // weighted, so album match is worth less than track title - float combined = ( dcart*4 + dcalb + dctrk*5 ) / 10; - return combined; -} - - -int -DatabaseCommand_Resolve::levenshtein( const QString& source, const QString& target ) -{ - // Step 1 - const int n = source.length(); - const int m = target.length(); - - if ( n == 0 ) - return m; - if ( m == 0 ) - return n; - - // Good form to declare a TYPEDEF - typedef QVector< QVector > Tmatrix; - Tmatrix matrix; - matrix.resize( n + 1 ); - - // Size the vectors in the 2.nd dimension. Unfortunately C++ doesn't - // allow for allocation on declaration of 2.nd dimension of vec of vec - for ( int i = 0; i <= n; i++ ) - { - QVector tmp; - tmp.resize( m + 1 ); - matrix.insert( i, tmp ); - } - - // Step 2 - for ( int i = 0; i <= n; i++ ) - matrix[i][0] = i; - for ( int j = 0; j <= m; j++ ) - matrix[0][j] = j; - - // Step 3 - for ( int i = 1; i <= n; i++ ) - { - const QChar s_i = source[i - 1]; - - // Step 4 - for ( int j = 1; j <= m; j++ ) - { - const QChar t_j = target[j - 1]; - - // Step 5 - int cost; - if ( s_i == t_j ) - cost = 0; - else - cost = 1; - - // Step 6 - const int above = matrix[i - 1][j]; - const int left = matrix[i][j - 1]; - const int diag = matrix[i - 1][j - 1]; - - int cell = ( ((left + 1) > (diag + cost)) ? diag + cost : left + 1 ); - if( above + 1 < cell ) - cell = above + 1; - - // Step 6A: Cover transposition, in addition to deletion, - // insertion and substitution. This step is taken from: - // Berghel, Hal ; Roach, David : "An Extension of Ukkonen's - // Enhanced Dynamic Programming ASM Algorithm" - // (http://www.acm.org/~hlb/publications/asm/asm.html) - if ( i > 2 && j > 2 ) - { - int trans = matrix[i - 2][j - 2] + 1; - - if ( source[ i - 2 ] != t_j ) trans++; - if ( s_i != target[ j - 2 ] ) trans++; - if ( cell > trans) cell = trans; - } - matrix[i][j] = cell; - } - } - - // Step 7 - return matrix[n][m]; -} diff --git a/src/libtomahawk/database/databasecommand_resolve.h b/src/libtomahawk/database/databasecommand_resolve.h index bc46d0de5..75fca7bf5 100644 --- a/src/libtomahawk/database/databasecommand_resolve.h +++ b/src/libtomahawk/database/databasecommand_resolve.h @@ -48,9 +48,6 @@ private: void resolve( DatabaseImpl* lib ); Tomahawk::query_ptr m_query; - - float how_similar( const Tomahawk::query_ptr& q, const Tomahawk::result_ptr& r ); - static int levenshtein( const QString& source, const QString& target ); }; #endif // DATABASECOMMAND_RESOLVE_H diff --git a/src/libtomahawk/pipeline.cpp b/src/libtomahawk/pipeline.cpp index fc9789b6e..9bd7c052c 100644 --- a/src/libtomahawk/pipeline.cpp +++ b/src/libtomahawk/pipeline.cpp @@ -28,6 +28,7 @@ #define DEFAULT_CONCURRENT_QUERIES 4 #define MAX_CONCURRENT_QUERIES 16 #define CLEANUP_TIMEOUT 5 * 60 * 1000 +#define MINSCORE 0.5 using namespace Tomahawk; @@ -174,12 +175,23 @@ Pipeline::reportResults( QID qid, const QList< result_ptr >& results ) tDebug() << "Result arrived too late for:" << qid; return; } - const query_ptr& q = m_qids.value( qid ); - if ( !results.isEmpty() ) + + QList< result_ptr > cleanResults; + foreach( const result_ptr& r, results ) { - q->addResults( results ); - foreach( const result_ptr& r, q->results() ) + float score = q->howSimilar( r ); + r->setScore( score ); + if ( !q->isFullTextQuery() && score < MINSCORE ) + continue; + + cleanResults << r; + } + + if ( !cleanResults.isEmpty() ) + { + q->addResults( cleanResults ); + foreach( const result_ptr& r, cleanResults ) { m_rids.insert( r->id(), r ); } diff --git a/src/libtomahawk/query.cpp b/src/libtomahawk/query.cpp index ad0f7ad7d..4aa8e7444 100644 --- a/src/libtomahawk/query.cpp +++ b/src/libtomahawk/query.cpp @@ -24,6 +24,7 @@ #include "database/databasecommand_logplayback.h" #include "database/databasecommand_playbackhistory.h" #include "database/databasecommand_loadplaylistentries.h" +#include "album.h" #include "collection.h" #include "pipeline.h" #include "resolver.h" @@ -61,15 +62,13 @@ Query::get( const QString& query, const QID& qid ) Query::Query( const QString& artist, const QString& track, const QString& album, const QID& qid, bool autoResolve ) - : m_solved( false ) - , m_playable( false ) - , m_resolveFinished( false ) - , m_qid( qid ) + : m_qid( qid ) , m_artist( artist ) , m_album( album ) , m_track( track ) - , m_duration( -1 ) { + init(); + if ( autoResolve ) { connect( Database::instance(), SIGNAL( indexReady() ), SLOT( refreshResults() ), Qt::QueuedConnection ); @@ -83,13 +82,11 @@ Query::Query( const QString& artist, const QString& track, const QString& album, Query::Query( const QString& query, const QID& qid ) - : m_solved( false ) - , m_playable( false ) - , m_resolveFinished( false ) - , m_qid( qid ) + : m_qid( qid ) , m_fullTextQuery( query ) - , m_duration( -1 ) { + init(); + if ( !qid.isEmpty() ) { connect( Database::instance(), SIGNAL( indexReady() ), SLOT( refreshResults() ), Qt::QueuedConnection ); @@ -103,6 +100,36 @@ Query::~Query() } +void +Query::init() +{ + m_resolveFinished = false; + m_solved = false; + m_playable = false; + m_duration = -1; + + updateSortNames(); +} + + +void +Query::updateSortNames() +{ + if ( isFullTextQuery() ) + { + m_artistSortname = DatabaseImpl::sortname( m_fullTextQuery ); + m_albumSortname = m_artistSortname; + m_trackSortname = m_artistSortname; + } + else + { + m_artistSortname = DatabaseImpl::sortname( m_artist ); + m_albumSortname = DatabaseImpl::sortname( m_album ); + m_trackSortname = DatabaseImpl::sortname( m_track ); + } +} + + void Query::addResults( const QList< Tomahawk::result_ptr >& newresults ) { @@ -359,3 +386,125 @@ Query::toString() const { return QString( "Query(%1, %2 - %3)" ).arg( id() ).arg( artist() ).arg( track() ); } + + +// TODO make clever (ft. featuring live (stuff) etc) +float +Query::howSimilar( const Tomahawk::result_ptr& r ) +{ + // result values + const QString rArtistname = DatabaseImpl::sortname( r->artist()->name() ); + const QString rAlbumname = DatabaseImpl::sortname( r->album()->name() ); + const QString rTrackname = DatabaseImpl::sortname( r->track() ); + + // normal edit distance + int artdist = levenshtein( m_artistSortname, rArtistname ); + int albdist = levenshtein( m_albumSortname, rAlbumname ); + int trkdist = levenshtein( m_trackSortname, rTrackname ); + + // max length of name + int mlart = qMax( m_artistSortname.length(), rArtistname.length() ); + int mlalb = qMax( m_albumSortname.length(), rAlbumname.length() ); + int mltrk = qMax( m_trackSortname.length(), rTrackname.length() ); + + // distance scores + float dcart = (float)( mlart - artdist ) / mlart; + float dcalb = (float)( mlalb - albdist ) / mlalb; + float dctrk = (float)( mltrk - trkdist ) / mltrk; + + if ( isFullTextQuery() ) + { + float res = qMax( dcart, dcalb ); + return qMax( res, dctrk ); + } + else + { + // don't penalize for missing album name + if ( m_albumSortname.isEmpty() || rAlbumname.isEmpty() ) + dcalb = 1.0; + + // weighted, so album match is worth less than track title + float combined = ( dcart * 4 + dcalb + dctrk * 5 ) / 10; + return combined; + } +} + + +int +Query::levenshtein( const QString& source, const QString& target ) +{ + // Step 1 + const int n = source.length(); + const int m = target.length(); + + if ( n == 0 ) + return m; + if ( m == 0 ) + return n; + + // Good form to declare a TYPEDEF + typedef QVector< QVector > Tmatrix; + Tmatrix matrix; + matrix.resize( n + 1 ); + + // Size the vectors in the 2.nd dimension. Unfortunately C++ doesn't + // allow for allocation on declaration of 2.nd dimension of vec of vec + for ( int i = 0; i <= n; i++ ) + { + QVector tmp; + tmp.resize( m + 1 ); + matrix.insert( i, tmp ); + } + + // Step 2 + for ( int i = 0; i <= n; i++ ) + matrix[i][0] = i; + for ( int j = 0; j <= m; j++ ) + matrix[0][j] = j; + + // Step 3 + for ( int i = 1; i <= n; i++ ) + { + const QChar s_i = source[i - 1]; + + // Step 4 + for ( int j = 1; j <= m; j++ ) + { + const QChar t_j = target[j - 1]; + + // Step 5 + int cost; + if ( s_i == t_j ) + cost = 0; + else + cost = 1; + + // Step 6 + const int above = matrix[i - 1][j]; + const int left = matrix[i][j - 1]; + const int diag = matrix[i - 1][j - 1]; + + int cell = ( ( ( left + 1 ) > ( diag + cost ) ) ? diag + cost : left + 1 ); + if ( above + 1 < cell ) + cell = above + 1; + + // Step 6A: Cover transposition, in addition to deletion, + // insertion and substitution. This step is taken from: + // Berghel, Hal ; Roach, David : "An Extension of Ukkonen's + // Enhanced Dynamic Programming ASM Algorithm" + // (http://www.acm.org/~hlb/publications/asm/asm.html) + if ( i > 2 && j > 2 ) + { + int trans = matrix[i - 2][j - 2] + 1; + + if ( source[ i - 2 ] != t_j ) trans++; + if ( s_i != target[ j - 2 ] ) trans++; + if ( cell > trans ) cell = trans; + } + matrix[i][j] = cell; + } + } + + // Step 7 + return matrix[n][m]; +} diff --git a/src/libtomahawk/query.h b/src/libtomahawk/query.h index 394773fc1..dca702e27 100644 --- a/src/libtomahawk/query.h +++ b/src/libtomahawk/query.h @@ -75,14 +75,15 @@ public: QString fullTextQuery() const { return m_fullTextQuery; } bool isFullTextQuery() const { return !m_fullTextQuery.isEmpty(); } bool resolvingFinished() const { return m_resolveFinished; } + float howSimilar( const Tomahawk::result_ptr& r ); QPair< Tomahawk::source_ptr, unsigned int > playedBy() const { return m_playedBy; } Tomahawk::Resolver* currentResolver() const; QList< QWeakPointer< Tomahawk::Resolver > > resolvedBy() const { return m_resolvers; } - void setArtist( const QString& artist ) { m_artist = artist; } - void setAlbum( const QString& album ) { m_album = album; } - void setTrack( const QString& track ) { m_track = track; } + void setArtist( const QString& artist ) { m_artist = artist; updateSortNames(); } + void setAlbum( const QString& album ) { m_album = album; updateSortNames(); } + void setTrack( const QString& track ) { m_track = track; updateSortNames(); } void setResultHint( const QString& resultHint ) { m_resultHint = resultHint; } void setDuration( int duration ) { m_duration = duration; } @@ -122,17 +123,25 @@ private slots: void refreshResults(); private: - void setCurrentResolver( Tomahawk::Resolver* resolver ); + void init(); + void setCurrentResolver( Tomahawk::Resolver* resolver ); void clearResults(); void checkResults(); + void updateSortNames(); + static int levenshtein( const QString& source, const QString& target ); + QList< Tomahawk::result_ptr > m_results; bool m_solved; bool m_playable; bool m_resolveFinished; mutable QID m_qid; + QString m_artistSortname; + QString m_albumSortname; + QString m_trackSortname; + QString m_artist; QString m_album; QString m_track; diff --git a/src/resolvers/qtscriptresolver.cpp b/src/resolvers/qtscriptresolver.cpp index 9f4577105..905401385 100644 --- a/src/resolvers/qtscriptresolver.cpp +++ b/src/resolvers/qtscriptresolver.cpp @@ -324,7 +324,6 @@ QtScriptResolver::parseResultVariantList( const QVariantList& reslist ) rp->setBitrate( m.value( "bitrate" ).toUInt() ); rp->setUrl( m.value( "url" ).toString() ); rp->setSize( m.value( "size" ).toUInt() ); - rp->setScore( m.value( "score" ).toFloat() * ( (float)weight() / 100.0 ) ); rp->setRID( uuid() ); rp->setFriendlySource( name() ); diff --git a/src/resolvers/scriptresolver.cpp b/src/resolvers/scriptresolver.cpp index 4bd9c3b53..8511c32b5 100644 --- a/src/resolvers/scriptresolver.cpp +++ b/src/resolvers/scriptresolver.cpp @@ -44,16 +44,16 @@ ScriptResolver::ScriptResolver( const QString& exe ) connect( &m_proc, SIGNAL( readyReadStandardOutput() ), SLOT( readStdout() ) ); connect( &m_proc, SIGNAL( finished( int, QProcess::ExitStatus ) ), SLOT( cmdExited( int, QProcess::ExitStatus ) ) ); - QString pathToCheck = filePath(); + QString runPath = filePath(); #ifdef WIN32 // have to enclose in quotes if path contains spaces on windows... - setFilePath( QString( "\"%1\"" ).arg( filePath() ) ); + runPath = QString( "\"%1\"" ).arg( filePath() ); #endif - if( !QFile::exists( pathToCheck ) ) + if ( !QFile::exists( filePath() ) ) m_error = Tomahawk::ExternalResolver::FileNotFound; else - m_proc.start( filePath() ); + m_proc.start( runPath ); if ( !TomahawkUtils::nam() ) return; @@ -69,7 +69,7 @@ ScriptResolver::~ScriptResolver() Tomahawk::Pipeline::instance()->removeResolver( this ); - if( !m_configWidget.isNull() ) + if ( !m_configWidget.isNull() ) delete m_configWidget.data(); } @@ -77,11 +77,11 @@ ScriptResolver::~ScriptResolver() void ScriptResolver::sendConfig() { - // Send a configutaion message with any information the resolver might need // For now, only the proxy information is sent QVariantMap m; m.insert( "_msgtype", "config" ); + TomahawkUtils::NetworkProxyFactory* factory = dynamic_cast( TomahawkUtils::nam()->proxyFactory() ); QNetworkProxy proxy = factory->proxy(); QString proxyType = ( proxy.type() == QNetworkProxy::Socks5Proxy ? "socks5" : "none" ); @@ -90,11 +90,13 @@ ScriptResolver::sendConfig() m.insert( "proxyport", proxy.port() ); m.insert( "proxyuser", proxy.user() ); m.insert( "proxypass", proxy.password() ); + // QJson sucks QVariantList hosts; foreach ( const QString& host, factory->noProxyHosts() ) hosts << host; m.insert( "noproxyhosts", hosts ); + QByteArray data = m_serializer.serialize( m ); sendMsg( data ); } @@ -103,7 +105,7 @@ ScriptResolver::sendConfig() void ScriptResolver::reload() { - if( !QFile::exists( filePath() ) ) + if ( !QFile::exists( filePath() ) ) m_error = Tomahawk::ExternalResolver::FileNotFound; else { @@ -132,29 +134,28 @@ ScriptResolver::error() const void ScriptResolver::readStdout() { -// qDebug() << Q_FUNC_INFO << m_proc.bytesAvailable(); - - if( m_msgsize == 0 ) + if ( m_msgsize == 0 ) { - if( m_proc.bytesAvailable() < 4 ) return; + if ( m_proc.bytesAvailable() < 4 ) + return; + quint32 len_nbo; m_proc.read( (char*) &len_nbo, 4 ); m_msgsize = qFromBigEndian( len_nbo ); -// qDebug() << Q_FUNC_INFO << "msgsize" << m_msgsize; } - if( m_msgsize > 0 ) + if ( m_msgsize > 0 ) { m_msg.append( m_proc.read( m_msgsize - m_msg.length() ) ); } - if( m_msgsize == (quint32) m_msg.length() ) + if ( m_msgsize == (quint32) m_msg.length() ) { handleMsg( m_msg ); m_msgsize = 0; m_msg.clear(); - if( m_proc.bytesAvailable() ) + if ( m_proc.bytesAvailable() ) QTimer::singleShot( 0, this, SLOT( readStdout() ) ); } } @@ -164,7 +165,7 @@ void ScriptResolver::sendMsg( const QByteArray& msg ) { // qDebug() << Q_FUNC_INFO << m_ready << msg << msg.length(); - if( !m_proc.isOpen() ) + if ( !m_proc.isOpen() ) return; quint32 len; @@ -181,7 +182,7 @@ ScriptResolver::handleMsg( const QByteArray& msg ) bool ok; QVariant v = m_parser.parse( msg, &ok ); - if( !ok || v.type() != QVariant::Map ) + if ( !ok || v.type() != QVariant::Map ) { Q_ASSERT(false); return; @@ -189,18 +190,18 @@ ScriptResolver::handleMsg( const QByteArray& msg ) QVariantMap m = v.toMap(); QString msgtype = m.value( "_msgtype" ).toString(); - if( msgtype == "settings" ) + if ( msgtype == "settings" ) { doSetup( m ); return; } - else if( msgtype == "confwidget" ) + else if ( msgtype == "confwidget" ) { setupConfWidget( m ); return; } - if( msgtype == "results" ) + if ( msgtype == "results" ) { const QString qid = m.value( "qid" ).toString(); QList< Tomahawk::result_ptr > results; @@ -220,7 +221,6 @@ ScriptResolver::handleMsg( const QByteArray& msg ) rp->setBitrate( m.value( "bitrate" ).toUInt() ); rp->setUrl( m.value( "url" ).toString() ); rp->setSize( m.value( "size" ).toUInt() ); - rp->setScore( m.value( "score" ).toFloat() * ( (float)weight() / 100.0 ) ); rp->setRID( uuid() ); rp->setFriendlySource( m_name ); @@ -252,7 +252,7 @@ ScriptResolver::cmdExited( int code, QProcess::ExitStatus status ) tLog() << Q_FUNC_INFO << "SCRIPT EXITED, code" << code << "status" << status << filePath(); Tomahawk::Pipeline::instance()->removeResolver( this ); - if( m_stopped ) + if ( m_stopped ) { tLog() << "*** Script resolver stopped "; emit finished(); @@ -260,7 +260,7 @@ ScriptResolver::cmdExited( int code, QProcess::ExitStatus status ) return; } - if( m_num_restarts < 10 ) + if ( m_num_restarts < 10 ) { m_num_restarts++; tLog() << "*** Restart num" << m_num_restarts; @@ -326,7 +326,7 @@ ScriptResolver::setupConfWidget( const QVariantMap& m ) else uiData = QByteArray::fromBase64( uiData ); - if( m.contains( "images" ) ) + if ( m.contains( "images" ) ) uiData = fixDataImagePaths( uiData, compressed, m[ "images" ].toMap() ); m_configWidget = QWeakPointer< QWidget >( widgetFromData( uiData, 0 ) ); @@ -344,7 +344,7 @@ ScriptResolver::saveConfig() QVariant widgets = configMsgFromWidget( m_configWidget.data() ); m.insert( "widgets", widgets ); QByteArray data = m_serializer.serialize( m ); -// qDebug() << "Got widgets and data;" << widgets << data; + sendMsg( data ); } @@ -352,7 +352,7 @@ ScriptResolver::saveConfig() QWidget* ScriptResolver::configUI() const { - if( m_configWidget.isNull() ) + if ( m_configWidget.isNull() ) return 0; else return m_configWidget.data(); @@ -363,6 +363,5 @@ void ScriptResolver::stop() { m_stopped = true; -// qDebug() << "KILLING PROCESS!"; m_proc.kill(); }