1
0
mirror of https://github.com/tomahawk-player/tomahawk.git synced 2025-03-19 15:29:42 +01:00

* Improved score calculation and handling. Tomahawk now calculates all the scores internally and doesn't rely on resolvers providing it anymore.

This commit is contained in:
Christian Muehlhaeuser 2011-09-06 04:40:08 +02:00
parent 8fa8f9b733
commit 7fcdbe9c9e
7 changed files with 214 additions and 176 deletions

View File

@ -22,8 +22,6 @@
#include "sourcelist.h"
#include "utils/logger.h"
#define MINSCORE 0.5
using namespace Tomahawk;
@ -171,12 +169,6 @@ DatabaseCommand_Resolve::resolve( DatabaseImpl* lib )
}
result->setAttributes( attr );
float score = how_similar( m_query, result );
result->setScore( score );
if ( score < MINSCORE )
continue;
result->setCollection( s->collection() );
res << result;
}
@ -306,122 +298,3 @@ DatabaseCommand_Resolve::fullTextResolve( DatabaseImpl* lib )
emit results( m_query->id(), res );
}
// TODO make clever (ft. featuring live (stuff) etc)
// Scores how closely result r matches query q.
//
// Artist, album and track names are normalised with DatabaseImpl::sortname()
// and compared by edit distance. Each distance is turned into a similarity
// ( maxLength - distance ) / maxLength, and the three similarities are
// combined with weights 4 (artist), 1 (album) and 5 (track), divided by 10.
//
// q: the query whose artist/album/track are the reference values.
// r: the candidate result to score.
// Returns the weighted similarity; 1.0 when every compared name is identical.
float
DatabaseCommand_Resolve::how_similar( const Tomahawk::query_ptr& q, const Tomahawk::result_ptr& r )
{
    // query values
    const QString qArtistname = DatabaseImpl::sortname( q->artist() );
    const QString qAlbumname  = DatabaseImpl::sortname( q->album() );
    const QString qTrackname  = DatabaseImpl::sortname( q->track() );

    // result values
    const QString rArtistname = DatabaseImpl::sortname( r->artist()->name() );
    const QString rAlbumname  = DatabaseImpl::sortname( r->album()->name() );
    const QString rTrackname  = DatabaseImpl::sortname( r->track() );

    // normal edit distance
    const int artdist = levenshtein( qArtistname, rArtistname );
    const int albdist = levenshtein( qAlbumname, rAlbumname );
    const int trkdist = levenshtein( qTrackname, rTrackname );

    // max length of name
    const int mlart = qMax( qArtistname.length(), rArtistname.length() );
    const int mlalb = qMax( qAlbumname.length(), rAlbumname.length() );
    const int mltrk = qMax( qTrackname.length(), rTrackname.length() );

    // distance scores; when both names are empty the max length is 0, so
    // treat them as identical instead of computing 0 / 0 (which yields NaN)
    const float dcart = mlart > 0 ? static_cast<float>( mlart - artdist ) / mlart : 1.0f;
    float dcalb       = mlalb > 0 ? static_cast<float>( mlalb - albdist ) / mlalb : 1.0f;
    const float dctrk = mltrk > 0 ? static_cast<float>( mltrk - trkdist ) / mltrk : 1.0f;

    // don't penalize for missing album name
    if ( qAlbumname.length() == 0 )
        dcalb = 1.0f;

    // weighted, so album match is worth less than track title
    return ( dcart * 4 + dcalb + dctrk * 5 ) / 10;
}
int
DatabaseCommand_Resolve::levenshtein( const QString& source, const QString& target )
{
// Step 1
const int n = source.length();
const int m = target.length();
if ( n == 0 )
return m;
if ( m == 0 )
return n;
// Good form to declare a TYPEDEF
typedef QVector< QVector<int> > Tmatrix;
Tmatrix matrix;
matrix.resize( n + 1 );
// Size the vectors in the 2.nd dimension. Unfortunately C++ doesn't
// allow for allocation on declaration of 2.nd dimension of vec of vec
for ( int i = 0; i <= n; i++ )
{
QVector<int> tmp;
tmp.resize( m + 1 );
matrix.insert( i, tmp );
}
// Step 2
for ( int i = 0; i <= n; i++ )
matrix[i][0] = i;
for ( int j = 0; j <= m; j++ )
matrix[0][j] = j;
// Step 3
for ( int i = 1; i <= n; i++ )
{
const QChar s_i = source[i - 1];
// Step 4
for ( int j = 1; j <= m; j++ )
{
const QChar t_j = target[j - 1];
// Step 5
int cost;
if ( s_i == t_j )
cost = 0;
else
cost = 1;
// Step 6
const int above = matrix[i - 1][j];
const int left = matrix[i][j - 1];
const int diag = matrix[i - 1][j - 1];
int cell = ( ((left + 1) > (diag + cost)) ? diag + cost : left + 1 );
if( above + 1 < cell )
cell = above + 1;
// Step 6A: Cover transposition, in addition to deletion,
// insertion and substitution. This step is taken from:
// Berghel, Hal ; Roach, David : "An Extension of Ukkonen's
// Enhanced Dynamic Programming ASM Algorithm"
// (http://www.acm.org/~hlb/publications/asm/asm.html)
if ( i > 2 && j > 2 )
{
int trans = matrix[i - 2][j - 2] + 1;
if ( source[ i - 2 ] != t_j ) trans++;
if ( s_i != target[ j - 2 ] ) trans++;
if ( cell > trans) cell = trans;
}
matrix[i][j] = cell;
}
}
// Step 7
return matrix[n][m];
}

View File

@ -48,9 +48,6 @@ private:
void resolve( DatabaseImpl* lib );
Tomahawk::query_ptr m_query;
float how_similar( const Tomahawk::query_ptr& q, const Tomahawk::result_ptr& r );
static int levenshtein( const QString& source, const QString& target );
};
#endif // DATABASECOMMAND_RESOLVE_H

View File

@ -28,6 +28,7 @@
#define DEFAULT_CONCURRENT_QUERIES 4
#define MAX_CONCURRENT_QUERIES 16
#define CLEANUP_TIMEOUT 5 * 60 * 1000
#define MINSCORE 0.5
using namespace Tomahawk;
@ -174,12 +175,23 @@ Pipeline::reportResults( QID qid, const QList< result_ptr >& results )
tDebug() << "Result arrived too late for:" << qid;
return;
}
const query_ptr& q = m_qids.value( qid );
if ( !results.isEmpty() )
QList< result_ptr > cleanResults;
foreach( const result_ptr& r, results )
{
q->addResults( results );
foreach( const result_ptr& r, q->results() )
float score = q->howSimilar( r );
r->setScore( score );
if ( !q->isFullTextQuery() && score < MINSCORE )
continue;
cleanResults << r;
}
if ( !cleanResults.isEmpty() )
{
q->addResults( cleanResults );
foreach( const result_ptr& r, cleanResults )
{
m_rids.insert( r->id(), r );
}

View File

@ -24,6 +24,7 @@
#include "database/databasecommand_logplayback.h"
#include "database/databasecommand_playbackhistory.h"
#include "database/databasecommand_loadplaylistentries.h"
#include "album.h"
#include "collection.h"
#include "pipeline.h"
#include "resolver.h"
@ -61,15 +62,13 @@ Query::get( const QString& query, const QID& qid )
Query::Query( const QString& artist, const QString& track, const QString& album, const QID& qid, bool autoResolve )
: m_solved( false )
, m_playable( false )
, m_resolveFinished( false )
, m_qid( qid )
: m_qid( qid )
, m_artist( artist )
, m_album( album )
, m_track( track )
, m_duration( -1 )
{
init();
if ( autoResolve )
{
connect( Database::instance(), SIGNAL( indexReady() ), SLOT( refreshResults() ), Qt::QueuedConnection );
@ -83,13 +82,11 @@ Query::Query( const QString& artist, const QString& track, const QString& album,
Query::Query( const QString& query, const QID& qid )
: m_solved( false )
, m_playable( false )
, m_resolveFinished( false )
, m_qid( qid )
: m_qid( qid )
, m_fullTextQuery( query )
, m_duration( -1 )
{
init();
if ( !qid.isEmpty() )
{
connect( Database::instance(), SIGNAL( indexReady() ), SLOT( refreshResults() ), Qt::QueuedConnection );
@ -103,6 +100,36 @@ Query::~Query()
}
// Puts the query into its initial state: not solved, not playable, resolving
// not finished, duration unknown (-1), and refreshes the sort names.
void
Query::init()
{
    m_solved = false;
    m_playable = false;
    m_resolveFinished = false;
    m_duration = -1;

    updateSortNames();
}
void
Query::updateSortNames()
{
if ( isFullTextQuery() )
{
m_artistSortname = DatabaseImpl::sortname( m_fullTextQuery );
m_albumSortname = m_artistSortname;
m_trackSortname = m_artistSortname;
}
else
{
m_artistSortname = DatabaseImpl::sortname( m_artist );
m_albumSortname = DatabaseImpl::sortname( m_album );
m_trackSortname = DatabaseImpl::sortname( m_track );
}
}
void
Query::addResults( const QList< Tomahawk::result_ptr >& newresults )
{
@ -359,3 +386,125 @@ Query::toString() const
{
return QString( "Query(%1, %2 - %3)" ).arg( id() ).arg( artist() ).arg( track() );
}
// TODO make clever (ft. featuring live (stuff) etc)
// Scores how closely result r matches this query.
//
// The result's artist, album and track names are normalised with
// DatabaseImpl::sortname() and compared by edit distance against the query's
// sort names. Each distance becomes a similarity
// ( maxLength - distance ) / maxLength.
//
// Full-text queries return the best of the three similarities. Other queries
// return the weighted mean: artist x4, album x1, track x5, divided by 10.
//
// r: the candidate result to score.
// Returns the similarity; 1.0 means the compared names are identical.
float
Query::howSimilar( const Tomahawk::result_ptr& r )
{
    // result values
    const QString rArtistname = DatabaseImpl::sortname( r->artist()->name() );
    const QString rAlbumname  = DatabaseImpl::sortname( r->album()->name() );
    const QString rTrackname  = DatabaseImpl::sortname( r->track() );

    // normal edit distance
    const int artdist = levenshtein( m_artistSortname, rArtistname );
    const int albdist = levenshtein( m_albumSortname, rAlbumname );
    const int trkdist = levenshtein( m_trackSortname, rTrackname );

    // max length of name
    const int mlart = qMax( m_artistSortname.length(), rArtistname.length() );
    const int mlalb = qMax( m_albumSortname.length(), rAlbumname.length() );
    const int mltrk = qMax( m_trackSortname.length(), rTrackname.length() );

    // distance scores; when both names are empty the max length is 0, so
    // treat them as identical instead of computing 0 / 0 (which yields NaN)
    const float dcart = mlart > 0 ? static_cast<float>( mlart - artdist ) / mlart : 1.0f;
    float dcalb       = mlalb > 0 ? static_cast<float>( mlalb - albdist ) / mlalb : 1.0f;
    const float dctrk = mltrk > 0 ? static_cast<float>( mltrk - trkdist ) / mltrk : 1.0f;

    if ( isFullTextQuery() )
    {
        // the query text may match any one of the fields; take the best
        return qMax( qMax( dcart, dcalb ), dctrk );
    }

    // don't penalize for missing album name
    if ( m_albumSortname.isEmpty() || rAlbumname.isEmpty() )
        dcalb = 1.0f;

    // weighted, so album match is worth less than track title
    return ( dcart * 4 + dcalb + dctrk * 5 ) / 10;
}
int
Query::levenshtein( const QString& source, const QString& target )
{
// Step 1
const int n = source.length();
const int m = target.length();
if ( n == 0 )
return m;
if ( m == 0 )
return n;
// Good form to declare a TYPEDEF
typedef QVector< QVector<int> > Tmatrix;
Tmatrix matrix;
matrix.resize( n + 1 );
// Size the vectors in the 2.nd dimension. Unfortunately C++ doesn't
// allow for allocation on declaration of 2.nd dimension of vec of vec
for ( int i = 0; i <= n; i++ )
{
QVector<int> tmp;
tmp.resize( m + 1 );
matrix.insert( i, tmp );
}
// Step 2
for ( int i = 0; i <= n; i++ )
matrix[i][0] = i;
for ( int j = 0; j <= m; j++ )
matrix[0][j] = j;
// Step 3
for ( int i = 1; i <= n; i++ )
{
const QChar s_i = source[i - 1];
// Step 4
for ( int j = 1; j <= m; j++ )
{
const QChar t_j = target[j - 1];
// Step 5
int cost;
if ( s_i == t_j )
cost = 0;
else
cost = 1;
// Step 6
const int above = matrix[i - 1][j];
const int left = matrix[i][j - 1];
const int diag = matrix[i - 1][j - 1];
int cell = ( ( ( left + 1 ) > ( diag + cost ) ) ? diag + cost : left + 1 );
if ( above + 1 < cell )
cell = above + 1;
// Step 6A: Cover transposition, in addition to deletion,
// insertion and substitution. This step is taken from:
// Berghel, Hal ; Roach, David : "An Extension of Ukkonen's
// Enhanced Dynamic Programming ASM Algorithm"
// (http://www.acm.org/~hlb/publications/asm/asm.html)
if ( i > 2 && j > 2 )
{
int trans = matrix[i - 2][j - 2] + 1;
if ( source[ i - 2 ] != t_j ) trans++;
if ( s_i != target[ j - 2 ] ) trans++;
if ( cell > trans ) cell = trans;
}
matrix[i][j] = cell;
}
}
// Step 7
return matrix[n][m];
}

View File

@ -75,14 +75,15 @@ public:
QString fullTextQuery() const { return m_fullTextQuery; }
bool isFullTextQuery() const { return !m_fullTextQuery.isEmpty(); }
bool resolvingFinished() const { return m_resolveFinished; }
float howSimilar( const Tomahawk::result_ptr& r );
QPair< Tomahawk::source_ptr, unsigned int > playedBy() const { return m_playedBy; }
Tomahawk::Resolver* currentResolver() const;
QList< QWeakPointer< Tomahawk::Resolver > > resolvedBy() const { return m_resolvers; }
void setArtist( const QString& artist ) { m_artist = artist; }
void setAlbum( const QString& album ) { m_album = album; }
void setTrack( const QString& track ) { m_track = track; }
void setArtist( const QString& artist ) { m_artist = artist; updateSortNames(); }
void setAlbum( const QString& album ) { m_album = album; updateSortNames(); }
void setTrack( const QString& track ) { m_track = track; updateSortNames(); }
void setResultHint( const QString& resultHint ) { m_resultHint = resultHint; }
void setDuration( int duration ) { m_duration = duration; }
@ -122,17 +123,25 @@ private slots:
void refreshResults();
private:
void setCurrentResolver( Tomahawk::Resolver* resolver );
void init();
void setCurrentResolver( Tomahawk::Resolver* resolver );
void clearResults();
void checkResults();
void updateSortNames();
static int levenshtein( const QString& source, const QString& target );
QList< Tomahawk::result_ptr > m_results;
bool m_solved;
bool m_playable;
bool m_resolveFinished;
mutable QID m_qid;
QString m_artistSortname;
QString m_albumSortname;
QString m_trackSortname;
QString m_artist;
QString m_album;
QString m_track;

View File

@ -324,7 +324,6 @@ QtScriptResolver::parseResultVariantList( const QVariantList& reslist )
rp->setBitrate( m.value( "bitrate" ).toUInt() );
rp->setUrl( m.value( "url" ).toString() );
rp->setSize( m.value( "size" ).toUInt() );
rp->setScore( m.value( "score" ).toFloat() * ( (float)weight() / 100.0 ) );
rp->setRID( uuid() );
rp->setFriendlySource( name() );

View File

@ -44,16 +44,16 @@ ScriptResolver::ScriptResolver( const QString& exe )
connect( &m_proc, SIGNAL( readyReadStandardOutput() ), SLOT( readStdout() ) );
connect( &m_proc, SIGNAL( finished( int, QProcess::ExitStatus ) ), SLOT( cmdExited( int, QProcess::ExitStatus ) ) );
QString pathToCheck = filePath();
QString runPath = filePath();
#ifdef WIN32
// have to enclose in quotes if path contains spaces on windows...
setFilePath( QString( "\"%1\"" ).arg( filePath() ) );
runPath = QString( "\"%1\"" ).arg( filePath() );
#endif
if( !QFile::exists( pathToCheck ) )
if ( !QFile::exists( filePath() ) )
m_error = Tomahawk::ExternalResolver::FileNotFound;
else
m_proc.start( filePath() );
m_proc.start( runPath );
if ( !TomahawkUtils::nam() )
return;
@ -69,7 +69,7 @@ ScriptResolver::~ScriptResolver()
Tomahawk::Pipeline::instance()->removeResolver( this );
if( !m_configWidget.isNull() )
if ( !m_configWidget.isNull() )
delete m_configWidget.data();
}
@ -77,11 +77,11 @@ ScriptResolver::~ScriptResolver()
void
ScriptResolver::sendConfig()
{
// Send a configuration message with any information the resolver might need
// For now, only the proxy information is sent
QVariantMap m;
m.insert( "_msgtype", "config" );
TomahawkUtils::NetworkProxyFactory* factory = dynamic_cast<TomahawkUtils::NetworkProxyFactory*>( TomahawkUtils::nam()->proxyFactory() );
QNetworkProxy proxy = factory->proxy();
QString proxyType = ( proxy.type() == QNetworkProxy::Socks5Proxy ? "socks5" : "none" );
@ -90,11 +90,13 @@ ScriptResolver::sendConfig()
m.insert( "proxyport", proxy.port() );
m.insert( "proxyuser", proxy.user() );
m.insert( "proxypass", proxy.password() );
// QJson sucks
QVariantList hosts;
foreach ( const QString& host, factory->noProxyHosts() )
hosts << host;
m.insert( "noproxyhosts", hosts );
QByteArray data = m_serializer.serialize( m );
sendMsg( data );
}
@ -103,7 +105,7 @@ ScriptResolver::sendConfig()
void
ScriptResolver::reload()
{
if( !QFile::exists( filePath() ) )
if ( !QFile::exists( filePath() ) )
m_error = Tomahawk::ExternalResolver::FileNotFound;
else
{
@ -132,29 +134,28 @@ ScriptResolver::error() const
// Reads framed messages from the resolver process' stdout.
//
// Each message is a 4-byte big-endian length followed by that many bytes of
// payload. The payload is accumulated in m_msg across calls until it is
// complete, then passed to handleMsg(). If more data is still pending after a
// full message, another call is queued through a zero-delay single-shot timer.
void
ScriptResolver::readStdout()
{
    if ( m_msgsize == 0 )
    {
        // no message in progress: wait until the whole length header is there
        if ( m_proc.bytesAvailable() < 4 )
            return;

        quint32 len_nbo;
        m_proc.read( (char*) &len_nbo, 4 );
        m_msgsize = qFromBigEndian( len_nbo );
    }

    if ( m_msgsize > 0 )
    {
        // read at most the bytes still missing from the current message
        m_msg.append( m_proc.read( m_msgsize - m_msg.length() ) );
    }

    if ( m_msgsize == (quint32) m_msg.length() )
    {
        handleMsg( m_msg );

        // reset the framing state for the next message
        m_msgsize = 0;
        m_msg.clear();

        if ( m_proc.bytesAvailable() )
            QTimer::singleShot( 0, this, SLOT( readStdout() ) );
    }
}
@ -164,7 +165,7 @@ void
ScriptResolver::sendMsg( const QByteArray& msg )
{
// qDebug() << Q_FUNC_INFO << m_ready << msg << msg.length();
if( !m_proc.isOpen() )
if ( !m_proc.isOpen() )
return;
quint32 len;
@ -181,7 +182,7 @@ ScriptResolver::handleMsg( const QByteArray& msg )
bool ok;
QVariant v = m_parser.parse( msg, &ok );
if( !ok || v.type() != QVariant::Map )
if ( !ok || v.type() != QVariant::Map )
{
Q_ASSERT(false);
return;
@ -189,18 +190,18 @@ ScriptResolver::handleMsg( const QByteArray& msg )
QVariantMap m = v.toMap();
QString msgtype = m.value( "_msgtype" ).toString();
if( msgtype == "settings" )
if ( msgtype == "settings" )
{
doSetup( m );
return;
}
else if( msgtype == "confwidget" )
else if ( msgtype == "confwidget" )
{
setupConfWidget( m );
return;
}
if( msgtype == "results" )
if ( msgtype == "results" )
{
const QString qid = m.value( "qid" ).toString();
QList< Tomahawk::result_ptr > results;
@ -220,7 +221,6 @@ ScriptResolver::handleMsg( const QByteArray& msg )
rp->setBitrate( m.value( "bitrate" ).toUInt() );
rp->setUrl( m.value( "url" ).toString() );
rp->setSize( m.value( "size" ).toUInt() );
rp->setScore( m.value( "score" ).toFloat() * ( (float)weight() / 100.0 ) );
rp->setRID( uuid() );
rp->setFriendlySource( m_name );
@ -252,7 +252,7 @@ ScriptResolver::cmdExited( int code, QProcess::ExitStatus status )
tLog() << Q_FUNC_INFO << "SCRIPT EXITED, code" << code << "status" << status << filePath();
Tomahawk::Pipeline::instance()->removeResolver( this );
if( m_stopped )
if ( m_stopped )
{
tLog() << "*** Script resolver stopped ";
emit finished();
@ -260,7 +260,7 @@ ScriptResolver::cmdExited( int code, QProcess::ExitStatus status )
return;
}
if( m_num_restarts < 10 )
if ( m_num_restarts < 10 )
{
m_num_restarts++;
tLog() << "*** Restart num" << m_num_restarts;
@ -326,7 +326,7 @@ ScriptResolver::setupConfWidget( const QVariantMap& m )
else
uiData = QByteArray::fromBase64( uiData );
if( m.contains( "images" ) )
if ( m.contains( "images" ) )
uiData = fixDataImagePaths( uiData, compressed, m[ "images" ].toMap() );
m_configWidget = QWeakPointer< QWidget >( widgetFromData( uiData, 0 ) );
@ -344,7 +344,7 @@ ScriptResolver::saveConfig()
QVariant widgets = configMsgFromWidget( m_configWidget.data() );
m.insert( "widgets", widgets );
QByteArray data = m_serializer.serialize( m );
// qDebug() << "Got widgets and data;" << widgets << data;
sendMsg( data );
}
@ -352,7 +352,7 @@ ScriptResolver::saveConfig()
QWidget*
ScriptResolver::configUI() const
{
if( m_configWidget.isNull() )
if ( m_configWidget.isNull() )
return 0;
else
return m_configWidget.data();
@ -363,6 +363,5 @@ void
ScriptResolver::stop()
{
m_stopped = true;
// qDebug() << "KILLING PROCESS!";
m_proc.kill();
}