はてブのお気に入り情報に基づくRank計算
まだ8317ユーザ分のデータしか無いのですが、とりあえず計算してみた。
べき乗法の反復回数を200回、減衰係数を0.85として計算したよ。
この辺を参照:
http://www.kusastro.kyoto-u.ac.jp/~baba/wais/pagerank.html
http://www.sem-r.com/sem/google/20030909005531.html
まず単純にお気に入られ数(データが十分でないので暫定数ですが。)で順位付け
順位 | hatenaid | お気に入られ数 |
---|---|---|
1 | xxxxx | 619 |
2 | xxxxxx | 425 |
3 | xxxxxxxxxxx | 393 |
4 | xxxxxx | 280 |
5 | xxxxxxxxx | 235 |
6 | xxxxxxxx | 199 |
7 | xxxxx | 193 |
8 | xxxxxx | 175 |
9 | xxxxxxxxx2 | 130 |
10 | xxxxxxxx | 124 |
11 | xxxxxxxxxxx | 120 |
12 | xxxxxxxxx | 120 |
13 | xxxxxxx | 116 |
14 | xxxxxxx | 112 |
15 | xxxx | 109 |
16 | xxxxxxxxxx | 108 |
17 | xxxxxxx | 106 |
18 | xxxxxxxx | 103 |
19 | xxxxxxx6 | 101 |
20 | xxxxxx | 99 |
そしてHatebu Favorite Rankで順位付け
順位 | hatenaid | Rank |
---|---|---|
1 | xxxxx | 41.7872 |
2 | xxxxxxxxxxx | 27.6095 |
3 | xxxxxx | 20.6521 |
4 | xxxxxxxx | 18.8549 |
5 | xxxxxx | 12.2271 |
6 | xxxxxxxxx | 11.0448 |
7 | xxxxx | 7.37067 |
8 | xxxxxx | 7.28443 |
9 | xxxxxxx | 6.92857 |
10 | xxxxxx | 6.23682 |
11 | xxxxxxxx | 6.17176 |
12 | xxxxxxxx | 6.12786 |
13 | xxxxxxxx | 6.11624 |
14 | xxxxxxxx | 5.6269 |
15 | xxxx | 5.20906 |
16 | xxxxxxx | 5.10285 |
17 | xxxxxxxx | 5.05067 |
18 | xxxxxxxxx2 | 4.76219 |
19 | xxxxxxx | 4.70626 |
20 | xxx | 4.65832 |
なにか面白みの無い結果に。
xxxxxxよりxxxxxxxxxxxの方がアクティブな人に気に入られているってことでしょうか。
集計部分のソースコード
超汚いがこんなですよ。
#import <Foundation/Foundation.h>
#import <SimpleDataAccess/SimpleDataAccess.h>

#define CONFIGURATION_FILENAME @"Config"
#define CONFIGURATION_EXT @"plist"

// One "favorite" edge: user srcId has favorited user destId.
typedef struct _Link {
    int destId;
    int srcId;
    int countOfLinksFromSrc;  // favoring_count of the source user (its out-degree)
} Link;

// Two rank slots per user, so one pass can read the previous iteration's
// values while writing the current iteration's values.
typedef struct _RankBuffer {
    float rank[2];
} RankBuffer;

#define POWER_COUNT 200
#define DAMPING_FACTOR 0.85

// Slot selection for the double buffer: on iteration idx, CURRENT is the slot
// being written and LAST is the slot written on the previous iteration.
#define LAST(buf, idx) ((buf)[1 - ((idx) % 2)])
#define CURRENT(buf, idx) ((buf)[((idx) % 2)])

/*
 * Computes a PageRank-style "favorite rank" for every user and stores it in
 * hatebu_statistics.favorite_rank.
 *
 * Steps:
 *   1. Load the connection dictionary from Config.plist in the main bundle.
 *   2. Read max(record_id) from hatebu_user to size the rank table.
 *   3. Read every favorite link together with the source user's favoring_count.
 *   4. Run POWER_COUNT iterations of
 *        PR(A) = (1 - d) + d * (PR(t1)/C(t1) + ... + PR(tn)/C(tn))
 *      with d = DAMPING_FACTOR.
 *   5. Write the resulting rank of every id in 0..maxUserId back to the
 *      database inside one transaction.
 *
 * Returns 0 on success, -1 when the configuration is missing, no users are
 * found, or memory cannot be allocated.
 */
int main (int argc, const char * argv[]) {
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];

    // Configuration
    NSDictionary *configuration = nil;
    NSString *configurationPath = [[NSBundle mainBundle] pathForResource:CONFIGURATION_FILENAME
                                                                  ofType:CONFIGURATION_EXT];
    if (configurationPath) {
        configuration = [NSDictionary dictionaryWithContentsOfFile:configurationPath];
        NSLog(@"configuration = %@", configuration);
    }
    NSDictionary *connectionDictionary = [configuration objectForKey:@"connectionDictionary"];
    if (!connectionDictionary) {
        NSLog(@"connectionDictionary not specified.");
        [pool release];
        return -1;
    }

    // Create the database access object.
    SimpleDataAccess *dataAccess =
        [SimpleDataAccess simpleDataAccessWithConnectionDictionary:connectionDictionary];

    // The query results are only needed until they are copied into C arrays,
    // so they live in an inner pool that is released right after the copy.
    NSAutoreleasePool *innerpool = [[NSAutoreleasePool alloc] init];

    // Fetch the largest user id.
    NSString *maxSQL = @"select max(record_id) as max_user_id from hatebu_user";
    NSArray *results = [dataAccess executeQueryWithSQL:maxSQL];
    int maxUserId = [[[results lastObject] objectForKey:@"max_user_id"] intValue];
    NSLog(@"maxUserId = %d", maxUserId);
    if (maxUserId <= 0) {
        NSLog(@"No users found.");
        [innerpool release];
        [pool release];
        return -1;
    }

    // Fetch the links (favorites), plus the number of links leaving each
    // source user, which the rank formula divides by.
    // The literal is built from adjacent string constants so that it contains
    // neither raw newlines nor stray continuation characters.
    NSString *linksSQL =
        @"select f.src_id, f.dest_id, s.favoring_count "
        @"from hatebu_favorite f, hatebu_statistics s "
        @"where f.src_id = s.user_id and s.favoring_count != 0 "
        @"order by f.src_id";
    results = [dataAccess executeQueryWithSQL:linksSQL];
    //NSLog(@"results = %@", results);

    // Copy the links into a plain C array to avoid per-object overhead in the
    // hot loop below.
    NSUInteger countOfRows = [results count];
    // Allocate at least one element so a zero-row result is not mistaken for
    // an allocation failure.
    Link *links = (Link *)malloc((countOfRows ? countOfRows : 1) * sizeof(Link));
    if (!links) {
        NSLog(@"Could not allocate memory for %lu links.", (unsigned long)countOfRows);
        [innerpool release];
        [pool release];
        return -1;
    }

    size_t countOfLinks = 0;
    NSUInteger rowIndex;
    for (rowIndex = 0; rowIndex < countOfRows; rowIndex++) {
        NSDictionary *aLink = [results objectAtIndex:rowIndex];
        Link candidate;
        candidate.destId = [[aLink objectForKey:@"dest_id"] intValue];
        candidate.srcId = [[aLink objectForKey:@"src_id"] intValue];
        candidate.countOfLinksFromSrc = [[aLink objectForKey:@"favoring_count"] intValue];

        // Both ids are used as indexes into the rank table, and the link
        // count is a divisor, so rows that would index outside 0..maxUserId
        // or divide by a non-positive count are dropped.
        if (candidate.destId < 0 || candidate.destId > maxUserId ||
            candidate.srcId < 0 || candidate.srcId > maxUserId ||
            candidate.countOfLinksFromSrc <= 0) {
            continue;
        }
        links[countOfLinks++] = candidate;
    }
    if (countOfLinks != countOfRows) {
        NSLog(@"Skipped %lu invalid link rows.", (unsigned long)(countOfRows - countOfLinks));
    }

    // Free the query results before the long-running computation.
    [innerpool release];

    // Zero-initialized rank table with one entry per possible user id
    // (0..maxUserId).
    size_t countOfUsers = (size_t)maxUserId + 1;
    RankBuffer *ranks = (RankBuffer *)calloc(countOfUsers, sizeof(RankBuffer));
    if (!ranks) {
        NSLog(@"Could not allocate memory for %lu users.", (unsigned long)countOfUsers);
        free(links);
        [pool release];
        return -1;
    }

    // Compute the ranks by repeated iteration:
    // PR( A ) = ( 1-d ) + d ( PR ( t1 ) / C ( t1 ) + ... + PR ( tn ) / C ( tn ) )
    int counter = 0;
    size_t indexOfUsers;
    size_t indexOfLinks;
    for (counter = 0; counter < POWER_COUNT; counter++) {
        // First the ( 1-d ) term for every user.
        for (indexOfUsers = 0; indexOfUsers < countOfUsers; indexOfUsers++) {
            CURRENT(ranks[indexOfUsers].rank, counter) = 1.0 - DAMPING_FACTOR;
        }
        // Then the remaining term, looping over links rather than users.
        for (indexOfLinks = 0; indexOfLinks < countOfLinks; indexOfLinks++) {
            const Link *link = &links[indexOfLinks];
            CURRENT(ranks[link->destId].rank, counter) +=
                DAMPING_FACTOR * LAST(ranks[link->srcId].rank, counter) / link->countOfLinksFromSrc;
        }
    }
    free(links);

    // Store the results in the database.
    NSString *updateRankSQL = @"update hatebu_statistics set favorite_rank=$1 where user_id=$2";

    // All rows are updated inside a single transaction.
    [dataAccess beginTransaction];
    for (indexOfUsers = 0; indexOfUsers < countOfUsers; indexOfUsers++) {
        // counter now equals POWER_COUNT, so LAST() selects the slot written
        // by the final iteration.
        //NSLog(@"%d, %f",indexOfUsers, LAST(ranks[indexOfUsers].rank, counter));
        NSArray *params = [NSArray arrayWithObjects:
            [NSNumber numberWithFloat:LAST(ranks[indexOfUsers].rank, counter)],
            [NSNumber numberWithInt:(int)indexOfUsers],
            nil];
        [dataAccess executeUpdateWithSQL:updateRankSQL parameters:params];
    }
    [dataAccess commitTransaction];
    // Update finished.

    free(ranks);
    [pool release];
    return 0;
}