はてブのお気に入り情報に基づくRank計算

まだ8317ユーザ分のデータしか無いのですが、とりあえず計算してみました。
べき乗法の反復回数は200回、減衰係数(damping factor)は0.85で計算しています。
この辺を参照:

http://www.kusastro.kyoto-u.ac.jp/~baba/wais/pagerank.html
http://www.sem-r.com/sem/google/20030909005531.html

まず単純にお気に入られ数(データが十分でないので暫定値ですが)で順位付け

順位 hatenaid お気に入られ
1 xxxxx 619
2 xxxxxx 425
3 xxxxxxxxxxx 393
4 xxxxxx 280
5 xxxxxxxxx 235
6 xxxxxxxx 199
7 xxxxx 193
8 xxxxxx 175
9 xxxxxxxxx2 130
10 xxxxxxxx 124
11 xxxxxxxxxxx 120
12 xxxxxxxxx 120
13 xxxxxxx 116
14 xxxxxxx 112
15 xxxx 109
16 xxxxxxxxxx 108
17 xxxxxxx 106
18 xxxxxxxx 103
19 xxxxxxx6 101
20 xxxxxx 99

そしてHatebu Favorite Rankで順位付け

順位 hatenaid Rank
1 xxxxx 41.7872
2 xxxxxxxxxxx 27.6095
3 xxxxxx 20.6521
4 xxxxxxxx 18.8549
5 xxxxxx 12.2271
6 xxxxxxxxx 11.0448
7 xxxxx 7.37067
8 xxxxxx 7.28443
9 xxxxxxx 6.92857
10 xxxxxx 6.23682
11 xxxxxxxx 6.17176
12 xxxxxxxx 6.12786
13 xxxxxxxx 6.11624
14 xxxxxxxx 5.6269
15 xxxx 5.20906
16 xxxxxxx 5.10285
17 xxxxxxxx 5.05067
18 xxxxxxxxx2 4.76219
19 xxxxxxx 4.70626
20 xxx 4.65832

なにか面白みの無い結果に。
xxxxxxよりxxxxxxxxxxxの方がアクティブな人に気に入られているってことでしょうか。

集計部分のソースコード

超汚いがこんなですよ。

#import <Foundation/Foundation.h>
#import <SimpleDataAccess/SimpleDataAccess.h>
// Base name of the configuration property list that main() looks up in the main bundle.
#define CONFIGURATION_FILENAME @"Config"
// Resource type (file extension) of that configuration property list.
#define CONFIGURATION_EXT @"plist"


/*
 * One "favorite" edge of the user graph, stored as a plain C struct.
 * main() fills each field from one row of its link query:
 *   destId              <- "dest_id"        (the user being favorited)
 *   srcId               <- "src_id"         (the user doing the favoriting)
 *   countOfLinksFromSrc <- "favoring_count" (used as the divisor C(t) in the rank formula)
 */
typedef struct _Link Link;

struct _Link {
    int destId;
    int srcId;
    int countOfLinksFromSrc;
};

/*
 * Per-user pair of rank values. The CURRENT/LAST macros pick one of the two
 * slots from the parity of the iteration counter, so each pass reads the
 * values written by the previous pass while writing into the other slot.
 */
typedef struct _RankBuffer RankBuffer;

struct _RankBuffer {
    float rank[2];
};


// Number of passes of the iterative rank computation.
#define POWER_COUNT 200
// Damping factor d in PR(A) = (1 - d) + d * (PR(t1)/C(t1) + ... + PR(tn)/C(tn)).
#define DAMPING_FACTOR 0.85
// Slot selectors for a two-element buffer, keyed on the parity of idx:
// CURRENT is the slot written during pass idx, LAST is the other slot
// (the one written during pass idx - 1). idx is expected to be non-negative.
// Both arguments are fully parenthesised so that expressions such as
// `i + 1` select the intended slot; the results remain usable as lvalues.
#define LAST(buf, idx) ((buf)[1 - ((idx) % 2)])
#define CURRENT(buf, idx) ((buf)[(idx) % 2])

/**
 * Computes a PageRank-style rank for every user from the "favorite" links
 * stored in the database and writes the result back to
 * hatebu_statistics.favorite_rank.
 *
 * Steps:
 *   1. Load the connection settings from Config.plist in the main bundle.
 *   2. Read max(record_id) from hatebu_user to size the per-user rank array.
 *   3. Read every favorite link together with the source user's favoring_count.
 *   4. Run POWER_COUNT passes of
 *        PR(A) = (1 - d) + d * (PR(t1)/C(t1) + ... + PR(tn)/C(tn))
 *      with d = DAMPING_FACTOR.
 *   5. Update favorite_rank for every user id in one transaction.
 *
 * @return 0 on success, -1 when the configuration is missing, no users are
 *         found, or memory cannot be allocated.
 */
int main (int argc, const char * argv[]) {
    NSAutoreleasePool * pool = [[NSAutoreleasePool alloc] init];

    // Configuration
    NSDictionary *configuration = nil;
    NSString *configurationPath = [[NSBundle mainBundle] pathForResource:CONFIGURATION_FILENAME ofType:CONFIGURATION_EXT];
    if (configurationPath) {
        configuration = [NSDictionary dictionaryWithContentsOfFile:configurationPath];
        NSLog(@"configuration = %@", configuration);
    }
    NSDictionary *connectionDictionary = [configuration objectForKey:@"connectionDictionary"];
    if (!connectionDictionary) {
        NSLog(@"connectionDictionary not specified.");
        [pool release];
        return -1;
    }

    // Create the database access object.
    SimpleDataAccess *dataAccess = [SimpleDataAccess simpleDataAccessWithConnectionDictionary:connectionDictionary];

    // Query results are only needed until they are copied into C arrays, so
    // they live in their own pool that is released right after the copy.
    NSAutoreleasePool * innerpool = [[NSAutoreleasePool alloc] init];

    // Fetch the largest user id.
    NSString *maxSQL = @"select max(record_id) as max_user_id from hatebu_user";
    NSArray *results = [dataAccess executeQueryWithSQL:maxSQL];
    int maxUserId = [[[results lastObject] objectForKey:@"max_user_id"] intValue];

    NSLog(@"maxUserId = %d", maxUserId);

    // A non-positive maximum means there is nothing to rank (and would make
    // the array size below invalid).
    if (maxUserId <= 0) {
        NSLog(@"No users found.");
        [innerpool release];
        [pool release];
        return -1;
    }

    // Fetch the links (favorites) together with the number of outgoing links
    // of each source user, which is the divisor C(t) in the rank formula.
    // Built from adjacent string literals so the statement stays one valid
    // string without relying on line continuations.
    NSString *linksSQL = @"select f.src_id, f.dest_id, s.favoring_count "
                         @"from hatebu_favorite f, hatebu_statistics s "
                         @"where f.src_id = s.user_id and s.favoring_count != 0 "
                         @"order by f.src_id";
    results = [dataAccess executeQueryWithSQL:linksSQL];
    //NSLog(@"results = %@", results);

    // Copy the link list into a plain C array to avoid per-object overhead
    // inside the iteration loop.
    int countOfRows = (int)[results count];
    Link *links = NULL;
    if (countOfRows > 0) {
        links = (Link *)malloc((size_t)countOfRows * sizeof(Link));
        if (!links) {
            NSLog(@"Failed to allocate memory for %d links.", countOfRows);
            [innerpool release];
            [pool release];
            return -1;
        }
    }
    NSDictionary *aLink = nil;
    int countOfLinks = 0;
    int index;
    for (index = 0; index < countOfRows; index++) {
        aLink = [results objectAtIndex:index];
        Link link;
        link.destId = [[aLink objectForKey:@"dest_id"] intValue];
        link.srcId = [[aLink objectForKey:@"src_id"] intValue];
        link.countOfLinksFromSrc = [[aLink objectForKey:@"favoring_count"] intValue];
        // Both ids are used as indexes into the rank array and the count is a
        // divisor, so rows outside the valid range are dropped here instead of
        // corrupting memory or dividing by a non-positive value later.
        if (link.destId < 0 || link.destId > maxUserId ||
            link.srcId < 0 || link.srcId > maxUserId ||
            link.countOfLinksFromSrc <= 0) {
            continue;
        }
        links[countOfLinks++] = link;
    }
    if (countOfLinks != countOfRows) {
        NSLog(@"Skipped %d links with out-of-range ids or counts.", countOfRows - countOfLinks);
    }

    // Free the query results.
    [innerpool release];


    // Allocate one zero-initialised rank buffer per possible user id
    // (ids 0..maxUserId, so the id can be used directly as the index).
    int countOfUsers = (maxUserId + 1);
    RankBuffer *ranks = (RankBuffer *)calloc((size_t)countOfUsers, sizeof(RankBuffer));
    if (!ranks) {
        NSLog(@"Failed to allocate memory for %d users.", countOfUsers);
        free(links);
        [pool release];
        return -1;
    }

    // Iteratively compute the ranks.
    // PR( A ) = ( 1-d ) +d ( PR ( t1 ) / C ( t1 ) + … + PR ( tn ) / C ( tn ) )
    int counter = 0;
    int indexOfUsers;
    int indexOfLinks;
    for (counter = 0; counter < POWER_COUNT; counter++) {
        // First the ( 1-d ) term for every user.
        for (indexOfUsers = 0; indexOfUsers < countOfUsers; indexOfUsers++) {
            CURRENT(ranks[indexOfUsers].rank, counter) = 1.0 - DAMPING_FACTOR;
        }
        // Then the summation term, looping over links rather than users:
        // each link adds its source's previous rank share to its destination.
        for (indexOfLinks = 0; indexOfLinks < countOfLinks; indexOfLinks ++) {
            Link link = links[indexOfLinks];
            CURRENT(ranks[link.destId].rank, counter) +=
                DAMPING_FACTOR * LAST(ranks[link.srcId].rank, counter)/link.countOfLinksFromSrc;
        }
    }
    free(links);

    // Store the results in the database.
    // After the loop counter == POWER_COUNT, so LAST(..., counter) selects the
    // slot written by the final pass.
    NSString *updateRankSQL = @"update hatebu_statistics set favorite_rank=$1 where user_id=$2";
    // Update everything in a single transaction.
    [dataAccess beginTransaction];
    for (indexOfUsers = 0; indexOfUsers < countOfUsers; indexOfUsers++) {
        // Per-iteration pool so temporaries do not pile up for every user.
        NSAutoreleasePool *loopPool = [[NSAutoreleasePool alloc] init];
        //NSLog(@"%d, %f",indexOfUsers, LAST(ranks[indexOfUsers].rank, counter));
        NSArray *params = [NSArray arrayWithObjects:
            [NSNumber numberWithFloat:LAST(ranks[indexOfUsers].rank, counter)],
            [NSNumber numberWithInt:indexOfUsers],
            nil];
        [dataAccess executeUpdateWithSQL:updateRankSQL parameters:params];
        [loopPool release];
    }
    [dataAccess commitTransaction];
    // Update finished.

    free(ranks);
    [pool release];
    return 0;
}