はてブのお気に入り情報に基づくRank計算
まだ8317ユーザ分のデータしか無いのですが、とりあえず計算してみた。
べき乗法の繰り返し回数を200回、減衰係数を0.85として計算したよ。
この辺を参照:
http://www.kusastro.kyoto-u.ac.jp/~baba/wais/pagerank.html
http://www.sem-r.com/sem/google/20030909005531.html
まず単純にお気に入られ数(データが十分でないので暫定値ですが)で順位付け
| 順位 | hatenaid | お気に入られ数 |
|---|---|---|
| 1 | xxxxx | 619 |
| 2 | xxxxxx | 425 |
| 3 | xxxxxxxxxxx | 393 |
| 4 | xxxxxx | 280 |
| 5 | xxxxxxxxx | 235 |
| 6 | xxxxxxxx | 199 |
| 7 | xxxxx | 193 |
| 8 | xxxxxx | 175 |
| 9 | xxxxxxxxx2 | 130 |
| 10 | xxxxxxxx | 124 |
| 11 | xxxxxxxxxxx | 120 |
| 12 | xxxxxxxxx | 120 |
| 13 | xxxxxxx | 116 |
| 14 | xxxxxxx | 112 |
| 15 | xxxx | 109 |
| 16 | xxxxxxxxxx | 108 |
| 17 | xxxxxxx | 106 |
| 18 | xxxxxxxx | 103 |
| 19 | xxxxxxx6 | 101 |
| 20 | xxxxxx | 99 |
そしてHatebu Favorite Rankで順位付け
| 順位 | hatenaid | Rank |
|---|---|---|
| 1 | xxxxx | 41.7872 |
| 2 | xxxxxxxxxxx | 27.6095 |
| 3 | xxxxxx | 20.6521 |
| 4 | xxxxxxxx | 18.8549 |
| 5 | xxxxxx | 12.2271 |
| 6 | xxxxxxxxx | 11.0448 |
| 7 | xxxxx | 7.37067 |
| 8 | xxxxxx | 7.28443 |
| 9 | xxxxxxx | 6.92857 |
| 10 | xxxxxx | 6.23682 |
| 11 | xxxxxxxx | 6.17176 |
| 12 | xxxxxxxx | 6.12786 |
| 13 | xxxxxxxx | 6.11624 |
| 14 | xxxxxxxx | 5.6269 |
| 15 | xxxx | 5.20906 |
| 16 | xxxxxxx | 5.10285 |
| 17 | xxxxxxxx | 5.05067 |
| 18 | xxxxxxxxx2 | 4.76219 |
| 19 | xxxxxxx | 4.70626 |
| 20 | xxx | 4.65832 |
なにか面白みの無い結果に。
xxxxxxよりxxxxxxxxxxxの方がアクティブな人に気に入られているってことでしょうか。
集計部分のソースコード
超汚いがこんなですよ。
#import <Foundation/Foundation.h>
#import <SimpleDataAccess/SimpleDataAccess.h>
// Resource name (without extension) of the configuration file that main()
// looks up in the main bundle.
#define CONFIGURATION_FILENAME @"Config"
// Resource type (file extension) of that configuration file; main() loads it
// as a dictionary and reads the "connectionDictionary" key from it.
#define CONFIGURATION_EXT @"plist"
/*
 * One "favorite" edge of the user graph, kept as a plain C struct so the
 * whole edge list can live in a single malloc'ed array.
 */
typedef struct _Link Link;

struct _Link {
    int destId;               /* dest_id column: the user being favorited */
    int srcId;                /* src_id column: the user doing the favoriting */
    int countOfLinksFromSrc;  /* favoring_count column for srcId; used as the divisor C(t) */
};
/*
 * Per-user rank storage with two slots: the power iteration reads one slot
 * (previous pass) while writing the other (current pass), and the roles swap
 * every pass. Slots are selected with the LAST / CURRENT macros.
 */
typedef struct _RankBuffer RankBuffer;

struct _RankBuffer {
    float rank[2];  /* rank[pass % 2] is written, rank[1 - pass % 2] is read */
};
// Number of power-iteration passes to run.
#define POWER_COUNT 200
// Damping factor d in PR(A) = (1 - d) + d * sum(PR(t) / C(t)).
#define DAMPING_FACTOR 0.85
// Select one of the two slots of a double buffer based on the pass number:
// CURRENT is the slot written during pass `idx`, LAST is the slot written
// during the previous pass. Arguments are fully parenthesized so that
// expressions such as `pass + 1` select the intended slot.
#define LAST(buf, idx) ((buf)[1 - ((idx) % 2)])
#define CURRENT(buf, idx) ((buf)[(idx) % 2])
/**
 * Computes a PageRank-style "favorite rank" for every Hatena Bookmark user
 * and stores it in hatebu_statistics.favorite_rank.
 *
 * Steps:
 *   1. Read the database connection settings from the bundled Config.plist.
 *   2. Fetch the largest user id and every favorite link (src -> dest) along
 *      with the number of links leaving each source user.
 *   3. Run POWER_COUNT passes of
 *          PR(A) = (1 - d) + d * (PR(t1)/C(t1) + ... + PR(tn)/C(tn))
 *      with d = DAMPING_FACTOR, using a two-slot buffer per user.
 *   4. Write the resulting rank of every user id back in one transaction.
 *
 * @return 0 on success, -1 when the configuration is missing, no users exist,
 *         or memory cannot be allocated.
 */
int main (int argc, const char * argv[]) {
    NSAutoreleasePool *pool = [[NSAutoreleasePool alloc] init];

    // Configuration
    NSDictionary *configuration = nil;
    NSString *configurationPath = [[NSBundle mainBundle] pathForResource:CONFIGURATION_FILENAME
                                                                  ofType:CONFIGURATION_EXT];
    if (configurationPath) {
        configuration = [NSDictionary dictionaryWithContentsOfFile:configurationPath];
        NSLog(@"configuration = %@", configuration);
    }
    NSDictionary *connectionDictionary = [configuration objectForKey:@"connectionDictionary"];
    if (!connectionDictionary) {
        NSLog(@"connectionDictionary not specified.");
        [pool release];
        return -1;
    }

    // Create the database access helper.
    SimpleDataAccess *dataAccess = [SimpleDataAccess simpleDataAccessWithConnectionDictionary:connectionDictionary];

    // Query results are only needed until they are copied into C arrays, so
    // they are collected in their own pool and dropped early.
    NSAutoreleasePool *innerpool = [[NSAutoreleasePool alloc] init];

    // Fetch the largest user id; it determines the size of the rank table.
    NSString *maxSQL = @"select max(record_id) as max_user_id from hatebu_user";
    NSArray *results = [dataAccess executeQueryWithSQL:maxSQL];
    int maxUserId = [[[results lastObject] objectForKey:@"max_user_id"] intValue];
    NSLog(@"maxUserId = %d", maxUserId);
    if (maxUserId <= 0) {
        NSLog(@"No users found.");
        [innerpool release];
        [pool release];
        return -1;
    }
    // One slot per possible user id (0..maxUserId), so ids index the table directly.
    int countOfUsers = maxUserId + 1;

    // Fetch the links (favorites) together with the number of links leaving
    // each source user, which is needed as the divisor later. The query is
    // built from adjacent literals so that every clause is separated by a space.
    NSString *linksSQL =
        @"select f.src_id, f.dest_id, s.favoring_count "
        @"from hatebu_favorite f, hatebu_statistics s "
        @"where f.src_id = s.user_id and s.favoring_count != 0 "
        @"order by f.src_id";
    results = [dataAccess executeQueryWithSQL:linksSQL];
    //NSLog(@"results = %@", results);

    // Copy the link list into a plain C array to avoid per-object overhead
    // inside the iteration loop.
    int countOfRows = (int)[results count];
    Link *links = (Link *)calloc((size_t)(countOfRows > 0 ? countOfRows : 1), sizeof(Link));
    if (!links) {
        NSLog(@"Out of memory while allocating %d links.", countOfRows);
        [innerpool release];
        [pool release];
        return -1;
    }
    int countOfLinks = 0;
    int countOfSkipped = 0;
    int index;
    for (index = 0; index < countOfRows; index++) {
        NSDictionary *aLink = [results objectAtIndex:index];
        Link link;
        link.destId = [[aLink objectForKey:@"dest_id"] intValue];
        link.srcId = [[aLink objectForKey:@"src_id"] intValue];
        link.countOfLinksFromSrc = [[aLink objectForKey:@"favoring_count"] intValue];
        // Both ids are used as indices into the rank table and the count is a
        // divisor, so rows that would index out of bounds or divide by a
        // non-positive count are dropped here.
        if (link.destId < 0 || link.destId >= countOfUsers ||
            link.srcId < 0 || link.srcId >= countOfUsers ||
            link.countOfLinksFromSrc <= 0) {
            countOfSkipped++;
            continue;
        }
        links[countOfLinks++] = link;
    }
    if (countOfSkipped > 0) {
        NSLog(@"Skipped %d invalid link(s).", countOfSkipped);
    }

    // Slim down memory: the query results are no longer needed.
    [innerpool release];

    // Zero-initialised rank table covering every user id.
    RankBuffer *ranks = (RankBuffer *)calloc((size_t)countOfUsers, sizeof(RankBuffer));
    if (!ranks) {
        NSLog(@"Out of memory while allocating ranks for %d users.", countOfUsers);
        free(links);
        [pool release];
        return -1;
    }

    // Power iteration:
    // PR( A ) = ( 1-d ) + d ( PR ( t1 ) / C ( t1 ) + ... + PR ( tn ) / C ( tn ) )
    int counter = 0;
    int indexOfUsers;
    int indexOfLinks;
    for (counter = 0; counter < POWER_COUNT; counter++) {
        // First the ( 1-d ) term for every user.
        for (indexOfUsers = 0; indexOfUsers < countOfUsers; indexOfUsers++) {
            CURRENT(ranks[indexOfUsers].rank, counter) = 1.0 - DAMPING_FACTOR;
        }
        // Then the summation term, looping over links rather than users:
        // each link passes a share of its source's previous rank to its destination.
        for (indexOfLinks = 0; indexOfLinks < countOfLinks; indexOfLinks++) {
            Link link = links[indexOfLinks];
            CURRENT(ranks[link.destId].rank, counter) +=
                DAMPING_FACTOR * LAST(ranks[link.srcId].rank, counter) / link.countOfLinksFromSrc;
        }
    }
    free(links);

    // Store the results in the database, all within a single transaction.
    NSString *updateRankSQL = @"update hatebu_statistics set favorite_rank=$1 where user_id=$2";
    [dataAccess beginTransaction];
    for (indexOfUsers = 0; indexOfUsers < countOfUsers; indexOfUsers++) {
        // counter == POWER_COUNT here, so LAST selects the slot that the final
        // pass wrote.
        //NSLog(@"%d, %f",indexOfUsers, LAST(ranks[indexOfUsers].rank, counter));
        NSArray *params = [NSArray arrayWithObjects:
            [NSNumber numberWithFloat:LAST(ranks[indexOfUsers].rank, counter)],
            [NSNumber numberWithInt:indexOfUsers],
            nil];
        [dataAccess executeUpdateWithSQL:updateRankSQL parameters:params];
    }
    [dataAccess commitTransaction];
    // Update finished.

    free(ranks);
    [pool release];
    return 0;
}