<?php
/*
* Author: David Baker
* Email: dtbaker@gmail.com
* MySpace Spider Class
* - Supports logging into MySpace, collecting friend lists, pictures, comments, etc..
*/
/**
 * Screen-scraping client for MySpace built on a single shared cURL handle.
 *
 * Cookies are persisted in /tmp/myspace_cookies so that a session created by
 * login() is reused by later requests made through the same handle.
 *
 * All parsing is done with regular expressions against the HTML returned by
 * the site; when an expected fragment is missing the corresponding value is
 * returned as null (or an empty string where noted) instead of raising a notice.
 */
class myspace{
    /** cURL handle used for every request issued by this object. */
    var $ch;
    /** Result of the last profile page fetch (whatever curl_exec() returned). */
    var $data;
    /** Result of the last comments page fetch (whatever curl_exec() returned). */
    var $comment_data;
    /** Id of the profile currently loaded, or null when it could not be resolved. */
    var $user_id;
    /** True once login() has succeeded on this object. */
    var $loggedin_status = false;

    /**
     * Set up the cURL handle and optionally load a profile straight away.
     *
     * Declared as __construct() because PHP 8 no longer treats a method named
     * after the class as a constructor.
     *
     * @param int|string|false $user_id numeric id or vanity name to load; false to load nothing
     */
    public function __construct($user_id=false){
        $this->loggedin_status = false;
        $this->ch = curl_init();
        curl_setopt($this->ch, CURLOPT_HEADER, 0);
        curl_setopt($this->ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($this->ch, CURLOPT_COOKIEJAR, "/tmp/myspace_cookies");
        curl_setopt($this->ch, CURLOPT_COOKIEFILE, "/tmp/myspace_cookies");
        curl_setopt($this->ch, CURLOPT_USERAGENT, "Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)");
        if($user_id){
            $this->process($user_id);
        }
    }

    /**
     * Resolve $user_id to a numeric profile id and load that profile page into $this->data.
     *
     * Integers and digit-only strings are taken as the profile id as given.
     * Anything else is treated as a vanity name (http://myspace.com/<name>) and
     * the id is scraped from the "Pics" link on that page. If the id cannot be
     * found, user_id is set to null, data is cleared and no profile is fetched.
     *
     * @param int|string $user_id numeric id or vanity name
     */
    public function process($user_id){
        $is_numeric_id = is_int($user_id)
            || (is_string($user_id) && preg_match('/^\d+\z/',$user_id));
        if($is_numeric_id){
            $this->user_id = $user_id;
        }else{
            // process a url like http://myspace.com/dtbaker
            $vanity_page = (string)$this->http_get("http://myspace.com/".$user_id);
            $this->user_id = $this->first_match('/friendID=(\d+)">Pics<\//',$vanity_page);
            if($this->user_id === null){
                // Nothing to load; make sure a previously loaded profile is not
                // mistaken for this one.
                $this->data = '';
                return;
            }
        }
        $this->load_data();
    }

    /** Fetch the profile page of $this->user_id into $this->data. */
    private function load_data(){
        $url = "http://profile.myspace.com/index.cfm?fuseaction=user.viewprofile&friendid=".$this->user_id;
        $this->data = $this->http_get($url);
    }

    /**
     * Issue a GET request on the shared handle.
     *
     * The handle is explicitly switched back to GET first, so that POST fields
     * left over from login() or friend-list paging are never re-sent.
     *
     * @param string $url
     * @return string|false whatever curl_exec() returned
     */
    private function http_get($url){
        curl_setopt($this->ch, CURLOPT_HTTPGET, true);
        curl_setopt($this->ch, CURLOPT_URL, $url);
        return curl_exec($this->ch);
    }

    /**
     * Issue a POST request on the shared handle, then put the handle back in GET mode.
     *
     * @param string $url
     * @param array  $fields form fields, passed to CURLOPT_POSTFIELDS as an array
     * @return string|false whatever curl_exec() returned
     */
    private function http_post($url,$fields){
        curl_setopt($this->ch, CURLOPT_URL, $url);
        curl_setopt($this->ch, CURLOPT_POST, true);
        curl_setopt($this->ch, CURLOPT_POSTFIELDS, $fields);
        $body = curl_exec($this->ch);
        curl_setopt($this->ch, CURLOPT_HTTPGET, true);
        return $body;
    }

    /**
     * Return capture group 1 of $pattern in $subject, or $default when there is no match.
     */
    private function first_match($pattern,$subject,$default=null){
        $found = array();
        if(preg_match($pattern,$subject,$found) && isset($found[1])){
            return $found[1];
        }
        return $default;
    }

    /** Return $rows[$index] if present, otherwise null. */
    private function row_at($rows,$index){
        return isset($rows[$index]) ? $rows[$index] : null;
    }

    /**
     * Collect the trimmed lines of the profile "details" table cell.
     *
     * For user profiles collection starts on the line after the details <td>
     * opening tag. For music profiles it starts at the first line containing
     * </font> that follows the music details <td>. In both cases it stops at
     * the first collected-range line containing </td>.
     *
     * @param string $html     profile page HTML
     * @param bool   $is_music whether the page was detected as a music profile
     * @return array list of trimmed lines
     */
    private function collect_detail_rows($html,$is_music){
        $rows = array();
        $collecting = false;
        $seen_music_cell = false;
        foreach(explode("\n",$html) as $line){
            if($seen_music_cell && preg_match('/<\/font>/',$line)){
                $collecting = true;
            }
            if($collecting){
                if(preg_match('/<\/td>/',$line)){
                    break;
                }
                $rows[] = trim($line);
            }
            if($is_music && preg_match('/<td width="193" height="75" bgcolor="#ffffff" class="text">/',$line)){
                $seen_music_cell = true;
            }else if(preg_match('/<td class="text" width="193" bgcolor="#ffffff" height="75" align="left">(.*)/',$line)){
                $collecting = true;
            }
        }
        return $rows;
    }

    /**
     * Parse basic details out of the currently loaded profile page.
     *
     * A page containing "#musicnav" is treated as a music profile and yields
     * location, country, profile_views, last_login; any other page is treated
     * as a user profile and yields location, country, age, gender, last_login.
     * Both include name and profile_type ('music' or 'user'). Values that
     * cannot be found are null, except gender which is then an empty string.
     *
     * @param int|string|false $user_id     profile to load first; false to use the one already loaded
     * @param bool             $return_html when true, the unparsed page is included as 'raw_html'
     * @return array
     */
    public function get_details($user_id=false,$return_html=false){
        if($user_id){
            $this->process($user_id);
        }
        // $this->data may be false/null if nothing was fetched successfully.
        $html = (string)$this->data;
        $details = array();
        if($return_html){
            $details['raw_html'] = $this->data;
        }
        $is_music = (bool)preg_match('/#musicnav/',$html);
        $details['name'] = $this->first_match('/span class="nametext">([^<]+)<\/span>/',$html);
        $rows = $this->collect_detail_rows($html,$is_music);
        if($is_music){
            $details['location'] = $this->row_at($rows,3);
            $details['country'] = $this->row_at($rows,5);
            $details['profile_views'] = $this->row_at($rows,9);
            $details['last_login'] = $this->row_at($rows,18);
            $details['profile_type'] = 'music';
        }else{
            $details['location'] = $this->row_at($rows,5);
            $details['country'] = $this->row_at($rows,7);
            $details['age'] = $this->first_match('/(\d+)/',(string)$this->row_at($rows,3));
            // First character of the gender line, lower-cased ('' when the line is missing).
            $details['gender'] = strtolower((string)substr((string)$this->row_at($rows,1),0,1));
            $details['last_login'] = $this->row_at($rows,17);
            $details['profile_type'] = 'user';
        }
        return $details;
    }

    /**
     * Fetch the first page of comments left on a profile.
     *
     * Only the first comments page is read; to walk further pages, paging
     * similar to get_friends() would be needed.
     *
     * NOTE: when not logged in this prints a message and terminates the script.
     *
     * @param int|string $user_id numeric profile id
     * @return array list of arrays with keys user_id, name, date_time, comment
     */
    public function get_comments($user_id){
        if(!$this->is_loggedin()){
            echo "You need to login to view comments";
            exit;
        }
        $url = "http://comment.myspace.com/index.cfm?fuseaction=user.viewComments&friendID=".$user_id;
        $this->comment_data = $this->http_get($url);
        $rows = array();
        preg_match_all('/<tr id="profile_comments_\d+">(.*)<\/tr>/msU',(string)$this->comment_data,$rows);
        $comments = array();
        foreach($rows[1] as $row){
            // author id and display name come from the same link
            $author = array();
            preg_match('/friendid=(\d+)">([^<]+)<\/a>/',$row,$author);
            $comments[] = array(
                "user_id" => isset($author[1]) ? $author[1] : null,
                "name" => isset($author[2]) ? $author[2] : null,
                "date_time" => $this->first_match('/<h4>([^<]+)<\/h4>/',$row),
                // every comment body is terminated by </textarea> in the markup
                "comment" => trim((string)$this->first_match('/<\/h4>(.*)<\/textarea>/msU',$row,'')),
            );
        }
        return $comments;
    }

    /**
     * Collect the friend ids and thumbnail URLs from every page of a friend list.
     *
     * The number of pages is derived from the paging links on the first page;
     * later pages are requested by posting the page's __VIEWSTATE back with the
     * pager as event target.
     *
     * @param int|string $user_id numeric profile id
     * @return array with keys page_count, friends, friend_images, friend_count
     */
    public function get_friends($user_id){
        $url = "http://friends.myspace.com/index.cfm?fuseaction=user.viewfriends&friendID=$user_id";
        $page_html = (string)$this->http_get($url);
        // Count pages by probing for a paging link labelled 2, 3, 4, ...
        $page_count = 1;
        while(preg_match('/<a class="pagingLink"[^>]+>'.($page_count+1).'</',$page_html)){
            $page_count++;
        }
        $friends = array(
            'page_count' => $page_count,
            'friends' => array(),
            'friend_images' => array(),
        );
        for($page=1;$page<=$page_count;$page++){
            $found = array();
            preg_match_all('/<a href="http:\/\/profile\.myspace\.com\/index\.cfm\?fuseaction=user\.viewprofile&friendid=([^"]+)"><img src="([^"]+)"/',$page_html,$found);
            foreach($found[1] as $idx => $friend_id){
                $friends['friends'][] = $friend_id;
                $friends['friend_images'][] = $found[2][$idx];
            }
            if($page<$page_count){
                $post = array(
                    "__VIEWSTATE" => $this->first_match('/ id="__VIEWSTATE" value="([^"]+)"/',$page_html,''),
                    "__EVENTTARGET" => 'ctl00$cpMain$pagerTop',
                    "__EVENTARGUMENT" => $page+1,
                );
                $page_html = (string)$this->http_post($url,$post);
            }
        }
        $friends['friend_count'] = count($friends['friends']);
        return $friends;
    }

    /**
     * Log in with the given credentials.
     *
     * The home page is fetched first to obtain cookies and a __VIEWSTATE value,
     * which is then posted together with the credentials. The login is
     * considered successful when the response contains "fuseaction".
     *
     * @param string $email
     * @param string $password
     * @return bool true on success, false otherwise
     */
    public function login($email,$password){
        $home_page = (string)$this->http_get("http://www.myspace.com/");
        $post = array(
            "__VIEWSTATE" => $this->first_match('/ id="__VIEWSTATE" value="([^"]+)"/',$home_page,''),
            'ctl00$Main$SplashDisplay$ctl00$Email_Textbox' => $email,
            'ctl00$Main$SplashDisplay$ctl00$Password_Textbox' => $password,
        );
        // http_post() resets the handle to GET afterwards so the credentials
        // are not re-sent with later requests.
        $response = (string)$this->http_post("http://secure.myspace.com/index.cfm?fuseaction=login.process",$post);
        if(preg_match('/fuseaction/',$response)){
            $this->loggedin_status = true;
            return true;
        }
        return false;
    }

    /**
     * @return bool whether login() has succeeded on this object
     */
    public function is_loggedin(){
        return (bool)$this->loggedin_status;
    }
}
?>