Recently I had a conversation with a friend about how I was sick of hearing the same songs all the time on a local radio station (yeah I internet radio) and I decided I wanted to see just how many songs are actually played during a day. After a quick write and a few hours fixing a few small bugs I finally have a fully functioning version to use.
<?php define('PLAYLIST_URL', 'http://www.98rock.com/iplaylist/playlist.html?last10=1'); define('PLAYLIST_ROW', 'tr[class^=playlist_row2]'); define('PLAYLIST_ARTIST', 'td[class^=playlist_artist]'); define('DEFAULT_SEPERATOR', '-'); define('DATABASE_HOST', '127.0.0.1'); define('DATABASE_USERNAME', ''); define('DATABASE_PASSWORD', ''); define('DATABASE_NAME', ''); define('TABLE_PREFIX', ''); define('TABLE_ARTIST', 'artist'); define('TABLE_SONG', 'song'); define('TABLE_PLAYLOG', 'playlog'); include_once('simple_html_dom.php'); function scraping_98rock() { $process = curl_init(PLAYLIST_URL); curl_setopt($process, CURLOPT_HEADER, 0); curl_setopt($process, CURLOPT_RETURNTRANSFER, 1); curl_setopt($process, CURLOPT_CONNECTTIMEOUT, 1); $resp = curl_exec($process); curl_close($process); $html = str_get_html($resp); if ($html == null) { die("Could not access the playlist!"); } foreach ($html->find(PLAYLIST_ROW) as $song) { $data = explode(DEFAULT_SEPERATOR, trim($song->find(PLAYLIST_ARTIST, 0)->plaintext)); $item['artist'] = $data[0]; $item['song'] = $data[1]; $ret[] = $item; } // clean up memory $html->clear(); unset($html); return $ret; } $ret = scraping_98rock(); $link = mysql_connect(DATABASE_HOST, DATABASE_USERNAME, DATABASE_PASSWORD); if (!$link) { die('Could not connect: ' . mysql_error()); } mysql_select_db(DATABASE_NAME); $songs = array(); for($i =9; $i >-1; $i--) { $rank++; $result = mysql_query("SELECT * FROM " . TABLE_PREFIX . TABLE_ARTIST . " WHERE name = '" . mysql_real_escape_string($ret[$i]['artist']) . "'"); if (!$result) { die('Could not query:' . mysql_error()); } $artist = mysql_fetch_row($result); if (!$artist) { mysql_query("INSERT into " . TABLE_PREFIX . TABLE_ARTIST . " (name) VALUES ('" . mysql_real_escape_string($ret[$i]['artist']) . "')"); echo "artist inserted: " . $ret[$i]['artist'] . "\n"; } if (!$artist) { $result = mysql_query("SELECT * FROM " . TABLE_PREFIX . TABLE_ARTIST . " WHERE name = '" . mysql_real_escape_string($ret[$i]['artist']) . "'"); if (!$result) { die('Could not query:' . mysql_error()); } $artist = mysql_fetch_row($result); } $result = mysql_query("SELECT * FROM ".TABLE_PREFIX.TABLE_SONG." WHERE name = '" . mysql_real_escape_string($ret[$i]['song']) . "'"); if (!$result) { die('Could not query:' . mysql_error()); } $song = mysql_fetch_row($result); if (!$song) { mysql_query("INSERT into ".TABLE_PREFIX.TABLE_SONG." (name,artist_id) VALUES ('" . mysql_real_escape_string($ret[$i]['song']) . "','" . mysql_real_escape_string($artist[0]) . "')"); echo "song inserted: " . $ret[$i]['song'] . "\n"; } if (!$song) { $result = mysql_query("SELECT * FROM ".TABLE_PREFIX.TABLE_SONG." WHERE name = '" . mysql_real_escape_string($ret[$i]['song']) . "'"); if (!$result) { die('Could not query:' . mysql_error()); } $song = mysql_fetch_row($result); } if (!$song || !$artist) { echo "DANGER $artist - $song \n"; continue; } array_push($songs, array("song" => $song[0], "artist" => $artist[0])); } $result = mysql_query("SELECT * FROM ".TABLE_PREFIX.TABLE_PLAYLOG." ORDER BY id DESC LIMIT 10"); if (!$result) { die('Could not query:' . mysql_error()); } while ($last = mysql_fetch_array($result)) { for ($i = 0; $i < 10; $i++) { if ($songs[$i]['song'] == $last['song_id'] && $songs[$i]['artist'] == $last['artist_id']) { unset($songs[$i]); continue; } } } for ($i = 0; $i < 10; $i++) { if (isset($songs[$i])) { $result = mysql_query("INSERT into ".TABLE_PREFIX.TABLE_PLAYLOG." (song_id,artist_id) VALUES ('" . $songs[$i]['song'] . "','" . $songs[$i]['artist'] . "')"); if (!$result) { die('Could not query:' . mysql_error()); } echo "Inserted: ".$s['song']." into playlog"; } } mysql_close($link); ?>
I will be leaving this running from today at 15 minute intervals I will update results later in the week.
