Autors: andrisp
Komentārs: http://apblog.lv
Skatīt bez rindiņu numerācijas
<?php
/*
 * Author: andrisp
 * http://apblog.lv
 * apblog at apblog dot lv
 *
 * Scrapes a blog (front page -> monthly archive pages -> article pages) and
 * writes three SQL dump files: table structure, articles and comments.
 *
 * NOTE(review): the copy of this script that was reviewed had lost every
 * guard/if header, the preg_* calls, the file-write calls and the VALUES
 * expressions of the INSERT statements. Those parts were reconstructed from
 * the surviving statements; confirm them against the author's original
 * (in particular the tag separator and the SQL escaping).
 */

$address = ''; // Blog address goes here, including the leading http://

$file_for_structure = 'structure.sql';
$file_for_articles = 'posts.sql';
$file_for_comments = 'comments.sql';

$debug_file = 'debug.txt';
$debug_mode = false; // When enabled, only the HTTP requests are logged (into $debug_file).
                     // Parsing errors etc. are not logged.

if ($address === '') {
    // Nothing to scrape; the functions below stay available to callers.
    echo "Set \$address to the blog URL (including http://) before running.\n";
} else {
    if (substr($address, -1) !== '/') {
        $address .= '/';
    }

    genereate_create_table_sql();

    // Start every run with empty dump files so re-runs do not duplicate rows.
    file_put_contents($file_for_articles, '');
    file_put_contents($file_for_comments, '');

    $html = request($address);
    $archive_links = get_archive_months($html);
    if (!is_array($archive_links)) {
        $archive_links = array();
    }

    $article_id = 1;
    foreach ($archive_links as $i) {
        $archive_page = request($i);
        $links_to_articles = parse_archive_page($archive_page);
        if (!is_array($links_to_articles)) {
            continue;
        }

        foreach ($links_to_articles as $ii) {
            $article_page = request($ii);
            $article_data = parse_article_page($article_page);
            if ($article_data === false) {
                // Page could not be fetched or did not match the expected markup.
                continue;
            }

            file_put_contents(
                $file_for_articles,
                generate_article_insert_sql($article_id, $article_data),
                FILE_APPEND
            );
            file_put_contents(
                $file_for_comments,
                generate_comment_insert_sql($article_id, $article_data['comments']),
                FILE_APPEND
            );

            // The id is assigned here (not by the database) so that comments
            // can reference their article inside the dump.
            $article_id++;
        }
    }
}

/**
 * Quotes a value as a single-quoted SQL string literal.
 *
 * Uses addslashes(), which is safe for UTF-8 data under MySQL's default
 * backslash-escape mode. NOTE(review): if the dump is imported with
 * NO_BACKSLASH_ESCAPES enabled, a different escaping is required.
 *
 * @param mixed $value Value to quote; cast to string first.
 * @return string The quoted literal, including the surrounding quotes.
 */
function sql_quote($value)
{
    return "'" . addslashes((string)$value) . "'";
}

/**
 * Extracts the links found inside the <div id="ptb_archives"> block.
 *
 * @param string $html HTML of the blog front page.
 * @return array|false List of href values, or false when the input is empty
 *                     or the archive block / links cannot be found.
 */
function get_archive_months($html)
{
    if (empty($html)) {
        return false;
    }

    $sp = '/<div(?:(?:[\s\n\r\t]+[^>]+[\s\n\r\t]+)?|(?:[\s\n\r\t]+)?)?id=["\']ptb_archives["\'][^>]*>(.*?)<\/div>/ismu';
    $sp2 = '/href="(.*?)"/ismu';

    if (preg_match($sp, $html, $block) && preg_match_all($sp2, $block[1], $m)) {
        return $m[1];
    }

    return false;
}

/**
 * Extracts article permalinks (<h3><a ... rel="bookmark">) from an archive page.
 *
 * @param string $html HTML of one archive page.
 * @return array|false List of article URLs in page order (possibly empty),
 *                     or false when the input is empty or the regex fails.
 */
function parse_archive_page($html)
{
    if (empty($html)) {
        return false;
    }

    $sp = '/<h3>[^><]*?<a[^><]+?href="([^"]+?)"[^><]+?rel="bookmark"[^><]*?>(.+?)<\/a>[^><]*?<\/h3>/ismu';

    if (preg_match_all($sp, $html, $m) === false) {
        return false;
    }

    $article_links = $m[1];

    return $article_links;
}

/**
 * Parses one article page into article data and its comments.
 *
 * @param string $html HTML of the article page.
 * @return array|false array(
 *                       'article'  => array('title', 'text', 'date', 'time', 'tags'),
 *                       'comments' => list of array('author', 'text', 'date', 'time', 'url')
 *                     ), or false when the input is empty or the article
 *                     markup is not recognised. 'date' is "YYYY.MM.DD." and
 *                     'time' is "HH:MM" for the article; comments carry
 *                     "YYYY-MM-DD" and "HH:MM:SS" (per the patterns below).
 */
function parse_article_page($html)
{
    if (empty($html)) {
        return false;
    }

    $sp = '/<h3>[^><]*?<a[^><]*?>(.+?)<\/a>[^><]*?<\/h3>[^><]*?<div[^><]+?class="blogpost_body">(.+?)<\/div>[^><]*?<div[^><]+?class="blogpost_info"[^><]*?>.*?([0-9]{4}\.[0-9]{2}\.[0-9]{2}\.)\s+([0-9]{2}:[0-9]{2}).*?<\/div>/ismu';

    if (!preg_match($sp, $html, $a)) {
        return false;
    }

    $article_title = $a[1];
    // The tag list lives inside the post body: read it, then strip it.
    $tags = get_tags($a[2]);
    $article_text = remove_tags_block($a[2]);
    $article_date = $a[3];
    $article_time = $a[4];

    $sp = '/<div[^><]+?class="blogpostcomment(?:\sunregisteredUserComment){0,1}"[^><]*?>[^><]+?<div[^><]+?class="blogpostcomment_intro">[^><]*?<a[^><]*?>[^><]+?<\/a>[^><]*?<a[^><]+?href="(.+?)"[^><]*?>([^><]+?)<\/a>[^><]*?@[^><]*?([0-9]{4}\-[0-9]{2}\-[0-9]{2})[^><]*?([0-9]{2}:[0-9]{2}:[0-9]{2}).+?<\/div>[^><]*?<div[^><]*?>.+?<\/div>[^><]*?<p>(.+?)<\/p>[^><]*?<\/div>/ismu';

    $comments = array();
    if (preg_match_all($sp, $html, $c)) {
        $comment_authors = $c[2];
        $comment_texts = $c[5];
        $comment_dates = $c[3];
        $comment_times = $c[4];
        $comment_urls = $c[1];

        foreach ($comment_texts as $k => $v) {
            $comments[] = array(
                'author' => $comment_authors[$k],
                'text' => $v,
                'date' => $comment_dates[$k],
                'time' => $comment_times[$k],
                'url' => $comment_urls[$k]
            );
        }
    }

    return array(
        'article' => array(
            'title' => $article_title,
            'text' => $article_text,
            'date' => $article_date,
            'time' => $article_time,
            'tags' => $tags
        ),
        'comments' => $comments
    );
}

/**
 * Builds the INSERT statement for one article.
 *
 * @param int   $id           Article id to store.
 * @param array $article_data Result of parse_article_page(); the article date
 *                            must be "YYYY.MM.DD." and the time "HH:MM".
 * @return string SQL text terminated by a blank line.
 */
function generate_article_insert_sql($id, $article_data)
{
    $article = $article_data['article'];

    // "2009.01.02." -> "2009-01-02"; the page shows no seconds, ":01" is appended.
    $dparts = explode('.', $article['date']);
    $datetime = $dparts[0].'-'.$dparts[1].'-'.$dparts[2].' ' . $article['time'].':01';

    $tags = '';
    if (!empty($article['tags']) && is_array($article['tags'])) {
        // NOTE(review): separator reconstructed; confirm against the original script.
        $tags = implode(',', $article['tags']);
    }

    // Table name matches the one created by genereate_create_table_sql().
    $sql = "INSERT INTO `blogiem_articles` (`id`, `datetime`, `title`, `text`, `tags`)\n"
        . "VALUES (\n"
        . "    " . (int)$id . ",\n"
        . "    " . sql_quote($datetime) . ",\n"
        . "    " . sql_quote($article['title']) . ",\n"
        . "    " . sql_quote($article['text']) . ",\n"
        . "    " . sql_quote($tags) . "\n"
        . ");" . "\n\n";

    return $sql;
}

/**
 * Builds the INSERT statements for all comments of one article.
 *
 * @param int   $id       Id of the article the comments belong to.
 * @param array $comments List of comment arrays as produced by parse_article_page().
 * @return string SQL text (empty string when there are no comments).
 */
function generate_comment_insert_sql($id, $comments)
{
    $sql = '';

    foreach ($comments as $v) {
        $datetime = $v['date'].' '.$v['time'];

        // Table name matches the one created by genereate_create_table_sql().
        $sql .= "INSERT INTO `blogiem_comments` (`article_id`, `datetime`, `author`, `text`, `homepage`)\n"
            . "VALUES (\n"
            . "    " . (int)$id . ",\n"
            . "    " . sql_quote($datetime) . ",\n"
            . "    " . sql_quote($v['author']) . ",\n"
            . "    " . sql_quote($v['text']) . ",\n"
            . "    " . sql_quote($v['url']) . "\n"
            . ");" . "\n\n";
    }

    return $sql;
}

/**
 * Extracts tag names (<a ... rel="tag">) from the post body.
 *
 * @param string $html HTML of the post body.
 * @return array|false List of tag link texts, or false when there are none.
 */
function get_tags($html)
{
    if (empty($html)) {
        return false;
    }

    $sp = '/<a[^><]+?rel="tag"[^><]*?>(.+?)<\/a>/ismu';

    if (preg_match_all($sp, $html, $c)) {
        return $c[1];
    }

    return false;
}

/**
 * Removes the <p class="blogpost_tags"> paragraph from the post body.
 *
 * @param string $html HTML of the post body.
 * @return string|false Body without the tags paragraph, or false when the
 *                      input is empty.
 */
function remove_tags_block($html)
{
    if (empty($html)) {
        return false;
    }

    // "(?:" fixes the "(:?" typo; the lazy ".+?" stops at the first
    // "</a>...</p>" so later paragraphs that end in a link are preserved.
    $sp = '/<p[^><]+?class="blogpost_tags"[^><]*?>(?:[^><]*?<a[^><]*?>(.+?)<\/a>[^><]*?)<\/p>/ismu';

    $stripped = preg_replace($sp, '', $html);

    // preg_replace() yields null on a PCRE error (e.g. invalid UTF-8);
    // keep the body rather than lose the article text.
    return $stripped === null ? $html : $stripped;
}

/**
 * Writes the CREATE TABLE statements to $file_for_structure.
 *
 * @return string The SQL that was written.
 */
function genereate_create_table_sql()
{
    global $file_for_structure;

    $sql = "
CREATE TABLE `blogiem_articles` (
    `id` int(10) unsigned NOT NULL auto_increment,
    `title` varchar(255) default NULL,
    `text` text,
    `datetime` datetime default NULL,
    `tags` varchar(255) default NULL,
    PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;

CREATE TABLE `blogiem_comments` (
    `id` int(10) unsigned NOT NULL auto_increment,
    `article_id` int(11) default NULL,
    `author` varchar(100) default NULL,
    `text` text,
    `homepage` varbinary(255) default NULL,
    `datetime` datetime default NULL,
    PRIMARY KEY (`id`)
) ENGINE=MyISAM DEFAULT CHARSET=utf8;
";

    file_put_contents($file_for_structure, $sql);

    return $sql;
}

/**
 * Fetches a URL with cURL, following redirects and keeping cookies.
 *
 * @param string $link URL to fetch.
 * @return string|false Response body, or false on empty input or cURL failure.
 */
function request($link)
{
    global $debug_mode, $debug_file;

    if (empty($link)) {
        return false;
    }

    // Accept-Encoding is deliberately not listed here: it is set through
    // CURLOPT_ENCODING below so that cURL also decodes the compressed body.
    $custom_headers = array(
        'User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.4) Gecko/20070515 Firefox/2.0.0.4',
        'Accept: text/xml,application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5',
        'Accept-Language: en-us,en;q=0.5',
        'Accept-Charset: UTF-8,*',
        'Keep-Alive: 300',
        'Connection: keep-alive',
    );

    $session = curl_init();

    $log = false;
    if ($debug_mode) {
        // CURLOPT_STDERR needs an open stream, not a file name.
        $log = fopen($debug_file, 'a');
        if ($log !== false) {
            curl_setopt($session, CURLOPT_VERBOSE, 1);
            curl_setopt($session, CURLOPT_STDERR, $log);
        }
    }

    curl_setopt($session, CURLOPT_COOKIEJAR, "cookies.txt");
    curl_setopt($session, CURLOPT_COOKIEFILE, "cookies.txt");
    curl_setopt($session, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($session, CURLOPT_MAXREDIRS, 10);
    curl_setopt($session, CURLOPT_URL, $link);
    curl_setopt($session, CURLOPT_HEADER, false);
    curl_setopt($session, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($session, CURLOPT_CONNECTTIMEOUT, 15);
    curl_setopt($session, CURLOPT_ENCODING, 'gzip,deflate');
    curl_setopt($session, CURLOPT_HTTPHEADER, $custom_headers);

    $result = curl_exec($session);
    curl_close($session);

    if ($log !== false) {
        fclose($log);
    }

    return $result;
}
?>
© 2003 – 2010 PHP.lv komanda. Visas tiesības ir paturētas. Izņemot saturu, kurš ir tā autora īpašums, ja nekas nenosaka savādāk.