‘ , $buffer)) {
$authorbegin = 1;
//Otan to keimeno ine olo stin idia grammh
if (eregi(‘
// Example of a listing URL: http://www.articlesbase.com/resumes-articles/765
// Build the list of listing pages to parse: 52 URLs whose numeric suffix
// advances by 15 per page (1, 16, 31, ... 766).
$linkforparcing = array();
$i = 0;
while ($i <= 51) {
    $pageoffset = $i * 15 + 1 + 0;
    $linkforparcing[$i] = 'http://www.articlesbase.com/resumes-articles/' . $pageoffset;
    $i++;
}
// Number of listing pages that the main loop iterates over.
$ppages = count($linkforparcing);
///====================================== Article clean-up helpers ======================================
/**
 * Clean a scraped fragment of article HTML (used for the description and the
 * author box) down to mostly-plain text.
 *
 * Steps, in order: drop quote-like entities/characters, strip every byte
 * outside a small whitelist, remove link-tracking attributes and a few
 * markup leftovers, collapse runs of spaces and trim.
 *
 * NOTE(review): the published listing this function was recovered from had
 * its quotes converted to typographic characters and several string literals
 * emptied (they presumably contained HTML tags such as <br> or <p>). The
 * literals below reproduce what the listing actually shows; confirm against
 * the real page markup before relying on them.
 *
 * @param string $string Raw text/HTML fragment.
 * @return string Cleaned text.
 */
function cleandescription($string){
    // Quote-like entities must go BEFORE the whitelist pass: that pass strips
    // '&', which would otherwise leave residue such as "quot;" in the text.
    // The semicolon-terminated spellings come first so that the bare
    // '&rdquo' entry cannot leave a stray ';' behind.
    // NOTE(review): the listing showed these already decoded; the entity
    // spellings are a reconstruction - confirm.
    $quotes = array(
        '&quot;', '&ldquo;', '&rdquo;', '&rdquo', '&lsquo;', '&rsquo;',
        '&raquo;', '&laquo;', '&rsaquo;', '&lsaquo;', '&bdquo;', '&sbquo;',
        '“', '”', '‘', '’', '»', '«', '›', '‹', '„', '‚',
    );
    $string = str_replace($quotes, '', $string);

    // Whitelist pass (ereg_replace was removed in PHP 7, so PCRE is used).
    // The original class contained the range ".-<", i.e. the characters
    // . / 0-9 : ; <  - it is spelled out here so the kept set is unchanged.
    // NOTE(review): '-' and ',' are NOT in the kept set, so hyphens and
    // commas are removed from descriptions; that matches the original
    // pattern but may not have been intended.
    $string = preg_replace('/[^[:space:]a-zA-Z0-9*_.\/:;<>="\'?!]/', '', $string);

    // The listing shows a double quote followed by a line break here.
    // NOTE(review): probably held a stripped HTML tag originally - confirm.
    $string = str_replace("\"\n", '', $string);

    // Remove the outgoing-link tracking handler. Parentheses were already
    // stripped by the whitelist pass, hence the odd-looking fragments.
    $onclick = array('onClick="javascript:pageTracker._trackPageview', "'/outgoing/article_exit_link'", ';" ');
    $string = str_replace($onclick, '', $string);
    $string = str_replace('target=_blank>', '', $string);
    $string = str_replace('</a>', ' ', $string);
    $string = str_replace('/"gt;', '', $string);

    // The listing shows three identical replacements of a bare line break.
    // NOTE(review): these most likely removed distinct HTML tags that the
    // blog rendering swallowed; only the visible behaviour is kept.
    $string = str_replace("\n", '', $string);

    // Collapse runs of spaces (the original array of multi-space strings was
    // flattened to single spaces by the blog rendering).
    $string = preg_replace('/ {2,}/', ' ', $string);

    return trim($string);
}
/**
 * Normalise a scraped article title.
 *
 * Drops quote-like entities/characters, removes every byte that is not
 * whitespace, an ASCII letter or digit, or one of . ? ! : -, collapses runs
 * of spaces, trims, and upper-cases the first letter of each word.
 *
 * @param string $string Raw title text.
 * @return string Cleaned, word-capitalised title.
 */
function cleantitle($string){
    // Entities are removed first; otherwise the whitelist pass below would
    // strip only '&' and ';' and leave residue such as "quot" in the title.
    // The semicolon-terminated spellings come before the bare '&rdquo'.
    // NOTE(review): the published listing showed these already decoded, so
    // the exact entity spellings are a reconstruction - confirm.
    $quotes = array(
        '&quot;', '&ldquo;', '&rdquo;', '&rdquo', '&lsquo;', '&rsquo;',
        '&raquo;', '&laquo;', '&rsaquo;', '&lsaquo;', '&bdquo;', '&sbquo;',
        '“', '”', '‘', '’', '»', '«', '›', '‹', '„', '‚',
    );
    $string = str_replace($quotes, '', $string);

    // ereg_replace was removed in PHP 7; same character class under PCRE.
    // The trailing '-' is a literal hyphen, not a range.
    $string = preg_replace('/[^[:space:]a-zA-Z0-9.?!:-]/', '', $string);

    // Collapse runs of spaces (the original array of multi-space strings was
    // flattened to single spaces by the blog rendering).
    $string = preg_replace('/ {2,}/', ' ', $string);

    return ucwords(trim($string));
}
//====================================== Optional proxy ======================================
// Set $proxycontent to 1 to fetch the article pages through the HTTP proxy
// configured below; with 0 the pages are opened directly and the proxy
// settings are left undefined.
$proxycontent = 0;
if ($proxycontent == 1) {
    // Plain ASCII quotes: the typographic quote/prime characters around the
    // address in the published listing are a parse error.
    $proxy_name = '172.20.60.10';
    $proxy_port = 3128;
}
//====================================== Parsing more than one page ======================================
// Main scraping loop: for every article URL in $link it reads the page line
// by line (through the proxy socket when $proxycontent is 1, otherwise via
// fopen), extracts $title, $description and $author, and writes them to
// "<slug-of-title>.txt" unless that file already exists.
// NOTE(review): this listing is damaged. Quotes are typographic, several
// string literals (the markers passed to eregi/stripos/str_replace) are
// empty or cut off, and the part of the outer $j loop that assigns $link and
// $items is not visible; the outer loop's closing brace is missing as well.
// The code below is therefore left exactly as found.
for ($j=0; $j<=($ppages-1); $j++) {
$stringlink = '^
//================================== Grabbing the article texts =======================================================
$count = 0;
for ($i=0; $i<=($items-1); $i++) {
$textbegin = 0;
$textend = 0;
$authorbegin = 0;
$authorend = 0;
// If a proxy is configured -----------------------------------------------------------
if ($proxycontent==1){
$proxy_fp = fsockopen($proxy_name, $proxy_port);
if (!$proxy_fp) {return false;}
// NOTE(review): "rn" below presumably was "\r\n" before the backslashes were lost - confirm.
fputs($proxy_fp, "GET $link[$i] HTTP/1.0rnHost: $proxy_namernrn");
while(!feof($proxy_fp)) {
$buffer = fgets($proxy_fp, 4096000);
//Description: between the begin and end markers, keep appending lines
if ($textbegin==1 && $textend==0 ){
if (eregi('
‘ , $buffer)){
$textend = 1;
}
$description .= $buffer;
$description = cleandescription($description);
}
//Author: between the begin and end markers, keep appending lines
if ($authorbegin==1 && $authorend==0 ){
if (eregi(‘
‘ , $buffer)){
$authorend = 1;
}
$author .= $buffer;
$author = cleandescription($author);
}
//Author: detect the line on which the author box starts
if (eregi(‘^
‘ , $buffer)) {
$authorbegin = 1;
//When the whole text is on the same line
if (eregi(‘
‘ , $buffer)){
$authorend = 1;
$arxh = stripos($buffer, ‘
‘, $arxh);
$telos = $telos – $arxh;
$author = substr($buffer, $arxh, $telos);
$author = cleandescription($author);
$author = str_replace(‘
‘, ‘
About the writer: ’, $author);
}else{
$arxh = stripos($buffer, ‘
‘, ‘
About the writer: ’, $author);
}
}
//Title: take the text between 'title>' and the next '<'
if (eregi(“^
$arxh = stripos($buffer, ‘title>’);
$arxh = $arxh + 6;
$telos = stripos($buffer, ‘<', $arxh);
$telos = $telos - $arxh;
$title = substr($buffer, $arxh, $telos);
$title = cleantitle($title);
//Description: detect the line on which the article body starts
}elseif (eregi('
$textbegin = 1;
//When the whole text is on the same line
if (eregi(‘
‘ , $buffer)){
$textend = 1;
$arxh = stripos($buffer, ‘KonaBody”>’);
$arxh = $arxh + 10;
$telos = stripos($buffer, ‘
‘, $arxh);
$telos = $telos – $arxh;
$description = substr($buffer, $arxh, $telos);
$description = cleandescription($description);
}else{
// Body continues on later lines: take everything after the marker up to the end of this line
$arxh = stripos($buffer, ‘KonaBody”>’);
$arxh = $arxh + 10;
$telos = strlen($buffer);
$telos = $telos – $arxh;
$description = substr($buffer, $arxh, $telos);
$description = cleandescription($description);
}
}
}
fclose($proxy_fp);
// If no proxy is configured -----------------------------------------------------------
}else{
// Reads the remote file line by line; the parsing below mirrors the proxy branch above
$handle2 = @fopen($link[$i], “r”);
if ($handle2) {
while (!feof($handle2)) {
$buffer = fgets($handle2, 4096000);
//Description: between the begin and end markers, keep appending lines
if ($textbegin==1 && $textend==0 ){
if (eregi(‘
‘ , $buffer)){
$textend = 1;
}
$description .= $buffer;
$description = cleandescription($description);
}
//Author: between the begin and end markers, keep appending lines
if ($authorbegin==1 && $authorend==0 ){
if (eregi(‘
‘ , $buffer)){
$authorend = 1;
}
$author .= $buffer;
$author = cleandescription($author);
}
//Author: detect the line on which the author box starts
if (eregi(‘^
‘ , $buffer)) {
$authorbegin = 1;
//When the whole text is on the same line
if (eregi(‘
‘ , $buffer)){
$authorend = 1;
$arxh = stripos($buffer, ‘
‘, $arxh);
$telos = $telos – $arxh;
$author = substr($buffer, $arxh, $telos);
$author = cleandescription($author);
$author = str_replace(‘
‘, ‘
About the writer: ’, $author);
}else{
$arxh = stripos($buffer, ‘
‘, ‘
About the writer: ’, $author);
}
}
//Title: take the text between 'title>' and the next '<'
if (eregi(“^
$arxh = stripos($buffer, ‘title>’);
$arxh = $arxh + 6;
$telos = stripos($buffer, ‘<', $arxh);
$telos = $telos - $arxh;
$title = substr($buffer, $arxh, $telos);
$title = cleantitle($title);
//Description: detect the line on which the article body starts
}elseif (eregi('
$textbegin = 1;
//When the whole text is on the same line
if (eregi(‘
‘ , $buffer)){
$textend = 1;
$arxh = stripos($buffer, ‘KonaBody”>’);
$arxh = $arxh + 10;
$telos = stripos($buffer, ‘
‘, $arxh);
$telos = $telos – $arxh;
$description = substr($buffer, $arxh, $telos);
$description = cleandescription($description);
}else{
// Body continues on later lines: take everything after the marker up to the end of this line
$arxh = stripos($buffer, ‘KonaBody”>’);
$arxh = $arxh + 10;
$telos = strlen($buffer);
$telos = $telos – $arxh;
$description = substr($buffer, $arxh, $telos);
$description = cleandescription($description);
}
}
}
}
}
// Creates the txt file: title, description and author, one after another
// NOTE(review): ”n” presumably was "\n" before the backslash was lost - confirm.
$article = $title.”n”;
$article .= $description.”n”;
$article .= $author.”n”;
// File name: the title with every non-alphanumeric character turned into a
// separator and runs of separators collapsed to one '-'
$txttitle = trim($title);
$txttitle = ereg_replace(“[^A-Za-z0-9]“, ” “, $txttitle);
$txttitle = str_replace(‘ ‘, ‘-’, $txttitle);
$txttitle = str_replace(‘——-’, ‘-’, $txttitle);
$txttitle = str_replace(‘——’, ‘-’, $txttitle);
$txttitle = str_replace(‘—–’, ‘-’, $txttitle);
$txttitle = str_replace(‘—-’, ‘-’, $txttitle);
$txttitle = str_replace(‘—’, ‘-’, $txttitle);
$txttitle = str_replace(‘–’, ‘-’, $txttitle);
// Skip articles whose file already exists; only newly written files are counted
if (file_exists($txttitle.”.txt”)){
echo ‘To article: ‘.$txttitle.”.txt iparxei kai den to ksanakatebazoume.
“;
}else{
file_put_contents ( $txttitle.”.txt”, $article );
$count++;
}
}
// Report how many text files were created
echo “Dimiourghthikan: “.$count.” arxeia keimenou.
“;
?>
No related posts.
Related posts brought to you by Yet Another Related Posts Plugin.