PHP image extraction from HTML
Last updated at (Mon Jan 14 2008 16:08:28)
The following functions extract the src attribute of img tags from HTML code.
I have handled almost every scenario I could think of; I hope it helps you.
-Nitin
<?php
/*********************************************
* BEGIN parser related functions
*********************************************/
/**
 * Finds the next "src" attribute name in $html at or after $pos.
 *
 * A match is the text "src" (case-insensitive) immediately followed by "=",
 * a space, a tab, a carriage return or a line feed. The line-break forms are
 * accepted so that markup such as "src\n= 'a.png'" is found; parseNextImage()
 * already skips line breaks between the attribute name and its value.
 *
 * The search is purely textual, so "src" at the end of a longer attribute
 * name (for example "data-src=") is reported as well.
 *
 * @param string $html Markup to search; taken by reference but never modified.
 * @param int    $pos  Offset at which to start searching.
 * @return int|false Offset of the earliest match, or false when there is none.
 */
function getSrcPos(&$html, $pos)
{
    // stripos() rejects offsets outside the string (a ValueError on PHP 8,
    // a warning before that), so answer "not found" directly.
    $length = strlen($html);
    if ($pos > $length || $pos < -$length) {
        return false;
    }

    $earliest = false;
    foreach (array("src=", "src ", "src\t", "src\r", "src\n") as $needle) {
        $found = stripos($html, $needle, $pos);
        // Keep the smallest offset among the needles that matched at all.
        if ($found !== false && ($earliest === false || $found < $earliest)) {
            $earliest = $found;
        }
    }

    return $earliest;
}
/**
 * Extracts an attribute value from $html, starting at $pos.
 *
 * With no $char the value is treated as unquoted and ends at the first
 * space, tab or ">". With $char (the opening quote character) the value ends
 * at the next occurrence of $char, or at ">" if that comes first.
 * Scanning also stops at the end of $html, so truncated markup such as
 * "<img src=abc" terminates instead of looping forever past the last byte.
 *
 * @param string $html Markup being parsed; taken by reference, not modified.
 * @param int    $pos  In: offset of the first character of the value.
 *                     Out: offset of the terminating character, or
 *                     strlen($html) when the input ended first.
 * @param string $char Closing quote character, or '' for an unquoted value.
 * @return string The value with CR/LF removed and surrounding whitespace
 *                trimmed.
 */
function &extractString(&$html, &$pos, $char='')
{
    $start = $pos;
    $length = strlen($html);

    if ($char === '' || $char === null) {
        $terminators = array(" ", "\t", ">");
    } else {
        $terminators = array($char, ">");
    }

    // The length bound is what prevents an endless loop on unterminated input.
    while ($pos < $length && !in_array($html[$pos], $terminators, true)) {
        ++$pos;
    }

    $str = substr($html, $start, $pos - $start);
    // Values may be wrapped across lines in the markup; join them up and trim.
    $str = trim(str_replace(array("\n", "\r"), "", $str));
    return $str;
}
/**
 * Finds the next <img> tag at or after $offset and extracts its src value.
 *
 * @param string $html   Markup to scan; taken by reference, not modified.
 * @param int    $offset In: where to start looking. Out: on 1, the offset
 *                       where the extracted value ended; on -1, the offset of
 *                       the ">" closing the src-less tag; unchanged on false.
 * @param string $srcStr Out: the extracted src value. Written only when the
 *                       function returns 1.
 * @return int|false 1 when a src value was stored in $srcStr; -1 when the
 *                   <img> tag closes before any src is found (call again to
 *                   continue with the next tag); false when there is no
 *                   further <img> tag, no further src, or the markup ends
 *                   right after the src attribute name.
 */
function parseNextImage(&$html, &$offset, &$srcStr)
{
    $length = strlen($html);
    // stripos() does not accept an offset past the end of the string.
    if ($offset > $length) {
        return false;
    }

    $imgPos = stripos($html, "<img", $offset);
    if ($imgPos === false) {
        return false;
    }

    $tagEnd = strpos($html, ">", $imgPos);
    $srcPos = getSrcPos($html, $imgPos);
    if ($srcPos === false) {
        // No src anywhere in the remaining markup: nothing left to extract.
        return false;
    }
    if ($tagEnd !== false && $tagEnd < $srcPos) {
        // This tag closes before the next src, so that src belongs to
        // something later. Step past this tag and let the caller retry.
        $offset = $tagEnd;
        return -1;
    }

    // Skip the attribute name, then "=" and any whitespace before the value.
    // strspn() stops at the end of the string, so this never reads past it.
    $valuePos = $srcPos + 3;
    $valuePos += strspn($html, "= \t\r\n", $valuePos);
    if ($valuePos >= $length) {
        // Markup ended before any value was given.
        return false;
    }

    $first = $html[$valuePos];
    if ($first == "'" || $first == '"') {
        // Quoted value: read up to the matching quote.
        $end = $valuePos + 1;
        $srcStr = extractString($html, $end, $first);
    } else {
        // Unquoted value: read up to whitespace or ">".
        $end = $valuePos;
        $srcStr = extractString($html, $end);
    }

    $offset = $end;
    return 1;
}
/*********************************************
* END parser related functions
*********************************************/
/**
 * Demonstrates how to drive parseNextImage(): prints the src of every <img>
 * tag in a sample document, one "### value ###" line per image.
 */
function testImageExtraction()
{
    $html = "<img src=abc.com /> <img class=hello src=\"abc1.com\" /> <img class=\"mno\" src='abc2.com ' /> <img src= 'abc3.com' />
<img src= abc4.com /> <img src = \"abc5.com\" /> <img src = ' abc6.com ' /> <img src = abc7.com\\n/new/img.jpg />
<img src =
\"
abc8.
com\"/>
";
    /*****
    * Always call the function parseNextImage() in this way:
    * first make two variables $offset = 0 and $str = ""
    * and pass them along with $html.
    * Keep calling until it returns false. A return value of -1 means an
    * <img> tag without src was skipped, so $str still holds the previous
    * value and must not be used for that iteration.
    *****/
    $offset = 0;
    $str = "";
    while (($status = parseNextImage($html, $offset, $str)) !== false) {
        if ($status !== 1) {
            continue;
        }
        // $str contains the src value extracted, use it the way you want
        echo "### $str ###\n";
    }
}
// Run the demonstration as soon as this script is loaded.
testImageExtraction();
?>
Comments
Leave your comment(s) below: