r**********d 发帖数: 510 | 1 我请教一个问题。
我想把 http://q.stock.sohu.com/app2/rpsholder.up?code=&sd=2013-7-13&ed=&type=date&dir=1&p=1
上的数据用 php DOM parse, 然后写道数据库中。 但
sohu encoding是gbk, php dom parse 后的encoding是ascii, 我 百度的很多, 试了
一些方法,都不能显示。
我现在只希望能在mysql 用utf8 ecoding 里显示中文。 然后我用 csmar数据做一些分
析。
各位大牛请指点迷津。我叮当包子相报。
$con = mysql_connect($host, $user, $pass);
if (!$con) {
echo "Could not connect to server\n";
trigger_error(mysql_error(), E_USER_ERROR);
} else {
echo "Connection established\n";
}
$ok = mysql_select_db("insider", $con);
echo mysql_get_server_info() . "\n";
# Set character_set_results
mysql_query("SET character_set_results=utf8", $con);
# Set character_set_client and character_set_connection
mysql_query("SET character_set_client=utf8", $con);
mysql_query("SET character_set_connection=utf8", $con);
for ($i = 1; $i< 2; $i++)
{
#$page = "http://vip.stock.finance.sina.com.cn/q/go.php/vInvestConsult/kind/nbjy/index.phtml?p=".$i;
$page = "http://q.stock.sohu.com/app2/rpsholder.up?code=&sd=2013-7-13&ed=&type=date&dir=1&p=".$i;
$content = file_get_contents($page );
$bodyStart = strpos($content, '') + 8;
$bodyStartEnd = strpos($content, " | ") ;
$body = "".substr($content, $bodyStart, $bodyStartEnd - $
bodyStart). "";
#$meta = '';
$body = $body;
$dom = new DOMDocument();
//load the html
$html = $dom->loadHTML($body);
header('Content-Type: text/html; charset=gbk');
print $body ;
//discard white space
$dom->preserveWhiteSpace = false;
//the table by its tag name
$tables = $dom->getElementsByTagName('table');
//get all rows from the table
$rows = $tables->item(0)->getElementsByTagName('tr');
// loop over the table rows
foreach ($rows as $row)
{
// get each column by tag name
$cols = $row->getElementsByTagName('td');
// print the values
echo "encloding".mb_detect_encoding($cols->item(4)->nodeValue);
#print mb_convert_encoding($cols->item(0)->nodeValue, 'gbk', 'UTF8'
).' ';
#print mb_convert_encoding( $cols->item(3)->nodeValue, 'gbk', 'UTF8'
).' ';
ini_set('mbstring.substitute_character', "none");
echo mb_convert_encoding($cols->item(1)->nodeValue, 'UTF-8', 'UTF-8')
;
print mb_convert_encoding($cols->item(0)->nodeValue,'UTF-8', 'gbk'
).' ';
print mb_convert_encoding($cols->item(1)->nodeValue, 'UTF-8', 'ascii
' ).' ';
print iconv ( "ascii", "UTF-8", $cols->item(3)->nodeValue ).'
/>';
print iconv ( "gbk", "UTF-8", $cols->item(4)->nodeValue).' ';
print $cols->item(5)->nodeValue.' ';
print $cols->item(6)->nodeValue.' ';
print $cols->item(7)->nodeValue.' ';
print $cols->item(9)->nodeValue.' ';
}
}g****z 发帖数: 1135 | 2 iconv('cp936', 'utf-8', $nodeValue);
不行吗? |
|
|
|
|
|