关于使用CInternetSession和CHttpFile读取网页内容时中文乱码的问题

首先要确认项目属性中的字符集不是Unicode(即使用多字节字符集)

其次,每次用ReadString读取一行之后,把该行从UTF-8转换为宽字符再追加到结果中,代码如下:

CInternetSession httpSession;

CString url;

CString html;

m_url.GetWindowText(url);

m_html.GetWindowText(html);

CHttpFile* htmlFile = (CHttpFile*) httpSession.OpenURL("http://www.cnki.net/KCMS/detail/detail.aspx?dbcode=CDFD&QueryID=8&CurRec=1&dbname=CDFDLAST2011&filename=1011095008.nh&urlid=&yx=&uid=WEEvREcwSlJHSldSdnQ1V05QSXlrTG5xNTFkeHU5SUVpeW9SUk1rK2hWblhYeVRwRWNhWjlpeGNNUml1engwPQ==");

//CString content;

CString str;

WCHAR *temp;

while (htmlFile->ReadString(str))

{

//ConvertUTF8toGB2312(content,CP_UTF8,CP_ACP );

char *pStr = str.GetBuffer(str.GetLength()); //取得str对象的原始字符串

int nBufferSize = MultiByteToWideChar(CP_UTF8, 0, pStr, -1, NULL, 0); //取得所需缓存的多少

wchar_t *pBuffer = (wchar_t*)malloc(nBufferSize * sizeof(wchar_t));//申请缓存空间

MultiByteToWideChar(CP_UTF8, 0, pStr, -1 , pBuffer, nBufferSize*sizeof(wchar_t));//转码

//MessageBoxW(NULL, pBuffer, L"Text", MB_OK); //显示

html+=pBuffer;

free(pBuffer); //释放缓存

//MessageBox(content);

}

m_html.SetWindowText(html);

htmlFile->Close();

httpSession.Close();
这里的网址是中国知网的学位论文网页,其charset是UTF-8,按上述方法转换后即可正常显示中文。