/* ************************************************************************************ 項目是分析各視頻的真實地址 生成一個dll, 供其它項目使用, 項目中使用了zlib, boost, 要另下載
如果是sohu網站則自己分析地址. 如果是其它視頻網站才從flvcd上獲取結果. 項目中使用了: 1.gzip解壓. 2.UTF8與GB2312轉碼 3.boost正則表達式 boost查找單個匹配, 查找所有匹配 4.sohu視頻地址是分了四類視頻分析的. 可以用fiddler查找功能查找到所想要的幾個字符串 5.文件獲取是使用的MFC中的CHttpFile獲取的, 嘗試用了WinINet和WinHTTP ms的api訪問網絡的都不怎么行. chrome瀏覽器第一個版本是用winhttp訪問網絡的. 也試過socket訪問網絡 但要跳轉什么的太繁了 6.函數導出, 可以用def文件. 也可以用dllexport 7.多線程CreateThread 注: 網絡訪問花了相當大的時間 正則表達式boost中的perl正則表達式. "要寫成\" \要寫成\\, 要多用查找替換. 匹配多個結果時要迭代搜索查詢 網絡給的數據是壓縮的gzip問題也花了好長時間. utf-8與gb2312轉換也花了好長時間. buff最后一次讀取時, 字符串沒法控制. 內存初始化是沒置成0就行了 函數導出研究了兩種方法, 花了很長時間. 多線程沒花多長時間 ************************************************************************************ */
// Analyzer.cpp
vector<string> Analyzer::GetPropertyInIntegratedBrackets(string strPropertyName, string strJson) { vector<string> vect; regex regclipsURL("(?<=(" + strPropertyName + "\":\\[))[^]]+?(?=(]))"); boost::smatch what; string strclipsURL = ""; //轉成另一個變量再傳,不然出錯 強轉是強的指針,以前是結構類型,強指針沒用 if(regex_search(strJson, what, regclipsURL)) { strclipsURL = what[0]; } int iIndex = 0; while (iIndex >= 0) { iIndex = strclipsURL.find(','); if(iIndex > 0) { vect.push_back(strclipsURL.substr(1, iIndex - 2));//去了兩邊的雙引號 strclipsURL = strclipsURL.substr(iIndex + 1); } else vect.push_back(strclipsURL.substr(1,strlen(strclipsURL.c_str()) - 2)); //去了兩邊的雙引號 } return vect; } int Analyzer::httpgzdecompress(Byte *zdata, uLong nzdata, Byte *data, uLong *ndata) { int err = 0; z_stream d_stream = {0}; /* decompression stream */ static char dummy_head[2] = { 0x8 + 0x7 * 0x10, (((0x8 + 0x7 * 0x10) * 0x100 + 30) / 31 * 31) & 0xFF, }; d_stream.zalloc = (alloc_func)0; d_stream.zfree = (free_func)0; d_stream.opaque = (voidpf)0; d_stream.next_in = zdata; d_stream.avail_in = 0; d_stream.next_out = data; if(inflateInit2(&d_stream, 47) != Z_OK) return -1; while (d_stream.total_out < *ndata && d_stream.total_in < nzdata) { d_stream.avail_in = d_stream.avail_out = 1; /* force small buffers */ if((err = inflate(&d_stream, Z_NO_FLUSH)) == Z_STREAM_END) break; if(err != Z_OK ) { if(err == Z_DATA_ERROR) { d_stream.next_in = (Bytef*) dummy_head; d_stream.avail_in = sizeof(dummy_head); if((err = inflate(&d_stream, Z_NO_FLUSH)) != Z_OK) { return -1; } } else return -1; } } if(inflateEnd(&d_stream) != Z_OK) return -1; *ndata = d_stream.total_out; return 0; } //ms-help://MS.VSCC.v90/MS.MSDNQTR.v90.chs/intl/unicode_81rn.htm //將UTF8字符串轉換為gb2312 CString Analyzer::ConvertUTF8toGB2312(const char *pData, size_t size) { size_t n = MultiByteToWideChar(CP_UTF8, 0, pData, (int)size, NULL, 0); WCHAR * pChar = new WCHAR[n+1]; n = MultiByteToWideChar(CP_UTF8, 0, pData, (int)size, pChar, n); pChar[n]=0; n = WideCharToMultiByte(936, 0, 
pChar, -1, 0, 0, 0, 0); char *p = new char[n+1]; n = WideCharToMultiByte(936, 0, pChar, -1, p, (int)n, 0, 0); CString result(p); delete []pChar; delete []p; return result; } CString Analyzer::GetPageHtml(CString strUrl) { CString strHtml = "";//獲取HTML try { strUrl = strUrl.Trim(); CInternetSession session("HttpClient"); session.SetOption(INTERNET_OPTION_CONNECT_TIMEOUT, 5000); // 5秒的連接超時 session.SetOption(INTERNET_OPTION_SEND_TIMEOUT, 1000); // 1秒的發(fā)送超時 session.SetOption(INTERNET_OPTION_RECEIVE_TIMEOUT, 7000); // 7秒的接收超時 session.SetOption(INTERNET_OPTION_DATA_SEND_TIMEOUT, 1000); // 1秒的發(fā)送超時 session.SetOption(INTERNET_OPTION_DATA_RECEIVE_TIMEOUT, 7000); // 7秒的接收超時 session.SetOption(INTERNET_OPTION_CONNECT_RETRIES, 1); // 1次重試 CHttpFile* pFile = (CHttpFile*)session.OpenURL((LPCTSTR)strUrl, 1, INTERNET_FLAG_RELOAD | INTERNET_FLAG_TRANSFER_BINARY); DWORD dwStatusCode; pFile-> QueryInfoStatusCode(dwStatusCode); if(dwStatusCode == HTTP_STATUS_OK) { CString strLength = ""; CString strHeaders = ""; pFile->QueryInfo(HTTP_QUERY_CONTENT_LENGTH, strLength); pFile->QueryInfo(HTTP_QUERY_RAW_HEADERS_CRLF, strHeaders); long lLength = 4096 * 500; byte* pbHtml = new byte[lLength]; //在堆上動態(tài)分配內存 memset(pbHtml, 0, lLength); //初始化 byte sRecived[512]; int iIndex = 0; int num = 0; while((num = pFile->Read(sRecived,512)) > 0 ) { memcpy(pbHtml+iIndex, sRecived, num); iIndex+=num; } pbHtml[iIndex] = NULL; if(strHeaders.Find("gzip") > -1) { uLong ulLength = 4096 * 500; byte* pbData = new byte[ulLength]; memset(pbData,0,ulLength); httpgzdecompress(pbHtml, lLength, pbData, &ulLength); pbData[ulLength] = NULL; strHtml = (CHAR*)pbData; delete pbData; } else { strHtml = (CHAR*)pbHtml; if(strHeaders.MakeLower().Find("utf-8") > - 1 || strHtml.MakeLower().Find("utf-8") > -1)//strHtml變成小寫了 { strHtml = ConvertUTF8toGB2312((CHAR*)pbHtml,strlen((CHAR*)pbHtml));//編碼轉換 } else//重新得到大小寫區(qū)分的 { strHtml = (CHAR*)pbHtml; } } delete pbHtml; } pFile -> Close(); delete pFile; session.Close(); return 
strHtml; } catch (CException* e) { (void)e; this->m_State = Analyzer_State_NetError; return ""; } }
更多文章、技術交流、商務合作、聯系博主
微信掃碼或搜索:z360901061

微信掃一掃加我為好友
QQ號聯(lián)系: 360901061
您的支持是博主寫作最大的動力,如果您喜歡我的文章,感覺我的文章對您有幫助,請用微信掃描下面二維碼支持博主2元、5元、10元、20元等您想捐的金額吧,狠狠點擊下面給點支持吧,站長非常感激您!手機微信長按不能支付解決辦法:請將微信支付二維碼保存到相冊,切換到微信,然后點擊微信右上角掃一掃功能,選擇支付二維碼完成支付。
【本文對您有幫助就好】元
