123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351 |
- #include <array.au3>
- #include<IE.au3>
- ;#include<setdefault.au3>
- #include<file.au3>
- #include <MsgBoxConstants.au3>
- #include<excel.au3>
- #RequireAdmin
- #include<_html.au3>
- #include<sfilter.au3>
- #include<inet.au3>
; ---------------------------------------------------------------------------
; Interpreter options. AutoItSetOption() is the long form of Opt().
; ---------------------------------------------------------------------------

; Options left at their defaults (kept here for reference):
;   AutoItSetOption("CaretCoordMode", 1)
;   AutoItSetOption("ExpandEnvStrings", 0)
;   AutoItSetOption("GUICloseOnESC", 0)
;   AutoItSetOption("GUICoordMode", 1)
;   AutoItSetOption("GUIDataSeparatorChar", "|")
;   AutoItSetOption("GUIOnEventMode", 0)
;   AutoItSetOption("GUIResizeMode", 0)
;   AutoItSetOption("GUIEventOptions", 1)

; Language behaviour: no $var$ expansion inside strings, implicit declarations allowed.
AutoItSetOption("ExpandVarStrings", 0)
AutoItSetOption("MustDeclareVars", 0)

; Mouse and pixel handling.
AutoItSetOption("MouseClickDelay", 10)
AutoItSetOption("MouseClickDownDelay", 10)
AutoItSetOption("MouseClickDragDelay", 250)
AutoItSetOption("MouseCoordMode", 1)
AutoItSetOption("PixelCoordMode", 1)

; Keyboard input via Send().
AutoItSetOption("SendAttachMode", 0)
AutoItSetOption("SendCapslockMode", 1)
AutoItSetOption("SendKeyDelay", 5)
AutoItSetOption("SendKeyDownDelay", 1)

; Networking.
AutoItSetOption("TCPTimeout", 100)

; Tray icon.
AutoItSetOption("TrayAutoPause", 1)
AutoItSetOption("TrayIconDebug", 0)
AutoItSetOption("TrayIconHide", 0)
AutoItSetOption("TrayMenuMode", 0)
AutoItSetOption("TrayOnEventMode", 0)

; Window matching.
AutoItSetOption("WinDetectHiddenText", 0)
AutoItSetOption("WinSearchChildren", 1)
AutoItSetOption("WinTextMatchMode", 1)
AutoItSetOption("WinTitleMatchMode", 1)
AutoItSetOption("WinWaitDelay", 250)
; ---------------------------------------------------------------------------
; Shared state. AutoIt variable names are case-insensitive, so $oie and $oIE
; are the same variable throughout the script.
; ---------------------------------------------------------------------------

; GUI control ids, filled in by start_job().
Global $g_idGUIEdit, $g_idGUIProg

; Event sink handle and the IE window handle bookkeeping.
Global $oEventObject
Global $oExcel, $oWorkbook

; Paths.
Global $filepath = @ScriptDir
Global $list_file_url = @ScriptDir & "\任务列表.txt" ; task list: one URL per line
Global $tmpfile = @ScriptDir & "\tmp.tmp"          ; scratch file for downloaded pages
Global $resault_dir = @ScriptDir
Global $source = "dbc.data"                        ; seed file copied to the task list
Global $target = "任务列表.txt"

; Progress / scratch values used by the worker functions.
Global $size = 0          ; line count of the task list (set in DOWN_BY_LIST)
Global $SIZES = 0
Global $CURPID = 0
Global $cur_process = 0
Global $state = ""
Global $stock_id = ""
Global $TXTDATA = ""
Global $line_url = "http://guba.eastmoney.com/"
Global $ierw_url = ""

; Browser instance. The load-wait timeout is set before the first navigation.
;$oIE = ObjCreate("InternetExplorer.Application.1")
Global $oIE = 0
_IELoadWaitTimeout(100)
$oIE = _IECreate("http://guba.eastmoney.com")
- ;========================================
; ---------------------------------------------------------------------------
; Script entry point.
; Makes sure the task list exists next to the script (seeding it from $source
; on first run) and then hands control to start_job().
; The list is always addressed through $list_file_url so that the existence
; check, the seed copy and the later processing all refer to the same file,
; regardless of the current working directory.
; ---------------------------------------------------------------------------
While 1
    If Not FileExists($list_file_url) Then
        ; First run: create the task list from the seed file shipped with the script.
        If FileCopy(@ScriptDir & "\" & $source, $list_file_url) <> 1 Then
            MsgBox($MB_SYSTEMMODAL, "网址", "无法创建任务列表: " & $list_file_url)
            Exit 1
        EndIf
    EndIf

    Global $uulist = $list_file_url
    Global $starturl = "http://guba.eastmoney.com/list,"

    MsgBox($MB_SYSTEMMODAL, "网址", "列表存在,从上次任务继续.", 1)
    start_job($oIE) ; opens the GUI and processes the list; exits the script when done
WEnd
- ;=============================================
; #FUNCTION# ==================================================================
; Name ..........: ierw
; Description ...: Downloads a page with InetGet() into $tmpfile and writes the
;                  downloaded HTML into the body of the global IE instance.
; Parameters ....: $ierw_url - URL of the page to fetch.
; Return values .: Success - number of bytes downloaded.
;                  Failure - 0 and @error = 1 (@extended = InetGet error code).
;                            The IE body is replaced with an empty string so a
;                            previously loaded page is not scraped a second time.
; ===============================================================================
Func ierw($ierw_url)
    ; Remove the previous download first; otherwise a failed request would
    ; silently re-render the old page.
    FileDelete($tmpfile)

    Local $iBytes = InetGet($ierw_url, $tmpfile)
    Local $iDownloadErr = @error

    Local $sHtml = ""
    If $iDownloadErr = 0 And FileExists($tmpfile) Then
        ; FileRead() with a file name opens and closes the file by itself,
        ; so no FileClose() is needed (FileClose() only accepts handles).
        $sHtml = FileRead($tmpfile)
    EndIf

    _IEBodyWriteHTML($oIE, $sHtml)

    If $iDownloadErr <> 0 Then Return SetError(1, $iDownloadErr, 0)
    Return $iBytes
EndFunc   ;==>ierw

; NOTE(review): this top-level call sits after the endless While loop at the
; top of the script, which has no ExitLoop, so it appears to be unreachable.
removeIE()
; #FUNCTION# ==================================================================
; Name ..........: start_job
; Description ...: Creates the progress window, prepares the IE instance via
;                  ieobjint() and then processes the task list.
; Parameters ....: $oie - IE object passed by the caller (the worker functions
;                         use the global $oIE).
; Return values .: None. The function does not return: the script exits either
;                  from here (exit button / window close) or from DOWN_BY_LIST().
; ===============================================================================
Func start_job($oie)
    ; Numeric value of $GUI_EVENT_CLOSE; GUIConstantsEx.au3 is not included here.
    Local Const $iGuiEventClose = -3

    Local $hGUIMain = GUICreate("IE BROWSER", 600, 500)
    $g_idGUIEdit = GUICtrlCreateEdit("626" & @CRLF, 10, 20, 580, 400)
    $g_idGUIProg = GUICtrlCreateProgress(10, 5, 580, 10)
    Local $idGUIExit = GUICtrlCreateButton("退出采集", 250, 450, 80, 30)
    GUISetState() ; show the GUI

    Sleep(1000)          ; give IE some time to load the web page
    GUISwitch($hGUIMain) ; switch back to our GUI in case IE stole the focus

    ; Attach to the IE instance and position its window.
    ieobjint()

    Local $iMsg
    While 1
        $iMsg = GUIGetMsg()
        ; Leave on the exit button or when the window is closed.
        If $iMsg = $idGUIExit Or $iMsg = $iGuiEventClose Then Exit

        ; Download every URL in the task list.
        DOWN_BY_LIST($list_file_url)
    WEnd
EndFunc   ;==>start_job
; #FUNCTION# ==================================================================
; Name ..........: DOWN_BY_LIST
; Description ...: Walks the task list from the last line to the first. Each
;                  URL longer than 10 characters is fetched with ierw() and
;                  scraped with pagereconize(); the processed line is then
;                  removed from the list so an interrupted run can resume.
; Parameters ....: $list_file_url - path of the task list (one URL per line).
; Return values .: Returns early if no list path was given; otherwise exits
;                  the script once the whole list has been processed.
; ===============================================================================
Func DOWN_BY_LIST($list_file_url)
    MsgBox($MB_SYSTEMMODAL, "网址", "开始下载列表里网址", 1)

    ; Validate the path before touching the file.
    If $list_file_url = "" Then
        MsgBox($MB_SYSTEMMODAL, "", "没有选择入口列表.现在退回")
        Return
    EndIf

    ; All file access below goes through file names, which open and close the
    ; file per call, so no handle is opened (and none can leak) here.
    $size = _FileCountLines($list_file_url)

    For $SIZES = $size To 1 Step -1
        $state = ""
        $line_url = FileReadLine($list_file_url, $SIZES)
        ConsoleWrite("列表名称" & $line_url)

        If StringLen($line_url) > 10 Then
            ; Cut the id out of the URL itself. The variable must not be quoted:
            ; with ExpandVarStrings off, "$line_url" is just a 9-character literal.
            ; NOTE(review): the 38/5 offsets assume a fixed URL layout - confirm
            ; against the URLs actually stored in the list.
            $stock_id = StringTrimRight(StringTrimLeft($line_url, 38), 5)

            ierw($line_url)
            Sleep(500)
            $CURPID = 1
            pagereconize()
            set_stock_show()
        EndIf

        ; Overwriting a line with an empty string removes the processed entry.
        _FileWriteToLine($list_file_url, $SIZES, "", True)
    Next

    MsgBox($MB_SYSTEMMODAL, "", "已经提取完内容,右下任务栏选退出.")
    Exit
EndFunc   ;==>DOWN_BY_LIST
- ;=================================================
- ;=====================================================================================
; #FUNCTION# ==================================================================
; Name ..........: ieobjint
; Description ...: Connects an event sink (prefix "IEEvent_") to the global IE
;                  instance and makes the IE window visible at a fixed size.
; Parameters ....: None (operates on the global $oIE).
; Return values .: Failure - @error = 1 when $oIE is not an object.
; Remarks .......: The event sink is stored in the global $oEventObject so it
;                  stays alive after this function returns and can be released
;                  by removeIE(). Declares the global $hIEWnd.
; ===============================================================================
Func ieobjint()
    ; Assign to the script-level variable; a Local here would be destroyed on
    ; return and leave the global that removeIE() uses unset.
    $oEventObject = ObjEvent($oIE, "IEEvent_", "DWebBrowserEvents")
    If @error Then
        MsgBox($MB_OK, "有错误发生", _
                "ObjEvent: Can't use event interface 'DWebBrowserEvents'. Error code: " & Hex(@error, 8))
    EndIf

    ; Without a browser object there is nothing to configure.
    If Not IsObj($oIE) Then Return SetError(1, 0, 0)

    With $oIE
        .Visible = 1
        .Top = (@DesktopHeight - 400) / 2
        .Height = 600
        .Width = 600
        .Silent = 1 ; don't show IE's dialog boxes
        Global $hIEWnd = HWnd(.hWnd) ; remember the window in case the user closes it
    EndWith
EndFunc   ;==>ieobjint
- ;======================================================================================
; #FUNCTION# ==================================================================
; Name ..........: removeIE
; Description ...: Stops the IE event sink, closes the IE window if it is still
;                  open and releases the global IE object.
; Parameters ....: None (operates on $oEventObject, $oIE and $hIEWnd).
; Return values .: None.
; Remarks .......: Safe to call when the event sink or the browser was never
;                  created: every object access is guarded.
; ===============================================================================
Func removeIE()
    ; Tell IE we don't want to receive events, then drop the sink.
    If IsObj($oEventObject) Then $oEventObject.Stop
    $oEventObject = 0

    If IsObj($oIE) Then
        ; $hIEWnd only exists after ieobjint() configured the browser, so it is
        ; read through Eval() to avoid touching an undeclared variable.
        If IsDeclared("hIEWnd") And WinExists(Eval("hIEWnd")) Then $oIE.Quit
    EndIf
    $oIE = 0 ; release the browser object
    ;GUIDelete($hGUIMain) ; Remove GUI
EndFunc   ;==>removeIE
; #FUNCTION# ==================================================================
; Name ..........: set_stock_show
; Description ...: Placeholder called after each page is processed. It currently
;                  does nothing; the only statement is commented out.
; Parameters ....: None.
; Return values .: None.
; ===============================================================================
Func set_stock_show()
    ;GUICtrlSetData($input1,$stock_id)
EndFunc   ;==>set_stock_show
; #FUNCTION# ==================================================================
; Name ..........: pagereconize
; Description ...: Scrapes the page currently loaded in the global IE instance
;                  and appends the extracted fields to data.txt, then fetches
;                  the reply pages of the post and appends their reply text.
; Parameters ....: None (uses the globals $oIE, $line_url and $stock_id).
; Return values .: Failure - @error = 1 when $oIE is not an object.
; Remarks .......: One record per field is appended to @ScriptDir\data.txt, each
;                  tagged with a fixed suffix. The reply-page links found on the
;                  page are also written to @ScriptDir\pagereconize.tmp.
;                  Sets the global $CURPID to 2 while reply pages are processed
;                  and back to 0 at the end.
; ===============================================================================
Func pagereconize()
    If Not IsObj($oIE) Then Return SetError(1, 0, 0)

    ; --- fields of the main post ------------------------------------------
    __pagereconize_WriteMatches("div", True, "zwmbtilr", "--热度 评论数 /////")
    __pagereconize_WriteMatches("DIV", False, "zwfbtime", "--发表时间/////")
    __pagereconize_WriteMatches("SPAN", False, "tc1", "--阅读数/////")
    __pagereconize_WriteMatches("DIV", False, "zwconttbt", "--正文及标题/////")
    __pagereconize_WriteMatches("DIV", False, "zwcontentmain", "--正文/////")

    ; --- collect the reply-page links --------------------------------------
    Local $sLinkFile = @ScriptDir & "\pagereconize.tmp"
    Local $iReplyPages = 0

    FileDelete($sLinkFile)
    ; Use a real handle so the file is opened once and reliably closed.
    Local $hLinkFile = FileOpen($sLinkFile, $FO_OVERWRITE)
    If $hLinkFile <> -1 Then
        Local $oLinks = _IELinkGetCollection($oIE)
        If IsObj($oLinks) Then
            Local $sHref
            For $oLink In $oLinks
                $sHref = $oLink.href
                ; StringInStr() returns the match position, so test for "found"
                ; (> 0). Requiring position 1 for both substrings can never be
                ; true and disabled the reply loop below.
                If StringInStr($sHref, "html#storeply") > 0 _
                        And StringInStr($sHref, "news," & $stock_id) > 0 Then
                    FileWrite($hLinkFile, $sHref & @CRLF)
                    $iReplyPages += 1
                EndIf
            Next
        EndIf
        FileClose($hLinkFile)
    EndIf

    ; --- walk the reply pages ------------------------------------------------
    ; Reply pages are addressed as <post url without ".html">_<n>.html#storeply.
    Local $sPageStem = StringTrimRight($line_url, 5)
    Local $sReplyUrl
    For $iPage = 1 To $iReplyPages
        $sReplyUrl = $sPageStem & "_" & $iPage & ".html#storeply"
        ConsoleWrite($sReplyUrl & @CRLF)
        ierw($sReplyUrl)
        Sleep(500)
        $CURPID = 2

        __pagereconize_WriteMatches("DIV", False, "zwlitext stockcodec", _
                "--回复列出/////" & @CRLF & "-----" & @CRLF)
    Next

    $CURPID = 0
EndFunc   ;==>pagereconize

; #INTERNAL_USE_ONLY# ===========================================================
; Name ..........: __pagereconize_WriteMatches
; Description ...: Collects the inner text of every element with the given tag
;                  name whose id (or class name) equals $sWanted, strips HTML and
;                  all whitespace, and appends the result plus $sSuffix as one
;                  record to @ScriptDir\data.txt.
; Parameters ....: $sTagName   - tag name passed to GetElementsByTagName().
;                  $bMatchOnId - True to compare the element id, False to compare
;                                the element class name.
;                  $sWanted    - id / class name to look for.
;                  $sSuffix    - marker appended after the extracted text.
; Remarks .......: Matches are prepended, so the record lists them in reverse
;                  document order. The cleaned text is also stored in the global
;                  $TXTDATA.
; ===============================================================================
Func __pagereconize_WriteMatches($sTagName, $bMatchOnId, $sWanted, $sSuffix)
    Local $sJoined = ""
    Local $sKey

    Local $oDoc = $oIE.document
    If IsObj($oDoc) Then
        Local $oTags = $oDoc.GetElementsByTagName($sTagName)
        If IsObj($oTags) Then
            For $oTag In $oTags
                If $bMatchOnId Then
                    $sKey = $oTag.id
                Else
                    $sKey = $oTag.className
                EndIf
                If $sKey = $sWanted Then
                    $sJoined = $oTag.innertext & @CRLF & $sJoined & @CRLF
                EndIf
            Next
        EndIf
    EndIf

    $TXTDATA = StringStripWS(HTML_Filters($sJoined, 0), 8)
    FileWriteLine(@ScriptDir & "\data.txt", $TXTDATA & $sSuffix)
EndFunc   ;==>__pagereconize_WriteMatches
|