db.idx 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
  1. #include <array.au3>
  2. #include<IE.au3>
  3. ;#include<setdefault.au3>
  4. #include<file.au3>
  5. #include <MsgBoxConstants.au3>
  6. #include<excel.au3>
  7. #RequireAdmin
  8. #include<_html.au3>
  9. #include<sfilter.au3>
  10. #include<inet.au3>
  ; ---------------------------------------------------------------------------
  ; Script-wide AutoIt options. Entries that are commented out are not applied.
  ; ---------------------------------------------------------------------------
  ;Opt("CaretCoordMode", 1)
  ;Opt("ExpandEnvStrings", 0)
  Opt("ExpandVarStrings", 0) ; "$name" inside a string literal stays literal text
  ;Opt("GUICloseOnESC", 0)
  ;Opt("GUICoordMode", 1)
  ;Opt("GUIDataSeparatorChar","|")
  ;Opt("GUIOnEventMode", 0)
  ;Opt("GUIResizeMode",0)
  ;Opt("GUIEventOptions",1)
  Opt("MouseClickDelay", 10)
  Opt("MouseClickDownDelay", 10)
  Opt("MouseClickDragDelay", 250)
  Opt("MouseCoordMode", 1)
  Opt("MustDeclareVars", 0)
  Opt("PixelCoordMode", 1)
  Opt("SendAttachMode", 0)
  Opt("SendCapslockMode", 1)
  Opt("SendKeyDelay", 5)
  Opt("SendKeyDownDelay", 1)
  Opt("TCPTimeout", 100)
  Opt("TrayAutoPause", 1)
  Opt("TrayIconDebug", 0)
  Opt("TrayIconHide", 0)
  Opt("TrayMenuMode", 0)
  Opt("TrayOnEventMode", 0)
  Opt("WinDetectHiddenText", 0)
  Opt("WinSearchChildren", 1)
  Opt("WinTextMatchMode", 1)
  Opt("WinTitleMatchMode", 1)
  Opt("WinWaitDelay", 250)

  ; ---------------------------------------------------------------------------
  ; Global state shared by the functions of this script.
  ; Every name the original declared is still declared here with the value it
  ; ended up holding once the original setup had finished.
  ; ---------------------------------------------------------------------------
  Global $g_idGUIEdit ; control id of the edit box (assigned in start_job)
  Global $g_idGUIProg ; control id of the progress bar (assigned in start_job)
  Global $filepath = @ScriptDir
  Global $size = 0 ; number of lines in the task list file
  Global $list_file_url = @ScriptDir & "\任务列表.txt" ; task list file, read one URL per line
  Global $line_url = "http://guba.eastmoney.com/" ; URL currently being processed
  Global $state = ""
  Global $stock_id = ""
  ; The original first set this to @ScriptDir & "\data" and then overwrote it
  ; with @ScriptDir a few lines later; only the final value is kept.
  Global $resault_dir = @ScriptDir
  Global $oEventObject
  Global $SIZES = 0
  Global $CURPID = 0
  Global $TXTDATA = ""
  Global $oExcel, $oWorkbook
  Global $cur_process = 0
  Global $tmpfile = @ScriptDir & "\tmp.tmp" ; scratch file that receives each downloaded page
  Global $source = "dbc.data"
  Global $target = "任务列表.txt"
  Global $ierw_url = ""

  ; ---------------------------------------------------------------------------
  ; Browser start-up. $oIE was declared three times in the original; it is now
  ; declared once, and a failed launch is reported instead of surfacing later
  ; as a COM error on the first property access.
  ; ---------------------------------------------------------------------------
  ;$oIE = ObjCreate("InternetExplorer.Application.1")
  _IELoadWaitTimeout(100)
  Global $oIE = _IECreate("http://guba.eastmoney.com")
  If Not IsObj($oIE) Then
      MsgBox($MB_SYSTEMMODAL, "有错误发生", "_IECreate: could not start Internet Explorer.")
      Exit 1
  EndIf
  ; Main entry point: make sure a task list exists next to the script, tell the
  ; user what happened, then hand over to the collector window.
  While 1
      ; Both files are addressed relative to @ScriptDir, the same base the rest
      ; of the script uses, so the working directory no longer matters.
      Global $uulist = @ScriptDir & "\" & $target
      $list_file_url = $uulist
      Global $sStartupNote = ""
      If FileExists($uulist) Then
          $sStartupNote = "列表存在,从上次任务继续."
      ElseIf FileCopy(@ScriptDir & "\" & $source, $uulist) = 1 Then
          Global $starturl = "http://guba.eastmoney.com/list,"
          $sStartupNote = "已从源文件新建任务列表."
      Else
          ; Neither an existing list nor a source file to seed it from: there
          ; is nothing to process, so stop here with an explicit message.
          MsgBox($MB_SYSTEMMODAL, "网址", "找不到任务列表,也无法从源文件创建.程序退出.")
          Exit 1
      EndIf
      MsgBox($MB_SYSTEMMODAL, "网址", $sStartupNote, 1)
      start_job($oIE) ; open the collector window and run the job
  WEnd
  82. ;=============================================
  ; Downloads $ierw_url into the scratch file $tmpfile and injects the downloaded
  ; HTML into the body of the global IE document $oIE.
  ;
  ; Parameters: $ierw_url - address of the page to fetch.
  ; Returns:    1 on success; 0 with @error = 1 when the download failed,
  ;             0 with @error = 2 when the scratch file could not be read.
  ; On failure the IE body is cleared, so callers that scrape the document next
  ; do not pick up the previous page's content again.
  Func ierw($ierw_url)
      ; Drop the previous download first so a failed request cannot be mistaken
      ; for a successful one.
      FileDelete($tmpfile)

      ; InetGet in its default (blocking) mode returns the number of bytes
      ; downloaded and sets @error on failure.
      Local $iBytes = InetGet($ierw_url, $tmpfile)
      If @error Or $iBytes = 0 Then
          ConsoleWrite("ierw: download failed for " & $ierw_url & @CRLF)
          _IEBodyWriteHTML($oIE, "")
          Return SetError(1, 0, 0)
      EndIf

      ; FileRead with a file name opens and closes the file itself, so there is
      ; no handle to close afterwards (the original called FileClose on the name).
      Local $sHtml = FileRead($tmpfile)
      If @error Then
          ConsoleWrite("ierw: cannot read " & $tmpfile & @CRLF)
          _IEBodyWriteHTML($oIE, "")
          Return SetError(2, 0, 0)
      EndIf

      _IEBodyWriteHTML($oIE, $sHtml)
      Return 1
  EndFunc   ;==>ierw

  ; NOTE(review): this top-level call sits after the script's main "While 1"
  ; loop, which contains no ExitLoop, so execution does not reach it. It is
  ; kept unchanged.
  removeIE()
  ; Builds the small status window (edit box, progress bar, exit button),
  ; prepares the IE window through ieobjint() and then runs the download job.
  ;
  ; Parameters: $oie - passed by the caller but not read in this function;
  ;                    ieobjint() works on the global browser object.
  ; Does not return normally: the loop ends only through Exit.
  Func start_job($oie)
      Local $hGUIMain = GUICreate("IE BROWSER", 600, 500)
      $g_idGUIEdit = GUICtrlCreateEdit("626" & @CRLF, 10, 20, 580, 400)
      $g_idGUIProg = GUICtrlCreateProgress(10, 5, 580, 10)
      Local $idGUIExit = GUICtrlCreateButton("退出采集", 250, 450, 80, 30)
      GUISetState() ; show the window
      Sleep(1000) ; give the web page some time to load
      GUISwitch($hGUIMain) ; switch back to this GUI in case IE took the focus

      ieobjint()

      Local $iMsg
      While 1
          $iMsg = GUIGetMsg()
          ; Leave on the exit button or on the window's own close button.
          ; -3 is the value of $GUI_EVENT_CLOSE; the literal is used because
          ; GUIConstantsEx.au3 is not among this script's includes.
          If $iMsg = $idGUIExit Or $iMsg = -3 Then Exit
          DOWN_BY_LIST($list_file_url) ; download everything in the list
      WEnd
  EndFunc   ;==>start_job
  ; Processes the task list from its last line to its first: every entry longer
  ; than 10 characters is loaded via ierw() and scraped via pagereconize(); each
  ; processed line is then removed from the list file, so an interrupted run
  ; picks up where it stopped.
  ;
  ; Parameters: $list_file_url - path of the task list file (one URL per line).
  ; Returns:    only when the path is empty; otherwise the script exits after
  ;             the whole list has been handled.
  ; Globals written: $size, $SIZES, $state, $line_url, $stock_id, $CURPID.
  Func DOWN_BY_LIST($list_file_url)
      MsgBox($MB_SYSTEMMODAL, "网址", "开始下载列表里网址", 1)

      ; Validate the path before touching the file system (the original only
      ; checked after it had already tried to open and count the file).
      If $list_file_url = "" Then
          MsgBox($MB_SYSTEMMODAL, "", "没有选择入口列表.现在退回")
          Return
      EndIf

      ; The original also opened the list in append mode here and never used or
      ; closed that handle; the leaked FileOpen has been removed.
      $size = _FileCountLines($list_file_url)

      For $SIZES = $size To 1 Step -1
          $state = ""
          ; Strip surrounding whitespace so the fixed-offset trimming below is
          ; not thrown off by stray blanks.
          $line_url = StringStripWS(FileReadLine($list_file_url, $SIZES), 3)
          ConsoleWrite("列表名称" & $line_url)

          If StringLen($line_url) > 10 Then
              ; Fixed: the original passed the literal text "$line_url" (variable
              ; expansion in strings is switched off), so the result was always
              ; an empty string.
              ; NOTE(review): 38 and 5 are the original hard-coded offsets;
              ; presumably a fixed-length URL prefix and the ".html" suffix -
              ; confirm against real list entries.
              $stock_id = StringTrimRight(StringTrimLeft($line_url, 38), 5)

              ierw($line_url)
              Sleep(500)
              $CURPID = 1
              pagereconize()
              set_stock_show()
              SetError(0)
          EndIf

          ; Overwriting with an empty text deletes the line (see the File.au3
          ; documentation of _FileWriteToLine), which shrinks the list.
          _FileWriteToLine($list_file_url, $SIZES, "", True)
      Next

      MsgBox($MB_SYSTEMMODAL, "", "已经提取完内容,右下任务栏选退出.")
      Exit
  EndFunc   ;==>DOWN_BY_LIST
  150. ;=================================================
  151. ;=====================================================================================
  ; Attaches an event sink to the global IE object $oIE and sizes/positions the
  ; browser window.
  ;
  ; Globals written: $oEventObject (event sink), $hIEWnd (IE window handle).
  Func ieobjint()
      ; Without a browser object every property access below would abort the
      ; script with a COM error, so report and leave instead.
      If Not IsObj($oIE) Then
          MsgBox($MB_OK, "有错误发生", "ieobjint: the Internet Explorer object is not available.")
          Return
      EndIf

      ; Stored in the GLOBAL $oEventObject. The original declared a Local of the
      ; same name here, which was discarded when this function returned and left
      ; the global that removeIE() uses unassigned.
      $oEventObject = ObjEvent($oIE, "IEEvent_", "DWebBrowserEvents")
      If @error Then
          MsgBox($MB_OK, "有错误发生", _
                  "ObjEvent: Can't use event interface 'DWebBrowserEvents'. Error code: " & Hex(@error, 8))
      EndIf

      With $oIE
          .Visible = 1
          .Top = (@DesktopHeight - 400) / 2
          .Height = 600
          .Width = 600
          .Silent = 1 ; do not show IE's dialog boxes
          Global $hIEWnd = HWnd(.hWnd) ; remember the window in case the user closes it
      EndWith
  EndFunc   ;==>ieobjint
  169. ;======================================================================================
  ; Releases the event sink and closes the IE window if it is still open.
  ; Safe to call when the event sink was never created or the IE window handle
  ; was never recorded.
  ;
  ; Globals written: $oEventObject and $oIE are both reset to 0.
  Func removeIE()
      ; Calling a method on a non-object is a fatal runtime error, so check first.
      If IsObj($oEventObject) Then $oEventObject.Stop ; stop receiving IE events
      $oEventObject = 0

      ; $hIEWnd is only declared once ieobjint() has run; reading it before that
      ; would abort the script, hence the IsDeclared guard.
      If IsObj($oIE) Then
          If IsDeclared("hIEWnd") Then
              If WinExists($hIEWnd) Then $oIE.Quit ; close the IE window
          EndIf
      EndIf
      $oIE = 0
      ;GUIDelete($hGUIMain)
  EndFunc   ;==>removeIE
  ; Placeholder called once per processed list entry. It currently does
  ; nothing: its only statement is commented out.
  Func set_stock_show()
      ; Disabled: GUICtrlSetData($input1,$stock_id)
  EndFunc   ;==>set_stock_show
  ; Scrapes the page currently loaded in the global IE object $oIE and appends
  ; the extracted fields to @ScriptDir\data.txt, then walks the reply pages of
  ; the same post and appends their reply texts as well.
  ;
  ; Globals read:    $oIE, $line_url, $stock_id.
  ; Globals written: $TXTDATA (first field only), $CURPID (2 while reply pages
  ;                  are processed, 0 at the end).
  ; Files written:   @ScriptDir\data.txt, @ScriptDir\pagereconize.tmp.
  Func pagereconize()
      ; --- fields of the post itself (suffix labels are the original ones) ----
      ; heat / comment count; the only field selected by element id
      $TXTDATA = __pagereconize_DumpField("div", "id", "zwmbtilr", "--热度 评论数 /////")
      ; publish time
      __pagereconize_DumpField("DIV", "className", "zwfbtime", "--发表时间/////")
      ; read count
      __pagereconize_DumpField("SPAN", "className", "tc1", "--阅读数/////")
      ; title and body
      __pagereconize_DumpField("DIV", "className", "zwconttbt", "--正文及标题/////")
      ; body
      __pagereconize_DumpField("DIV", "className", "zwcontentmain", "--正文/////")

      ; --- count the links that point at reply pages ---------------------------
      ; The matching links are still dumped to pagereconize.tmp, now through a
      ; real file handle that is closed again. The count is kept in memory
      ; instead of re-counting the lines of that file.
      Local $sLinkDump = @ScriptDir & "\pagereconize.tmp"
      Local $hLinkDump = FileOpen($sLinkDump, $FO_OVERWRITE)
      Local $iReplyPages = 0
      Local $sHref
      Local $oLinks = _IELinkGetCollection($oIE)
      If IsObj($oLinks) Then
          For $oLink In $oLinks
              $sHref = $oLink.href
              ; Fixed: StringInStr returns the POSITION of the match, so the
              ; original "= 1" test only accepted a match at the very first
              ; character and in practice never matched.
              ; NOTE(review): the original looked for "news," immediately
              ; followed by $stock_id; here both only have to occur somewhere in
              ; the link, and an empty $stock_id is not used as a filter.
              ; Confirm this against real reply links.
              If StringInStr($sHref, "html#storeply") > 0 And StringInStr($sHref, "news,") > 0 _
                      And ($stock_id = "" Or StringInStr($sHref, $stock_id) > 0) Then
                  $iReplyPages += 1
                  If $hLinkDump <> -1 Then FileWrite($hLinkDump, $sHref & @CRLF)
              EndIf
          Next
      EndIf
      If $hLinkDump <> -1 Then FileClose($hLinkDump)

      ; --- visit every reply page and collect the reply texts ------------------
      Local $sPostBase = StringTrimRight($line_url, 5) ; drops the last 5 characters of the URL
      Local $sReplyUrl
      For $iPage = 1 To $iReplyPages
          $sReplyUrl = $sPostBase & "_" & $iPage & ".html#storeply"
          ConsoleWrite($sReplyUrl & @CRLF)
          ierw($sReplyUrl)
          Sleep(500)
          $CURPID = 2
          ; reply list
          __pagereconize_DumpField("DIV", "className", "zwlitext stockcodec", _
                  "--回复列出/////" & @CRLF & "-----" & @CRLF)
      Next

      $CURPID = 0
  EndFunc   ;==>pagereconize

  ; Helper for pagereconize(): gathers the innerText of every <$sTagName> element
  ; of the current IE document whose id ($sProperty = "id") or class name (any
  ; other $sProperty) equals $sWanted, runs the result through HTML_Filters,
  ; removes all whitespace, appends that text plus $sSuffix as one line to
  ; @ScriptDir\data.txt, and returns the text.
  Func __pagereconize_DumpField($sTagName, $sProperty, $sWanted, $sSuffix)
      Local $sJoined = ""
      Local $oNodes = 0
      If IsObj($oIE) Then
          Local $oDoc = $oIE.document
          If IsObj($oDoc) Then $oNodes = $oDoc.getElementsByTagName($sTagName)
      EndIf

      If IsObj($oNodes) Then
          Local $sValue
          For $oNode In $oNodes
              If $sProperty = "id" Then
                  $sValue = $oNode.id
              Else
                  $sValue = $oNode.className
              EndIf
              ; Every match is put in FRONT of what was collected so far, exactly
              ; as the original did.
              If $sValue = $sWanted Then $sJoined = $oNode.innerText & @CRLF & $sJoined & @CRLF
          Next
      EndIf

      Local $sField = StringStripWS(HTML_Filters($sJoined, 0), 8)
      FileWriteLine(@ScriptDir & "\data.txt", $sField & $sSuffix)
      Return $sField
  EndFunc   ;==>__pagereconize_DumpField