爬取网址8btc.au3 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. #Region ;**** 编译指令由 by AutoIt3Wrapper_GUI 创建 ****
  2. #AutoIt3Wrapper_Outfile=提取网址 x86.exe
  3. #Au3Stripper_Parameters=/cs /1
  4. #EndRegion ;**** 编译指令由 by AutoIt3Wrapper_GUI 创建 ****
  5. #Region ;**** 编译指令由 by AutoIt3Wrapper_GUI 创建 ****
  6. #EndRegion ;**** 编译指令由 by AutoIt3Wrapper_GUI 创建 ****
  7. #Region ;**** 参数创建于 ACNWrapper_GUI ****
  8. #PRE_Res_requestedExecutionLevel=None
  9. #PRE_Run_Obfuscator=y
  10. #Obfuscator_Parameters=/cs /1
  11. #EndRegion ;**** 参数创建于 ACNWrapper_GUI ****
  12. #include-once
  13. #include <misc.au3>
  14. ;#include<makefile.au3>
  15. #include <ButtonConstants.au3>
  16. #include <ComboConstants.au3>
  17. #include <EditConstants.au3>
  18. #include <GUIConstantsEx.au3>
  19. #include <GUIListBox.au3>
  20. #include <StaticConstants.au3>
  21. #include <WindowsConstants.au3>
  22. #include <IE.au3>
  23. #include <MsgBoxConstants.au3>
  24. #include <file.au3>
  ; ---------------------------------------------------------------------------
  ; Script-wide state. Everything assigned at top level is global in AutoIt;
  ; the explicit Global keyword only makes that visible.
  ; ---------------------------------------------------------------------------
  Global $RGtring = "/(\w+):\/\/([^/:]+)(:\d*)?([^# ]*)/" ; URL pattern (not referenced in the visible code)
  Global $tmpfile = @ScriptDir & "\tmp.tmp"                ; scratch file that downloaded pages are saved to
  Global $aera = "", $sSnipet = ""
  Global $globaldb = "dbc.data /link"
  Global $sstring = ""                                     ; receives the result for a single page address

  #Region ### START Koda GUI section ### Form=d:\autoit3\工程\采集界面.kxf
  ; Controls are created in the same order as before so their control IDs
  ; and tab order stay the same.
  Global $Form1_1 = GUICreate("网址库建立", 623, 107, 201, 278)
  Global $Label2 = GUICtrlCreateLabel("要采集几个月的数据", 64, 20, 112, 17)
  Global $Input3 = GUICtrlCreateInput("0.1", 184, 16, 133, 21)
  Global $Label3 = GUICtrlCreateLabel("股票编号", 128, 48, 52, 17)
  Global $Input4 = GUICtrlCreateInput("601138", 184, 44, 133, 21)
  Global $Button2 = GUICtrlCreateButton("采来源地址", 332, 16, 105, 49)
  Global $input1 = GUICtrlCreateInput("当前消息", 16, 80, 597, 21)
  GUISetState(@SW_SHOW, $Form1_1)
  #EndRegion ### END Koda GUI section ###

  ; Internet Explorer instance used by _data() to parse downloaded HTML.
  ; It is created after the window is shown and then hidden from the user.
  Global $oIE = _IECreate("http://8btc.com/")
  $oIE.visible = False
  ; Main GUI message loop.
  ;
  ; - Window close: shuts down the hidden Internet Explorer instance and exits.
  ;   The loop can only be left through Exit, so the cleanup has to happen
  ;   here; statements placed after WEnd would never run and the invisible
  ;   IE process would be left behind.
  ; - $Button2: builds the list-page URLs 1..N for the entered id, appends each
  ;   one to <ScriptDir>\<id>\urllist.txt, lets getlist() harvest the links
  ;   found on that page, and finally runs checkfile() on the result.
  While 1
      $nMsg = GUIGetMsg()
      Switch $nMsg
          Case $GUI_EVENT_CLOSE
              _IEQuit($oIE)
              $oIE = 0
              Exit
          Case $Button2
              ;AutoIt_Debugger_Command:Enable_Debug
              ; Number of list pages to visit: the entered value times 50.
              $start_time = GUICtrlRead($Input3) * 50
              $stock_id = GUICtrlRead($Input4)
              Global $filepath_t = @ScriptDir & "\" & $stock_id
              ; _FileCreate() does not create missing folders, so make sure
              ; the per-id output folder exists first.
              DirCreate($filepath_t)
              $succ = _FileCreate($filepath_t & "\urllist.txt")
              Global $uulist = "com/thread-" & $stock_id
              Global $starturl = "http://8btc.com/forum-2"
              ConsoleWrite($succ)
              If $succ = 1 Then
                  Global $file1 = $filepath_t & "\urllist.txt"
                  For $u = 1 To $start_time
                      $list_url = $starturl & $stock_id & "-" & $u & ".html"
                      ; Passing the file name (not a handle) makes FileWriteLine
                      ; open the file in append mode and close it again, so the
                      ; file is free when _data() opens it during getlist().
                      FileWriteLine($file1, $list_url)
                      ConsoleWrite($list_url)
                      getlist($list_url)
                  Next
                  MsgBox($MB_SYSTEMMODAL, "网址收集完成", "即将完成,进行优化处理.", 1)
                  checkfile()
                  ; $tmpfile is a path, not a handle: just delete the scratch file.
                  FileDelete($tmpfile)
                  ;Local $iReturn = ShellExecuteWait("notepad.exe")
              EndIf
      EndSwitch
  WEnd
  ; Downloads one list page and passes its HTML to _data().
  ;
  ; Parameters:
  ;   $list_url - address of the page to download.
  ; Returns:
  ;   The global $sstring. On success it holds whatever _data() returned for
  ;   this page; on failure it is returned unchanged.
  ; Side effects:
  ;   Sets the globals $tmpfile and $tag (the line terminator _data() appends
  ;   to every link), overwrites the scratch file $tmpfile, and writes an
  ;   error text into the $input1 control when the download fails.
  Func getlist($list_url)
      If $list_url = "" Then
          MsgBox(16, "错误", "没有输入完整!")
          Return $sstring
      EndIf

      ConsoleWrite($list_url)
      Global $tmpfile = @ScriptDir & "\tmp.tmp"
      Global $tag = @CRLF

      ; Blocking download; @error is non-zero when it fails.
      InetGet($list_url, $tmpfile)
      If @error Then
          GUICtrlSetData($input1, "错误,股票编号是否正确")
          Return $sstring
      EndIf

      $sstring = _data(FileRead($tmpfile))
      Return $sstring
  EndFunc   ;==>getlist
  ; Rewrites a text file in place after dropping two array entries.
  ;
  ; The file is read with _FileReadToArray(), which puts the line count in
  ; element 0. That count element is removed, then the element at index 1 of
  ; the remaining array (the second line of the file) is removed, and the
  ; rest is written back.
  ;
  ; Parameters:
  ;   $filepath_t - path of the file to rewrite.
  ; Returns:
  ;   Nothing useful (0). @error is 1 when the file could not be read and 2
  ;   when it could not be recreated; in the first case the file is left
  ;   untouched instead of being truncated.
  Func m($filepath_t)
      Local $arr[0]
      If Not _FileReadToArray($filepath_t, $arr) Then Return SetError(1, 0, 0)

      _ArrayDelete($arr, 0) ; drop the line-count element
      _ArrayDelete($arr, 1) ; drop the second line of the file

      ; Only truncate the file once its content is safely in memory.
      Local $scc = _FileCreate($filepath_t)
      ConsoleWrite($scc)
      If Not $scc Then Return SetError(2, 0, 0)

      _FileWriteFromArray($filepath_t, $arr)
  EndFunc   ;==>m
  ; Loads an HTML snippet into the shared IE instance and appends the matching
  ; links to the URL list file.
  ;
  ; A link is kept when its href contains both the global $uulist and the
  ; text "about:"; that prefix is replaced by the global $aera before the
  ; line is written. Every href is also shown in the $input1 control while
  ; the collection is scanned.
  ;
  ; Parameters:
  ;   $sSnipet - HTML text to load into the body of the global $oIE document.
  ; Returns:
  ;   No explicit value (0), both on success and when the function gives up
  ;   early.
  ; Relies on globals: $oIE, $file1, $tag, $uulist, $aera, $input1.
  Func _data($sSnipet)
      Local $head_rpl = "about:"
      Global $_IEBDY_ = "READY"

      _IEBodyWriteHTML($oIE, $sSnipet)
      If @error Then Return

      ; For...In over something that is not an object halts the script, so
      ; stop here when no link collection could be obtained.
      Local $oLinks = _IELinkGetCollection($oIE)
      If @error Or Not IsObj($oLinks) Then Return

      ; Mode 1 = append. A local handle keeps this function from overwriting
      ; a handle variable owned by the caller.
      Local $hOut = FileOpen($file1, 1)
      If $hOut = -1 Then Return

      Local $nstxt
      For $oLink In $oLinks
          $nstxt = $oLink.href & $tag
          GUICtrlSetData($input1, $nstxt)
          If StringInStr($nstxt, $uulist, 0, 1, 1) And StringInStr($nstxt, $head_rpl) Then
              ConsoleWrite($nstxt)
              $nstxt = StringReplace($nstxt, $head_rpl, $aera)
              FileWriteLine($hOut, $nstxt)
          EndIf
      Next

      FileClose($hOut)
  EndFunc   ;==>_data
  ; Copies the merge script into the output folder, runs it and waits for it
  ; to finish, reports the result, removes the copy again and opens the
  ; output folder.
  ;
  ; The script is taken from <ScriptDir>\include\merge.bat, or from
  ; <ScriptDir>\merge.bak when that copy fails. If neither can be copied,
  ; nothing is run: an error box is shown and the function returns instead of
  ; reporting success.
  ;
  ; Relies on the global $filepath_t (output folder). No parameters and no
  ; explicit return value.
  Func checkfile()
      Local $sBat = $filepath_t & "\MERGE.bat"

      ; Flag 1 = overwrite, so a MERGE.bat left over from an interrupted run
      ; does not make the copy fail.
      If FileCopy(@ScriptDir & "\include\merge.bat", $sBat, 1) Then
          ShellExecuteWait($sBat)
      ElseIf FileCopy(@ScriptDir & "\merge.bak", $sBat, 1) Then
          RunWait($sBat)
      Else
          MsgBox($MB_SYSTEMMODAL, "错误", "找不到 merge.bat,无法删除重复的行.")
          Return
      EndIf

      MsgBox($MB_SYSTEMMODAL, "删除了重复的行", "删除了重复的行,数据保存在编号文件夹下DBC.data,完成.", 2)
      MsgBox($MB_SYSTEMMODAL, "删除了重复的行", "可以进入目录,执行开始爬取数据", 1)
      FileDelete($sBat)
      Sleep(1000)
      ShellExecute($filepath_t)
  EndFunc   ;==>checkfile