爬取网址.au3 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. #EndRegion ;**** 参数创建于 ACNWrapper_GUI ****
  2. #include-once
  3. #include <misc.au3>
  4. ;#include<makefile.au3>
  5. #include <ButtonConstants.au3>
  6. #include <ComboConstants.au3>
  7. #include <EditConstants.au3>
  8. #include <GUIConstantsEx.au3>
  9. #include <GUIListBox.au3>
  10. #include <StaticConstants.au3>
  11. #include <WindowsConstants.au3>
  12. #include <IE.au3>
  13. #include <MsgBoxConstants.au3>
  14. #include <file.au3>
  ; GUI layout produced by the Koda form designer. The #Region/#EndRegion marker
  ; lines are kept verbatim.
  #Region ### START Koda GUI section ### Form=d:\autoit3\工程\采集界面.kxf
  ; Main window.
  Global $Form1_1 = GUICreate("网址库建立", 623, 107, 201, 278)
  ; Label + input: how many months of data to collect. The button handler
  ; multiplies this value by 50 to obtain the number of list pages.
  Global $Label2 = GUICtrlCreateLabel("要采集几个月的数据", 64, 20, 112, 17)
  Global $Input3 = GUICtrlCreateInput("0.1", 184, 16, 133, 21)
  ; Label + input: stock code.
  Global $Label3 = GUICtrlCreateLabel("股票编号", 128, 48, 52, 17)
  Global $Input4 = GUICtrlCreateInput("601138", 184, 44, 133, 21)
  ; Button that starts the URL collection.
  Global $Button2 = GUICtrlCreateButton("采来源地址", 332, 16, 105, 49)
  ; Input reused as a status line (getlist() and _data() write messages into it).
  Global $input1 = GUICtrlCreateInput("当前消息", 16, 80, 597, 21)
  GUISetState(@SW_SHOW)
  #EndRegion ### END Koda GUI section ###

  ; Scratch file that receives each downloaded list page.
  Global $tmpfile = @ScriptDir & "\tmp.tmp"
  ; Site root.
  Global $aera = "http://guba.eastmoney.com"
  Global $sSnipet = ""
  ; Browser instance used by _data() to parse the downloaded HTML.
  Global $oIE = _IECreate($aera)
  ; To hide the browser window, enable the next line:
  ;$oIE.visible = False
  ; Holds the result returned by _data() for a single page.
  Global $sstring = ""
  ; Main GUI message loop.
  ;   - Closing the window leaves the loop so the Internet Explorer instance is
  ;     shut down by the teardown code below (the previous code called Exit,
  ;     which made that teardown unreachable and leaked the IE process).
  ;   - Clicking $Button2 builds <ScriptDir>\<stock id>\urllist.txt, fetches each
  ;     list page through getlist(), then runs checkfile().
  While 1
      $nMsg = GUIGetMsg()
      Switch $nMsg
          Case $GUI_EVENT_CLOSE
              ExitLoop

          Case $Button2
              ; Number of list pages to walk = value of the "months" input * 50.
              $start_time = GUICtrlRead($Input3) * 50
              $stock_id = GUICtrlRead($Input4)
              Global $filepath_t = @ScriptDir & "\" & $stock_id
              Global $file1 = $filepath_t & "\urllist.txt"
              ; Globals read by _data(): link filter text and list URL prefix.
              Global $uulist = "news," & $stock_id
              Global $starturl = "http://guba.eastmoney.com/list,"

              ; Make sure the per-stock folder exists before creating the file in it.
              DirCreate($filepath_t)
              $succ = _FileCreate($file1)
              ConsoleWrite($succ)

              If $succ = 1 Then
                  For $u = 1 To $start_time
                      $list_url = $starturl & $stock_id & "_" & $u & ".html"
                      ConsoleWrite($list_url & @CRLF)
                      ; _data() (called by getlist) opens and closes urllist.txt
                      ; itself, so no file handle is held here.
                      getlist($list_url)
                  Next
                  MsgBox($MB_SYSTEMMODAL, "网址收集完成", "即将完成,进行优化处理.", 1)
                  checkfile()
                  ;FileDelete($tmpfile)
              Else
                  ; Previously a failed _FileCreate was silent; report it in the status line.
                  GUICtrlSetData($input1, "错误,无法创建文件: " & $file1)
              EndIf
      EndSwitch
  WEnd

  ; Teardown: close the browser instance created at start-up.
  If IsObj($oIE) Then
      $oIE.visible = True
      _IEQuit($oIE)
  EndIf
  $oIE = 0
  ; getlist($list_url)
  ; Downloads one list page to $tmpfile and hands its HTML to _data(), which
  ; appends the matching links to the URL list file.
  ;
  ; Parameters: $list_url - address of the list page to download.
  ; Returns:    the global $sstring (set to _data()'s return value on success,
  ;             left unchanged when the URL is empty or the download fails).
  ;
  ; Fixes over the previous version:
  ;   - an empty URL now stops processing (before, the "@error" test that
  ;     followed the MsgBox never detected anything and the download went ahead);
  ;   - a failed download is detected through InetGet's @error, so a stale
  ;     tmp.tmp from an earlier page is no longer parsed again;
  ;   - removed an unused local $oIE, an unused copy of the URL and an
  ;     unreachable ConsoleWrite placed after Return.
  Func getlist($list_url)
      ; Globals consumed by _data(); $tag is the line terminator appended to each link.
      Global $tmpfile = @ScriptDir & "\tmp.tmp"
      Global $tag = @CRLF

      If $list_url = "" Then
          MsgBox(16, "错误", "没有输入完整!")
          Return $sstring
      EndIf

      ConsoleWrite($list_url)

      ; Blocking download; InetGet sets @error when the transfer fails.
      InetGet($list_url, $tmpfile)
      If @error Then
          GUICtrlSetData($input1, "错误,股票编号是否正确")
          Return $sstring
      EndIf

      $sstring = _data(FileRead($tmpfile))
      Return $sstring
  EndFunc   ;==>getlist
  ; _data($sSnipet)
  ; Loads an HTML snippet into the global IE instance ($oIE), walks every link
  ; of the resulting document and appends to the file $file1 each link whose
  ; href contains $uulist ("news,<stock id>") or the "about:" prefix.
  ;
  ; Parameters: $sSnipet - HTML source of a downloaded list page.
  ; Returns:    0 (no explicit result). On failure @error is set:
  ;               1 - the link collection could not be obtained,
  ;               2 - the output file could not be opened for appending.
  ; Reads the globals $oIE, $file1, $tag, $uulist and $input1.
  ;
  ; Fixes over the previous version:
  ;   - removed a leftover debug statement that wrote the caller's global
  ;     $list_url into the output file once for every link on the page;
  ;   - FileOpen and _IELinkGetCollection failures are now detected instead of
  ;     looping over / writing to an invalid value;
  ;   - dropped dead assignments from @extended and an unused array.
  Func _data($sSnipet)
      Local $head_rpl = "about:"
      Global $_IEBDY_ = "READY"

      _IEBodyWriteHTML($oIE, $sSnipet)
      Local $oLinks = _IELinkGetCollection($oIE)
      If @error Or Not IsObj($oLinks) Then Return SetError(1, 0, 0)

      ; Mode 1 = append, so the links of every page accumulate in one file.
      Local $hOut = FileOpen($file1, 1)
      If $hOut = -1 Then Return SetError(2, 0, 0)

      For $oLink In $oLinks
          ; $tag already ends the line, so FileWriteLine adds no extra line break.
          Local $nstxt = $oLink.href & $tag
          If StringInStr($nstxt, $uulist, 0, 1, 1) Or StringInStr($nstxt, $head_rpl) Then
              GUICtrlSetData($input1, $nstxt)
              ConsoleWrite($nstxt)
              ;$nstxt = StringReplace($nstxt, $head_rpl, $aera)
              FileWriteLine($hOut, $nstxt)
          EndIf
      Next

      FileClose($hOut)
  EndFunc   ;==>_data
  ; checkfile()
  ; Copies the merge batch file into the per-stock folder ($filepath_t), runs it
  ; and waits for it to finish, tells the user the result and finally opens the
  ; folder. The copy source is <ScriptDir>\include\merge.bat, with
  ; <ScriptDir>\merge.bak as fallback.
  ;
  ; Fixes over the previous version:
  ;   - FileCopy now overwrites an existing MERGE.bat; without the flag the copy
  ;     failed on every run after the first and the stale file was executed
  ;     through the fallback branch;
  ;   - the batch file is always started with ShellExecuteWait, which does not
  ;     break on paths containing spaces (the fallback used RunWait with an
  ;     unquoted path);
  ;   - when no batch file is available the function reports it instead of
  ;     showing the success messages.
  Func checkfile()
      Local $sBatch = $filepath_t & "\MERGE.bat"

      Sleep(500)

      ; Third argument 1 = overwrite an existing destination file.
      If Not FileCopy(@ScriptDir & "\include\merge.bat", $sBatch, 1) Then
          FileCopy(@ScriptDir & "\merge.bak", $sBatch, 1)
      EndIf

      ; A MERGE.bat left from an earlier run is still usable if both copies failed.
      If Not FileExists($sBatch) Then
          MsgBox(16, "错误", "找不到 MERGE.bat,无法删除重复的行.")
          Return
      EndIf

      ShellExecuteWait($sBatch)

      MsgBox($MB_SYSTEMMODAL, "删除了重复的行", "删除了重复的行,数据保存在编号文件夹下DBC.data,完成.", 2)
      MsgBox($MB_SYSTEMMODAL, "删除了重复的行", "可以进入目录,执行开始爬取数据", 1)
      ;FileDelete($filepath_t & "\MERGE.bat")
      Sleep(1000)
      ; Open the per-stock folder for the user.
      ShellExecute($filepath_t)
  EndFunc   ;==>checkfile