爬取网址ansi.au3 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. #EndRegion ;**** 参数创建于 ACNWrapper_GUI ****
  2. #include-once
  3. #include <misc.au3>
  4. ;#include<makefile.au3>
  5. #include <ButtonConstants.au3>
  6. #include <ComboConstants.au3>
  7. #include <EditConstants.au3>
  8. #include <GUIConstantsEx.au3>
  9. #include <GUIListBox.au3>
  10. #include <StaticConstants.au3>
  11. #include <WindowsConstants.au3>
  12. #include <IE.au3>
  13. #include <MsgBoxConstants.au3>
  14. #include <file.au3>
  ; Main window: two inputs (number of months to collect, stock id), one start
  ; button and a status line that the worker functions write progress into.
  #Region ### START Koda GUI section ### Form=d:\autoit3\工程\采集界面.kxf
  $Form1_1 = GUICreate("网址库建立", 623, 107, 201, 278)
  $Label2 = GUICtrlCreateLabel("要采集几个月的数据", 64, 20, 112, 17)
  $Input3 = GUICtrlCreateInput("0.1", 184, 16, 133, 21)
  $Label3 = GUICtrlCreateLabel("股票编号", 128, 48, 52, 17)
  $Input4 = GUICtrlCreateInput("601138", 184, 44, 133, 21)
  $Button2 = GUICtrlCreateButton("采来源地址", 332, 16, 105, 49)
  $input1 = GUICtrlCreateInput("当前消息", 16, 80, 597, 21)
  GUISetState(@SW_SHOW)
  #EndRegion ### END Koda GUI section ###

  ; ---- Script-wide state -------------------------------------------------
  ; URL-shaped pattern; not referenced anywhere in the visible part of the script.
  Global $RGtring = "/(\w+):\/\/([^/:]+)(:\d*)?([^# ]*)/"
  ; Scratch file that getlist() downloads each list page into.
  Global $tmpfile = @ScriptDir & "\tmp.tmp"
  ; Site root that _data() substitutes for the "about:" prefix of extracted links.
  Global $aera = "http://guba.eastmoney.com"
  $sSnipet = ""

  ; Browser instance that _data() uses to parse downloaded HTML. It is created
  ; hidden (third argument = 0) instead of being created visible and hidden
  ; afterwards, and the result is checked: touching a property of a failed
  ; (non-object) result would abort the script with a fatal error.
  Global $oIE = _IECreate("http://guba.eastmoney.com", 0, 0)
  If @error Or Not IsObj($oIE) Then
      MsgBox(16, "错误", "无法启动 Internet Explorer")
      Exit 1
  EndIf

  $globaldb = "dbc.data /link"
  ; Receives the result for a single list page (written by getlist()).
  Global $sstring = ""
  ; Event loop. Closing the window leaves the loop so the browser cleanup at
  ; the bottom runs; pressing the button builds <ScriptDir>\<stock id>\urllist.txt
  ; from the list pages 1..N of the given stock and then post-processes it.
  While 1
      $nMsg = GUIGetMsg()
      Switch $nMsg
          Case $GUI_EVENT_CLOSE
              ; ExitLoop (not Exit) so that the hidden IE instance is shut down below.
              ExitLoop
          Case $Button2
              ; Number of list pages to fetch: 50 per unit entered in the first input.
              $start_time = GUICtrlRead($Input3) * 50
              $stock_id = GUICtrlRead($Input4)
              Global $filepath_t = @ScriptDir & "\" & $stock_id
              ; _FileCreate does not create missing folders, so make sure the
              ; per-stock folder exists before creating the list file in it.
              DirCreate($filepath_t)
              $succ = _FileCreate($filepath_t & "\urllist.txt")
              Global $uulist = "news," & $stock_id
              Global $starturl = "http://guba.eastmoney.com/list,"
              ConsoleWrite($succ)
              If $succ = 1 Then
                  Global $file1 = $filepath_t & "\urllist.txt"
                  For $u = 1 To $start_time
                      $list_url = $starturl & $stock_id & "_" & $u & ".html"
                      ; Writing by file name appends and closes in one call, so the
                      ; write does not depend on a handle that is opened and closed
                      ; again by _data() during getlist().
                      FileWriteLine($file1, $list_url)
                      ConsoleWrite($list_url)
                      getlist($list_url)
                  Next
                  MsgBox($MB_SYSTEMMODAL, "网址收集完成", "即将完成,进行优化处理.", 1)
                  checkfile()
                  ; $tmpfile is a path, not a handle: deleting it is all the cleanup needed.
                  FileDelete($tmpfile)
                  ;Local $iReturn = ShellExecuteWait("notepad.exe")
              Else
                  ; The list file could not be created; tell the user instead of
                  ; silently doing nothing.
                  GUICtrlSetData($input1, "错误,股票编号是否正确")
              EndIf
      EndSwitch
  WEnd

  ; Shut down the browser instance created at start-up.
  If IsObj($oIE) Then
      $oIE.visible = True
      _IEQuit($oIE)
  EndIf
  $oIE = 0
  ; Downloads one list page into the scratch file and hands its HTML to _data().
  ;
  ; Parameters: $list_url - address of the list page to download.
  ; Returns:    the global $sstring, which is overwritten with the return value
  ;             of _data() when the download succeeds and left untouched otherwise.
  ; Side effects: (re)defines the globals $tmpfile and $tag; on failure writes
  ;             an error text into the status input $input1.
  Func getlist($list_url)
      ; Declared first so that the globals exist on every exit path.
      Global $tmpfile = @ScriptDir & "\tmp.tmp"
      Global $tag = @CRLF

      If $list_url = "" Then
          MsgBox(16, "错误", "没有输入完整!")
          Return $sstring
      EndIf

      ConsoleWrite($list_url)

      ; InetGet sets @error when the download fails. Check it right after the
      ; call; otherwise a stale scratch file from an earlier page would be
      ; parsed again.
      InetGet($list_url, $tmpfile)
      If @error Then
          GUICtrlSetData($input1, "错误,股票编号是否正确")
          Return $sstring
      EndIf

      $sstring = _data(FileRead($tmpfile))
      Return $sstring
  EndFunc   ;==>getlist
  ; Extracts article links from a chunk of HTML and appends them to the list file.
  ;
  ; The HTML is written into the body of the global browser object $oIE, the
  ; resulting link collection is walked, and every link whose address contains
  ; both $uulist and the "about:" prefix is appended to $file1 with that prefix
  ; replaced by $aera.
  ;
  ; Parameters: $sSnipet - HTML source to scan.
  ; Returns:    nothing (the AutoIt default of 0).
  ; Relies on the globals $oIE, $file1, $uulist, $aera, $tag and $input1.
  Func _data($sSnipet)
      Local Const $head_rpl = "about:"
      Global $_IEBDY_ = "READY"

      _IEBodyWriteHTML($oIE, $sSnipet)
      Local $oLinks = _IELinkGetCollection($oIE)
      ; Iterating a non-object with For...In is a fatal error, so bail out when
      ; the collection could not be obtained.
      If @error Or Not IsObj($oLinks) Then Return

      ; Local handle opened in append mode (1); keeps this function from
      ; overwriting a handle variable owned by the caller.
      Local $hOut = FileOpen($file1, 1)
      If $hOut = -1 Then Return

      For $oLink In $oLinks
          Local $nstxt = $oLink.href & $tag
          ; Show the link currently being examined in the status line.
          GUICtrlSetData($input1, $nstxt)
          If StringInStr($nstxt, $uulist, 0, 1, 1) And StringInStr($nstxt, $head_rpl) Then
              ConsoleWrite($nstxt)
              $nstxt = StringReplace($nstxt, $head_rpl, $aera)
              FileWriteLine($hOut, $nstxt)
          EndIf
      Next

      FileClose($hOut)
  EndFunc   ;==>_data
  ; Post-processes the collected list by running MERGE.bat inside the stock folder.
  ;
  ; The batch file is copied from <ScriptDir>\include\merge.bat, or from
  ; <ScriptDir>\merge.bak when that copy fails, executed, and removed again.
  ; Afterwards the stock folder is opened in the shell.
  ; NOTE(review): judging by the message texts the batch file removes duplicate
  ; lines and produces DBC.data - its contents are not visible here; confirm.
  ;
  ; Takes no parameters and returns nothing. Relies on the globals $filepath_t
  ; and $input1.
  Func checkfile()
      Local $sBatch = $filepath_t & "\MERGE.bat"

      ; Flag 1 = overwrite, so a batch file left behind by an interrupted run
      ; does not make the copy fail.
      Local $bPrimary = FileCopy(@ScriptDir & "\include\merge.bat", $sBatch, 1)
      If Not $bPrimary Then FileCopy(@ScriptDir & "\merge.bak", $sBatch, 1)

      ; Without a batch file there is nothing to run; report it instead of
      ; showing the success messages below.
      If Not FileExists($sBatch) Then
          GUICtrlSetData($input1, "错误,找不到merge.bat")
          Return
      EndIf

      If $bPrimary Then
          ShellExecuteWait($sBatch)
      Else
          ; Quoted so that a script folder containing spaces still works.
          RunWait('"' & $sBatch & '"')
      EndIf

      MsgBox($MB_SYSTEMMODAL, "删除了重复的行", "删除了重复的行,数据保存在编号文件夹下DBC.data,完成.", 2)
      MsgBox($MB_SYSTEMMODAL, "删除了重复的行", "可以进入目录,执行开始爬取数据", 1)
      FileDelete($sBatch)
      Sleep(1000)
      ShellExecute($filepath_t)
  EndFunc   ;==>checkfile