middlewares.py 787 B

123456789101112131415161718192021222324
  1. # Define here the models for your spider middleware
  2. #
  3. # See documentation in:
  4. # https://docs.scrapy.org/en/latest/topics/spider-middleware.html
  5. from scrapy import signals
  6. from selenium import webdriver
  7. # useful for handling different item types with a single interface
  8. from itemadapter import is_item, ItemAdapter
  9. from scrapy import signals
  10. from scrapy.exceptions import IgnoreRequest
  11. class SeleniumDownloaderMiddleware:
  def __init__(self):
      """Start the Chrome WebDriver used to fetch pages for this middleware.

      The driver is stored on ``self.driver`` so that ``process_request``
      can load each request URL through it.

      The method must be named ``__init__`` (double underscores) for Python
      to run it on instantiation; with the previous ``_init_`` spelling the
      driver was never created.
      """
      # Location of the chromedriver binary used to launch Chrome.
      chrome_path = r'E:\ProgramData\anaconda3\chromedriver.exe'
      # NOTE(review): Selenium 4 deprecated the `executable_path` keyword in
      # favour of a `Service` object -- confirm against the installed
      # Selenium version before upgrading.
      self.driver = webdriver.Chrome(executable_path=chrome_path)
  17. def process_request(self, request, spider):
  18. # 通过driver访问第一个链接
  19. self.driver.get(request.url)
  20. print(request.url, "中间件")