# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
from scrapy import signals
from scrapy.exceptions import IgnoreRequest

# useful for handling different item types with a single interface
from itemadapter import is_item, ItemAdapter

from selenium import webdriver
from selenium.webdriver.chrome.service import Service


class SeleniumDownloaderMiddleware:
    def __init__(self):
        # Create the Chrome driver once, when the middleware is instantiated.
        # Selenium 4 passes the chromedriver path via a Service object
        # (older Selenium 3 code used executable_path= instead).
        chrome_path = r'E:\ProgramData\anaconda3\chromedriver.exe'
        self.driver = webdriver.Chrome(service=Service(chrome_path))

    def process_request(self, request, spider):
        # Fetch the requested URL with the Selenium-driven browser
        self.driver.get(request.url)
        print(request.url, "middleware")
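As written, `process_request` only drives the browser and returns nothing, so Scrapy's default downloader will still fetch the same URL afterwards. A common Scrapy-plus-Selenium pattern, not shown in the snippet above, is to return an `HtmlResponse` built from `driver.page_source`, which short-circuits the normal download and hands the rendered HTML straight to the spider. The sketch below assumes that behaviour, reuses the same chromedriver path, and also quits the browser when the spider closes:

```python
# Sketch of a variant middleware; the HtmlResponse return and the
# spider_closed cleanup are assumptions layered on the original snippet.
from scrapy import signals
from scrapy.http import HtmlResponse
from selenium import webdriver
from selenium.webdriver.chrome.service import Service


class SeleniumDownloaderMiddleware:
    def __init__(self):
        chrome_path = r'E:\ProgramData\anaconda3\chromedriver.exe'
        self.driver = webdriver.Chrome(service=Service(chrome_path))

    @classmethod
    def from_crawler(cls, crawler):
        middleware = cls()
        # Close the browser when the spider finishes to avoid leaking processes
        crawler.signals.connect(middleware.spider_closed,
                                signal=signals.spider_closed)
        return middleware

    def process_request(self, request, spider):
        self.driver.get(request.url)
        # Returning a Response here tells Scrapy to skip its own downloader
        # and pass the Selenium-rendered HTML to the spider callbacks.
        return HtmlResponse(
            url=request.url,
            body=self.driver.page_source,
            encoding='utf-8',
            request=request,
        )

    def spider_closed(self, spider):
        self.driver.quit()
```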
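For the middleware to run at all, it also has to be enabled in the project's `settings.py`. The module path below assumes a project named `myproject`; replace it with the real project package name:

```python
# settings.py (hypothetical project name "myproject")
DOWNLOADER_MIDDLEWARES = {
    "myproject.middlewares.SeleniumDownloaderMiddleware": 543,
}
```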