|
@@ -1,5 +1,6 @@
|
|
|
import io
|
|
|
import os.path
|
|
|
+import time
|
|
|
|
|
|
import scrapy
|
|
|
from PIL import Image
|
|
@@ -9,6 +10,8 @@ from selenium.webdriver.common.by import By
|
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
|
from selenium.webdriver.support.wait import WebDriverWait
|
|
|
|
|
|
+from nmc.compress_image import capture_element_screenshot
|
|
|
+
|
|
|
|
|
|
class NmcspiderSpider(scrapy.Spider):
|
|
|
name = "nmcspider"
|
|
@@ -24,8 +27,9 @@ class NmcspiderSpider(scrapy.Spider):
|
|
|
# 添加路径到chrome_options
|
|
|
chrome_options.add_argument('--webdriver-executable-path=' + chrome_path)
|
|
|
# 如果需要添加其他选项,例如禁用浏览器窗口,可以这样:
|
|
|
- # chrome_options.add_argument('--headless')
|
|
|
- # chrome_options.add_argument('--disable-gpu')
|
|
|
+ chrome_options.add_argument('--headless')
|
|
|
+ chrome_options.add_argument('--disable-gpu')
|
|
|
+ chrome_options.add_argument('--no-sandbox')
|
|
|
self.driver = webdriver.Chrome(options=chrome_options)
|
|
|
|
|
|
def closed(self, reason):
|
|
@@ -59,25 +63,46 @@ class NmcspiderSpider(scrapy.Spider):
|
|
|
city = response.meta['city']
|
|
|
self.driver.get(response.url)
|
|
|
self.driver.set_window_size(1920, 1080)
|
|
|
- title = response.xpath('//*[@id="realChart"]/div[1]/span[1]/text()').get()
|
|
|
# 创建文件目录
|
|
|
path_to_save = os.path.join(province, city)
|
|
|
full_path = os.path.join(".", path_to_save)
|
|
|
if not os.path.exists(full_path):
|
|
|
os.makedirs(full_path)
|
|
|
try:
|
|
|
- elemen = self.driver.find_element(By.ID, 'forecastChart')
|
|
|
- self.driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", elemen)
|
|
|
- element = WebDriverWait(self.driver, 20).until(
|
|
|
- EC.presence_of_element_located((By.ID, 'forecastChart')))
|
|
|
- location = elemen.location
|
|
|
- size = elemen.size
|
|
|
+ # 获取第一张图片-天气预报
|
|
|
+ # element = WebDriverWait(self.driver, 20).until(
|
|
|
+ # EC.presence_of_element_located((By.XPATH, '/html/body/div[2]/div')))
|
|
|
+ # self.driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", element)
|
|
|
+ # location = element.location
|
|
|
+ # size = element.size
|
|
|
+ # # 获取天气预报图
|
|
|
+ # # 使用PIL和numpy处理图像
|
|
|
+ # screenshot = self.driver.get_screenshot_as_png()
|
|
|
+ # image = Image.open(io.BytesIO(screenshot))
|
|
|
+ # crop_area = (location['x'], location['y'], location['x'] + size['width'], location['y'] + size['height'])
|
|
|
+ # cropped_image = image.crop(crop_area)
|
|
|
+ # # 保存裁剪后的图片
|
|
|
+ # image_path = os.path.join(full_path, city + '天气预报图.png')
|
|
|
+ # cropped_image.save(image_path)
|
|
|
+ # 获取第二张图片
|
|
|
+ highcharts = WebDriverWait(self.driver, 20).until(
|
|
|
+ EC.presence_of_element_located((By.ID, 'footer')))
|
|
|
+ self.driver.execute_script("arguments[0].scrollIntoView({block: 'center', behavior: 'instant'});",
|
|
|
+ highcharts)
|
|
|
+ # self.driver.execute_script(
|
|
|
+ # """var elem = document.getElementById('tempchart');
|
|
|
+ # elem.scrollIntoView({behavior: "instant", block: "center"});""")
|
|
|
+
|
|
|
+ time.sleep(5)
|
|
|
+ location = highcharts.location
|
|
|
+ size = highcharts.size
|
|
|
+ print(location, size, "================")
|
|
|
# 获取天气预报图
|
|
|
# Use PIL to crop the element's region out of the browser screenshot
|
|
|
screenshot = self.driver.get_screenshot_as_png()
|
|
|
image = Image.open(io.BytesIO(screenshot))
|
|
|
- crop_area = (location['x'], location['y'], location['x'] + size['width'], location['y'] + size['height'])
|
|
|
- print(crop_area, "=========================")
|
|
|
+ crop_area = (
|
|
|
+ location['x'], location['y'], location['x'] + size['width'], location['y'] + size['height'])
|
|
|
cropped_image = image.crop(crop_area)
|
|
|
# 保存裁剪后的图片
|
|
|
image_path = os.path.join(full_path, city + '天气预报图.png')
|