|
@@ -2,10 +2,12 @@ import io
|
|
|
import os.path
|
|
|
import time
|
|
|
from datetime import datetime
|
|
|
+from urllib.parse import urlparse
|
|
|
|
|
|
import scrapy
|
|
|
from PIL import Image
|
|
|
from selenium import webdriver
|
|
|
+from selenium.webdriver import ActionChains
|
|
|
from selenium.webdriver.chrome.options import Options
|
|
|
from selenium.webdriver.common.by import By
|
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
@@ -37,7 +39,7 @@ class NmcspiderSpider(scrapy.Spider):
|
|
|
chrome_options.add_argument('--no-sandbox')
|
|
|
self.driver = webdriver.Chrome(options=chrome_options)
|
|
|
|
|
|
- def closed(self, reason):
|
|
|
+ def close(self, reason):
|
|
|
self.driver.quit()
|
|
|
|
|
|
def parse_provinces(self, response):
|
|
@@ -76,33 +78,34 @@ class NmcspiderSpider(scrapy.Spider):
|
|
|
os.makedirs(full_path)
|
|
|
try:
|
|
|
# 获取第一张图片-天气预报
|
|
|
- # element = WebDriverWait(self.driver, 20).until(
|
|
|
- # EC.presence_of_element_located((By.XPATH, '/html/body/div[2]/div')))
|
|
|
- # self.driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", element)
|
|
|
- # location = element.location
|
|
|
- # size = element.size
|
|
|
- # # 获取天气预报图
|
|
|
- # # 使用PIL和numpy处理图像
|
|
|
- # screenshot = self.driver.get_screenshot_as_png()
|
|
|
- # image = Image.open(io.BytesIO(screenshot))
|
|
|
- # crop_area = (location['x'], location['y'], location['x'] + size['width'], location['y'] + size['height'])
|
|
|
- # cropped_image = image.crop(crop_area)
|
|
|
- # # 保存裁剪后的图片
|
|
|
- # image_path = os.path.join(full_path, city + '天气预报图.png')
|
|
|
- # cropped_image.save(image_path)
|
|
|
+ element = WebDriverWait(self.driver, 20).until(
|
|
|
+ EC.presence_of_element_located((By.XPATH, '/html/body/div[2]/div')))
|
|
|
+ self.driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", element)
|
|
|
+ location = element.location
|
|
|
+ size = element.size
|
|
|
+ # 获取天气预报图
|
|
|
+ # 使用PIL和numpy处理图像
|
|
|
+ screenshot = self.driver.get_screenshot_as_png()
|
|
|
+ image = Image.open(io.BytesIO(screenshot))
|
|
|
+ crop_area = (location['x'], location['y'], location['x'] + size['width'], location['y'] + size['height'])
|
|
|
+ cropped_image = image.crop(crop_area)
|
|
|
+ # 保存裁剪后的图片
|
|
|
+ url = urlparse(response.url)
|
|
|
+ parts = url.path.split('/')
|
|
|
+ target_part = '_'.join([parts[3], parts[4].split('.')[0]])
|
|
|
+ name = target_part + "_1.png"
|
|
|
+ image_path = os.path.join(full_path, name)
|
|
|
+ cropped_image.save(image_path)
|
|
|
# 获取第二张图片
|
|
|
highcharts = WebDriverWait(self.driver, 20).until(
|
|
|
EC.presence_of_element_located((By.ID, 'realChart')))
|
|
|
self.driver.execute_script("arguments[0].scrollIntoView({block: 'center', behavior: 'instant'});",
|
|
|
highcharts)
|
|
|
- elem = WebDriverWait(self.driver, 20).until(
|
|
|
- EC.presence_of_element_located((By.ID, 'realChart')))
|
|
|
time.sleep(2)
|
|
|
# 等待js全部加载完成
|
|
|
js_is_complete(self.driver)
|
|
|
WebDriverWait(self.driver, 20).until(js_is_complete)
|
|
|
# 获取屏幕截图
|
|
|
- # self.driver.save_screenshot('screenshot.png')
|
|
|
screenshot = self.driver.get_screenshot_as_png()
|
|
|
script = """
|
|
|
var element = document.getElementById('realChart');
|
|
@@ -110,25 +113,57 @@ class NmcspiderSpider(scrapy.Spider):
|
|
|
return {left: rect.left, top: rect.top, width: rect.width, height: rect.height};
|
|
|
"""
|
|
|
position_info = self.driver.execute_script(script)
|
|
|
- print(position_info)
|
|
|
# 获取天气预报图
|
|
|
# 使用PIL和numpy处理图像
|
|
|
image = Image.open(io.BytesIO(screenshot))
|
|
|
- # location = elem.location
|
|
|
- # size = elem.size
|
|
|
- # print(f"Element Location: {location}, Size: {size}")
|
|
|
- #
|
|
|
- # crop_area = (
|
|
|
- # location['x'], location['y'], location['x'] + size['width'],
|
|
|
- # location['y'] + size['height'])
|
|
|
crop_area = (
|
|
|
position_info['left'], position_info['top'],
|
|
|
position_info['left'] + position_info['width'],
|
|
|
position_info['top'] + position_info['height'])
|
|
|
cropped_image = image.crop(crop_area)
|
|
|
# # 保存裁剪后的图片
|
|
|
- image_path = os.path.join(full_path, city + '天气预报图.png')
|
|
|
- # image.save(image_path)
|
|
|
+ name = target_part + "_2.png"
|
|
|
+ image_path = os.path.join(full_path, name)
|
|
|
+ cropped_image.save(image_path)
|
|
|
+ # 第三张图片
|
|
|
+ # 等待js全部加载完成
|
|
|
+ js_is_complete(self.driver)
|
|
|
+ WebDriverWait(self.driver, 20).until(js_is_complete)
|
|
|
+ highcharts = WebDriverWait(self.driver, 20).until(
|
|
|
+ EC.presence_of_element_located((By.CSS_SELECTOR, '#container')))
|
|
|
+ self.driver.execute_script("arguments[0].scrollIntoView({block: 'center', behavior: 'instant'});",
|
|
|
+ highcharts)
|
|
|
+ # 找到点击的按钮
|
|
|
+ element = self.driver.find_element(By.CSS_SELECTOR,
|
|
|
+ '#container> .highcharts-container >svg > g > g > g >g.highcharts-legend-item.highcharts-column-series.highcharts-color-undefined.highcharts-series-0')
|
|
|
+ self.driver.execute_script("arguments[0].dispatchEvent(new Event('click'));",
|
|
|
+ element)
|
|
|
+ # target_element = WebDriverWait(self.driver, 20).until(
|
|
|
+ # EC.visibility_of_element_located((By.CSS_SELECTOR,
|
|
|
+ # "#highcharts-v9zgpzc-36 > svg > g.highcharts-legend > g > g > g.highcharts-legend-item.highcharts-column-series.highcharts-color-undefined.highcharts-series-0 > text")))
|
|
|
+ # target_element.click()
|
|
|
+ # 等待点击之后页面加载完成
|
|
|
+ WebDriverWait(self.driver, 20).until(EC.visibility_of_element_located((By.ID, 'container')))
|
|
|
+ time.sleep(2)
|
|
|
+ # 获取屏幕截图
|
|
|
+ screenshot = self.driver.get_screenshot_as_png()
|
|
|
+ scripts = """
|
|
|
+ var element = document.getElementById('container');
|
|
|
+ var rect = element.getBoundingClientRect();
|
|
|
+ return {left: rect.left, top: rect.top, width: rect.width, height: rect.height};
|
|
|
+ """
|
|
|
+ position_info = self.driver.execute_script(scripts)
|
|
|
+ # 获取天气预报图
|
|
|
+ # 使用PIL和numpy处理图像
|
|
|
+ image = Image.open(io.BytesIO(screenshot))
|
|
|
+ crop_area = (
|
|
|
+ position_info['left'], position_info['top'] + 50,
|
|
|
+ position_info['left'] + position_info['width'],
|
|
|
+ position_info['top'] + position_info['height'])
|
|
|
+ cropped_image = image.crop(crop_area)
|
|
|
+ # # 保存裁剪后的图片
|
|
|
+ name = target_part + "_3.png"
|
|
|
+ image_path = os.path.join(full_path, name)
|
|
|
cropped_image.save(image_path)
|
|
|
|
|
|
finally:
|