处理原理:
循环打开每一页,在每页内逐行展开客户详情弹窗,读取客户数据并写入 Excel。
引用包
import selenium
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import tkinter as tk
from tkinter.filedialog import askdirectory
from lxml import etree
from openpyxl import Workbook
from openpyxl.styles import Font, colors, Alignment
from bs4 import BeautifulSoup
import pandas as pd
import time
import math
Chrome驱动加载:
# Build the Chrome options and start the WebDriver session used by the
# rest of the script.
option = webdriver.ChromeOptions()
# Exclude these switches to keep Chrome from printing useless log output.
option.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])
# `chrome_options=` was deprecated in Selenium 3.x and removed in Selenium 4.10;
# `options=` is the supported keyword and is accepted by both generations.
driver = webdriver.Chrome(options=option)
循环展开页面数据:
# Walk the result pages. range(10, 11) would fetch only page 10;
# range(10, 12) fetches pages 10 and 11.
for i in range(10, 12):
    # Type the page number into the pager's first <input>, then click its
    # second <input> to jump to that page.
    js = (
        "$($(document.getElementById('clubermindmessage_pager')).find('input')[0]).attr('value',"
        + str(i)
        + ");$($(document.getElementById('clubermindmessage_pager')).find('input')[1]).click()"
    )
    driver.execute_script(js)
    # Snapshot the DOM as it is right after the click and parse it with lxml.
    # NOTE(review): nothing waits for the new page to render before this
    # snapshot is taken - confirm the pager updates synchronously.
    html = driver.page_source
    soup = BeautifulSoup(html, "lxml")
展开客户详情弹窗:
# Open the customer-detail popup by clicking the first child of the matching
# 'td_orderId' cell.
# NOTE(review): `j` is not defined in the visible code; presumably it is the
# 1-based row number from an enclosing per-row loop - confirm.
js = "".join([
    "document.getElementsByClassName('td_orderId')[",
    str(j - 1),
    "].children[0].click()",
])
driver.execute_script(js)
取客户数据:
# Read the customer data out of the detail popup.
# Re-parse the live DOM here: the page-level `soup` was built before the popup
# was opened, so it cannot contain the details of the customer just clicked.
# A separate name is used so the page-level `soup` stays untouched.
detail_soup = BeautifulSoup(driver.page_source, "lxml")
divTag = detail_soup.find_all("div", {"id": "userDetailWinContent"})
k = 0  # running count of <span> elements visited across the whole popup
for detail_div in divTag:
    for ul in detail_div.find_all("ul"):
        for li in ul.find_all("li"):
            for span in li.find_all("span"):
                # Extraction logic goes here: read the value from `span` and
                # write it to Excel. `k` is the index of the current span
                # (presumably what tells the fields apart - confirm).
                k += 1
关闭客户详情页:
# Close the customer-detail popup by clicking the first element that carries
# the dialog's close-icon classes.
js = (
    "document.getElementsByClassName("
    "'pui-dialog-titlebar-icon pui-dialog-titlebar-close ui-corner-all'"
    ")[0].click()"
)
driver.execute_script(js)
代码获取地址:https://pan.baidu.com/s/1D8Yj_Eg7RM1AWJ_85ZrdJQ