本文共 1087 字,大约阅读时间需要 3 分钟。
# Two ways of getting the page HTML are provided: selenium (used by default)
# and plain urllib (see get_html).
import re
import string
import time
import urllib.request as req
from pathlib import Path
from urllib.parse import quote

QUESTION_URL = 'https://www.zhihu.com/question/29134042'

# Directory (relative to the current working directory) that receives the
# downloaded images.
IMAGE_DIR = Path('img')

# Compiled once: captures https URLs ending in ".jpg" that appear in a
# ` src="..."` attribute.
IMAGE_URL_RE = re.compile(r' src="(https://[^\s]*?\.jpg)')


def fetch_page_source(url):
    """Open *url* in Firefox through selenium and return the page source.

    The browser is always closed again, even if loading the page fails.
    """
    # Imported lazily so the pure helpers in this file stay importable on
    # machines where selenium is not installed.
    from selenium import webdriver

    driver = webdriver.Firefox()
    try:
        driver.maximize_window()
        driver.get(url)
        # Disabled in the original script: scroll down and click the
        # "load more" button up to 10 times before reading the page.
        # i = 0
        # while i < 10:
        #     driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        #     time.sleep(2)
        #     try:
        #         driver.find_element_by_css_selector('button.QuestionMainAction').click()
        #         print("page" + str(i))
        #         i += 1
        #         time.sleep(1)
        #     except:
        #         break
        return driver.page_source
    finally:
        # Without this the Firefox window and its driver process are left
        # running after the script ends.
        driver.quit()


def get_html(url):
    """Fetch *url* with urllib and return the body decoded as UTF-8.

    Browser-free alternative to fetch_page_source(); not used by default.
    """
    with req.urlopen(url) as page:
        return page.read().decode('utf-8')


def extract_image_urls(html):
    """Return all image URLs matched by IMAGE_URL_RE in *html*, in order."""
    return IMAGE_URL_RE.findall(html)


def download_images(urls, directory=IMAGE_DIR):
    """Download every URL in *urls* into *directory*.

    The directory is created if it does not exist yet.  Each file is named
    after the current timestamp, as in the original script.

    Returns the list of paths that were written.
    """
    directory = Path(directory)
    # urlretrieve cannot create missing parent directories itself.
    directory.mkdir(parents=True, exist_ok=True)

    saved = []
    for url in urls:
        target = directory / "{}.jpg".format(time.time())
        # Percent-encode non-ASCII characters while leaving every printable
        # ASCII character (including the URL delimiters) untouched.
        req.urlretrieve(quote(url, safe=string.printable), str(target))
        saved.append(target)
    return saved


def main():
    """Scrape the question page and save every matched image locally."""
    html = fetch_page_source(QUESTION_URL)
    # Alternative without a browser: html = get_html(QUESTION_URL)
    download_images(extract_image_urls(html))


if __name__ == "__main__":
    main()
转载地址:http://ceben.baihongyu.com/