Picture_Gatherer

A basic, beginner-level exercise; the problem of getting banned by the website is not solved yet.

import os

import requests
from bs4 import BeautifulSoup

# Make sure the download root exists before creating per-gallery folders.
os.makedirs('test22', exist_ok=True)

# Walk list pages 2 and 3 of the gallery index.
for i in range(2, 4):
    list_url = 'http://www.mzitu.com/page/' + str(i)
    list_source = requests.get(list_url).text
    #print(list_source)

    list_soup = BeautifulSoup(list_source, 'lxml')
    #print(list_soup)

    # Each <li> in the post list links to one gallery.
    page_urls = list_soup.find('div', {'class': 'postlist'}).find_all('li')

    for each22 in page_urls:
        initial_url_copy = each22.find('a')['href']
        initial_source = requests.get(initial_url_copy).text
        initial_soup = BeautifulSoup(initial_source, 'lxml')

        # Use the part of the page title before the colon as the folder name.
        title = initial_soup.find('title').text.split(':')[0]
        print(title)
        os.mkdir(os.path.join('test22', title))

        # Download the first 10 pages of the gallery (one image per page).
        for num in range(1, 11):
            initial_url = initial_url_copy + '/' + str(num)
            #print(initial_url)

            initial_request = requests.get(initial_url)
            initial_pagesource = initial_request.text
            initial_request.close()

            page_soup = BeautifulSoup(initial_pagesource, 'lxml')
            imgs = page_soup.find('div', {'class': 'main-image'}).find_all('img')

            for each in imgs:
                url = each['src']
                # The site refuses image requests without a matching Referer.
                headers = {'Referer': 'http://www.mzitu.com/'}
                img_source = requests.get(url, headers=headers).content
                with open(os.path.join('test22', title, str(num) + '.jpg'), 'wb') as f:
                    f.write(img_source)
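
The note at the top says the script still gets banned by the site. A minimal sketch of one common mitigation (an assumption on my part, not something the original script implements): reuse a single requests.Session that sends a browser-like User-Agent plus the Referer on every request, and pause between requests. The header values, the fetch helper, and the one-second delay are illustrative placeholders.

import time

import requests

# Hypothetical helper: a shared session with browser-like headers and a
# polite delay. The exact User-Agent string and the 1-second pause are
# assumptions, not values taken from the original post.
session = requests.Session()
session.headers.update({
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)',
    'Referer': 'http://www.mzitu.com/',
})

def fetch(url):
    """GET a URL through the shared session, then wait before returning."""
    response = session.get(url, timeout=10)
    response.raise_for_status()
    time.sleep(1)  # slow down so the crawl looks less like a bot
    return response

Each requests.get(...) call in the script above would then be replaced by fetch(...), so every page and image request carries the same headers and is rate-limited.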

Credits: 不二小段
https://www.bilibili.com/video/av18248096
