I can't properly scrape a comic strip site with Python
I was writing code that checks the day of each strip/GIF on the page and, if that day matches the current day (in the code I hard-coded 14 only because the site is not updated on weekends and I needed some way to test), downloads the strip/GIF. However, two errors occur: the code does not download all the comics/GIFs (I noticed a pattern of at most 5 downloaded comics, nothing more), and sometimes it downloads an image even when its date is earlier than the current date.
from bs4 import BeautifulSoup
import requests
import datetime
import os
# Switch to the download folder: get_img() saves each image under its bare
# file name, i.e. relative to the current working directory.
os.chdir('C:\\Users\\Rafael\\Desktop\\Scraping\\leninja_imgs')
def get_img(page_url="https://leninja.com.br/page/2/", actual_day=14):
    """Download the images of the posts published on a given day of the month.

    The page is fetched once, the day label of every post (``.day-post``) is
    compared with ``actual_day``, and each matching post's image is saved in
    the current working directory under the base name of its URL.

    Args:
        page_url: Address of the listing page to scan.
        actual_day: Day of the month to match. The default (14) is the fixed
            test value; pass ``datetime.datetime.now().day`` to use today.

    Returns:
        True if at least one image was downloaded, False otherwise.
    """
    r = requests.get(page_url)
    soup = BeautifulSoup(r.text, 'lxml')
    days_post = soup.select(".day-post")
    img_links = [i.get("src") for i in soup.select(".le-inner-content img")]

    downloaded = 0
    # NOTE(review): days and images are paired by position, which assumes the
    # page has exactly one matching <img> per post -- verify against the
    # markup. zip() also stops at the shorter list instead of raising
    # IndexError when the two counts differ.
    for day, link in zip(days_post, img_links):
        # A post from another day must be skipped, not abort the whole scan;
        # returning here is what stopped the downloads after the first
        # non-matching post.
        if int(day.getText()) != actual_day:
            continue
        req = requests.get(link)
        # The context manager guarantees the file is flushed and closed.
        with open(os.path.basename(link), "wb") as img_file:
            for chunk in req.iter_content(100000):
                img_file.write(chunk)
        downloaded += 1

    if not downloaded:
        print("Não foi possível baixar a imagem!")
        return False
    return True
# Run the scraper once when the script is executed.
get_img()
0
Author: Matheus Andrade, 2018-09-15
1 answers
The image links are already collected in a list, so you can loop over that list and download them all:
# coding=utf-8
from bs4 import BeautifulSoup
import requests
import datetime
import os
# './' is the current directory, so this leaves the working directory
# unchanged; replace the path to save the images somewhere else.
os.chdir('./')
def get_img():
    """Download every post image found on page 2 of leninja.com.br.

    No day filter is applied: each ``<img>`` inside ``.le-inner-content`` is
    fetched and saved in the current working directory under the base name of
    its URL.

    Returns:
        True if every image was downloaded, False as soon as one request
        fails (the error is printed and the remaining images are skipped).
    """
    r = requests.get("https://leninja.com.br/page/2/")
    soup = BeautifulSoup(r.text, 'lxml')
    img_links = [i.get("src") for i in soup.select(".le-inner-content img")]

    for link in img_links:
        try:
            req = requests.get(link)
            # Without this, a 404/500 error page would be written to disk as
            # if it were the image. HTTPError is a RequestException, so it is
            # reported by the handler below.
            req.raise_for_status()
        except requests.exceptions.RequestException as e:
            # print(e) is valid on both Python 2 and Python 3.
            print(e)
            print("Não foi possível baixar a imagem!")
            return False
        # The context manager guarantees the file is flushed and closed.
        with open(os.path.basename(link), "wb") as img_file:
            for chunk in req.iter_content(100000):
                img_file.write(chunk)
    return True
# Run the scraper once when the script is executed.
get_img()
0
Author: tomasantunes, 2018-12-21 10:05:37