This repository has been archived on 2020-12-10. You can view files and clone it, but cannot push or open issues or pull requests.
IRProject/photo_scraper/spiders/stock123rf.py

23 lines
715 B
Python
Raw Normal View History

2020-11-10 17:36:41 +00:00
import scrapy
import re
from scrapy_splash import SplashRequest
import dateparser
class Stock123refSpider(scrapy.Spider):
    """Crawl 123rf.com from its stock-photo index down to single pages.

    The crawl has three steps, one callback each:

    1. ``parse`` handles the start URL and follows every link found inside
       ``.index-stockphoto-thumb-container``.
    2. ``parse_photo_list`` handles those pages and follows every link
       found inside ``.mosaic-main-container``.
    3. ``parse_photo`` handles the final pages and emits one item holding
       the page ``<title>`` text.
    """

    name = "123rfscraper"
    start_urls = ["https://www.123rf.com/stock-photo/"]

    def parse(self, response):
        """Follow each link in the index thumbnail containers.

        Yields one request per extracted ``href``; each response is handled
        by ``parse_photo_list``.
        """
        hrefs = response.css(
            '.index-stockphoto-thumb-container a::attr("href")'
        ).getall()
        for href in hrefs:
            yield response.follow(href, self.parse_photo_list)

    def parse_photo_list(self, response):
        """Follow each link in the mosaic container.

        Yields one request per extracted ``href``; each response is handled
        by ``parse_photo``.
        """
        hrefs = response.css('.mosaic-main-container a::attr("href")').getall()
        for href in hrefs:
            yield response.follow(href, self.parse_photo)

    def parse_photo(self, response):
        """Emit the page title as an item.

        Yields a dict ``{"title": <text of the <title> element or None>}``.
        Scrapy only accepts requests, items (such as dicts) or ``None`` from
        a callback; a bare list is rejected with an error and dropped, so
        the title is wrapped in a dict.
        """
        yield {"title": response.css('title::text').get()}