23 lines
715 B
Python
23 lines
715 B
Python
import scrapy
|
|
import re
|
|
from scrapy_splash import SplashRequest
|
|
import dateparser
|
|
|
|
class Stock123refSpider(scrapy.Spider):
    """Spider that walks 123rf.com stock-photo listings and emits photo page titles.

    The crawl has three levels, each handled by its own callback:

    1. ``parse`` reads the start page and follows every link found under
       ``.index-stockphoto-thumb-container``.
    2. ``parse_photo_list`` follows every link found under
       ``.mosaic-main-container`` on those pages.
    3. ``parse_photo`` extracts the ``<title>`` text of the final page.
    """

    name = "123rfscraper"

    start_urls = ["https://www.123rf.com/stock-photo/"]

    def parse(self, response):
        """Follow each thumbnail link on the start page.

        Args:
            response: Response for one of ``start_urls``.

        Yields:
            Requests whose callback is ``parse_photo_list``.
        """
        links = response.css('.index-stockphoto-thumb-container a::attr("href")').getall()
        for link in links:
            # response.follow resolves relative hrefs against response.url.
            yield response.follow(link, self.parse_photo_list)

    def parse_photo_list(self, response):
        """Follow each link inside the mosaic container of a listing page.

        Args:
            response: Response for a page reached from ``parse``.

        Yields:
            Requests whose callback is ``parse_photo``.
        """
        links = response.css('.mosaic-main-container a::attr("href")').getall()
        for link in links:
            yield response.follow(link, self.parse_photo)

    def parse_photo(self, response):
        """Extract the page title as an item.

        Args:
            response: Response for a page reached from ``parse_photo_list``.

        Yields:
            A dict ``{"title": <text of the <title> element or None>}``.
        """
        # Scrapy accepts only Requests or item objects (dict, Item, ...)
        # from a callback; a bare list is rejected and dropped with an error,
        # so the title is wrapped in a dict item instead.
        yield {"title": response.css('title::text').get()}
|