Crawled 123RF
This commit is contained in:
parent
4edfcd99d7
commit
fb92add03f
2 changed files with 50016 additions and 1 deletions
|
@ -17,6 +17,20 @@ class Stock123refSpider(scrapy.Spider):
|
||||||
links = response.css('.mosaic-main-container a::attr("href")').getall()
|
links = response.css('.mosaic-main-container a::attr("href")').getall()
|
||||||
for link in links:
|
for link in links:
|
||||||
yield response.follow(link, self.parse_photo)
|
yield response.follow(link, self.parse_photo)
|
||||||
|
a = response.css('#btn_main_nextpg::attr("href")').get()
|
||||||
|
if a is not None:
|
||||||
|
yield response.follow(a, self.parse_photo_list)
|
||||||
|
|
||||||
def parse_photo(self, response):
|
def parse_photo(self, response):
|
||||||
yield [response.css('title::text').get()]
|
if response.css('h1::text').get() is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
a = response.css('#contributorPortfolioLink::text').get()
|
||||||
|
b = response.css("#imageDescriptionText::text").get()
|
||||||
|
|
||||||
|
yield {"author": "" if a is None else a.strip("\t\r\n "),
|
||||||
|
"title": response.css('h1::text').get().strip("\t\r\n "),
|
||||||
|
"description": "" if b is None else b.strip("\t\r\n "),
|
||||||
|
"date": "",
|
||||||
|
"img_url": response.css("picture source::attr('srcset')").get(),
|
||||||
|
"tags": response.css('.keywords-container a div.ui.label::text').getall()}
|
||||||
|
|
50001
scraped/123rf.csv
Normal file
50001
scraped/123rf.csv
Normal file
File diff suppressed because it is too large
Load diff
Reference in a new issue