import scrapy import re from scrapy_splash import SplashRequest import dateparser class ExploreSpider(scrapy.Spider): name = "explorescraper" terms = ['usi', 'eth', 'epfl', 'lugano', 'zurich', 'basel', 'oliveto%20lario', 'homework', 'plagiarism', 'rhb', 'sbb', 'ship', 'frighten', 'fool', 'have', 'inspire', 'heal', 'master', 'terminate', 'amend', 'scratch', 'embark', 'entail', 'execute', 'consolidate', 'cash', 'round', 'isolate', 'warrant', 'signal', 'weaken', 'pin', 'march', 'desire', 'widen', 'level', 'chat', 'board', 'contend', 'invent', 'resource', 'manufacture', 'seal', 'reconsider', 'suck', 'picture', 'crash', 'transport', 'plug', 'assign', 'enquire', 'campaign', 'trap', 'surround', 'debate', 'upgrade', 'decorate', 'confer', 'accumulate', 'profit', 'file', 'inherit', 'disrupt', 'contrast', 'chuck', 'tick', 'plead', 'dip', 'subscribe', 'educate', 'divorce', 'spin', 'row', 'obscure', 'creep', 'interest', 'overlook', 'twist', 'mature', 'blend', 'revise', 'attribute', 'explode', 'dwell', 'drown', 'alleviate', 'strip', 'grade', 'revert', 'value', 'award', 'strive', 'notify', 'remedy', 'accuse', 'instruct', 'spill', 'strain', 'comprehend', 'soften', 'postpone', 'wave', 'bounce', 'stock', 'position', 'insure', 'adhere', 'cling', 'summon','pause','empty','classify'] infinite_scroll_urls = ["https://flickr.com/explore"] start_urls = ["https://www.flickr.com/photos/tags/train", "https://www.flickr.com/photos/tags/tree", "https://www.flickr.com/photos/tags/outside"] def parse_photo(self, response): adesc = re.compile("