diff --git a/src/post_scrape/items.py b/src/post_scrape/items.py
index fe33a9d..dcf9b54 100644
--- a/src/post_scrape/items.py
+++ b/src/post_scrape/items.py
@@ -1,12 +1,5 @@
-# Define here the models for your scraped items
-#
-# See documentation in:
-# https://docs.scrapy.org/en/latest/topics/items.html
-
 import scrapy
 
 
 class PostScrapeItem(scrapy.Item):
-    # define the fields for your item here like:
-    # name = scrapy.Field()
     pass
diff --git a/src/post_scrape/pipelines.py b/src/post_scrape/pipelines.py
index 1baa324..b604660 100644
--- a/src/post_scrape/pipelines.py
+++ b/src/post_scrape/pipelines.py
@@ -1,10 +1,3 @@
-# Define your item pipelines here
-#
-# Don't forget to add your pipeline to the ITEM_PIPELINES setting
-# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
-
-
-# useful for handling different item types with a single interface
 from itemadapter import ItemAdapter
 
 
diff --git a/src/post_scrape/spiders/car_spider.py b/src/post_scrape/spiders/car_spider.py
index b635686..7b1fd92 100644
--- a/src/post_scrape/spiders/car_spider.py
+++ b/src/post_scrape/spiders/car_spider.py
@@ -1,13 +1,18 @@
-import scrapy
+from scrapy.spiders import Spider
+from scrapy import Request
 
 
-class ToScrapeCSSSpider(scrapy.Spider):
-    name = "car"
+class CarDataSpider(Spider):
+    name = "turbo.az"
+    allowed_domains = ('turbo.az',)
     headers = {
         'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
     }
-    first_page = 1
-    last_page = 2
+
+    def __init__(self, first_page=1, last_page=2, *args, **kwargs):
+        super(CarDataSpider, self).__init__(*args, **kwargs)
+        self.first_page = int(first_page)
+        self.last_page = int(last_page)
 
     def start_requests(self):
         urls = [
@@ -15,7 +20,7 @@ class ToScrapeCSSSpider(scrapy.Spider):
         ]
 
         for url in urls:
-            yield scrapy.Request(url=url, headers=self.headers, callback=self.parse)
+            yield Request(url=url, headers=self.headers, callback=self.parse)
 
     def parse(self, response):
         posts = response.xpath('//div[@class="products-container"]//div[@class="products"]')[2].xpath('./div/a[@class="products-i__link"]/@href')
@@ -35,7 +40,7 @@ class ToScrapeCSSSpider(scrapy.Spider):
                 phone = r.xpath('//div[@class="shop-contact--phones-list"]//a[@class="shop-contact--phones-number"]/text()').getall()
             else:
                 avto_salon = False
-                phone = r.xpath('//a[@class="phone"]/text()').get()
+                phone = [r.xpath('//a[@class="phone"]/text()').get()]
 
         barter, loan = False, False
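
A minimal usage sketch, not part of the patch: Scrapy forwards `-a key=value` pairs from the CLI into the spider's `__init__` as strings, which is why the page bounds are cast with `int()`. The programmatic equivalent below assumes the `post_scrape` package under `src/` is importable and that `get_project_settings()` can locate the project's `scrapy.cfg`/settings module.

# Sketch only: drive CarDataSpider over a custom page range.
# Assumes `post_scrape` (from src/) is on PYTHONPATH and the Scrapy
# project settings are discoverable from the working directory.
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

from post_scrape.spiders.car_spider import CarDataSpider

# CLI equivalent (spider arguments arrive as strings, hence the int() casts):
#   scrapy crawl turbo.az -a first_page=1 -a last_page=5
process = CrawlerProcess(get_project_settings())
process.crawl(CarDataSpider, first_page=1, last_page=5)
process.start()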