.

2025-11-03 11:19:58 +00:00 · 2022-09-08 03:39:56 +04:00
parent 10ee472243
commit 0462cfa37c
3 changed files with 12 additions and 21 deletions
--- a/src/post_scrape/items.py
+++ b/src/post_scrape/items.py
@@ -1,12 +1,5 @@
 # Define here the models for your scraped items
 #
 # See documentation in:
 # https://docs.scrapy.org/en/latest/topics/items.html
 import scrapy
 class PostScrapeItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    pass
--- a/src/post_scrape/pipelines.py
+++ b/src/post_scrape/pipelines.py
@@ -1,10 +1,3 @@
 # Define your item pipelines here
 #
 # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 # See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
 # useful for handling different item types with a single interface
 from itemadapter import ItemAdapter
--- a/src/post_scrape/spiders/car_spider.py
+++ b/src/post_scrape/spiders/car_spider.py
@@ -1,13 +1,18 @@
-import scrapy
+from scrapy.spiders import Spider
 from scrapy import Request
-class ToScrapeCSSSpider(scrapy.Spider):
+class CarDataSpider(Spider):
-    name = "car"
+    name = "turbo.az"
    allowed_domains = ('turbo.az',)
    headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
        }
-    first_page = 1
+
-    last_page = 2
+    def __init__(self, first_page=1, last_page=2, *args, **kwargs):
        super(CarDataSpider, self).__init__(*args, **kwargs)
        self.first_page = first_page
        self.last_page = int(last_page)
    def start_requests(self):
        urls = [
@@ -15,7 +20,7 @@ class ToScrapeCSSSpider(scrapy.Spider):
        ]
        for url in urls:
-            yield scrapy.Request(url=url, headers=self.headers, callback=self.parse)
+            yield Request(url=url, headers=self.headers, callback=self.parse)
    def parse(self, response):
        posts = response.xpath('//div[@class="products-container"]//div[@class="products"]')[2].xpath('./div/a[@class="products-i__link"]/@href')
@@ -35,7 +40,7 @@ class ToScrapeCSSSpider(scrapy.Spider):
            phone = r.xpath('//div[@class="shop-contact--phones-list"]//a[@class="shop-contact--phones-number"]/text()').getall()
        else:
            avto_salon = False
-            phone = r.xpath('//a[@class="phone"]/text()').get()
+            phone = [r.xpath('//a[@class="phone"]/text()').get()]
        barter, loan = False, False