mirror of
https://github.com/aykhans/PostScrape.git
synced 2025-04-16 06:33:12 +00:00
.
This commit is contained in:
parent
10ee472243
commit
0462cfa37c
@ -1,12 +1,5 @@
|
||||
# Define here the models for your scraped items
|
||||
#
|
||||
# See documentation in:
|
||||
# https://docs.scrapy.org/en/latest/topics/items.html
|
||||
|
||||
import scrapy
|
||||
|
||||
|
||||
class PostScrapeItem(scrapy.Item):
|
||||
# define the fields for your item here like:
|
||||
# name = scrapy.Field()
|
||||
pass
|
||||
|
@ -1,10 +1,3 @@
|
||||
# Define your item pipelines here
|
||||
#
|
||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
||||
|
||||
|
||||
# useful for handling different item types with a single interface
|
||||
from itemadapter import ItemAdapter
|
||||
|
||||
|
||||
|
@ -1,13 +1,18 @@
|
||||
import scrapy
|
||||
from scrapy.spiders import Spider
|
||||
from scrapy import Request
|
||||
|
||||
|
||||
class ToScrapeCSSSpider(scrapy.Spider):
|
||||
name = "car"
|
||||
class CarDataSpider(Spider):
|
||||
name = "turbo.az"
|
||||
allowed_domains = ('turbo.az',)
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
|
||||
}
|
||||
first_page = 1
|
||||
last_page = 2
|
||||
|
||||
def __init__(self, first_page=1, last_page=2, *args, **kwargs):
|
||||
super(CarDataSpider, self).__init__(*args, **kwargs)
|
||||
self.first_page = first_page
|
||||
self.last_page = int(last_page)
|
||||
|
||||
def start_requests(self):
|
||||
urls = [
|
||||
@ -15,7 +20,7 @@ class ToScrapeCSSSpider(scrapy.Spider):
|
||||
]
|
||||
|
||||
for url in urls:
|
||||
yield scrapy.Request(url=url, headers=self.headers, callback=self.parse)
|
||||
yield Request(url=url, headers=self.headers, callback=self.parse)
|
||||
|
||||
def parse(self, response):
|
||||
posts = response.xpath('//div[@class="products-container"]//div[@class="products"]')[2].xpath('./div/a[@class="products-i__link"]/@href')
|
||||
@ -35,7 +40,7 @@ class ToScrapeCSSSpider(scrapy.Spider):
|
||||
phone = r.xpath('//div[@class="shop-contact--phones-list"]//a[@class="shop-contact--phones-number"]/text()').getall()
|
||||
else:
|
||||
avto_salon = False
|
||||
phone = r.xpath('//a[@class="phone"]/text()').get()
|
||||
phone = [r.xpath('//a[@class="phone"]/text()').get()]
|
||||
|
||||
barter, loan = False, False
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user