mirror of
https://github.com/aykhans/PostScrape.git
synced 2025-04-21 00:07:16 +00:00
.
This commit is contained in:
parent
10ee472243
commit
0462cfa37c
@ -1,12 +1,5 @@
|
|||||||
# Define here the models for your scraped items
|
|
||||||
#
|
|
||||||
# See documentation in:
|
|
||||||
# https://docs.scrapy.org/en/latest/topics/items.html
|
|
||||||
|
|
||||||
import scrapy
|
import scrapy
|
||||||
|
|
||||||
|
|
||||||
class PostScrapeItem(scrapy.Item):
|
class PostScrapeItem(scrapy.Item):
|
||||||
# define the fields for your item here like:
|
|
||||||
# name = scrapy.Field()
|
|
||||||
pass
|
pass
|
||||||
|
@ -1,10 +1,3 @@
|
|||||||
# Define your item pipelines here
|
|
||||||
#
|
|
||||||
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
|
|
||||||
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
|
|
||||||
|
|
||||||
|
|
||||||
# useful for handling different item types with a single interface
|
|
||||||
from itemadapter import ItemAdapter
|
from itemadapter import ItemAdapter
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,13 +1,18 @@
|
|||||||
import scrapy
|
from scrapy.spiders import Spider
|
||||||
|
from scrapy import Request
|
||||||
|
|
||||||
|
|
||||||
class ToScrapeCSSSpider(scrapy.Spider):
|
class CarDataSpider(Spider):
|
||||||
name = "car"
|
name = "turbo.az"
|
||||||
|
allowed_domains = ('turbo.az',)
|
||||||
headers = {
|
headers = {
|
||||||
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
|
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
|
||||||
}
|
}
|
||||||
first_page = 1
|
|
||||||
last_page = 2
|
def __init__(self, first_page=1, last_page=2, *args, **kwargs):
|
||||||
|
super(CarDataSpider, self).__init__(*args, **kwargs)
|
||||||
|
self.first_page = first_page
|
||||||
|
self.last_page = int(last_page)
|
||||||
|
|
||||||
def start_requests(self):
|
def start_requests(self):
|
||||||
urls = [
|
urls = [
|
||||||
@ -15,7 +20,7 @@ class ToScrapeCSSSpider(scrapy.Spider):
|
|||||||
]
|
]
|
||||||
|
|
||||||
for url in urls:
|
for url in urls:
|
||||||
yield scrapy.Request(url=url, headers=self.headers, callback=self.parse)
|
yield Request(url=url, headers=self.headers, callback=self.parse)
|
||||||
|
|
||||||
def parse(self, response):
|
def parse(self, response):
|
||||||
posts = response.xpath('//div[@class="products-container"]//div[@class="products"]')[2].xpath('./div/a[@class="products-i__link"]/@href')
|
posts = response.xpath('//div[@class="products-container"]//div[@class="products"]')[2].xpath('./div/a[@class="products-i__link"]/@href')
|
||||||
@ -35,7 +40,7 @@ class ToScrapeCSSSpider(scrapy.Spider):
|
|||||||
phone = r.xpath('//div[@class="shop-contact--phones-list"]//a[@class="shop-contact--phones-number"]/text()').getall()
|
phone = r.xpath('//div[@class="shop-contact--phones-list"]//a[@class="shop-contact--phones-number"]/text()').getall()
|
||||||
else:
|
else:
|
||||||
avto_salon = False
|
avto_salon = False
|
||||||
phone = r.xpath('//a[@class="phone"]/text()').get()
|
phone = [r.xpath('//a[@class="phone"]/text()').get()]
|
||||||
|
|
||||||
barter, loan = False, False
|
barter, loan = False, False
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user