mirror of
https://github.com/aykhans/PostScrape.git
synced 2025-04-21 00:07:16 +00:00
Added PostgresqlPostPipeline
This commit is contained in:
parent
f321447fea
commit
fe0823d386
@ -1,15 +1,26 @@
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
|
import psycopg2
|
||||||
|
|
||||||
|
|
||||||
class SqlitePostPipeline:
|
class PostgresqlPostPipeline:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.id = 1
|
self.id = 1
|
||||||
self.con = sqlite3.connect('cars.db')
|
DB_NAME = "cars"
|
||||||
|
DB_USER = "ayxan"
|
||||||
|
DB_PASS = "admin"
|
||||||
|
DB_HOST = "127.0.0.1"
|
||||||
|
DB_PORT = "5432"
|
||||||
|
|
||||||
|
self.con = psycopg2.connect(database=DB_NAME,
|
||||||
|
user=DB_USER,
|
||||||
|
password=DB_PASS,
|
||||||
|
host=DB_HOST,
|
||||||
|
port=DB_PORT)
|
||||||
self.cur = self.con.cursor()
|
self.cur = self.con.cursor()
|
||||||
|
|
||||||
self.cur.execute("""
|
self.cur.execute("""
|
||||||
CREATE TABLE IF NOT EXISTS cars(
|
CREATE TABLE IF NOT EXISTS cars(
|
||||||
id INTEGER,
|
id INTEGER PRIMARY KEY,
|
||||||
url TEXT,
|
url TEXT,
|
||||||
avto_salon TEXT,
|
avto_salon TEXT,
|
||||||
description TEXT,
|
description TEXT,
|
||||||
@ -41,21 +52,138 @@ class SqlitePostPipeline:
|
|||||||
self.cur.execute("""
|
self.cur.execute("""
|
||||||
CREATE TABLE IF NOT EXISTS images(
|
CREATE TABLE IF NOT EXISTS images(
|
||||||
post_id INTEGER,
|
post_id INTEGER,
|
||||||
url TEXT
|
url TEXT,
|
||||||
|
FOREIGN KEY(post_id) REFERENCES cars(id)
|
||||||
)
|
)
|
||||||
""")
|
""")
|
||||||
# PHONE
|
# PHONE
|
||||||
self.cur.execute("""
|
self.cur.execute("""
|
||||||
CREATE TABLE IF NOT EXISTS phones(
|
CREATE TABLE IF NOT EXISTS phones(
|
||||||
post_id INTEGER,
|
post_id INTEGER,
|
||||||
phone TEXT
|
phone TEXT,
|
||||||
|
FOREIGN KEY(post_id) REFERENCES cars(id)
|
||||||
)
|
)
|
||||||
""")
|
""")
|
||||||
# EXTRA_FIELDS
|
# EXTRA_FIELDS
|
||||||
self.cur.execute("""
|
self.cur.execute("""
|
||||||
CREATE TABLE IF NOT EXISTS extra_fields(
|
CREATE TABLE IF NOT EXISTS extra_fields(
|
||||||
post_id INTEGER,
|
post_id INTEGER,
|
||||||
extra_field TEXT
|
extra_field TEXT,
|
||||||
|
FOREIGN KEY(post_id) REFERENCES cars(id)
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
|
||||||
|
self.cur.execute("SELECT * FROM cars ORDER BY id DESC LIMIT 1")
|
||||||
|
result = self.cur.fetchone()
|
||||||
|
if result is not None: self.id = result[0] + 1
|
||||||
|
|
||||||
|
def process_item(self, item, spider):
    """Store one scraped post and its child rows in a single transaction.

    Inserts a row into ``cars`` using ``self.id`` as the primary key, then
    one row per entry of ``item['images']``, ``item['phone']`` and
    ``item['extra_fields']`` into ``images``, ``phones`` and
    ``extra_fields``. On success the transaction is committed and
    ``self.id`` is advanced by one.

    Args:
        item: Mapping-like scraped item; must provide every key listed in
            ``columns`` below (except ``id``) plus ``images``, ``phone``
            and ``extra_fields``.
        spider: The spider that produced the item (unused here).

    Returns:
        The same ``item`` object, unchanged.

    Raises:
        KeyError: If a required key is missing from ``item`` (raised
            before any statement is executed).
        Exception: Any error raised while executing or committing is
            re-raised after the transaction has been rolled back.
    """
    # Column order of the INSERT; the placeholder list is derived from it
    # so the two can never get out of sync.
    columns = (
        'id', 'url', 'avto_salon', 'description', 'city', 'brand', 'model',
        'year', 'category', 'color', 'engine_volume', 'engine_power',
        'fuel_type', 'mileage', 'mileage_type', 'transmission', 'gear',
        'price', 'currency', 'loan', 'barter', 'market', 'seats_count',
        'prior_owners_count', 'crashed', 'painted',
    )
    insert_car_sql = "INSERT INTO cars ({}) VALUES ({})".format(
        ", ".join(columns),
        ", ".join(["%s"] * len(columns)),
    )

    post_id = self.id
    # Values that do not come verbatim from the item.
    # NOTE(review): the join assumes item['description'] is a sequence of
    # text fragments -- confirm against the spider.
    computed = {
        'id': post_id,
        'description': ' '.join(item['description']),
    }
    # Resolve every value up front so a missing key fails before the
    # database is touched.
    car_values = tuple(
        computed[name] if name in computed else item[name]
        for name in columns
    )
    child_inserts = (
        ("INSERT INTO images (post_id, url) VALUES (%s, %s)",
         item['images']),
        ("INSERT INTO phones (post_id, phone) VALUES (%s, %s)",
         item['phone']),
        ("INSERT INTO extra_fields (post_id, extra_field) VALUES (%s, %s)",
         item['extra_fields']),
    )

    try:
        self.cur.execute(insert_car_sql, car_values)
        for statement, entries in child_inserts:
            for entry in entries:
                self.cur.execute(statement, (post_id, entry))
        self.con.commit()
    except Exception:
        # Without a rollback the connection keeps the failed/partial
        # transaction: later statements would fail, or the partial rows
        # would be committed together with the next item.
        self.con.rollback()
        raise

    # Advance the id only after the rows are durably stored.
    self.id += 1
    return item
|
||||||
|
|
||||||
|
class SqlitePostPipeline:
|
||||||
|
def __init__(self):
|
||||||
|
self.id = 1
|
||||||
|
self.con = sqlite3.connect('cars.db')
|
||||||
|
self.cur = self.con.cursor()
|
||||||
|
|
||||||
|
self.cur.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS cars(
|
||||||
|
id INTEGER PRIMARY KEY,
|
||||||
|
url TEXT,
|
||||||
|
avto_salon TEXT,
|
||||||
|
description TEXT,
|
||||||
|
city TEXT,
|
||||||
|
brand TEXT,
|
||||||
|
model TEXT,
|
||||||
|
year INTEGER,
|
||||||
|
category TEXT,
|
||||||
|
color TEXT,
|
||||||
|
engine_volume INTEGER,
|
||||||
|
engine_power INTEGER,
|
||||||
|
fuel_type TEXT,
|
||||||
|
mileage INTEGER,
|
||||||
|
mileage_type TEXT,
|
||||||
|
transmission TEXT,
|
||||||
|
gear TEXT,
|
||||||
|
price INTEGER,
|
||||||
|
currency TEXT,
|
||||||
|
loan TEXT,
|
||||||
|
barter TEXT,
|
||||||
|
market TEXT,
|
||||||
|
seats_count TEXT,
|
||||||
|
prior_owners_count TEXT,
|
||||||
|
crashed TEXT,
|
||||||
|
painted TEXT
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
# IMAGE
|
||||||
|
self.cur.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS images(
|
||||||
|
post_id INTEGER,
|
||||||
|
url TEXT,
|
||||||
|
FOREIGN KEY(post_id) REFERENCES cars(id)
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
# PHONE
|
||||||
|
self.cur.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS phones(
|
||||||
|
post_id INTEGER,
|
||||||
|
phone TEXT,
|
||||||
|
FOREIGN KEY(post_id) REFERENCES cars(id)
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
# EXTRA_FIELDS
|
||||||
|
self.cur.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS extra_fields(
|
||||||
|
post_id INTEGER,
|
||||||
|
extra_field TEXT,
|
||||||
|
FOREIGN KEY(post_id) REFERENCES cars(id)
|
||||||
)
|
)
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
@ -60,9 +60,10 @@ ROBOTSTXT_OBEY = True
|
|||||||
# 'scrapy.extensions.telnet.TelnetConsole': None,
|
# 'scrapy.extensions.telnet.TelnetConsole': None,
|
||||||
#}
|
#}
|
||||||
|
|
||||||
# ITEM_PIPELINES = {
|
ITEM_PIPELINES = {
|
||||||
# 'post_scrape.pipelines.SqlitePostPipeline': 300,
|
# 'post_scrape.pipelines.SqlitePostPipeline': 300,
|
||||||
# }
|
'post_scrape.pipelines.PostgresqlPostPipeline': 300,
|
||||||
|
}
|
||||||
|
|
||||||
# Enable and configure the AutoThrottle extension (disabled by default)
|
# Enable and configure the AutoThrottle extension (disabled by default)
|
||||||
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
|
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
|
||||||
|
Loading…
x
Reference in New Issue
Block a user