Added BeautifulSoup to html2text function

This commit is contained in:
Aykhan 2023-09-13 23:46:11 +04:00
parent b74bac45f5
commit fc0551f7dc
3 changed files with 34 additions and 7 deletions

View File

@@ -1,9 +1,6 @@
import re
from bs4 import BeautifulSoup
def html2text(html: str) -> str:
    """Return the text content of an HTML string with the markup removed.

    The input is parsed with BeautifulSoup using the ``html.parser``
    backend, and the text of the resulting tree is returned via
    ``get_text()``.

    Args:
        html: HTML source to convert.

    Returns:
        The text extracted from the parsed document.
    """
    # A real parser replaces the earlier regex-based stripping ('<.*?>'),
    # whose '.' could not match a newline and so left tags that spanned
    # several lines in the output.
    soup = BeautifulSoup(html, 'html.parser')
    return soup.get_text()

31
src/poetry.lock generated
View File

@@ -130,6 +130,24 @@ files = [
docs = ["Sphinx (>=5.3.0,<5.4.0)", "sphinx-rtd-theme (>=1.2.2)", "sphinxcontrib-asyncio (>=0.3.0,<0.4.0)"]
test = ["flake8 (>=5.0,<6.0)", "uvloop (>=0.15.3)"]
[[package]]
name = "beautifulsoup4"
version = "4.12.2"
description = "Screen-scraping library"
optional = false
python-versions = ">=3.6.0"
files = [
{file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"},
{file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"},
]
[package.dependencies]
soupsieve = ">1.2"
[package.extras]
html5lib = ["html5lib"]
lxml = ["lxml"]
[[package]]
name = "blinker"
version = "1.6.2"
@@ -975,6 +993,17 @@ files = [
{file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"},
]
[[package]]
name = "soupsieve"
version = "2.5"
description = "A modern CSS selector implementation for Beautiful Soup."
optional = false
python-versions = ">=3.8"
files = [
{file = "soupsieve-2.5-py3-none-any.whl", hash = "sha256:eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7"},
{file = "soupsieve-2.5.tar.gz", hash = "sha256:5663d5a7b3bfaeee0bc4372e7fc48f9cff4940b3eec54a6451cc5299f1097690"},
]
[[package]]
name = "sqlalchemy"
version = "2.0.20"
@@ -1114,4 +1143,4 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)",
[metadata]
lock-version = "2.0"
python-versions = "^3.10"
content-hash = "333a9be3573863da41b6a2615cd28aa3a6daf2e608a76da1476af72d343dae52"
content-hash = "78fafe7f9e40a699277d899f9b66a4bc2b9d191008a5fcd6bd393462d51d473f"

View File

@@ -23,6 +23,7 @@ pillow = "^10.0.0"
aiofiles = "^23.2.1"
python-jose = {extras = ["cryptography"], version = "^3.3.0"}
fastapi-mail = "^1.4.1"
beautifulsoup4 = "^4.12.2"
[build-system]