first commit

This commit is contained in:
Juanjo 2024-10-29 13:54:33 +01:00
commit 39b90ed7a2
13 changed files with 100 additions and 0 deletions

1
README.md Normal file
View File

@ -0,0 +1 @@
# Ejercicio Thread04

13
_servidorweb_/1.html Normal file
View File

@ -0,0 +1,13 @@
<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Página 1</title>
</head>
<BODY>
<a href="2.html">Ir a página 2</a>
<br>
<a href="index.html">Volver</a>
</BODY>
</html>

13
_servidorweb_/2.html Normal file
View File

@ -0,0 +1,13 @@
<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Página 2</title>
</head>
<BODY>
<a href="1.html">Ir a página 1</a>
<br>
<a href="index.html">Volver</a>
</BODY>
</html>

11
_servidorweb_/index.html Normal file
View File

@ -0,0 +1,11 @@
<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Inicio</title>
</head>
<BODY>
<a href="1.html">Ir a página 1</a>
</BODY>
</html>

0
bbdd/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

31
bbdd/guardar.py Normal file
View File

@ -0,0 +1,31 @@
import mysql.connector
def guardar_enlace_en_bd(cadena: str) -> None:
    """Insert ``cadena`` into the ``enlace`` column of the ``enlaces`` table.

    Opens a new MySQL connection, runs a parameterized ``INSERT IGNORE``
    and commits. A ``mysql.connector.Error`` raised along the way is caught
    and printed instead of being propagated to the caller.

    Args:
        cadena: Value bound to the single ``%s`` placeholder of the query.
    """
    # Bound before the ``try`` so the ``finally`` clause never touches an
    # undefined name when connect() or cursor() fails part-way through.
    conexion = None
    cursor = None
    try:
        # Connect to the database
        conexion = mysql.connector.connect(
            host="localhost",
            user="thread4",
            password="1234",
            database="thread4"
        )
        cursor = conexion.cursor()
        # Query that inserts the string; the value travels as a parameter
        consulta = "INSERT IGNORE INTO enlaces (enlace) VALUES (%s)"
        cursor.execute(consulta, (cadena,))
        # Commit the transaction
        conexion.commit()
        print("Cadena guardada exitosamente.")
    except mysql.connector.Error as err:
        print(f"Error: {err}")
    finally:
        # Release only what was actually acquired.
        if conexion is not None and conexion.is_connected():
            if cursor is not None:
                cursor.close()
            conexion.close()

13
main.py Normal file
View File

@ -0,0 +1,13 @@
from bbdd import guardar
from scraping.busca import MiScraping
if __name__ == "__main__":
    # Store one sample link in the database.
    guardar.guardar_enlace_en_bd("https://www.google.com")

    # Run the scraper in its own thread and wait for it to finish
    # before reading the links it collected.
    scraper = MiScraping("https://ieslamar.org")
    scraper.start()
    scraper.join()

    for enlace in scraper.get_links():
        print(enlace)

0
scraping/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

18
scraping/busca.py Normal file
View File

@ -0,0 +1,18 @@
import threading
import requests
from bs4 import BeautifulSoup
import re
class MiScraping(threading.Thread):
    """Thread that downloads one page and collects its absolute http(s) links.

    Args:
        url: Address of the page to fetch when the thread runs.
        timeout: Value forwarded to ``requests.get`` as ``timeout``.
            Passing ``None`` removes the limit, which is how the request
            was issued before this parameter existed.
    """

    def __init__(self, url, timeout=10):
        super().__init__()
        self.url = url
        self.timeout = timeout
        # Filled in by run(); stays empty until the thread has finished.
        self.links = []

    def run(self):
        """Fetch ``self.url`` and store the matching ``href`` values in ``self.links``."""
        # Without a timeout an unresponsive server would block this thread,
        # and any caller waiting on join(), indefinitely.
        response = requests.get(self.url, timeout=self.timeout)
        soup = BeautifulSoup(response.text, 'html.parser')
        # Keep only anchors whose href starts with an explicit http/https scheme.
        self.links = [
            a['href']
            for a in soup.find_all('a', href=True)
            if a['href'].startswith(('http://', 'https://'))
        ]

    def get_links(self):
        """Return the list of links gathered by ``run`` (empty before it runs)."""
        return self.links