commit 39b90ed7a2fac5b28cd0d5bd170daf507866447a Author: Juanjo Date: Tue Oct 29 13:54:33 2024 +0100 first commit diff --git a/README.md b/README.md new file mode 100644 index 0000000..bbd18d4 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# Ejercicio Thread04 diff --git a/_servidorweb_/1.html b/_servidorweb_/1.html new file mode 100644 index 0000000..9469c1a --- /dev/null +++ b/_servidorweb_/1.html @@ -0,0 +1,13 @@ + + + + + + Página 1 + + + Ir a página 2 +
+ Volver + + \ No newline at end of file diff --git a/_servidorweb_/2.html b/_servidorweb_/2.html new file mode 100644 index 0000000..083388b --- /dev/null +++ b/_servidorweb_/2.html @@ -0,0 +1,13 @@ + + + + + + Página 2 + + + Ir a página 1 +
def guardar_enlace_en_bd(cadena):
    """Store one link in the ``enlaces`` table of the ``thread4`` MySQL database.

    Opens a connection to the MySQL server on ``localhost``, runs a
    parameterized ``INSERT IGNORE`` with ``cadena`` as the ``enlace`` value,
    commits, and prints a confirmation message.  MySQL errors are caught and
    printed rather than propagated.

    Args:
        cadena: The link text to store.

    Returns:
        None.
    """
    # Bind both names before the ``try`` so the ``finally`` clause can run
    # safely even when ``connect()`` or ``cursor()`` raises; otherwise the
    # cleanup itself fails with UnboundLocalError and hides the real error.
    conexion = None
    cursor = None
    try:
        # NOTE(review): credentials are hard-coded; consider moving them to
        # configuration or environment variables.
        conexion = mysql.connector.connect(
            host="localhost",
            user="thread4",
            password="1234",
            database="thread4",
        )
        cursor = conexion.cursor()

        # Parameterized statement: the driver escapes the value.
        # INSERT IGNORE presumably relies on a unique key on ``enlace`` to
        # skip duplicates -- the table schema is not visible here.
        consulta = "INSERT IGNORE INTO enlaces (enlace) VALUES (%s)"
        cursor.execute(consulta, (cadena,))

        # Make the insert permanent.
        conexion.commit()

        print("Cadena guardada exitosamente.")

    except mysql.connector.Error as err:
        print(f"Error: {err}")

    finally:
        # Only release what was actually acquired.
        if conexion is not None and conexion.is_connected():
            if cursor is not None:
                cursor.close()
            conexion.close()
class MiScraping(threading.Thread):
    """Worker thread that downloads one page and collects its absolute links.

    After the thread has run, ``links`` holds the ``href`` value of every
    ``<a>`` element whose ``href`` starts with ``http://`` or ``https://``.
    Relative links are discarded.
    """

    # Compiled once; matches only at the start of the href (used with .match).
    _ABSOLUTE_URL = re.compile(r'http[s]?://')

    def __init__(self, url, timeout=10):
        """Prepare the thread without performing any network access.

        Args:
            url: Address of the page to download when the thread runs.
            timeout: Seconds to wait for the server before giving up.
                ``requests`` applies no timeout by default, which would let
                an unresponsive server block this thread (and any caller
                waiting in ``join()``) indefinitely.
        """
        super().__init__()
        self.url = url
        self.timeout = timeout
        self.links = []

    def run(self):
        """Download ``self.url`` and store its absolute http(s) links.

        Exceptions raised by the request are not caught here; in that case
        ``links`` keeps its initial empty list.
        """
        response = requests.get(self.url, timeout=self.timeout)
        soup = BeautifulSoup(response.text, 'html.parser')
        is_absolute = self._ABSOLUTE_URL.match
        self.links = [
            anchor['href']
            for anchor in soup.find_all('a', href=True)
            if is_absolute(anchor['href'])
        ]

    def get_links(self):
        """Return the list of links collected by ``run`` (empty before it runs)."""
        return self.links