first commit
This commit is contained in:
commit
39b90ed7a2
|
@ -0,0 +1,13 @@
|
|||
<!DOCTYPE html>
<html lang="es">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Página 1</title>
</head>
<!-- Fix: <BODY> lowercased for consistency with the rest of the document's lowercase tags -->
<body>
    <!-- Simple navigation: forward to page 2, back to the index -->
    <a href="2.html">Ir a página 2</a>
    <br>
    <a href="index.html">Volver</a>
</body>
</html>
|
|
@ -0,0 +1,13 @@
|
|||
<!DOCTYPE html>
<html lang="es">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Página 2</title>
</head>
<!-- Fix: <BODY> lowercased for consistency with the rest of the document's lowercase tags -->
<body>
    <!-- Simple navigation: back to page 1, back to the index -->
    <a href="1.html">Ir a página 1</a>
    <br>
    <a href="index.html">Volver</a>
</body>
</html>
|
|
@ -0,0 +1,11 @@
|
|||
<!DOCTYPE html>
<html lang="es">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Inicio</title>
</head>
<!-- Fix: <BODY> lowercased for consistency with the rest of the document's lowercase tags -->
<body>
    <!-- Entry point of the small three-page site -->
    <a href="1.html">Ir a página 1</a>
</body>
</html>
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,31 @@
|
|||
import mysql.connector
|
||||
|
||||
def guardar_enlace_en_bd(cadena):
    """Insert a link string into the `enlaces` table of the `thread4` database.

    Uses INSERT IGNORE so that duplicate links (per the table's unique
    constraint) are silently skipped. Errors are reported on stdout rather
    than raised, keeping the caller's flow best-effort.

    Args:
        cadena: The link (URL string) to persist.
    """
    # Bug fix: initialize both names so the `finally` block cannot hit a
    # NameError/UnboundLocalError when connect() itself fails — previously
    # that masked the real connection error.
    conexion = None
    cursor = None
    try:
        # Connect to the database
        conexion = mysql.connector.connect(
            host="localhost",
            user="thread4",
            password="1234",
            database="thread4",
        )
        cursor = conexion.cursor()

        # Parameterized insert; INSERT IGNORE skips duplicates.
        consulta = "INSERT IGNORE INTO enlaces (enlace) VALUES (%s)"
        cursor.execute(consulta, (cadena,))

        # Commit the transaction
        conexion.commit()

        print("Cadena guardada exitosamente.")

    except mysql.connector.Error as err:
        print(f"Error: {err}")

    finally:
        # Release resources only if they were actually created.
        if conexion is not None and conexion.is_connected():
            if cursor is not None:
                cursor.close()
            conexion.close()
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
from bbdd import guardar
from scraping.busca import MiScraping

if __name__ == "__main__":
    # Persist a seed URL, then scrape a site and print every link found.
    url_semilla = "https://www.google.com"
    guardar.guardar_enlace_en_bd(url_semilla)

    scraper = MiScraping("https://ieslamar.org")
    scraper.start()
    scraper.join()  # block until the worker thread has finished scraping
    for enlace in scraper.get_links():
        print(enlace)
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,18 @@
|
|||
import threading
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
|
||||
class MiScraping(threading.Thread):
    """Worker thread that downloads one page and collects its absolute links.

    Usage: construct with a URL, `start()`, `join()`, then read the results
    with `get_links()`.
    """

    def __init__(self, url):
        threading.Thread.__init__(self)
        self.url = url    # page to fetch
        self.links = []   # absolute http(s) hrefs found; empty until run() succeeds

    def run(self):
        """Thread body: fetch the page and extract absolute http(s) links."""
        try:
            # Bug fix: the original call had no timeout, so a stalled server
            # left the thread (and any join() caller) hanging forever.
            response = requests.get(self.url, timeout=10)
        except requests.RequestException as err:
            # Bug fix: a network failure previously killed the thread with an
            # unhandled traceback; now it is reported and links stays [].
            print(f"Error: {err}")
            return
        soup = BeautifulSoup(response.text, 'html.parser')
        # Keep only absolute URLs (http:// or https://); relative hrefs are dropped.
        self.links = [
            a['href']
            for a in soup.find_all('a', href=True)
            if re.match(r'http[s]?://', a['href'])
        ]

    def get_links(self):
        """Return the links collected by run(); call after join()."""
        return self.links
|
Loading…
Reference in New Issue