first commit
This commit is contained in:
commit
39b90ed7a2
|
@ -0,0 +1,13 @@
|
|||
<!DOCTYPE html>
<html lang="es">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Página 1</title>
</head>
<!-- Fix: <BODY> lowercased for consistency with the rest of the document's lowercase tags -->
<body>
    <!-- Simple navigation: forward to page 2, back to the index -->
    <a href="2.html">Ir a página 2</a>
    <br>
    <a href="index.html">Volver</a>
</body>
</html>
|
|
@ -0,0 +1,13 @@
|
|||
<!DOCTYPE html>
<html lang="es">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Página 2</title>
</head>
<!-- Fix: <BODY> lowercased for consistency with the rest of the document's lowercase tags -->
<body>
    <!-- Simple navigation: back to page 1, back to the index -->
    <a href="1.html">Ir a página 1</a>
    <br>
    <a href="index.html">Volver</a>
</body>
</html>
|
|
@ -0,0 +1,11 @@
|
|||
<!DOCTYPE html>
<html lang="es">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Inicio</title>
</head>
<!-- Fix: <BODY> lowercased for consistency with the rest of the document's lowercase tags -->
<body>
    <!-- Entry point of the small three-page site -->
    <a href="1.html">Ir a página 1</a>
</body>
</html>
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,31 @@
|
|||
import mysql.connector
|
||||
|
||||
def guardar_enlace_en_bd(cadena):
    """Insert a link string into the `enlaces` table of the `thread4` database.

    Uses INSERT IGNORE so that duplicate links (per the table's unique
    constraint) are silently skipped. Errors are reported on stdout rather
    than raised, keeping the caller's flow best-effort.

    Args:
        cadena: The link (URL string) to persist.
    """
    # Bug fix: initialize both names so the `finally` block cannot hit a
    # NameError/UnboundLocalError when connect() itself fails — previously
    # that masked the real connection error.
    conexion = None
    cursor = None
    try:
        # Connect to the database
        conexion = mysql.connector.connect(
            host="localhost",
            user="thread4",
            password="1234",
            database="thread4",
        )
        cursor = conexion.cursor()

        # Parameterized insert; INSERT IGNORE skips duplicates.
        consulta = "INSERT IGNORE INTO enlaces (enlace) VALUES (%s)"
        cursor.execute(consulta, (cadena,))

        # Commit the transaction
        conexion.commit()

        print("Cadena guardada exitosamente.")

    except mysql.connector.Error as err:
        print(f"Error: {err}")

    finally:
        # Release resources only if they were actually created.
        if conexion is not None and conexion.is_connected():
            if cursor is not None:
                cursor.close()
            conexion.close()
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
from bbdd import guardar
from scraping.busca import MiScraping

if __name__ == "__main__":
    # Persist a seed URL, then scrape a site and print every link found.
    url_semilla = "https://www.google.com"
    guardar.guardar_enlace_en_bd(url_semilla)

    scraper = MiScraping("https://ieslamar.org")
    scraper.start()
    scraper.join()  # block until the worker thread has finished scraping
    for enlace in scraper.get_links():
        print(enlace)
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,18 @@
|
|||
import threading
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import re
|
||||
|
||||
class MiScraping(threading.Thread):
    """Worker thread that downloads one page and collects its absolute links.

    Usage: construct with a URL, `start()`, `join()`, then read the results
    with `get_links()`.
    """

    def __init__(self, url):
        threading.Thread.__init__(self)
        self.url = url    # page to fetch
        self.links = []   # absolute http(s) hrefs found; empty until run() succeeds

    def run(self):
        """Thread body: fetch the page and extract absolute http(s) links."""
        try:
            # Bug fix: the original call had no timeout, so a stalled server
            # left the thread (and any join() caller) hanging forever.
            response = requests.get(self.url, timeout=10)
        except requests.RequestException as err:
            # Bug fix: a network failure previously killed the thread with an
            # unhandled traceback; now it is reported and links stays [].
            print(f"Error: {err}")
            return
        soup = BeautifulSoup(response.text, 'html.parser')
        # Keep only absolute URLs (http:// or https://); relative hrefs are dropped.
        self.links = [
            a['href']
            for a in soup.find_all('a', href=True)
            if re.match(r'http[s]?://', a['href'])
        ]

    def get_links(self):
        """Return the links collected by run(); call after join()."""
        return self.links
|
Loading…
Reference in New Issue