Compare commits
No commits in common. "90e66638a5ee7c239e753ff40228a4ded757ed4c" and "add737720e8ee64a2eb5d1cbe6ff2e887a7f5985" have entirely different histories.
90e66638a5
...
add737720e
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,107 +0,0 @@
|
||||||
import threading
|
|
||||||
import requests
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from urllib.parse import urljoin
|
|
||||||
import mysql.connector
|
|
||||||
|
|
||||||
#http://books.toscrape.com/ test scrap web
|
|
||||||
|
|
||||||
class Scrapper:
    """Recursive link scraper driven by a desktop UI.

    Fetches a page, extracts its anchor links, shows them in the UI's
    'Scrapping' tab, persists them to a MySQL database, and recursively
    scrapes each discovered link in a daemon thread until stopped.
    """

    def __init__(self, ui_instance):
        """Store the UI handle, initialise scraping state and probe the DB.

        Args:
            ui_instance: UI object expected to expose ``tabs["Scrapping"]``
                and ``left_panel.url_entry`` (assumed from usage below —
                confirm against the UI class).
        """
        self.ui_instance = ui_instance
        self.visited_links = set()    # URLs already scheduled; guarded by self.lock
        self.running = False          # stop flag polled by worker threads
        self.lock = threading.Lock()  # protects visited_links across threads

        # MySQL connection settings for the link-storage database.
        self.db_config = {
            "host": "localhost",
            "user": "root",
            "password": "",
            "database": "scrap_links_db",
            "port": 3306,
        }

        # Probe connectivity once at startup; failure is reported, not fatal.
        try:
            connection = mysql.connector.connect(**self.db_config)
            print("Conexion exitosa a base de datos")
            connection.close()
        except Exception as e:
            print(f"Error al conectar a la base de datos: {e}")

    def start_scraping(self):
        """Start scraping from the URL currently typed in the UI entry."""
        self.running = True
        url = self.get_url_from_ui()
        if url:
            print(f"Iniciando scraping en: {url}")
            self.scrape_page(url)
        else:
            print("No se proporcionó una URL válida.")

    def stop_scraping(self):
        """Signal all scraping threads to stop (they check self.running)."""
        self.running = False

    def scrape_page(self, url):
        """Fetch *url*, report/persist its links, then recurse via threads.

        Silently returns if scraping has been stopped or *url* was already
        visited; network/parse errors are logged, never raised.
        """
        if not self.running:
            return

        # BUGFIX: the membership test and the insertion are now a single
        # atomic step under the lock. Previously the test ran outside the
        # lock, so two threads could both pass it and scrape the same URL.
        with self.lock:
            if url in self.visited_links:
                return
            self.visited_links.add(url)

        try:
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                soup = BeautifulSoup(response.text, "html.parser")
                # urljoin resolves relative hrefs against the current page.
                links = [urljoin(url, a.get("href")) for a in soup.find_all("a", href=True)]
                self.update_ui(url, links)
                self.save_links_to_db(url, links)

                # One daemon thread per link so they die with the app;
                # re-check self.running so a stop request takes effect quickly.
                for link in links:
                    if self.running:
                        threading.Thread(target=self.scrape_page, args=(link,), daemon=True).start()
            else:
                print(f"Error al acceder a {url}: {response.status_code}")
        except Exception as e:
            print(f"Error al scrapear {url}: {e}")

    def update_ui(self, url, links):
        """Append the links found at *url* to the 'Scrapping' tab's text widget."""
        tab = self.ui_instance.tabs["Scrapping"]
        text_widget = tab["text_widget"]

        # The widget is kept read-only; enable it only while inserting.
        text_widget.configure(state="normal")
        text_widget.insert("end", f"Enlaces encontrados en {url}:\n")
        for link in links:
            text_widget.insert("end", f" - {link}\n")
        text_widget.see("end")
        text_widget.configure(state="disabled")

    def save_links_to_db(self, url, links):
        """Persist *links* (with *url* as their parent) into the ``links`` table.

        Opens a fresh connection per call and creates the table if needed;
        any DB error is logged and swallowed so scraping keeps going.
        """
        try:
            connection = mysql.connector.connect(**self.db_config)
            cursor = connection.cursor()
            cursor.execute("CREATE TABLE IF NOT EXISTS links (id INT AUTO_INCREMENT PRIMARY KEY, url TEXT, parent_url TEXT)")

            for link in links:
                print(f"Guardando enlace: {link} (parent: {url})")  # Verifica los datos
                # Parameterized query: safe against SQL injection from page content.
                cursor.execute("INSERT INTO links (url, parent_url) VALUES (%s, %s)", (link, url))

            connection.commit()
            cursor.close()
            connection.close()
        except Exception as e:
            # BUGFIX: corrected misspelled message ("gaurdar" -> "guardar").
            print(f"Error al guardar en la base de datos: {e}")

    def get_url_from_ui(self):
        """Return the URL typed in the UI entry, or None if unavailable."""
        try:
            url_entry = self.ui_instance.left_panel.url_entry
            return url_entry.get()
        except AttributeError:
            print("No se pudo obtener la URL desde la interfaz")
            return None
|
|
||||||
|
|
|
@ -22,7 +22,7 @@ class SystemMonitor:
|
||||||
},
|
},
|
||||||
"Processes": {
|
"Processes": {
|
||||||
"data": [],
|
"data": [],
|
||||||
"fetch_func": self.get_process_count,
|
"fetch_func": lambda: self.get_process_count,
|
||||||
"interval": 1
|
"interval": 1
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,7 +6,6 @@ import random
|
||||||
from services.threaden_task import ThreadenTask
|
from services.threaden_task import ThreadenTask
|
||||||
from services.system_monitor import SystemMonitor
|
from services.system_monitor import SystemMonitor
|
||||||
from services.tetris_game import TetrisGame
|
from services.tetris_game import TetrisGame
|
||||||
from services.scrapper import Scrapper
|
|
||||||
|
|
||||||
class ThreadsManager:
|
class ThreadsManager:
|
||||||
"""Constructor"""
|
"""Constructor"""
|
||||||
|
@ -18,10 +17,8 @@ class ThreadsManager:
|
||||||
"temperature": ThreadenTask(),
|
"temperature": ThreadenTask(),
|
||||||
"emails":ThreadenTask(),
|
"emails":ThreadenTask(),
|
||||||
"tetris_game":ThreadenTask(),
|
"tetris_game":ThreadenTask(),
|
||||||
"scrapper":ThreadenTask(),
|
|
||||||
}
|
}
|
||||||
self.system_monitor_tasks = {}
|
self.system_monitor_tasks = {}
|
||||||
self.scrapper = Scrapper(ui_instance)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -38,7 +35,6 @@ class ThreadsManager:
|
||||||
self.tasks["time"].start(self.update_time)
|
self.tasks["time"].start(self.update_time)
|
||||||
self.tasks["temperature"].start(self.update_temperature)
|
self.tasks["temperature"].start(self.update_temperature)
|
||||||
self.tasks["emails"].start(self.update_emails)
|
self.tasks["emails"].start(self.update_emails)
|
||||||
self.tasks["scrapper"].start(self.scrapper.start_scraping)
|
|
||||||
|
|
||||||
if self.system_monitor:
|
if self.system_monitor:
|
||||||
for metric in self.system_monitor.metrics.keys():
|
for metric in self.system_monitor.metrics.keys():
|
||||||
|
|
Binary file not shown.
|
@ -90,20 +90,6 @@ class CenteredWindow(ctk.CTk):
|
||||||
btn = ctk.CTkButton(left_panel, text=text, command=command, width=150)
|
btn = ctk.CTkButton(left_panel, text=text, command=command, width=150)
|
||||||
btn.pack(pady=5, padx=10)
|
btn.pack(pady=5, padx=10)
|
||||||
|
|
||||||
scrapping_label = ctk.CTkLabel(left_panel, text="Scrapping", font=("Arial", 12, "bold"))
|
|
||||||
scrapping_label.pack(anchor=ctk.W, pady=5, padx=10)
|
|
||||||
url_entry = ctk.CTkEntry(left_panel, placeholder_text="Introduce la URL")
|
|
||||||
url_entry.pack(pady=5, padx=10)
|
|
||||||
|
|
||||||
self.left_panel = left_panel
|
|
||||||
self.left_panel.url_entry = url_entry
|
|
||||||
start_button = ctk.CTkButton(left_panel, text="Iniciar Scrapping", command=lambda:
|
|
||||||
self.thread_manager.tasks["scrapper"].start(self.thread_manager.scrapper.start_scraping))
|
|
||||||
start_button.pack(pady=5, padx=10)
|
|
||||||
|
|
||||||
stop_button = ctk.CTkButton(left_panel, text="Detener Scrapping", command=self.thread_manager.tasks["scrapper"].stop)
|
|
||||||
stop_button.pack(pady=5, padx=10)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def create_center_panel(self):
|
def create_center_panel(self):
|
||||||
|
@ -115,17 +101,9 @@ class CenteredWindow(ctk.CTk):
|
||||||
tab_view.pack(fill=ctk.BOTH, expand=True)
|
tab_view.pack(fill=ctk.BOTH, expand=True)
|
||||||
|
|
||||||
# Crear pestañas y manejar contenido por separado
|
# Crear pestañas y manejar contenido por separado
|
||||||
for tab_name in ["Scrapping", "Navegador", "Correos", "Juego", "Sistema"]:
|
for tab_name in ["Resultados Scrapping", "Navegador", "Correos", "Juego", "Sistema"]:
|
||||||
tab = tab_view.add(tab_name)
|
tab = tab_view.add(tab_name)
|
||||||
|
|
||||||
if tab_name == "Scrapping":
|
|
||||||
text_widget = ctk.CTkTextbox(tab, width=500, height=400)
|
|
||||||
text_widget.pack(fill=ctk.BOTH, expand=True, padx=10, pady=10)
|
|
||||||
|
|
||||||
text_widget.configure(state="disabled")
|
|
||||||
|
|
||||||
self.tabs = {"Scrapping": {"text_widget": text_widget}}
|
|
||||||
|
|
||||||
if tab_name == "Sistema":
|
if tab_name == "Sistema":
|
||||||
# Crear un frame para los gráficos del sistema
|
# Crear un frame para los gráficos del sistema
|
||||||
system_frame = ctk.CTkFrame(tab)
|
system_frame = ctk.CTkFrame(tab)
|
||||||
|
|
Loading…
Reference in New Issue