Scrapper works but shuts down without raising any errors
This commit is contained in:
parent
277e4dcf1e
commit
31e539c726
Binary file not shown.
|
@ -4,6 +4,8 @@ from bs4 import BeautifulSoup
|
|||
from urllib.parse import urljoin
|
||||
import mysql.connector
|
||||
|
||||
# Test website for scraping: http://books.toscrape.com/
|
||||
|
||||
class Scrapper:
|
||||
def __init__(self, ui_instance):
|
||||
self.ui_instance = ui_instance
|
||||
|
@ -31,9 +33,12 @@ class Scrapper:
|
|||
"""Inicia el proceso de scraping"""
|
||||
self.running = True
|
||||
url = self.get_url_from_ui()
|
||||
if url:
|
||||
self.scrape_page(url)
|
||||
|
||||
if url:
|
||||
print(f"Iniciando scraping en: {url}")
|
||||
self.scrape_page(url)
|
||||
else:
|
||||
print("No se proporcionó una URL válida.")
|
||||
|
||||
def stop_scraping(self):
|
||||
"""Detiene el proceso de scraping"""
|
||||
self.running = False
|
||||
|
@ -65,11 +70,14 @@ class Scrapper:
|
|||
def update_ui(self, url, links):
|
||||
"""Actualiza la pestaña 'Scrapping' con los enlaces encontrados"""
|
||||
tab = self.ui_instance.tabs["Scrapping"]
|
||||
text_widget = tab.text_widget
|
||||
text_widget = tab["text_widget"]
|
||||
|
||||
text_widget.configure(state="normal")
|
||||
text_widget.insert("end", f"Enlaces encontrados en {url}:\n")
|
||||
for link in links:
|
||||
text_widget.insert("end", f" - {link}\n")
|
||||
text_widget.see("end")
|
||||
text_widget.see("end")
|
||||
text_widget.configure(state="disabled")
|
||||
|
||||
def save_links_to_db(self, url, links):
|
||||
"""Guarda los enlaces en la base de datos"""
|
||||
|
@ -79,18 +87,18 @@ class Scrapper:
|
|||
cursor.execute("CREATE TABLE IF NOT EXISTS links (id INT AUTO_INCREMENT PRIMARY KEY, url TEXT, parent_url TEXT)")
|
||||
for link in links:
|
||||
cursor.execute("INSERT INTO links (url, parent_url) VALUES (%s, %s)", (link, url))
|
||||
connection.commit()
|
||||
cursor.close()
|
||||
connection.close()
|
||||
connection.commit()
|
||||
cursor.close()
|
||||
connection.close()
|
||||
except:
|
||||
print(f"Error al gaurdar en la base de datos")
|
||||
|
||||
def get_url_from_ui(self):
|
||||
"""Obtiene la URL desde la interfaz de usuario"""
|
||||
try:
|
||||
url_entry = self.ui_instance.left_panel.url_entry
|
||||
return url_entry.get()
|
||||
except AttributeError:
|
||||
print("No se pudo obtener la URL desde la interfaz")
|
||||
return None
|
||||
def get_url_from_ui(self):
|
||||
"""Obtiene la URL desde la interfaz de usuario"""
|
||||
try:
|
||||
url_entry = self.ui_instance.left_panel.url_entry
|
||||
return url_entry.get()
|
||||
except AttributeError:
|
||||
print("No se pudo obtener la URL desde la interfaz")
|
||||
return None
|
||||
|
Binary file not shown.
|
@ -92,11 +92,11 @@ class CenteredWindow(ctk.CTk):
|
|||
|
||||
scrapping_label = ctk.CTkLabel(left_panel, text="Scrapping", font=("Arial", 12, "bold"))
|
||||
scrapping_label.pack(anchor=ctk.W, pady=5, padx=10)
|
||||
url_entry = ctk.CTkEntry(left_panel, placeholder_text="Introduce la URL")
|
||||
url_entry.pack(pady=5, padx=10)
|
||||
url_entry = ctk.CTkEntry(left_panel, placeholder_text="Introduce la URL")
|
||||
url_entry.pack(pady=5, padx=10)
|
||||
|
||||
self.left_panel = left_panel
|
||||
self.left_panel.url_entry = url_entry
|
||||
self.left_panel.url_entry = url_entry
|
||||
start_button = ctk.CTkButton(left_panel, text="Iniciar Scrapping", command=lambda:
|
||||
self.thread_manager.tasks["scrapper"].start(self.thread_manager.scrapper.start_scraping))
|
||||
start_button.pack(pady=5, padx=10)
|
||||
|
@ -118,6 +118,14 @@ class CenteredWindow(ctk.CTk):
|
|||
for tab_name in ["Scrapping", "Navegador", "Correos", "Juego", "Sistema"]:
|
||||
tab = tab_view.add(tab_name)
|
||||
|
||||
if tab_name == "Scrapping":
|
||||
text_widget = ctk.CTkTextbox(tab, width=500, height=400)
|
||||
text_widget.pack(fill=ctk.BOTH, expand=True, padx=10, pady=10)
|
||||
|
||||
text_widget.configure(state="disabled")
|
||||
|
||||
self.tabs = {"Scrapping": {"text_widget": text_widget}}
|
||||
|
||||
if tab_name == "Sistema":
|
||||
# Crear un frame para los gráficos del sistema
|
||||
system_frame = ctk.CTkFrame(tab)
|
||||
|
|
Loading…
Reference in New Issue