Fixed the Scrapper not finishing; closing the window now also stops the Scrapper, and links are now inserted into the database correctly
This commit is contained in:
parent
f39935ba98
commit
cb64a2b180
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -4,6 +4,7 @@ from bs4 import BeautifulSoup
|
||||||
from urllib.parse import urljoin
|
from urllib.parse import urljoin
|
||||||
import mysql.connector
|
import mysql.connector
|
||||||
from queue import Queue
|
from queue import Queue
|
||||||
|
from services.threaden_task import ThreadenTask
|
||||||
|
|
||||||
#http://books.toscrape.com/ test scrap web
|
#http://books.toscrape.com/ test scrap web
|
||||||
|
|
||||||
|
@ -11,7 +12,7 @@ class Scrapper:
|
||||||
def __init__(self, ui_instance):
|
def __init__(self, ui_instance):
|
||||||
self.ui_instance = ui_instance
|
self.ui_instance = ui_instance
|
||||||
self.visited_links = set()
|
self.visited_links = set()
|
||||||
self.running=False
|
self.running = False
|
||||||
self.lock = threading.Lock()
|
self.lock = threading.Lock()
|
||||||
self.link_queue = Queue()
|
self.link_queue = Queue()
|
||||||
|
|
||||||
|
@ -31,6 +32,10 @@ class Scrapper:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error al conectar a la base de datos: {e}")
|
print(f"Error al conectar a la base de datos: {e}")
|
||||||
|
|
||||||
|
#Tareas para el scrapping y la base de datos
|
||||||
|
self.scraping_task = ThreadenTask()
|
||||||
|
self.db_task = ThreadenTask()
|
||||||
|
|
||||||
def start_scraping(self):
|
def start_scraping(self):
|
||||||
"""Inicia el proceso de scraping"""
|
"""Inicia el proceso de scraping"""
|
||||||
if self.running:
|
if self.running:
|
||||||
|
@ -38,20 +43,24 @@ class Scrapper:
|
||||||
return
|
return
|
||||||
|
|
||||||
self.running = True
|
self.running = True
|
||||||
|
self.visited_links.clear()
|
||||||
|
self.link_queue.queue.clear()
|
||||||
|
|
||||||
url = self.get_url_from_ui()
|
url = self.get_url_from_ui()
|
||||||
if url:
|
if url:
|
||||||
print(f"Iniciando scraping en: {url}")
|
print(f"Iniciando scraping en: {url}")
|
||||||
threading.Thread(target=self.scrape_page, args=(url,), daemon=True).start()
|
self.scraping_task.start(self.scrape_page, url)
|
||||||
threading.Thread(target=self.insert_links_to_db, daemon=True).start()
|
self.db_task.start(self.insert_links_to_db)
|
||||||
else:
|
else:
|
||||||
print("No se proporcionó una URL válida.")
|
print("No se proporcionó una URL válida.")
|
||||||
|
|
||||||
def stop_scraping(self):
|
def stop_scraping(self):
|
||||||
"""Detiene el proceso de scraping"""
|
"""Detiene el proceso de scraping"""
|
||||||
print("Deteniendo el proceso de scraping...")
|
print("Deteniendo el proceso de scraping...")
|
||||||
|
self.running = False
|
||||||
# Detener las tareas
|
# Detener las tareas
|
||||||
self.scraping_task.stop_thread()
|
self.scraping_task.stop()
|
||||||
self.db_task.stop()
|
self.db_task.stop()
|
||||||
|
|
||||||
# Inserta un sentinel (None) en la cola para detener el hilo de inserción
|
# Inserta un sentinel (None) en la cola para detener el hilo de inserción
|
||||||
self.link_queue.put(None)
|
self.link_queue.put(None)
|
||||||
|
@ -79,7 +88,8 @@ class Scrapper:
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
soup = BeautifulSoup(response.text, "html.parser")
|
soup = BeautifulSoup(response.text, "html.parser")
|
||||||
links = [urljoin(url, a.get("href")) for a in soup.find_all("a", href=True)]
|
links = [urljoin(url, a.get("href")) for a in soup.find_all("a", href=True)]
|
||||||
self.update_ui(url, links)
|
if self.running:
|
||||||
|
self.update_ui(url, links)
|
||||||
|
|
||||||
for link in links:
|
for link in links:
|
||||||
if not self.running:
|
if not self.running:
|
||||||
|
@ -112,7 +122,7 @@ class Scrapper:
|
||||||
|
|
||||||
def insert_links_to_db(self):
|
def insert_links_to_db(self):
|
||||||
"""Inserta los enlaces en la base de datos desde la cola"""
|
"""Inserta los enlaces en la base de datos desde la cola"""
|
||||||
while True:
|
while self.db_task.running:
|
||||||
try:
|
try:
|
||||||
# Obtener un enlace de la cola
|
# Obtener un enlace de la cola
|
||||||
item = self.link_queue.get(timeout=1)
|
item = self.link_queue.get(timeout=1)
|
||||||
|
|
Binary file not shown.
|
@ -64,6 +64,9 @@ class CenteredWindow(ctk.CTk):
|
||||||
if "tetris_game" in self.thread_manager.tasks:
|
if "tetris_game" in self.thread_manager.tasks:
|
||||||
self.thread_manager.tasks["tetris_game"].stop()
|
self.thread_manager.tasks["tetris_game"].stop()
|
||||||
|
|
||||||
|
if hasattr(self.thread_manager, "scrapper"):
|
||||||
|
self.thread_manager.scrapper.stop_scraping()
|
||||||
|
|
||||||
self.destroy()
|
self.destroy()
|
||||||
|
|
||||||
|
|
||||||
|
@ -112,11 +115,10 @@ class CenteredWindow(ctk.CTk):
|
||||||
self.left_panel = left_panel
|
self.left_panel = left_panel
|
||||||
self.left_panel.url_entry = url_entry
|
self.left_panel.url_entry = url_entry
|
||||||
self.left_panel.url_entry_chrome = url_entry_chrome
|
self.left_panel.url_entry_chrome = url_entry_chrome
|
||||||
start_button = ctk.CTkButton(left_panel, text="Iniciar Scrapping", command=lambda:
|
start_button = ctk.CTkButton(left_panel, text="Iniciar Scrapping", command=self.thread_manager.scrapper.start_scraping)
|
||||||
self.thread_manager.tasks["scrapper"].start(self.thread_manager.scrapper.start_scraping))
|
|
||||||
start_button.pack(pady=5, padx=10)
|
start_button.pack(pady=5, padx=10)
|
||||||
|
|
||||||
stop_button = ctk.CTkButton(left_panel, text="Detener Scrapping", command=self.thread_manager.tasks["scrapper"].stop)
|
stop_button = ctk.CTkButton(left_panel, text="Detener Scrapping", command=self.thread_manager.scrapper.stop_scraping)
|
||||||
stop_button.pack(pady=5, padx=10)
|
stop_button.pack(pady=5, padx=10)
|
||||||
|
|
||||||
|
|
||||||
|
@ -177,10 +179,10 @@ class CenteredWindow(ctk.CTk):
|
||||||
self.tetris_game = TetrisGame(game_frame)
|
self.tetris_game = TetrisGame(game_frame)
|
||||||
self.tetris_game.pack()
|
self.tetris_game.pack()
|
||||||
|
|
||||||
else:
|
# else:
|
||||||
# Agregar contenido genérico a otras pestañas
|
# Agregar contenido genérico a otras pestañas
|
||||||
label = ctk.CTkLabel(tab, text=f"Contenido de {tab_name}", font=("Arial", 12))
|
#label = ctk.CTkLabel(tab, text=f"Contenido de {tab_name}", font=("Arial", 12))
|
||||||
label.pack(pady=10)
|
#label.pack(pady=10)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue