diff --git a/src/main.py b/src/main.py
index 6ccb86b..68495be 100644
--- a/src/main.py
+++ b/src/main.py
@@ -8,4 +8,6 @@ def main():
         print(f"Error starting the application: {e}")
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
+
+    #self.tasks["scrapper"].start(self.scrapper.start_scraping)
\ No newline at end of file
diff --git a/src/services/__pycache__/scrapper.cpython-312.pyc b/src/services/__pycache__/scrapper.cpython-312.pyc
index 314d4fe..38c4ff1 100644
Binary files a/src/services/__pycache__/scrapper.cpython-312.pyc and b/src/services/__pycache__/scrapper.cpython-312.pyc differ
diff --git a/src/services/__pycache__/system_monitor.cpython-312.pyc b/src/services/__pycache__/system_monitor.cpython-312.pyc
index b9251dc..fd19bbf 100644
Binary files a/src/services/__pycache__/system_monitor.cpython-312.pyc and b/src/services/__pycache__/system_monitor.cpython-312.pyc differ
diff --git a/src/services/__pycache__/threads_manager.cpython-312.pyc b/src/services/__pycache__/threads_manager.cpython-312.pyc
index dd282d2..c7e6f2f 100644
Binary files a/src/services/__pycache__/threads_manager.cpython-312.pyc and b/src/services/__pycache__/threads_manager.cpython-312.pyc differ
diff --git a/src/services/scrapper.py b/src/services/scrapper.py
index 3c477de..22a1486 100644
--- a/src/services/scrapper.py
+++ b/src/services/scrapper.py
@@ -33,6 +33,10 @@ class Scrapper:
 
     def start_scraping(self):
         """Starts the scraping process"""
+        if self.running:
+            print("Scraping is already running.")
+            return
+
         self.running = True
         url = self.get_url_from_ui()
         if url:
@@ -42,14 +46,15 @@ class Scrapper:
         else:
             print("No valid URL was provided.")
 
-    def stop_scraping(self):
-        """Stops the scraping process"""
-        self.running = False
-        print("Scraping stopped. Process finished.")
+    def stop_scraping(self):
+        """Stops the scraping process"""
+        print("Stopping the scraping process...")
+        # Stop the tasks
+        self.scraping_task.stop_thread()
+        self.db_task.stop()
 
-        # Drain the queue to stop the insertion thread
-        while not self.link_queue.empty():
-            self.link_queue.get()
+        # Put a sentinel (None) on the queue to stop the insertion thread
+        self.link_queue.put(None)
 
         # Update the "Scrapping" tab with a message
         tab = self.ui_instance.tabs["Scrapping"]
@@ -58,36 +63,39 @@ class Scrapper:
         text_widget.configure(state="normal")
         text_widget.insert("end", "Scraping finished.\n")
         text_widget.see("end")
-        text_widget.configure(state="disabled")
+        text_widget.configure(state="disabled")
+        print("Scraping stopped. Process finished.")
 
-    def scrape_page(self, url):
-        """Scrapes a page and collects its links"""
-        if not self.running or url in self.visited_links:
-            return
-
-        with self.lock:
-            self.visited_links.add(url)
+    def scrape_page(self, url):
+        """Scrapes a page and collects its links"""
+        if not self.running or url in self.visited_links:
+            return
 
-        try:
-            response = requests.get(url, timeout=10)
-            if response.status_code == 200:
-                soup = BeautifulSoup(response.text, "html.parser")
-                links = [urljoin(url, a.get("href")) for a in soup.find_all("a", href=True)]
-                self.update_ui(url, links)
+        with self.lock:
+            self.visited_links.add(url)
 
-                for link in links:
-                    if not self.running:
-                        break
-                    self.link_queue.put((url, link))
+        try:
+            response = requests.get(url, timeout=10)
+            if response.status_code == 200:
+                soup = BeautifulSoup(response.text, "html.parser")
+                links = [urljoin(url, a.get("href")) for a in soup.find_all("a", href=True)]
+                self.update_ui(url, links)
+
+                for link in links:
+                    if not self.running:
+                        break
+                    self.link_queue.put((url, link))
+
+                # Process the links sequentially instead of spawning new threads
+                for link in links:
+                    if not self.running:
+                        break
+                    self.scrape_page(link)
+            else:
+                print(f"Error accessing {url}: {response.status_code}")
+        except Exception as e:
+            print(f"Error scraping {url}: {e}")
 
-                for link in links:
-                    if not self.running:
-                        break
-                    threading.Thread(target=self.scrape_page, args=(link,), daemon=True).start()
-            else:
-                print(f"Error accessing {url}: {response.status_code}")
-        except Exception as e:
-            print(f"Error scraping {url}: {e}")
 
     def update_ui(self, url, links):
         """Updates the 'Scrapping' tab with the links found"""
@@ -102,15 +110,16 @@ class Scrapper:
             text_widget.configure(state="disabled")
 
-
     def insert_links_to_db(self):
         """Inserts queued links into the database"""
-        while self.running or not self.link_queue.empty():
+        while True:
             try:
-                # Get a link from the queue
-                if not self.running and self.link_queue.empty():
-                    break
-                parent_url, link = self.link_queue.get(timeout=1)  # Wait 1 second if the queue is empty
+                # Get a link from the queue
+                item = self.link_queue.get(timeout=1)
+                if item is None:  # Sentinel found: stop the thread
+                    break
+
+                parent_url, link = item
                 connection = mysql.connector.connect(**self.db_config)
                 cursor = connection.cursor()
                 cursor.execute("CREATE TABLE IF NOT EXISTS links (id INT AUTO_INCREMENT PRIMARY KEY, url TEXT, parent_url TEXT)")
@@ -120,7 +129,7 @@ class Scrapper:
                 connection.close()
                 print(f"Link saved: {link} (parent: {parent_url})")
             except Exception as e:
-                print(f"Error saving to the database: {e}")
+                print(f"Error saving to the database: {e}")
 
     def get_url_from_ui(self):
@@ -131,23 +140,6 @@ class Scrapper:
         except AttributeError:
             print("Could not get the URL from the UI")
             return None
-"""
-    def save_links_to_db(self, url, links):
-        Saves the links to the database
-        try:
-            connection = mysql.connector.connect(**self.db_config)
-            cursor = connection.cursor()
-            cursor.execute("CREATE TABLE IF NOT EXISTS links (id INT AUTO_INCREMENT PRIMARY KEY, url TEXT, parent_url TEXT)")
-
-            for link in links:
-                print(f"Saving link: {link} (parent: {url})")  # Verify the data
-                cursor.execute("INSERT INTO links (url, parent_url) VALUES (%s, %s)", (link, url))
-            connection.commit()
-            cursor.close()
-            connection.close()
-        except Exception as e:
-            print(f"Error saving to the database: {e}")
-"""
\ No newline at end of file
diff --git a/src/services/system_monitor.py b/src/services/system_monitor.py
index e0f86ae..0e64cc8 100644
--- a/src/services/system_monitor.py
+++ b/src/services/system_monitor.py
@@ -92,5 +92,5 @@ class SystemMonitor:
 
         # Convert to KB/s
         total_kb = (sent_bytes + recv_bytes) / 1024
-        print(f"Network Usage: {total_kb} KB/s")
+        #print(f"Network Usage: {total_kb} KB/s")
         return total_kb
\ No newline at end of file
diff --git a/src/services/threads_manager.py b/src/services/threads_manager.py
index 1e0fed9..3daed0c 100644
--- a/src/services/threads_manager.py
+++ b/src/services/threads_manager.py
@@ -38,7 +38,7 @@ class ThreadsManager:
         self.tasks["time"].start(self.update_time)
         self.tasks["temperature"].start(self.update_temperature)
         self.tasks["emails"].start(self.update_emails)
-        self.tasks["scrapper"].start(self.scrapper.start_scraping)
+
         if self.system_monitor:
             for metric in self.system_monitor.metrics.keys():
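
Note on the shutdown change in scrapper.py: stop_scraping no longer drains link_queue; it pushes a None sentinel, and insert_links_to_db exits when it dequeues that sentinel. This is the standard way to stop a queue consumer cleanly, because every item queued before the sentinel still gets processed. A minimal, self-contained sketch of the pattern (illustrative names, not the project's code):

import queue
import threading

def consumer(q):
    """Process items until the None sentinel arrives."""
    while True:
        try:
            item = q.get(timeout=1)  # mirrors link_queue.get(timeout=1)
        except queue.Empty:
            continue  # idle wait; keep polling for the sentinel
        if item is None:  # sentinel: producer requested shutdown
            break
        print(f"processing {item}")

q = queue.Queue()
t = threading.Thread(target=consumer, args=(q,), daemon=True)
t.start()
for i in range(3):
    q.put(i)
q.put(None)  # the sentinel ends the consumer after the real items
t.join()

One detail the sketch handles differently: in the diff, get(timeout=1) sits inside a try block whose handler catches bare Exception, so an idle queue raises queue.Empty once per second and gets reported as a database error. Catching queue.Empty separately, as above, keeps the idle path silent.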
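stop_scraping also calls self.scraping_task.stop_thread() and self.db_task.stop(); those helpers are outside this diff (they appear to belong to the task objects managed by threads_manager.py), so their exact shape here is an assumption. A common implementation is a thin wrapper around threading.Event that the worker polls:

import threading
import time

class StoppableTask:
    """Hypothetical sketch of a stoppable worker wrapper; the project's
    real scraping_task/db_task objects may differ."""

    def __init__(self):
        self._stop_event = threading.Event()
        self._thread = None

    def start(self, target):
        self._thread = threading.Thread(target=target, daemon=True)
        self._thread.start()

    def stop_thread(self):
        self._stop_event.set()

    stop = stop_thread  # alias so both spellings used in the diff work

    @property
    def stopped(self):
        return self._stop_event.is_set()

# Usage sketch: the worker polls the flag instead of a shared `running` bool.
def worker():
    while not task.stopped:
        time.sleep(0.1)  # placeholder for real work

task = StoppableTask()
task.start(worker)
time.sleep(0.5)
task.stop_thread()

An Event is preferable to a plain boolean like self.running because set()/is_set() are thread-safe, and wait() would let a worker sleep until shutdown instead of busy-polling.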
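The crawling change replaces the thread-per-link fan-out (a new threading.Thread per discovered link) with a sequential recursive call to self.scrape_page(link). That removes the unbounded thread explosion, but recursion can still hit Python's default recursion limit (roughly 1000 frames) on deeply linked sites. An explicit worklist gives the same sequential behaviour without that risk; a sketch with illustrative names, not a drop-in replacement for the class method:

from collections import deque
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

def crawl(start_url, max_pages=100):
    """Iterative breadth-first variant of scrape_page: a deque worklist
    replaces recursion, so depth is bounded by memory, not the call stack."""
    visited = set()
    pending = deque([start_url])
    while pending and len(visited) < max_pages:
        url = pending.popleft()
        if url in visited:
            continue
        visited.add(url)
        try:
            response = requests.get(url, timeout=10)
        except requests.RequestException as e:
            print(f"Error scraping {url}: {e}")
            continue
        if response.status_code != 200:
            print(f"Error accessing {url}: {response.status_code}")
            continue
        soup = BeautifulSoup(response.text, "html.parser")
        pending.extend(urljoin(url, a["href"]) for a in soup.find_all("a", href=True))
    return visited

The max_pages cap is an added safety valve with no equivalent in the diff; without it, a sequential crawl of the open web never terminates.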
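Finally, insert_links_to_db still opens, commits, and closes a MySQL connection for every single link. If insert volume ever becomes a bottleneck, mysql-connector-python supports batching with cursor.executemany(); a sketch assuming db_config is the same connection dict the Scrapper already holds:

import mysql.connector

def save_links(db_config, parent_url, links):
    """Batch variant of the per-link insert: one connection and one
    executemany() call per page instead of a connection per link."""
    connection = mysql.connector.connect(**db_config)
    try:
        cursor = connection.cursor()
        cursor.execute(
            "CREATE TABLE IF NOT EXISTS links ("
            "id INT AUTO_INCREMENT PRIMARY KEY, url TEXT, parent_url TEXT)"
        )
        cursor.executemany(
            "INSERT INTO links (url, parent_url) VALUES (%s, %s)",
            [(link, parent_url) for link in links],
        )
        connection.commit()
        cursor.close()
    finally:
        connection.close()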