Tried fixing the stop issue with the thread, but it just doesn't want to work; start scraping works fine now
This commit is contained in:
parent
bc1badbb95
commit
647cef869a
|
@ -9,3 +9,5 @@ def main():
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
#self.tasks["scrapper"].start(self.scrapper.start_scraping)
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -33,6 +33,10 @@ class Scrapper:
|
||||||
|
|
||||||
def start_scraping(self):
|
def start_scraping(self):
|
||||||
"""Inicia el proceso de scraping"""
|
"""Inicia el proceso de scraping"""
|
||||||
|
if self.running:
|
||||||
|
print("El scrapping ya está en ejecución.")
|
||||||
|
return
|
||||||
|
|
||||||
self.running = True
|
self.running = True
|
||||||
url = self.get_url_from_ui()
|
url = self.get_url_from_ui()
|
||||||
if url:
|
if url:
|
||||||
|
@ -44,12 +48,13 @@ class Scrapper:
|
||||||
|
|
||||||
def stop_scraping(self):
|
def stop_scraping(self):
|
||||||
"""Detiene el proceso de scraping"""
|
"""Detiene el proceso de scraping"""
|
||||||
self.running = False
|
print("Deteniendo el proceso de scraping...")
|
||||||
print("Scrapping detenido. Proceso finalizado.")
|
# Detener las tareas
|
||||||
|
self.scraping_task.stop_thread()
|
||||||
|
self.db_task.stop()
|
||||||
|
|
||||||
#Vaciar la cola para detener el hilo de inserción
|
# Inserta un sentinel (None) en la cola para detener el hilo de inserción
|
||||||
while not self.link_queue.empty():
|
self.link_queue.put(None)
|
||||||
self.link_queue.get()
|
|
||||||
|
|
||||||
# Actualiza la pestaña "Scrapping" con un mensaje
|
# Actualiza la pestaña "Scrapping" con un mensaje
|
||||||
tab = self.ui_instance.tabs["Scrapping"]
|
tab = self.ui_instance.tabs["Scrapping"]
|
||||||
|
@ -59,6 +64,7 @@ class Scrapper:
|
||||||
text_widget.insert("end", "Scrapping finalizado.\n")
|
text_widget.insert("end", "Scrapping finalizado.\n")
|
||||||
text_widget.see("end")
|
text_widget.see("end")
|
||||||
text_widget.configure(state="disabled")
|
text_widget.configure(state="disabled")
|
||||||
|
print("Scrapping detenido. Proceso finalizado.")
|
||||||
|
|
||||||
def scrape_page(self, url):
|
def scrape_page(self, url):
|
||||||
"""Scrapea una web y busca los enlaces"""
|
"""Scrapea una web y busca los enlaces"""
|
||||||
|
@ -80,15 +86,17 @@ class Scrapper:
|
||||||
break
|
break
|
||||||
self.link_queue.put((url, link))
|
self.link_queue.put((url, link))
|
||||||
|
|
||||||
|
# Procesar los enlaces de forma secuencial en lugar de crear nuevos hilos
|
||||||
for link in links:
|
for link in links:
|
||||||
if not self.running:
|
if not self.running:
|
||||||
break
|
break
|
||||||
threading.Thread(target=self.scrape_page, args=(link,), daemon=True).start()
|
self.scrape_page(link)
|
||||||
else:
|
else:
|
||||||
print(f"Error al acceder a {url}: {response.status_code}")
|
print(f"Error al acceder a {url}: {response.status_code}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error al scrapear {url}: {e}")
|
print(f"Error al scrapear {url}: {e}")
|
||||||
|
|
||||||
|
|
||||||
def update_ui(self, url, links):
|
def update_ui(self, url, links):
|
||||||
"""Actualiza la pestaña 'Scrapping' con los enlaces encontrados"""
|
"""Actualiza la pestaña 'Scrapping' con los enlaces encontrados"""
|
||||||
tab = self.ui_instance.tabs["Scrapping"]
|
tab = self.ui_instance.tabs["Scrapping"]
|
||||||
|
@ -102,15 +110,16 @@ class Scrapper:
|
||||||
text_widget.configure(state="disabled")
|
text_widget.configure(state="disabled")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def insert_links_to_db(self):
|
def insert_links_to_db(self):
|
||||||
"""Inserta los enlaces en la base de datos desde la cola"""
|
"""Inserta los enlaces en la base de datos desde la cola"""
|
||||||
while self.running or not self.link_queue.empty():
|
while True:
|
||||||
try:
|
try:
|
||||||
# Obtener un enlace de la cola
|
# Obtener un enlace de la cola
|
||||||
if not self.running and self.link_queue.empty():
|
item = self.link_queue.get(timeout=1)
|
||||||
|
if item is None: # Si encuentra el sentinel, detiene el hilo
|
||||||
break
|
break
|
||||||
parent_url, link = self.link_queue.get(timeout=1) # Espera 1 segundo si la cola está vacía
|
|
||||||
|
parent_url, link = item
|
||||||
connection = mysql.connector.connect(**self.db_config)
|
connection = mysql.connector.connect(**self.db_config)
|
||||||
cursor = connection.cursor()
|
cursor = connection.cursor()
|
||||||
cursor.execute("CREATE TABLE IF NOT EXISTS links (id INT AUTO_INCREMENT PRIMARY KEY, url TEXT, parent_url TEXT)")
|
cursor.execute("CREATE TABLE IF NOT EXISTS links (id INT AUTO_INCREMENT PRIMARY KEY, url TEXT, parent_url TEXT)")
|
||||||
|
@ -131,23 +140,6 @@ class Scrapper:
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
print("No se pudo obtener la URL desde la interfaz")
|
print("No se pudo obtener la URL desde la interfaz")
|
||||||
return None
|
return None
|
||||||
"""
|
|
||||||
def save_links_to_db(self, url, links):
|
|
||||||
Guarda los enlaces en la base de datos
|
|
||||||
try:
|
|
||||||
connection = mysql.connector.connect(**self.db_config)
|
|
||||||
cursor = connection.cursor()
|
|
||||||
cursor.execute("CREATE TABLE IF NOT EXISTS links (id INT AUTO_INCREMENT PRIMARY KEY, url TEXT, parent_url TEXT)")
|
|
||||||
|
|
||||||
for link in links:
|
|
||||||
print(f"Guardando enlace: {link} (parent: {url})") # Verifica los datos
|
|
||||||
cursor.execute("INSERT INTO links (url, parent_url) VALUES (%s, %s)", (link, url))
|
|
||||||
|
|
||||||
connection.commit()
|
|
||||||
cursor.close()
|
|
||||||
connection.close()
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Error al gaurdar en la base de datos: {e}")
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
|
|
@ -92,5 +92,5 @@ class SystemMonitor:
|
||||||
|
|
||||||
# Convierte a KB/s
|
# Convierte a KB/s
|
||||||
total_kb = (sent_bytes + recv_bytes) / 1024
|
total_kb = (sent_bytes + recv_bytes) / 1024
|
||||||
print(f"Network Usage: {total_kb} KB/s")
|
#print(f"Network Usage: {total_kb} KB/s")
|
||||||
return total_kb
|
return total_kb
|
|
@ -38,7 +38,7 @@ class ThreadsManager:
|
||||||
self.tasks["time"].start(self.update_time)
|
self.tasks["time"].start(self.update_time)
|
||||||
self.tasks["temperature"].start(self.update_temperature)
|
self.tasks["temperature"].start(self.update_temperature)
|
||||||
self.tasks["emails"].start(self.update_emails)
|
self.tasks["emails"].start(self.update_emails)
|
||||||
self.tasks["scrapper"].start(self.scrapper.start_scraping)
|
|
||||||
|
|
||||||
if self.system_monitor:
|
if self.system_monitor:
|
||||||
for metric in self.system_monitor.metrics.keys():
|
for metric in self.system_monitor.metrics.keys():
|
||||||
|
|
Loading…
Reference in New Issue