Compare commits
2 Commits
5c052c8d92
...
647cef869a
Author | SHA1 | Date |
---|---|---|
|
647cef869a | |
|
bc1badbb95 |
|
@ -8,4 +8,6 @@ def main():
|
|||
print(f"Error al iniciar la aplicación: {e}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
||||
|
||||
#self.tasks["scrapper"].start(self.scrapper.start_scraping)
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -33,6 +33,10 @@ class Scrapper:
|
|||
|
||||
def start_scraping(self):
|
||||
"""Inicia el proceso de scraping"""
|
||||
if self.running:
|
||||
print("El scrapping ya está en ejecución.")
|
||||
return
|
||||
|
||||
self.running = True
|
||||
url = self.get_url_from_ui()
|
||||
if url:
|
||||
|
@ -42,14 +46,15 @@ class Scrapper:
|
|||
else:
|
||||
print("No se proporcionó una URL válida.")
|
||||
|
||||
def stop_scraping(self):
|
||||
"""Detiene el proceso de scraping"""
|
||||
self.running = False
|
||||
print("Scrapping detenido. Proceso finalizado.")
|
||||
def stop_scraping(self):
|
||||
"""Detiene el proceso de scraping"""
|
||||
print("Deteniendo el proceso de scraping...")
|
||||
# Detener las tareas
|
||||
self.scraping_task.stop_thread()
|
||||
self.db_task.stop()
|
||||
|
||||
#Vaciar la cola para detener el hilo de inserción
|
||||
while not self.link_queue.empty():
|
||||
self.link_queue.get()
|
||||
# Inserta un sentinel (None) en la cola para detener el hilo de inserción
|
||||
self.link_queue.put(None)
|
||||
|
||||
# Actualiza la pestaña "Scrapping" con un mensaje
|
||||
tab = self.ui_instance.tabs["Scrapping"]
|
||||
|
@ -58,36 +63,39 @@ class Scrapper:
|
|||
text_widget.configure(state="normal")
|
||||
text_widget.insert("end", "Scrapping finalizado.\n")
|
||||
text_widget.see("end")
|
||||
text_widget.configure(state="disabled")
|
||||
text_widget.configure(state="disabled")
|
||||
print("Scrapping detenido. Proceso finalizado.")
|
||||
|
||||
def scrape_page(self, url):
|
||||
"""Scrapea una web y busca los enlaces"""
|
||||
if not self.running or url in self.visited_links:
|
||||
return
|
||||
|
||||
with self.lock:
|
||||
self.visited_links.add(url)
|
||||
def scrape_page(self, url):
|
||||
"""Scrapea una web y busca los enlaces"""
|
||||
if not self.running or url in self.visited_links:
|
||||
return
|
||||
|
||||
try:
|
||||
response = requests.get(url, timeout=10)
|
||||
if response.status_code == 200:
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
links = [urljoin(url, a.get("href")) for a in soup.find_all("a", href=True)]
|
||||
self.update_ui(url, links)
|
||||
with self.lock:
|
||||
self.visited_links.add(url)
|
||||
|
||||
for link in links:
|
||||
if not self.running:
|
||||
break
|
||||
self.link_queue.put((url, link))
|
||||
try:
|
||||
response = requests.get(url, timeout=10)
|
||||
if response.status_code == 200:
|
||||
soup = BeautifulSoup(response.text, "html.parser")
|
||||
links = [urljoin(url, a.get("href")) for a in soup.find_all("a", href=True)]
|
||||
self.update_ui(url, links)
|
||||
|
||||
for link in links:
|
||||
if not self.running:
|
||||
break
|
||||
self.link_queue.put((url, link))
|
||||
|
||||
# Procesar los enlaces de forma secuencial en lugar de crear nuevos hilos
|
||||
for link in links:
|
||||
if not self.running:
|
||||
break
|
||||
self.scrape_page(link)
|
||||
else:
|
||||
print(f"Error al acceder a {url}: {response.status_code}")
|
||||
except Exception as e:
|
||||
print(f"Error al scrapear {url}: {e}")
|
||||
|
||||
for link in links:
|
||||
if not self.running:
|
||||
break
|
||||
threading.Thread(target=self.scrape_page, args=(link,), daemon=True).start()
|
||||
else:
|
||||
print(f"Error al acceder a {url}: {response.status_code}")
|
||||
except Exception as e:
|
||||
print(f"Error al scrapear {url}: {e}")
|
||||
|
||||
def update_ui(self, url, links):
|
||||
"""Actualiza la pestaña 'Scrapping' con los enlaces encontrados"""
|
||||
|
@ -102,15 +110,16 @@ class Scrapper:
|
|||
text_widget.configure(state="disabled")
|
||||
|
||||
|
||||
|
||||
def insert_links_to_db(self):
|
||||
"""Inserta los enlaces en la base de datos desde la cola"""
|
||||
while self.running or not self.link_queue.empty():
|
||||
while True:
|
||||
try:
|
||||
# Obtener un enlace de la cola
|
||||
if not self.running and self.link_queue.empty():
|
||||
break
|
||||
parent_url, link = self.link_queue.get(timeout=1) # Espera 1 segundo si la cola está vacía
|
||||
# Obtener un enlace de la cola
|
||||
item = self.link_queue.get(timeout=1)
|
||||
if item is None: # Si encuentra el sentinel, detiene el hilo
|
||||
break
|
||||
|
||||
parent_url, link = item
|
||||
connection = mysql.connector.connect(**self.db_config)
|
||||
cursor = connection.cursor()
|
||||
cursor.execute("CREATE TABLE IF NOT EXISTS links (id INT AUTO_INCREMENT PRIMARY KEY, url TEXT, parent_url TEXT)")
|
||||
|
@ -120,7 +129,7 @@ class Scrapper:
|
|||
connection.close()
|
||||
print(f"Enlace guardado: {link} (parent: {parent_url})")
|
||||
except Exception as e:
|
||||
print(f"Error al guardar en la base de datos: {e}")
|
||||
print(f"Error al guardar en la base de datos: {e}")
|
||||
|
||||
|
||||
def get_url_from_ui(self):
|
||||
|
@ -131,23 +140,6 @@ class Scrapper:
|
|||
except AttributeError:
|
||||
print("No se pudo obtener la URL desde la interfaz")
|
||||
return None
|
||||
"""
|
||||
def save_links_to_db(self, url, links):
|
||||
Guarda los enlaces en la base de datos
|
||||
try:
|
||||
connection = mysql.connector.connect(**self.db_config)
|
||||
cursor = connection.cursor()
|
||||
cursor.execute("CREATE TABLE IF NOT EXISTS links (id INT AUTO_INCREMENT PRIMARY KEY, url TEXT, parent_url TEXT)")
|
||||
|
||||
for link in links:
|
||||
print(f"Guardando enlace: {link} (parent: {url})") # Verifica los datos
|
||||
cursor.execute("INSERT INTO links (url, parent_url) VALUES (%s, %s)", (link, url))
|
||||
|
||||
connection.commit()
|
||||
cursor.close()
|
||||
connection.close()
|
||||
except Exception as e:
|
||||
print(f"Error al gaurdar en la base de datos: {e}")
|
||||
"""
|
||||
|
||||
|
|
@ -8,7 +8,8 @@ class SystemMonitor:
|
|||
def __init__(self, parent_frame):
|
||||
self.parent_frame = parent_frame
|
||||
self.max_data_points = 60
|
||||
self.running = False
|
||||
self.running = False
|
||||
self.previous_net_io = psutil.net_io_counters()
|
||||
self.metrics = {
|
||||
"CPU Usage": {
|
||||
"data": [],
|
||||
|
@ -20,9 +21,9 @@ class SystemMonitor:
|
|||
"fetch_func": lambda: psutil.virtual_memory().percent,
|
||||
"interval": 1
|
||||
},
|
||||
"Processes": {
|
||||
"Network Usage (KB/s)": {
|
||||
"data": [],
|
||||
"fetch_func": self.get_process_count,
|
||||
"fetch_func": self.get_network_usage,
|
||||
"interval": 1
|
||||
}
|
||||
}
|
||||
|
@ -76,11 +77,20 @@ class SystemMonitor:
|
|||
|
||||
def redraw():
|
||||
graph["line"].set_data(x, data)
|
||||
graph["axis"].set_xlim(0, len(data))
|
||||
graph["axis"].set_xlim(0, len(data))
|
||||
graph["axis"].set_ylim(0, max(data) * 1.2 if data else 100)
|
||||
graph["figure"].canvas.draw()
|
||||
|
||||
self.parent_frame.after(0, redraw)
|
||||
|
||||
def get_process_count(self):
|
||||
"""Obtiene el número de procesos actuales."""
|
||||
return len(psutil.pids())
|
||||
def get_network_usage(self):
|
||||
"""Calcula la velocidad de transferencia de red en KB/s."""
|
||||
current_net_io = psutil.net_io_counters()
|
||||
sent_bytes = current_net_io.bytes_sent - self.previous_net_io.bytes_sent
|
||||
recv_bytes = current_net_io.bytes_recv - self.previous_net_io.bytes_recv
|
||||
self.previous_net_io = current_net_io # Actualiza los datos previos
|
||||
|
||||
# Convierte a KB/s
|
||||
total_kb = (sent_bytes + recv_bytes) / 1024
|
||||
#print(f"Network Usage: {total_kb} KB/s")
|
||||
return total_kb
|
|
@ -38,7 +38,7 @@ class ThreadsManager:
|
|||
self.tasks["time"].start(self.update_time)
|
||||
self.tasks["temperature"].start(self.update_temperature)
|
||||
self.tasks["emails"].start(self.update_emails)
|
||||
self.tasks["scrapper"].start(self.scrapper.start_scraping)
|
||||
|
||||
|
||||
if self.system_monitor:
|
||||
for metric in self.system_monitor.metrics.keys():
|
||||
|
|
Loading…
Reference in New Issue